Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c  16
-rw-r--r--  fs/9p/vfs_super.c  5
-rw-r--r--  fs/Kconfig  5
-rw-r--r--  fs/Makefile  1
-rw-r--r--  fs/adfs/super.c  3
-rw-r--r--  fs/affs/super.c  7
-rw-r--r--  fs/afs/file.c  2
-rw-r--r--  fs/afs/fsclient.c  8
-rw-r--r--  fs/afs/mntpt.c  4
-rw-r--r--  fs/afs/super.c  7
-rw-r--r--  fs/aio.c  145
-rw-r--r--  fs/anon_inodes.c  109
-rw-r--r--  fs/attr.c  2
-rw-r--r--  fs/autofs4/autofs_i.h  12
-rw-r--r--  fs/autofs4/dev-ioctl.c  5
-rw-r--r--  fs/autofs4/init.c  6
-rw-r--r--  fs/autofs4/inode.c  14
-rw-r--r--  fs/autofs4/waitq.c  22
-rw-r--r--  fs/bad_inode.c  2
-rw-r--r--  fs/befs/linuxvfs.c  3
-rw-r--r--  fs/bfs/inode.c  3
-rw-r--r--  fs/binfmt_aout.c  37
-rw-r--r--  fs/binfmt_elf.c  83
-rw-r--r--  fs/binfmt_elf_fdpic.c  25
-rw-r--r--  fs/binfmt_em86.c  3
-rw-r--r--  fs/binfmt_flat.c  19
-rw-r--r--  fs/binfmt_misc.c  10
-rw-r--r--  fs/binfmt_script.c  3
-rw-r--r--  fs/binfmt_som.c  16
-rw-r--r--  fs/bio-integrity.c  10
-rw-r--r--  fs/bio.c  9
-rw-r--r--  fs/block_dev.c  11
-rw-r--r--  fs/btrfs/acl.c  4
-rw-r--r--  fs/btrfs/btrfs_inode.h  50
-rw-r--r--  fs/btrfs/check-integrity.c  585
-rw-r--r--  fs/btrfs/compression.c  12
-rw-r--r--  fs/btrfs/ctree.c  12
-rw-r--r--  fs/btrfs/ctree.h  45
-rw-r--r--  fs/btrfs/delayed-inode.c  8
-rw-r--r--  fs/btrfs/disk-io.c  50
-rw-r--r--  fs/btrfs/disk-io.h  1
-rw-r--r--  fs/btrfs/extent-tree.c  13
-rw-r--r--  fs/btrfs/extent_io.c  104
-rw-r--r--  fs/btrfs/extent_io.h  5
-rw-r--r--  fs/btrfs/file-item.c  4
-rw-r--r--  fs/btrfs/file.c  76
-rw-r--r--  fs/btrfs/free-space-cache.c  47
-rw-r--r--  fs/btrfs/inode.c  290
-rw-r--r--  fs/btrfs/ioctl.c  48
-rw-r--r--  fs/btrfs/ioctl.h  33
-rw-r--r--  fs/btrfs/lzo.c  4
-rw-r--r--  fs/btrfs/ordered-data.c  165
-rw-r--r--  fs/btrfs/ordered-data.h  13
-rw-r--r--  fs/btrfs/print-tree.c  3
-rw-r--r--  fs/btrfs/reada.c  5
-rw-r--r--  fs/btrfs/scrub.c  92
-rw-r--r--  fs/btrfs/super.c  125
-rw-r--r--  fs/btrfs/transaction.c  4
-rw-r--r--  fs/btrfs/tree-log.c  35
-rw-r--r--  fs/btrfs/ulist.c  6
-rw-r--r--  fs/btrfs/ulist.h  6
-rw-r--r--  fs/btrfs/volumes.c  306
-rw-r--r--  fs/btrfs/volumes.h  52
-rw-r--r--  fs/btrfs/xattr.c  1
-rw-r--r--  fs/btrfs/zlib.c  4
-rw-r--r--  fs/buffer.c  22
-rw-r--r--  fs/cachefiles/namei.c  3
-rw-r--r--  fs/ceph/inode.c  11
-rw-r--r--  fs/ceph/mds_client.c  7
-rw-r--r--  fs/ceph/snap.c  2
-rw-r--r--  fs/ceph/super.c  22
-rw-r--r--  fs/ceph/super.h  4
-rw-r--r--  fs/ceph/xattr.c  202
-rw-r--r--  fs/cifs/README  6
-rw-r--r--  fs/cifs/cifs_debug.c  71
-rw-r--r--  fs/cifs/cifs_debug.h  4
-rw-r--r--  fs/cifs/cifsacl.c  1
-rw-r--r--  fs/cifs/cifsfs.c  61
-rw-r--r--  fs/cifs/cifsfs.h  2
-rw-r--r--  fs/cifs/cifsglob.h  86
-rw-r--r--  fs/cifs/cifsproto.h  29
-rw-r--r--  fs/cifs/cifssmb.c  174
-rw-r--r--  fs/cifs/connect.c  1487
-rw-r--r--  fs/cifs/dir.c  23
-rw-r--r--  fs/cifs/file.c  305
-rw-r--r--  fs/cifs/misc.c  119
-rw-r--r--  fs/cifs/netmisc.c  6
-rw-r--r--  fs/cifs/transport.c  305
-rw-r--r--  fs/coda/inode.c  7
-rw-r--r--  fs/coda/psdev.c  1
-rw-r--r--  fs/coda/upcall.c  1
-rw-r--r--  fs/compat.c  27
-rw-r--r--  fs/compat_ioctl.c  3
-rw-r--r--  fs/configfs/configfs_internal.h  7
-rw-r--r--  fs/configfs/dir.c  72
-rw-r--r--  fs/configfs/inode.c  62
-rw-r--r--  fs/configfs/mount.c  16
-rw-r--r--  fs/configfs/symlink.c  12
-rw-r--r--  fs/cramfs/inode.c  12
-rw-r--r--  fs/dcache.c  78
-rw-r--r--  fs/dcookies.c  2
-rw-r--r--  fs/debugfs/file.c  16
-rw-r--r--  fs/debugfs/inode.c  149
-rw-r--r--  fs/devpts/inode.c  88
-rw-r--r--  fs/dlm/debug_fs.c  9
-rw-r--r--  fs/dlm/dir.c  17
-rw-r--r--  fs/dlm/lock.c  20
-rw-r--r--  fs/dlm/lock.h  3
-rw-r--r--  fs/dlm/lowcomms.c  24
-rw-r--r--  fs/ecryptfs/file.c  9
-rw-r--r--  fs/ecryptfs/main.c  19
-rw-r--r--  fs/ecryptfs/super.c  1
-rw-r--r--  fs/efs/super.c  3
-rw-r--r--  fs/eventfd.c  2
-rw-r--r--  fs/eventpoll.c  50
-rw-r--r--  fs/exec.c  42
-rw-r--r--  fs/exofs/dir.c  4
-rw-r--r--  fs/exofs/namei.c  13
-rw-r--r--  fs/exofs/super.c  11
-rw-r--r--  fs/ext2/dir.c  4
-rw-r--r--  fs/ext2/ext2.h  631
-rw-r--r--  fs/ext2/namei.c  13
-rw-r--r--  fs/ext2/super.c  4
-rw-r--r--  fs/ext2/xattr_security.c  5
-rw-r--r--  fs/ext2/xattr_trusted.c  5
-rw-r--r--  fs/ext2/xip.c  2
-rw-r--r--  fs/ext3/acl.c  8
-rw-r--r--  fs/ext3/balloc.c  94
-rw-r--r--  fs/ext3/bitmap.c  4
-rw-r--r--  fs/ext3/dir.c  7
-rw-r--r--  fs/ext3/ext3.h  1322
-rw-r--r--  fs/ext3/ext3_jbd.c  2
-rw-r--r--  fs/ext3/file.c  6
-rw-r--r--  fs/ext3/fsync.c  8
-rw-r--r--  fs/ext3/hash.c  4
-rw-r--r--  fs/ext3/ialloc.c  13
-rw-r--r--  fs/ext3/inode.c  21
-rw-r--r--  fs/ext3/ioctl.c  7
-rw-r--r--  fs/ext3/namei.c  14
-rw-r--r--  fs/ext3/resize.c  5
-rw-r--r--  fs/ext3/super.c  21
-rw-r--r--  fs/ext3/symlink.c  4
-rw-r--r--  fs/ext3/xattr.c  7
-rw-r--r--  fs/ext3/xattr_security.c  6
-rw-r--r--  fs/ext3/xattr_trusted.c  6
-rw-r--r--  fs/ext3/xattr_user.c  5
-rw-r--r--  fs/ext4/balloc.c  63
-rw-r--r--  fs/ext4/dir.c  227
-rw-r--r--  fs/ext4/ext4.h  43
-rw-r--r--  fs/ext4/ext4_extents.h  4
-rw-r--r--  fs/ext4/ext4_jbd2.h  128
-rw-r--r--  fs/ext4/extents.c  336
-rw-r--r--  fs/ext4/fsync.c  2
-rw-r--r--  fs/ext4/hash.c  4
-rw-r--r--  fs/ext4/ialloc.c  260
-rw-r--r--  fs/ext4/inode.c  95
-rw-r--r--  fs/ext4/mballoc.c  342
-rw-r--r--  fs/ext4/mballoc.h  20
-rw-r--r--  fs/ext4/migrate.c  2
-rw-r--r--  fs/ext4/mmp.c  4
-rw-r--r--  fs/ext4/namei.c  2
-rw-r--r--  fs/ext4/page-io.c  11
-rw-r--r--  fs/ext4/resize.c  37
-rw-r--r--  fs/ext4/super.c  1117
-rw-r--r--  fs/ext4/xattr.c  25
-rw-r--r--  fs/fat/inode.c  8
-rw-r--r--  fs/fat/namei_vfat.c  83
-rw-r--r--  fs/fcntl.c  18
-rw-r--r--  fs/file.c  54
-rw-r--r--  fs/file_table.c  3
-rw-r--r--  fs/freevxfs/vxfs_super.c  3
-rw-r--r--  fs/fs-writeback.c  26
-rw-r--r--  fs/fs_struct.c  31
-rw-r--r--  fs/fuse/dev.c  4
-rw-r--r--  fs/fuse/dir.c  25
-rw-r--r--  fs/fuse/file.c  133
-rw-r--r--  fs/fuse/inode.c  10
-rw-r--r--  fs/gfs2/Kconfig  7
-rw-r--r--  fs/gfs2/aops.c  16
-rw-r--r--  fs/gfs2/bmap.c  10
-rw-r--r--  fs/gfs2/dir.c  4
-rw-r--r--  fs/gfs2/file.c  16
-rw-r--r--  fs/gfs2/glock.c  210
-rw-r--r--  fs/gfs2/incore.h  50
-rw-r--r--  fs/gfs2/inode.c  17
-rw-r--r--  fs/gfs2/lock_dlm.c  133
-rw-r--r--  fs/gfs2/log.c  244
-rw-r--r--  fs/gfs2/log.h  5
-rw-r--r--  fs/gfs2/lops.c  103
-rw-r--r--  fs/gfs2/main.c  18
-rw-r--r--  fs/gfs2/ops_fstype.c  12
-rw-r--r--  fs/gfs2/quota.c  6
-rw-r--r--  fs/gfs2/rgrp.c  191
-rw-r--r--  fs/gfs2/rgrp.h  10
-rw-r--r--  fs/gfs2/super.c  3
-rw-r--r--  fs/gfs2/trace_gfs2.h  60
-rw-r--r--  fs/gfs2/util.c  1
-rw-r--r--  fs/gfs2/util.h  3
-rw-r--r--  fs/gfs2/xattr.c  16
-rw-r--r--  fs/hfs/super.c  6
-rw-r--r--  fs/hfsplus/catalog.c  4
-rw-r--r--  fs/hfsplus/dir.c  11
-rw-r--r--  fs/hfsplus/hfsplus_fs.h  5
-rw-r--r--  fs/hfsplus/hfsplus_raw.h  2
-rw-r--r--  fs/hfsplus/inode.c  2
-rw-r--r--  fs/hfsplus/ioctl.c  34
-rw-r--r--  fs/hfsplus/super.c  17
-rw-r--r--  fs/hostfs/hostfs.h  3
-rw-r--r--  fs/hostfs/hostfs_kern.c  9
-rw-r--r--  fs/hostfs/hostfs_user.c  4
-rw-r--r--  fs/hpfs/super.c  6
-rw-r--r--  fs/hppfs/hppfs.c  9
-rw-r--r--  fs/hugetlbfs/inode.c  151
-rw-r--r--  fs/inode.c  28
-rw-r--r--  fs/ioctl.c  2
-rw-r--r--  fs/isofs/inode.c  3
-rw-r--r--  fs/jbd/journal.c  14
-rw-r--r--  fs/jbd/transaction.c  4
-rw-r--r--  fs/jbd2/checkpoint.c  140
-rw-r--r--  fs/jbd2/commit.c  56
-rw-r--r--  fs/jbd2/journal.c  376
-rw-r--r--  fs/jbd2/recovery.c  5
-rw-r--r--  fs/jbd2/revoke.c  12
-rw-r--r--  fs/jbd2/transaction.c  52
-rw-r--r--  fs/jffs2/acl.c  2
-rw-r--r--  fs/jffs2/background.c  29
-rw-r--r--  fs/jffs2/build.c  6
-rw-r--r--  fs/jffs2/compr.c  32
-rw-r--r--  fs/jffs2/compr_lzo.c  1
-rw-r--r--  fs/jffs2/compr_rubin.c  2
-rw-r--r--  fs/jffs2/compr_zlib.c  45
-rw-r--r--  fs/jffs2/debug.c  22
-rw-r--r--  fs/jffs2/debug.h  50
-rw-r--r--  fs/jffs2/dir.c  41
-rw-r--r--  fs/jffs2/erase.c  72
-rw-r--r--  fs/jffs2/file.c  33
-rw-r--r--  fs/jffs2/fs.c  73
-rw-r--r--  fs/jffs2/gc.c  324
-rw-r--r--  fs/jffs2/malloc.c  2
-rw-r--r--  fs/jffs2/nodelist.c  30
-rw-r--r--  fs/jffs2/nodemgmt.c  214
-rw-r--r--  fs/jffs2/os-linux.h  4
-rw-r--r--  fs/jffs2/read.c  70
-rw-r--r--  fs/jffs2/readinode.c  2
-rw-r--r--  fs/jffs2/scan.c  229
-rw-r--r--  fs/jffs2/security.c  4
-rw-r--r--  fs/jffs2/summary.c  16
-rw-r--r--  fs/jffs2/super.c  30
-rw-r--r--  fs/jffs2/symlink.c  7
-rw-r--r--  fs/jffs2/wbuf.c  148
-rw-r--r--  fs/jffs2/write.c  113
-rw-r--r--  fs/jffs2/xattr.c  2
-rw-r--r--  fs/jfs/namei.c  13
-rw-r--r--  fs/jfs/super.c  12
-rw-r--r--  fs/libfs.c  19
-rw-r--r--  fs/lockd/clnt4xdr.c  4
-rw-r--r--  fs/lockd/clntlock.c  3
-rw-r--r--  fs/lockd/clntxdr.c  10
-rw-r--r--  fs/lockd/host.c  42
-rw-r--r--  fs/lockd/mon.c  21
-rw-r--r--  fs/lockd/netns.h  12
-rw-r--r--  fs/lockd/svc.c  119
-rw-r--r--  fs/lockd/svclock.c  59
-rw-r--r--  fs/locks.c  3
-rw-r--r--  fs/logfs/dir.c  21
-rw-r--r--  fs/logfs/readwrite.c  38
-rw-r--r--  fs/logfs/segment.c  4
-rw-r--r--  fs/logfs/super.c  12
-rw-r--r--  fs/minix/dir.c  4
-rw-r--r--  fs/minix/inode.c  38
-rw-r--r--  fs/minix/minix.h  1
-rw-r--r--  fs/minix/namei.c  14
-rw-r--r--  fs/mpage.c  2
-rw-r--r--  fs/namei.c  314
-rw-r--r--  fs/ncpfs/file.c  1
-rw-r--r--  fs/ncpfs/inode.c  7
-rw-r--r--  fs/ncpfs/mmap.c  1
-rw-r--r--  fs/nfs/Kconfig  29
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c  165
-rw-r--r--  fs/nfs/blocklayout/blocklayout.h  11
-rw-r--r--  fs/nfs/blocklayout/blocklayoutdev.c  46
-rw-r--r--  fs/nfs/blocklayout/blocklayoutdm.c  33
-rw-r--r--  fs/nfs/blocklayout/extents.c  2
-rw-r--r--  fs/nfs/cache_lib.c  61
-rw-r--r--  fs/nfs/cache_lib.h  10
-rw-r--r--  fs/nfs/callback.c  19
-rw-r--r--  fs/nfs/callback.h  3
-rw-r--r--  fs/nfs/callback_proc.c  99
-rw-r--r--  fs/nfs/callback_xdr.c  21
-rw-r--r--  fs/nfs/client.c  253
-rw-r--r--  fs/nfs/delegation.c  68
-rw-r--r--  fs/nfs/delegation.h  4
-rw-r--r--  fs/nfs/dir.c  35
-rw-r--r--  fs/nfs/direct.c  7
-rw-r--r--  fs/nfs/dns_resolve.c  130
-rw-r--r--  fs/nfs/dns_resolve.h  14
-rw-r--r--  fs/nfs/file.c  3
-rw-r--r--  fs/nfs/fscache.c  2
-rw-r--r--  fs/nfs/getroot.c  7
-rw-r--r--  fs/nfs/idmap.c  736
-rw-r--r--  fs/nfs/inode.c  120
-rw-r--r--  fs/nfs/internal.h  23
-rw-r--r--  fs/nfs/mount_clnt.c  16
-rw-r--r--  fs/nfs/namespace.c  98
-rw-r--r--  fs/nfs/netns.h  27
-rw-r--r--  fs/nfs/nfs2xdr.c  2
-rw-r--r--  fs/nfs/nfs3acl.c  2
-rw-r--r--  fs/nfs/nfs3proc.c  24
-rw-r--r--  fs/nfs/nfs3xdr.c  4
-rw-r--r--  fs/nfs/nfs4_fs.h  69
-rw-r--r--  fs/nfs/nfs4filelayout.c  271
-rw-r--r--  fs/nfs/nfs4filelayout.h  7
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c  92
-rw-r--r--  fs/nfs/nfs4namespace.c  96
-rw-r--r--  fs/nfs/nfs4proc.c  774
-rw-r--r--  fs/nfs/nfs4state.c  386
-rw-r--r--  fs/nfs/nfs4xdr.c  750
-rw-r--r--  fs/nfs/nfsroot.c  2
-rw-r--r--  fs/nfs/objlayout/objio_osd.c  54
-rw-r--r--  fs/nfs/objlayout/objlayout.c  144
-rw-r--r--  fs/nfs/objlayout/objlayout.h  2
-rw-r--r--  fs/nfs/pagelist.c  92
-rw-r--r--  fs/nfs/pnfs.c  48
-rw-r--r--  fs/nfs/pnfs.h  98
-rw-r--r--  fs/nfs/pnfs_dev.c  4
-rw-r--r--  fs/nfs/proc.c  24
-rw-r--r--  fs/nfs/read.c  17
-rw-r--r--  fs/nfs/super.c  180
-rw-r--r--  fs/nfs/sysctl.c  2
-rw-r--r--  fs/nfs/unlink.c  45
-rw-r--r--  fs/nfs/write.c  216
-rw-r--r--  fs/nfsd/current_stateid.h  28
-rw-r--r--  fs/nfsd/export.c  2
-rw-r--r--  fs/nfsd/fault_inject.c  2
-rw-r--r--  fs/nfsd/netns.h  34
-rw-r--r--  fs/nfsd/nfs3xdr.c  22
-rw-r--r--  fs/nfsd/nfs4callback.c  27
-rw-r--r--  fs/nfsd/nfs4idmap.c  53
-rw-r--r--  fs/nfsd/nfs4proc.c  131
-rw-r--r--  fs/nfsd/nfs4recover.c  647
-rw-r--r--  fs/nfsd/nfs4state.c  390
-rw-r--r--  fs/nfsd/nfs4xdr.c  132
-rw-r--r--  fs/nfsd/nfsctl.c  28
-rw-r--r--  fs/nfsd/nfsd.h  7
-rw-r--r--  fs/nfsd/nfssvc.c  48
-rw-r--r--  fs/nfsd/state.h  47
-rw-r--r--  fs/nfsd/stats.c  5
-rw-r--r--  fs/nfsd/vfs.c  46
-rw-r--r--  fs/nfsd/vfs.h  2
-rw-r--r--  fs/nfsd/xdr4.h  34
-rw-r--r--  fs/nilfs2/cpfile.c  94
-rw-r--r--  fs/nilfs2/dat.c  38
-rw-r--r--  fs/nilfs2/dir.c  4
-rw-r--r--  fs/nilfs2/ifile.c  4
-rw-r--r--  fs/nilfs2/mdt.c  4
-rw-r--r--  fs/nilfs2/namei.c  11
-rw-r--r--  fs/nilfs2/page.c  8
-rw-r--r--  fs/nilfs2/recovery.c  4
-rw-r--r--  fs/nilfs2/segbuf.c  4
-rw-r--r--  fs/nilfs2/sufile.c  68
-rw-r--r--  fs/nilfs2/super.c  4
-rw-r--r--  fs/notify/notification.c  3
-rw-r--r--  fs/ntfs/aops.c  20
-rw-r--r--  fs/ntfs/attrib.c  20
-rw-r--r--  fs/ntfs/file.c  16
-rw-r--r--  fs/ntfs/layout.h  4
-rw-r--r--  fs/ntfs/super.c  17
-rw-r--r--  fs/ocfs2/alloc.c  2
-rw-r--r--  fs/ocfs2/aops.c  16
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c  14
-rw-r--r--  fs/ocfs2/ioctl.c  2
-rw-r--r--  fs/ocfs2/refcounttree.c  12
-rw-r--r--  fs/ocfs2/suballoc.c  4
-rw-r--r--  fs/ocfs2/super.c  51
-rw-r--r--  fs/omfs/inode.c  6
-rw-r--r--  fs/open.c  4
-rw-r--r--  fs/openpromfs/inode.c  3
-rw-r--r--  fs/pipe.c  40
-rw-r--r--  fs/posix_acl.c  2
-rw-r--r--  fs/proc/array.c  119
-rw-r--r--  fs/proc/base.c  80
-rw-r--r--  fs/proc/inode.c  17
-rw-r--r--  fs/proc/internal.h  12
-rw-r--r--  fs/proc/kcore.c  8
-rw-r--r--  fs/proc/namespaces.c  8
-rw-r--r--  fs/proc/page.c  2
-rw-r--r--  fs/proc/proc_sysctl.c  1276
-rw-r--r--  fs/proc/root.c  9
-rw-r--r--  fs/proc/stat.c  96
-rw-r--r--  fs/proc/task_mmu.c  370
-rw-r--r--  fs/proc/task_nommu.c  69
-rw-r--r--  fs/proc/vmcore.c  23
-rw-r--r--  fs/pstore/inode.c  57
-rw-r--r--  fs/pstore/platform.c  30
-rw-r--r--  fs/qnx4/inode.c  88
-rw-r--r--  fs/qnx4/namei.c  9
-rw-r--r--  fs/qnx4/qnx4.h  2
-rw-r--r--  fs/qnx6/Kconfig  26
-rw-r--r--  fs/qnx6/Makefile  7
-rw-r--r--  fs/qnx6/README  8
-rw-r--r--  fs/qnx6/dir.c  291
-rw-r--r--  fs/qnx6/inode.c  698
-rw-r--r--  fs/qnx6/namei.c  42
-rw-r--r--  fs/qnx6/qnx6.h  135
-rw-r--r--  fs/qnx6/super_mmi.c  150
-rw-r--r--  fs/quota/dquot.c  190
-rw-r--r--  fs/quota/quota.c  3
-rw-r--r--  fs/ramfs/inode.c  30
-rw-r--r--  fs/read_write.c  2
-rw-r--r--  fs/readdir.c  2
-rw-r--r--  fs/reiserfs/acl.h  76
-rw-r--r--  fs/reiserfs/bitmap.c  4
-rw-r--r--  fs/reiserfs/dir.c  2
-rw-r--r--  fs/reiserfs/do_balan.c  2
-rw-r--r--  fs/reiserfs/file.c  6
-rw-r--r--  fs/reiserfs/fix_node.c  2
-rw-r--r--  fs/reiserfs/hashes.c  2
-rw-r--r--  fs/reiserfs/ibalance.c  2
-rw-r--r--  fs/reiserfs/inode.c  6
-rw-r--r--  fs/reiserfs/ioctl.c  2
-rw-r--r--  fs/reiserfs/item_ops.c  2
-rw-r--r--  fs/reiserfs/journal.c  3
-rw-r--r--  fs/reiserfs/lbalance.c  4
-rw-r--r--  fs/reiserfs/lock.c  2
-rw-r--r--  fs/reiserfs/namei.c  6
-rw-r--r--  fs/reiserfs/objectid.c  3
-rw-r--r--  fs/reiserfs/prints.c  4
-rw-r--r--  fs/reiserfs/procfs.c  3
-rw-r--r--  fs/reiserfs/reiserfs.h  2923
-rw-r--r--  fs/reiserfs/resize.c  3
-rw-r--r--  fs/reiserfs/stree.c  6
-rw-r--r--  fs/reiserfs/super.c  12
-rw-r--r--  fs/reiserfs/tail_conversion.c  6
-rw-r--r--  fs/reiserfs/xattr.c  6
-rw-r--r--  fs/reiserfs/xattr.h  122
-rw-r--r--  fs/reiserfs/xattr_acl.c  6
-rw-r--r--  fs/reiserfs/xattr_security.c  4
-rw-r--r--  fs/reiserfs/xattr_trusted.c  4
-rw-r--r--  fs/reiserfs/xattr_user.c  4
-rw-r--r--  fs/romfs/storage.c  2
-rw-r--r--  fs/romfs/super.c  6
-rw-r--r--  fs/select.c  44
-rw-r--r--  fs/seq_file.c  114
-rw-r--r--  fs/splice.c  14
-rw-r--r--  fs/squashfs/block.c  3
-rw-r--r--  fs/squashfs/dir.c  7
-rw-r--r--  fs/squashfs/file.c  8
-rw-r--r--  fs/squashfs/namei.c  5
-rw-r--r--  fs/squashfs/squashfs_fs.h  19
-rw-r--r--  fs/squashfs/super.c  8
-rw-r--r--  fs/squashfs/symlink.c  4
-rw-r--r--  fs/stack.c  2
-rw-r--r--  fs/stat.c  4
-rw-r--r--  fs/statfs.c  2
-rw-r--r--  fs/super.c  5
-rw-r--r--  fs/sync.c  2
-rw-r--r--  fs/sysfs/dir.c  227
-rw-r--r--  fs/sysfs/group.c  6
-rw-r--r--  fs/sysfs/inode.c  11
-rw-r--r--  fs/sysfs/mount.c  5
-rw-r--r--  fs/sysfs/sysfs.h  17
-rw-r--r--  fs/sysv/namei.c  12
-rw-r--r--  fs/sysv/super.c  27
-rw-r--r--  fs/sysv/sysv.h  1
-rw-r--r--  fs/ubifs/debug.c  410
-rw-r--r--  fs/ubifs/debug.h  3
-rw-r--r--  fs/ubifs/dir.c  18
-rw-r--r--  fs/ubifs/file.c  4
-rw-r--r--  fs/ubifs/recovery.c  3
-rw-r--r--  fs/ubifs/sb.c  19
-rw-r--r--  fs/ubifs/super.c  6
-rw-r--r--  fs/ubifs/ubifs.h  11
-rw-r--r--  fs/udf/balloc.c  84
-rw-r--r--  fs/udf/file.c  4
-rw-r--r--  fs/udf/ialloc.c  1
-rw-r--r--  fs/udf/inode.c  20
-rw-r--r--  fs/udf/namei.c  13
-rw-r--r--  fs/udf/super.c  11
-rw-r--r--  fs/udf/udf_i.h  1
-rw-r--r--  fs/ufs/inode.c  1
-rw-r--r--  fs/ufs/namei.c  14
-rw-r--r--  fs/ufs/super.c  8
-rw-r--r--  fs/xattr.c  42
-rw-r--r--  fs/xattr_acl.c  2
-rw-r--r--  fs/xfs/Makefile  3
-rw-r--r--  fs/xfs/xfs_alloc.c  36
-rw-r--r--  fs/xfs/xfs_alloc.h  12
-rw-r--r--  fs/xfs/xfs_aops.c  183
-rw-r--r--  fs/xfs/xfs_aops.h  4
-rw-r--r--  fs/xfs/xfs_attr.c  16
-rw-r--r--  fs/xfs/xfs_attr_leaf.c  40
-rw-r--r--  fs/xfs/xfs_bmap.c  22
-rw-r--r--  fs/xfs/xfs_buf.c  17
-rw-r--r--  fs/xfs/xfs_buf.h  1
-rw-r--r--  fs/xfs/xfs_da_btree.c  32
-rw-r--r--  fs/xfs/xfs_dfrag.c  24
-rw-r--r--  fs/xfs/xfs_dir2_block.c  1
-rw-r--r--  fs/xfs/xfs_discard.c  61
-rw-r--r--  fs/xfs/xfs_dquot.c  418
-rw-r--r--  fs/xfs/xfs_dquot.h  49
-rw-r--r--  fs/xfs/xfs_file.c  84
-rw-r--r--  fs/xfs/xfs_iget.c  47
-rw-r--r--  fs/xfs/xfs_inode.c  94
-rw-r--r--  fs/xfs/xfs_inode.h  27
-rw-r--r--  fs/xfs/xfs_inode_item.c  297
-rw-r--r--  fs/xfs/xfs_inode_item.h  16
-rw-r--r--  fs/xfs/xfs_ioctl.c  28
-rw-r--r--  fs/xfs/xfs_ioctl32.c  2
-rw-r--r--  fs/xfs/xfs_iomap.c  19
-rw-r--r--  fs/xfs/xfs_iops.c  71
-rw-r--r--  fs/xfs/xfs_itable.c  24
-rw-r--r--  fs/xfs/xfs_log.c  615
-rw-r--r--  fs/xfs/xfs_log.h  16
-rw-r--r--  fs/xfs/xfs_log_priv.h  28
-rw-r--r--  fs/xfs/xfs_log_recover.c  39
-rw-r--r--  fs/xfs/xfs_mount.c  8
-rw-r--r--  fs/xfs/xfs_mount.h  5
-rw-r--r--  fs/xfs/xfs_qm.c  628
-rw-r--r--  fs/xfs/xfs_qm.h  49
-rw-r--r--  fs/xfs/xfs_qm_bhv.c  42
-rw-r--r--  fs/xfs/xfs_qm_stats.c  105
-rw-r--r--  fs/xfs/xfs_qm_stats.h  53
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c  130
-rw-r--r--  fs/xfs/xfs_quota.h  2
-rw-r--r--  fs/xfs/xfs_quota_priv.h  11
-rw-r--r--  fs/xfs/xfs_rename.c  11
-rw-r--r--  fs/xfs/xfs_rtalloc.c  9
-rw-r--r--  fs/xfs/xfs_sb.h  1
-rw-r--r--  fs/xfs/xfs_stats.c  99
-rw-r--r--  fs/xfs/xfs_stats.h  10
-rw-r--r--  fs/xfs/xfs_super.c  204
-rw-r--r--  fs/xfs/xfs_super.h  8
-rw-r--r--  fs/xfs/xfs_sync.c  46
-rw-r--r--  fs/xfs/xfs_sync.h  2
-rw-r--r--  fs/xfs/xfs_trace.h  106
-rw-r--r--  fs/xfs/xfs_trans.c  31
-rw-r--r--  fs/xfs/xfs_trans_ail.c  83
-rw-r--r--  fs/xfs/xfs_trans_buf.c  25
-rw-r--r--  fs/xfs/xfs_trans_dquot.c  21
-rw-r--r--  fs/xfs/xfs_trans_inode.c  8
-rw-r--r--  fs/xfs/xfs_trans_priv.h  3
-rw-r--r--  fs/xfs/xfs_utils.c  2
-rw-r--r--  fs/xfs/xfs_vnode.h  1
-rw-r--r--  fs/xfs/xfs_vnodeops.c  16
-rw-r--r--  fs/xfs/xfs_vnodeops.h  3
545 files changed, 24106 insertions, 12231 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1964f98e74b..b85efa77394 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -594,21 +594,21 @@ static int __init init_v9fs(void)
 	int err;
 	pr_info("Installing v9fs 9p2000 file system support\n");
 	/* TODO: Setup list of registered trasnport modules */
-	err = register_filesystem(&v9fs_fs_type);
-	if (err < 0) {
-		pr_err("Failed to register filesystem\n");
-		return err;
-	}
 
 	err = v9fs_cache_register();
 	if (err < 0) {
 		pr_err("Failed to register v9fs for caching\n");
-		goto out_fs_unreg;
+		return err;
 	}
 
 	err = v9fs_sysfs_init();
 	if (err < 0) {
 		pr_err("Failed to register with sysfs\n");
+		goto out_cache;
+	}
+	err = register_filesystem(&v9fs_fs_type);
+	if (err < 0) {
+		pr_err("Failed to register filesystem\n");
 		goto out_sysfs_cleanup;
 	}
 
@@ -617,8 +617,8 @@ static int __init init_v9fs(void)
 out_sysfs_cleanup:
 	v9fs_sysfs_cleanup();
 
-out_fs_unreg:
-	unregister_filesystem(&v9fs_fs_type);
+out_cache:
+	v9fs_cache_unregister();
 
 	return err;
 }
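
Note on the hunk above: register_filesystem() moves to the very end of init_v9fs(), since a mount can arrive the instant the filesystem type is registered, so caching and sysfs state must already exist; the error labels then unwind in reverse order. A minimal sketch of the idiom (names are illustrative, not the v9fs code):

	static int __init example_init(void)
	{
		int err = example_cache_setup();	/* hypothetical earlier init step */
		if (err < 0)
			return err;
		err = register_filesystem(&example_fs_type);	/* last: nothing can mount before this */
		if (err < 0)
			example_cache_teardown();	/* unwind in reverse order */
		return err;
	}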
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7b0cd87b07c..8c92a9ba833 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		goto release_sb;
 	}
 
-	root = d_alloc_root(inode);
+	root = d_make_root(inode);
 	if (!root) {
-		iput(inode);
 		retval = -ENOMEM;
 		goto release_sb;
 	}
@@ -260,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	if (v9fs_proto_dotl(v9ses)) {
 		res = p9_client_statfs(fid, &rs);
 		if (res == 0) {
-			buf->f_type = V9FS_MAGIC;
+			buf->f_type = rs.type;
 			buf->f_bsize = rs.bsize;
 			buf->f_blocks = rs.blocks;
 			buf->f_bfree = rs.bfree;
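
Note: d_alloc_root() left the inode reference with the caller on failure, while d_make_root() consumes it either way. That is why every conversion in this series also deletes an iput() from the error path; the resulting pattern, distilled (sketch, assuming the inode is already set up):

	sb->s_root = d_make_root(inode);	/* on failure, the inode reference is dropped for us */
	if (!sb->s_root) {
		retval = -ENOMEM;		/* note: no iput(inode) here any more */
		goto release_sb;
	}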
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9..f95ae3a027f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
 
 menu "File systems"
 
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+	bool
+
 if BLOCK
 
 source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
 source "fs/omfs/Kconfig"
 source "fs/hpfs/Kconfig"
 source "fs/qnx4/Kconfig"
+source "fs/qnx6/Kconfig"
 source "fs/romfs/Kconfig"
 source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 93804d4d66e..2fb97793467 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
 obj-$(CONFIG_AFFS_FS)		+= affs/
 obj-$(CONFIG_ROMFS_FS)		+= romfs/
 obj-$(CONFIG_QNX4FS_FS)	+= qnx4/
+obj-$(CONFIG_QNX6FS_FS)	+= qnx6/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 8e3b36ace30..06fdcc9382c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_d_op = &adfs_dentry_operations;
 	root = adfs_iget(sb, &root_obj);
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		int i;
-		iput(root);
 		for (i = 0; i < asb->s_map_size; i++)
 			brelse(asb->s_map[i].dm_bh);
 		kfree(asb->s_map);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8ba73fed796..0782653a05a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -473,7 +473,7 @@ got_root:
 	root_inode = affs_iget(sb, root_block);
 	if (IS_ERR(root_inode)) {
 		ret = PTR_ERR(root_inode);
-		goto out_error_noinode;
+		goto out_error;
 	}
 
 	if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -481,7 +481,7 @@ got_root:
 	else
 		sb->s_d_op = &affs_dentry_operations;
 
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root) {
 		printk(KERN_ERR "AFFS: Get root inode failed\n");
 		goto out_error;
@@ -494,9 +494,6 @@ got_root:
 	 * Begin the cascaded cleanup ...
 	 */
 out_error:
-	if (root_inode)
-		iput(root_inode);
-out_error_noinode:
 	kfree(sbi->s_bitmap);
 	affs_brelse(root_bh);
 	kfree(sbi->s_prefix);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fe..8f6e9234d56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	ASSERT(key != NULL);
 
 	vnode = AFS_FS_I(mapping->host);
-	if (vnode->flags & AFS_VNODE_DELETED) {
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		_leave(" = -ESTALE");
 		return -ESTALE;
 	}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c2..b960ff05ea0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 		_debug("extract data");
 		if (call->count > 0) {
 			page = call->reply3;
-			buffer = kmap_atomic(page, KM_USER0);
+			buffer = kmap_atomic(page);
 			ret = afs_extract_data(call, skb, last, buffer,
 					       call->count);
-			kunmap_atomic(buffer, KM_USER0);
+			kunmap_atomic(buffer);
 			switch (ret) {
 			case 0:		break;
 			case -EAGAIN:	return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 	if (call->count < PAGE_SIZE) {
 		_debug("clear");
 		page = call->reply3;
-		buffer = kmap_atomic(page, KM_USER0);
+		buffer = kmap_atomic(page);
 		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap_atomic(buffer, KM_USER0);
+		kunmap_atomic(buffer);
 	}
 
 	_leave(" = 0 [done]");
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f4ce2658b7..298cf8919ec 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 		if (PageError(page))
 			goto error;
 
-		buf = kmap_atomic(page, KM_USER0);
+		buf = kmap_atomic(page);
 		memcpy(devname, buf, size);
-		kunmap_atomic(buf, KM_USER0);
+		kunmap_atomic(buf);
 		page_cache_release(page);
 		page = NULL;
 	}
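
Note: the afs hunks above all apply the same API change: kmap_atomic()/kunmap_atomic() lost their KM_USER0/KM_IRQ1 slot argument, since atomic kmap slots are now managed as a small per-CPU stack. The distilled calling pattern (sketch):

	void *buf = kmap_atomic(page);	/* was: kmap_atomic(page, KM_USER0) */
	memcpy(dst, buf, len);		/* nested mappings must be released in LIFO order */
	kunmap_atomic(buf);		/* was: kunmap_atomic(buf, KM_USER0) */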
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 983ec59fc80..f02b31e7e64 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
 {
 	struct afs_super_info *as = sb->s_fs_info;
 	struct afs_fid fid;
-	struct dentry *root = NULL;
 	struct inode *inode = NULL;
 	int ret;
 
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
 		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
 
 	ret = -ENOMEM;
-	root = d_alloc_root(inode);
-	if (!root)
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
 		goto error;
 
 	sb->s_d_op = &afs_fs_dentry_operations;
-	sb->s_root = root;
 
 	_leave(" = 0");
 	return 0;
 
 error:
-	iput(inode);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/aio.c b/fs/aio.c
index b9d64d89a04..67a6db3e1b6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -13,7 +13,7 @@
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/aio_abi.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/backing-dev.h>
 #include <linux/uio.h>
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx)
 			put_page(info->ring_pages[i]);
 
 	if (info->mmap_size) {
-		down_write(&ctx->mm->mmap_sem);
-		do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
-		up_write(&ctx->mm->mmap_sem);
+		BUG_ON(ctx->mm != current->mm);
+		vm_munmap(info->mmap_base, info->mmap_size);
 	}
 
 	if (info->ring_pages && info->ring_pages != info->internal_pages)
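
Note: vm_munmap() bundles the mmap_sem write lock and the do_munmap() call on current->mm, which is what lets the hunk drop the explicit locking, provided ctx->mm really is current->mm (hence the BUG_ON). Spelled out, the replaced sequence was:

	down_write(&current->mm->mmap_sem);
	do_munmap(current->mm, info->mmap_base, info->mmap_size);
	up_write(&current->mm->mmap_sem);
	/* ...which is now simply: vm_munmap(info->mmap_base, info->mmap_size); */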
@@ -160,7 +159,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	info->nr = nr_events;		/* trusted copy */
 
-	ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+	ring = kmap_atomic(info->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
 	ring->id = ctx->user_id;
 	ring->head = ring->tail = 0;
@@ -168,47 +167,38 @@ static int aio_setup_ring(struct kioctx *ctx)
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
 	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
 	ring->header_length = sizeof(struct aio_ring);
-	kunmap_atomic(ring, KM_USER0);
+	kunmap_atomic(ring);
 
 	return 0;
 }
 
 
 /* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(, km). Release the pointer with put_aio_ring_event();
+ * kmap_atomic(). Release the pointer with put_aio_ring_event();
  */
 #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-#define aio_ring_event(info, nr, km) ({					\
+#define aio_ring_event(info, nr) ({					\
 	unsigned pos = (nr) + AIO_EVENTS_OFFSET;			\
 	struct io_event *__event;					\
 	__event = kmap_atomic(						\
-			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
+			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
 	__event += pos % AIO_EVENTS_PER_PAGE;				\
 	__event;							\
 })
 
-#define put_aio_ring_event(event, km) do {	\
+#define put_aio_ring_event(event) do {		\
 	struct io_event *__event = (event);	\
 	(void)__event;				\
-	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
+	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
 } while(0)
 
 static void ctx_rcu_free(struct rcu_head *head)
 {
 	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
-	unsigned nr_events = ctx->max_reqs;
-
 	kmem_cache_free(kioctx_cachep, ctx);
-
-	if (nr_events) {
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - nr_events > aio_nr);
-		aio_nr -= nr_events;
-		spin_unlock(&aio_nr_lock);
-	}
 }
 
 /* __put_ioctx
@@ -217,13 +207,19 @@ static void ctx_rcu_free(struct rcu_head *head)
  */
 static void __put_ioctx(struct kioctx *ctx)
 {
+	unsigned nr_events = ctx->max_reqs;
 	BUG_ON(ctx->reqs_active);
 
-	cancel_delayed_work(&ctx->wq);
-	cancel_work_sync(&ctx->wq.work);
+	cancel_delayed_work_sync(&ctx->wq);
 	aio_free_ring(ctx);
 	mmdrop(ctx->mm);
 	ctx->mm = NULL;
+	if (nr_events) {
+		spin_lock(&aio_nr_lock);
+		BUG_ON(aio_nr - nr_events > aio_nr);
+		aio_nr -= nr_events;
+		spin_unlock(&aio_nr_lock);
+	}
 	pr_debug("__put_ioctx: freeing %p\n", ctx);
 	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
@@ -247,7 +243,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm;
 	struct kioctx *ctx;
-	int did_sync = 0;
+	int err = -ENOMEM;
 
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -256,7 +252,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if ((unsigned long)nr_events > aio_max_nr)
+	if (!nr_events || (unsigned long)nr_events > aio_max_nr)
 		return ERR_PTR(-EAGAIN);
 
 	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -280,25 +276,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	do {
-		spin_lock_bh(&aio_nr_lock);
-		if (aio_nr + nr_events > aio_max_nr ||
-		    aio_nr + nr_events < aio_nr)
-			ctx->max_reqs = 0;
-		else
-			aio_nr += ctx->max_reqs;
-		spin_unlock_bh(&aio_nr_lock);
-		if (ctx->max_reqs || did_sync)
-			break;
-
-		/* wait for rcu callbacks to have completed before giving up */
-		synchronize_rcu();
-		did_sync = 1;
-		ctx->max_reqs = nr_events;
-	} while (1);
-
-	if (ctx->max_reqs == 0)
+	spin_lock(&aio_nr_lock);
+	if (aio_nr + nr_events > aio_max_nr ||
+	    aio_nr + nr_events < aio_nr) {
+		spin_unlock(&aio_nr_lock);
 		goto out_cleanup;
+	}
+	aio_nr += ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
 
 	/* now link into global list. */
 	spin_lock(&mm->ioctx_lock);
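
Note: the reservation above is now a single locked check; the second half of the condition catches unsigned wrap-around, not just the limit. The pattern in isolation (illustrative helper, to be called under the lock that guards *total):

	static bool charge(unsigned long *total, unsigned long nr, unsigned long max)
	{
		if (*total + nr > max || *total + nr < *total)
			return false;	/* over the limit, or the addition wrapped */
		*total += nr;
		return true;
	}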
@@ -310,27 +295,27 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	return ctx;
 
 out_cleanup:
-	__put_ioctx(ctx);
-	return ERR_PTR(-EAGAIN);
-
+	err = -EAGAIN;
+	aio_free_ring(ctx);
 out_freectx:
 	mmdrop(mm);
 	kmem_cache_free(kioctx_cachep, ctx);
-	ctx = ERR_PTR(-ENOMEM);
-
-	dprintk("aio: error allocating ioctx %p\n", ctx);
-	return ctx;
+	dprintk("aio: error allocating ioctx %d\n", err);
+	return ERR_PTR(err);
 }
 
-/* aio_cancel_all
+/* kill_ctx
  *	Cancels all outstanding aio requests on an aio context.  Used
  *	when the processes owning a context have all exited to encourage
  *	the rapid destruction of the kioctx.
  */
-static void aio_cancel_all(struct kioctx *ctx)
+static void kill_ctx(struct kioctx *ctx)
 {
 	int (*cancel)(struct kiocb *, struct io_event *);
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
 	struct io_event res;
+
 	spin_lock_irq(&ctx->ctx_lock);
 	ctx->dead = 1;
 	while (!list_empty(&ctx->active_reqs)) {
@@ -346,15 +331,7 @@ static void aio_cancel_all(struct kioctx *ctx)
 			spin_lock_irq(&ctx->ctx_lock);
 		}
 	}
-	spin_unlock_irq(&ctx->ctx_lock);
-}
 
-static void wait_for_all_aios(struct kioctx *ctx)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
 		goto out;
 
@@ -404,19 +381,24 @@ void exit_aio(struct mm_struct *mm)
 		ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
 		hlist_del_rcu(&ctx->list);
 
-		aio_cancel_all(ctx);
-
-		wait_for_all_aios(ctx);
-		/*
-		 * Ensure we don't leave the ctx on the aio_wq
-		 */
-		cancel_work_sync(&ctx->wq.work);
+		kill_ctx(ctx);
 
 		if (1 != atomic_read(&ctx->users))
 			printk(KERN_DEBUG
 				"exit_aio:ioctx still alive: %d %d %d\n",
 				atomic_read(&ctx->users), ctx->dead,
 				ctx->reqs_active);
+		/*
+		 * We don't need to bother with munmap() here -
+		 * exit_mmap(mm) is coming and it'll unmap everything.
+		 * Since aio_free_ring() uses non-zero ->mmap_size
+		 * as indicator that it needs to unmap the area,
+		 * just set it to 0; aio_free_ring() is the only
+		 * place that uses ->mmap_size, so it's safe.
+		 * That way we get all munmap done to current->mm -
+		 * all other callers have ctx->mm == current->mm.
+		 */
+		ctx->ring_info.mmap_size = 0;
 		put_ioctx(ctx);
 	}
 }
@@ -920,7 +902,7 @@ static void aio_kick_handler(struct work_struct *work)
 	unuse_mm(mm);
 	set_fs(oldfs);
 	/*
-	 * we're in a worker thread already, don't use queue_delayed_work,
+	 * we're in a worker thread already; no point using non-zero delay
 	 */
 	if (requeue)
 		queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +1001,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	if (kiocbIsCancelled(iocb))
 		goto put_rq;
 
-	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+	ring = kmap_atomic(info->ring_pages[0]);
 
 	tail = info->tail;
-	event = aio_ring_event(info, tail, KM_IRQ0);
+	event = aio_ring_event(info, tail);
 	if (++tail >= info->nr)
 		tail = 0;
 
@@ -1043,8 +1025,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	info->tail = tail;
 	ring->tail = tail;
 
-	put_aio_ring_event(event, KM_IRQ0);
-	kunmap_atomic(ring, KM_IRQ1);
+	put_aio_ring_event(event);
+	kunmap_atomic(ring);
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
 
@@ -1089,7 +1071,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 	unsigned long head;
 	int ret = 0;
 
-	ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+	ring = kmap_atomic(info->ring_pages[0]);
 	dprintk("in aio_read_evt h%lu t%lu m%lu\n",
 		 (unsigned long)ring->head, (unsigned long)ring->tail,
 		 (unsigned long)ring->nr);
@@ -1101,18 +1083,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 
 	head = ring->head % info->nr;
 	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+		struct io_event *evp = aio_ring_event(info, head);
 		*ent = *evp;
 		head = (head + 1) % info->nr;
 		smp_mb(); /* finish reading the event before updatng the head */
 		ring->head = head;
 		ret = 1;
-		put_aio_ring_event(evp, KM_USER1);
+		put_aio_ring_event(evp);
 	}
 	spin_unlock(&info->ring_lock);
 
 out:
-	kunmap_atomic(ring, KM_USER0);
+	kunmap_atomic(ring);
 	dprintk("leaving aio_read_evt: %d  h%lu t%lu\n", ret,
 		 (unsigned long)ring->head, (unsigned long)ring->tail);
 	return ret;
@@ -1290,8 +1272,7 @@ static void io_destroy(struct kioctx *ioctx)
 	if (likely(!was_dead))
 		put_ioctx(ioctx);	/* twice for the list */
 
-	aio_cancel_all(ioctx);
-	wait_for_all_aios(ioctx);
+	kill_ctx(ioctx);
 
 	/*
 	 * Wake up any waiters.  The setting of ctx->dead must be seen
@@ -1299,7 +1280,6 @@ static void io_destroy(struct kioctx *ioctx)
 	 * locking done by the above calls to ensure this consistency.
 	 */
 	wake_up_all(&ioctx->wait);
-	put_ioctx(ioctx);	/* once for the lookup */
 }
 
 /* sys_io_setup:
@@ -1336,11 +1316,9 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret) {
-			put_ioctx(ioctx);
-			return 0;
-		}
-		io_destroy(ioctx);
+		if (ret)
+			io_destroy(ioctx);
+		put_ioctx(ioctx);
 	}
 
 out:
@@ -1358,6 +1336,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
 		io_destroy(ioctx);
+		put_ioctx(ioctx);
 		return 0;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907..28d39fb84ae 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_dname	= anon_inodefs_dname,
 };
 
-static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
-				int flags, const char *dev_name, void *data)
-{
-	return mount_pseudo(fs_type, "anon_inode:", NULL,
-			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
-}
-
-static struct file_system_type anon_inode_fs_type = {
-	.name		= "anon_inodefs",
-	.mount		= anon_inodefs_mount,
-	.kill_sb	= kill_anon_super,
-};
-
 /*
  * nop .set_page_dirty method so that people can use .page_mkwrite on
  * anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
 	.set_page_dirty = anon_set_page_dirty,
 };
 
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+static struct inode *anon_inode_mkinode(struct super_block *s)
+{
+	struct inode *inode = new_inode_pseudo(s);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_ino = get_next_ino();
+	inode->i_fop = &anon_inode_fops;
+
+	inode->i_mapping->a_ops = &anon_aops;
+
+	/*
+	 * Mark the inode dirty from the very beginning,
+	 * that way it will never be moved to the dirty
+	 * list because mark_inode_dirty() will think
+	 * that it already _is_ on the dirty list.
+	 */
+	inode->i_state = I_DIRTY;
+	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_flags |= S_PRIVATE;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	return inode;
+}
+
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+				int flags, const char *dev_name, void *data)
+{
+	struct dentry *root;
+	root = mount_pseudo(fs_type, "anon_inode:", NULL,
+			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
+	if (!IS_ERR(root)) {
+		struct super_block *s = root->d_sb;
+		anon_inode_inode = anon_inode_mkinode(s);
+		if (IS_ERR(anon_inode_inode)) {
+			dput(root);
+			deactivate_locked_super(s);
+			root = ERR_CAST(anon_inode_inode);
+		}
+	}
+	return root;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+	.name		= "anon_inodefs",
+	.mount		= anon_inodefs_mount,
+	.kill_sb	= kill_anon_super,
+};
+
 /**
  * anon_inode_getfile - creates a new file instance by hooking it up to an
  *                      anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
 
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(void)
-{
-	struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
-
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
-	inode->i_ino = get_next_ino();
-	inode->i_fop = &anon_inode_fops;
-
-	inode->i_mapping->a_ops = &anon_aops;
-
-	/*
-	 * Mark the inode dirty from the very beginning,
-	 * that way it will never be moved to the dirty
-	 * list because mark_inode_dirty() will think
-	 * that it already _is_ on the dirty list.
-	 */
-	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
-	inode->i_flags |= S_PRIVATE;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	return inode;
-}
-
 static int __init anon_inode_init(void)
 {
 	int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
 		error = PTR_ERR(anon_inode_mnt);
 		goto err_unregister_filesystem;
 	}
-	anon_inode_inode = anon_inode_mkinode();
-	if (IS_ERR(anon_inode_inode)) {
-		error = PTR_ERR(anon_inode_inode);
-		goto err_mntput;
-	}
-
 	return 0;
 
-err_mntput:
-	kern_unmount(anon_inode_mnt);
 err_unregister_filesystem:
 	unregister_filesystem(&anon_inode_fs_type);
 err_exit:
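
Note: the single shared anon inode is now created while mounting the pseudo filesystem, so anon_inode_init() can no longer end up with a mounted but inode-less anon_inodefs. Consumers are unaffected; typical use of this machinery looks like the following sketch (example_fops and obj are illustrative):

	int fd = anon_inode_getfd("[example]", &example_fops, obj, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return fd;	/* on success, fd is backed by the one shared anon inode */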
diff --git a/fs/attr.c b/fs/attr.c
index 95053ad8abc..73f69a6ce9e 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -5,7 +5,7 @@
  *  changes by Thomas Schoebel-Theuer
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/string.h>
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index eb1cc92cd67..908e1845541 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,7 +110,6 @@ struct autofs_sb_info {
 	int sub_version;
 	int min_proto;
 	int max_proto;
-	int compat_daemon;
 	unsigned long exp_timeout;
 	unsigned int type;
 	int reghost_enabled;
@@ -270,6 +269,17 @@ int autofs4_fill_super(struct super_block *, void *, int);
 struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
 void autofs4_clean_ino(struct autofs_info *);
 
+static inline int autofs_prepare_pipe(struct file *pipe)
+{
+	if (!pipe->f_op || !pipe->f_op->write)
+		return -EINVAL;
+	if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+		return -EINVAL;
+	/* We want a packet pipe */
+	pipe->f_flags |= O_DIRECT;
+	return 0;
+}
+
 /* Queue management functions */
 
 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
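
Note: autofs_prepare_pipe() switches the daemon's pipe into packet mode via O_DIRECT: each kernel write() becomes one packet and each daemon read() returns exactly one packet regardless of the byte count requested. That makes the 4-byte x86-32/x86-64 autofs_v5_packet size difference harmless, which is what lets the compat_daemon tracking above be removed. Daemon-side effect, sketched from userspace (illustrative):

	union autofs_v5_packet_union pkt;
	ssize_t n = read(pipefd, &pkt, sizeof(pkt));	/* one packet per read on a packet pipe */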
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 85f1fcdb30e..aa9103f8f01 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -230,7 +230,7 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
 	fdt = files_fdtable(files);
 	BUG_ON(fdt->fd[fd] != NULL);
 	rcu_assign_pointer(fdt->fd[fd], file);
-	FD_SET(fd, fdt->close_on_exec);
+	__set_close_on_exec(fd, fdt);
 	spin_unlock(&files->file_lock);
 }
 
@@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		err = -EBADF;
 		goto out;
 	}
-	if (!pipe->f_op || !pipe->f_op->write) {
+	if (autofs_prepare_pipe(pipe) < 0) {
 		err = -EPIPE;
 		fput(pipe);
 		goto out;
@@ -385,7 +385,6 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 	sbi->pipefd = pipefd;
 	sbi->pipe = pipe;
 	sbi->catatonic = 0;
-	sbi->compat_daemon = is_compat_task();
 }
 out:
 	mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b405..cddc74b9cdb 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
 {
 	int err;
 
+	autofs_dev_ioctl_init();
+
 	err = register_filesystem(&autofs_fs_type);
 	if (err)
-		return err;
-
-	autofs_dev_ioctl_init();
+		autofs_dev_ioctl_exit();
 
 	return err;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 06858d95512..6e488ebe778 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,7 +19,6 @@
 #include <linux/parser.h>
 #include <linux/bitops.h>
 #include <linux/magic.h>
-#include <linux/compat.h>
 #include "autofs_i.h"
 #include <linux/module.h>
 
@@ -225,7 +224,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
-	sbi->compat_daemon = is_compat_task();
 	mutex_init(&sbi->wq_mutex);
 	mutex_init(&sbi->pipe_mutex);
 	spin_lock_init(&sbi->fs_lock);
@@ -247,12 +245,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (!ino)
 		goto fail_free;
 	root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
-	if (!root_inode)
-		goto fail_ino;
-
-	root = d_alloc_root(root_inode);
+	root = d_make_root(root_inode);
 	if (!root)
-		goto fail_iput;
+		goto fail_ino;
 	pipe = NULL;
 
 	root->d_fsdata = ino;
@@ -295,7 +290,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		printk("autofs: could not open pipe file descriptor\n");
 		goto fail_dput;
 	}
-	if (!pipe->f_op || !pipe->f_op->write)
+	if (autofs_prepare_pipe(pipe) < 0)
 		goto fail_fput;
 	sbi->pipe = pipe;
 	sbi->pipefd = pipefd;
@@ -317,9 +312,6 @@ fail_fput:
 fail_dput:
 	dput(root);
 	goto fail_free;
-fail_iput:
-	printk("autofs: get root dentry failed\n");
-	iput(root_inode);
 fail_ino:
 	kfree(ino);
 fail_free:
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 9c098db4334..da8876d38a7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,24 +91,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
 
 	return (bytes > 0);
 }
 
-/*
- * The autofs_v5 packet was misdesigned.
- *
- * The packets are identical on x86-32 and x86-64, but have different
- * alignment. Which means that 'sizeof()' will give different results.
- * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
- */
-static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
-{
-	size_t pktsz = sizeof(struct autofs_v5_packet);
-#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
-	if (sbi->compat_daemon > 0)
-		pktsz -= 4;
-#endif
-	return pktsz;
-}
-
 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 				 struct autofs_wait_queue *wq,
 				 int type)
@@ -172,7 +155,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	{
 		struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
 
-		pktsz = autofs_v5_packet_size(sbi);
+		pktsz = sizeof(*packet);
+
 		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 22e9a78872f..37268c5bb98 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/fs.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/stat.h>
 #include <linux/time.h>
 #include <linux/namei.h>
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6e6d536767f..e18da23d42b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 		ret = PTR_ERR(root);
 		goto unacquire_priv_sbp;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		iput(root);
 		befs_error(sb, "get root inode failed");
 		goto unacquire_priv_sbp;
 	}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b0391bc402b..e23dc7c8b88 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		ret = PTR_ERR(inode);
 		goto out2;
 	}
-	s->s_root = d_alloc_root(inode);
+	s->s_root = d_make_root(inode);
 	if (!s->s_root) {
-		iput(inode);
 		ret = -ENOMEM;
 		goto out2;
 	}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 1ff94054d35..d146e181d10 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -26,7 +26,6 @@
 #include <linux/coredump.h>
 #include <linux/slab.h>
 
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/a.out-core.h>
@@ -51,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end)
 	end = PAGE_ALIGN(end);
 	if (end > start) {
 		unsigned long addr;
-		down_write(&current->mm->mmap_sem);
-		addr = do_brk(start, end - start);
-		up_write(&current->mm->mmap_sem);
+		addr = vm_brk(start, end - start);
 		if (BAD_ADDR(addr))
 			return addr;
 	}
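
Note: vm_brk() is to do_brk() what vm_munmap() is to do_munmap(): it takes current->mm's mmap_sem internally. The same applies to the vm_mmap()/do_mmap() conversions later in this file. The boilerplate each call site sheds:

	down_write(&current->mm->mmap_sem);
	addr = do_brk(start, end - start);
	up_write(&current->mm->mmap_sem);
	/* ...collapses to: addr = vm_brk(start, end - start); */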
@@ -267,7 +264,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	}
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 
 	if (N_MAGIC(ex) == OMAGIC) {
 		unsigned long text_addr, map_size;
@@ -282,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
282 pos = 32; 278 pos = 32;
283 map_size = ex.a_text+ex.a_data; 279 map_size = ex.a_text+ex.a_data;
284#endif 280#endif
285 down_write(&current->mm->mmap_sem); 281 error = vm_brk(text_addr & PAGE_MASK, map_size);
286 error = do_brk(text_addr & PAGE_MASK, map_size);
287 up_write(&current->mm->mmap_sem);
288 if (error != (text_addr & PAGE_MASK)) { 282 if (error != (text_addr & PAGE_MASK)) {
289 send_sig(SIGKILL, current, 0); 283 send_sig(SIGKILL, current, 0);
290 return error; 284 return error;
@@ -315,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
315 309
316 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 310 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
317 loff_t pos = fd_offset; 311 loff_t pos = fd_offset;
318 down_write(&current->mm->mmap_sem); 312 vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
319 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
320 up_write(&current->mm->mmap_sem);
321 bprm->file->f_op->read(bprm->file, 313 bprm->file->f_op->read(bprm->file,
322 (char __user *)N_TXTADDR(ex), 314 (char __user *)N_TXTADDR(ex),
323 ex.a_text+ex.a_data, &pos); 315 ex.a_text+ex.a_data, &pos);
@@ -327,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
327 goto beyond_if; 319 goto beyond_if;
328 } 320 }
329 321
330 down_write(&current->mm->mmap_sem); 322 error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
331 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
332 PROT_READ | PROT_EXEC, 323 PROT_READ | PROT_EXEC,
333 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, 324 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
334 fd_offset); 325 fd_offset);
335 up_write(&current->mm->mmap_sem);
336 326
337 if (error != N_TXTADDR(ex)) { 327 if (error != N_TXTADDR(ex)) {
338 send_sig(SIGKILL, current, 0); 328 send_sig(SIGKILL, current, 0);
339 return error; 329 return error;
340 } 330 }
341 331
342 down_write(&current->mm->mmap_sem); 332 error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
343 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
344 PROT_READ | PROT_WRITE | PROT_EXEC, 333 PROT_READ | PROT_WRITE | PROT_EXEC,
345 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, 334 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
346 fd_offset + ex.a_text); 335 fd_offset + ex.a_text);
347 up_write(&current->mm->mmap_sem);
348 if (error != N_DATADDR(ex)) { 336 if (error != N_DATADDR(ex)) {
349 send_sig(SIGKILL, current, 0); 337 send_sig(SIGKILL, current, 0);
350 return error; 338 return error;
@@ -414,9 +402,7 @@ static int load_aout_library(struct file *file)
414 "N_TXTOFF is not page aligned. Please convert library: %s\n", 402 "N_TXTOFF is not page aligned. Please convert library: %s\n",
415 file->f_path.dentry->d_name.name); 403 file->f_path.dentry->d_name.name);
416 } 404 }
417 down_write(&current->mm->mmap_sem); 405 vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
418 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
419 up_write(&current->mm->mmap_sem);
420 406
421 file->f_op->read(file, (char __user *)start_addr, 407 file->f_op->read(file, (char __user *)start_addr,
422 ex.a_text + ex.a_data, &pos); 408 ex.a_text + ex.a_data, &pos);
@@ -427,12 +413,10 @@ static int load_aout_library(struct file *file)
427 goto out; 413 goto out;
428 } 414 }
429 /* Now use mmap to map the library into memory. */ 415 /* Now use mmap to map the library into memory. */
430 down_write(&current->mm->mmap_sem); 416 error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
431 error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
432 PROT_READ | PROT_WRITE | PROT_EXEC, 417 PROT_READ | PROT_WRITE | PROT_EXEC,
433 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, 418 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
434 N_TXTOFF(ex)); 419 N_TXTOFF(ex));
435 up_write(&current->mm->mmap_sem);
436 retval = error; 420 retval = error;
437 if (error != start_addr) 421 if (error != start_addr)
438 goto out; 422 goto out;
@@ -440,9 +424,7 @@ static int load_aout_library(struct file *file)
440 len = PAGE_ALIGN(ex.a_text + ex.a_data); 424 len = PAGE_ALIGN(ex.a_text + ex.a_data);
441 bss = ex.a_text + ex.a_data + ex.a_bss; 425 bss = ex.a_text + ex.a_data + ex.a_bss;
442 if (bss > len) { 426 if (bss > len) {
443 down_write(&current->mm->mmap_sem); 427 error = vm_brk(start_addr + len, bss - len);
444 error = do_brk(start_addr + len, bss - len);
445 up_write(&current->mm->mmap_sem);
446 retval = error; 428 retval = error;
447 if (error != start_addr + len) 429 if (error != start_addr + len)
448 goto out; 430 goto out;
@@ -454,7 +436,8 @@ out:
454 436
455static int __init init_aout_binfmt(void) 437static int __init init_aout_binfmt(void)
456{ 438{
457 return register_binfmt(&aout_format); 439 register_binfmt(&aout_format);
440 return 0;
458} 441}
459 442
460static void __exit exit_aout_binfmt(void) 443static void __exit exit_aout_binfmt(void)
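Note: every down_write(&mm->mmap_sem)/do_brk()/up_write() triple in the loaders collapses into one vm_brk() call, and each locked do_mmap() into vm_mmap(); the new helpers take mmap_sem internally, so binfmt code stops touching the lock directly. A sketch of what vm_brk() is assumed to wrap:

	unsigned long vm_brk(unsigned long addr, unsigned long len)
	{
		struct mm_struct *mm = current->mm;
		unsigned long ret;

		down_write(&mm->mmap_sem);
		ret = do_brk(addr, len);	/* same return convention as before */
		up_write(&mm->mmap_sem);
		return ret;
	}

vm_mmap() follows the same shape around do_mmap().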
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 07d096c4992..16f73541707 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
+#include <asm/exec.h>
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
@@ -81,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end)
 	end = ELF_PAGEALIGN(end);
 	if (end > start) {
 		unsigned long addr;
-		down_write(&current->mm->mmap_sem);
-		addr = do_brk(start, end - start);
-		up_write(&current->mm->mmap_sem);
+		addr = vm_brk(start, end - start);
 		if (BAD_ADDR(addr))
 			return addr;
 	}
@@ -513,9 +512,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 
 		/* Map the last of the bss segment */
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(elf_bss, last_bss - elf_bss);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(elf_bss, last_bss - elf_bss);
 		if (BAD_ADDR(error))
 			goto out_close;
 	}
@@ -712,7 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto out_free_dentry;
 
 	/* OK, This is the point of no return */
-	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
 
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -934,7 +930,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  load_addr, interp_load_addr);
 	if (retval < 0) {
@@ -963,10 +958,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		   and some applications "depend" upon this behavior.
 		   Since we do not have the power to recompile these, we
 		   emulate the SVr4 behavior. Sigh. */
-		down_write(&current->mm->mmap_sem);
-		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
+		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 	}
 
 #ifdef ELF_PLAT_INIT
@@ -1051,8 +1044,7 @@ static int load_elf_library(struct file *file)
 	eppnt++;
 
 	/* Now use mmap to map the library into memory. */
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(file,
+	error = vm_mmap(file,
 			ELF_PAGESTART(eppnt->p_vaddr),
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
@@ -1060,7 +1052,6 @@ static int load_elf_library(struct file *file)
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
-	up_write(&current->mm->mmap_sem);
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
 		goto out_free_ph;
 
@@ -1073,11 +1064,8 @@ static int load_elf_library(struct file *file)
 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
 			    ELF_MIN_ALIGN - 1);
 	bss = eppnt->p_memsz + eppnt->p_vaddr;
-	if (bss > len) {
-		down_write(&current->mm->mmap_sem);
-		do_brk(len, bss - len);
-		up_write(&current->mm->mmap_sem);
-	}
+	if (bss > len)
+		vm_brk(len, bss - len);
 	error = 0;
 
 out_free_ph:
@@ -1095,6 +1083,29 @@ out:
  */
 
 /*
+ * The purpose of always_dump_vma() is to make sure that special kernel mappings
+ * that are useful for post-mortem analysis are included in every core dump.
+ * In that way we ensure that the core dump is fully interpretable later
+ * without matching up the same kernel and hardware config to see what PC values
+ * meant. These special mappings include - vDSO, vsyscall, and other
+ * architecture specific mappings
+ */
+static bool always_dump_vma(struct vm_area_struct *vma)
+{
+	/* Any vsyscall mappings? */
+	if (vma == get_gate_vma(vma->vm_mm))
+		return true;
+	/*
+	 * arch_vma_name() returns non-NULL for special architecture mappings,
+	 * such as vDSO sections.
+	 */
+	if (arch_vma_name(vma))
+		return true;
+
+	return false;
+}
+
+/*
  * Decide what to dump of a segment, part, all or none.
  */
 static unsigned long vma_dump_size(struct vm_area_struct *vma,
@@ -1102,10 +1113,13 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 {
 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
 
-	/* The vma can be set up to tell us the answer directly.  */
-	if (vma->vm_flags & VM_ALWAYSDUMP)
+	/* always dump the vdso and vsyscall sections */
+	if (always_dump_vma(vma))
 		goto whole;
 
+	if (vma->vm_flags & VM_NODUMP)
+		return 0;
+
 	/* Hugetlb memory check */
 	if (vma->vm_flags & VM_HUGETLB) {
 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
@@ -1390,6 +1404,22 @@ static void do_thread_regset_writeback(struct task_struct *task,
 		regset->writeback(task, regset, 1);
 }
 
+#ifndef PR_REG_SIZE
+#define PR_REG_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PRSTATUS_SIZE
+#define PRSTATUS_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PR_REG_PTR
+#define PR_REG_PTR(S) (&((S)->pr_reg))
+#endif
+
+#ifndef SET_PR_FPVALID
+#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#endif
+
 static int fill_thread_core_info(struct elf_thread_core_info *t,
 				 const struct user_regset_view *view,
 				 long signr, size_t *total)
@@ -1404,11 +1434,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	 */
 	fill_prstatus(&t->prstatus, t->task, signr);
 	(void) view->regsets[0].get(t->task, &view->regsets[0],
-				    0, sizeof(t->prstatus.pr_reg),
-				    &t->prstatus.pr_reg, NULL);
+				    0, PR_REG_SIZE(t->prstatus.pr_reg),
+				    PR_REG_PTR(&t->prstatus), NULL);
 
 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
-		  sizeof(t->prstatus), &t->prstatus);
+		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
 	*total += notesize(&t->notes[0]);
 
 	do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1438,7 +1468,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 					  regset->core_note_type,
 					  size, data);
 			else {
-				t->prstatus.pr_fpvalid = 1;
+				SET_PR_FPVALID(&t->prstatus, 1);
 				fill_note(&t->notes[i], "CORE",
 					  NT_PRFPREG, size, data);
 			}
@@ -2077,7 +2107,8 @@ out:
 
 static int __init init_elf_binfmt(void)
 {
-	return register_binfmt(&elf_format);
+	register_binfmt(&elf_format);
+	return 0;
 }
 
 static void __exit exit_elf_binfmt(void)
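Note: two core-dump changes ride along with the vm_mmap conversion here. The per-vma VM_ALWAYSDUMP flag becomes the always_dump_vma() predicate (the gate vma plus anything with an arch_vma_name(), i.e. vDSO/vsyscall), freeing a vm_flags bit, while the new VM_NODUMP bit lets a mapping opt out of dumps entirely. From userspace the opt-out is reachable via madvise(); a hedged example, assuming the MADV_DONTDUMP advice value that sets VM_NODUMP:

	#include <sys/mman.h>

	/* exclude a large scratch buffer from any core dump */
	void *buf = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	madvise(buf, 1 << 20, MADV_DONTDUMP);

The PR_REG_SIZE/PRSTATUS_SIZE/PR_REG_PTR/SET_PR_FPVALID macros default to the native prstatus layout but give an architecture a hook to override how NT_PRSTATUS notes are sized and filled, e.g. for compat (32-bit task on a 64-bit kernel) dumps.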
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459fa..d390a0fffc6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -39,6 +39,7 @@
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/pgalloc.h>
+#include <asm/exec.h>
 
 typedef char *elf_caddr_t;
 
@@ -91,7 +92,8 @@ static struct linux_binfmt elf_fdpic_format = {
 
 static int __init init_elf_fdpic_binfmt(void)
 {
-	return register_binfmt(&elf_fdpic_format);
+	register_binfmt(&elf_fdpic_format);
+	return 0;
 }
 
 static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +336,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	current->mm->context.exec_fdpic_loadmap = 0;
 	current->mm->context.interp_fdpic_loadmap = 0;
 
-	current->flags &= ~PF_FORKNOEXEC;
-
 #ifdef CONFIG_MMU
 	elf_fdpic_arch_lay_out_mm(&exec_params,
 				  &interp_params,
@@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	    (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
 		stack_prot |= PROT_EXEC;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
+	current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
 					 MAP_PRIVATE | MAP_ANONYMOUS |
 					 MAP_UNINITIALIZED | MAP_GROWSDOWN,
 					 0);
 
 	if (IS_ERR_VALUE(current->mm->start_brk)) {
-		up_write(&current->mm->mmap_sem);
 		retval = current->mm->start_brk;
 		current->mm->start_brk = 0;
 		goto error_kill;
 	}
 
-	up_write(&current->mm->mmap_sem);
-
 	current->mm->brk = current->mm->start_brk;
 	current->mm->context.end_brk = current->mm->start_brk;
 	current->mm->context.end_brk +=
@@ -413,7 +409,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 #endif
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 	if (create_elf_fdpic_tables(bprm, current->mm,
 				    &exec_params, &interp_params) < 0)
 		goto error_kill;
@@ -956,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 	if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
 		mflags |= MAP_EXECUTABLE;
 
-	down_write(&mm->mmap_sem);
-	maddr = do_mmap(NULL, load_addr, top - base,
+	maddr = vm_mmap(NULL, load_addr, top - base,
 			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
-	up_write(&mm->mmap_sem);
 	if (IS_ERR_VALUE(maddr))
 		return (int) maddr;
 
@@ -1097,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 	/* create the mapping */
 	disp = phdr->p_vaddr & ~PAGE_MASK;
-	down_write(&mm->mmap_sem);
-	maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
+	maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
 			phdr->p_offset - disp);
-	up_write(&mm->mmap_sem);
 
 	kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
 	       loop, phdr->p_memsz + disp, prot, flags,
@@ -1144,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 			unsigned long xmaddr;
 
 			flags |= MAP_FIXED | MAP_ANONYMOUS;
-			down_write(&mm->mmap_sem);
-			xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+			xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
 					 prot, flags, 0);
-			up_write(&mm->mmap_sem);
 
 			kdebug("mmap[%d] <anon>"
 			       " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9b..2790c7e1912 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
 
 static int __init init_em86_binfmt(void)
 {
-	return register_binfmt(&em86_format);
+	register_binfmt(&em86_format);
+	return 0;
 }
 
 static void __exit exit_em86_binfmt(void)
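Note: register_binfmt() can no longer fail (registration is just a list insertion), so its return type became void and every init_*_binfmt() in this series turns into register-then-return-0. A sketch of the assumed core in fs/exec.c:

	void __register_binfmt(struct linux_binfmt *fmt, int insert)
	{
		BUG_ON(!fmt);
		write_lock(&binfmt_lock);
		insert ? list_add(&fmt->lh, &formats) :
			 list_add_tail(&fmt->lh, &formats);
		write_unlock(&binfmt_lock);
	}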
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed77..6b2daf99fab 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -15,7 +15,7 @@
  *	JAN/99 -- coded full program relocation (gerg@snapgear.com)
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -37,7 +37,6 @@
 #include <linux/syscalls.h>
 
 #include <asm/byteorder.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <asm/cacheflush.h>
@@ -543,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 		 */
 		DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
 
-		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
+		textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
 				  MAP_PRIVATE|MAP_EXECUTABLE, 0);
-		up_write(&current->mm->mmap_sem);
 		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
@@ -557,10 +554,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
 		len = PAGE_ALIGN(len);
-		down_write(&current->mm->mmap_sem);
-		realdatastart = do_mmap(0, 0, len,
+		realdatastart = vm_mmap(0, 0, len,
 				PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 
 		if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
 			if (!realdatastart)
@@ -604,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
 		len = PAGE_ALIGN(len);
-		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(0, 0, len,
+		textpos = vm_mmap(0, 0, len,
 				PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 
 		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
@@ -902,7 +895,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			libinfo.lib_list[j].start_data:UNLOADED_LIB;
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 
 	set_binfmt(&flat_format);
 
@@ -950,7 +942,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 static int __init init_flat_binfmt(void)
 {
-	return register_binfmt(&flat_format);
+	register_binfmt(&flat_format);
+	return 0;
 }
 
 /****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a9198dfd5f8..613aa061823 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/magic.h>
 #include <linux/binfmts.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
@@ -699,7 +700,7 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
 		[3] = {"register", &bm_register_operations, S_IWUSR},
 		/* last one */ {""}
 	};
-	int err = simple_fill_super(sb, 0x42494e4d, bm_files);
+	int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
 	if (!err)
 		sb->s_op = &s_ops;
 	return err;
@@ -726,11 +727,8 @@ static struct file_system_type bm_fs_type = {
 static int __init init_misc_binfmt(void)
 {
 	int err = register_filesystem(&bm_fs_type);
-	if (!err) {
-		err = insert_binfmt(&misc_format);
-		if (err)
-			unregister_filesystem(&bm_fs_type);
-	}
+	if (!err)
+		insert_binfmt(&misc_format);
 	return err;
 }
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591..d3b8c1f6315 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
 
 static int __init init_script_binfmt(void)
 {
-	return register_binfmt(&script_format);
+	register_binfmt(&script_format);
+	return 0;
 }
 
 static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b..4517aaff61b 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -147,10 +147,8 @@ static int map_som_binary(struct file *file,
 	code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
 	current->mm->start_code = code_start;
 	current->mm->end_code = code_start + code_size;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(file, code_start, code_size, prot,
+	retval = vm_mmap(file, code_start, code_size, prot,
 			flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
-	up_write(&current->mm->mmap_sem);
 	if (retval < 0 && retval > -1024)
 		goto out;
 
@@ -158,20 +156,16 @@ static int map_som_binary(struct file *file,
 	data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
 	current->mm->start_data = data_start;
 	current->mm->end_data = bss_start = data_start + data_size;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(file, data_start, data_size,
+	retval = vm_mmap(file, data_start, data_size,
 			prot | PROT_WRITE, flags,
 			SOM_PAGESTART(hpuxhdr->exec_dfile));
-	up_write(&current->mm->mmap_sem);
 	if (retval < 0 && retval > -1024)
 		goto out;
 
 	som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
 	current->mm->start_brk = current->mm->brk = som_brk;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(NULL, bss_start, som_brk - bss_start,
+	retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
 			prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
-	up_write(&current->mm->mmap_sem);
 	if (retval > 0 || retval < -1024)
 		retval = 0;
 out:
@@ -225,7 +219,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		goto out_free;
 
 	/* OK, This is the point of no return */
-	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
 	setup_new_exec(bprm);
 
@@ -289,7 +282,8 @@ static int load_som_library(struct file *f)
 
 static int __init init_som_binfmt(void)
 {
-	return register_binfmt(&som_format);
+	register_binfmt(&som_format);
+	return 0;
 }
 
 static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917c..e85c04b9f61 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
 	bix.sector_size = bi->sector_size;
 
 	bio_for_each_segment(bv, bio, i) {
-		void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+		void *kaddr = kmap_atomic(bv->bv_page);
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
 		total += sectors * bi->tuple_size;
 		BUG_ON(total > bio->bi_integrity->bip_size);
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 }
 
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
 	bix.sector_size = bi->sector_size;
 
 	bio_for_each_segment(bv, bio, i) {
-		void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+		void *kaddr = kmap_atomic(bv->bv_page);
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
 		ret = bi->verify_fn(&bix);
 
 		if (ret) {
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			return ret;
 		}
 
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
 		total += sectors * bi->tuple_size;
 		BUG_ON(total > bio->bi_integrity->bip_size);
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 
 	return ret;
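Note: kmap_atomic()/kunmap_atomic() dropped the KM_* slot argument; the slot is now chosen implicitly per CPU by nesting depth, so callers only pass the page (and the returned address on unmap). The mapping pattern is otherwise unchanged:

	void *kaddr = kmap_atomic(bv->bv_page);	/* was kmap_atomic(page, KM_USER0) */
	/* ... access kaddr + bv->bv_offset ... */
	kunmap_atomic(kaddr);			/* was kunmap_atomic(kaddr, KM_USER0) */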
diff --git a/fs/bio.c b/fs/bio.c
index b980ecde026..84da8853904 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -22,7 +22,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
@@ -505,9 +505,14 @@ EXPORT_SYMBOL(bio_clone);
 int bio_get_nr_vecs(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	return min_t(unsigned,
+	int nr_pages;
+
+	nr_pages = min_t(unsigned,
 		     queue_max_segments(q),
 		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
+	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
+
 }
 EXPORT_SYMBOL(bio_get_nr_vecs);
 
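Note: bio_get_nr_vecs() used to answer purely from the queue limits and could report more segments than a bio can hold, making a subsequent allocation with that count fail; the extra clamp keeps the usual caller pattern safe. A hedged sketch of a typical caller:

	int nr = bio_get_nr_vecs(bdev);			/* now never exceeds BIO_MAX_PAGES */
	struct bio *bio = bio_alloc(GFP_NOFS, nr);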
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5e9f198f771..ba11c30f302 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -16,6 +16,7 @@
 #include <linux/blkdev.h>
 #include <linux/module.h>
 #include <linux/blkpg.h>
+#include <linux/magic.h>
 #include <linux/buffer_head.h>
 #include <linux/swap.h>
 #include <linux/pagevec.h>
@@ -69,7 +70,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
 	spin_unlock(&dst->wb.list_lock);
 }
 
-static sector_t max_block(struct block_device *bdev)
+sector_t blkdev_max_block(struct block_device *bdev)
 {
 	sector_t retval = ~((sector_t)0);
 	loff_t sz = i_size_read(bdev->bd_inode);
@@ -109,7 +110,7 @@ void invalidate_bdev(struct block_device *bdev)
 	/* 99% of the time, we don't need to flush the cleancache on the bdev.
 	 * But, for the strange corners, lets be cautious
 	 */
-	cleancache_flush_inode(mapping);
+	cleancache_invalidate_inode(mapping);
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
@@ -162,7 +163,7 @@ static int
 blkdev_get_block(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	if (iblock >= max_block(I_BDEV(inode))) {
+	if (iblock >= blkdev_max_block(I_BDEV(inode))) {
 		if (create)
 			return -EIO;
 
@@ -184,7 +185,7 @@ static int
 blkdev_get_blocks(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	sector_t end_block = max_block(I_BDEV(inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(inode));
 	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
 
 	if ((iblock + max_blocks) > end_block) {
@@ -506,7 +507,7 @@ static const struct super_operations bdev_sops = {
 static struct dentry *bd_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
+	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
 }
 
 static struct file_system_type bd_type = {
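Note: besides renaming max_block() to blkdev_max_block() and dropping the static (presumably so code outside block_dev.c, such as the fs/buffer.c change in this series, can call it), the open-coded superblock magics move to <linux/magic.h>. The constants are the same bytes, now named; from the two hunks in this series:

	#define BDEVFS_MAGIC	0x62646576	/* "bdev" in ASCII */
	#define BINFMTFS_MAGIC	0x42494e4d	/* "BINM" in ASCII */

cleancache_flush_inode() appears to be a pure rename to cleancache_invalidate_inode().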
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 89b156d85d6..761e2cd8fed 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 		if (ret > 0) {
 			/* we need an acl */
 			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
+		} else {
+			cache_no_acl(inode);
 		}
+	} else {
+		cache_no_acl(inode);
 	}
 failed:
 	posix_acl_release(acl);
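Note: both new else branches call cache_no_acl(), so an inode created without a POSIX ACL records that fact in the in-core cache instead of leaving i_acl at the ACL_NOT_CACHED sentinel; later permission checks can then skip the xattr lookup. Roughly, the helper is:

	static inline void cache_no_acl(struct inode *inode)
	{
		inode->i_acl = NULL;
		inode->i_default_acl = NULL;
	}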
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9b9b15fd520..e616f8872e6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -24,6 +24,20 @@
 #include "ordered-data.h"
 #include "delayed-inode.h"
 
+/*
+ * ordered_data_close is set by truncate when a file that used
+ * to have good data has been truncated to zero. When it is set
+ * the btrfs file release call will add this inode to the
+ * ordered operations list so that we make sure to flush out any
+ * new data the application may have written before commit.
+ */
+#define BTRFS_INODE_ORDERED_DATA_CLOSE		0
+#define BTRFS_INODE_ORPHAN_META_RESERVED	1
+#define BTRFS_INODE_DUMMY			2
+#define BTRFS_INODE_IN_DEFRAG			3
+#define BTRFS_INODE_DELALLOC_META_RESERVED	4
+#define BTRFS_INODE_HAS_ORPHAN_ITEM		5
+
 /* in memory btrfs inode */
 struct btrfs_inode {
 	/* which subvolume this inode belongs to */
@@ -57,9 +71,6 @@ struct btrfs_inode {
 	/* used to order data wrt metadata */
 	struct btrfs_ordered_inode_tree ordered_tree;
 
-	/* for keeping track of orphaned inodes */
-	struct list_head i_orphan;
-
 	/* list of all the delalloc inodes in the FS. There are times we need
 	 * to write all the delalloc pages to disk, and this list is used
 	 * to walk them all.
@@ -78,14 +89,13 @@ struct btrfs_inode {
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
+	unsigned long runtime_flags;
+
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
 	 * enough field for this.
 	 */
 	u64 generation;
 
-	/* sequence number for NFS changes */
-	u64 sequence;
-
 	/*
 	 * transid of the trans_handle that last modified this inode
 	 */
@@ -145,22 +155,9 @@ struct btrfs_inode {
 	unsigned reserved_extents;
 
 	/*
-	 * ordered_data_close is set by truncate when a file that used
-	 * to have good data has been truncated to zero. When it is set
-	 * the btrfs file release call will add this inode to the
-	 * ordered operations list so that we make sure to flush out any
-	 * new data the application may have written before commit.
-	 */
-	unsigned ordered_data_close:1;
-	unsigned orphan_meta_reserved:1;
-	unsigned dummy_inode:1;
-	unsigned in_defrag:1;
-	unsigned delalloc_meta_reserved:1;
-
-	/*
 	 * always compress this one file
 	 */
-	unsigned force_compress:4;
+	unsigned force_compress;
 
 	struct btrfs_delayed_node *delayed_node;
 
@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
 	return false;
 }
 
+static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
+	mutex_lock(&root->log_mutex);
+	if (BTRFS_I(inode)->logged_trans == generation &&
+	    BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
+		ret = 1;
+	mutex_unlock(&root->log_mutex);
+	return ret;
+}
+
 #endif
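Note: the single-bit flags of struct btrfs_inode become bit numbers into one runtime_flags word manipulated with the atomic bitops, which removes update races between adjacent bitfields and frees space in the struct. A hedged usage sketch (the btrfs_add_ordered_operation() call is illustrative of the era, not quoted from this diff):

	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);

	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags))
		btrfs_add_ordered_operation(trans, root, inode);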
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d986824bb2b..9cebb1fd6a3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
89#include "disk-io.h" 89#include "disk-io.h"
90#include "transaction.h" 90#include "transaction.h"
91#include "extent_io.h" 91#include "extent_io.h"
92#include "disk-io.h"
93#include "volumes.h" 92#include "volumes.h"
94#include "print-tree.h" 93#include "print-tree.h"
95#include "locking.h" 94#include "locking.h"
@@ -104,8 +103,6 @@
104#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
105#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
106 * excluding " [...]" */ 105 * excluding " [...]" */
107#define BTRFSIC_BLOCK_SIZE PAGE_SIZE
108
109#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
110 107
111/* 108/*
@@ -211,8 +208,9 @@ struct btrfsic_block_data_ctx {
211 u64 dev_bytenr; /* physical bytenr on device */ 208 u64 dev_bytenr; /* physical bytenr on device */
212 u32 len; 209 u32 len;
213 struct btrfsic_dev_state *dev; 210 struct btrfsic_dev_state *dev;
214 char *data; 211 char **datav;
215 struct buffer_head *bh; /* do not use if set to NULL */ 212 struct page **pagev;
213 void *mem_to_free;
216}; 214};
217 215
218/* This structure is used to implement recursion without occupying 216/* This structure is used to implement recursion without occupying
@@ -244,6 +242,8 @@ struct btrfsic_state {
244 struct btrfs_root *root; 242 struct btrfs_root *root;
245 u64 max_superblock_generation; 243 u64 max_superblock_generation;
246 struct btrfsic_block *latest_superblock; 244 struct btrfsic_block *latest_superblock;
245 u32 metablock_size;
246 u32 datablock_size;
247}; 247};
248 248
249static void btrfsic_block_init(struct btrfsic_block *b); 249static void btrfsic_block_init(struct btrfsic_block *b);
@@ -291,8 +291,10 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
291static int btrfsic_process_metablock(struct btrfsic_state *state, 291static int btrfsic_process_metablock(struct btrfsic_state *state,
292 struct btrfsic_block *block, 292 struct btrfsic_block *block,
293 struct btrfsic_block_data_ctx *block_ctx, 293 struct btrfsic_block_data_ctx *block_ctx,
294 struct btrfs_header *hdr,
295 int limit_nesting, int force_iodone_flag); 294 int limit_nesting, int force_iodone_flag);
295static void btrfsic_read_from_block_data(
296 struct btrfsic_block_data_ctx *block_ctx,
297 void *dst, u32 offset, size_t len);
296static int btrfsic_create_link_to_next_block( 298static int btrfsic_create_link_to_next_block(
297 struct btrfsic_state *state, 299 struct btrfsic_state *state,
298 struct btrfsic_block *block, 300 struct btrfsic_block *block,
@@ -319,12 +321,13 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
319static int btrfsic_read_block(struct btrfsic_state *state, 321static int btrfsic_read_block(struct btrfsic_state *state,
320 struct btrfsic_block_data_ctx *block_ctx); 322 struct btrfsic_block_data_ctx *block_ctx);
321static void btrfsic_dump_database(struct btrfsic_state *state); 323static void btrfsic_dump_database(struct btrfsic_state *state);
324static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
322static int btrfsic_test_for_metadata(struct btrfsic_state *state, 325static int btrfsic_test_for_metadata(struct btrfsic_state *state,
323 const u8 *data, unsigned int size); 326 char **datav, unsigned int num_pages);
324static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
325 u64 dev_bytenr, u8 *mapped_data, 328 u64 dev_bytenr, char **mapped_datav,
326 unsigned int len, struct bio *bio, 329 unsigned int num_pages,
327 int *bio_is_patched, 330 struct bio *bio, int *bio_is_patched,
328 struct buffer_head *bh, 331 struct buffer_head *bh,
329 int submit_bio_bh_rw); 332 int submit_bio_bh_rw);
330static int btrfsic_process_written_superblock( 333static int btrfsic_process_written_superblock(
@@ -376,7 +379,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
376static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
377 u64 bytenr, 380 u64 bytenr,
378 struct btrfsic_dev_state *dev_state, 381 struct btrfsic_dev_state *dev_state,
379 u64 dev_bytenr, char *data); 382 u64 dev_bytenr);
380 383
381static struct mutex btrfsic_mutex; 384static struct mutex btrfsic_mutex;
382static int btrfsic_is_initialized; 385static int btrfsic_is_initialized;
@@ -652,7 +655,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
652 int pass; 655 int pass;
653 656
654 BUG_ON(NULL == state); 657 BUG_ON(NULL == state);
655 selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); 658 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
656 if (NULL == selected_super) { 659 if (NULL == selected_super) {
657 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 660 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
658 return -1; 661 return -1;
@@ -719,7 +722,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
719 722
720 num_copies = 723 num_copies =
721 btrfs_num_copies(&state->root->fs_info->mapping_tree, 724 btrfs_num_copies(&state->root->fs_info->mapping_tree,
722 next_bytenr, PAGE_SIZE); 725 next_bytenr, state->metablock_size);
723 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 726 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
724 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 727 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
725 (unsigned long long)next_bytenr, num_copies); 728 (unsigned long long)next_bytenr, num_copies);
@@ -728,9 +731,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
728 struct btrfsic_block *next_block; 731 struct btrfsic_block *next_block;
729 struct btrfsic_block_data_ctx tmp_next_block_ctx; 732 struct btrfsic_block_data_ctx tmp_next_block_ctx;
730 struct btrfsic_block_link *l; 733 struct btrfsic_block_link *l;
731 struct btrfs_header *hdr;
732 734
733 ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 735 ret = btrfsic_map_block(state, next_bytenr,
736 state->metablock_size,
734 &tmp_next_block_ctx, 737 &tmp_next_block_ctx,
735 mirror_num); 738 mirror_num);
736 if (ret) { 739 if (ret) {
@@ -759,7 +762,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
759 BUG_ON(NULL == l); 762 BUG_ON(NULL == l);
760 763
761 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 764 ret = btrfsic_read_block(state, &tmp_next_block_ctx);
762 if (ret < (int)BTRFSIC_BLOCK_SIZE) { 765 if (ret < (int)PAGE_CACHE_SIZE) {
763 printk(KERN_INFO 766 printk(KERN_INFO
764 "btrfsic: read @logical %llu failed!\n", 767 "btrfsic: read @logical %llu failed!\n",
765 (unsigned long long) 768 (unsigned long long)
@@ -769,11 +772,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
769 return -1; 772 return -1;
770 } 773 }
771 774
772 hdr = (struct btrfs_header *)tmp_next_block_ctx.data;
773 ret = btrfsic_process_metablock(state, 775 ret = btrfsic_process_metablock(state,
774 next_block, 776 next_block,
775 &tmp_next_block_ctx, 777 &tmp_next_block_ctx,
776 hdr,
777 BTRFS_MAX_LEVEL + 3, 1); 778 BTRFS_MAX_LEVEL + 3, 1);
778 btrfsic_release_block_ctx(&tmp_next_block_ctx); 779 btrfsic_release_block_ctx(&tmp_next_block_ctx);
779 } 780 }
@@ -800,7 +801,10 @@ static int btrfsic_process_superblock_dev_mirror(
800 801
801 /* super block bytenr is always the unmapped device bytenr */ 802 /* super block bytenr is always the unmapped device bytenr */
802 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 803 dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
803 bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); 804 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
805 return -1;
806 bh = __bread(superblock_bdev, dev_bytenr / 4096,
807 BTRFS_SUPER_INFO_SIZE);
804 if (NULL == bh) 808 if (NULL == bh)
805 return -1; 809 return -1;
806 super_tmp = (struct btrfs_super_block *) 810 super_tmp = (struct btrfs_super_block *)
@@ -809,7 +813,10 @@ static int btrfsic_process_superblock_dev_mirror(
809 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 813 if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
810 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, 814 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
811 sizeof(super_tmp->magic)) || 815 sizeof(super_tmp->magic)) ||
812 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { 816 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
817 btrfs_super_nodesize(super_tmp) != state->metablock_size ||
818 btrfs_super_leafsize(super_tmp) != state->metablock_size ||
819 btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
813 brelse(bh); 820 brelse(bh);
814 return 0; 821 return 0;
815 } 822 }
@@ -894,7 +901,7 @@ static int btrfsic_process_superblock_dev_mirror(
894 901
895 num_copies = 902 num_copies =
896 btrfs_num_copies(&state->root->fs_info->mapping_tree, 903 btrfs_num_copies(&state->root->fs_info->mapping_tree,
897 next_bytenr, PAGE_SIZE); 904 next_bytenr, state->metablock_size);
898 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 905 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
899 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 906 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
900 (unsigned long long)next_bytenr, num_copies); 907 (unsigned long long)next_bytenr, num_copies);
@@ -903,7 +910,8 @@ static int btrfsic_process_superblock_dev_mirror(
903 struct btrfsic_block_data_ctx tmp_next_block_ctx; 910 struct btrfsic_block_data_ctx tmp_next_block_ctx;
904 struct btrfsic_block_link *l; 911 struct btrfsic_block_link *l;
905 912
906 if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 913 if (btrfsic_map_block(state, next_bytenr,
914 state->metablock_size,
907 &tmp_next_block_ctx, 915 &tmp_next_block_ctx,
908 mirror_num)) { 916 mirror_num)) {
909 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 917 printk(KERN_INFO "btrfsic: btrfsic_map_block("
@@ -967,13 +975,15 @@ static int btrfsic_process_metablock(
967 struct btrfsic_state *state, 975 struct btrfsic_state *state,
968 struct btrfsic_block *const first_block, 976 struct btrfsic_block *const first_block,
969 struct btrfsic_block_data_ctx *const first_block_ctx, 977 struct btrfsic_block_data_ctx *const first_block_ctx,
970 struct btrfs_header *const first_hdr,
971 int first_limit_nesting, int force_iodone_flag) 978 int first_limit_nesting, int force_iodone_flag)
972{ 979{
973 struct btrfsic_stack_frame initial_stack_frame = { 0 }; 980 struct btrfsic_stack_frame initial_stack_frame = { 0 };
974 struct btrfsic_stack_frame *sf; 981 struct btrfsic_stack_frame *sf;
975 struct btrfsic_stack_frame *next_stack; 982 struct btrfsic_stack_frame *next_stack;
983 struct btrfs_header *const first_hdr =
984 (struct btrfs_header *)first_block_ctx->datav[0];
976 985
986 BUG_ON(!first_hdr);
977 sf = &initial_stack_frame; 987 sf = &initial_stack_frame;
978 sf->error = 0; 988 sf->error = 0;
979 sf->i = -1; 989 sf->i = -1;
@@ -1013,21 +1023,47 @@ continue_with_current_leaf_stack_frame:
1013 } 1023 }
1014 1024
1015 if (sf->i < sf->nr) { 1025 if (sf->i < sf->nr) {
1016 struct btrfs_item *disk_item = leafhdr->items + sf->i; 1026 struct btrfs_item disk_item;
1017 struct btrfs_disk_key *disk_key = &disk_item->key; 1027 u32 disk_item_offset =
1028 (uintptr_t)(leafhdr->items + sf->i) -
1029 (uintptr_t)leafhdr;
1030 struct btrfs_disk_key *disk_key;
1018 u8 type; 1031 u8 type;
1019 const u32 item_offset = le32_to_cpu(disk_item->offset); 1032 u32 item_offset;
1020 1033
1034 if (disk_item_offset + sizeof(struct btrfs_item) >
1035 sf->block_ctx->len) {
1036leaf_item_out_of_bounce_error:
1037 printk(KERN_INFO
1038 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1039 sf->block_ctx->start,
1040 sf->block_ctx->dev->name);
1041 goto one_stack_frame_backwards;
1042 }
1043 btrfsic_read_from_block_data(sf->block_ctx,
1044 &disk_item,
1045 disk_item_offset,
1046 sizeof(struct btrfs_item));
1047 item_offset = le32_to_cpu(disk_item.offset);
1048 disk_key = &disk_item.key;
1021 type = disk_key->type; 1049 type = disk_key->type;
1022 1050
1023 if (BTRFS_ROOT_ITEM_KEY == type) { 1051 if (BTRFS_ROOT_ITEM_KEY == type) {
1024 const struct btrfs_root_item *const root_item = 1052 struct btrfs_root_item root_item;
1025 (struct btrfs_root_item *) 1053 u32 root_item_offset;
1026 (sf->block_ctx->data + 1054 u64 next_bytenr;
1027 offsetof(struct btrfs_leaf, items) + 1055
1028 item_offset); 1056 root_item_offset = item_offset +
1029 const u64 next_bytenr = 1057 offsetof(struct btrfs_leaf, items);
1030 le64_to_cpu(root_item->bytenr); 1058 if (root_item_offset +
1059 sizeof(struct btrfs_root_item) >
1060 sf->block_ctx->len)
1061 goto leaf_item_out_of_bounce_error;
1062 btrfsic_read_from_block_data(
1063 sf->block_ctx, &root_item,
1064 root_item_offset,
1065 sizeof(struct btrfs_root_item));
1066 next_bytenr = le64_to_cpu(root_item.bytenr);
1031 1067
1032 sf->error = 1068 sf->error =
1033 btrfsic_create_link_to_next_block( 1069 btrfsic_create_link_to_next_block(
@@ -1042,7 +1078,7 @@ continue_with_current_leaf_stack_frame:
1042 &sf->num_copies, 1078 &sf->num_copies,
1043 &sf->mirror_num, 1079 &sf->mirror_num,
1044 disk_key, 1080 disk_key,
1045 le64_to_cpu(root_item-> 1081 le64_to_cpu(root_item.
1046 generation)); 1082 generation));
1047 if (sf->error) 1083 if (sf->error)
1048 goto one_stack_frame_backwards; 1084 goto one_stack_frame_backwards;
@@ -1050,7 +1086,7 @@ continue_with_current_leaf_stack_frame:
1050 if (NULL != sf->next_block) { 1086 if (NULL != sf->next_block) {
1051 struct btrfs_header *const next_hdr = 1087 struct btrfs_header *const next_hdr =
1052 (struct btrfs_header *) 1088 (struct btrfs_header *)
1053 sf->next_block_ctx.data; 1089 sf->next_block_ctx.datav[0];
1054 1090
1055 next_stack = 1091 next_stack =
1056 btrfsic_stack_frame_alloc(); 1092 btrfsic_stack_frame_alloc();
@@ -1112,10 +1148,24 @@ continue_with_current_node_stack_frame:
1112 } 1148 }
1113 1149
1114 if (sf->i < sf->nr) { 1150 if (sf->i < sf->nr) {
1115 struct btrfs_key_ptr *disk_key_ptr = 1151 struct btrfs_key_ptr key_ptr;
1116 nodehdr->ptrs + sf->i; 1152 u32 key_ptr_offset;
1117 const u64 next_bytenr = 1153 u64 next_bytenr;
1118 le64_to_cpu(disk_key_ptr->blockptr); 1154
1155 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1156 (uintptr_t)nodehdr;
1157 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1158 sf->block_ctx->len) {
1159 printk(KERN_INFO
1160 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1161 sf->block_ctx->start,
1162 sf->block_ctx->dev->name);
1163 goto one_stack_frame_backwards;
1164 }
1165 btrfsic_read_from_block_data(
1166 sf->block_ctx, &key_ptr, key_ptr_offset,
1167 sizeof(struct btrfs_key_ptr));
1168 next_bytenr = le64_to_cpu(key_ptr.blockptr);
1119 1169
1120 sf->error = btrfsic_create_link_to_next_block( 1170 sf->error = btrfsic_create_link_to_next_block(
1121 state, 1171 state,
@@ -1128,15 +1178,15 @@ continue_with_current_node_stack_frame:
1128 force_iodone_flag, 1178 force_iodone_flag,
1129 &sf->num_copies, 1179 &sf->num_copies,
1130 &sf->mirror_num, 1180 &sf->mirror_num,
1131 &disk_key_ptr->key, 1181 &key_ptr.key,
1132 le64_to_cpu(disk_key_ptr->generation)); 1182 le64_to_cpu(key_ptr.generation));
1133 if (sf->error) 1183 if (sf->error)
1134 goto one_stack_frame_backwards; 1184 goto one_stack_frame_backwards;
1135 1185
1136 if (NULL != sf->next_block) { 1186 if (NULL != sf->next_block) {
1137 struct btrfs_header *const next_hdr = 1187 struct btrfs_header *const next_hdr =
1138 (struct btrfs_header *) 1188 (struct btrfs_header *)
1139 sf->next_block_ctx.data; 1189 sf->next_block_ctx.datav[0];
1140 1190
1141 next_stack = btrfsic_stack_frame_alloc(); 1191 next_stack = btrfsic_stack_frame_alloc();
1142 if (NULL == next_stack) 1192 if (NULL == next_stack)
@@ -1182,6 +1232,35 @@ one_stack_frame_backwards:
1182 return sf->error; 1232 return sf->error;
1183} 1233}
1184 1234
1235static void btrfsic_read_from_block_data(
1236 struct btrfsic_block_data_ctx *block_ctx,
1237 void *dstv, u32 offset, size_t len)
1238{
1239 size_t cur;
1240 size_t offset_in_page;
1241 char *kaddr;
1242 char *dst = (char *)dstv;
1243 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246 WARN_ON(offset + len > block_ctx->len);
1247 offset_in_page = (start_offset + offset) &
1248 ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250 while (len > 0) {
1251 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253 PAGE_CACHE_SHIFT);
1254 kaddr = block_ctx->datav[i];
1255 memcpy(dst, kaddr + offset_in_page, cur);
1256
1257 dst += cur;
1258 len -= cur;
1259 offset_in_page = 0;
1260 i++;
1261 }
1262}
1263
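For context, the new btrfsic_read_from_block_data() above replaces direct single-buffer access with a copy that can span several mapped pages. A minimal userspace sketch of the same walk, assuming 4 KiB pages; the model_* names and struct layout are illustrative stand-ins, not kernel API:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define MODEL_PAGE_SIZE 4096u

struct model_block_ctx {
	uint64_t start;  /* logical start of the block */
	size_t len;      /* total block length in bytes */
	char **datav;    /* one mapped buffer per page */
};

static void model_read_from_block_data(struct model_block_ctx *ctx,
				       void *dstv, uint32_t offset, size_t len)
{
	char *dst = dstv;
	size_t start_offset = ctx->start & (MODEL_PAGE_SIZE - 1);
	size_t i = (start_offset + offset) / MODEL_PAGE_SIZE;
	size_t offset_in_page = (start_offset + offset) % MODEL_PAGE_SIZE;

	assert(offset + len <= ctx->len);
	while (len > 0) {
		/* copy up to the end of the current page, then move on */
		size_t cur = MODEL_PAGE_SIZE - offset_in_page;

		if (cur > len)
			cur = len;
		memcpy(dst, ctx->datav[i] + offset_in_page, cur);
		dst += cur;
		len -= cur;
		offset_in_page = 0;  /* later pages are read from byte 0 */
		i++;
	}
}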
1185static int btrfsic_create_link_to_next_block( 1264static int btrfsic_create_link_to_next_block(
1186 struct btrfsic_state *state, 1265 struct btrfsic_state *state,
1187 struct btrfsic_block *block, 1266 struct btrfsic_block *block,
@@ -1205,7 +1284,7 @@ static int btrfsic_create_link_to_next_block(
1205 if (0 == *num_copiesp) { 1284 if (0 == *num_copiesp) {
1206 *num_copiesp = 1285 *num_copiesp =
1207 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1286 btrfs_num_copies(&state->root->fs_info->mapping_tree,
1208 next_bytenr, PAGE_SIZE); 1287 next_bytenr, state->metablock_size);
1209 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1288 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1210 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1289 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1211 (unsigned long long)next_bytenr, *num_copiesp); 1290 (unsigned long long)next_bytenr, *num_copiesp);
@@ -1220,7 +1299,7 @@ static int btrfsic_create_link_to_next_block(
1220 "btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1299 "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1221 *mirror_nump); 1300 *mirror_nump);
1222 ret = btrfsic_map_block(state, next_bytenr, 1301 ret = btrfsic_map_block(state, next_bytenr,
1223 BTRFSIC_BLOCK_SIZE, 1302 state->metablock_size,
1224 next_block_ctx, *mirror_nump); 1303 next_block_ctx, *mirror_nump);
1225 if (ret) { 1304 if (ret) {
1226 printk(KERN_INFO 1305 printk(KERN_INFO
@@ -1315,7 +1394,7 @@ static int btrfsic_create_link_to_next_block(
1315 1394
1316 if (limit_nesting > 0 && did_alloc_block_link) { 1395 if (limit_nesting > 0 && did_alloc_block_link) {
1317 ret = btrfsic_read_block(state, next_block_ctx); 1396 ret = btrfsic_read_block(state, next_block_ctx);
1318 if (ret < (int)BTRFSIC_BLOCK_SIZE) { 1397 if (ret < (int)next_block_ctx->len) {
1319 printk(KERN_INFO 1398 printk(KERN_INFO
1320 "btrfsic: read block @logical %llu failed!\n", 1399 "btrfsic: read block @logical %llu failed!\n",
1321 (unsigned long long)next_bytenr); 1400 (unsigned long long)next_bytenr);
@@ -1340,43 +1419,74 @@ static int btrfsic_handle_extent_data(
1340 u32 item_offset, int force_iodone_flag) 1419 u32 item_offset, int force_iodone_flag)
1341{ 1420{
1342 int ret; 1421 int ret;
1343 struct btrfs_file_extent_item *file_extent_item = 1422 struct btrfs_file_extent_item file_extent_item;
1344 (struct btrfs_file_extent_item *)(block_ctx->data + 1423 u64 file_extent_item_offset;
1345 offsetof(struct btrfs_leaf, 1424 u64 next_bytenr;
1346 items) + item_offset); 1425 u64 num_bytes;
1347 u64 next_bytenr = 1426 u64 generation;
1348 le64_to_cpu(file_extent_item->disk_bytenr) +
1349 le64_to_cpu(file_extent_item->offset);
1350 u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes);
1351 u64 generation = le64_to_cpu(file_extent_item->generation);
1352 struct btrfsic_block_link *l; 1427 struct btrfsic_block_link *l;
1353 1428
1429 file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430 item_offset;
1431 if (file_extent_item_offset +
1432 offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1433 block_ctx->len) {
1434 printk(KERN_INFO
1435 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1436 block_ctx->start, block_ctx->dev->name);
1437 return -1;
1438 }
1439
1440 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1441 file_extent_item_offset,
1442 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1443 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1444 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1445 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1446 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1447 file_extent_item.type,
1448 (unsigned long long)
1449 le64_to_cpu(file_extent_item.disk_bytenr));
1450 return 0;
1451 }
1452
1453 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1454 block_ctx->len) {
1455 printk(KERN_INFO
1456 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1457 block_ctx->start, block_ctx->dev->name);
1458 return -1;
1459 }
1460 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1461 file_extent_item_offset,
1462 sizeof(struct btrfs_file_extent_item));
1463 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1464 le64_to_cpu(file_extent_item.offset);
1465 generation = le64_to_cpu(file_extent_item.generation);
1466 num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1468
1354 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1469 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1355 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1470 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1356 " offset = %llu, num_bytes = %llu\n", 1471 " offset = %llu, num_bytes = %llu\n",
1357 file_extent_item->type, 1472 file_extent_item.type,
1358 (unsigned long long)
1359 le64_to_cpu(file_extent_item->disk_bytenr),
1360 (unsigned long long)
1361 le64_to_cpu(file_extent_item->offset),
1362 (unsigned long long) 1473 (unsigned long long)
1363 le64_to_cpu(file_extent_item->num_bytes)); 1474 le64_to_cpu(file_extent_item.disk_bytenr),
1364 if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || 1475 (unsigned long long)le64_to_cpu(file_extent_item.offset),
1365 ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) 1476 (unsigned long long)num_bytes);
1366 return 0;
1367 while (num_bytes > 0) { 1477 while (num_bytes > 0) {
1368 u32 chunk_len; 1478 u32 chunk_len;
1369 int num_copies; 1479 int num_copies;
1370 int mirror_num; 1480 int mirror_num;
1371 1481
1372 if (num_bytes > BTRFSIC_BLOCK_SIZE) 1482 if (num_bytes > state->datablock_size)
1373 chunk_len = BTRFSIC_BLOCK_SIZE; 1483 chunk_len = state->datablock_size;
1374 else 1484 else
1375 chunk_len = num_bytes; 1485 chunk_len = num_bytes;
1376 1486
1377 num_copies = 1487 num_copies =
1378 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1488 btrfs_num_copies(&state->root->fs_info->mapping_tree,
1379 next_bytenr, PAGE_SIZE); 1489 next_bytenr, state->datablock_size);
1380 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1490 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1381 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1491 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1382 (unsigned long long)next_bytenr, num_copies); 1492 (unsigned long long)next_bytenr, num_copies);
@@ -1476,8 +1586,9 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1476 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1586 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1477 block_ctx_out->start = bytenr; 1587 block_ctx_out->start = bytenr;
1478 block_ctx_out->len = len; 1588 block_ctx_out->len = len;
1479 block_ctx_out->data = NULL; 1589 block_ctx_out->datav = NULL;
1480 block_ctx_out->bh = NULL; 1590 block_ctx_out->pagev = NULL;
1591 block_ctx_out->mem_to_free = NULL;
1481 1592
1482 if (0 == ret) 1593 if (0 == ret)
1483 kfree(multi); 1594 kfree(multi);
@@ -1497,8 +1608,9 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1497 block_ctx_out->dev_bytenr = bytenr; 1608 block_ctx_out->dev_bytenr = bytenr;
1498 block_ctx_out->start = bytenr; 1609 block_ctx_out->start = bytenr;
1499 block_ctx_out->len = len; 1610 block_ctx_out->len = len;
1500 block_ctx_out->data = NULL; 1611 block_ctx_out->datav = NULL;
1501 block_ctx_out->bh = NULL; 1612 block_ctx_out->pagev = NULL;
1613 block_ctx_out->mem_to_free = NULL;
1502 if (NULL != block_ctx_out->dev) { 1614 if (NULL != block_ctx_out->dev) {
1503 return 0; 1615 return 0;
1504 } else { 1616 } else {
@@ -1509,38 +1621,127 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1509 1621
1510static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1622static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1511{ 1623{
1512 if (NULL != block_ctx->bh) { 1624 if (block_ctx->mem_to_free) {
1513 brelse(block_ctx->bh); 1625 unsigned int num_pages;
1514 block_ctx->bh = NULL; 1626
1627 BUG_ON(!block_ctx->datav);
1628 BUG_ON(!block_ctx->pagev);
1629 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1630 PAGE_CACHE_SHIFT;
1631 while (num_pages > 0) {
1632 num_pages--;
1633 if (block_ctx->datav[num_pages]) {
1634 kunmap(block_ctx->pagev[num_pages]);
1635 block_ctx->datav[num_pages] = NULL;
1636 }
1637 if (block_ctx->pagev[num_pages]) {
1638 __free_page(block_ctx->pagev[num_pages]);
1639 block_ctx->pagev[num_pages] = NULL;
1640 }
1641 }
1642
1643 kfree(block_ctx->mem_to_free);
1644 block_ctx->mem_to_free = NULL;
1645 block_ctx->pagev = NULL;
1646 block_ctx->datav = NULL;
1515 } 1647 }
1516} 1648}
1517 1649
1518static int btrfsic_read_block(struct btrfsic_state *state, 1650static int btrfsic_read_block(struct btrfsic_state *state,
1519 struct btrfsic_block_data_ctx *block_ctx) 1651 struct btrfsic_block_data_ctx *block_ctx)
1520{ 1652{
1521 block_ctx->bh = NULL; 1653 unsigned int num_pages;
1522 if (block_ctx->dev_bytenr & 4095) { 1654 unsigned int i;
1655 u64 dev_bytenr;
1656 int ret;
1657
1658 BUG_ON(block_ctx->datav);
1659 BUG_ON(block_ctx->pagev);
1660 BUG_ON(block_ctx->mem_to_free);
1661 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1523 printk(KERN_INFO 1662 printk(KERN_INFO
1524 "btrfsic: read_block() with unaligned bytenr %llu\n", 1663 "btrfsic: read_block() with unaligned bytenr %llu\n",
1525 (unsigned long long)block_ctx->dev_bytenr); 1664 (unsigned long long)block_ctx->dev_bytenr);
1526 return -1; 1665 return -1;
1527 } 1666 }
1528 if (block_ctx->len > 4096) { 1667
1529 printk(KERN_INFO 1668 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1530 "btrfsic: read_block() with too huge size %d\n", 1669 PAGE_CACHE_SHIFT;
1531 block_ctx->len); 1670 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1671 sizeof(*block_ctx->pagev)) *
1672 num_pages, GFP_NOFS);
1673 if (!block_ctx->mem_to_free)
1532 return -1; 1674 return -1;
1675 block_ctx->datav = block_ctx->mem_to_free;
1676 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1677 for (i = 0; i < num_pages; i++) {
1678 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1679 if (!block_ctx->pagev[i])
1680 return -1;
1533 } 1681 }
1534 1682
1535 block_ctx->bh = __bread(block_ctx->dev->bdev, 1683 dev_bytenr = block_ctx->dev_bytenr;
1536 block_ctx->dev_bytenr >> 12, 4096); 1684 for (i = 0; i < num_pages;) {
1537 if (NULL == block_ctx->bh) 1685 struct bio *bio;
1538 return -1; 1686 unsigned int j;
1539 block_ctx->data = block_ctx->bh->b_data; 1687 DECLARE_COMPLETION_ONSTACK(complete);
1688
1689 bio = bio_alloc(GFP_NOFS, num_pages - i);
1690 if (!bio) {
1691 printk(KERN_INFO
1692 "btrfsic: bio_alloc() for %u pages failed!\n",
1693 num_pages - i);
1694 return -1;
1695 }
1696 bio->bi_bdev = block_ctx->dev->bdev;
1697 bio->bi_sector = dev_bytenr >> 9;
1698 bio->bi_end_io = btrfsic_complete_bio_end_io;
1699 bio->bi_private = &complete;
1700
1701 for (j = i; j < num_pages; j++) {
1702 ret = bio_add_page(bio, block_ctx->pagev[j],
1703 PAGE_CACHE_SIZE, 0);
1704 if (PAGE_CACHE_SIZE != ret)
1705 break;
1706 }
1707 if (j == i) {
1708 printk(KERN_INFO
1709 "btrfsic: error, failed to add a single page!\n");
1710 return -1;
1711 }
1712 submit_bio(READ, bio);
1713
1714 /* this will also unplug the queue */
1715 wait_for_completion(&complete);
1716
1717 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1718 printk(KERN_INFO
1719 "btrfsic: read error at logical %llu dev %s!\n",
1720 block_ctx->start, block_ctx->dev->name);
1721 bio_put(bio);
1722 return -1;
1723 }
1724 bio_put(bio);
1725 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1726 i = j;
1727 }
1728 for (i = 0; i < num_pages; i++) {
1729 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1730 if (!block_ctx->datav[i]) {
1731 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1732 block_ctx->dev->name);
1733 return -1;
1734 }
1735 }
1540 1736
1541 return block_ctx->len; 1737 return block_ctx->len;
1542} 1738}
1543 1739
1740static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1741{
1742 complete((struct completion *)bio->bi_private);
1743}
1744
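btrfsic_read_block() above allocates one page per PAGE_CACHE_SIZE chunk of the block, batches the pages into bios, submits them with submit_bio(READ, bio) and blocks on a completion that btrfsic_complete_bio_end_io() signals. A hedged userspace analog of the same synchronous, page-granular read, with pread(2) standing in for the submit-and-wait pair; fd and the model_* names are illustrative:

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#define MODEL_PAGE_SIZE 4096u

/* On failure the caller is expected to free whatever was allocated,
 * mirroring how btrfsic_release_block_ctx() cleans up after errors. */
static int model_read_block(int fd, uint64_t dev_bytenr,
			    char **datav, unsigned int num_pages)
{
	unsigned int i;

	if (dev_bytenr & (MODEL_PAGE_SIZE - 1))
		return -1;  /* unaligned start, like the kernel check */
	for (i = 0; i < num_pages; i++) {
		datav[i] = malloc(MODEL_PAGE_SIZE);
		if (!datav[i])
			return -1;
		if (pread(fd, datav[i], MODEL_PAGE_SIZE,
			  dev_bytenr + (uint64_t)i * MODEL_PAGE_SIZE) !=
		    (ssize_t)MODEL_PAGE_SIZE)
			return -1;  /* short read or I/O error */
	}
	return 0;
}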
1544static void btrfsic_dump_database(struct btrfsic_state *state) 1745static void btrfsic_dump_database(struct btrfsic_state *state)
1545{ 1746{
1546 struct list_head *elem_all; 1747 struct list_head *elem_all;
@@ -1618,32 +1819,39 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1618 * (note that this test fails for the super block) 1819 * (note that this test fails for the super block)
1619 */ 1820 */
1620static int btrfsic_test_for_metadata(struct btrfsic_state *state, 1821static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1621 const u8 *data, unsigned int size) 1822 char **datav, unsigned int num_pages)
1622{ 1823{
1623 struct btrfs_header *h; 1824 struct btrfs_header *h;
1624 u8 csum[BTRFS_CSUM_SIZE]; 1825 u8 csum[BTRFS_CSUM_SIZE];
1625 u32 crc = ~(u32)0; 1826 u32 crc = ~(u32)0;
1626 int fail = 0; 1827 unsigned int i;
1627 int crc_fail = 0;
1628 1828
1629 h = (struct btrfs_header *)data; 1829 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1830 return 1; /* not metadata */
1831 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1832 h = (struct btrfs_header *)datav[0];
1630 1833
1631 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) 1834 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1632 fail++; 1835 return 1;
1836
1837 for (i = 0; i < num_pages; i++) {
1838 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1839 size_t sublen = i ? PAGE_CACHE_SIZE :
1840 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1633 1841
1634 crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); 1842 crc = crc32c(crc, data, sublen);
1843 }
1635 btrfs_csum_final(crc, csum); 1844 btrfs_csum_final(crc, csum);
1636 if (memcmp(csum, h->csum, state->csum_size)) 1845 if (memcmp(csum, h->csum, state->csum_size))
1637 crc_fail++; 1846 return 1;
1638 1847
1639 return fail || crc_fail; 1848 return 0; /* is metadata */
1640} 1849}
1641 1850
1642static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1851static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1643 u64 dev_bytenr, 1852 u64 dev_bytenr, char **mapped_datav,
1644 u8 *mapped_data, unsigned int len, 1853 unsigned int num_pages,
1645 struct bio *bio, 1854 struct bio *bio, int *bio_is_patched,
1646 int *bio_is_patched,
1647 struct buffer_head *bh, 1855 struct buffer_head *bh,
1648 int submit_bio_bh_rw) 1856 int submit_bio_bh_rw)
1649{ 1857{
@@ -1653,12 +1861,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1653 int ret; 1861 int ret;
1654 struct btrfsic_state *state = dev_state->state; 1862 struct btrfsic_state *state = dev_state->state;
1655 struct block_device *bdev = dev_state->bdev; 1863 struct block_device *bdev = dev_state->bdev;
1864 unsigned int processed_len;
1656 1865
1657 WARN_ON(len > PAGE_SIZE);
1658 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len));
1659 if (NULL != bio_is_patched) 1866 if (NULL != bio_is_patched)
1660 *bio_is_patched = 0; 1867 *bio_is_patched = 0;
1661 1868
1869again:
1870 if (num_pages == 0)
1871 return;
1872
1873 processed_len = 0;
1874 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1875 num_pages));
1876
1662 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1877 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1663 &state->block_hashtable); 1878 &state->block_hashtable);
1664 if (NULL != block) { 1879 if (NULL != block) {
@@ -1668,8 +1883,16 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1668 1883
1669 if (block->is_superblock) { 1884 if (block->is_superblock) {
1670 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1885 bytenr = le64_to_cpu(((struct btrfs_super_block *)
1671 mapped_data)->bytenr); 1886 mapped_datav[0])->bytenr);
1887 if (num_pages * PAGE_CACHE_SIZE <
1888 BTRFS_SUPER_INFO_SIZE) {
1889 printk(KERN_INFO
1890 "btrfsic: cannot work with too short bios!\n");
1891 return;
1892 }
1672 is_metadata = 1; 1893 is_metadata = 1;
1894 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1895 processed_len = BTRFS_SUPER_INFO_SIZE;
1673 if (state->print_mask & 1896 if (state->print_mask &
1674 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1897 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1675 printk(KERN_INFO 1898 printk(KERN_INFO
@@ -1679,12 +1902,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1679 } 1902 }
1680 if (is_metadata) { 1903 if (is_metadata) {
1681 if (!block->is_superblock) { 1904 if (!block->is_superblock) {
1905 if (num_pages * PAGE_CACHE_SIZE <
1906 state->metablock_size) {
1907 printk(KERN_INFO
1908 "btrfsic: cannot work with too short bios!\n");
1909 return;
1910 }
1911 processed_len = state->metablock_size;
1682 bytenr = le64_to_cpu(((struct btrfs_header *) 1912 bytenr = le64_to_cpu(((struct btrfs_header *)
1683 mapped_data)->bytenr); 1913 mapped_datav[0])->bytenr);
1684 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1914 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1685 dev_state, 1915 dev_state,
1686 dev_bytenr, 1916 dev_bytenr);
1687 mapped_data);
1688 } 1917 }
1689 if (block->logical_bytenr != bytenr) { 1918 if (block->logical_bytenr != bytenr) {
1690 printk(KERN_INFO 1919 printk(KERN_INFO
@@ -1711,6 +1940,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1711 block->mirror_num, 1940 block->mirror_num,
1712 btrfsic_get_block_type(state, block)); 1941 btrfsic_get_block_type(state, block));
1713 } else { 1942 } else {
1943 if (num_pages * PAGE_CACHE_SIZE <
1944 state->datablock_size) {
1945 printk(KERN_INFO
1946 "btrfsic: cannot work with too short bios!\n");
1947 return;
1948 }
1949 processed_len = state->datablock_size;
1714 bytenr = block->logical_bytenr; 1950 bytenr = block->logical_bytenr;
1715 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1951 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1716 printk(KERN_INFO 1952 printk(KERN_INFO
@@ -1748,7 +1984,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1748 le64_to_cpu(block->disk_key.offset), 1984 le64_to_cpu(block->disk_key.offset),
1749 (unsigned long long) 1985 (unsigned long long)
1750 le64_to_cpu(((struct btrfs_header *) 1986 le64_to_cpu(((struct btrfs_header *)
1751 mapped_data)->generation), 1987 mapped_datav[0])->generation),
1752 (unsigned long long) 1988 (unsigned long long)
1753 state->max_superblock_generation); 1989 state->max_superblock_generation);
1754 btrfsic_dump_tree(state); 1990 btrfsic_dump_tree(state);
@@ -1766,10 +2002,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1766 (unsigned long long)block->generation, 2002 (unsigned long long)block->generation,
1767 (unsigned long long) 2003 (unsigned long long)
1768 le64_to_cpu(((struct btrfs_header *) 2004 le64_to_cpu(((struct btrfs_header *)
1769 mapped_data)->generation)); 2005 mapped_datav[0])->generation));
1770 /* it would not be safe to go on */ 2006 /* it would not be safe to go on */
1771 btrfsic_dump_tree(state); 2007 btrfsic_dump_tree(state);
1772 return; 2008 goto continue_loop;
1773 } 2009 }
1774 2010
1775 /* 2011 /*
@@ -1797,18 +2033,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1797 } 2033 }
1798 2034
1799 if (block->is_superblock) 2035 if (block->is_superblock)
1800 ret = btrfsic_map_superblock(state, bytenr, len, 2036 ret = btrfsic_map_superblock(state, bytenr,
2037 processed_len,
1801 bdev, &block_ctx); 2038 bdev, &block_ctx);
1802 else 2039 else
1803 ret = btrfsic_map_block(state, bytenr, len, 2040 ret = btrfsic_map_block(state, bytenr, processed_len,
1804 &block_ctx, 0); 2041 &block_ctx, 0);
1805 if (ret) { 2042 if (ret) {
1806 printk(KERN_INFO 2043 printk(KERN_INFO
1807 "btrfsic: btrfsic_map_block(root @%llu)" 2044 "btrfsic: btrfsic_map_block(root @%llu)"
1808 " failed!\n", (unsigned long long)bytenr); 2045 " failed!\n", (unsigned long long)bytenr);
1809 return; 2046 goto continue_loop;
1810 } 2047 }
1811 block_ctx.data = mapped_data; 2048 block_ctx.datav = mapped_datav;
1812 /* the following is required in case of writes to mirrors, 2049 /* the following is required in case of writes to mirrors,
1813 * use the same one that was used for the lookup */ 2050 * use the same one that was used for the lookup */
1814 block_ctx.dev = dev_state; 2051 block_ctx.dev = dev_state;
@@ -1864,11 +2101,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1864 block->logical_bytenr = bytenr; 2101 block->logical_bytenr = bytenr;
1865 block->is_metadata = 1; 2102 block->is_metadata = 1;
1866 if (block->is_superblock) { 2103 if (block->is_superblock) {
2104 BUG_ON(PAGE_CACHE_SIZE !=
2105 BTRFS_SUPER_INFO_SIZE);
1867 ret = btrfsic_process_written_superblock( 2106 ret = btrfsic_process_written_superblock(
1868 state, 2107 state,
1869 block, 2108 block,
1870 (struct btrfs_super_block *) 2109 (struct btrfs_super_block *)
1871 mapped_data); 2110 mapped_datav[0]);
1872 if (state->print_mask & 2111 if (state->print_mask &
1873 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2112 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
1874 printk(KERN_INFO 2113 printk(KERN_INFO
@@ -1881,8 +2120,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1881 state, 2120 state,
1882 block, 2121 block,
1883 &block_ctx, 2122 &block_ctx,
1884 (struct btrfs_header *)
1885 block_ctx.data,
1886 0, 0); 2123 0, 0);
1887 } 2124 }
1888 if (ret) 2125 if (ret)
@@ -1913,26 +2150,30 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1913 u64 bytenr; 2150 u64 bytenr;
1914 2151
1915 if (!is_metadata) { 2152 if (!is_metadata) {
2153 processed_len = state->datablock_size;
1916 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2154 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1917 printk(KERN_INFO "Written block (%s/%llu/?)" 2155 printk(KERN_INFO "Written block (%s/%llu/?)"
1918 " !found in hash table, D.\n", 2156 " !found in hash table, D.\n",
1919 dev_state->name, 2157 dev_state->name,
1920 (unsigned long long)dev_bytenr); 2158 (unsigned long long)dev_bytenr);
1921 if (!state->include_extent_data) 2159 if (!state->include_extent_data) {
1922 return; /* ignore that written D block */ 2160 /* ignore that written D block */
2161 goto continue_loop;
2162 }
1923 2163
1924 /* this is getting ugly for the 2164 /* this is getting ugly for the
1925 * include_extent_data case... */ 2165 * include_extent_data case... */
1926 bytenr = 0; /* unknown */ 2166 bytenr = 0; /* unknown */
1927 block_ctx.start = bytenr; 2167 block_ctx.start = bytenr;
1928 block_ctx.len = len; 2168 block_ctx.len = processed_len;
1929 block_ctx.bh = NULL; 2169 block_ctx.mem_to_free = NULL;
2170 block_ctx.pagev = NULL;
1930 } else { 2171 } else {
2172 processed_len = state->metablock_size;
1931 bytenr = le64_to_cpu(((struct btrfs_header *) 2173 bytenr = le64_to_cpu(((struct btrfs_header *)
1932 mapped_data)->bytenr); 2174 mapped_datav[0])->bytenr);
1933 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2175 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
1934 dev_bytenr, 2176 dev_bytenr);
1935 mapped_data);
1936 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2177 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1937 printk(KERN_INFO 2178 printk(KERN_INFO
1938 "Written block @%llu (%s/%llu/?)" 2179 "Written block @%llu (%s/%llu/?)"
@@ -1941,17 +2182,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1941 dev_state->name, 2182 dev_state->name,
1942 (unsigned long long)dev_bytenr); 2183 (unsigned long long)dev_bytenr);
1943 2184
1944 ret = btrfsic_map_block(state, bytenr, len, &block_ctx, 2185 ret = btrfsic_map_block(state, bytenr, processed_len,
1945 0); 2186 &block_ctx, 0);
1946 if (ret) { 2187 if (ret) {
1947 printk(KERN_INFO 2188 printk(KERN_INFO
1948 "btrfsic: btrfsic_map_block(root @%llu)" 2189 "btrfsic: btrfsic_map_block(root @%llu)"
1949 " failed!\n", 2190 " failed!\n",
1950 (unsigned long long)dev_bytenr); 2191 (unsigned long long)dev_bytenr);
1951 return; 2192 goto continue_loop;
1952 } 2193 }
1953 } 2194 }
1954 block_ctx.data = mapped_data; 2195 block_ctx.datav = mapped_datav;
1955 /* the following is required in case of writes to mirrors, 2196 /* the following is required in case of writes to mirrors,
1956 * use the same one that was used for the lookup */ 2197 * use the same one that was used for the lookup */
1957 block_ctx.dev = dev_state; 2198 block_ctx.dev = dev_state;
@@ -1961,7 +2202,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1961 if (NULL == block) { 2202 if (NULL == block) {
1962 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2203 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1963 btrfsic_release_block_ctx(&block_ctx); 2204 btrfsic_release_block_ctx(&block_ctx);
1964 return; 2205 goto continue_loop;
1965 } 2206 }
1966 block->dev_state = dev_state; 2207 block->dev_state = dev_state;
1967 block->dev_bytenr = dev_bytenr; 2208 block->dev_bytenr = dev_bytenr;
@@ -2021,9 +2262,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
2021 2262
2022 if (is_metadata) { 2263 if (is_metadata) {
2023 ret = btrfsic_process_metablock(state, block, 2264 ret = btrfsic_process_metablock(state, block,
2024 &block_ctx, 2265 &block_ctx, 0, 0);
2025 (struct btrfs_header *)
2026 block_ctx.data, 0, 0);
2027 if (ret) 2266 if (ret)
2028 printk(KERN_INFO 2267 printk(KERN_INFO
2029 "btrfsic: process_metablock(root @%llu)" 2268 "btrfsic: process_metablock(root @%llu)"
@@ -2032,6 +2271,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
2032 } 2271 }
2033 btrfsic_release_block_ctx(&block_ctx); 2272 btrfsic_release_block_ctx(&block_ctx);
2034 } 2273 }
2274
2275continue_loop:
2276 BUG_ON(!processed_len);
2277 dev_bytenr += processed_len;
2278 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2279 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2280 goto again;
2035} 2281}
2036 2282
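With the again/continue_loop pair introduced above, btrfsic_process_written_block() consumes a multi-page write one block at a time: each pass decides processed_len (superblock, metadata or data block size) and the loop tail advances the device offset and the mapped-page vector by that many whole pages. The shape of that control flow as a hedged userspace sketch; classify_len and the model_* names are illustrative:

#include <stdint.h>

#define MODEL_PAGE_SIZE 4096u

static void model_process_written(char **mapped_datav, unsigned int num_pages,
				  uint64_t dev_bytenr,
				  unsigned int (*classify_len)(const char *))
{
	while (num_pages > 0) {
		/* e.g. metablock or datablock size; page-aligned thanks
		 * to the mount-time checks */
		unsigned int processed_len = classify_len(mapped_datav[0]);
		unsigned int pages = processed_len / MODEL_PAGE_SIZE;

		if (pages == 0 || pages > num_pages)
			return;  /* too-short bio: bail out, as above */
		/* ... verify the block at dev_bytenr here ... */
		dev_bytenr += processed_len;
		mapped_datav += pages;
		num_pages -= pages;
	}
}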
2037static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) 2283static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
@@ -2214,7 +2460,7 @@ static int btrfsic_process_written_superblock(
2214 2460
2215 num_copies = 2461 num_copies =
2216 btrfs_num_copies(&state->root->fs_info->mapping_tree, 2462 btrfs_num_copies(&state->root->fs_info->mapping_tree,
2217 next_bytenr, PAGE_SIZE); 2463 next_bytenr, BTRFS_SUPER_INFO_SIZE);
2218 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2464 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2219 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2465 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2220 (unsigned long long)next_bytenr, num_copies); 2466 (unsigned long long)next_bytenr, num_copies);
@@ -2225,7 +2471,8 @@ static int btrfsic_process_written_superblock(
2225 printk(KERN_INFO 2471 printk(KERN_INFO
2226 "btrfsic_process_written_superblock(" 2472 "btrfsic_process_written_superblock("
2227 "mirror_num=%d)\n", mirror_num); 2473 "mirror_num=%d)\n", mirror_num);
2228 ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 2474 ret = btrfsic_map_block(state, next_bytenr,
2475 BTRFS_SUPER_INFO_SIZE,
2229 &tmp_next_block_ctx, 2476 &tmp_next_block_ctx,
2230 mirror_num); 2477 mirror_num);
2231 if (ret) { 2478 if (ret) {
@@ -2690,7 +2937,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
2690static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2937static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2691 u64 bytenr, 2938 u64 bytenr,
2692 struct btrfsic_dev_state *dev_state, 2939 struct btrfsic_dev_state *dev_state,
2693 u64 dev_bytenr, char *data) 2940 u64 dev_bytenr)
2694{ 2941{
2695 int num_copies; 2942 int num_copies;
2696 int mirror_num; 2943 int mirror_num;
@@ -2699,10 +2946,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2699 int match = 0; 2946 int match = 0;
2700 2947
2701 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, 2948 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2702 bytenr, PAGE_SIZE); 2949 bytenr, state->metablock_size);
2703 2950
2704 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2951 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2705 ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, 2952 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2706 &block_ctx, mirror_num); 2953 &block_ctx, mirror_num);
2707 if (ret) { 2954 if (ret) {
2708 printk(KERN_INFO "btrfsic:" 2955 printk(KERN_INFO "btrfsic:"
@@ -2728,7 +2975,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2728 (unsigned long long)bytenr, dev_state->name, 2975 (unsigned long long)bytenr, dev_state->name,
2729 (unsigned long long)dev_bytenr); 2976 (unsigned long long)dev_bytenr);
2730 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2977 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2731 ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, 2978 ret = btrfsic_map_block(state, bytenr,
2979 state->metablock_size,
2732 &block_ctx, mirror_num); 2980 &block_ctx, mirror_num);
2733 if (ret) 2981 if (ret)
2734 continue; 2982 continue;
@@ -2782,13 +3030,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
2782 (unsigned long)bh->b_size, bh->b_data, 3030 (unsigned long)bh->b_size, bh->b_data,
2783 bh->b_bdev); 3031 bh->b_bdev);
2784 btrfsic_process_written_block(dev_state, dev_bytenr, 3032 btrfsic_process_written_block(dev_state, dev_bytenr,
2785 bh->b_data, bh->b_size, NULL, 3033 &bh->b_data, 1, NULL,
2786 NULL, bh, rw); 3034 NULL, bh, rw);
2787 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3035 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2788 if (dev_state->state->print_mask & 3036 if (dev_state->state->print_mask &
2789 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3037 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2790 printk(KERN_INFO 3038 printk(KERN_INFO
2791 "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", 3039 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
2792 rw, bh->b_bdev); 3040 rw, bh->b_bdev);
2793 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3041 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2794 if ((dev_state->state->print_mask & 3042 if ((dev_state->state->print_mask &
@@ -2837,6 +3085,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2837 unsigned int i; 3085 unsigned int i;
2838 u64 dev_bytenr; 3086 u64 dev_bytenr;
2839 int bio_is_patched; 3087 int bio_is_patched;
3088 char **mapped_datav;
2840 3089
2841 dev_bytenr = 512 * bio->bi_sector; 3090 dev_bytenr = 512 * bio->bi_sector;
2842 bio_is_patched = 0; 3091 bio_is_patched = 0;
@@ -2849,35 +3098,46 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2849 (unsigned long long)dev_bytenr, 3098 (unsigned long long)dev_bytenr,
2850 bio->bi_bdev); 3099 bio->bi_bdev);
2851 3100
3101 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3102 GFP_NOFS);
3103 if (!mapped_datav)
3104 goto leave;
2852 for (i = 0; i < bio->bi_vcnt; i++) { 3105 for (i = 0; i < bio->bi_vcnt; i++) {
2853 u8 *mapped_data; 3106 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
2854 3107 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
2855 mapped_data = kmap(bio->bi_io_vec[i].bv_page); 3108 if (!mapped_datav[i]) {
3109 while (i > 0) {
3110 i--;
3111 kunmap(bio->bi_io_vec[i].bv_page);
3112 }
3113 kfree(mapped_datav);
3114 goto leave;
3115 }
2856 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3116 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2857 BTRFSIC_PRINT_MASK_VERBOSE) == 3117 BTRFSIC_PRINT_MASK_VERBOSE) ==
2858 (dev_state->state->print_mask & 3118 (dev_state->state->print_mask &
2859 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3119 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2860 BTRFSIC_PRINT_MASK_VERBOSE))) 3120 BTRFSIC_PRINT_MASK_VERBOSE)))
2861 printk(KERN_INFO 3121 printk(KERN_INFO
2862 "#%u: page=%p, mapped=%p, len=%u," 3122 "#%u: page=%p, len=%u, offset=%u\n",
2863 " offset=%u\n",
2864 i, bio->bi_io_vec[i].bv_page, 3123 i, bio->bi_io_vec[i].bv_page,
2865 mapped_data,
2866 bio->bi_io_vec[i].bv_len, 3124 bio->bi_io_vec[i].bv_len,
2867 bio->bi_io_vec[i].bv_offset); 3125 bio->bi_io_vec[i].bv_offset);
2868 btrfsic_process_written_block(dev_state, dev_bytenr, 3126 }
2869 mapped_data, 3127 btrfsic_process_written_block(dev_state, dev_bytenr,
2870 bio->bi_io_vec[i].bv_len, 3128 mapped_datav, bio->bi_vcnt,
2871 bio, &bio_is_patched, 3129 bio, &bio_is_patched,
2872 NULL, rw); 3130 NULL, rw);
3131 while (i > 0) {
3132 i--;
2873 kunmap(bio->bi_io_vec[i].bv_page); 3133 kunmap(bio->bi_io_vec[i].bv_page);
2874 dev_bytenr += bio->bi_io_vec[i].bv_len;
2875 } 3134 }
3135 kfree(mapped_datav);
2876 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3136 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2877 if (dev_state->state->print_mask & 3137 if (dev_state->state->print_mask &
2878 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3138 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2879 printk(KERN_INFO 3139 printk(KERN_INFO
2880 "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", 3140 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
2881 rw, bio->bi_bdev); 3141 rw, bio->bi_bdev);
2882 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3142 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2883 if ((dev_state->state->print_mask & 3143 if ((dev_state->state->print_mask &
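The bio path above kmaps every page of the bio up front and, when one kmap fails, unwinds the mappings it already made before giving up; only then is the whole vector handed to btrfsic_process_written_block() in a single call. The same acquire-all-or-release-everything idiom in plain C, with malloc/free as hedged stand-ins for kmap/kunmap:

#include <stdlib.h>

static char **model_map_all(unsigned int n)
{
	char **v = malloc(n * sizeof(*v));
	unsigned int i;

	if (!v)
		return NULL;
	for (i = 0; i < n; i++) {
		v[i] = malloc(4096);
		if (!v[i]) {
			while (i > 0)  /* unwind partial progress */
				free(v[--i]);
			free(v);
			return NULL;
		}
	}
	return v;
}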
@@ -2904,6 +3164,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2904 bio->bi_end_io = btrfsic_bio_end_io; 3164 bio->bi_end_io = btrfsic_bio_end_io;
2905 } 3165 }
2906 } 3166 }
3167leave:
2907 mutex_unlock(&btrfsic_mutex); 3168 mutex_unlock(&btrfsic_mutex);
2908 3169
2909 submit_bio(rw, bio); 3170 submit_bio(rw, bio);
@@ -2918,6 +3179,30 @@ int btrfsic_mount(struct btrfs_root *root,
2918 struct list_head *dev_head = &fs_devices->devices; 3179 struct list_head *dev_head = &fs_devices->devices;
2919 struct btrfs_device *device; 3180 struct btrfs_device *device;
2920 3181
3182 if (root->nodesize != root->leafsize) {
3183 printk(KERN_INFO
3184 "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3185 root->nodesize, root->leafsize);
3186 return -1;
3187 }
3188 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3189 printk(KERN_INFO
3190 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3191 root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3192 return -1;
3193 }
3194 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3195 printk(KERN_INFO
3196 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3197 root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3198 return -1;
3199 }
3200 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3201 printk(KERN_INFO
3202 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3203 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3204 return -1;
3205 }
2921 state = kzalloc(sizeof(*state), GFP_NOFS); 3206 state = kzalloc(sizeof(*state), GFP_NOFS);
2922 if (NULL == state) { 3207 if (NULL == state) {
2923 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3208 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
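The four mount-time checks added above all rely on the same test: for a power-of-two page size, a value is a page multiple exactly when its low bits are clear. A stand-alone illustration (the helper name is made up; the precondition that page_size is a power of two is essential):

#include <stdbool.h>
#include <stdint.h>

static bool is_page_multiple(uint32_t size, uint32_t page_size)
{
	/* valid only when page_size is a power of two */
	return (size & (page_size - 1)) == 0;
}

/* is_page_multiple(16384, 4096) -> true
 * is_page_multiple(6144, 4096)  -> false, so such a nodesize is rejected */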
@@ -2934,6 +3219,8 @@ int btrfsic_mount(struct btrfs_root *root,
2934 state->print_mask = print_mask; 3219 state->print_mask = print_mask;
2935 state->include_extent_data = including_extent_data; 3220 state->include_extent_data = including_extent_data;
2936 state->csum_size = 0; 3221 state->csum_size = 0;
3222 state->metablock_size = root->nodesize;
3223 state->datablock_size = root->sectorsize;
2937 INIT_LIST_HEAD(&state->all_blocks_list); 3224 INIT_LIST_HEAD(&state->all_blocks_list);
2938 btrfsic_block_hashtable_init(&state->block_hashtable); 3225 btrfsic_block_hashtable_init(&state->block_hashtable);
2939 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3226 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
@@ -3050,7 +3337,7 @@ void btrfsic_unmount(struct btrfs_root *root,
3050 btrfsic_block_link_free(l); 3337 btrfsic_block_link_free(l);
3051 } 3338 }
3052 3339
3053 if (b_all->is_iodone) 3340 if (b_all->is_iodone || b_all->never_written)
3054 btrfsic_block_free(b_all); 3341 btrfsic_block_free(b_all);
3055 else 3342 else
3056 printk(KERN_INFO "btrfs: attempt to free %c-block" 3343 printk(KERN_INFO "btrfs: attempt to free %c-block"
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 646f5e6f256..86eff48dab7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
120 page = cb->compressed_pages[i]; 120 page = cb->compressed_pages[i];
121 csum = ~(u32)0; 121 csum = ~(u32)0;
122 122
123 kaddr = kmap_atomic(page, KM_USER0); 123 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 125 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr, KM_USER0); 126 kunmap_atomic(kaddr);
127 127
128 if (csum != *cb_sum) { 128 if (csum != *cb_sum) {
129 printk(KERN_INFO "btrfs csum failed ino %llu " 129 printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
521 if (zero_offset) { 521 if (zero_offset) {
522 int zeros; 522 int zeros;
523 zeros = PAGE_CACHE_SIZE - zero_offset; 523 zeros = PAGE_CACHE_SIZE - zero_offset;
524 userpage = kmap_atomic(page, KM_USER0); 524 userpage = kmap_atomic(page);
525 memset(userpage + zero_offset, 0, zeros); 525 memset(userpage + zero_offset, 0, zeros);
526 flush_dcache_page(page); 526 flush_dcache_page(page);
527 kunmap_atomic(userpage, KM_USER0); 527 kunmap_atomic(userpage);
528 } 528 }
529 } 529 }
530 530
@@ -993,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
993 bytes = min(PAGE_CACHE_SIZE - *pg_offset, 993 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
994 PAGE_CACHE_SIZE - buf_offset); 994 PAGE_CACHE_SIZE - buf_offset);
995 bytes = min(bytes, working_bytes); 995 bytes = min(bytes, working_bytes);
996 kaddr = kmap_atomic(page_out, KM_USER0); 996 kaddr = kmap_atomic(page_out);
997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); 997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
998 kunmap_atomic(kaddr, KM_USER0); 998 kunmap_atomic(kaddr);
999 flush_dcache_page(page_out); 999 flush_dcache_page(page_out);
1000 1000
1001 *pg_offset += bytes; 1001 *pg_offset += bytes;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b4534d918e4..d7a96cfdc50 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1390,7 +1390,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1390 if (!cur) 1390 if (!cur)
1391 return -EIO; 1391 return -EIO;
1392 } else if (!uptodate) { 1392 } else if (!uptodate) {
1393 btrfs_read_buffer(cur, gen); 1393 err = btrfs_read_buffer(cur, gen);
1394 if (err) {
1395 free_extent_buffer(cur);
1396 return err;
1397 }
1394 } 1398 }
1395 } 1399 }
1396 if (search_start == 0) 1400 if (search_start == 0)
@@ -1505,20 +1509,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
1505static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 1509static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1506 int level, int *slot) 1510 int level, int *slot)
1507{ 1511{
1508 if (level == 0) { 1512 if (level == 0)
1509 return generic_bin_search(eb, 1513 return generic_bin_search(eb,
1510 offsetof(struct btrfs_leaf, items), 1514 offsetof(struct btrfs_leaf, items),
1511 sizeof(struct btrfs_item), 1515 sizeof(struct btrfs_item),
1512 key, btrfs_header_nritems(eb), 1516 key, btrfs_header_nritems(eb),
1513 slot); 1517 slot);
1514 } else { 1518 else
1515 return generic_bin_search(eb, 1519 return generic_bin_search(eb,
1516 offsetof(struct btrfs_node, ptrs), 1520 offsetof(struct btrfs_node, ptrs),
1517 sizeof(struct btrfs_key_ptr), 1521 sizeof(struct btrfs_key_ptr),
1518 key, btrfs_header_nritems(eb), 1522 key, btrfs_header_nritems(eb),
1519 slot); 1523 slot);
1520 }
1521 return -1;
1522} 1524}
1523 1525
1524int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 1526int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
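The bin_search() cleanup above works because generic_bin_search() is parameterized by a start offset and a per-item stride, so one routine serves both leaf items and node key pointers. A userspace model of such a stride-based binary search over a flat buffer; the memcmp comparator is an illustrative simplification (the kernel compares struct btrfs_disk_key fields, not raw bytes):

#include <stddef.h>
#include <string.h>

static int model_bin_search(const char *buf, size_t item_offset,
			    size_t item_size, const void *key, size_t key_size,
			    int nritems, int *slot)
{
	int low = 0, high = nritems;

	while (low < high) {
		int mid = low + (high - low) / 2;
		int ret = memcmp(buf + item_offset + (size_t)mid * item_size,
				 key, key_size);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			return 0;  /* exact match */
		}
	}
	*slot = low;  /* insertion point, like generic_bin_search() */
	return 1;
}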
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f5f11a6c5e9..0151ca1ac65 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 };
173#define BTRFS_FT_XATTR 8 173#define BTRFS_FT_XATTR 8
174#define BTRFS_FT_MAX 9 174#define BTRFS_FT_MAX 9
175 175
176/* ioprio of readahead is set to idle */
177#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
178
176/* 179/*
177 * The key defines the order in the tree, and so it also defines (optimal) 180 * The key defines the order in the tree, and so it also defines (optimal)
178 * block layout. 181 * block layout.
@@ -823,6 +826,14 @@ struct btrfs_csum_item {
823 u8 csum; 826 u8 csum;
824} __attribute__ ((__packed__)); 827} __attribute__ ((__packed__));
825 828
829struct btrfs_dev_stats_item {
830 /*
831 * grow this item struct at the end for future enhancements and keep
832 * the existing values unchanged
833 */
834 __le64 values[BTRFS_DEV_STAT_VALUES_MAX];
835} __attribute__ ((__packed__));
836
826/* different types of block groups (and chunks) */ 837/* different types of block groups (and chunks) */
827#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) 838#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
828#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) 839#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
@@ -1384,7 +1395,7 @@ struct btrfs_root {
1384 struct list_head root_list; 1395 struct list_head root_list;
1385 1396
1386 spinlock_t orphan_lock; 1397 spinlock_t orphan_lock;
1387 struct list_head orphan_list; 1398 atomic_t orphan_inodes;
1388 struct btrfs_block_rsv *orphan_block_rsv; 1399 struct btrfs_block_rsv *orphan_block_rsv;
1389 int orphan_item_inserted; 1400 int orphan_item_inserted;
1390 int orphan_cleanup_state; 1401 int orphan_cleanup_state;
@@ -1517,6 +1528,12 @@ struct btrfs_ioctl_defrag_range_args {
1517#define BTRFS_BALANCE_ITEM_KEY 248 1528#define BTRFS_BALANCE_ITEM_KEY 248
1518 1529
1519/* 1530/*
1531 * Persistently stores the I/O stats in the device tree.
1532 * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
1533 */
1534#define BTRFS_DEV_STATS_KEY 249
1535
1536/*
1520 * string items are for debugging. They just store a short string of 1537 * string items are for debugging. They just store a short string of
1521 * data in the FS 1538 * data in the FS
1522 */ 1539 */
@@ -2175,7 +2192,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
2175 2192
2176static inline bool btrfs_root_readonly(struct btrfs_root *root) 2193static inline bool btrfs_root_readonly(struct btrfs_root *root)
2177{ 2194{
2178 return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; 2195 return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
2179} 2196}
2180 2197
2181/* struct btrfs_root_backup */ 2198/* struct btrfs_root_backup */
@@ -2424,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
2424 return btrfs_item_size(eb, e) - offset; 2441 return btrfs_item_size(eb, e) - offset;
2425} 2442}
2426 2443
2444/* btrfs_dev_stats_item */
2445static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
2446 struct btrfs_dev_stats_item *ptr,
2447 int index)
2448{
2449 u64 val;
2450
2451 read_extent_buffer(eb, &val,
2452 offsetof(struct btrfs_dev_stats_item, values) +
2453 ((unsigned long)ptr) + (index * sizeof(u64)),
2454 sizeof(val));
2455 return val;
2456}
2457
2458static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
2459 struct btrfs_dev_stats_item *ptr,
2460 int index, u64 val)
2461{
2462 write_extent_buffer(eb, &val,
2463 offsetof(struct btrfs_dev_stats_item, values) +
2464 ((unsigned long)ptr) + (index * sizeof(u64)),
2465 sizeof(val));
2466}
2467
2427static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) 2468static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
2428{ 2469{
2429 return sb->s_fs_info; 2470 return sb->s_fs_info;
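btrfs_dev_stats_value()/btrfs_set_dev_stats_value() above go through read_extent_buffer()/write_extent_buffer() because the item can sit at an arbitrary, possibly unaligned offset inside a mapped buffer. The same discipline on a flat byte buffer, memcpy for alignment safety plus an explicit little-endian conversion (le64toh/htole64 from glibc's <endian.h> are an assumption here):

#include <endian.h>
#include <stdint.h>
#include <string.h>

static uint64_t model_get_le64(const char *buf, size_t offset)
{
	uint64_t v;

	memcpy(&v, buf + offset, sizeof(v));  /* offset may be unaligned */
	return le64toh(v);
}

static void model_set_le64(char *buf, size_t offset, uint64_t val)
{
	val = htole64(val);
	memcpy(buf + offset, &val, sizeof(val));
}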
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 03e3748d84d..c18d0442ae6 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata(
669 return ret; 669 return ret;
670 } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { 670 } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
671 spin_lock(&BTRFS_I(inode)->lock); 671 spin_lock(&BTRFS_I(inode)->lock);
672 if (BTRFS_I(inode)->delalloc_meta_reserved) { 672 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
673 BTRFS_I(inode)->delalloc_meta_reserved = 0; 673 &BTRFS_I(inode)->runtime_flags)) {
674 spin_unlock(&BTRFS_I(inode)->lock); 674 spin_unlock(&BTRFS_I(inode)->lock);
675 release = true; 675 release = true;
676 goto migrate; 676 goto migrate;
@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1706 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); 1706 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1707 btrfs_set_stack_inode_generation(inode_item, 1707 btrfs_set_stack_inode_generation(inode_item,
1708 BTRFS_I(inode)->generation); 1708 BTRFS_I(inode)->generation);
1709 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); 1709 btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1710 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1710 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1711 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1711 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1712 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1712 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1754 set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1754 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1755 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1755 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1756 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1756 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1757 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); 1757 inode->i_version = btrfs_stack_inode_sequence(inode_item);
1758 inode->i_rdev = 0; 1758 inode->i_rdev = 0;
1759 *rdev = btrfs_stack_inode_rdev(inode_item); 1759 *rdev = btrfs_stack_inode_rdev(inode_item);
1760 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); 1760 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f51ad8477f1..b99d5127ba1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1153 root->orphan_block_rsv = NULL; 1153 root->orphan_block_rsv = NULL;
1154 1154
1155 INIT_LIST_HEAD(&root->dirty_list); 1155 INIT_LIST_HEAD(&root->dirty_list);
1156 INIT_LIST_HEAD(&root->orphan_list);
1157 INIT_LIST_HEAD(&root->root_list); 1156 INIT_LIST_HEAD(&root->root_list);
1158 spin_lock_init(&root->orphan_lock); 1157 spin_lock_init(&root->orphan_lock);
1159 spin_lock_init(&root->inode_lock); 1158 spin_lock_init(&root->inode_lock);
@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1166 atomic_set(&root->log_commit[0], 0); 1165 atomic_set(&root->log_commit[0], 0);
1167 atomic_set(&root->log_commit[1], 0); 1166 atomic_set(&root->log_commit[1], 0);
1168 atomic_set(&root->log_writers, 0); 1167 atomic_set(&root->log_writers, 0);
1168 atomic_set(&root->orphan_inodes, 0);
1169 root->log_batch = 0; 1169 root->log_batch = 0;
1170 root->log_transid = 0; 1170 root->log_transid = 0;
1171 root->last_log_commit = 0; 1171 root->last_log_commit = 0;
@@ -2006,7 +2006,8 @@ int open_ctree(struct super_block *sb,
2006 BTRFS_I(fs_info->btree_inode)->root = tree_root; 2006 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2007 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 2007 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2008 sizeof(struct btrfs_key)); 2008 sizeof(struct btrfs_key));
2009 BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; 2009 set_bit(BTRFS_INODE_DUMMY,
2010 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2010 insert_inode_hash(fs_info->btree_inode); 2011 insert_inode_hash(fs_info->btree_inode);
2011 2012
2012 spin_lock_init(&fs_info->block_group_cache_lock); 2013 spin_lock_init(&fs_info->block_group_cache_lock);
@@ -2358,6 +2359,13 @@ retry_root_backup:
2358 fs_info->generation = generation; 2359 fs_info->generation = generation;
2359 fs_info->last_trans_committed = generation; 2360 fs_info->last_trans_committed = generation;
2360 2361
2362 ret = btrfs_init_dev_stats(fs_info);
2363 if (ret) {
2364 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
2365 ret);
2366 goto fail_block_groups;
2367 }
2368
2361 ret = btrfs_init_space_info(fs_info); 2369 ret = btrfs_init_space_info(fs_info);
2362 if (ret) { 2370 if (ret) {
2363 printk(KERN_ERR "Failed to initialize space info: %d\n", ret); 2371 printk(KERN_ERR "Failed to initialize space info: %d\n", ret);
@@ -2561,18 +2569,19 @@ recovery_tree_root:
2561 2569
2562static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 2570static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2563{ 2571{
2564 char b[BDEVNAME_SIZE];
2565
2566 if (uptodate) { 2572 if (uptodate) {
2567 set_buffer_uptodate(bh); 2573 set_buffer_uptodate(bh);
2568 } else { 2574 } else {
2575 struct btrfs_device *device = (struct btrfs_device *)
2576 bh->b_private;
2577
2569 printk_ratelimited(KERN_WARNING "lost page write due to " 2578 printk_ratelimited(KERN_WARNING "lost page write due to "
2570 "I/O error on %s\n", 2579 "I/O error on %s\n", device->name);
2571 bdevname(bh->b_bdev, b));
2572 /* note, we don't set_buffer_write_io_error because we have 2580 /* note, we don't set_buffer_write_io_error because we have
2573 * our own ways of dealing with the IO errors 2581 * our own ways of dealing with the IO errors
2574 */ 2582 */
2575 clear_buffer_uptodate(bh); 2583 clear_buffer_uptodate(bh);
2584 btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
2576 } 2585 }
2577 unlock_buffer(bh); 2586 unlock_buffer(bh);
2578 put_bh(bh); 2587 put_bh(bh);
@@ -2687,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device,
2687 set_buffer_uptodate(bh); 2696 set_buffer_uptodate(bh);
2688 lock_buffer(bh); 2697 lock_buffer(bh);
2689 bh->b_end_io = btrfs_end_buffer_write_sync; 2698 bh->b_end_io = btrfs_end_buffer_write_sync;
2699 bh->b_private = device;
2690 } 2700 }
2691 2701
2692 /* 2702 /*
@@ -2745,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2745 } 2755 }
2746 if (!bio_flagged(bio, BIO_UPTODATE)) { 2756 if (!bio_flagged(bio, BIO_UPTODATE)) {
2747 ret = -EIO; 2757 ret = -EIO;
2758 if (!bio_flagged(bio, BIO_EOPNOTSUPP))
2759 btrfs_dev_stat_inc_and_print(device,
2760 BTRFS_DEV_STAT_FLUSH_ERRS);
2748 } 2761 }
2749 2762
2750 /* drop the reference from the wait == 0 run */ 2763 /* drop the reference from the wait == 0 run */
@@ -2907,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2907 return ret; 2920 return ret;
2908} 2921}
2909 2922
2910/* Kill all outstanding I/O */
2911void btrfs_abort_devices(struct btrfs_root *root)
2912{
2913 struct list_head *head;
2914 struct btrfs_device *dev;
2915 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2916 head = &root->fs_info->fs_devices->devices;
2917 list_for_each_entry_rcu(dev, head, dev_list) {
2918 blk_abort_queue(dev->bdev->bd_disk->queue);
2919 }
2920 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2921}
2922
2923void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2923void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2924{ 2924{
2925 spin_lock(&fs_info->fs_roots_radix_lock); 2925 spin_lock(&fs_info->fs_roots_radix_lock);
@@ -3676,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3676 return 0; 3676 return 0;
3677} 3677}
3678 3678
3679static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
3680 u64 start, u64 end,
3681 struct extent_state *state)
3682{
3683 struct super_block *sb = page->mapping->host->i_sb;
3684 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3685 btrfs_error(fs_info, -EIO,
3686 "Error occured while writing out btree at %llu", start);
3687 return -EIO;
3688}
3689
3690static struct extent_io_ops btree_extent_io_ops = { 3679static struct extent_io_ops btree_extent_io_ops = {
3691 .write_cache_pages_lock_hook = btree_lock_page_hook, 3680 .write_cache_pages_lock_hook = btree_lock_page_hook,
3692 .readpage_end_io_hook = btree_readpage_end_io_hook, 3681 .readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3694,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = {
3694 .submit_bio_hook = btree_submit_bio_hook, 3683 .submit_bio_hook = btree_submit_bio_hook,
3695 /* note we're sharing with inode.c for the merge bio hook */ 3684 /* note we're sharing with inode.c for the merge bio hook */
3696 .merge_bio_hook = btrfs_merge_bio_hook, 3685 .merge_bio_hook = btrfs_merge_bio_hook,
3697 .writepage_io_failed_hook = btree_writepage_io_failed_hook,
3698}; 3686};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index ab1830aaf0e..05b3fab39f7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
89int btrfs_cleanup_transaction(struct btrfs_root *root); 89int btrfs_cleanup_transaction(struct btrfs_root *root);
90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
91 struct btrfs_root *root); 91 struct btrfs_root *root);
92void btrfs_abort_devices(struct btrfs_root *root);
93 92
94#ifdef CONFIG_DEBUG_LOCK_ALLOC 93#ifdef CONFIG_DEBUG_LOCK_ALLOC
95void btrfs_init_lockdep(void); 94void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b68eb7ad05a..4b5a1e1bdef 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3578,7 +3578,7 @@ again:
3578 space_info->chunk_alloc = 0; 3578 space_info->chunk_alloc = 0;
3579 spin_unlock(&space_info->lock); 3579 spin_unlock(&space_info->lock);
3580out: 3580out:
3581 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3581 mutex_unlock(&fs_info->chunk_mutex);
3582 return ret; 3582 return ret;
3583} 3583}
3584 3584
@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode)
4355 BTRFS_I(inode)->outstanding_extents--; 4355 BTRFS_I(inode)->outstanding_extents--;
4356 4356
4357 if (BTRFS_I(inode)->outstanding_extents == 0 && 4357 if (BTRFS_I(inode)->outstanding_extents == 0 &&
4358 BTRFS_I(inode)->delalloc_meta_reserved) { 4358 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4359 &BTRFS_I(inode)->runtime_flags))
4359 drop_inode_space = 1; 4360 drop_inode_space = 1;
4360 BTRFS_I(inode)->delalloc_meta_reserved = 0;
4361 }
4362 4361
4363 /* 4362 /*
4364 * If we have more or the same amount of outstanding extents than we have 4363 * If we have more or the same amount of outstanding extents than we have
@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4465 * Add an item to reserve for updating the inode when we complete the 4464 * Add an item to reserve for updating the inode when we complete the
4466 * delalloc io. 4465 * delalloc io.
4467 */ 4466 */
4468 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4467 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4468 &BTRFS_I(inode)->runtime_flags)) {
4469 nr_extents++; 4469 nr_extents++;
4470 extra_reserve = 1; 4470 extra_reserve = 1;
4471 } 4471 }
@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4511 4511
4512 spin_lock(&BTRFS_I(inode)->lock); 4512 spin_lock(&BTRFS_I(inode)->lock);
4513 if (extra_reserve) { 4513 if (extra_reserve) {
4514 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4514 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4515 &BTRFS_I(inode)->runtime_flags);
4515 nr_extents--; 4516 nr_extents--;
4516 } 4517 }
4517 BTRFS_I(inode)->reserved_extents += nr_extents; 4518 BTRFS_I(inode)->reserved_extents += nr_extents;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3daed70a401..2c8f7b20461 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 		return parent;
 	}
 
-	entry = rb_entry(node, struct tree_entry, rb_node);
 	rb_link_node(node, parent, p);
 	rb_insert_color(node, root);
 	return NULL;
@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state)
 
 /*
  * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1)
+ * it will optionally wake up any one waiting on this state (wake == 1).
  *
  * If no bits are set on the state struct after clearing things, the
  * struct is freed and removed from the tree
@@ -570,10 +569,8 @@ hit_next:
 		if (err)
 			goto out;
 		if (state->end <= end) {
-			clear_state_bit(tree, state, &bits, wake);
-			if (last_end == (u64)-1)
-				goto out;
-			start = last_end + 1;
+			state = clear_state_bit(tree, state, &bits, wake);
+			goto next;
 		}
 		goto search_again;
 	}
@@ -781,7 +778,6 @@ hit_next:
 	 * Just lock what we found and keep going
 	 */
 	if (state->start == start && state->end <= end) {
-		struct rb_node *next_node;
 		if (state->state & exclusive_bits) {
 			*failed_start = state->start;
 			err = -EEXIST;
@@ -789,20 +785,15 @@ hit_next:
 		}
 
 		set_state_bits(tree, state, &bits);
-
 		cache_state(state, cached_state);
 		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;
-
 		start = last_end + 1;
-		next_node = rb_next(&state->rb_node);
-		if (next_node && start < end && prealloc && !need_resched()) {
-			state = rb_entry(next_node, struct extent_state,
-					 rb_node);
-			if (state->start == start)
-				goto hit_next;
-		}
+		state = next_state(state);
+		if (start < end && state && state->start == start &&
+		    !need_resched())
+			goto hit_next;
 		goto search_again;
 	}
 
@@ -845,6 +836,10 @@ hit_next:
 		if (last_end == (u64)-1)
 			goto out;
 		start = last_end + 1;
+		state = next_state(state);
+		if (start < end && state && state->start == start &&
+		    !need_resched())
+			goto hit_next;
 	}
 	goto search_again;
 }
@@ -994,21 +989,14 @@ hit_next:
 	 * Just lock what we found and keep going
 	 */
 	if (state->start == start && state->end <= end) {
-		struct rb_node *next_node;
-
 		set_state_bits(tree, state, &bits);
-		clear_state_bit(tree, state, &clear_bits, 0);
+		state = clear_state_bit(tree, state, &clear_bits, 0);
 		if (last_end == (u64)-1)
 			goto out;
-
 		start = last_end + 1;
-		next_node = rb_next(&state->rb_node);
-		if (next_node && start < end && prealloc && !need_resched()) {
-			state = rb_entry(next_node, struct extent_state,
-					 rb_node);
-			if (state->start == start)
-				goto hit_next;
-		}
+		if (start < end && state && state->start == start &&
+		    !need_resched())
+			goto hit_next;
 		goto search_again;
 	}
 
@@ -1042,10 +1030,13 @@ hit_next:
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
-			clear_state_bit(tree, state, &clear_bits, 0);
+			state = clear_state_bit(tree, state, &clear_bits, 0);
 			if (last_end == (u64)-1)
 				goto out;
 			start = last_end + 1;
+			if (start < end && state && state->start == start &&
+			    !need_resched())
+				goto hit_next;
 		}
 		goto search_again;
 	}
@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 				cached_state, mask);
 }
 
-static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
-				 u64 end, struct extent_state **cached_state,
-				 gfp_t mask)
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			  struct extent_state **cached_state, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
 				cached_state, mask);
@@ -1293,7 +1283,7 @@ out:
  * returned if we find something, and *start_ret and *end_ret are
  * set to reflect the state struct that was found.
  *
- * If nothing was found, 1 is returned, < 0 on error
+ * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 			  u64 *start_ret, u64 *end_ret, int bits)
@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
 	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
 		/* try to remap that extent elsewhere? */
 		bio_put(bio);
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
 		return -EIO;
 	}
 
@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 		uptodate = 0;
 	}
 
-	if (!uptodate && tree->ops &&
-	    tree->ops->writepage_io_failed_hook) {
-		ret = tree->ops->writepage_io_failed_hook(NULL, page,
-							  start, end, NULL);
-		/* Writeback already completed */
-		if (ret == 0)
-			return 1;
-	}
-
 	if (!uptodate) {
-		clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
 		ClearPageUptodate(page);
 		SetPageError(page);
 	}
@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 		ret = tree->ops->readpage_end_io_hook(page, start, end,
 						      state, mirror);
-		if (ret)
+		if (ret) {
+			/* no IO indicated but software detected errors
+			 * in the block, either checksum errors or
+			 * issues with the contents */
+			struct btrfs_root *root =
+				BTRFS_I(page->mapping->host)->root;
+			struct btrfs_device *device;
+
 			uptodate = 0;
-		else
+			device = btrfs_find_device_for_logical(
+					root, start, mirror);
+			if (device)
+				btrfs_dev_stat_inc_and_print(device,
+					BTRFS_DEV_STAT_CORRUPTION_ERRS);
+		} else {
 			clean_io_failure(start, page);
+		}
 	}
 
 	if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
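Both device-statistics hunks above feed per-device error counters: write errors on the repair path, corruption errors when the checksum hook rejects a block. As a rough model of the bookkeeping behind btrfs_dev_stat_inc_and_print() — the real btrfs_device layout is not shown in this diff, so the struct below is an assumption:

#include <linux/atomic.h>

enum {
	DEMO_DEV_STAT_WRITE_ERRS,	/* mirrors BTRFS_DEV_STAT_WRITE_ERRS */
	DEMO_DEV_STAT_CORRUPTION_ERRS,	/* mirrors BTRFS_DEV_STAT_CORRUPTION_ERRS */
	DEMO_DEV_STAT_VALUES_MAX,
};

struct demo_device {
	atomic_t stat_values[DEMO_DEV_STAT_VALUES_MAX];
};

static void demo_dev_stat_inc(struct demo_device *dev, int index)
{
	atomic_inc(&dev->stat_values[index]);	/* lock-free, safe in end_io context */
}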
@@ -2612,10 +2606,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
 	if (zero_offset) {
 		iosize = PAGE_CACHE_SIZE - zero_offset;
-		userpage = kmap_atomic(page, KM_USER0);
+		userpage = kmap_atomic(page);
 		memset(userpage + zero_offset, 0, iosize);
 		flush_dcache_page(page);
-		kunmap_atomic(userpage, KM_USER0);
+		kunmap_atomic(userpage);
 	}
 }
 while (cur <= end) {
@@ -2624,10 +2618,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			struct extent_state *cached = NULL;
 
 			iosize = PAGE_CACHE_SIZE - pg_offset;
-			userpage = kmap_atomic(page, KM_USER0);
+			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
-			kunmap_atomic(userpage, KM_USER0);
+			kunmap_atomic(userpage);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
 			unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2673,10 +2667,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			char *userpage;
 			struct extent_state *cached = NULL;
 
-			userpage = kmap_atomic(page, KM_USER0);
+			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
-			kunmap_atomic(userpage, KM_USER0);
+			kunmap_atomic(userpage);
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
@@ -2823,10 +2817,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	if (page->index == end_index) {
 		char *userpage;
 
-		userpage = kmap_atomic(page, KM_USER0);
+		userpage = kmap_atomic(page);
 		memset(userpage + pg_offset, 0,
 		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage, KM_USER0);
+		kunmap_atomic(userpage);
 		flush_dcache_page(page);
 	}
 	pg_offset = 0;
@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb,
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
 	int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
-	int ret;
+	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
@@ -4036,12 +4030,14 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 						unsigned long start_idx)
 {
 	unsigned long index;
+	unsigned long num_pages;
 	struct page *page;
 	int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
 
 	BUG_ON(extent_buffer_under_io(eb));
 
-	index = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb->start, eb->len);
+	index = start_idx + num_pages;
 	if (start_idx >= index)
 		return;
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 96434a61d7c..25900af5b15 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,9 +76,6 @@ struct extent_io_ops {
 				       unsigned long bio_flags);
 	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
 	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
-	int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
-					u64 start, u64 end,
-					struct extent_state *state);
 	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
 				    struct extent_state *state, int mirror);
 	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
@@ -226,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask);
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			  struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 		   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index cab0ffb5ef3..5d158d32023 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,13 +460,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 			sums->bytenr = ordered->start;
 		}
 
-		data = kmap_atomic(bvec->bv_page, KM_USER0);
+		data = kmap_atomic(bvec->bv_page);
 		sector_sum->sum = ~(u32)0;
 		sector_sum->sum = btrfs_csum_data(root,
 						  data + bvec->bv_offset,
 						  sector_sum->sum,
 						  bvec->bv_len);
-		kunmap_atomic(data, KM_USER0);
+		kunmap_atomic(data);
 		btrfs_csum_final(sector_sum->sum,
 				 (char *)&sector_sum->sum);
 		sector_sum->bytenr = disk_bytenr;
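The kmap_atomic()/kunmap_atomic() hunks here and throughout inode.c all track the same kernel-wide API change: the KM_USER0/KM_IRQ0 slot argument is gone, and atomic mappings now simply nest in LIFO order. A hedged sketch of the new calling convention — checksum_one_page() is a hypothetical helper, not a btrfs function:

#include <linux/highmem.h>
#include <linux/crc32c.h>

static u32 checksum_one_page(struct page *page, size_t len, u32 seed)
{
	char *data = kmap_atomic(page);	/* was kmap_atomic(page, KM_USER0) */
	u32 csum = crc32c(seed, data, len);

	kunmap_atomic(data);		/* must unmap in reverse (LIFO) order */
	return csum;
}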
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 53bf2d764bb..876cddd6b2f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -65,6 +65,21 @@ struct inode_defrag {
 	int cycled;
 };
 
+static int __compare_inode_defrag(struct inode_defrag *defrag1,
+				  struct inode_defrag *defrag2)
+{
+	if (defrag1->root > defrag2->root)
+		return 1;
+	else if (defrag1->root < defrag2->root)
+		return -1;
+	else if (defrag1->ino > defrag2->ino)
+		return 1;
+	else if (defrag1->ino < defrag2->ino)
+		return -1;
+	else
+		return 0;
+}
+
 /* pop a record for an inode into the defrag tree. The lock
  * must be held already
  *
@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 	struct inode_defrag *entry;
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
+	int ret;
 
 	p = &root->fs_info->defrag_inodes.rb_node;
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
 
-		if (defrag->ino < entry->ino)
+		ret = __compare_inode_defrag(defrag, entry);
+		if (ret < 0)
 			p = &parent->rb_left;
-		else if (defrag->ino > entry->ino)
+		else if (ret > 0)
 			p = &parent->rb_right;
 		else {
 			/* if we're reinserting an entry for
@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 			goto exists;
 		}
 	}
-	BTRFS_I(inode)->in_defrag = 1;
+	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
 	return;
@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	if (btrfs_fs_closing(root->fs_info))
 		return 0;
 
-	if (BTRFS_I(inode)->in_defrag)
+	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
 		return 0;
 
 	if (trans)
@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!BTRFS_I(inode)->in_defrag)
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
 		__btrfs_add_inode_defrag(inode, defrag);
 	else
 		kfree(defrag);
@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 /*
  * must be called with the defrag_inodes lock held
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
+struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
+					     u64 root, u64 ino,
 					     struct rb_node **next)
 {
 	struct inode_defrag *entry = NULL;
+	struct inode_defrag tmp;
 	struct rb_node *p;
 	struct rb_node *parent = NULL;
+	int ret;
+
+	tmp.ino = ino;
+	tmp.root = root;
 
 	p = info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
 
-		if (ino < entry->ino)
+		ret = __compare_inode_defrag(&tmp, entry);
+		if (ret < 0)
 			p = parent->rb_left;
-		else if (ino > entry->ino)
+		else if (ret > 0)
 			p = parent->rb_right;
 		else
 			return entry;
 	}
 
 	if (next) {
-		while (parent && ino > entry->ino) {
+		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
 			parent = rb_next(parent);
 			entry = rb_entry(parent, struct inode_defrag, rb_node);
 		}
@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
 	u64 first_ino = 0;
+	u64 root_objectid = 0;
 	int num_defrag;
 	int defrag_batch = 1024;
 
@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 		n = NULL;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
+		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
+						 first_ino, &n);
 		if (!defrag) {
-			if (n)
-				defrag = rb_entry(n, struct inode_defrag, rb_node);
-			else if (first_ino) {
+			if (n) {
+				defrag = rb_entry(n, struct inode_defrag,
+						  rb_node);
+			} else if (root_objectid || first_ino) {
+				root_objectid = 0;
 				first_ino = 0;
 				continue;
 			} else {
@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 
 		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
+		root_objectid = defrag->root;
 		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
 
 		if (btrfs_fs_closing(fs_info))
@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 			goto next;
 
 		/* do a chunk of defrag */
-		BTRFS_I(inode)->in_defrag = 0;
+		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 		range.start = defrag->last_offset;
 		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
 					       defrag_batch);
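The file.c hunks above reshape the defrag tracking tree around a compound key: entries are ordered first by root objectid, then by inode number, which is what lets btrfs_run_defrag_inodes() keep a (root_objectid, first_ino) cursor and resume the scan after erasing each record — draining inode by inode within a root before moving on to the next root. A stand-alone model of the comparator (plain C; demo_* names are stand-ins for the kernel types):

#include <stdint.h>

struct demo_defrag {
	uint64_t root;
	uint64_t ino;
};

/* mirror of __compare_inode_defrag(): root is the major key, ino the minor */
static int demo_compare(const struct demo_defrag *a,
			const struct demo_defrag *b)
{
	if (a->root != b->root)
		return a->root > b->root ? 1 : -1;
	if (a->ino != b->ino)
		return a->ino > b->ino ? 1 : -1;
	return 0;
}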
@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}
-	BTRFS_I(inode)->sequence++;
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
 	 * flush down new bytes that may have been written if the
 	 * application were using truncate to replace a file in place.
 	 */
-	if (BTRFS_I(inode)->ordered_data_close) {
-		BTRFS_I(inode)->ordered_data_close = 0;
+	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+			       &BTRFS_I(inode)->runtime_flags)) {
 		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
 		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
 			filemap_flush(inode->i_mapping);
@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_btrfs_sync_file(file, datasync);
 
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret)
-		return ret;
 	mutex_lock(&inode->i_mutex);
 
-	/* we wait first, since the writeback may change the inode */
+	/*
+	 * we wait first, since the writeback may change the inode; the
+	 * ordered range wait also does a filemap_write_and_wait_range,
+	 * which is why we don't do it above like other file systems.
+	 */
 	root->log_batch++;
-	btrfs_wait_ordered_range(inode, 0, (u64)-1);
+	btrfs_wait_ordered_range(inode, start, end);
 	root->log_batch++;
 
 	/*
@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * syncing
 	 */
 	smp_mb();
-	if (BTRFS_I(inode)->last_trans <=
+	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
+	    BTRFS_I(inode)->last_trans <=
 	    root->fs_info->last_trans_committed) {
 		BTRFS_I(inode)->last_trans = 0;
 		mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index baaa518baaf..19a0d85b451 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -33,6 +33,8 @@
 
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
 			   struct btrfs_free_space *info);
+static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
+			      struct btrfs_free_space *info);
 
 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 					       struct btrfs_path *path,
@@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
 	return 0;
 }
 
+/*
+ * Since we attach pinned extents after the fact we can have contiguous sections
+ * of free space that are split up in entries. This poses a problem with the
+ * tree logging stuff since it could have allocated across what appears to be 2
+ * entries since we would have merged the entries when adding the pinned extents
+ * back to the free space cache. So run through the space cache that we just
+ * loaded and merge contiguous entries. This will make the log replay stuff not
+ * blow up and it will make for nicer allocator behavior.
+ */
+static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
+{
+	struct btrfs_free_space *e, *prev = NULL;
+	struct rb_node *n;
+
+again:
+	spin_lock(&ctl->tree_lock);
+	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
+		e = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (!prev)
+			goto next;
+		if (e->bitmap || prev->bitmap)
+			goto next;
+		if (prev->offset + prev->bytes == e->offset) {
+			unlink_free_space(ctl, prev);
+			unlink_free_space(ctl, e);
+			prev->bytes += e->bytes;
+			kmem_cache_free(btrfs_free_space_cachep, e);
+			link_free_space(ctl, prev);
+			prev = NULL;
+			spin_unlock(&ctl->tree_lock);
+			goto again;
+		}
+next:
+		prev = e;
+	}
+	spin_unlock(&ctl->tree_lock);
+}
+
 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			    struct btrfs_free_space_ctl *ctl,
 			    struct btrfs_path *path, u64 offset)
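The merge condition in merge_space_tree() is deliberately narrow: only two non-bitmap entries merge, and only when the first ends exactly where the second begins; after each merge the lock is dropped and the scan restarts from the head of the tree. A minimal stand-alone model of that condition (the struct is a stand-in for btrfs_free_space, offsets in bytes):

#include <stdint.h>

struct demo_space {
	uint64_t offset;
	uint64_t bytes;
	int	 bitmap;
};

/* mirror of the check in merge_space_tree(): adjacency, and no bitmaps */
static int demo_try_merge(struct demo_space *prev, struct demo_space *e)
{
	if (prev->bitmap || e->bitmap)
		return 0;
	if (prev->offset + prev->bytes != e->offset)
		return 0;
	prev->bytes += e->bytes;	/* prev now spans both entries */
	return 1;
}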
@@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 	}
 
 	io_ctl_drop_pages(&io_ctl);
+	merge_space_tree(ctl);
 	ret = 1;
 out:
 	io_ctl_free(&io_ctl);
@@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 		goto out;
 
 
-	ret = filemap_write_and_wait(inode->i_mapping);
-	if (ret)
-		goto out;
+	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 	key.offset = offset;
@@ -1065,7 +1104,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		spin_unlock(&block_group->lock);
 		ret = 0;
 #ifdef DEBUG
-		printk(KERN_ERR "btrfs: failed to write free space cace "
+		printk(KERN_ERR "btrfs: failed to write free space cache "
 		       "for block group %llu\n", block_group->key.objectid);
 #endif
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3ce7805d111..92df0a5d1d9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 static int btrfs_setsize(struct inode *inode, loff_t newsize);
 static int btrfs_truncate(struct inode *inode);
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
+static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 static noinline int cow_file_range(struct inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
@@ -172,9 +172,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 		cur_size = min_t(unsigned long, compressed_size,
 				 PAGE_CACHE_SIZE);
 
-		kaddr = kmap_atomic(cpage, KM_USER0);
+		kaddr = kmap_atomic(cpage);
 		write_extent_buffer(leaf, kaddr, ptr, cur_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 
 		i++;
 		ptr += cur_size;
@@ -186,10 +186,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 		page = find_get_page(inode->i_mapping,
 				     start >> PAGE_CACHE_SHIFT);
 		btrfs_set_file_extent_compression(leaf, ei, 0);
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		offset = start & (PAGE_CACHE_SIZE - 1);
 		write_extent_buffer(leaf, kaddr + offset, ptr, size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		page_cache_release(page);
 	}
 	btrfs_mark_buffer_dirty(leaf);
@@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 	ret = insert_inline_extent(trans, root, inode, start,
 				   inline_len, compressed_size,
 				   compress_type, compressed_pages);
-	if (ret) {
+	if (ret && ret != -ENOSPC) {
 		btrfs_abort_transaction(trans, root, ret);
 		return ret;
+	} else if (ret == -ENOSPC) {
+		return 1;
 	}
+
 	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 	return 0;
@@ -426,10 +429,10 @@ again:
 			 * sending it down to disk
 			 */
 			if (offset) {
-				kaddr = kmap_atomic(page, KM_USER0);
+				kaddr = kmap_atomic(page);
 				memset(kaddr + offset, 0,
 				       PAGE_CACHE_SIZE - offset);
-				kunmap_atomic(kaddr, KM_USER0);
+				kunmap_atomic(kaddr);
 			}
 			will_compress = 1;
 		}
@@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	if (btrfs_is_free_space_inode(root, inode))
 		metadata = 2;
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
-	if (ret)
-		return ret;
-
 	if (!(rw & REQ_WRITE)) {
+		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
+		if (ret)
+			return ret;
+
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
 			return btrfs_submit_compressed_read(inode, bio,
 						    mirror_num, bio_flags);
@@ -1815,25 +1818,24 @@ out:
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
  */
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 {
+	struct inode *inode = ordered_extent->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
 	int compress_type = 0;
 	int ret;
 	bool nolock;
 
-	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
-					     end - start + 1);
-	if (!ret)
-		return 0;
-	BUG_ON(!ordered_extent); /* Logic error */
-
 	nolock = btrfs_is_free_space_inode(root, inode);
 
+	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
+		ret = -EIO;
+		goto out;
+	}
+
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 						ordered_extent->file_offset,
 						ordered_extent->len);
 	}
-	unlock_extent_cached(io_tree, ordered_extent->file_offset,
-			     ordered_extent->file_offset +
-			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
+
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, root, ret);
-		goto out;
+		goto out_unlock;
 	}
 
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
@@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 		ret = btrfs_update_inode_fallback(trans, root, inode);
 		if (ret) { /* -ENOMEM or corruption */
 			btrfs_abort_transaction(trans, root, ret);
-			goto out;
+			goto out_unlock;
 		}
 	}
 	ret = 0;
+out_unlock:
+	unlock_extent_cached(io_tree, ordered_extent->file_offset,
+			     ordered_extent->file_offset +
+			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
 out:
 	if (root != root->fs_info->tree_root)
 		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
@@ -1919,26 +1923,57 @@ out:
 		btrfs_end_transaction(trans, root);
 	}
 
+	if (ret)
+		clear_extent_uptodate(io_tree, ordered_extent->file_offset,
+				      ordered_extent->file_offset +
+				      ordered_extent->len - 1, NULL, GFP_NOFS);
+
+	/*
+	 * This needs to be done to make sure anybody waiting knows we are done
+	 * updating everything for this ordered extent.
+	 */
+	btrfs_remove_ordered_extent(inode, ordered_extent);
+
 	/* once for us */
 	btrfs_put_ordered_extent(ordered_extent);
 	/* once for the tree */
 	btrfs_put_ordered_extent(ordered_extent);
 
-	return 0;
-out_unlock:
-	unlock_extent_cached(io_tree, ordered_extent->file_offset,
-			     ordered_extent->file_offset +
-			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
-	goto out;
+	return ret;
+}
+
+static void finish_ordered_fn(struct btrfs_work *work)
+{
+	struct btrfs_ordered_extent *ordered_extent;
+	ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
+	btrfs_finish_ordered_io(ordered_extent);
 }
 
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 				       struct extent_state *state, int uptodate)
 {
+	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_ordered_extent *ordered_extent = NULL;
+	struct btrfs_workers *workers;
+
 	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
 
 	ClearPagePrivate2(page);
-	return btrfs_finish_ordered_io(page->mapping->host, start, end);
+	if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
+					    end - start + 1, uptodate))
+		return 0;
+
+	ordered_extent->work.func = finish_ordered_fn;
+	ordered_extent->work.flags = 0;
+
+	if (btrfs_is_free_space_inode(root, inode))
+		workers = &root->fs_info->endio_freespace_worker;
+	else
+		workers = &root->fs_info->endio_write_workers;
+	btrfs_queue_worker(workers, &ordered_extent->work);
+
+	return 0;
 }
 
 /*
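Note the control-flow change above: btrfs_writepage_end_io_hook() runs in bio completion (atomic) context, so instead of finishing the ordered extent inline it now packages the work into ordered_extent->work and queues it on a btrfs worker, where btrfs_finish_ordered_io() is free to join transactions and sleep. As a hedged illustration, the same deferral shape with the stock kernel workqueue API (demo_* names are stand-ins, not btrfs code):

#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_ordered {
	struct work_struct work;
	/* ... range, flags, refcounts ... */
};

static void demo_finish_fn(struct work_struct *work)
{
	struct demo_ordered *ordered =
		container_of(work, struct demo_ordered, work);

	/* heavy lifting happens here, in process context */
	kfree(ordered);
}

/* called from the (atomic) end_io path: just queue and return */
static void demo_end_io(struct demo_ordered *ordered)
{
	INIT_WORK(&ordered->work, demo_finish_fn);
	schedule_work(&ordered->work);
}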
@@ -1979,7 +2014,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	} else {
 		ret = get_state_private(io_tree, start, &private);
 	}
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	if (ret)
 		goto zeroit;
 
@@ -1988,7 +2023,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	if (csum != private)
 		goto zeroit;
 
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 good:
 	return 0;
 
@@ -2000,7 +2035,7 @@ zeroit:
 	       (unsigned long long)private);
 	memset(kaddr + offset, 1, end - start + 1);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	if (private == 0)
 		return 0;
 	return -EIO;
@@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
 	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
-	if (!list_empty(&root->orphan_list) ||
+	if (atomic_read(&root->orphan_inodes) ||
 	    root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
 		return;
 
 	spin_lock(&root->orphan_lock);
-	if (!list_empty(&root->orphan_list)) {
+	if (atomic_read(&root->orphan_inodes)) {
 		spin_unlock(&root->orphan_lock);
 		return;
 	}
@@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 		block_rsv = NULL;
 	}
 
-	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+			      &BTRFS_I(inode)->runtime_flags)) {
 #if 0
 		/*
 		 * For proper ENOSPC handling, we should do orphan
@@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 		insert = 1;
 #endif
 		insert = 1;
+		atomic_dec(&root->orphan_inodes);
 	}
 
-	if (!BTRFS_I(inode)->orphan_meta_reserved) {
-		BTRFS_I(inode)->orphan_meta_reserved = 1;
+	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+			      &BTRFS_I(inode)->runtime_flags))
 		reserve = 1;
-	}
 	spin_unlock(&root->orphan_lock);
 
 	/* grab metadata reservation from transaction handle */
@@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 	if (insert >= 1) {
 		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
 		if (ret && ret != -EEXIST) {
+			clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+				  &BTRFS_I(inode)->runtime_flags);
 			btrfs_abort_transaction(trans, root, ret);
 			return ret;
 		}
@@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 	int ret = 0;
 
 	spin_lock(&root->orphan_lock);
-	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-		list_del_init(&BTRFS_I(inode)->i_orphan);
+	if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+			       &BTRFS_I(inode)->runtime_flags))
 		delete_item = 1;
-	}
 
-	if (BTRFS_I(inode)->orphan_meta_reserved) {
-		BTRFS_I(inode)->orphan_meta_reserved = 0;
+	if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+			       &BTRFS_I(inode)->runtime_flags))
 		release_rsv = 1;
-	}
 	spin_unlock(&root->orphan_lock);
 
 	if (trans && delete_item) {
@@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 		BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
 	}
 
-	if (release_rsv)
+	if (release_rsv) {
 		btrfs_orphan_release_metadata(inode);
+		atomic_dec(&root->orphan_inodes);
+	}
 
 	return 0;
 }
@@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 			ret = PTR_ERR(trans);
 			goto out;
 		}
+		printk(KERN_ERR "auto deleting %Lu\n",
+		       found_key.objectid);
 		ret = btrfs_del_orphan_item(trans, root,
 					    found_key.objectid);
 		BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		 * add this inode to the orphan list so btrfs_orphan_del does
 		 * the proper thing when we hit it
 		 */
-		spin_lock(&root->orphan_lock);
-		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-		spin_unlock(&root->orphan_lock);
+		set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+			&BTRFS_I(inode)->runtime_flags);
 
 		/* if we have links, this was a truncate, lets do that */
 		if (inode->i_nlink) {
@@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
 	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
 	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
-	BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item);
+	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
 	inode->i_generation = BTRFS_I(inode)->generation;
 	inode->i_rdev = 0;
 	rdev = btrfs_inode_rdev(leaf, inode_item);
@@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 
 	btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
 	btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
-	btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence);
+	btrfs_set_inode_sequence(leaf, item, inode->i_version);
 	btrfs_set_inode_transid(leaf, item, trans->transid);
 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
@@ -2752,6 +2790,8 @@ err:
 		goto out;
 
 	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+	inode_inc_iversion(inode);
+	inode_inc_iversion(dir);
 	inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 	btrfs_update_inode(trans, root, dir);
 out:
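The i_version hunks (here and in the read/fill inode paths above) retire the private BTRFS_I(inode)->sequence counter in favour of the generic VFS field: the on-disk sequence item is now loaded into and saved from inode->i_version, and every metadata change bumps it with inode_inc_iversion(). A small illustrative helper, assuming the 3.x-era VFS interfaces; demo_touch() is hypothetical:

#include <linux/fs.h>

static void demo_touch(struct inode *inode)
{
	inode_inc_iversion(inode);	/* i_version++ under i_lock */
	inode->i_ctime = CURRENT_TIME;	/* ctime moves with it */
}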
@@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	}
 
 	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+	inode_inc_iversion(dir);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, root, dir);
 	if (ret)
@@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
 	 * any new writes get down to disk quickly.
 	 */
 	if (newsize == 0)
-		BTRFS_I(inode)->ordered_data_close = 1;
+		set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+			&BTRFS_I(inode)->runtime_flags);
 
 	/* we don't support swapfiles, so vmtruncate shouldn't fail */
 	truncate_setsize(inode, newsize);
@@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (attr->ia_valid) {
 		setattr_copy(inode, attr);
+		inode_inc_iversion(inode);
 		err = btrfs_dirty_inode(inode);
 
 		if (!err && attr->ia_valid & ATTR_MODE)
@@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	if (root->fs_info->log_root_recovering) {
-		BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
+		BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+				 &BTRFS_I(inode)->runtime_flags));
 		goto no_delete;
 	}
 
@@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s,
 
 	BTRFS_I(inode)->root = root;
 	memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
-	BTRFS_I(inode)->dummy_inode = 1;
+	set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
 
 	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
 	inode->i_op = &btrfs_dir_ro_inode_operations;
@@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret = 0;
 	bool nolock = false;
 
-	if (BTRFS_I(inode)->dummy_inode)
+	if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
 		return 0;
 
 	if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
@@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode)
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	if (BTRFS_I(inode)->dummy_inode)
+	if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
 		return 0;
 
 	trans = btrfs_join_transaction(root);
@@ -4730,6 +4774,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 
 		btrfs_i_size_write(parent_inode, parent_inode->i_size +
 				   name_len * 2);
+		inode_inc_iversion(parent_inode);
 		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
 		ret = btrfs_update_inode(trans, root, parent_inode);
 		if (ret)
@@ -4937,6 +4982,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	btrfs_inc_nlink(inode);
+	inode_inc_iversion(inode);
 	inode->i_ctime = CURRENT_TIME;
 	ihold(inode);
 
@@ -5079,12 +5125,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
 	if (ret) {
-		char *kaddr = kmap_atomic(page, KM_USER0);
+		char *kaddr = kmap_atomic(page);
 		unsigned long copy_size = min_t(u64,
 				PAGE_CACHE_SIZE - pg_offset,
 				max_size - extent_offset);
 		memset(kaddr + pg_offset, 0, copy_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 	kfree(tmp);
 	return 0;
@@ -5862,11 +5908,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 			unsigned long flags;
 
 			local_irq_save(flags);
-			kaddr = kmap_atomic(page, KM_IRQ0);
+			kaddr = kmap_atomic(page);
 			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
 					       csum, bvec->bv_len);
 			btrfs_csum_final(csum, (char *)&csum);
-			kunmap_atomic(kaddr, KM_IRQ0);
+			kunmap_atomic(kaddr);
 			local_irq_restore(flags);
 
 			flush_dcache_page(bvec->bv_page);
@@ -5903,9 +5949,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 	struct btrfs_dio_private *dip = bio->bi_private;
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered = NULL;
-	struct extent_state *cached_state = NULL;
 	u64 ordered_offset = dip->logical_offset;
 	u64 ordered_bytes = dip->bytes;
 	int ret;
@@ -5915,73 +5959,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
-						   ordered_bytes);
+						   ordered_bytes, !err);
 	if (!ret)
 		goto out_test;
 
-	BUG_ON(!ordered);
-
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans)) {
-		err = -ENOMEM;
-		goto out;
-	}
-	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
-	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
-		ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-		if (!ret)
-			err = btrfs_update_inode_fallback(trans, root, inode);
-		goto out;
-	}
-
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
-			 ordered->file_offset + ordered->len - 1, 0,
-			 &cached_state);
-
-	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
-		ret = btrfs_mark_extent_written(trans, inode,
-						ordered->file_offset,
-						ordered->file_offset +
-						ordered->len);
-		if (ret) {
-			err = ret;
-			goto out_unlock;
-		}
-	} else {
-		ret = insert_reserved_file_extent(trans, inode,
-						  ordered->file_offset,
-						  ordered->start,
-						  ordered->disk_len,
-						  ordered->len,
-						  ordered->len,
-						  0, 0, 0,
-						  BTRFS_FILE_EXTENT_REG);
-		unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
-				   ordered->file_offset, ordered->len);
-		if (ret) {
-			err = ret;
-			WARN_ON(1);
-			goto out_unlock;
-		}
-	}
-
-	add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-	ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-	if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
-		btrfs_update_inode_fallback(trans, root, inode);
-	ret = 0;
-out_unlock:
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
-			     ordered->file_offset + ordered->len - 1,
-			     &cached_state, GFP_NOFS);
-out:
-	btrfs_delalloc_release_metadata(inode, ordered->len);
-	btrfs_end_transaction(trans, root);
-	ordered_offset = ordered->file_offset + ordered->len;
-	btrfs_put_ordered_extent(ordered);
-	btrfs_put_ordered_extent(ordered);
-
+	ordered->work.func = finish_ordered_fn;
+	ordered->work.flags = 0;
+	btrfs_queue_worker(&root->fs_info->endio_write_workers,
+			   &ordered->work);
 out_test:
 	/*
 	 * our bio might span multiple ordered extents. If we haven't
@@ -5990,12 +5975,12 @@ out_test:
5990 if (ordered_offset < dip->logical_offset + dip->bytes) { 5975 if (ordered_offset < dip->logical_offset + dip->bytes) {
5991 ordered_bytes = dip->logical_offset + dip->bytes - 5976 ordered_bytes = dip->logical_offset + dip->bytes -
5992 ordered_offset; 5977 ordered_offset;
5978 ordered = NULL;
5993 goto again; 5979 goto again;
5994 } 5980 }
5995out_done: 5981out_done:
5996 bio->bi_private = dip->private; 5982 bio->bi_private = dip->private;
5997 5983
5998 kfree(dip->csums);
5999 kfree(dip); 5984 kfree(dip);
6000 5985
6001 /* If we had an error make sure to clear the uptodate flag */ 5986 /* If we had an error make sure to clear the uptodate flag */
@@ -6063,9 +6048,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6063 int ret; 6048 int ret;
6064 6049
6065 bio_get(bio); 6050 bio_get(bio);
6066 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 6051
6067 if (ret) 6052 if (!write) {
6068 goto err; 6053 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
6054 if (ret)
6055 goto err;
6056 }
6069 6057
6070 if (skip_sum) 6058 if (skip_sum)
6071 goto map; 6059 goto map;
@@ -6485,13 +6473,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
6485 6473
6486static void btrfs_invalidatepage(struct page *page, unsigned long offset) 6474static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6487{ 6475{
6476 struct inode *inode = page->mapping->host;
6488 struct extent_io_tree *tree; 6477 struct extent_io_tree *tree;
6489 struct btrfs_ordered_extent *ordered; 6478 struct btrfs_ordered_extent *ordered;
6490 struct extent_state *cached_state = NULL; 6479 struct extent_state *cached_state = NULL;
6491 u64 page_start = page_offset(page); 6480 u64 page_start = page_offset(page);
6492 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 6481 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
6493 6482
6494
6495 /* 6483 /*
6496 * we have the page locked, so new writeback can't start, 6484 * we have the page locked, so new writeback can't start,
6497 * and the dirty bit won't be cleared while we are here. 6485 * and the dirty bit won't be cleared while we are here.
@@ -6501,13 +6489,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6501 */ 6489 */
6502 wait_on_page_writeback(page); 6490 wait_on_page_writeback(page);
6503 6491
6504 tree = &BTRFS_I(page->mapping->host)->io_tree; 6492 tree = &BTRFS_I(inode)->io_tree;
6505 if (offset) { 6493 if (offset) {
6506 btrfs_releasepage(page, GFP_NOFS); 6494 btrfs_releasepage(page, GFP_NOFS);
6507 return; 6495 return;
6508 } 6496 }
6509 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 6497 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6510 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 6498 ordered = btrfs_lookup_ordered_extent(inode,
6511 page_offset(page)); 6499 page_offset(page));
6512 if (ordered) { 6500 if (ordered) {
6513 /* 6501 /*
@@ -6522,9 +6510,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6522 * whoever cleared the private bit is responsible 6510 * whoever cleared the private bit is responsible
6523 * for the finish_ordered_io 6511 * for the finish_ordered_io
6524 */ 6512 */
6525 if (TestClearPagePrivate2(page)) { 6513 if (TestClearPagePrivate2(page) &&
6526 btrfs_finish_ordered_io(page->mapping->host, 6514 btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
6527 page_start, page_end); 6515 PAGE_CACHE_SIZE, 1)) {
6516 btrfs_finish_ordered_io(ordered);
6528 } 6517 }
6529 btrfs_put_ordered_extent(ordered); 6518 btrfs_put_ordered_extent(ordered);
6530 cached_state = NULL; 6519 cached_state = NULL;
@@ -6771,7 +6760,8 @@ static int btrfs_truncate(struct inode *inode)
6771 * using truncate to replace the contents of the file will 6760 * using truncate to replace the contents of the file will
6772 * end up with a zero length file after a crash. 6761 * end up with a zero length file after a crash.
6773 */ 6762 */
6774 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 6763 if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
6764 &BTRFS_I(inode)->runtime_flags))
6775 btrfs_add_ordered_operation(trans, root, inode); 6765 btrfs_add_ordered_operation(trans, root, inode);
6776 6766
6777 while (1) { 6767 while (1) {
@@ -6894,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6894 ei->root = NULL; 6884 ei->root = NULL;
6895 ei->space_info = NULL; 6885 ei->space_info = NULL;
6896 ei->generation = 0; 6886 ei->generation = 0;
6897 ei->sequence = 0;
6898 ei->last_trans = 0; 6887 ei->last_trans = 0;
6899 ei->last_sub_trans = 0; 6888 ei->last_sub_trans = 0;
6900 ei->logged_trans = 0; 6889 ei->logged_trans = 0;
@@ -6909,11 +6898,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6909 ei->outstanding_extents = 0; 6898 ei->outstanding_extents = 0;
6910 ei->reserved_extents = 0; 6899 ei->reserved_extents = 0;
6911 6900
6912 ei->ordered_data_close = 0; 6901 ei->runtime_flags = 0;
6913 ei->orphan_meta_reserved = 0;
6914 ei->dummy_inode = 0;
6915 ei->in_defrag = 0;
6916 ei->delalloc_meta_reserved = 0;
6917 ei->force_compress = BTRFS_COMPRESS_NONE; 6902 ei->force_compress = BTRFS_COMPRESS_NONE;
6918 6903
6919 ei->delayed_node = NULL; 6904 ei->delayed_node = NULL;
@@ -6927,7 +6912,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6927 mutex_init(&ei->log_mutex); 6912 mutex_init(&ei->log_mutex);
6928 mutex_init(&ei->delalloc_mutex); 6913 mutex_init(&ei->delalloc_mutex);
6929 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6914 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6930 INIT_LIST_HEAD(&ei->i_orphan);
6931 INIT_LIST_HEAD(&ei->delalloc_inodes); 6915 INIT_LIST_HEAD(&ei->delalloc_inodes);
6932 INIT_LIST_HEAD(&ei->ordered_operations); 6916 INIT_LIST_HEAD(&ei->ordered_operations);
6933 RB_CLEAR_NODE(&ei->rb_node); 6917 RB_CLEAR_NODE(&ei->rb_node);
@@ -6972,13 +6956,12 @@ void btrfs_destroy_inode(struct inode *inode)
6972 spin_unlock(&root->fs_info->ordered_extent_lock); 6956 spin_unlock(&root->fs_info->ordered_extent_lock);
6973 } 6957 }
6974 6958
6975 spin_lock(&root->orphan_lock); 6959 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
6976 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6960 &BTRFS_I(inode)->runtime_flags)) {
6977 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6961 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6978 (unsigned long long)btrfs_ino(inode)); 6962 (unsigned long long)btrfs_ino(inode));
6979 list_del_init(&BTRFS_I(inode)->i_orphan); 6963 atomic_dec(&root->orphan_inodes);
6980 } 6964 }
6981 spin_unlock(&root->orphan_lock);
6982 6965
6983 while (1) { 6966 while (1) {
6984 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6967 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -7193,6 +7176,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7193 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) 7176 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
7194 btrfs_add_ordered_operation(trans, root, old_inode); 7177 btrfs_add_ordered_operation(trans, root, old_inode);
7195 7178
7179 inode_inc_iversion(old_dir);
7180 inode_inc_iversion(new_dir);
7181 inode_inc_iversion(old_inode);
7196 old_dir->i_ctime = old_dir->i_mtime = ctime; 7182 old_dir->i_ctime = old_dir->i_mtime = ctime;
7197 new_dir->i_ctime = new_dir->i_mtime = ctime; 7183 new_dir->i_ctime = new_dir->i_mtime = ctime;
7198 old_inode->i_ctime = ctime; 7184 old_inode->i_ctime = ctime;
@@ -7219,6 +7205,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7219 } 7205 }
7220 7206
7221 if (new_inode) { 7207 if (new_inode) {
7208 inode_inc_iversion(new_inode);
7222 new_inode->i_ctime = CURRENT_TIME; 7209 new_inode->i_ctime = CURRENT_TIME;
7223 if (unlikely(btrfs_ino(new_inode) == 7210 if (unlikely(btrfs_ino(new_inode) ==
7224 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7211 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
@@ -7490,6 +7477,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7490 cur_offset += ins.offset; 7477 cur_offset += ins.offset;
7491 *alloc_hint = ins.objectid + ins.offset; 7478 *alloc_hint = ins.objectid + ins.offset;
7492 7479
7480 inode_inc_iversion(inode);
7493 inode->i_ctime = CURRENT_TIME; 7481 inode->i_ctime = CURRENT_TIME;
7494 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 7482 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
7495 if (!(mode & FALLOC_FL_KEEP_SIZE) && 7483 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
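
The endio path above no longer joins a transaction directly; it marks the ordered extent done and queues the heavy lifting (transaction join, extent insertion, i_size update) on the endio_write_workers. A minimal sketch of the assumed worker entry point; the real finish_ordered_fn is added earlier in inode.c in this series:

static void finish_ordered_fn(struct btrfs_work *work)
{
	struct btrfs_ordered_extent *ordered_extent;

	/* recover the ordered extent that embeds this work item */
	ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
	/* safe to join a transaction here: we run in process context */
	btrfs_finish_ordered_io(ordered_extent);
}
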
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7f3a91367d7..24b776c08d9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
261 } 261 }
262 262
263 btrfs_update_iflags(inode); 263 btrfs_update_iflags(inode);
264 inode_inc_iversion(inode);
264 inode->i_ctime = CURRENT_TIME; 265 inode->i_ctime = CURRENT_TIME;
265 ret = btrfs_update_inode(trans, root, inode); 266 ret = btrfs_update_inode(trans, root, inode);
266 267
@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2262 di_args->bytes_used = dev->bytes_used; 2263 di_args->bytes_used = dev->bytes_used;
2263 di_args->total_bytes = dev->total_bytes; 2264 di_args->total_bytes = dev->total_bytes;
2264 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2265 if (dev->name) 2266 if (dev->name) {
2266 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2267 strncpy(di_args->path, dev->name, sizeof(di_args->path));
2267 else 2268 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else {
2268 di_args->path[0] = '\0'; 2270 di_args->path[0] = '\0';
2271 }
2269 2272
2270out: 2273out:
2271 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2274 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
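
The explicit terminator added after strncpy() in the hunk above matters because strncpy() does not NUL-terminate when the source is at least as long as the destination. A standalone illustration (plain C, not kernel code):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char dst[4];

	strncpy(dst, "abcdef", sizeof(dst)); /* fills dst, no '\0' appended */
	dst[sizeof(dst) - 1] = '\0';         /* the fix: force termination */
	printf("%s\n", dst);                 /* prints "abc" */
	return 0;
}
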
@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2622 btrfs_mark_buffer_dirty(leaf); 2625 btrfs_mark_buffer_dirty(leaf);
2623 btrfs_release_path(path); 2626 btrfs_release_path(path);
2624 2627
2628 inode_inc_iversion(inode);
2625 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2629 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2626 2630
2627 /* 2631 /*
@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2914 up_read(&info->groups_sem); 2918 up_read(&info->groups_sem);
2915 } 2919 }
2916 2920
2917 user_dest = (struct btrfs_ioctl_space_info *) 2921 user_dest = (struct btrfs_ioctl_space_info __user *)
2918 (arg + sizeof(struct btrfs_ioctl_space_args)); 2922 (arg + sizeof(struct btrfs_ioctl_space_args));
2919 2923
2920 if (copy_to_user(user_dest, dest_orig, alloc_size)) 2924 if (copy_to_user(user_dest, dest_orig, alloc_size))
@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
3042 return ret; 3046 return ret;
3043} 3047}
3044 3048
3049static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
3050 void __user *arg, int reset_after_read)
3051{
3052 struct btrfs_ioctl_get_dev_stats *sa;
3053 int ret;
3054
3055 if (reset_after_read && !capable(CAP_SYS_ADMIN))
3056 return -EPERM;
3057
3058 sa = memdup_user(arg, sizeof(*sa));
3059 if (IS_ERR(sa))
3060 return PTR_ERR(sa);
3061
3062 ret = btrfs_get_dev_stats(root, sa, reset_after_read);
3063
3064 if (copy_to_user(arg, sa, sizeof(*sa)))
3065 ret = -EFAULT;
3066
3067 kfree(sa);
3068 return ret;
3069}
3070
3045static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3071static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3046{ 3072{
3047 int ret = 0; 3073 int ret = 0;
@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3212 } 3238 }
3213} 3239}
3214 3240
3215static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) 3241static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3216{ 3242{
3243 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3217 struct btrfs_fs_info *fs_info = root->fs_info; 3244 struct btrfs_fs_info *fs_info = root->fs_info;
3218 struct btrfs_ioctl_balance_args *bargs; 3245 struct btrfs_ioctl_balance_args *bargs;
3219 struct btrfs_balance_control *bctl; 3246 struct btrfs_balance_control *bctl;
@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
3225 if (fs_info->sb->s_flags & MS_RDONLY) 3252 if (fs_info->sb->s_flags & MS_RDONLY)
3226 return -EROFS; 3253 return -EROFS;
3227 3254
3255 ret = mnt_want_write(file->f_path.mnt);
3256 if (ret)
3257 return ret;
3258
3228 mutex_lock(&fs_info->volume_mutex); 3259 mutex_lock(&fs_info->volume_mutex);
3229 mutex_lock(&fs_info->balance_mutex); 3260 mutex_lock(&fs_info->balance_mutex);
3230 3261
@@ -3291,6 +3322,7 @@ out_bargs:
3291out: 3322out:
3292 mutex_unlock(&fs_info->balance_mutex); 3323 mutex_unlock(&fs_info->balance_mutex);
3293 mutex_unlock(&fs_info->volume_mutex); 3324 mutex_unlock(&fs_info->volume_mutex);
3325 mnt_drop_write(file->f_path.mnt);
3294 return ret; 3326 return ret;
3295} 3327}
3296 3328
@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int
3386 case BTRFS_IOC_DEV_INFO: 3418 case BTRFS_IOC_DEV_INFO:
3387 return btrfs_ioctl_dev_info(root, argp); 3419 return btrfs_ioctl_dev_info(root, argp);
3388 case BTRFS_IOC_BALANCE: 3420 case BTRFS_IOC_BALANCE:
3389 return btrfs_ioctl_balance(root, NULL); 3421 return btrfs_ioctl_balance(file, NULL);
3390 case BTRFS_IOC_CLONE: 3422 case BTRFS_IOC_CLONE:
3391 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 3423 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3392 case BTRFS_IOC_CLONE_RANGE: 3424 case BTRFS_IOC_CLONE_RANGE:
@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int
3419 case BTRFS_IOC_SCRUB_PROGRESS: 3451 case BTRFS_IOC_SCRUB_PROGRESS:
3420 return btrfs_ioctl_scrub_progress(root, argp); 3452 return btrfs_ioctl_scrub_progress(root, argp);
3421 case BTRFS_IOC_BALANCE_V2: 3453 case BTRFS_IOC_BALANCE_V2:
3422 return btrfs_ioctl_balance(root, argp); 3454 return btrfs_ioctl_balance(file, argp);
3423 case BTRFS_IOC_BALANCE_CTL: 3455 case BTRFS_IOC_BALANCE_CTL:
3424 return btrfs_ioctl_balance_ctl(root, arg); 3456 return btrfs_ioctl_balance_ctl(root, arg);
3425 case BTRFS_IOC_BALANCE_PROGRESS: 3457 case BTRFS_IOC_BALANCE_PROGRESS:
3426 return btrfs_ioctl_balance_progress(root, argp); 3458 return btrfs_ioctl_balance_progress(root, argp);
3459 case BTRFS_IOC_GET_DEV_STATS:
3460 return btrfs_ioctl_get_dev_stats(root, argp, 0);
3461 case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
3462 return btrfs_ioctl_get_dev_stats(root, argp, 1);
3427 } 3463 }
3428 3464
3429 return -ENOTTY; 3465 return -ENOTTY;
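
btrfs_ioctl_balance() now takes the file rather than the root so it can hold a write reference on the mount for the duration of the operation. The pairing pattern it adopts, shown in isolation as a sketch (my_write_ioctl is illustrative, not part of the patch):

static long my_write_ioctl(struct file *file)
{
	long ret;

	ret = mnt_want_write(file->f_path.mnt);	/* fails if remounted r/o */
	if (ret)
		return ret;

	/* ... fs-mutating work, with the write reference held ... */
	ret = 0;

	mnt_drop_write(file->f_path.mnt);	/* drop on every exit path */
	return ret;
}
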
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 086e6bdae1c..497c530724c 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args {
266 __u64 inodes; 266 __u64 inodes;
267}; 267};
268 268
269enum btrfs_dev_stat_values {
270 /* disk I/O failure stats */
271 BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
272 BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
273 BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
274
275 /* stats for indirect indications for I/O failures */
276 BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
277 * contents is illegal: this is an
278 * indication that the block was damaged
279 * during read or write, or written to
280 * wrong location or read from wrong
281 * location */
282 BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
283 * been written */
284
285 BTRFS_DEV_STAT_VALUES_MAX
286};
287
288struct btrfs_ioctl_get_dev_stats {
289 __u64 devid; /* in */
290 __u64 nr_items; /* in/out */
291
292 /* out values: */
293 __u64 values[BTRFS_DEV_STAT_VALUES_MAX];
294
295 __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
296};
297
269#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 298#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
270 struct btrfs_ioctl_vol_args) 299 struct btrfs_ioctl_vol_args)
271#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ 300#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args {
330 struct btrfs_ioctl_ino_path_args) 359 struct btrfs_ioctl_ino_path_args)
331#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ 360#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
332 struct btrfs_ioctl_ino_path_args) 361 struct btrfs_ioctl_ino_path_args)
362#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
363 struct btrfs_ioctl_get_dev_stats)
364#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
365 struct btrfs_ioctl_get_dev_stats)
333 366
334#endif 367#endif
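
With the two new ioctl numbers above, userspace can fetch (and optionally reset) the per-device counters. A hypothetical caller, assuming the definitions from this ioctl.h are available; error handling is kept minimal:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ioctl.h"	/* btrfs_ioctl_get_dev_stats and friends */

int main(int argc, char **argv)
{
	struct btrfs_ioctl_get_dev_stats sa;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);	/* any path on the filesystem */
	if (fd < 0)
		return 1;

	memset(&sa, 0, sizeof(sa));
	sa.devid = 1;			/* devid of the device to query */
	sa.nr_items = BTRFS_DEV_STAT_VALUES_MAX;

	if (ioctl(fd, BTRFS_IOC_GET_DEV_STATS, &sa) == 0)
		printf("write errs: %llu\n", (unsigned long long)
		       sa.values[BTRFS_DEV_STAT_WRITE_ERRS]);
	close(fd);
	return 0;
}
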
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea7..743b86fa4fc 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
411 411
412 bytes = min_t(unsigned long, destlen, out_len - start_byte); 412 bytes = min_t(unsigned long, destlen, out_len - start_byte);
413 413
414 kaddr = kmap_atomic(dest_page, KM_USER0); 414 kaddr = kmap_atomic(dest_page);
415 memcpy(kaddr, workspace->buf + start_byte, bytes); 415 memcpy(kaddr, workspace->buf + start_byte, bytes);
416 kunmap_atomic(kaddr, KM_USER0); 416 kunmap_atomic(kaddr);
417out: 417out:
418 return ret; 418 return ret;
419} 419}
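
The same kmap_atomic() conversion recurs in scrub.c below: the km_type slot argument (KM_USER0) is gone because atomic kmaps are now managed as a per-CPU stack by the core. The updated idiom, as a self-contained sketch:

#include <linux/highmem.h>
#include <linux/string.h>

/* copy bytes into a possibly-highmem destination page */
static void copy_to_page(struct page *page, const void *buf, size_t bytes)
{
	char *kaddr = kmap_atomic(page);	/* no slot argument anymore */

	memcpy(kaddr, buf, bytes);
	kunmap_atomic(kaddr);			/* pairs by address, not slot */
}
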
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index bbf6d0d9aeb..9e138cdc36c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->len = len; 196 entry->len = len;
197 entry->disk_len = disk_len; 197 entry->disk_len = disk_len;
198 entry->bytes_left = len; 198 entry->bytes_left = len;
199 entry->inode = inode; 199 entry->inode = igrab(inode);
200 entry->compress_type = compress_type; 200 entry->compress_type = compress_type;
201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
202 set_bit(type, &entry->flags); 202 set_bit(type, &entry->flags);
@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
212 212
213 trace_btrfs_ordered_extent_add(inode, entry); 213 trace_btrfs_ordered_extent_add(inode, entry);
214 214
215 spin_lock(&tree->lock); 215 spin_lock_irq(&tree->lock);
216 node = tree_insert(&tree->tree, file_offset, 216 node = tree_insert(&tree->tree, file_offset,
217 &entry->rb_node); 217 &entry->rb_node);
218 if (node) 218 if (node)
219 ordered_data_tree_panic(inode, -EEXIST, file_offset); 219 ordered_data_tree_panic(inode, -EEXIST, file_offset);
220 spin_unlock(&tree->lock); 220 spin_unlock_irq(&tree->lock);
221 221
222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
223 list_add_tail(&entry->root_extent_list, 223 list_add_tail(&entry->root_extent_list,
@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode,
264 struct btrfs_ordered_inode_tree *tree; 264 struct btrfs_ordered_inode_tree *tree;
265 265
266 tree = &BTRFS_I(inode)->ordered_tree; 266 tree = &BTRFS_I(inode)->ordered_tree;
267 spin_lock(&tree->lock); 267 spin_lock_irq(&tree->lock);
268 list_add_tail(&sum->list, &entry->list); 268 list_add_tail(&sum->list, &entry->list);
269 spin_unlock(&tree->lock); 269 spin_unlock_irq(&tree->lock);
270} 270}
271 271
272/* 272/*
@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode,
283 */ 283 */
284int btrfs_dec_test_first_ordered_pending(struct inode *inode, 284int btrfs_dec_test_first_ordered_pending(struct inode *inode,
285 struct btrfs_ordered_extent **cached, 285 struct btrfs_ordered_extent **cached,
286 u64 *file_offset, u64 io_size) 286 u64 *file_offset, u64 io_size, int uptodate)
287{ 287{
288 struct btrfs_ordered_inode_tree *tree; 288 struct btrfs_ordered_inode_tree *tree;
289 struct rb_node *node; 289 struct rb_node *node;
290 struct btrfs_ordered_extent *entry = NULL; 290 struct btrfs_ordered_extent *entry = NULL;
291 int ret; 291 int ret;
292 unsigned long flags;
292 u64 dec_end; 293 u64 dec_end;
293 u64 dec_start; 294 u64 dec_start;
294 u64 to_dec; 295 u64 to_dec;
295 296
296 tree = &BTRFS_I(inode)->ordered_tree; 297 tree = &BTRFS_I(inode)->ordered_tree;
297 spin_lock(&tree->lock); 298 spin_lock_irqsave(&tree->lock, flags);
298 node = tree_search(tree, *file_offset); 299 node = tree_search(tree, *file_offset);
299 if (!node) { 300 if (!node) {
300 ret = 1; 301 ret = 1;
@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
323 (unsigned long long)to_dec); 324 (unsigned long long)to_dec);
324 } 325 }
325 entry->bytes_left -= to_dec; 326 entry->bytes_left -= to_dec;
327 if (!uptodate)
328 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
329
326 if (entry->bytes_left == 0) 330 if (entry->bytes_left == 0)
327 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 331 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
328 else 332 else
@@ -332,7 +336,7 @@ out:
332 *cached = entry; 336 *cached = entry;
333 atomic_inc(&entry->refs); 337 atomic_inc(&entry->refs);
334 } 338 }
335 spin_unlock(&tree->lock); 339 spin_unlock_irqrestore(&tree->lock, flags);
336 return ret == 0; 340 return ret == 0;
337} 341}
338 342
@@ -347,15 +351,21 @@ out:
347 */ 351 */
348int btrfs_dec_test_ordered_pending(struct inode *inode, 352int btrfs_dec_test_ordered_pending(struct inode *inode,
349 struct btrfs_ordered_extent **cached, 353 struct btrfs_ordered_extent **cached,
350 u64 file_offset, u64 io_size) 354 u64 file_offset, u64 io_size, int uptodate)
351{ 355{
352 struct btrfs_ordered_inode_tree *tree; 356 struct btrfs_ordered_inode_tree *tree;
353 struct rb_node *node; 357 struct rb_node *node;
354 struct btrfs_ordered_extent *entry = NULL; 358 struct btrfs_ordered_extent *entry = NULL;
359 unsigned long flags;
355 int ret; 360 int ret;
356 361
357 tree = &BTRFS_I(inode)->ordered_tree; 362 tree = &BTRFS_I(inode)->ordered_tree;
358 spin_lock(&tree->lock); 363 spin_lock_irqsave(&tree->lock, flags);
364 if (cached && *cached) {
365 entry = *cached;
366 goto have_entry;
367 }
368
359 node = tree_search(tree, file_offset); 369 node = tree_search(tree, file_offset);
360 if (!node) { 370 if (!node) {
361 ret = 1; 371 ret = 1;
@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
363 } 373 }
364 374
365 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 375 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
376have_entry:
366 if (!offset_in_entry(entry, file_offset)) { 377 if (!offset_in_entry(entry, file_offset)) {
367 ret = 1; 378 ret = 1;
368 goto out; 379 goto out;
@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
374 (unsigned long long)io_size); 385 (unsigned long long)io_size);
375 } 386 }
376 entry->bytes_left -= io_size; 387 entry->bytes_left -= io_size;
388 if (!uptodate)
389 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
390
377 if (entry->bytes_left == 0) 391 if (entry->bytes_left == 0)
378 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 392 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
379 else 393 else
@@ -383,7 +397,7 @@ out:
383 *cached = entry; 397 *cached = entry;
384 atomic_inc(&entry->refs); 398 atomic_inc(&entry->refs);
385 } 399 }
386 spin_unlock(&tree->lock); 400 spin_unlock_irqrestore(&tree->lock, flags);
387 return ret == 0; 401 return ret == 0;
388} 402}
389 403
@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
399 trace_btrfs_ordered_extent_put(entry->inode, entry); 413 trace_btrfs_ordered_extent_put(entry->inode, entry);
400 414
401 if (atomic_dec_and_test(&entry->refs)) { 415 if (atomic_dec_and_test(&entry->refs)) {
416 if (entry->inode)
417 btrfs_add_delayed_iput(entry->inode);
402 while (!list_empty(&entry->list)) { 418 while (!list_empty(&entry->list)) {
403 cur = entry->list.next; 419 cur = entry->list.next;
404 sum = list_entry(cur, struct btrfs_ordered_sum, list); 420 sum = list_entry(cur, struct btrfs_ordered_sum, list);
@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
411 427
412/* 428/*
413 * remove an ordered extent from the tree. No references are dropped 429 * remove an ordered extent from the tree. No references are dropped
414 * and you must wake_up entry->wait. You must hold the tree lock 430 * and waiters are woken up.
415 * while you call this function.
416 */ 431 */
417static void __btrfs_remove_ordered_extent(struct inode *inode, 432void btrfs_remove_ordered_extent(struct inode *inode,
418 struct btrfs_ordered_extent *entry) 433 struct btrfs_ordered_extent *entry)
419{ 434{
420 struct btrfs_ordered_inode_tree *tree; 435 struct btrfs_ordered_inode_tree *tree;
421 struct btrfs_root *root = BTRFS_I(inode)->root; 436 struct btrfs_root *root = BTRFS_I(inode)->root;
422 struct rb_node *node; 437 struct rb_node *node;
423 438
424 tree = &BTRFS_I(inode)->ordered_tree; 439 tree = &BTRFS_I(inode)->ordered_tree;
440 spin_lock_irq(&tree->lock);
425 node = &entry->rb_node; 441 node = &entry->rb_node;
426 rb_erase(node, &tree->tree); 442 rb_erase(node, &tree->tree);
427 tree->last = NULL; 443 tree->last = NULL;
428 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 444 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
445 spin_unlock_irq(&tree->lock);
429 446
430 spin_lock(&root->fs_info->ordered_extent_lock); 447 spin_lock(&root->fs_info->ordered_extent_lock);
431 list_del_init(&entry->root_extent_list); 448 list_del_init(&entry->root_extent_list);
@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode,
442 list_del_init(&BTRFS_I(inode)->ordered_operations); 459 list_del_init(&BTRFS_I(inode)->ordered_operations);
443 } 460 }
444 spin_unlock(&root->fs_info->ordered_extent_lock); 461 spin_unlock(&root->fs_info->ordered_extent_lock);
445}
446
447/*
448 * remove an ordered extent from the tree. No references are dropped
449 * but any waiters are woken.
450 */
451void btrfs_remove_ordered_extent(struct inode *inode,
452 struct btrfs_ordered_extent *entry)
453{
454 struct btrfs_ordered_inode_tree *tree;
455
456 tree = &BTRFS_I(inode)->ordered_tree;
457 spin_lock(&tree->lock);
458 __btrfs_remove_ordered_extent(inode, entry);
459 spin_unlock(&tree->lock);
460 wake_up(&entry->wait); 462 wake_up(&entry->wait);
461} 463}
462 464
@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
621 if (orig_end > INT_LIMIT(loff_t)) 623 if (orig_end > INT_LIMIT(loff_t))
622 orig_end = INT_LIMIT(loff_t); 624 orig_end = INT_LIMIT(loff_t);
623 } 625 }
624again: 626
625 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
626 * extents 628 * extents
627 */ 629 */
628 filemap_fdatawrite_range(inode->i_mapping, start, orig_end); 630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
629
630 /* The compression code will leave pages locked but return from
631 * writepage without setting the page writeback. Starting again
632 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
633 */
634 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
635
636 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
637 631
638 end = orig_end; 632 end = orig_end;
639 found = 0; 633 found = 0;
@@ -657,11 +651,6 @@ again:
657 break; 651 break;
658 end--; 652 end--;
659 } 653 }
660 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
661 EXTENT_DELALLOC, 0, NULL)) {
662 schedule_timeout(1);
663 goto again;
664 }
665} 654}
666 655
667/* 656/*
@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
676 struct btrfs_ordered_extent *entry = NULL; 665 struct btrfs_ordered_extent *entry = NULL;
677 666
678 tree = &BTRFS_I(inode)->ordered_tree; 667 tree = &BTRFS_I(inode)->ordered_tree;
679 spin_lock(&tree->lock); 668 spin_lock_irq(&tree->lock);
680 node = tree_search(tree, file_offset); 669 node = tree_search(tree, file_offset);
681 if (!node) 670 if (!node)
682 goto out; 671 goto out;
@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
687 if (entry) 676 if (entry)
688 atomic_inc(&entry->refs); 677 atomic_inc(&entry->refs);
689out: 678out:
690 spin_unlock(&tree->lock); 679 spin_unlock_irq(&tree->lock);
691 return entry; 680 return entry;
692} 681}
693 682
@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
703 struct btrfs_ordered_extent *entry = NULL; 692 struct btrfs_ordered_extent *entry = NULL;
704 693
705 tree = &BTRFS_I(inode)->ordered_tree; 694 tree = &BTRFS_I(inode)->ordered_tree;
706 spin_lock(&tree->lock); 695 spin_lock_irq(&tree->lock);
707 node = tree_search(tree, file_offset); 696 node = tree_search(tree, file_offset);
708 if (!node) { 697 if (!node) {
709 node = tree_search(tree, file_offset + len); 698 node = tree_search(tree, file_offset + len);
@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
728out: 717out:
729 if (entry) 718 if (entry)
730 atomic_inc(&entry->refs); 719 atomic_inc(&entry->refs);
731 spin_unlock(&tree->lock); 720 spin_unlock_irq(&tree->lock);
732 return entry; 721 return entry;
733} 722}
734 723
@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
744 struct btrfs_ordered_extent *entry = NULL; 733 struct btrfs_ordered_extent *entry = NULL;
745 734
746 tree = &BTRFS_I(inode)->ordered_tree; 735 tree = &BTRFS_I(inode)->ordered_tree;
747 spin_lock(&tree->lock); 736 spin_lock_irq(&tree->lock);
748 node = tree_search(tree, file_offset); 737 node = tree_search(tree, file_offset);
749 if (!node) 738 if (!node)
750 goto out; 739 goto out;
@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
752 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 741 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
753 atomic_inc(&entry->refs); 742 atomic_inc(&entry->refs);
754out: 743out:
755 spin_unlock(&tree->lock); 744 spin_unlock_irq(&tree->lock);
756 return entry; 745 return entry;
757} 746}
758 747
@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
764 struct btrfs_ordered_extent *ordered) 753 struct btrfs_ordered_extent *ordered)
765{ 754{
766 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 755 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
767 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
768 u64 disk_i_size; 756 u64 disk_i_size;
769 u64 new_i_size; 757 u64 new_i_size;
770 u64 i_size_test; 758 u64 i_size_test;
@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
779 else 767 else
780 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 768 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
781 769
782 spin_lock(&tree->lock); 770 spin_lock_irq(&tree->lock);
783 disk_i_size = BTRFS_I(inode)->disk_i_size; 771 disk_i_size = BTRFS_I(inode)->disk_i_size;
784 772
785 /* truncate file */ 773 /* truncate file */
@@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
798 } 786 }
799 787
800 /* 788 /*
801 * we can't update the disk_isize if there are delalloc bytes
802 * between disk_i_size and this ordered extent
803 */
804 if (test_range_bit(io_tree, disk_i_size, offset - 1,
805 EXTENT_DELALLOC, 0, NULL)) {
806 goto out;
807 }
808 /*
809 * walk backward from this ordered extent to disk_i_size. 789 * walk backward from this ordered extent to disk_i_size.
810 * if we find an ordered extent then we can't update disk i_size 790 * if we find an ordered extent then we can't update disk i_size
811 * yet 791 * yet
@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
825 } 805 }
826 node = prev; 806 node = prev;
827 } 807 }
828 while (node) { 808 for (; node; node = rb_prev(node)) {
829 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 809 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
810
 811	 /* We treat this entry as if it doesn't exist */
812 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
813 continue;
830 if (test->file_offset + test->len <= disk_i_size) 814 if (test->file_offset + test->len <= disk_i_size)
831 break; 815 break;
832 if (test->file_offset >= i_size) 816 if (test->file_offset >= i_size)
833 break; 817 break;
834 if (test->file_offset >= disk_i_size) 818 if (test->file_offset >= disk_i_size)
835 goto out; 819 goto out;
836 node = rb_prev(node);
837 } 820 }
838 new_i_size = min_t(u64, offset, i_size); 821 new_i_size = min_t(u64, offset, i_size);
839 822
@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
851 else 834 else
852 node = rb_first(&tree->tree); 835 node = rb_first(&tree->tree);
853 } 836 }
854 i_size_test = 0; 837
855 if (node) { 838 /*
856 /* 839 * We are looking for an area between our current extent and the next
857 * do we have an area where IO might have finished 840 * ordered extent to update the i_size to. There are 3 cases here
858 * between our ordered extent and the next one. 841 *
859 */ 842 * 1) We don't actually have anything and we can update to i_size.
843 * 2) We have stuff but they already did their i_size update so again we
844 * can just update to i_size.
845 * 3) We have an outstanding ordered extent so the most we can update
846 * our disk_i_size to is the start of the next offset.
847 */
848 i_size_test = i_size;
849 for (; node; node = rb_next(node)) {
860 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 850 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
861 if (test->file_offset > offset) 851
852 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
853 continue;
854 if (test->file_offset > offset) {
862 i_size_test = test->file_offset; 855 i_size_test = test->file_offset;
863 } else { 856 break;
864 i_size_test = i_size; 857 }
865 } 858 }
866 859
867 /* 860 /*
868 * i_size_test is the end of a region after this ordered 861 * i_size_test is the end of a region after this ordered
869 * extent where there are no ordered extents. As long as there 862 * extent where there are no ordered extents, we can safely set
870 * are no delalloc bytes in this area, it is safe to update 863 * disk_i_size to this.
871 * disk_i_size to the end of the region.
872 */ 864 */
873 if (i_size_test > offset && 865 if (i_size_test > offset)
874 !test_range_bit(io_tree, offset, i_size_test - 1,
875 EXTENT_DELALLOC, 0, NULL)) {
876 new_i_size = min_t(u64, i_size_test, i_size); 866 new_i_size = min_t(u64, i_size_test, i_size);
877 }
878 BTRFS_I(inode)->disk_i_size = new_i_size; 867 BTRFS_I(inode)->disk_i_size = new_i_size;
879 ret = 0; 868 ret = 0;
880out: 869out:
881 /* 870 /*
882 * we need to remove the ordered extent with the tree lock held 871 * We need to do this because we can't remove ordered extents until
 883	 * so that other people calling this function don't find our fully	 872	 * after the disk_i_size has been updated and then the inode has been
 884	 * processed ordered entry and skip updating the i_size	 873	 * updated to reflect the change, so we need to tell anybody who finds
 874	 * this ordered extent that we've already done all the real work; we
875 * just haven't completed all the other work.
885 */ 876 */
886 if (ordered) 877 if (ordered)
887 __btrfs_remove_ordered_extent(inode, ordered); 878 set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
888 spin_unlock(&tree->lock); 879 spin_unlock_irq(&tree->lock);
889 if (ordered)
890 wake_up(&ordered->wait);
891 return ret; 880 return ret;
892} 881}
893 882
@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
912 if (!ordered) 901 if (!ordered)
913 return 1; 902 return 1;
914 903
915 spin_lock(&tree->lock); 904 spin_lock_irq(&tree->lock);
916 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 905 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
917 if (disk_bytenr >= ordered_sum->bytenr) { 906 if (disk_bytenr >= ordered_sum->bytenr) {
918 num_sectors = ordered_sum->len / sectorsize; 907 num_sectors = ordered_sum->len / sectorsize;
@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
927 } 916 }
928 } 917 }
929out: 918out:
930 spin_unlock(&tree->lock); 919 spin_unlock_irq(&tree->lock);
931 btrfs_put_ordered_extent(ordered); 920 btrfs_put_ordered_extent(ordered);
932 return ret; 921 return ret;
933} 922}
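
Every tree->lock site above switches to an IRQ-safe variant because ordered-extent completion can now be driven from bio end_io context. The rule of thumb the patch follows, sketched:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(tree_lock);

/* process context: IRQs are known to be enabled, plain _irq is fine */
static void process_context_path(void)
{
	spin_lock_irq(&tree_lock);
	/* ... insert into / search the ordered tree ... */
	spin_unlock_irq(&tree_lock);
}

/* completion context: caller's IRQ state is unknown, save and restore */
static void completion_path(void)
{
	unsigned long flags;

	spin_lock_irqsave(&tree_lock, flags);
	/* ... decrement bytes_left, test and set IO_DONE ... */
	spin_unlock_irqrestore(&tree_lock, flags);
}
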
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index c355ad4dc1a..e03c560d299 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -74,6 +74,12 @@ struct btrfs_ordered_sum {
74 74
75#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ 75#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
76 76
77#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
78
 79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
80 * has done its due diligence in updating
81 * the isize. */
82
77struct btrfs_ordered_extent { 83struct btrfs_ordered_extent {
78 /* logical offset in the file */ 84 /* logical offset in the file */
79 u64 file_offset; 85 u64 file_offset;
@@ -113,6 +119,8 @@ struct btrfs_ordered_extent {
113 119
114 /* a per root list of all the pending ordered extents */ 120 /* a per root list of all the pending ordered extents */
115 struct list_head root_extent_list; 121 struct list_head root_extent_list;
122
123 struct btrfs_work work;
116}; 124};
117 125
118 126
@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
143 struct btrfs_ordered_extent *entry); 151 struct btrfs_ordered_extent *entry);
144int btrfs_dec_test_ordered_pending(struct inode *inode, 152int btrfs_dec_test_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached, 153 struct btrfs_ordered_extent **cached,
146 u64 file_offset, u64 io_size); 154 u64 file_offset, u64 io_size, int uptodate);
147int btrfs_dec_test_first_ordered_pending(struct inode *inode, 155int btrfs_dec_test_first_ordered_pending(struct inode *inode,
148 struct btrfs_ordered_extent **cached, 156 struct btrfs_ordered_extent **cached,
149 u64 *file_offset, u64 io_size); 157 u64 *file_offset, u64 io_size,
158 int uptodate);
150int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 159int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
151 u64 start, u64 len, u64 disk_len, int type); 160 u64 start, u64 len, u64 disk_len, int type);
152int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 161int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index f38e452486b..5e23684887e 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
294 btrfs_dev_extent_chunk_offset(l, dev_extent), 294 btrfs_dev_extent_chunk_offset(l, dev_extent),
295 (unsigned long long) 295 (unsigned long long)
296 btrfs_dev_extent_length(l, dev_extent)); 296 btrfs_dev_extent_length(l, dev_extent));
297 case BTRFS_DEV_STATS_KEY:
298 printk(KERN_INFO "\t\tdevice stats\n");
299 break;
297 }; 300 };
298 } 301 }
299} 302}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ac5d0108588..48a4882d8ad 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work)
718{ 718{
719 struct reada_machine_work *rmw; 719 struct reada_machine_work *rmw;
720 struct btrfs_fs_info *fs_info; 720 struct btrfs_fs_info *fs_info;
721 int old_ioprio;
721 722
722 rmw = container_of(work, struct reada_machine_work, work); 723 rmw = container_of(work, struct reada_machine_work, work);
723 fs_info = rmw->fs_info; 724 fs_info = rmw->fs_info;
724 725
725 kfree(rmw); 726 kfree(rmw);
726 727
728 old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
729 task_nice_ioprio(current));
730 set_task_ioprio(current, BTRFS_IOPRIO_READA);
727 __reada_start_machine(fs_info); 731 __reada_start_machine(fs_info);
732 set_task_ioprio(current, old_ioprio);
728} 733}
729 734
730static void __reada_start_machine(struct btrfs_fs_info *fs_info) 735static void __reada_start_machine(struct btrfs_fs_info *fs_info)
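
BTRFS_IOPRIO_READA is assumed here to name an idle-class I/O priority, so readahead yields to foreground I/O; a plausible definition (the actual macro is added to ctree.h elsewhere in this series):

#include <linux/ioprio.h>

/* readahead runs at idle priority: lowest scheduling class, data 0 */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
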
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 7e487be0094..a38cfa4f251 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -50,7 +50,7 @@ struct scrub_dev;
50struct scrub_page { 50struct scrub_page {
51 struct scrub_block *sblock; 51 struct scrub_block *sblock;
52 struct page *page; 52 struct page *page;
53 struct block_device *bdev; 53 struct btrfs_device *dev;
54 u64 flags; /* extent flags */ 54 u64 flags; /* extent flags */
55 u64 generation; 55 u64 generation;
56 u64 logical; 56 u64 logical;
@@ -86,6 +86,7 @@ struct scrub_block {
86 unsigned int header_error:1; 86 unsigned int header_error:1;
87 unsigned int checksum_error:1; 87 unsigned int checksum_error:1;
88 unsigned int no_io_error_seen:1; 88 unsigned int no_io_error_seen:1;
89 unsigned int generation_error:1; /* also sets header_error */
89 }; 90 };
90}; 91};
91 92
@@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
675 sdev->stat.read_errors++; 676 sdev->stat.read_errors++;
676 sdev->stat.uncorrectable_errors++; 677 sdev->stat.uncorrectable_errors++;
677 spin_unlock(&sdev->stat_lock); 678 spin_unlock(&sdev->stat_lock);
679 btrfs_dev_stat_inc_and_print(sdev->dev,
680 BTRFS_DEV_STAT_READ_ERRS);
678 goto out; 681 goto out;
679 } 682 }
680 683
@@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
686 sdev->stat.read_errors++; 689 sdev->stat.read_errors++;
687 sdev->stat.uncorrectable_errors++; 690 sdev->stat.uncorrectable_errors++;
688 spin_unlock(&sdev->stat_lock); 691 spin_unlock(&sdev->stat_lock);
692 btrfs_dev_stat_inc_and_print(sdev->dev,
693 BTRFS_DEV_STAT_READ_ERRS);
689 goto out; 694 goto out;
690 } 695 }
691 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); 696 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
@@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
699 sdev->stat.read_errors++; 704 sdev->stat.read_errors++;
700 sdev->stat.uncorrectable_errors++; 705 sdev->stat.uncorrectable_errors++;
701 spin_unlock(&sdev->stat_lock); 706 spin_unlock(&sdev->stat_lock);
707 btrfs_dev_stat_inc_and_print(sdev->dev,
708 BTRFS_DEV_STAT_READ_ERRS);
702 goto out; 709 goto out;
703 } 710 }
704 711
@@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
725 spin_unlock(&sdev->stat_lock); 732 spin_unlock(&sdev->stat_lock);
726 if (__ratelimit(&_rs)) 733 if (__ratelimit(&_rs))
727 scrub_print_warning("i/o error", sblock_to_check); 734 scrub_print_warning("i/o error", sblock_to_check);
735 btrfs_dev_stat_inc_and_print(sdev->dev,
736 BTRFS_DEV_STAT_READ_ERRS);
728 } else if (sblock_bad->checksum_error) { 737 } else if (sblock_bad->checksum_error) {
729 spin_lock(&sdev->stat_lock); 738 spin_lock(&sdev->stat_lock);
730 sdev->stat.csum_errors++; 739 sdev->stat.csum_errors++;
731 spin_unlock(&sdev->stat_lock); 740 spin_unlock(&sdev->stat_lock);
732 if (__ratelimit(&_rs)) 741 if (__ratelimit(&_rs))
733 scrub_print_warning("checksum error", sblock_to_check); 742 scrub_print_warning("checksum error", sblock_to_check);
743 btrfs_dev_stat_inc_and_print(sdev->dev,
744 BTRFS_DEV_STAT_CORRUPTION_ERRS);
734 } else if (sblock_bad->header_error) { 745 } else if (sblock_bad->header_error) {
735 spin_lock(&sdev->stat_lock); 746 spin_lock(&sdev->stat_lock);
736 sdev->stat.verify_errors++; 747 sdev->stat.verify_errors++;
@@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
738 if (__ratelimit(&_rs)) 749 if (__ratelimit(&_rs))
739 scrub_print_warning("checksum/header error", 750 scrub_print_warning("checksum/header error",
740 sblock_to_check); 751 sblock_to_check);
752 if (sblock_bad->generation_error)
753 btrfs_dev_stat_inc_and_print(sdev->dev,
754 BTRFS_DEV_STAT_GENERATION_ERRS);
755 else
756 btrfs_dev_stat_inc_and_print(sdev->dev,
757 BTRFS_DEV_STAT_CORRUPTION_ERRS);
741 } 758 }
742 759
743 if (sdev->readonly) 760 if (sdev->readonly)
@@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev,
998 page = sblock->pagev + page_index; 1015 page = sblock->pagev + page_index;
999 page->logical = logical; 1016 page->logical = logical;
1000 page->physical = bbio->stripes[mirror_index].physical; 1017 page->physical = bbio->stripes[mirror_index].physical;
1001 /* for missing devices, bdev is NULL */ 1018 /* for missing devices, dev->bdev is NULL */
1002 page->bdev = bbio->stripes[mirror_index].dev->bdev; 1019 page->dev = bbio->stripes[mirror_index].dev;
1003 page->mirror_num = mirror_index + 1; 1020 page->mirror_num = mirror_index + 1;
1004 page->page = alloc_page(GFP_NOFS); 1021 page->page = alloc_page(GFP_NOFS);
1005 if (!page->page) { 1022 if (!page->page) {
@@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1043 struct scrub_page *page = sblock->pagev + page_num; 1060 struct scrub_page *page = sblock->pagev + page_num;
1044 DECLARE_COMPLETION_ONSTACK(complete); 1061 DECLARE_COMPLETION_ONSTACK(complete);
1045 1062
1046 if (page->bdev == NULL) { 1063 if (page->dev->bdev == NULL) {
1047 page->io_error = 1; 1064 page->io_error = 1;
1048 sblock->no_io_error_seen = 0; 1065 sblock->no_io_error_seen = 0;
1049 continue; 1066 continue;
@@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1053 bio = bio_alloc(GFP_NOFS, 1); 1070 bio = bio_alloc(GFP_NOFS, 1);
1054 if (!bio) 1071 if (!bio)
1055 return -EIO; 1072 return -EIO;
1056 bio->bi_bdev = page->bdev; 1073 bio->bi_bdev = page->dev->bdev;
1057 bio->bi_sector = page->physical >> 9; 1074 bio->bi_sector = page->physical >> 9;
1058 bio->bi_end_io = scrub_complete_bio_end_io; 1075 bio->bi_end_io = scrub_complete_bio_end_io;
1059 bio->bi_private = &complete; 1076 bio->bi_private = &complete;
@@ -1098,21 +1115,24 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1098 if (is_metadata) { 1115 if (is_metadata) {
1099 struct btrfs_header *h; 1116 struct btrfs_header *h;
1100 1117
1101 mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0); 1118 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1102 h = (struct btrfs_header *)mapped_buffer; 1119 h = (struct btrfs_header *)mapped_buffer;
1103 1120
1104 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || 1121 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
1105 generation != le64_to_cpu(h->generation) ||
1106 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || 1122 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1107 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 1123 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1108 BTRFS_UUID_SIZE)) 1124 BTRFS_UUID_SIZE)) {
1109 sblock->header_error = 1; 1125 sblock->header_error = 1;
1126 } else if (generation != le64_to_cpu(h->generation)) {
1127 sblock->header_error = 1;
1128 sblock->generation_error = 1;
1129 }
1110 csum = h->csum; 1130 csum = h->csum;
1111 } else { 1131 } else {
1112 if (!have_csum) 1132 if (!have_csum)
1113 return; 1133 return;
1114 1134
1115 mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0); 1135 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1116 } 1136 }
1117 1137
1118 for (page_num = 0;;) { 1138 for (page_num = 0;;) {
@@ -1124,14 +1144,13 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1124 crc = btrfs_csum_data(root, mapped_buffer, crc, 1144 crc = btrfs_csum_data(root, mapped_buffer, crc,
1125 PAGE_SIZE); 1145 PAGE_SIZE);
1126 1146
1127 kunmap_atomic(mapped_buffer, KM_USER0); 1147 kunmap_atomic(mapped_buffer);
1128 page_num++; 1148 page_num++;
1129 if (page_num >= sblock->page_count) 1149 if (page_num >= sblock->page_count)
1130 break; 1150 break;
1131 BUG_ON(!sblock->pagev[page_num].page); 1151 BUG_ON(!sblock->pagev[page_num].page);
1132 1152
1133 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page, 1153 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
1134 KM_USER0);
1135 } 1154 }
1136 1155
1137 btrfs_csum_final(crc, calculated_csum); 1156 btrfs_csum_final(crc, calculated_csum);
@@ -1183,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1183 bio = bio_alloc(GFP_NOFS, 1); 1202 bio = bio_alloc(GFP_NOFS, 1);
1184 if (!bio) 1203 if (!bio)
1185 return -EIO; 1204 return -EIO;
1186 bio->bi_bdev = page_bad->bdev; 1205 bio->bi_bdev = page_bad->dev->bdev;
1187 bio->bi_sector = page_bad->physical >> 9; 1206 bio->bi_sector = page_bad->physical >> 9;
1188 bio->bi_end_io = scrub_complete_bio_end_io; 1207 bio->bi_end_io = scrub_complete_bio_end_io;
1189 bio->bi_private = &complete; 1208 bio->bi_private = &complete;
@@ -1197,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1197 1216
1198 /* this will also unplug the queue */ 1217 /* this will also unplug the queue */
1199 wait_for_completion(&complete); 1218 wait_for_completion(&complete);
1219 if (!bio_flagged(bio, BIO_UPTODATE)) {
1220 btrfs_dev_stat_inc_and_print(page_bad->dev,
1221 BTRFS_DEV_STAT_WRITE_ERRS);
1222 bio_put(bio);
1223 return -EIO;
1224 }
1200 bio_put(bio); 1225 bio_put(bio);
1201 } 1226 }
1202 1227
@@ -1242,7 +1267,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1242 1267
1243 on_disk_csum = sblock->pagev[0].csum; 1268 on_disk_csum = sblock->pagev[0].csum;
1244 page = sblock->pagev[0].page; 1269 page = sblock->pagev[0].page;
1245 buffer = kmap_atomic(page, KM_USER0); 1270 buffer = kmap_atomic(page);
1246 1271
1247 len = sdev->sectorsize; 1272 len = sdev->sectorsize;
1248 index = 0; 1273 index = 0;
@@ -1250,7 +1275,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1250 u64 l = min_t(u64, len, PAGE_SIZE); 1275 u64 l = min_t(u64, len, PAGE_SIZE);
1251 1276
1252 crc = btrfs_csum_data(root, buffer, crc, l); 1277 crc = btrfs_csum_data(root, buffer, crc, l);
1253 kunmap_atomic(buffer, KM_USER0); 1278 kunmap_atomic(buffer);
1254 len -= l; 1279 len -= l;
1255 if (len == 0) 1280 if (len == 0)
1256 break; 1281 break;
@@ -1258,7 +1283,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1258 BUG_ON(index >= sblock->page_count); 1283 BUG_ON(index >= sblock->page_count);
1259 BUG_ON(!sblock->pagev[index].page); 1284 BUG_ON(!sblock->pagev[index].page);
1260 page = sblock->pagev[index].page; 1285 page = sblock->pagev[index].page;
1261 buffer = kmap_atomic(page, KM_USER0); 1286 buffer = kmap_atomic(page);
1262 } 1287 }
1263 1288
1264 btrfs_csum_final(crc, csum); 1289 btrfs_csum_final(crc, csum);
@@ -1288,7 +1313,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1288 1313
1289 BUG_ON(sblock->page_count < 1); 1314 BUG_ON(sblock->page_count < 1);
1290 page = sblock->pagev[0].page; 1315 page = sblock->pagev[0].page;
1291 mapped_buffer = kmap_atomic(page, KM_USER0); 1316 mapped_buffer = kmap_atomic(page);
1292 h = (struct btrfs_header *)mapped_buffer; 1317 h = (struct btrfs_header *)mapped_buffer;
1293 memcpy(on_disk_csum, h->csum, sdev->csum_size); 1318 memcpy(on_disk_csum, h->csum, sdev->csum_size);
1294 1319
@@ -1320,7 +1345,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1320 u64 l = min_t(u64, len, mapped_size); 1345 u64 l = min_t(u64, len, mapped_size);
1321 1346
1322 crc = btrfs_csum_data(root, p, crc, l); 1347 crc = btrfs_csum_data(root, p, crc, l);
1323 kunmap_atomic(mapped_buffer, KM_USER0); 1348 kunmap_atomic(mapped_buffer);
1324 len -= l; 1349 len -= l;
1325 if (len == 0) 1350 if (len == 0)
1326 break; 1351 break;
@@ -1328,7 +1353,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1328 BUG_ON(index >= sblock->page_count); 1353 BUG_ON(index >= sblock->page_count);
1329 BUG_ON(!sblock->pagev[index].page); 1354 BUG_ON(!sblock->pagev[index].page);
1330 page = sblock->pagev[index].page; 1355 page = sblock->pagev[index].page;
1331 mapped_buffer = kmap_atomic(page, KM_USER0); 1356 mapped_buffer = kmap_atomic(page);
1332 mapped_size = PAGE_SIZE; 1357 mapped_size = PAGE_SIZE;
1333 p = mapped_buffer; 1358 p = mapped_buffer;
1334 } 1359 }
@@ -1353,24 +1378,25 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1353 u64 mapped_size; 1378 u64 mapped_size;
1354 void *p; 1379 void *p;
1355 u32 crc = ~(u32)0; 1380 u32 crc = ~(u32)0;
1356 int fail = 0; 1381 int fail_gen = 0;
1382 int fail_cor = 0;
1357 u64 len; 1383 u64 len;
1358 int index; 1384 int index;
1359 1385
1360 BUG_ON(sblock->page_count < 1); 1386 BUG_ON(sblock->page_count < 1);
1361 page = sblock->pagev[0].page; 1387 page = sblock->pagev[0].page;
1362 mapped_buffer = kmap_atomic(page, KM_USER0); 1388 mapped_buffer = kmap_atomic(page);
1363 s = (struct btrfs_super_block *)mapped_buffer; 1389 s = (struct btrfs_super_block *)mapped_buffer;
1364 memcpy(on_disk_csum, s->csum, sdev->csum_size); 1390 memcpy(on_disk_csum, s->csum, sdev->csum_size);
1365 1391
1366 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) 1392 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
1367 ++fail; 1393 ++fail_cor;
1368 1394
1369 if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) 1395 if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
1370 ++fail; 1396 ++fail_gen;
1371 1397
1372 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1398 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1373 ++fail; 1399 ++fail_cor;
1374 1400
1375 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; 1401 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1376 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; 1402 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
@@ -1380,7 +1406,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1380 u64 l = min_t(u64, len, mapped_size); 1406 u64 l = min_t(u64, len, mapped_size);
1381 1407
1382 crc = btrfs_csum_data(root, p, crc, l); 1408 crc = btrfs_csum_data(root, p, crc, l);
1383 kunmap_atomic(mapped_buffer, KM_USER0); 1409 kunmap_atomic(mapped_buffer);
1384 len -= l; 1410 len -= l;
1385 if (len == 0) 1411 if (len == 0)
1386 break; 1412 break;
@@ -1388,16 +1414,16 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1388 BUG_ON(index >= sblock->page_count); 1414 BUG_ON(index >= sblock->page_count);
1389 BUG_ON(!sblock->pagev[index].page); 1415 BUG_ON(!sblock->pagev[index].page);
1390 page = sblock->pagev[index].page; 1416 page = sblock->pagev[index].page;
1391 mapped_buffer = kmap_atomic(page, KM_USER0); 1417 mapped_buffer = kmap_atomic(page);
1392 mapped_size = PAGE_SIZE; 1418 mapped_size = PAGE_SIZE;
1393 p = mapped_buffer; 1419 p = mapped_buffer;
1394 } 1420 }
1395 1421
1396 btrfs_csum_final(crc, calculated_csum); 1422 btrfs_csum_final(crc, calculated_csum);
1397 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) 1423 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
1398 ++fail; 1424 ++fail_cor;
1399 1425
1400 if (fail) { 1426 if (fail_cor + fail_gen) {
1401 /* 1427 /*
1402 * if we find an error in a super block, we just report it. 1428 * if we find an error in a super block, we just report it.
1403 * They will get written with the next transaction commit 1429 * They will get written with the next transaction commit
@@ -1406,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1406 spin_lock(&sdev->stat_lock); 1432 spin_lock(&sdev->stat_lock);
1407 ++sdev->stat.super_errors; 1433 ++sdev->stat.super_errors;
1408 spin_unlock(&sdev->stat_lock); 1434 spin_unlock(&sdev->stat_lock);
1435 if (fail_cor)
1436 btrfs_dev_stat_inc_and_print(sdev->dev,
1437 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1438 else
1439 btrfs_dev_stat_inc_and_print(sdev->dev,
1440 BTRFS_DEV_STAT_GENERATION_ERRS);
1409 } 1441 }
1410 1442
1411 return fail; 1443 return fail_cor + fail_gen;
1412} 1444}
1413 1445
1414static void scrub_block_get(struct scrub_block *sblock) 1446static void scrub_block_get(struct scrub_block *sblock)
@@ -1552,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
1552 return -ENOMEM; 1584 return -ENOMEM;
1553 } 1585 }
1554 spage->sblock = sblock; 1586 spage->sblock = sblock;
1555 spage->bdev = sdev->dev->bdev; 1587 spage->dev = sdev->dev;
1556 spage->flags = flags; 1588 spage->flags = flags;
1557 spage->generation = gen; 1589 spage->generation = gen;
1558 spage->logical = logical; 1590 spage->logical = logical;
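
The scrub change above splits the single fail counter so that checksum and fsid mismatches land in the corruption counter while a generation mismatch is tracked separately; only with that split can the per-device stats tell bad media apart from a stale-but-intact superblock copy. A minimal userspace sketch of the classification, with illustrative names (these are not btrfs APIs):

#include <stdio.h>
#include <string.h>

enum dev_stat { DEV_STAT_CORRUPTION, DEV_STAT_GENERATION };

static void dev_stat_inc(enum dev_stat which)
{
	printf("bump %s counter\n",
	       which == DEV_STAT_CORRUPTION ? "corruption" : "generation");
}

static int check_super(const unsigned char *want_csum,
		       const unsigned char *disk_csum, size_t csum_len,
		       unsigned long long want_gen, unsigned long long disk_gen)
{
	int fail_cor = 0, fail_gen = 0;

	if (memcmp(want_csum, disk_csum, csum_len))
		++fail_cor;		/* bad bytes on disk */
	if (want_gen != disk_gen)
		++fail_gen;		/* stale but self-consistent copy */

	if (fail_cor + fail_gen) {
		if (fail_cor)
			dev_stat_inc(DEV_STAT_CORRUPTION);
		else
			dev_stat_inc(DEV_STAT_GENERATION);
	}
	return fail_cor + fail_gen;	/* non-zero means "report it" */
}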
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f267718cbd1..96eb9fef7bd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
188 va_start(args, fmt); 188 va_start(args, fmt);
189 189
190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { 190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
191 strncpy(lvl, fmt, 3); 191 memcpy(lvl, fmt, 3);
192 lvl[3] = '\0';
192 fmt += 3; 193 fmt += 3;
193 type = logtypes[fmt[1] - '0']; 194 type = logtypes[fmt[1] - '0'];
194 } else 195 } else
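
The strncpy() call replaced above copied exactly three bytes of the "<N>" log-level prefix and never wrote a terminator, so lvl was only safe if the buffer happened to be pre-zeroed; the fix copies the three known bytes and terminates explicitly. A self-contained sketch of the corrected parsing (split_level() is a made-up name for illustration):

#include <ctype.h>
#include <string.h>

/* On a "<N>" prefix, copy it into lvl (at least 4 bytes) and return
 * the rest of the format string; otherwise leave lvl empty. */
static const char *split_level(const char *fmt, char lvl[4])
{
	if (fmt[0] == '<' && isdigit((unsigned char)fmt[1]) && fmt[2] == '>') {
		memcpy(lvl, fmt, 3);	/* copy "<N>" ... */
		lvl[3] = '\0';		/* ... and terminate explicitly */
		return fmt + 3;
	}
	lvl[0] = '\0';
	return fmt;
}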
@@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
435 case Opt_thread_pool: 436 case Opt_thread_pool:
436 intarg = 0; 437 intarg = 0;
437 match_int(&args[0], &intarg); 438 match_int(&args[0], &intarg);
438 if (intarg) { 439 if (intarg)
439 info->thread_pool_size = intarg; 440 info->thread_pool_size = intarg;
440 printk(KERN_INFO "btrfs: thread pool %d\n",
441 info->thread_pool_size);
442 }
443 break; 441 break;
444 case Opt_max_inline: 442 case Opt_max_inline:
445 num = match_strdup(&args[0]); 443 num = match_strdup(&args[0]);
@@ -755,7 +753,6 @@ static int btrfs_fill_super(struct super_block *sb,
755 void *data, int silent) 753 void *data, int silent)
756{ 754{
757 struct inode *inode; 755 struct inode *inode;
758 struct dentry *root_dentry;
759 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 756 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
760 struct btrfs_key key; 757 struct btrfs_key key;
761 int err; 758 int err;
@@ -770,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb,
770#ifdef CONFIG_BTRFS_FS_POSIX_ACL 767#ifdef CONFIG_BTRFS_FS_POSIX_ACL
771 sb->s_flags |= MS_POSIXACL; 768 sb->s_flags |= MS_POSIXACL;
772#endif 769#endif
773 770 sb->s_flags |= MS_I_VERSION;
774 err = open_ctree(sb, fs_devices, (char *)data); 771 err = open_ctree(sb, fs_devices, (char *)data);
775 if (err) { 772 if (err) {
776 printk("btrfs: open_ctree failed\n"); 773 printk("btrfs: open_ctree failed\n");
@@ -786,15 +783,12 @@ static int btrfs_fill_super(struct super_block *sb,
786 goto fail_close; 783 goto fail_close;
787 } 784 }
788 785
789 root_dentry = d_alloc_root(inode); 786 sb->s_root = d_make_root(inode);
790 if (!root_dentry) { 787 if (!sb->s_root) {
791 iput(inode);
792 err = -ENOMEM; 788 err = -ENOMEM;
793 goto fail_close; 789 goto fail_close;
794 } 790 }
795 791
796 sb->s_root = root_dentry;
797
798 save_mount_options(sb, data); 792 save_mount_options(sb, data);
799 cleancache_init_fs(sb); 793 cleancache_init_fs(sb);
800 sb->s_flags |= MS_ACTIVE; 794 sb->s_flags |= MS_ACTIVE;
@@ -929,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode)
929 */ 923 */
930static char *setup_root_args(char *args) 924static char *setup_root_args(char *args)
931{ 925{
932 unsigned copied = 0; 926 unsigned len = strlen(args) + 2 + 1;
933 unsigned len = strlen(args) + 2; 927 char *src, *dst, *buf;
934 char *pos;
935 char *ret;
936 928
937 /* 929 /*
938 * We need the same args as before, but minus 930 * We need the same args as before, but with this substitution:
939 * 931 * s!subvol=[^,]+!subvolid=0!
940 * subvol=a
941 *
942 * and add
943 * 932 *
944 * subvolid=0 933 * Since the replacement string is up to 2 bytes longer than the
945 * 934 * original, allocate strlen(args) + 2 + 1 bytes.
946 * which is a difference of 2 characters, so we allocate strlen(args) +
947 * 2 characters.
948 */ 935 */
949 ret = kzalloc(len * sizeof(char), GFP_NOFS);
950 if (!ret)
951 return NULL;
952 pos = strstr(args, "subvol=");
953 936
937 src = strstr(args, "subvol=");
954 /* This shouldn't happen, but just in case... */ 938 /* This shouldn't happen, but just in case... */
955 if (!pos) { 939 if (!src)
956 kfree(ret); 940 return NULL;
941
942 buf = dst = kmalloc(len, GFP_NOFS);
943 if (!buf)
957 return NULL; 944 return NULL;
958 }
959 945
960 /* 946 /*
961 * The subvol=<> arg is not at the front of the string, copy everybody 947 * If the subvol= arg is not at the start of the string,
962 * up to that into ret. 948 * copy whatever precedes it into buf.
963 */ 949 */
964 if (pos != args) { 950 if (src != args) {
965 *pos = '\0'; 951 *src++ = '\0';
966 strcpy(ret, args); 952 strcpy(buf, args);
967 copied += strlen(args); 953 dst += strlen(args);
968 pos++;
969 } 954 }
970 955
971 strncpy(ret + copied, "subvolid=0", len - copied); 956 strcpy(dst, "subvolid=0");
972 957 dst += strlen("subvolid=0");
973 /* Length of subvolid=0 */
974 copied += 10;
975 958
976 /* 959 /*
977 * If there is no , after the subvol= option then we know there's no 960 * If there is a "," after the original subvol=... string,
978 * other options and we can just return. 961 * copy that suffix into our buffer. Otherwise, we're done.
979 */ 962 */
980 pos = strchr(pos, ','); 963 src = strchr(src, ',');
981 if (!pos) 964 if (src)
982 return ret; 965 strcpy(dst, src);
983
984 /* Copy the rest of the arguments into our buffer */
985 strncpy(ret + copied, pos, len - copied);
986 copied += strlen(pos);
987 966
988 return ret; 967 return buf;
989} 968}
990 969
991static struct dentry *mount_subvol(const char *subvol_name, int flags, 970static struct dentry *mount_subvol(const char *subvol_name, int flags,
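
The rewritten setup_root_args() performs, in effect, s!subvol=[^,]+!subvolid=0! over the mount options: copy any prefix, emit the replacement, then append whatever follows the first comma after the old option. Since "subvolid=0" (10 bytes) is at most 2 bytes longer than the shortest possible "subvol=x" (8 bytes), strlen(args) + 2 + 1 bytes always suffice. A standalone userspace version of the same substitution (replace_subvol() is an illustrative name):

#include <stdlib.h>
#include <string.h>

/* Replace "subvol=<anything up to the next comma>" with "subvolid=0",
 * keeping everything before and after it. Returns a malloc'd string,
 * or NULL if "subvol=" is absent or on allocation failure. */
static char *replace_subvol(const char *args)
{
	size_t len = strlen(args) + 2 + 1;
	const char *src = strstr(args, "subvol=");
	char *buf, *dst;

	if (!src)
		return NULL;

	buf = dst = malloc(len);
	if (!buf)
		return NULL;

	memcpy(dst, args, src - args);		/* prefix, if any */
	dst += src - args;
	strcpy(dst, "subvolid=0");		/* NUL-terminates buf */
	dst += strlen("subvolid=0");

	src = strchr(src, ',');			/* suffix after the old option */
	if (src)
		strcpy(dst, src);
	return buf;
}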
@@ -1122,6 +1101,40 @@ error_fs_info:
1122 return ERR_PTR(error); 1101 return ERR_PTR(error);
1123} 1102}
1124 1103
1104static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit)
1105{
1106 spin_lock_irq(&workers->lock);
1107 workers->max_workers = new_limit;
1108 spin_unlock_irq(&workers->lock);
1109}
1110
1111static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1112 int new_pool_size, int old_pool_size)
1113{
1114 if (new_pool_size == old_pool_size)
1115 return;
1116
1117 fs_info->thread_pool_size = new_pool_size;
1118
1119 printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
1120 old_pool_size, new_pool_size);
1121
1122 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
1123 btrfs_set_max_workers(&fs_info->workers, new_pool_size);
1124 btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size);
1125 btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size);
1126 btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size);
1127 btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size);
1128 btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size);
1129 btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size);
1130 btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size);
1131 btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size);
1132 btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size);
1133 btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size);
1134 btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size);
1135 btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size);
1136}
1137
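btrfs_set_max_workers() wraps what is a single store in the pool's own lock so that code reading max_workers under the same lock always observes a consistent limit, and btrfs_resize_thread_pool() fans the new value out to every pool, logging the transition once instead of on every mount-option parse. The shape of that idiom as a userspace sketch with pthreads; the struct and names are illustrative, not btrfs's:

#include <pthread.h>

struct worker_pool {
	pthread_mutex_t lock;
	int max_workers;	/* only read or written under lock */
};

static void pool_set_max(struct worker_pool *pool, int new_limit)
{
	pthread_mutex_lock(&pool->lock);
	pool->max_workers = new_limit;
	pthread_mutex_unlock(&pool->lock);
}
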
1125static int btrfs_remount(struct super_block *sb, int *flags, char *data) 1138static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1126{ 1139{
1127 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1140 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@@ -1141,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1141 goto restore; 1154 goto restore;
1142 } 1155 }
1143 1156
1157 btrfs_resize_thread_pool(fs_info,
1158 fs_info->thread_pool_size, old_thread_pool_size);
1159
1144 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1160 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1145 return 0; 1161 return 0;
1146 1162
@@ -1184,7 +1200,8 @@ restore:
1184 fs_info->compress_type = old_compress_type; 1200 fs_info->compress_type = old_compress_type;
1185 fs_info->max_inline = old_max_inline; 1201 fs_info->max_inline = old_max_inline;
1186 fs_info->alloc_start = old_alloc_start; 1202 fs_info->alloc_start = old_alloc_start;
1187 fs_info->thread_pool_size = old_thread_pool_size; 1203 btrfs_resize_thread_pool(fs_info,
1204 old_thread_pool_size, fs_info->thread_pool_size);
1188 fs_info->metadata_ratio = old_metadata_ratio; 1205 fs_info->metadata_ratio = old_metadata_ratio;
1189 return ret; 1206 return ret;
1190} 1207}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 667735fb45e..1791c6e3d83 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -28,6 +28,7 @@
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h" 30#include "inode-map.h"
31#include "volumes.h"
31 32
32#define BTRFS_ROOT_TRANS_TAG 0 33#define BTRFS_ROOT_TRANS_TAG 0
33 34
@@ -777,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
777 if (ret) 778 if (ret)
778 return ret; 779 return ret;
779 780
781 ret = btrfs_run_dev_stats(trans, root->fs_info);
782 BUG_ON(ret);
783
780 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 784 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
781 next = fs_info->dirty_cowonly_roots.next; 785 next = fs_info->dirty_cowonly_roots.next;
782 list_del_init(next); 786 list_del_init(next);
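
Calling btrfs_run_dev_stats() from commit_cowonly_roots() makes the counters ride the same transaction as every other tree update; the helper itself (shown further down in volumes.c) skips devices whose stats are not both valid and dirty, and clears the dirty flag only when the item write succeeded. A userspace sketch of that flush loop, with illustrative names:

#include <stdbool.h>

struct device_stats {
	bool valid;	/* loaded or initialized at mount */
	bool dirty;	/* changed since the last flush */
};

static int flush_dev_stats(struct device_stats *devs, int ndevs,
			   int (*write_one)(int devid))
{
	int ret = 0, i;

	for (i = 0; i < ndevs; i++) {
		if (!devs[i].valid || !devs[i].dirty)
			continue;
		ret = write_one(i);
		if (!ret)
			devs[i].dirty = false;	/* persisted */
	}
	return ret;
}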
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index eb1ae908582..2017d0ff511 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1628 int i; 1628 int i;
1629 int ret; 1629 int ret;
1630 1630
1631 btrfs_read_buffer(eb, gen); 1631 ret = btrfs_read_buffer(eb, gen);
1632 if (ret)
1633 return ret;
1632 1634
1633 level = btrfs_header_level(eb); 1635 level = btrfs_header_level(eb);
1634 1636
@@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1749 1751
1750 path->slots[*level]++; 1752 path->slots[*level]++;
1751 if (wc->free) { 1753 if (wc->free) {
1752 btrfs_read_buffer(next, ptr_gen); 1754 ret = btrfs_read_buffer(next, ptr_gen);
1755 if (ret) {
1756 free_extent_buffer(next);
1757 return ret;
1758 }
1753 1759
1754 btrfs_tree_lock(next); 1760 btrfs_tree_lock(next);
1755 btrfs_set_lock_blocking(next); 1761 btrfs_set_lock_blocking(next);
@@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1766 free_extent_buffer(next); 1772 free_extent_buffer(next);
1767 continue; 1773 continue;
1768 } 1774 }
1769 btrfs_read_buffer(next, ptr_gen); 1775 ret = btrfs_read_buffer(next, ptr_gen);
1776 if (ret) {
1777 free_extent_buffer(next);
1778 return ret;
1779 }
1770 1780
1771 WARN_ON(*level <= 0); 1781 WARN_ON(*level <= 0);
1772 if (path->nodes[*level-1]) 1782 if (path->nodes[*level-1])
@@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2657 btrfs_release_path(path); 2667 btrfs_release_path(path);
2658 } 2668 }
2659 btrfs_release_path(path); 2669 btrfs_release_path(path);
2670 if (ret > 0)
2671 ret = 0;
2660 return ret; 2672 return ret;
2661} 2673}
2662 2674
@@ -3028,21 +3040,6 @@ out:
3028 return ret; 3040 return ret;
3029} 3041}
3030 3042
3031static int inode_in_log(struct btrfs_trans_handle *trans,
3032 struct inode *inode)
3033{
3034 struct btrfs_root *root = BTRFS_I(inode)->root;
3035 int ret = 0;
3036
3037 mutex_lock(&root->log_mutex);
3038 if (BTRFS_I(inode)->logged_trans == trans->transid &&
3039 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
3040 ret = 1;
3041 mutex_unlock(&root->log_mutex);
3042 return ret;
3043}
3044
3045
3046/* 3043/*
3047 * helper function around btrfs_log_inode to make sure newly created 3044 * helper function around btrfs_log_inode to make sure newly created
3048 * parent directories also end up in the log. A minimal inode and backref 3045 * parent directories also end up in the log. A minimal inode and backref
@@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3083 if (ret) 3080 if (ret)
3084 goto end_no_trans; 3081 goto end_no_trans;
3085 3082
3086 if (inode_in_log(trans, inode)) { 3083 if (btrfs_inode_in_log(inode, trans->transid)) {
3087 ret = BTRFS_NO_LOG_SYNC; 3084 ret = BTRFS_NO_LOG_SYNC;
3088 goto end_no_trans; 3085 goto end_no_trans;
3089 } 3086 }
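
All three tree-log hunks follow one pattern: btrfs_read_buffer(), previously treated as infallible, now returns an error, and each caller must drop the reference it holds on the buffer before propagating it. Condensed into a sketch, where read_node() and put_node() are hypothetical stand-ins for the btrfs helpers:

#include <stdint.h>

struct node;
int read_node(struct node *n, uint64_t gen);	/* hypothetical, may fail */
void put_node(struct node *n);			/* hypothetical, drops a ref */

static int visit_child(struct node *next, uint64_t gen)
{
	int ret = read_node(next, gen);

	if (ret) {
		put_node(next);	/* release what we hold before bailing out */
		return ret;	/* propagate instead of ignoring */
	}
	/* ... descend into the child ... */
	return 0;
}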
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index 2ef59400ad6..ab942f46b3d 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit);
95 * 95 *
96 * The allocated ulist will be returned in an initialized state. 96 * The allocated ulist will be returned in an initialized state.
97 */ 97 */
98struct ulist *ulist_alloc(unsigned long gfp_mask) 98struct ulist *ulist_alloc(gfp_t gfp_mask)
99{ 99{
100 struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); 100 struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
101 101
@@ -144,13 +144,13 @@ EXPORT_SYMBOL(ulist_free);
144 * unaltered. 144 * unaltered.
145 */ 145 */
146int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, 146int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
147 unsigned long gfp_mask) 147 gfp_t gfp_mask)
148{ 148{
149 return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); 149 return ulist_add_merge(ulist, val, aux, NULL, gfp_mask);
150} 150}
151 151
152int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, 152int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
153 unsigned long *old_aux, unsigned long gfp_mask) 153 unsigned long *old_aux, gfp_t gfp_mask)
154{ 154{
155 int i; 155 int i;
156 156
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index f1b1bf00c5a..21bdc8ec813 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -63,12 +63,12 @@ struct ulist {
63void ulist_init(struct ulist *ulist); 63void ulist_init(struct ulist *ulist);
64void ulist_fini(struct ulist *ulist); 64void ulist_fini(struct ulist *ulist);
65void ulist_reinit(struct ulist *ulist); 65void ulist_reinit(struct ulist *ulist);
66struct ulist *ulist_alloc(unsigned long gfp_mask); 66struct ulist *ulist_alloc(gfp_t gfp_mask);
67void ulist_free(struct ulist *ulist); 67void ulist_free(struct ulist *ulist);
68int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, 68int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
69 unsigned long gfp_mask); 69 gfp_t gfp_mask);
70int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, 70int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
71 unsigned long *old_aux, unsigned long gfp_mask); 71 unsigned long *old_aux, gfp_t gfp_mask);
72struct ulist_node *ulist_next(struct ulist *ulist, 72struct ulist_node *ulist_next(struct ulist *ulist,
73 struct ulist_iterator *uiter); 73 struct ulist_iterator *uiter);
74 74
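
Changing the ulist prototypes from unsigned long to gfp_t is not cosmetic: under sparse, gfp_t carries a __bitwise annotation, and funnelling allocation flags through a plain integer erases that annotation and the type checking with it. Roughly how the annotated type is declared, simplified from the kernel's definition:

#ifdef __CHECKER__
# define __bitwise __attribute__((bitwise))
#else
# define __bitwise
#endif
typedef unsigned int __bitwise gfp_t;

struct ulist;
int ulist_add(struct ulist *ulist, unsigned long long val,
	      unsigned long aux, gfp_t gfp_mask);	/* typed, checkable */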
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1411b99555a..7782020996f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
23#include <linux/random.h> 23#include <linux/random.h>
24#include <linux/iocontext.h> 24#include <linux/iocontext.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/ratelimit.h>
26#include <linux/kthread.h> 27#include <linux/kthread.h>
27#include <asm/div64.h> 28#include <asm/div64.h>
28#include "compat.h" 29#include "compat.h"
@@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
39 struct btrfs_root *root, 40 struct btrfs_root *root,
40 struct btrfs_device *device); 41 struct btrfs_device *device);
41static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 42static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
43static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
44static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
42 45
43static DEFINE_MUTEX(uuid_mutex); 46static DEFINE_MUTEX(uuid_mutex);
44static LIST_HEAD(fs_uuids); 47static LIST_HEAD(fs_uuids);
@@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path,
361 return -ENOMEM; 364 return -ENOMEM;
362 } 365 }
363 device->devid = devid; 366 device->devid = devid;
367 device->dev_stats_valid = 0;
364 device->work.func = pending_bios_fn; 368 device->work.func = pending_bios_fn;
365 memcpy(device->uuid, disk_super->dev_item.uuid, 369 memcpy(device->uuid, disk_super->dev_item.uuid,
366 BTRFS_UUID_SIZE); 370 BTRFS_UUID_SIZE);
@@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1633 int ret = 0; 1637 int ret = 0;
1634 1638
1635 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1639 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1636 return -EINVAL; 1640 return -EROFS;
1637 1641
1638 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, 1642 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
1639 root->fs_info->bdev_holder); 1643 root->fs_info->bdev_holder);
@@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
4001 return 0; 4005 return 0;
4002} 4006}
4003 4007
4008static void *merge_stripe_index_into_bio_private(void *bi_private,
4009 unsigned int stripe_index)
4010{
4011 /*
4012 * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
4013 * at most 1.
4014 * The alternative solution (instead of stealing bits from the
4015 * pointer) would be to allocate an intermediate structure
4016 * that contains the old private pointer plus the stripe_index.
4017 */
4018 BUG_ON((((uintptr_t)bi_private) & 3) != 0);
4019 BUG_ON(stripe_index > 3);
4020 return (void *)(((uintptr_t)bi_private) | stripe_index);
4021}
4022
4023static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
4024{
4025 return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
4026}
4027
4028static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
4029{
4030 return (unsigned int)((uintptr_t)bi_private) & 3;
4031}
4032
4004static void btrfs_end_bio(struct bio *bio, int err) 4033static void btrfs_end_bio(struct bio *bio, int err)
4005{ 4034{
4006 struct btrfs_bio *bbio = bio->bi_private; 4035 struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
4007 int is_orig_bio = 0; 4036 int is_orig_bio = 0;
4008 4037
4009 if (err) 4038 if (err) {
4010 atomic_inc(&bbio->error); 4039 atomic_inc(&bbio->error);
4040 if (err == -EIO || err == -EREMOTEIO) {
4041 unsigned int stripe_index =
4042 extract_stripe_index_from_bio_private(
4043 bio->bi_private);
4044 struct btrfs_device *dev;
4045
4046 BUG_ON(stripe_index >= bbio->num_stripes);
4047 dev = bbio->stripes[stripe_index].dev;
4048 if (bio->bi_rw & WRITE)
4049 btrfs_dev_stat_inc(dev,
4050 BTRFS_DEV_STAT_WRITE_ERRS);
4051 else
4052 btrfs_dev_stat_inc(dev,
4053 BTRFS_DEV_STAT_READ_ERRS);
4054 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4055 btrfs_dev_stat_inc(dev,
4056 BTRFS_DEV_STAT_FLUSH_ERRS);
4057 btrfs_dev_stat_print_on_error(dev);
4058 }
4059 }
4011 4060
4012 if (bio == bbio->orig_bio) 4061 if (bio == bbio->orig_bio)
4013 is_orig_bio = 1; 4062 is_orig_bio = 1;
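
merge_stripe_index_into_bio_private() relies on a classic trick: a pointer to an object with at least 4-byte alignment always has its two low bits clear, so a stripe index of 0 to 3 can be smuggled into those bits instead of allocating a wrapper holding pointer plus index, exactly as the comment in the hunk notes. A standalone userspace sketch of tagging and untagging:

#include <assert.h>
#include <stdint.h>

static void *tag_ptr(void *p, unsigned int idx)
{
	assert(((uintptr_t)p & 3) == 0);	/* need 4-byte alignment */
	assert(idx <= 3);			/* only 2 spare bits */
	return (void *)((uintptr_t)p | idx);
}

static void *untag_ptr(void *p)
{
	return (void *)((uintptr_t)p & ~(uintptr_t)3);
}

static unsigned int ptr_tag(void *p)
{
	return (unsigned int)((uintptr_t)p & 3);
}

The BUG_ON()s in the hunk play the role of the asserts here; if more than two bits were ever needed, the wrapper-structure alternative mentioned in the comment would become mandatory.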
@@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4149 bio = first_bio; 4198 bio = first_bio;
4150 } 4199 }
4151 bio->bi_private = bbio; 4200 bio->bi_private = bbio;
4201 bio->bi_private = merge_stripe_index_into_bio_private(
4202 bio->bi_private, (unsigned int)dev_nr);
4152 bio->bi_end_io = btrfs_end_bio; 4203 bio->bi_end_io = btrfs_end_bio;
4153 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4154 dev = bbio->stripes[dev_nr].dev; 4205 dev = bbio->stripes[dev_nr].dev;
@@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4509 return ret; 4560 return ret;
4510} 4561}
4511 4562
4563struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
4564 u64 logical, int mirror_num)
4565{
4566 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
4567 int ret;
4568 u64 map_length = 0;
4569 struct btrfs_bio *bbio = NULL;
4570 struct btrfs_device *device;
4571
4572 BUG_ON(mirror_num == 0);
4573 ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
4574 mirror_num);
4575 if (ret) {
4576 BUG_ON(bbio != NULL);
4577 return NULL;
4578 }
4579 BUG_ON(mirror_num != bbio->mirror_num);
4580 device = bbio->stripes[mirror_num - 1].dev;
4581 kfree(bbio);
4582 return device;
4583}
4584
4512int btrfs_read_chunk_tree(struct btrfs_root *root) 4585int btrfs_read_chunk_tree(struct btrfs_root *root)
4513{ 4586{
4514 struct btrfs_path *path; 4587 struct btrfs_path *path;
@@ -4583,3 +4656,230 @@ error:
4583 btrfs_free_path(path); 4656 btrfs_free_path(path);
4584 return ret; 4657 return ret;
4585} 4658}
4659
4660static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
4661{
4662 int i;
4663
4664 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4665 btrfs_dev_stat_reset(dev, i);
4666}
4667
4668int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4669{
4670 struct btrfs_key key;
4671 struct btrfs_key found_key;
4672 struct btrfs_root *dev_root = fs_info->dev_root;
4673 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
4674 struct extent_buffer *eb;
4675 int slot;
4676 int ret = 0;
4677 struct btrfs_device *device;
4678 struct btrfs_path *path = NULL;
4679 int i;
4680
4681 path = btrfs_alloc_path();
4682 if (!path) {
4683 ret = -ENOMEM;
4684 goto out;
4685 }
4686
4687 mutex_lock(&fs_devices->device_list_mutex);
4688 list_for_each_entry(device, &fs_devices->devices, dev_list) {
4689 int item_size;
4690 struct btrfs_dev_stats_item *ptr;
4691
4692 key.objectid = 0;
4693 key.type = BTRFS_DEV_STATS_KEY;
4694 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1;
4701 btrfs_release_path(path);
4702 continue;
4703 }
4704 slot = path->slots[0];
4705 eb = path->nodes[0];
4706 btrfs_item_key_to_cpu(eb, &found_key, slot);
4707 item_size = btrfs_item_size_nr(eb, slot);
4708
4709 ptr = btrfs_item_ptr(eb, slot,
4710 struct btrfs_dev_stats_item);
4711
4712 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
4713 if (item_size >= (1 + i) * sizeof(__le64))
4714 btrfs_dev_stat_set(device, i,
4715 btrfs_dev_stats_value(eb, ptr, i));
4716 else
4717 btrfs_dev_stat_reset(device, i);
4718 }
4719
4720 device->dev_stats_valid = 1;
4721 btrfs_dev_stat_print_on_load(device);
4722 btrfs_release_path(path);
4723 }
4724 mutex_unlock(&fs_devices->device_list_mutex);
4725
4726out:
4727 btrfs_free_path(path);
4728 return ret < 0 ? ret : 0;
4729}
4730
4731static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4732 struct btrfs_root *dev_root,
4733 struct btrfs_device *device)
4734{
4735 struct btrfs_path *path;
4736 struct btrfs_key key;
4737 struct extent_buffer *eb;
4738 struct btrfs_dev_stats_item *ptr;
4739 int ret;
4740 int i;
4741
4742 key.objectid = 0;
4743 key.type = BTRFS_DEV_STATS_KEY;
4744 key.offset = device->devid;
4745
4746 path = btrfs_alloc_path();
4747 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name);
4752 goto out;
4753 }
4754
4755 if (ret == 0 &&
4756 btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
4757 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret);
4762 goto out;
4763 }
4764 ret = 1;
4765 }
4766
4767 if (ret == 1) {
4768 /* need to insert a new item */
4769 btrfs_release_path(path);
4770 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr));
4772 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret);
4775 goto out;
4776 }
4777 }
4778
4779 eb = path->nodes[0];
4780 ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
4781 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4782 btrfs_set_dev_stats_value(eb, ptr, i,
4783 btrfs_dev_stat_read(device, i));
4784 btrfs_mark_buffer_dirty(eb);
4785
4786out:
4787 btrfs_free_path(path);
4788 return ret;
4789}
4790
4791/*
4792 * called from commit_transaction. Writes all changed device stats to disk.
4793 */
4794int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
4795 struct btrfs_fs_info *fs_info)
4796{
4797 struct btrfs_root *dev_root = fs_info->dev_root;
4798 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
4799 struct btrfs_device *device;
4800 int ret = 0;
4801
4802 mutex_lock(&fs_devices->device_list_mutex);
4803 list_for_each_entry(device, &fs_devices->devices, dev_list) {
4804 if (!device->dev_stats_valid || !device->dev_stats_dirty)
4805 continue;
4806
4807 ret = update_dev_stat_item(trans, dev_root, device);
4808 if (!ret)
4809 device->dev_stats_dirty = 0;
4810 }
4811 mutex_unlock(&fs_devices->device_list_mutex);
4812
4813 return ret;
4814}
4815
4816void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
4817{
4818 btrfs_dev_stat_inc(dev, index);
4819 btrfs_dev_stat_print_on_error(dev);
4820}
4821
4822void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{
4824 if (!dev->dev_stats_valid)
4825 return;
4826 printk_ratelimited(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name,
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
4832 btrfs_dev_stat_read(dev,
4833 BTRFS_DEV_STAT_CORRUPTION_ERRS),
4834 btrfs_dev_stat_read(dev,
4835 BTRFS_DEV_STAT_GENERATION_ERRS));
4836}
4837
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name,
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
4845 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
4846 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
4847}
4848
4849int btrfs_get_dev_stats(struct btrfs_root *root,
4850 struct btrfs_ioctl_get_dev_stats *stats,
4851 int reset_after_read)
4852{
4853 struct btrfs_device *dev;
4854 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
4855 int i;
4856
4857 mutex_lock(&fs_devices->device_list_mutex);
4858 dev = btrfs_find_device(root, stats->devid, NULL, NULL);
4859 mutex_unlock(&fs_devices->device_list_mutex);
4860
4861 if (!dev) {
4862 printk(KERN_WARNING
4863 "btrfs: get dev_stats failed, device not found\n");
4864 return -ENODEV;
4865 } else if (!dev->dev_stats_valid) {
4866 printk(KERN_WARNING
4867 "btrfs: get dev_stats failed, not yet valid\n");
4868 return -ENODEV;
4869 } else if (reset_after_read) {
4870 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
4871 if (stats->nr_items > i)
4872 stats->values[i] =
4873 btrfs_dev_stat_read_and_reset(dev, i);
4874 else
4875 btrfs_dev_stat_reset(dev, i);
4876 }
4877 } else {
4878 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4879 if (stats->nr_items > i)
4880 stats->values[i] = btrfs_dev_stat_read(dev, i);
4881 }
4882 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
4883 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
4884 return 0;
4885}
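
btrfs_init_dev_stats() shows the on-disk format: per device, one item keyed by (0, BTRFS_DEV_STATS_KEY, devid) holding an array of little-endian 64-bit counters. The size check item_size >= (1 + i) * sizeof(__le64) lets a newer kernel read shorter items written before a counter existed, resetting whatever is missing. The same tolerant load as a userspace sketch, assuming a little-endian host for the stubbed le64_to_cpu():

#include <stddef.h>
#include <stdint.h>

#define DEV_STAT_VALUES_MAX 5	/* wr, rd, flush, corruption, generation */

static uint64_t le64_to_cpu(uint64_t v) { return v; }	/* LE host stub */

static void load_dev_stats(const uint64_t *item, size_t item_size,
			   uint64_t out[DEV_STAT_VALUES_MAX])
{
	size_t i;

	for (i = 0; i < DEV_STAT_VALUES_MAX; i++) {
		if (item_size >= (i + 1) * sizeof(uint64_t))
			out[i] = le64_to_cpu(item[i]);
		else
			out[i] = 0;	/* older, shorter item */
	}
}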
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bb6b03f97aa..3406a88ca83 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -22,6 +22,7 @@
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/sort.h> 23#include <linux/sort.h>
24#include "async-thread.h" 24#include "async-thread.h"
25#include "ioctl.h"
25 26
26#define BTRFS_STRIPE_LEN (64 * 1024) 27#define BTRFS_STRIPE_LEN (64 * 1024)
27 28
@@ -106,6 +107,11 @@ struct btrfs_device {
106 struct completion flush_wait; 107 struct completion flush_wait;
107 int nobarriers; 108 int nobarriers;
108 109
110 /* disk I/O failure stats. For detailed description refer to
111 * enum btrfs_dev_stat_values in ioctl.h */
112 int dev_stats_valid;
113 int dev_stats_dirty; /* counters need to be written to disk */
114 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
109}; 115};
110 116
111struct btrfs_fs_devices { 117struct btrfs_fs_devices {
@@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
281int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
282int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 288int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
283 u64 *start, u64 *max_avail); 289 u64 *start, u64 *max_avail);
290struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
291 u64 logical, int mirror_num);
292void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
293void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
294int btrfs_get_dev_stats(struct btrfs_root *root,
295 struct btrfs_ioctl_get_dev_stats *stats,
296 int reset_after_read);
297int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
298int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
299 struct btrfs_fs_info *fs_info);
300
301static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
302 int index)
303{
304 atomic_inc(dev->dev_stat_values + index);
305 dev->dev_stats_dirty = 1;
306}
307
308static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
309 int index)
310{
311 return atomic_read(dev->dev_stat_values + index);
312}
313
314static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
315 int index)
316{
317 int ret;
318
319 ret = atomic_xchg(dev->dev_stat_values + index, 0);
320 dev->dev_stats_dirty = 1;
321 return ret;
322}
323
324static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
325 int index, unsigned long val)
326{
327 atomic_set(dev->dev_stat_values + index, val);
328 dev->dev_stats_dirty = 1;
329}
330
331static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
332 int index)
333{
334 btrfs_dev_stat_set(dev, index, 0);
335}
284#endif 336#endif
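
The btrfs_dev_stat_* inlines pair lock-free atomic_t counters, safe to bump from bio completion context, with a plain dev_stats_dirty flag that the transaction commit polls. btrfs_dev_stat_read_and_reset() uses atomic_xchg() so the ioctl's read-and-clear is a single atomic step, and it re-dirties the device so the zeroed values get persisted too. A userspace analogue with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

struct dev_stats {
	atomic_int values[5];
	bool dirty;		/* polled by the committer */
};

static void stat_inc(struct dev_stats *s, int i)
{
	atomic_fetch_add(&s->values[i], 1);	/* lock-free bump */
	s->dirty = true;
}

static int stat_read_and_reset(struct dev_stats *s, int i)
{
	int ret = atomic_exchange(&s->values[i], 0);

	s->dirty = true;	/* the zeroed value must be persisted too */
	return ret;
}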
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index e7a5659087e..3f4e2d69e83 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
196 if (ret) 196 if (ret)
197 goto out; 197 goto out;
198 198
199 inode_inc_iversion(inode);
199 inode->i_ctime = CURRENT_TIME; 200 inode->i_ctime = CURRENT_TIME;
200 ret = btrfs_update_inode(trans, root, inode); 201 ret = btrfs_update_inode(trans, root, inode);
201 BUG_ON(ret); 202 BUG_ON(ret);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index faccd47c6c4..92c20654cc5 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
370 PAGE_CACHE_SIZE - buf_offset); 370 PAGE_CACHE_SIZE - buf_offset);
371 bytes = min(bytes, bytes_left); 371 bytes = min(bytes, bytes_left);
372 372
373 kaddr = kmap_atomic(dest_page, KM_USER0); 373 kaddr = kmap_atomic(dest_page);
374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); 374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
375 kunmap_atomic(kaddr, KM_USER0); 375 kunmap_atomic(kaddr);
376 376
377 pg_offset += bytes; 377 pg_offset += bytes;
378 bytes_left -= bytes; 378 bytes_left -= bytes;
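
The kmap_atomic() calls here, and the ones in scrub.c above, all lose their second argument: the KM_USER0-style slot constants were removed in favor of a small per-CPU stack of atomic mappings, so map/unmap pairs must still be strictly nested but no slot is named. The post-change convention as a kernel-context fragment (not standalone code):

static void copy_into_page(struct page *page, size_t off,
			   const void *src, size_t bytes)
{
	char *kaddr = kmap_atomic(page);   /* was kmap_atomic(page, KM_USER0) */

	memcpy(kaddr + off, src, bytes);
	kunmap_atomic(kaddr);              /* was kunmap_atomic(kaddr, KM_USER0) */
}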
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a30db77af3..ad5938ca357 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -29,7 +29,7 @@
29#include <linux/file.h> 29#include <linux/file.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/highmem.h> 31#include <linux/highmem.h>
32#include <linux/module.h> 32#include <linux/export.h>
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34#include <linux/hash.h> 34#include <linux/hash.h>
35#include <linux/suspend.h> 35#include <linux/suspend.h>
@@ -921,6 +921,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
921 struct buffer_head *head = page_buffers(page); 921 struct buffer_head *head = page_buffers(page);
922 struct buffer_head *bh = head; 922 struct buffer_head *bh = head;
923 int uptodate = PageUptodate(page); 923 int uptodate = PageUptodate(page);
924 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
924 925
925 do { 926 do {
926 if (!buffer_mapped(bh)) { 927 if (!buffer_mapped(bh)) {
@@ -929,7 +930,8 @@ init_page_buffers(struct page *page, struct block_device *bdev,
929 bh->b_blocknr = block; 930 bh->b_blocknr = block;
930 if (uptodate) 931 if (uptodate)
931 set_buffer_uptodate(bh); 932 set_buffer_uptodate(bh);
932 set_buffer_mapped(bh); 933 if (block < end_block)
934 set_buffer_mapped(bh);
933 } 935 }
934 block++; 936 block++;
935 bh = bh->b_this_page; 937 bh = bh->b_this_page;
@@ -985,7 +987,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
985 return page; 987 return page;
986 988
987failed: 989failed:
988 BUG();
989 unlock_page(page); 990 unlock_page(page);
990 page_cache_release(page); 991 page_cache_release(page);
991 return NULL; 992 return NULL;
@@ -1384,10 +1385,23 @@ static void invalidate_bh_lru(void *arg)
1384 } 1385 }
1385 put_cpu_var(bh_lrus); 1386 put_cpu_var(bh_lrus);
1386} 1387}
1388
1389static bool has_bh_in_lru(int cpu, void *dummy)
1390{
1391 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1392 int i;
1387 1393
1394 for (i = 0; i < BH_LRU_SIZE; i++) {
1395 if (b->bhs[i])
1396 return 1;
1397 }
1398
1399 return 0;
1400}
1401
1388void invalidate_bh_lrus(void) 1402void invalidate_bh_lrus(void)
1389{ 1403{
1390 on_each_cpu(invalidate_bh_lru, NULL, 1); 1404 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1391} 1405}
1392EXPORT_SYMBOL_GPL(invalidate_bh_lrus); 1406EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1393 1407
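
The invalidate_bh_lrus() change avoids interrupting every CPU: has_bh_in_lru() is evaluated first, and on_each_cpu_cond() sends the invalidation IPI only to CPUs whose per-CPU LRU actually holds buffer heads. A simplified serial model of that primitive's shape; the real one runs func on each selected CPU rather than locally:

#include <stdbool.h>

typedef bool (*cpu_cond_fn)(int cpu, void *info);
typedef void (*cpu_work_fn)(void *info);

static void for_each_cpu_cond(int ncpus, cpu_cond_fn cond,
			      cpu_work_fn func, void *info)
{
	int cpu;

	for (cpu = 0; cpu < ncpus; cpu++)
		if (cond(cpu, info))
			func(info);	/* kernel: run *on* that CPU via IPI */
}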
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index a0358c2189c..7f0771d3894 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -646,7 +646,8 @@ lookup_again:
646 * (this is used to keep track of culling, and atimes are only 646 * (this is used to keep track of culling, and atimes are only
647 * updated by read, write and readdir but not lookup or 647 * updated by read, write and readdir but not lookup or
648 * open) */ 648 * open) */
649 touch_atime(cache->mnt, next); 649 path.dentry = next;
650 touch_atime(&path);
650 } 651 }
651 652
652 /* open a file interface onto a data file */ 653 /* open a file interface onto a data file */
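
touch_atime() now takes a single struct path rather than a (vfsmount, dentry) pair, so callers bundle the two; the hunk implies a local path whose mnt member was filled in earlier in the function. The complete pattern as a kernel-context fragment (not standalone):

struct path path;	/* <linux/path.h>: holds a vfsmount and a dentry */

path.mnt = cache->mnt;	/* assumed to be set earlier in the real function */
path.dentry = next;
touch_atime(&path);	/* was: touch_atime(cache->mnt, next) */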
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2c489378b4c..9fff9f3b17e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,18 +677,19 @@ static int fill_inode(struct inode *inode,
677 case S_IFLNK: 677 case S_IFLNK:
678 inode->i_op = &ceph_symlink_iops; 678 inode->i_op = &ceph_symlink_iops;
679 if (!ci->i_symlink) { 679 if (!ci->i_symlink) {
680 int symlen = iinfo->symlink_len; 680 u32 symlen = iinfo->symlink_len;
681 char *sym; 681 char *sym;
682 682
683 BUG_ON(symlen != inode->i_size);
684 spin_unlock(&ci->i_ceph_lock); 683 spin_unlock(&ci->i_ceph_lock);
685 684
685 err = -EINVAL;
686 if (WARN_ON(symlen != inode->i_size))
687 goto out;
688
686 err = -ENOMEM; 689 err = -ENOMEM;
687 sym = kmalloc(symlen+1, GFP_NOFS); 690 sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
688 if (!sym) 691 if (!sym)
689 goto out; 692 goto out;
690 memcpy(sym, iinfo->symlink, symlen);
691 sym[symlen] = 0;
692 693
693 spin_lock(&ci->i_ceph_lock); 694 spin_lock(&ci->i_ceph_lock);
694 if (!ci->i_symlink) 695 if (!ci->i_symlink)
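
Two things change above: a wire-supplied symlink length no longer BUG()s the client (a WARN_ON plus -EINVAL instead), and the open-coded kmalloc/memcpy/NUL-terminate collapses into kstrndup(). Roughly what kstrndup(buf, len, gfp) guarantees, modelled in userspace:

#include <stdlib.h>
#include <string.h>

/* Allocate up to len + 1 bytes, copy at most len bytes (stopping at an
 * embedded NUL), and always terminate the result. */
static char *xstrndup(const char *buf, size_t len)
{
	char *s;

	len = strnlen(buf, len);
	s = malloc(len + 1);
	if (!s)
		return NULL;
	memcpy(s, buf, len);
	s[len] = '\0';
	return s;
}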
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 866e8d7ca37..89971e137aa 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -402,7 +402,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
402 402
403 spin_lock_init(&s->s_gen_ttl_lock); 403 spin_lock_init(&s->s_gen_ttl_lock);
404 s->s_cap_gen = 0; 404 s->s_cap_gen = 0;
405 s->s_cap_ttl = 0; 405 s->s_cap_ttl = jiffies - 1;
406 406
407 spin_lock_init(&s->s_cap_lock); 407 spin_lock_init(&s->s_cap_lock);
408 s->s_renew_requested = 0; 408 s->s_renew_requested = 0;
@@ -1083,8 +1083,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc,
1083 int wake = 0; 1083 int wake = 0;
1084 1084
1085 spin_lock(&session->s_cap_lock); 1085 spin_lock(&session->s_cap_lock);
1086 was_stale = is_renew && (session->s_cap_ttl == 0 || 1086 was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
1087 time_after_eq(jiffies, session->s_cap_ttl));
1088 1087
1089 session->s_cap_ttl = session->s_renew_requested + 1088 session->s_cap_ttl = session->s_renew_requested +
1090 mdsc->mdsmap->m_session_timeout*HZ; 1089 mdsc->mdsmap->m_session_timeout*HZ;
@@ -2332,7 +2331,7 @@ static void handle_session(struct ceph_mds_session *session,
2332 session->s_mds); 2331 session->s_mds);
2333 spin_lock(&session->s_gen_ttl_lock); 2332 spin_lock(&session->s_gen_ttl_lock);
2334 session->s_cap_gen++; 2333 session->s_cap_gen++;
2335 session->s_cap_ttl = 0; 2334 session->s_cap_ttl = jiffies - 1;
2336 spin_unlock(&session->s_gen_ttl_lock); 2335 spin_unlock(&session->s_gen_ttl_lock);
2337 send_renew_caps(mdsc, session); 2336 send_renew_caps(mdsc, session);
2338 break; 2337 break;
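
Initializing s_cap_ttl to jiffies - 1 instead of 0 makes a fresh or revoked session simply "already expired", so the s_cap_ttl == 0 special case disappears and a single wrap-safe time_after_eq() comparison suffices. The comparison style, sketched in userspace:

#include <stdbool.h>

/* Wrap-safe "a is at or past b" for a free-running tick counter, in
 * the style of the kernel's time_after_eq(). */
static bool tick_after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

/*
 * ttl = now - 1;                    a fresh session starts out stale
 * stale = tick_after_eq(now, ttl);  true, with no zero-sentinel test
 */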
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index a559c80f127..f04c0961f99 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -331,7 +331,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
331 331
332 /* alloc new snap context */ 332 /* alloc new snap context */
333 err = -ENOMEM; 333 err = -ENOMEM;
334 if (num > ULONG_MAX / sizeof(u64) - sizeof(*snapc)) 334 if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64))
335 goto fail; 335 goto fail;
336 snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS); 336 snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
337 if (!snapc) 337 if (!snapc)
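
The corrected bound follows from the allocation size sizeof(*snapc) + num * sizeof(u64): subtract the header from the byte budget first, then divide by the element size. The old expression subtracted a byte count from an element count; with these numbers it happened to err on the strict side, but it was the wrong formula. As a checkable sketch, with SNAPC_HDR standing in for sizeof(*snapc):

#include <limits.h>
#include <stdint.h>

#define SNAPC_HDR 40	/* illustrative stand-in for sizeof(*snapc) */

/* The allocation SNAPC_HDR + num * sizeof(uint64_t) fits in an
 * unsigned long iff the following holds. The replaced test,
 * ULONG_MAX / sizeof(u64) - SNAPC_HDR, is stricter here by about
 * SNAPC_HDR * 7 / 8 elements, purely by accident of the values. */
static int snap_count_fits(unsigned long num)
{
	return num <= (ULONG_MAX - SNAPC_HDR) / sizeof(uint64_t);
}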
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 00de2c9568c..1e67dd7305a 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -130,10 +130,12 @@ enum {
130 Opt_nodirstat, 130 Opt_nodirstat,
131 Opt_rbytes, 131 Opt_rbytes,
132 Opt_norbytes, 132 Opt_norbytes,
133 Opt_asyncreaddir,
133 Opt_noasyncreaddir, 134 Opt_noasyncreaddir,
134 Opt_dcache, 135 Opt_dcache,
135 Opt_nodcache, 136 Opt_nodcache,
136 Opt_ino32, 137 Opt_ino32,
138 Opt_noino32,
137}; 139};
138 140
139static match_table_t fsopt_tokens = { 141static match_table_t fsopt_tokens = {
@@ -153,10 +155,12 @@ static match_table_t fsopt_tokens = {
153 {Opt_nodirstat, "nodirstat"}, 155 {Opt_nodirstat, "nodirstat"},
154 {Opt_rbytes, "rbytes"}, 156 {Opt_rbytes, "rbytes"},
155 {Opt_norbytes, "norbytes"}, 157 {Opt_norbytes, "norbytes"},
158 {Opt_asyncreaddir, "asyncreaddir"},
156 {Opt_noasyncreaddir, "noasyncreaddir"}, 159 {Opt_noasyncreaddir, "noasyncreaddir"},
157 {Opt_dcache, "dcache"}, 160 {Opt_dcache, "dcache"},
158 {Opt_nodcache, "nodcache"}, 161 {Opt_nodcache, "nodcache"},
159 {Opt_ino32, "ino32"}, 162 {Opt_ino32, "ino32"},
163 {Opt_noino32, "noino32"},
160 {-1, NULL} 164 {-1, NULL}
161}; 165};
162 166
@@ -232,6 +236,9 @@ static int parse_fsopt_token(char *c, void *private)
232 case Opt_norbytes: 236 case Opt_norbytes:
233 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 237 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
234 break; 238 break;
239 case Opt_asyncreaddir:
240 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
241 break;
235 case Opt_noasyncreaddir: 242 case Opt_noasyncreaddir:
236 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 243 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
237 break; 244 break;
@@ -244,6 +251,9 @@ static int parse_fsopt_token(char *c, void *private)
244 case Opt_ino32: 251 case Opt_ino32:
245 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 252 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
246 break; 253 break;
254 case Opt_noino32:
255 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
256 break;
247 default: 257 default:
248 BUG_ON(token); 258 BUG_ON(token);
249 } 259 }
@@ -334,10 +344,12 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
334 *path += 2; 344 *path += 2;
335 dout("server path '%s'\n", *path); 345 dout("server path '%s'\n", *path);
336 346
337 err = ceph_parse_options(popt, options, dev_name, dev_name_end, 347 *popt = ceph_parse_options(options, dev_name, dev_name_end,
338 parse_fsopt_token, (void *)fsopt); 348 parse_fsopt_token, (void *)fsopt);
339 if (err) 349 if (IS_ERR(*popt)) {
350 err = PTR_ERR(*popt);
340 goto out; 351 goto out;
352 }
341 353
342 /* success */ 354 /* success */
343 *pfsopt = fsopt; 355 *pfsopt = fsopt;
@@ -655,9 +667,8 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
655 dout("open_root_inode success\n"); 667 dout("open_root_inode success\n");
656 if (ceph_ino(inode) == CEPH_INO_ROOT && 668 if (ceph_ino(inode) == CEPH_INO_ROOT &&
657 fsc->sb->s_root == NULL) { 669 fsc->sb->s_root == NULL) {
658 root = d_alloc_root(inode); 670 root = d_make_root(inode);
659 if (!root) { 671 if (!root) {
660 iput(inode);
661 root = ERR_PTR(-ENOMEM); 672 root = ERR_PTR(-ENOMEM);
662 goto out; 673 goto out;
663 } 674 }
@@ -927,6 +938,7 @@ static int __init init_ceph(void)
927 if (ret) 938 if (ret)
928 goto out; 939 goto out;
929 940
941 ceph_xattr_init();
930 ret = register_filesystem(&ceph_fs_type); 942 ret = register_filesystem(&ceph_fs_type);
931 if (ret) 943 if (ret)
932 goto out_icache; 944 goto out_icache;
@@ -936,6 +948,7 @@ static int __init init_ceph(void)
936 return 0; 948 return 0;
937 949
938out_icache: 950out_icache:
951 ceph_xattr_exit();
939 destroy_caches(); 952 destroy_caches();
940out: 953out:
941 return ret; 954 return ret;
@@ -945,6 +958,7 @@ static void __exit exit_ceph(void)
945{ 958{
946 dout("exit_ceph\n"); 959 dout("exit_ceph\n");
947 unregister_filesystem(&ceph_fs_type); 960 unregister_filesystem(&ceph_fs_type);
961 ceph_xattr_exit();
948 destroy_caches(); 962 destroy_caches();
949} 963}
950 964
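ceph_parse_options() switches from an int-plus-out-parameter contract to the kernel's ERR_PTR convention: the returned pointer either is valid or encodes a small negative errno, tested with IS_ERR() and decoded with PTR_ERR(). A userspace model of the three helpers, mirroring <linux/err.h> in spirit:

#define MAX_ERRNO 4095

static void *ERR_PTR(long error)     { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/*
 * usage, mirroring the hunk:
 *	*popt = ceph_parse_options(...);
 *	if (IS_ERR(*popt))
 *		return PTR_ERR(*popt);
 */
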
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1421f3d875a..fc35036d258 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -367,7 +367,7 @@ static inline u32 ceph_ino_to_ino32(__u64 vino)
367 u32 ino = vino & 0xffffffff; 367 u32 ino = vino & 0xffffffff;
368 ino ^= vino >> 32; 368 ino ^= vino >> 32;
369 if (!ino) 369 if (!ino)
370 ino = 1; 370 ino = 2;
371 return ino; 371 return ino;
372} 372}
373 373
@@ -733,6 +733,8 @@ extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
733extern int ceph_removexattr(struct dentry *, const char *); 733extern int ceph_removexattr(struct dentry *, const char *);
734extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); 734extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
735extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); 735extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
736extern void __init ceph_xattr_init(void);
737extern void ceph_xattr_exit(void);
736 738
737/* caps.c */ 739/* caps.c */
738extern const char *ceph_cap_string(int c); 740extern const char *ceph_cap_string(int c);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a76f697303d..35b86331d8a 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -8,9 +8,12 @@
8#include <linux/xattr.h> 8#include <linux/xattr.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10 10
11#define XATTR_CEPH_PREFIX "ceph."
12#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
13
11static bool ceph_is_valid_xattr(const char *name) 14static bool ceph_is_valid_xattr(const char *name)
12{ 15{
13 return !strncmp(name, "ceph.", 5) || 16 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
14 !strncmp(name, XATTR_SECURITY_PREFIX, 17 !strncmp(name, XATTR_SECURITY_PREFIX,
15 XATTR_SECURITY_PREFIX_LEN) || 18 XATTR_SECURITY_PREFIX_LEN) ||
16 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 19 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
@@ -21,79 +24,91 @@ static bool ceph_is_valid_xattr(const char *name)
21 * These define virtual xattrs exposing the recursive directory 24 * These define virtual xattrs exposing the recursive directory
22 * statistics and layout metadata. 25 * statistics and layout metadata.
23 */ 26 */
24struct ceph_vxattr_cb { 27struct ceph_vxattr {
25 bool readonly;
26 char *name; 28 char *name;
29 size_t name_size; /* strlen(name) + 1 (for '\0') */
27 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 30 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
28 size_t size); 31 size_t size);
32 bool readonly;
29}; 33};
30 34
31/* directories */ 35/* directories */
32 36
33static size_t ceph_vxattrcb_entries(struct ceph_inode_info *ci, char *val, 37static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
34 size_t size) 38 size_t size)
35{ 39{
36 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); 40 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
37} 41}
38 42
39static size_t ceph_vxattrcb_files(struct ceph_inode_info *ci, char *val, 43static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
40 size_t size) 44 size_t size)
41{ 45{
42 return snprintf(val, size, "%lld", ci->i_files); 46 return snprintf(val, size, "%lld", ci->i_files);
43} 47}
44 48
45static size_t ceph_vxattrcb_subdirs(struct ceph_inode_info *ci, char *val, 49static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
46 size_t size) 50 size_t size)
47{ 51{
48 return snprintf(val, size, "%lld", ci->i_subdirs); 52 return snprintf(val, size, "%lld", ci->i_subdirs);
49} 53}
50 54
51static size_t ceph_vxattrcb_rentries(struct ceph_inode_info *ci, char *val, 55static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
52 size_t size) 56 size_t size)
53{ 57{
54 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); 58 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
55} 59}
56 60
57static size_t ceph_vxattrcb_rfiles(struct ceph_inode_info *ci, char *val, 61static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
58 size_t size) 62 size_t size)
59{ 63{
60 return snprintf(val, size, "%lld", ci->i_rfiles); 64 return snprintf(val, size, "%lld", ci->i_rfiles);
61} 65}
62 66
63static size_t ceph_vxattrcb_rsubdirs(struct ceph_inode_info *ci, char *val, 67static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
64 size_t size) 68 size_t size)
65{ 69{
66 return snprintf(val, size, "%lld", ci->i_rsubdirs); 70 return snprintf(val, size, "%lld", ci->i_rsubdirs);
67} 71}
68 72
69static size_t ceph_vxattrcb_rbytes(struct ceph_inode_info *ci, char *val, 73static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
70 size_t size) 74 size_t size)
71{ 75{
72 return snprintf(val, size, "%lld", ci->i_rbytes); 76 return snprintf(val, size, "%lld", ci->i_rbytes);
73} 77}
74 78
75static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, 79static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
76 size_t size) 80 size_t size)
77{ 81{
78 return snprintf(val, size, "%ld.%ld", (long)ci->i_rctime.tv_sec, 82 return snprintf(val, size, "%ld.%09ld", (long)ci->i_rctime.tv_sec,
79 (long)ci->i_rctime.tv_nsec); 83 (long)ci->i_rctime.tv_nsec);
80} 84}
81 85
82static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { 86#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
83 { true, "ceph.dir.entries", ceph_vxattrcb_entries}, 87
84 { true, "ceph.dir.files", ceph_vxattrcb_files}, 88#define XATTR_NAME_CEPH(_type, _name) \
85 { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, 89 { \
86 { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, 90 .name = CEPH_XATTR_NAME(_type, _name), \
87 { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, 91 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
88 { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, 92 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
89 { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, 93 .readonly = true, \
90 { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, 94 }
91 { true, NULL, NULL } 95
96static struct ceph_vxattr ceph_dir_vxattrs[] = {
97 XATTR_NAME_CEPH(dir, entries),
98 XATTR_NAME_CEPH(dir, files),
99 XATTR_NAME_CEPH(dir, subdirs),
100 XATTR_NAME_CEPH(dir, rentries),
101 XATTR_NAME_CEPH(dir, rfiles),
102 XATTR_NAME_CEPH(dir, rsubdirs),
103 XATTR_NAME_CEPH(dir, rbytes),
104 XATTR_NAME_CEPH(dir, rctime),
105 { 0 } /* Required table terminator */
92}; 106};
107static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
93 108
94/* files */ 109/* files */
95 110
96static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 111static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
97 size_t size) 112 size_t size)
98{ 113{
99 int ret; 114 int ret;
@@ -103,21 +118,32 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
103 (unsigned long long)ceph_file_layout_su(ci->i_layout), 118 (unsigned long long)ceph_file_layout_su(ci->i_layout),
104 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 119 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
105 (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); 120 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
106 if (ceph_file_layout_pg_preferred(ci->i_layout)) 121
107 ret += snprintf(val + ret, size, "preferred_osd=%lld\n", 122 if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
123 val += ret;
124 size -= ret;
125 ret += snprintf(val, size, "preferred_osd=%lld\n",
108 (unsigned long long)ceph_file_layout_pg_preferred( 126 (unsigned long long)ceph_file_layout_pg_preferred(
109 ci->i_layout)); 127 ci->i_layout));
128 }
129
110 return ret; 130 return ret;
111} 131}
112 132
113static struct ceph_vxattr_cb ceph_file_vxattrs[] = { 133static struct ceph_vxattr ceph_file_vxattrs[] = {
114 { true, "ceph.file.layout", ceph_vxattrcb_layout}, 134 XATTR_NAME_CEPH(file, layout),
115 /* The following extended attribute name is deprecated */ 135 /* The following extended attribute name is deprecated */
116 { true, "ceph.layout", ceph_vxattrcb_layout}, 136 {
117 { true, NULL, NULL } 137 .name = XATTR_CEPH_PREFIX "layout",
138 .name_size = sizeof (XATTR_CEPH_PREFIX "layout"),
139 .getxattr_cb = ceph_vxattrcb_file_layout,
140 .readonly = true,
141 },
142 { 0 } /* Required table terminator */
118}; 143};
144static size_t ceph_file_vxattrs_name_size; /* total size of all names */
119 145
120static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) 146static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
121{ 147{
122 if (S_ISDIR(inode->i_mode)) 148 if (S_ISDIR(inode->i_mode))
123 return ceph_dir_vxattrs; 149 return ceph_dir_vxattrs;
@@ -126,14 +152,59 @@ static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
126 return NULL; 152 return NULL;
127} 153}
128 154
129static struct ceph_vxattr_cb *ceph_match_vxattr(struct ceph_vxattr_cb *vxattr, 155static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
156{
157 if (vxattrs == ceph_dir_vxattrs)
158 return ceph_dir_vxattrs_name_size;
159 if (vxattrs == ceph_file_vxattrs)
160 return ceph_file_vxattrs_name_size;
161 BUG();
162
163 return 0;
164}
165
166/*
167 * Compute the aggregate size (including terminating '\0') of all
168 * virtual extended attribute names in the given vxattr table.
169 */
170static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
171{
172 struct ceph_vxattr *vxattr;
173 size_t size = 0;
174
175 for (vxattr = vxattrs; vxattr->name; vxattr++)
176 size += vxattr->name_size;
177
178 return size;
179}
180
181/* Routines called at initialization and exit time */
182
183void __init ceph_xattr_init(void)
184{
185 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
186 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
187}
188
189void ceph_xattr_exit(void)
190{
191 ceph_dir_vxattrs_name_size = 0;
192 ceph_file_vxattrs_name_size = 0;
193}
194
195static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
130 const char *name) 196 const char *name)
131{ 197{
132 do { 198 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
133 if (strcmp(vxattr->name, name) == 0) 199
134 return vxattr; 200 if (vxattr) {
135 vxattr++; 201 while (vxattr->name) {
136 } while (vxattr->name); 202 if (!strcmp(vxattr->name, name))
203 return vxattr;
204 vxattr++;
205 }
206 }
207
137 return NULL; 208 return NULL;
138} 209}
139 210
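The XATTR_NAME_CEPH()/CEPH_XATTR_NAME() macros above build each vxattr name by string-literal concatenation, so sizeof on the result yields the name length, including its NUL, at compile time; ceph_xattr_init() then sums those per table once, letting ceph_listxattr() obtain the total without re-walking the table with strlen(). The compile-time trick in isolation, runnable in userspace:

#include <stdio.h>

#define PREFIX "ceph."
#define VXATTR_NAME(_type, _name) PREFIX #_type "." #_name

int main(void)
{
	/* string-literal pasting happens at compile time, so sizeof
	 * gives the full name length including the trailing '\0' */
	printf("%s -> %zu bytes\n",
	       VXATTR_NAME(dir, entries),		/* "ceph.dir.entries" */
	       sizeof(VXATTR_NAME(dir, entries)));	/* 17 */
	return 0;
}
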
@@ -502,17 +573,15 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
502{ 573{
503 struct inode *inode = dentry->d_inode; 574 struct inode *inode = dentry->d_inode;
504 struct ceph_inode_info *ci = ceph_inode(inode); 575 struct ceph_inode_info *ci = ceph_inode(inode);
505 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
506 int err; 576 int err;
507 struct ceph_inode_xattr *xattr; 577 struct ceph_inode_xattr *xattr;
508 struct ceph_vxattr_cb *vxattr = NULL; 578 struct ceph_vxattr *vxattr = NULL;
509 579
510 if (!ceph_is_valid_xattr(name)) 580 if (!ceph_is_valid_xattr(name))
511 return -ENODATA; 581 return -ENODATA;
512 582
513 /* let's see if a virtual xattr was requested */ 583 /* let's see if a virtual xattr was requested */
514 if (vxattrs) 584 vxattr = ceph_match_vxattr(inode, name);
515 vxattr = ceph_match_vxattr(vxattrs, name);
516 585
517 spin_lock(&ci->i_ceph_lock); 586 spin_lock(&ci->i_ceph_lock);
518 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 587 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
@@ -568,7 +637,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
568{ 637{
569 struct inode *inode = dentry->d_inode; 638 struct inode *inode = dentry->d_inode;
570 struct ceph_inode_info *ci = ceph_inode(inode); 639 struct ceph_inode_info *ci = ceph_inode(inode);
571 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); 640 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
572 u32 vir_namelen = 0; 641 u32 vir_namelen = 0;
573 u32 namelen; 642 u32 namelen;
574 int err; 643 int err;
@@ -596,11 +665,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
596 goto out; 665 goto out;
597 666
598list_xattr: 667list_xattr:
599 vir_namelen = 0; 668 /*
600 /* include virtual dir xattrs */ 669 * Start with virtual dir xattr names (if any) (including
601 if (vxattrs) 670 * terminating '\0' characters for each).
602 for (i = 0; vxattrs[i].name; i++) 671 */
603 vir_namelen += strlen(vxattrs[i].name) + 1; 672 vir_namelen = ceph_vxattrs_name_size(vxattrs);
673
604 /* adding 1 byte per each variable due to the null termination */ 674 /* adding 1 byte per each variable due to the null termination */
605 namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; 675 namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
606 err = -ERANGE; 676 err = -ERANGE;
@@ -698,17 +768,17 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
698 const void *value, size_t size, int flags) 768 const void *value, size_t size, int flags)
699{ 769{
700 struct inode *inode = dentry->d_inode; 770 struct inode *inode = dentry->d_inode;
771 struct ceph_vxattr *vxattr;
701 struct ceph_inode_info *ci = ceph_inode(inode); 772 struct ceph_inode_info *ci = ceph_inode(inode);
702 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); 773 int issued;
703 int err; 774 int err;
775 int dirty;
704 int name_len = strlen(name); 776 int name_len = strlen(name);
705 int val_len = size; 777 int val_len = size;
706 char *newname = NULL; 778 char *newname = NULL;
707 char *newval = NULL; 779 char *newval = NULL;
708 struct ceph_inode_xattr *xattr = NULL; 780 struct ceph_inode_xattr *xattr = NULL;
709 int issued;
710 int required_blob_size; 781 int required_blob_size;
711 int dirty;
712 782
713 if (ceph_snap(inode) != CEPH_NOSNAP) 783 if (ceph_snap(inode) != CEPH_NOSNAP)
714 return -EROFS; 784 return -EROFS;
@@ -716,12 +786,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
716 if (!ceph_is_valid_xattr(name)) 786 if (!ceph_is_valid_xattr(name))
717 return -EOPNOTSUPP; 787 return -EOPNOTSUPP;
718 788
719 if (vxattrs) { 789 vxattr = ceph_match_vxattr(inode, name);
720 struct ceph_vxattr_cb *vxattr = 790 if (vxattr && vxattr->readonly)
721 ceph_match_vxattr(vxattrs, name); 791 return -EOPNOTSUPP;
722 if (vxattr && vxattr->readonly)
723 return -EOPNOTSUPP;
724 }
725 792
726 /* preallocate memory for xattr name, value, index node */ 793 /* preallocate memory for xattr name, value, index node */
727 err = -ENOMEM; 794 err = -ENOMEM;
@@ -730,11 +797,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
730 goto out; 797 goto out;
731 798
732 if (val_len) { 799 if (val_len) {
733 newval = kmalloc(val_len + 1, GFP_NOFS); 800 newval = kmemdup(value, val_len, GFP_NOFS);
734 if (!newval) 801 if (!newval)
735 goto out; 802 goto out;
736 memcpy(newval, value, val_len);
737 newval[val_len] = '\0';
738 } 803 }
739 804
740 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); 805 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
@@ -744,6 +809,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
744 spin_lock(&ci->i_ceph_lock); 809 spin_lock(&ci->i_ceph_lock);
745retry: 810retry:
746 issued = __ceph_caps_issued(ci, NULL); 811 issued = __ceph_caps_issued(ci, NULL);
812 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
747 if (!(issued & CEPH_CAP_XATTR_EXCL)) 813 if (!(issued & CEPH_CAP_XATTR_EXCL))
748 goto do_sync; 814 goto do_sync;
749 __build_xattrs(inode); 815 __build_xattrs(inode);
@@ -752,7 +818,7 @@ retry:
752 818
753 if (!ci->i_xattrs.prealloc_blob || 819 if (!ci->i_xattrs.prealloc_blob ||
754 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 820 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
755 struct ceph_buffer *blob = NULL; 821 struct ceph_buffer *blob;
756 822
757 spin_unlock(&ci->i_ceph_lock); 823 spin_unlock(&ci->i_ceph_lock);
758 dout(" preaallocating new blob size=%d\n", required_blob_size); 824 dout(" preaallocating new blob size=%d\n", required_blob_size);
@@ -766,12 +832,13 @@ retry:
766 goto retry; 832 goto retry;
767 } 833 }
768 834
769 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
770 err = __set_xattr(ci, newname, name_len, newval, 835 err = __set_xattr(ci, newname, name_len, newval,
771 val_len, 1, 1, 1, &xattr); 836 val_len, 1, 1, 1, &xattr);
837
772 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 838 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
773 ci->i_xattrs.dirty = true; 839 ci->i_xattrs.dirty = true;
774 inode->i_ctime = CURRENT_TIME; 840 inode->i_ctime = CURRENT_TIME;
841
775 spin_unlock(&ci->i_ceph_lock); 842 spin_unlock(&ci->i_ceph_lock);
776 if (dirty) 843 if (dirty)
777 __mark_inode_dirty(inode, dirty); 844 __mark_inode_dirty(inode, dirty);
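The kmemdup() conversion above is a straight replacement for the kmalloc()/memcpy() pair, with one subtle difference: the old code allocated val_len + 1 bytes and appended a '\0', while kmemdup() copies exactly val_len bytes. xattr values are length-delimited, so the terminator was redundant. For reference, kmemdup() behaves roughly like this sketch (not the actual implementation):

	static void *kmemdup_sketch(const void *src, size_t len, gfp_t gfp)
	{
		void *dup = kmalloc(len, gfp);

		if (dup)
			memcpy(dup, src, len);
		return dup;
	}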
@@ -816,8 +883,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
816int ceph_removexattr(struct dentry *dentry, const char *name) 883int ceph_removexattr(struct dentry *dentry, const char *name)
817{ 884{
818 struct inode *inode = dentry->d_inode; 885 struct inode *inode = dentry->d_inode;
886 struct ceph_vxattr *vxattr;
819 struct ceph_inode_info *ci = ceph_inode(inode); 887 struct ceph_inode_info *ci = ceph_inode(inode);
820 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
821 int issued; 888 int issued;
822 int err; 889 int err;
823 int required_blob_size; 890 int required_blob_size;
@@ -829,22 +896,19 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
829 if (!ceph_is_valid_xattr(name)) 896 if (!ceph_is_valid_xattr(name))
830 return -EOPNOTSUPP; 897 return -EOPNOTSUPP;
831 898
832 if (vxattrs) { 899 vxattr = ceph_match_vxattr(inode, name);
833 struct ceph_vxattr_cb *vxattr = 900 if (vxattr && vxattr->readonly)
834 ceph_match_vxattr(vxattrs, name); 901 return -EOPNOTSUPP;
835 if (vxattr && vxattr->readonly)
836 return -EOPNOTSUPP;
837 }
838 902
839 err = -ENOMEM; 903 err = -ENOMEM;
840 spin_lock(&ci->i_ceph_lock); 904 spin_lock(&ci->i_ceph_lock);
841 __build_xattrs(inode);
842retry: 905retry:
843 issued = __ceph_caps_issued(ci, NULL); 906 issued = __ceph_caps_issued(ci, NULL);
844 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 907 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
845 908
846 if (!(issued & CEPH_CAP_XATTR_EXCL)) 909 if (!(issued & CEPH_CAP_XATTR_EXCL))
847 goto do_sync; 910 goto do_sync;
911 __build_xattrs(inode);
848 912
849 required_blob_size = __get_required_blob_size(ci, 0, 0); 913 required_blob_size = __get_required_blob_size(ci, 0, 0);
850 914
@@ -865,10 +929,10 @@ retry:
865 } 929 }
866 930
867 err = __remove_xattr_by_name(ceph_inode(inode), name); 931 err = __remove_xattr_by_name(ceph_inode(inode), name);
932
868 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 933 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
869 ci->i_xattrs.dirty = true; 934 ci->i_xattrs.dirty = true;
870 inode->i_ctime = CURRENT_TIME; 935 inode->i_ctime = CURRENT_TIME;
871
872 spin_unlock(&ci->i_ceph_lock); 936 spin_unlock(&ci->i_ceph_lock);
873 if (dirty) 937 if (dirty)
874 __mark_inode_dirty(inode, dirty); 938 __mark_inode_dirty(inode, dirty);
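Both setxattr and removexattr now call ceph_match_vxattr(inode, name) directly, so the per-call-site table lookup and NULL check have presumably been folded into the helper. A sketch, assuming the matching logic is the obvious linear scan over the table:

	static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
						     const char *name)
	{
		struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);

		if (vxattr) {
			/* table is terminated by an entry with a NULL name */
			for (; vxattr->name; vxattr++)
				if (!strcmp(vxattr->name, name))
					return vxattr;
		}
		return NULL;
	}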
diff --git a/fs/cifs/README b/fs/cifs/README
index 895da1dc155..b7d782bab79 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -753,10 +753,6 @@ module loading or during the runtime by using the interface
753 753
754i.e. echo "value" > /sys/module/cifs/parameters/<param> 754i.e. echo "value" > /sys/module/cifs/parameters/<param>
755 755
7561. echo_retries - The number of echo attempts before giving up and 7561. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
757 reconnecting to the server. The default is 5. The value 0
758 means never reconnect.
759
7602. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
761 [Y/y/1]. To disable use any of [N/n/0]. 757 [Y/y/1]. To disable use any of [N/n/0].
762 758
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 24b3dfc0528..27046462941 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -58,15 +58,16 @@ cifs_dump_mem(char *label, void *data, int length)
58} 58}
59 59
60#ifdef CONFIG_CIFS_DEBUG2 60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr *smb) 61void cifs_dump_detail(void *buf)
62{ 62{
63 struct smb_hdr *smb = (struct smb_hdr *)buf;
64
63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 65 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 66 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid); 67 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb)); 68 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
67} 69}
68 70
69
70void cifs_dump_mids(struct TCP_Server_Info *server) 71void cifs_dump_mids(struct TCP_Server_Info *server)
71{ 72{
72 struct list_head *tmp; 73 struct list_head *tmp;
@@ -79,15 +80,15 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
79 spin_lock(&GlobalMid_Lock); 80 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 81 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 82 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %d", 83 cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu",
83 mid_entry->midState, 84 mid_entry->mid_state,
84 (int)mid_entry->command, 85 le16_to_cpu(mid_entry->command),
85 mid_entry->pid, 86 mid_entry->pid,
86 mid_entry->callback_data, 87 mid_entry->callback_data,
87 mid_entry->mid); 88 mid_entry->mid);
88#ifdef CONFIG_CIFS_STATS2 89#ifdef CONFIG_CIFS_STATS2
89 cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld", 90 cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld",
90 mid_entry->largeBuf, 91 mid_entry->large_buf,
91 mid_entry->resp_buf, 92 mid_entry->resp_buf,
92 mid_entry->when_received, 93 mid_entry->when_received,
93 jiffies); 94 jiffies);
@@ -171,8 +172,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
171 seq_printf(m, "TCP status: %d\n\tLocal Users To " 172 seq_printf(m, "TCP status: %d\n\tLocal Users To "
172 "Server: %d SecMode: 0x%x Req On Wire: %d", 173 "Server: %d SecMode: 0x%x Req On Wire: %d",
173 server->tcpStatus, server->srv_count, 174 server->tcpStatus, server->srv_count,
174 server->sec_mode, 175 server->sec_mode, in_flight(server));
175 atomic_read(&server->inFlight));
176 176
177#ifdef CONFIG_CIFS_STATS2 177#ifdef CONFIG_CIFS_STATS2
178 seq_printf(m, " In Send: %d In MaxReq Wait: %d", 178 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
@@ -218,12 +218,12 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
218 mid_entry = list_entry(tmp3, struct mid_q_entry, 218 mid_entry = list_entry(tmp3, struct mid_q_entry,
219 qhead); 219 qhead);
220 seq_printf(m, "\tState: %d com: %d pid:" 220 seq_printf(m, "\tState: %d com: %d pid:"
221 " %d cbdata: %p mid %d\n", 221 " %d cbdata: %p mid %llu\n",
222 mid_entry->midState, 222 mid_entry->mid_state,
223 (int)mid_entry->command, 223 le16_to_cpu(mid_entry->command),
224 mid_entry->pid, 224 mid_entry->pid,
225 mid_entry->callback_data, 225 mid_entry->callback_data,
226 mid_entry->mid); 226 mid_entry->mid);
227 } 227 }
228 spin_unlock(&GlobalMid_Lock); 228 spin_unlock(&GlobalMid_Lock);
229 } 229 }
@@ -418,7 +418,6 @@ static const struct file_operations cifs_stats_proc_fops = {
418 418
419static struct proc_dir_entry *proc_fs_cifs; 419static struct proc_dir_entry *proc_fs_cifs;
420static const struct file_operations cifsFYI_proc_fops; 420static const struct file_operations cifsFYI_proc_fops;
421static const struct file_operations cifs_oplock_proc_fops;
422static const struct file_operations cifs_lookup_cache_proc_fops; 421static const struct file_operations cifs_lookup_cache_proc_fops;
423static const struct file_operations traceSMB_proc_fops; 422static const struct file_operations traceSMB_proc_fops;
424static const struct file_operations cifs_multiuser_mount_proc_fops; 423static const struct file_operations cifs_multiuser_mount_proc_fops;
@@ -439,7 +438,6 @@ cifs_proc_init(void)
439#endif /* STATS */ 438#endif /* STATS */
440 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops); 439 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
441 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); 440 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
442 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
443 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, 441 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
444 &cifs_linux_ext_proc_fops); 442 &cifs_linux_ext_proc_fops);
445 proc_create("MultiuserMount", 0, proc_fs_cifs, 443 proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -463,7 +461,6 @@ cifs_proc_clean(void)
463 remove_proc_entry("Stats", proc_fs_cifs); 461 remove_proc_entry("Stats", proc_fs_cifs);
464#endif 462#endif
465 remove_proc_entry("MultiuserMount", proc_fs_cifs); 463 remove_proc_entry("MultiuserMount", proc_fs_cifs);
466 remove_proc_entry("OplockEnabled", proc_fs_cifs);
467 remove_proc_entry("SecurityFlags", proc_fs_cifs); 464 remove_proc_entry("SecurityFlags", proc_fs_cifs);
468 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 465 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
469 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 466 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
@@ -509,46 +506,6 @@ static const struct file_operations cifsFYI_proc_fops = {
509 .write = cifsFYI_proc_write, 506 .write = cifsFYI_proc_write,
510}; 507};
511 508
512static int cifs_oplock_proc_show(struct seq_file *m, void *v)
513{
514 seq_printf(m, "%d\n", enable_oplocks);
515 return 0;
516}
517
518static int cifs_oplock_proc_open(struct inode *inode, struct file *file)
519{
520 return single_open(file, cifs_oplock_proc_show, NULL);
521}
522
523static ssize_t cifs_oplock_proc_write(struct file *file,
524 const char __user *buffer, size_t count, loff_t *ppos)
525{
526 char c;
527 int rc;
528
529 printk(KERN_WARNING "CIFS: The /proc/fs/cifs/OplockEnabled interface "
530 "will be removed in kernel version 3.4. Please migrate to "
531 "using the 'enable_oplocks' module parameter in cifs.ko.\n");
532 rc = get_user(c, buffer);
533 if (rc)
534 return rc;
535 if (c == '0' || c == 'n' || c == 'N')
536 enable_oplocks = false;
537 else if (c == '1' || c == 'y' || c == 'Y')
538 enable_oplocks = true;
539
540 return count;
541}
542
543static const struct file_operations cifs_oplock_proc_fops = {
544 .owner = THIS_MODULE,
545 .open = cifs_oplock_proc_open,
546 .read = seq_read,
547 .llseek = seq_lseek,
548 .release = single_release,
549 .write = cifs_oplock_proc_write,
550};
551
552static int cifs_linux_ext_proc_show(struct seq_file *m, void *v) 509static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
553{ 510{
554 seq_printf(m, "%d\n", linuxExtEnabled); 511 seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 8942b28cf80..566e0ae8dc2 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -26,13 +26,13 @@
26void cifs_dump_mem(char *label, void *data, int length); 26void cifs_dump_mem(char *label, void *data, int length);
27#ifdef CONFIG_CIFS_DEBUG2 27#ifdef CONFIG_CIFS_DEBUG2
28#define DBG2 2 28#define DBG2 2
29void cifs_dump_detail(struct smb_hdr *); 29void cifs_dump_detail(void *);
30void cifs_dump_mids(struct TCP_Server_Info *); 30void cifs_dump_mids(struct TCP_Server_Info *);
31#else 31#else
32#define DBG2 0 32#define DBG2 0
33#endif 33#endif
34extern int traceSMB; /* flag which enables the function below */ 34extern int traceSMB; /* flag which enables the function below */
35void dump_smb(struct smb_hdr *, int); 35void dump_smb(void *, int);
36#define CIFS_INFO 0x01 36#define CIFS_INFO 0x01
37#define CIFS_RC 0x02 37#define CIFS_RC 0x02
38#define CIFS_TIMER 0x04 38#define CIFS_TIMER 0x04
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c1b25448738..3cc1b251ca0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -556,6 +556,7 @@ init_cifs_idmap(void)
556 556
557 /* instruct request_key() to use this special keyring as a cache for 557 /* instruct request_key() to use this special keyring as a cache for
558 * the results it looks up */ 558 * the results it looks up */
559 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
559 cred->thread_keyring = keyring; 560 cred->thread_keyring = keyring;
560 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 561 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
561 root_cred = cred; 562 root_cred = cred;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b1fd382d195..541ef81f6ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -76,12 +76,7 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
76unsigned int cifs_max_pending = CIFS_MAX_REQ; 76unsigned int cifs_max_pending = CIFS_MAX_REQ;
77module_param(cifs_max_pending, int, 0444); 77module_param(cifs_max_pending, int, 0444);
78MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " 78MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
79 "Default: 50 Range: 2 to 256"); 79 "Default: 32767 Range: 2 to 32767.");
80unsigned short echo_retries = 5;
81module_param(echo_retries, ushort, 0644);
82MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and "
83 "reconnecting server. Default: 5. 0 means "
84 "never reconnect.");
85module_param(enable_oplocks, bool, 0644); 80module_param(enable_oplocks, bool, 0644);
86MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:" 81MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:"
87 "y/Y/1"); 82 "y/Y/1");
@@ -90,6 +85,8 @@ extern mempool_t *cifs_sm_req_poolp;
90extern mempool_t *cifs_req_poolp; 85extern mempool_t *cifs_req_poolp;
91extern mempool_t *cifs_mid_poolp; 86extern mempool_t *cifs_mid_poolp;
92 87
88struct workqueue_struct *cifsiod_wq;
89
93static int 90static int
94cifs_read_super(struct super_block *sb) 91cifs_read_super(struct super_block *sb)
95{ 92{
@@ -119,12 +116,10 @@ cifs_read_super(struct super_block *sb)
119 116
120 if (IS_ERR(inode)) { 117 if (IS_ERR(inode)) {
121 rc = PTR_ERR(inode); 118 rc = PTR_ERR(inode);
122 inode = NULL;
123 goto out_no_root; 119 goto out_no_root;
124 } 120 }
125 121
126 sb->s_root = d_alloc_root(inode); 122 sb->s_root = d_make_root(inode);
127
128 if (!sb->s_root) { 123 if (!sb->s_root) {
129 rc = -ENOMEM; 124 rc = -ENOMEM;
130 goto out_no_root; 125 goto out_no_root;
@@ -147,9 +142,6 @@ cifs_read_super(struct super_block *sb)
147 142
148out_no_root: 143out_no_root:
149 cERROR(1, "cifs_read_super: get root inode failed"); 144 cERROR(1, "cifs_read_super: get root inode failed");
150 if (inode)
151 iput(inode);
152
153 return rc; 145 return rc;
154} 146}
155 147
@@ -378,13 +370,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
378 (int)(srcaddr->sa_family)); 370 (int)(srcaddr->sa_family));
379 } 371 }
380 372
381 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); 373 seq_printf(s, ",uid=%u", cifs_sb->mnt_uid);
382 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 374 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
383 seq_printf(s, ",forceuid"); 375 seq_printf(s, ",forceuid");
384 else 376 else
385 seq_printf(s, ",noforceuid"); 377 seq_printf(s, ",noforceuid");
386 378
387 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); 379 seq_printf(s, ",gid=%u", cifs_sb->mnt_gid);
388 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
389 seq_printf(s, ",forcegid"); 381 seq_printf(s, ",forcegid");
390 else 382 else
@@ -442,11 +434,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
442 seq_printf(s, ",noperm"); 434 seq_printf(s, ",noperm");
443 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
444 seq_printf(s, ",strictcache"); 436 seq_printf(s, ",strictcache");
437 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
438 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid);
439 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
440 seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid);
445 441
446 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 442 seq_printf(s, ",rsize=%u", cifs_sb->rsize);
447 seq_printf(s, ",wsize=%d", cifs_sb->wsize); 443 seq_printf(s, ",wsize=%u", cifs_sb->wsize);
448 /* convert actimeo and display it in seconds */ 444 /* convert actimeo and display it in seconds */
449 seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); 445 seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
450 446
451 return 0; 447 return 0;
452} 448}
@@ -703,7 +699,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
703 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate 699 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
704 * the cached file length 700 * the cached file length
705 */ 701 */
706 if (origin != SEEK_SET || origin != SEEK_CUR) { 702 if (origin != SEEK_SET && origin != SEEK_CUR) {
707 int rc; 703 int rc;
708 struct inode *inode = file->f_path.dentry->d_inode; 704 struct inode *inode = file->f_path.dentry->d_inode;
709 705
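The operator change above fixes a genuine logic bug: no value of origin can equal both SEEK_SET and SEEK_CUR at once, so the old `||` test was always true and the revalidation ran on every llseek. With `&&` it triggers only for the origins that actually need a fresh file length, as the comment intends:

	/* origin == SEEK_SET:
	 *   old: (SET != SET) || (SET != CUR)  ->  false || true  -> true (bug)
	 *   new: (SET != SET) && (SET != CUR)  ->  false && true  -> false
	 */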
@@ -1116,14 +1112,20 @@ init_cifs(void)
1116 if (cifs_max_pending < 2) { 1112 if (cifs_max_pending < 2) {
1117 cifs_max_pending = 2; 1113 cifs_max_pending = 2;
1118 cFYI(1, "cifs_max_pending set to min of 2"); 1114 cFYI(1, "cifs_max_pending set to min of 2");
1119 } else if (cifs_max_pending > 256) { 1115 } else if (cifs_max_pending > CIFS_MAX_REQ) {
1120 cifs_max_pending = 256; 1116 cifs_max_pending = CIFS_MAX_REQ;
1121 cFYI(1, "cifs_max_pending set to max of 256"); 1117 cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ);
1118 }
1119
1120 cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
1121 if (!cifsiod_wq) {
1122 rc = -ENOMEM;
1123 goto out_clean_proc;
1122 } 1124 }
1123 1125
1124 rc = cifs_fscache_register(); 1126 rc = cifs_fscache_register();
1125 if (rc) 1127 if (rc)
1126 goto out_clean_proc; 1128 goto out_destroy_wq;
1127 1129
1128 rc = cifs_init_inodecache(); 1130 rc = cifs_init_inodecache();
1129 if (rc) 1131 if (rc)
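The bounds check on cifs_max_pending keeps the module parameter inside [2, CIFS_MAX_REQ]; the open-coded if/else is presumably kept so each branch can log via cFYI. The same clamping could be written with the kernel's clamp_t() helper, minus the logging:

	/* equivalent to the if/else pair above, without the cFYI messages */
	cifs_max_pending = clamp_t(unsigned int, cifs_max_pending,
				   2, CIFS_MAX_REQ);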
@@ -1171,6 +1173,8 @@ out_destroy_inodecache:
1171 cifs_destroy_inodecache(); 1173 cifs_destroy_inodecache();
1172out_unreg_fscache: 1174out_unreg_fscache:
1173 cifs_fscache_unregister(); 1175 cifs_fscache_unregister();
1176out_destroy_wq:
1177 destroy_workqueue(cifsiod_wq);
1174out_clean_proc: 1178out_clean_proc:
1175 cifs_proc_clean(); 1179 cifs_proc_clean();
1176 return rc; 1180 return rc;
@@ -1180,11 +1184,8 @@ static void __exit
1180exit_cifs(void) 1184exit_cifs(void)
1181{ 1185{
1182 cFYI(DBG2, "exit_cifs"); 1186 cFYI(DBG2, "exit_cifs");
1183 cifs_proc_clean(); 1187 unregister_filesystem(&cifs_fs_type);
1184 cifs_fscache_unregister();
1185#ifdef CONFIG_CIFS_DFS_UPCALL
1186 cifs_dfs_release_automount_timer(); 1188 cifs_dfs_release_automount_timer();
1187#endif
1188#ifdef CONFIG_CIFS_ACL 1189#ifdef CONFIG_CIFS_ACL
1189 cifs_destroy_idmaptrees(); 1190 cifs_destroy_idmaptrees();
1190 exit_cifs_idmap(); 1191 exit_cifs_idmap();
@@ -1192,10 +1193,12 @@ exit_cifs(void)
1192#ifdef CONFIG_CIFS_UPCALL 1193#ifdef CONFIG_CIFS_UPCALL
1193 unregister_key_type(&cifs_spnego_key_type); 1194 unregister_key_type(&cifs_spnego_key_type);
1194#endif 1195#endif
1195 unregister_filesystem(&cifs_fs_type);
1196 cifs_destroy_inodecache();
1197 cifs_destroy_mids();
1198 cifs_destroy_request_bufs(); 1196 cifs_destroy_request_bufs();
1197 cifs_destroy_mids();
1198 cifs_destroy_inodecache();
1199 cifs_fscache_unregister();
1200 destroy_workqueue(cifsiod_wq);
1201 cifs_proc_clean();
1199} 1202}
1200 1203
1201MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); 1204MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index fe5ecf1b422..65365358c97 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125extern const struct export_operations cifs_export_ops; 125extern const struct export_operations cifs_export_ops;
126#endif /* CONFIG_CIFS_NFSD_EXPORT */ 126#endif /* CONFIG_CIFS_NFSD_EXPORT */
127 127
128#define CIFS_VERSION "1.76" 128#define CIFS_VERSION "1.78"
129#endif /* _CIFSFS_H */ 129#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 76e7d8b6da1..4ff6313f0a9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -55,14 +55,9 @@
55 55
56/* 56/*
57 * MAX_REQ is the maximum number of requests that WE will send 57 * MAX_REQ is the maximum number of requests that WE will send
58 * on one socket concurrently. It also matches the most common 58 * on one socket concurrently.
59 * value of max multiplex returned by servers. We may
60 * eventually want to use the negotiated value (in case
61 * future servers can handle more) when we are more confident that
62 * we will not have problems overloading the socket with pending
63 * write data.
64 */ 59 */
65#define CIFS_MAX_REQ 50 60#define CIFS_MAX_REQ 32767
66 61
67#define RFC1001_NAME_LEN 15 62#define RFC1001_NAME_LEN 15
68#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) 63#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
@@ -235,6 +230,12 @@ struct cifs_mnt_data {
235 int flags; 230 int flags;
236}; 231};
237 232
233static inline unsigned int
234get_rfc1002_length(void *buf)
235{
236 return be32_to_cpu(*((__be32 *)buf));
237}
238
238struct TCP_Server_Info { 239struct TCP_Server_Info {
239 struct list_head tcp_ses_list; 240 struct list_head tcp_ses_list;
240 struct list_head smb_ses_list; 241 struct list_head smb_ses_list;
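get_rfc1002_length() decodes the big-endian 4-byte length prefix that fronts every SMB frame on the wire, without first casting the buffer to struct smb_hdr. Typical use, matching the read-receive hunks later in this patch:

	char *buf = server->smallbuf;
	unsigned int rfclen = get_rfc1002_length(buf);

	/* total bytes on the wire include the 4-byte RFC 1002 header */
	unsigned int frame_len = rfclen + 4;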
@@ -255,7 +256,9 @@ struct TCP_Server_Info {
255 bool noblocksnd; /* use blocking sendmsg */ 256 bool noblocksnd; /* use blocking sendmsg */
256 bool noautotune; /* do not autotune send buf sizes */ 257 bool noautotune; /* do not autotune send buf sizes */
257 bool tcp_nodelay; 258 bool tcp_nodelay;
258 atomic_t inFlight; /* number of requests on the wire to server */ 259 int credits; /* send no more requests at once */
260 unsigned int in_flight; /* number of requests on the wire to server */
261 spinlock_t req_lock; /* protect the two values above */
259 struct mutex srv_mutex; 262 struct mutex srv_mutex;
260 struct task_struct *tsk; 263 struct task_struct *tsk;
261 char server_GUID[16]; 264 char server_GUID[16];
@@ -263,6 +266,7 @@ struct TCP_Server_Info {
263 bool session_estab; /* mark when very first sess is established */ 266 bool session_estab; /* mark when very first sess is established */
264 u16 dialect; /* dialect index that server chose */ 267 u16 dialect; /* dialect index that server chose */
265 enum securityEnum secType; 268 enum securityEnum secType;
269 bool oplocks:1; /* enable oplocks */
266 unsigned int maxReq; /* Clients should submit no more */ 270 unsigned int maxReq; /* Clients should submit no more */
267 /* than maxReq distinct unanswered SMBs to the server when using */ 271 /* than maxReq distinct unanswered SMBs to the server when using */
268 /* multiplexed reads or writes */ 272 /* multiplexed reads or writes */
@@ -278,7 +282,7 @@ struct TCP_Server_Info {
278 vcnumbers */ 282 vcnumbers */
279 int capabilities; /* allow selective disabling of caps by smb sess */ 283 int capabilities; /* allow selective disabling of caps by smb sess */
280 int timeAdj; /* Adjust for difference in server time zone in sec */ 284 int timeAdj; /* Adjust for difference in server time zone in sec */
281 __u16 CurrentMid; /* multiplex id - rotating counter */ 285 __u64 CurrentMid; /* multiplex id - rotating counter */
282 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ 286 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
283 /* 16th byte of RFC1001 workstation name is always null */ 287 /* 16th byte of RFC1001 workstation name is always null */
284 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 288 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -307,6 +311,48 @@ struct TCP_Server_Info {
307#endif 311#endif
308}; 312};
309 313
314static inline unsigned int
315in_flight(struct TCP_Server_Info *server)
316{
317 unsigned int num;
318 spin_lock(&server->req_lock);
319 num = server->in_flight;
320 spin_unlock(&server->req_lock);
321 return num;
322}
323
324static inline int*
325get_credits_field(struct TCP_Server_Info *server)
326{
327 /*
328 * This will change to a switch statement when we reserve slots for echoes
329 * and oplock breaks.
330 */
331 return &server->credits;
332}
333
334static inline bool
335has_credits(struct TCP_Server_Info *server, int *credits)
336{
337 int num;
338 spin_lock(&server->req_lock);
339 num = *credits;
340 spin_unlock(&server->req_lock);
341 return num > 0;
342}
343
344static inline size_t
345header_size(void)
346{
347 return sizeof(struct smb_hdr);
348}
349
350static inline size_t
351max_header_size(void)
352{
353 return MAX_CIFS_HDR_SIZE;
354}
355
310/* 356/*
311 * Macros to allow the TCP_Server_Info->net field and related code to drop out 357 * Macros to allow the TCP_Server_Info->net field and related code to drop out
312 * when CONFIG_NET_NS isn't set. 358 * when CONFIG_NET_NS isn't set.
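These helpers take req_lock around every read, presumably because credits and in_flight can change concurrently from the demultiplex thread. A plausible consumer, assuming the existing request_q waitqueue is reused by the wait-for-credit path (which is not part of this hunk):

	/* sketch only -- not the actual wait_for_free_request() change */
	int *credits = get_credits_field(server);

	wait_event(server->request_q, has_credits(server, credits));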
@@ -555,9 +601,11 @@ struct cifs_io_parms {
555 * Take a reference on the file private data. Must be called with 601 * Take a reference on the file private data. Must be called with
556 * cifs_file_list_lock held. 602 * cifs_file_list_lock held.
557 */ 603 */
558static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) 604static inline
605struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file)
559{ 606{
560 ++cifs_file->count; 607 ++cifs_file->count;
608 return cifs_file;
561} 609}
562 610
563void cifsFileInfo_put(struct cifsFileInfo *cifs_file); 611void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
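Returning the pointer lets a caller take its reference and assign in a single expression. For example (local names hypothetical; cifs_file_list_lock held per the comment above):

	spin_lock(&cifs_file_list_lock);
	wdata->cfile = cifsFileInfo_get(open_file);
	spin_unlock(&cifs_file_list_lock);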
@@ -578,7 +626,7 @@ struct cifsInodeInfo {
578 bool delete_pending; /* DELETE_ON_CLOSE is set */ 626 bool delete_pending; /* DELETE_ON_CLOSE is set */
579 bool invalid_mapping; /* pagecache is invalid */ 627 bool invalid_mapping; /* pagecache is invalid */
580 unsigned long time; /* jiffies of last update of inode */ 628 unsigned long time; /* jiffies of last update of inode */
581 u64 server_eof; /* current file size on server */ 629 u64 server_eof; /* current file size on server -- protected by i_lock */
582 u64 uniqueid; /* server inode number */ 630 u64 uniqueid; /* server inode number */
583 u64 createtime; /* creation time on server */ 631 u64 createtime; /* creation time on server */
584#ifdef CONFIG_CIFS_FSCACHE 632#ifdef CONFIG_CIFS_FSCACHE
@@ -685,8 +733,8 @@ typedef void (mid_callback_t)(struct mid_q_entry *mid);
685/* one of these for every pending CIFS request to the server */ 733/* one of these for every pending CIFS request to the server */
686struct mid_q_entry { 734struct mid_q_entry {
687 struct list_head qhead; /* mids waiting on reply from this server */ 735 struct list_head qhead; /* mids waiting on reply from this server */
688 __u16 mid; /* multiplex id */ 736 __u64 mid; /* multiplex id */
689 __u16 pid; /* process id */ 737 __u32 pid; /* process id */
690 __u32 sequence_number; /* for CIFS signing */ 738 __u32 sequence_number; /* for CIFS signing */
691 unsigned long when_alloc; /* when mid was created */ 739 unsigned long when_alloc; /* when mid was created */
692#ifdef CONFIG_CIFS_STATS2 740#ifdef CONFIG_CIFS_STATS2
@@ -696,10 +744,10 @@ struct mid_q_entry {
696 mid_receive_t *receive; /* call receive callback */ 744 mid_receive_t *receive; /* call receive callback */
697 mid_callback_t *callback; /* call completion callback */ 745 mid_callback_t *callback; /* call completion callback */
698 void *callback_data; /* general purpose pointer for callback */ 746 void *callback_data; /* general purpose pointer for callback */
699 struct smb_hdr *resp_buf; /* pointer to received SMB header */ 747 void *resp_buf; /* pointer to received SMB header */
700 int midState; /* wish this were enum but can not pass to wait_event */ 748 int mid_state; /* wish this were enum but can not pass to wait_event */
701 __u8 command; /* smb command code */ 749 __le16 command; /* smb command code */
702 bool largeBuf:1; /* if valid response, is pointer to large buf */ 750 bool large_buf:1; /* if valid response, is pointer to large buf */
703 bool multiRsp:1; /* multiple trans2 responses for one request */ 751 bool multiRsp:1; /* multiple trans2 responses for one request */
704 bool multiEnd:1; /* both received */ 752 bool multiEnd:1; /* both received */
705}; 753};
@@ -1010,9 +1058,6 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
1010GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ 1058GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
1011GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ 1059GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
1012 1060
1013/* reconnect after this many failed echo attempts */
1014GLOBAL_EXTERN unsigned short echo_retries;
1015
1016#ifdef CONFIG_CIFS_ACL 1061#ifdef CONFIG_CIFS_ACL
1017GLOBAL_EXTERN struct rb_root uidtree; 1062GLOBAL_EXTERN struct rb_root uidtree;
1018GLOBAL_EXTERN struct rb_root gidtree; 1063GLOBAL_EXTERN struct rb_root gidtree;
@@ -1027,5 +1072,6 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
1027void cifs_oplock_break(struct work_struct *work); 1072void cifs_oplock_break(struct work_struct *work);
1028 1073
1029extern const struct slow_work_ops cifs_oplock_break_ops; 1074extern const struct slow_work_ops cifs_oplock_break_ops;
1075extern struct workqueue_struct *cifsiod_wq;
1030 1076
1031#endif /* _CIFS_GLOB_H */ 1077#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6f4e243e0f6..96192c1e380 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -77,7 +77,7 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
77 struct smb_hdr * /* out */ , 77 struct smb_hdr * /* out */ ,
78 int * /* bytes returned */ , const int long_op); 78 int * /* bytes returned */ , const int long_op);
79extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, 79extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
80 struct smb_hdr *in_buf, int flags); 80 char *in_buf, int flags);
81extern int cifs_check_receive(struct mid_q_entry *mid, 81extern int cifs_check_receive(struct mid_q_entry *mid,
82 struct TCP_Server_Info *server, bool log_error); 82 struct TCP_Server_Info *server, bool log_error);
83extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 83extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
@@ -88,9 +88,11 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
88 struct smb_hdr *in_buf , 88 struct smb_hdr *in_buf ,
89 struct smb_hdr *out_buf, 89 struct smb_hdr *out_buf,
90 int *bytes_returned); 90 int *bytes_returned);
91extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); 91extern void cifs_add_credits(struct TCP_Server_Info *server,
92extern bool is_valid_oplock_break(struct smb_hdr *smb, 92 const unsigned int add);
93 struct TCP_Server_Info *); 93extern void cifs_set_credits(struct TCP_Server_Info *server, const int val);
94extern int checkSMB(char *buf, unsigned int length);
95extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
94extern bool backup_cred(struct cifs_sb_info *); 96extern bool backup_cred(struct cifs_sb_info *);
95extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 97extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
96extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 98extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
@@ -104,7 +106,7 @@ extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
104extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); 106extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
105extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 107extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
106 const unsigned short int port); 108 const unsigned short int port);
107extern int map_smb_to_linux_error(struct smb_hdr *smb, bool logErr); 109extern int map_smb_to_linux_error(char *buf, bool logErr);
108extern void header_assemble(struct smb_hdr *, char /* command */ , 110extern void header_assemble(struct smb_hdr *, char /* command */ ,
109 const struct cifs_tcon *, int /* length of 111 const struct cifs_tcon *, int /* length of
110 fixed section (word count) in two byte units */); 112 fixed section (word count) in two byte units */);
@@ -113,7 +115,7 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
113 void **request_buf); 115 void **request_buf);
114extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, 116extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
115 const struct nls_table *nls_cp); 117 const struct nls_table *nls_cp);
116extern __u16 GetNextMid(struct TCP_Server_Info *server); 118extern __u64 GetNextMid(struct TCP_Server_Info *server);
117extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 119extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
118extern u64 cifs_UnixTimeToNT(struct timespec); 120extern u64 cifs_UnixTimeToNT(struct timespec);
119extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 121extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
@@ -168,7 +170,13 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
168 const char *devname); 170 const char *devname);
169extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); 171extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
170extern void cifs_umount(struct cifs_sb_info *); 172extern void cifs_umount(struct cifs_sb_info *);
173
174#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
171extern void cifs_dfs_release_automount_timer(void); 175extern void cifs_dfs_release_automount_timer(void);
176#else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
177#define cifs_dfs_release_automount_timer() do { } while (0)
178#endif /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
179
172void cifs_proc_init(void); 180void cifs_proc_init(void);
173void cifs_proc_clean(void); 181void cifs_proc_clean(void);
174 182
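The empty do { } while (0) expansion is the standard kernel idiom for stubbing out a void function: it generates no code but still parses as a single statement, so call sites need no #ifdef of their own. For instance, this stays syntactically valid whether or not CONFIG_CIFS_DFS_UPCALL is enabled:

	if (module_exiting)		/* hypothetical condition */
		cifs_dfs_release_automount_timer();
	else
		cFYI(1, "automount timer left running");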
@@ -475,18 +483,25 @@ int cifs_async_readv(struct cifs_readdata *rdata);
475/* asynchronous write support */ 483/* asynchronous write support */
476struct cifs_writedata { 484struct cifs_writedata {
477 struct kref refcount; 485 struct kref refcount;
486 struct list_head list;
487 struct completion done;
478 enum writeback_sync_modes sync_mode; 488 enum writeback_sync_modes sync_mode;
479 struct work_struct work; 489 struct work_struct work;
480 struct cifsFileInfo *cfile; 490 struct cifsFileInfo *cfile;
481 __u64 offset; 491 __u64 offset;
492 pid_t pid;
482 unsigned int bytes; 493 unsigned int bytes;
483 int result; 494 int result;
495 void (*marshal_iov) (struct kvec *iov,
496 struct cifs_writedata *wdata);
484 unsigned int nr_pages; 497 unsigned int nr_pages;
485 struct page *pages[1]; 498 struct page *pages[1];
486}; 499};
487 500
488int cifs_async_writev(struct cifs_writedata *wdata); 501int cifs_async_writev(struct cifs_writedata *wdata);
489struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages); 502void cifs_writev_complete(struct work_struct *work);
503struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
504 work_func_t complete);
490void cifs_writedata_release(struct kref *refcount); 505void cifs_writedata_release(struct kref *refcount);
491 506
492#endif /* _CIFSPROTO_H */ 507#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8b7794c3159..da2f5446fa7 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -458,7 +458,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
458 goto neg_err_exit; 458 goto neg_err_exit;
459 } 459 }
460 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); 460 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode);
461 server->maxReq = le16_to_cpu(rsp->MaxMpxCount); 461 server->maxReq = min_t(unsigned int,
462 le16_to_cpu(rsp->MaxMpxCount),
463 cifs_max_pending);
464 cifs_set_credits(server, server->maxReq);
462 server->maxBuf = le16_to_cpu(rsp->MaxBufSize); 465 server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
463 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); 466 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
464 /* even though we do not use raw we might as well set this 467 /* even though we do not use raw we might as well set this
@@ -564,7 +567,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
564 567
565 /* one byte, so no need to convert this or EncryptionKeyLen from 568 /* one byte, so no need to convert this or EncryptionKeyLen from
566 little endian */ 569 little endian */
567 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); 570 server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount),
571 cifs_max_pending);
572 cifs_set_credits(server, server->maxReq);
568 /* probably no need to store and check maxvcs */ 573 /* probably no need to store and check maxvcs */
569 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); 574 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize);
570 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); 575 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
@@ -691,7 +696,7 @@ CIFSSMBTDis(const int xid, struct cifs_tcon *tcon)
691 if (rc) 696 if (rc)
692 return rc; 697 return rc;
693 698
694 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0); 699 rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, 0);
695 if (rc) 700 if (rc)
696 cFYI(1, "Tree disconnect failed %d", rc); 701 cFYI(1, "Tree disconnect failed %d", rc);
697 702
@@ -716,8 +721,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
716 struct TCP_Server_Info *server = mid->callback_data; 721 struct TCP_Server_Info *server = mid->callback_data;
717 722
718 DeleteMidQEntry(mid); 723 DeleteMidQEntry(mid);
719 atomic_dec(&server->inFlight); 724 cifs_add_credits(server, 1);
720 wake_up(&server->request_q);
721} 725}
722 726
723int 727int
@@ -788,7 +792,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses)
788 pSMB->hdr.Uid = ses->Suid; 792 pSMB->hdr.Uid = ses->Suid;
789 793
790 pSMB->AndXCommand = 0xFF; 794 pSMB->AndXCommand = 0xFF;
791 rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0); 795 rc = SendReceiveNoRsp(xid, ses, (char *) pSMB, 0);
792session_already_dead: 796session_already_dead:
793 mutex_unlock(&ses->session_mutex); 797 mutex_unlock(&ses->session_mutex);
794 798
@@ -1410,8 +1414,7 @@ cifs_readdata_free(struct cifs_readdata *rdata)
1410static int 1414static int
1411cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1415cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1412{ 1416{
1413 READ_RSP *rsp = (READ_RSP *)server->smallbuf; 1417 unsigned int rfclen = get_rfc1002_length(server->smallbuf);
1414 unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length);
1415 int remaining = rfclen + 4 - server->total_read; 1418 int remaining = rfclen + 4 - server->total_read;
1416 struct cifs_readdata *rdata = mid->callback_data; 1419 struct cifs_readdata *rdata = mid->callback_data;
1417 1420
@@ -1420,7 +1423,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1420 1423
1421 length = cifs_read_from_socket(server, server->bigbuf, 1424 length = cifs_read_from_socket(server, server->bigbuf,
1422 min_t(unsigned int, remaining, 1425 min_t(unsigned int, remaining,
1423 CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)); 1426 CIFSMaxBufSize + max_header_size()));
1424 if (length < 0) 1427 if (length < 0)
1425 return length; 1428 return length;
1426 server->total_read += length; 1429 server->total_read += length;
@@ -1431,19 +1434,40 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1431 return 0; 1434 return 0;
1432} 1435}
1433 1436
1437static inline size_t
1438read_rsp_size(void)
1439{
1440 return sizeof(READ_RSP);
1441}
1442
1443static inline unsigned int
1444read_data_offset(char *buf)
1445{
1446 READ_RSP *rsp = (READ_RSP *)buf;
1447 return le16_to_cpu(rsp->DataOffset);
1448}
1449
1450static inline unsigned int
1451read_data_length(char *buf)
1452{
1453 READ_RSP *rsp = (READ_RSP *)buf;
1454 return (le16_to_cpu(rsp->DataLengthHigh) << 16) +
1455 le16_to_cpu(rsp->DataLength);
1456}
1457
1434static int 1458static int
1435cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1459cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1436{ 1460{
1437 int length, len; 1461 int length, len;
1438 unsigned int data_offset, remaining, data_len; 1462 unsigned int data_offset, remaining, data_len;
1439 struct cifs_readdata *rdata = mid->callback_data; 1463 struct cifs_readdata *rdata = mid->callback_data;
1440 READ_RSP *rsp = (READ_RSP *)server->smallbuf; 1464 char *buf = server->smallbuf;
1441 unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length) + 4; 1465 unsigned int buflen = get_rfc1002_length(buf) + 4;
1442 u64 eof; 1466 u64 eof;
1443 pgoff_t eof_index; 1467 pgoff_t eof_index;
1444 struct page *page, *tpage; 1468 struct page *page, *tpage;
1445 1469
1446 cFYI(1, "%s: mid=%u offset=%llu bytes=%u", __func__, 1470 cFYI(1, "%s: mid=%llu offset=%llu bytes=%u", __func__,
1447 mid->mid, rdata->offset, rdata->bytes); 1471 mid->mid, rdata->offset, rdata->bytes);
1448 1472
1449 /* 1473 /*
@@ -1451,10 +1475,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1451 * can if there's not enough data. At this point, we've read down to 1475 * can if there's not enough data. At this point, we've read down to
1452 * the Mid. 1476 * the Mid.
1453 */ 1477 */
1454 len = min_t(unsigned int, rfclen, sizeof(*rsp)) - 1478 len = min_t(unsigned int, buflen, read_rsp_size()) - header_size() + 1;
1455 sizeof(struct smb_hdr) + 1;
1456 1479
1457 rdata->iov[0].iov_base = server->smallbuf + sizeof(struct smb_hdr) - 1; 1480 rdata->iov[0].iov_base = buf + header_size() - 1;
1458 rdata->iov[0].iov_len = len; 1481 rdata->iov[0].iov_len = len;
1459 1482
1460 length = cifs_readv_from_socket(server, rdata->iov, 1, len); 1483 length = cifs_readv_from_socket(server, rdata->iov, 1, len);
@@ -1463,7 +1486,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1463 server->total_read += length; 1486 server->total_read += length;
1464 1487
1465 /* Was the SMB read successful? */ 1488 /* Was the SMB read successful? */
1466 rdata->result = map_smb_to_linux_error(&rsp->hdr, false); 1489 rdata->result = map_smb_to_linux_error(buf, false);
1467 if (rdata->result != 0) { 1490 if (rdata->result != 0) {
1468 cFYI(1, "%s: server returned error %d", __func__, 1491 cFYI(1, "%s: server returned error %d", __func__,
1469 rdata->result); 1492 rdata->result);
@@ -1471,14 +1494,14 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1471 } 1494 }
1472 1495
1473 /* Is there enough to get to the rest of the READ_RSP header? */ 1496 /* Is there enough to get to the rest of the READ_RSP header? */
1474 if (server->total_read < sizeof(READ_RSP)) { 1497 if (server->total_read < read_rsp_size()) {
1475 cFYI(1, "%s: server returned short header. got=%u expected=%zu", 1498 cFYI(1, "%s: server returned short header. got=%u expected=%zu",
1476 __func__, server->total_read, sizeof(READ_RSP)); 1499 __func__, server->total_read, read_rsp_size());
1477 rdata->result = -EIO; 1500 rdata->result = -EIO;
1478 return cifs_readv_discard(server, mid); 1501 return cifs_readv_discard(server, mid);
1479 } 1502 }
1480 1503
1481 data_offset = le16_to_cpu(rsp->DataOffset) + 4; 1504 data_offset = read_data_offset(buf) + 4;
1482 if (data_offset < server->total_read) { 1505 if (data_offset < server->total_read) {
1483 /* 1506 /*
1484 * win2k8 sometimes sends an offset of 0 when the read 1507 * win2k8 sometimes sends an offset of 0 when the read
@@ -1502,7 +1525,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1502 len = data_offset - server->total_read; 1525 len = data_offset - server->total_read;
1503 if (len > 0) { 1526 if (len > 0) {
1504 /* read any junk before data into the rest of smallbuf */ 1527 /* read any junk before data into the rest of smallbuf */
1505 rdata->iov[0].iov_base = server->smallbuf + server->total_read; 1528 rdata->iov[0].iov_base = buf + server->total_read;
1506 rdata->iov[0].iov_len = len; 1529 rdata->iov[0].iov_len = len;
1507 length = cifs_readv_from_socket(server, rdata->iov, 1, len); 1530 length = cifs_readv_from_socket(server, rdata->iov, 1, len);
1508 if (length < 0) 1531 if (length < 0)
@@ -1511,15 +1534,14 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1511 } 1534 }
1512 1535
1513 /* set up first iov for signature check */ 1536 /* set up first iov for signature check */
1514 rdata->iov[0].iov_base = server->smallbuf; 1537 rdata->iov[0].iov_base = buf;
1515 rdata->iov[0].iov_len = server->total_read; 1538 rdata->iov[0].iov_len = server->total_read;
1516 cFYI(1, "0: iov_base=%p iov_len=%zu", 1539 cFYI(1, "0: iov_base=%p iov_len=%zu",
1517 rdata->iov[0].iov_base, rdata->iov[0].iov_len); 1540 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1518 1541
1519 /* how much data is in the response? */ 1542 /* how much data is in the response? */
1520 data_len = le16_to_cpu(rsp->DataLengthHigh) << 16; 1543 data_len = read_data_length(buf);
1521 data_len += le16_to_cpu(rsp->DataLength); 1544 if (data_offset + data_len > buflen) {
1522 if (data_offset + data_len > rfclen) {
1523 /* data_len is corrupt -- discard frame */ 1545 /* data_len is corrupt -- discard frame */
1524 rdata->result = -EIO; 1546 rdata->result = -EIO;
1525 return cifs_readv_discard(server, mid); 1547 return cifs_readv_discard(server, mid);
@@ -1598,11 +1620,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1598 1620
1599 rdata->bytes = length; 1621 rdata->bytes = length;
1600 1622
1601 cFYI(1, "total_read=%u rfclen=%u remaining=%u", server->total_read, 1623 cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read,
1602 rfclen, remaining); 1624 buflen, remaining);
1603 1625
1604 /* discard anything left over */ 1626 /* discard anything left over */
1605 if (server->total_read < rfclen) 1627 if (server->total_read < buflen)
1606 return cifs_readv_discard(server, mid); 1628 return cifs_readv_discard(server, mid);
1607 1629
1608 dequeue_mid(mid, false); 1630 dequeue_mid(mid, false);
@@ -1643,10 +1665,10 @@ cifs_readv_callback(struct mid_q_entry *mid)
1643 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1665 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1644 struct TCP_Server_Info *server = tcon->ses->server; 1666 struct TCP_Server_Info *server = tcon->ses->server;
1645 1667
1646 cFYI(1, "%s: mid=%u state=%d result=%d bytes=%u", __func__, 1668 cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
1647 mid->mid, mid->midState, rdata->result, rdata->bytes); 1669 mid->mid, mid->mid_state, rdata->result, rdata->bytes);
1648 1670
1649 switch (mid->midState) { 1671 switch (mid->mid_state) {
1650 case MID_RESPONSE_RECEIVED: 1672 case MID_RESPONSE_RECEIVED:
1651 /* result already set, check signature */ 1673 /* result already set, check signature */
1652 if (server->sec_mode & 1674 if (server->sec_mode &
@@ -1667,10 +1689,9 @@ cifs_readv_callback(struct mid_q_entry *mid)
1667 rdata->result = -EIO; 1689 rdata->result = -EIO;
1668 } 1690 }
1669 1691
1670 queue_work(system_nrt_wq, &rdata->work); 1692 queue_work(cifsiod_wq, &rdata->work);
1671 DeleteMidQEntry(mid); 1693 DeleteMidQEntry(mid);
1672 atomic_dec(&server->inFlight); 1694 cifs_add_credits(server, 1);
1673 wake_up(&server->request_q);
1674} 1695}
1675 1696
1676/* cifs_async_readv - send an async write, and set up mid to handle result */ 1697/* cifs_async_readv - send an async write, and set up mid to handle result */
@@ -2014,7 +2035,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2014 kref_put(&wdata->refcount, cifs_writedata_release); 2035 kref_put(&wdata->refcount, cifs_writedata_release);
2015} 2036}
2016 2037
2017static void 2038void
2018cifs_writev_complete(struct work_struct *work) 2039cifs_writev_complete(struct work_struct *work)
2019{ 2040{
2020 struct cifs_writedata *wdata = container_of(work, 2041 struct cifs_writedata *wdata = container_of(work,
@@ -2023,7 +2044,9 @@ cifs_writev_complete(struct work_struct *work)
2023 int i = 0; 2044 int i = 0;
2024 2045
2025 if (wdata->result == 0) { 2046 if (wdata->result == 0) {
2047 spin_lock(&inode->i_lock);
2026 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2048 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2049 spin_unlock(&inode->i_lock);
2027 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2050 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2028 wdata->bytes); 2051 wdata->bytes);
2029 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2052 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
@@ -2044,7 +2067,7 @@ cifs_writev_complete(struct work_struct *work)
2044} 2067}
2045 2068
2046struct cifs_writedata * 2069struct cifs_writedata *
2047cifs_writedata_alloc(unsigned int nr_pages) 2070cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2048{ 2071{
2049 struct cifs_writedata *wdata; 2072 struct cifs_writedata *wdata;
2050 2073
@@ -2058,14 +2081,16 @@ cifs_writedata_alloc(unsigned int nr_pages)
2058 wdata = kzalloc(sizeof(*wdata) + 2081 wdata = kzalloc(sizeof(*wdata) +
2059 sizeof(struct page *) * (nr_pages - 1), GFP_NOFS); 2082 sizeof(struct page *) * (nr_pages - 1), GFP_NOFS);
2060 if (wdata != NULL) { 2083 if (wdata != NULL) {
2061 INIT_WORK(&wdata->work, cifs_writev_complete);
2062 kref_init(&wdata->refcount); 2084 kref_init(&wdata->refcount);
2085 INIT_LIST_HEAD(&wdata->list);
2086 init_completion(&wdata->done);
2087 INIT_WORK(&wdata->work, complete);
2063 } 2088 }
2064 return wdata; 2089 return wdata;
2065} 2090}
2066 2091
2067/* 2092/*
2068 * Check the midState and signature on received buffer (if any), and queue the 2093 * Check the mid_state and signature on received buffer (if any), and queue the
2069 * workqueue completion task. 2094 * workqueue completion task.
2070 */ 2095 */
2071static void 2096static void
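With the completion handler now a constructor argument, each caller chooses its work function at allocation time; the regular writeback path would pass the newly exported cifs_writev_complete():

	wdata = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
	if (wdata == NULL)
		return -ENOMEM;	/* caller-specific error handling assumed */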
@@ -2076,7 +2101,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
2076 unsigned int written; 2101 unsigned int written;
2077 WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; 2102 WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
2078 2103
2079 switch (mid->midState) { 2104 switch (mid->mid_state) {
2080 case MID_RESPONSE_RECEIVED: 2105 case MID_RESPONSE_RECEIVED:
2081 wdata->result = cifs_check_receive(mid, tcon->ses->server, 0); 2106 wdata->result = cifs_check_receive(mid, tcon->ses->server, 0);
2082 if (wdata->result != 0) 2107 if (wdata->result != 0)
@@ -2108,10 +2133,9 @@ cifs_writev_callback(struct mid_q_entry *mid)
2108 break; 2133 break;
2109 } 2134 }
2110 2135
2111 queue_work(system_nrt_wq, &wdata->work); 2136 queue_work(cifsiod_wq, &wdata->work);
2112 DeleteMidQEntry(mid); 2137 DeleteMidQEntry(mid);
2113 atomic_dec(&tcon->ses->server->inFlight); 2138 cifs_add_credits(tcon->ses->server, 1);
2114 wake_up(&tcon->ses->server->request_q);
2115} 2139}
2116 2140
2117/* cifs_async_writev - send an async write, and set up mid to handle result */ 2141/* cifs_async_writev - send an async write, and set up mid to handle result */
@@ -2122,7 +2146,6 @@ cifs_async_writev(struct cifs_writedata *wdata)
2122 WRITE_REQ *smb = NULL; 2146 WRITE_REQ *smb = NULL;
2123 int wct; 2147 int wct;
2124 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 2148 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
2125 struct inode *inode = wdata->cfile->dentry->d_inode;
2126 struct kvec *iov = NULL; 2149 struct kvec *iov = NULL;
2127 2150
2128 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 2151 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -2146,8 +2169,8 @@ cifs_async_writev(struct cifs_writedata *wdata)
2146 goto async_writev_out; 2169 goto async_writev_out;
2147 } 2170 }
2148 2171
2149 smb->hdr.Pid = cpu_to_le16((__u16)wdata->cfile->pid); 2172 smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
2150 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->cfile->pid >> 16)); 2173 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
2151 2174
2152 smb->AndXCommand = 0xFF; /* none */ 2175 smb->AndXCommand = 0xFF; /* none */
2153 smb->Fid = wdata->cfile->netfid; 2176 smb->Fid = wdata->cfile->netfid;
@@ -2165,15 +2188,13 @@ cifs_async_writev(struct cifs_writedata *wdata)
2165 iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; 2188 iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
2166 iov[0].iov_base = smb; 2189 iov[0].iov_base = smb;
2167 2190
2168 /* marshal up the pages into iov array */ 2191 /*
2169 wdata->bytes = 0; 2192 * This function should marshal up the page array into the kvec
2170 for (i = 0; i < wdata->nr_pages; i++) { 2193 * array, reserving [0] for the header. It should kmap the pages
2171 iov[i + 1].iov_len = min(inode->i_size - 2194 * and set the iov_len properly for each one. It may also set
2172 page_offset(wdata->pages[i]), 2195 * wdata->bytes too.
2173 (loff_t)PAGE_CACHE_SIZE); 2196 */
2174 iov[i + 1].iov_base = kmap(wdata->pages[i]); 2197 wdata->marshal_iov(iov, wdata);
2175 wdata->bytes += iov[i + 1].iov_len;
2176 }
2177 2198
2178 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); 2199 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
2179 2200
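A marshal_iov callback for the writepages path would presumably mirror the loop removed above: kmap each page into iov[1..nr_pages], cap the final page at the file size, and accumulate wdata->bytes. A sketch reconstructed from the deleted code (the function name is hypothetical):

	static void
	cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
	{
		int i;
		struct inode *inode = wdata->cfile->dentry->d_inode;

		/* iov[0] is reserved for the SMB header by cifs_async_writev() */
		wdata->bytes = 0;
		for (i = 0; i < wdata->nr_pages; i++) {
			iov[i + 1].iov_len = min(inode->i_size -
						 page_offset(wdata->pages[i]),
						 (loff_t)PAGE_CACHE_SIZE);
			iov[i + 1].iov_base = kmap(wdata->pages[i]);
			wdata->bytes += iov[i + 1].iov_len;
		}
	}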
@@ -2418,8 +2439,7 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon,
2418 (struct smb_hdr *) pSMB, &bytes_returned); 2439 (struct smb_hdr *) pSMB, &bytes_returned);
2419 cifs_small_buf_release(pSMB); 2440 cifs_small_buf_release(pSMB);
2420 } else { 2441 } else {
2421 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, 2442 rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, timeout);
2422 timeout);
2423 /* SMB buffer freed by function above */ 2443 /* SMB buffer freed by function above */
2424 } 2444 }
2425 cifs_stats_inc(&tcon->num_locks); 2445 cifs_stats_inc(&tcon->num_locks);
@@ -2586,7 +2606,7 @@ CIFSSMBClose(const int xid, struct cifs_tcon *tcon, int smb_file_id)
2586 pSMB->FileID = (__u16) smb_file_id; 2606 pSMB->FileID = (__u16) smb_file_id;
2587 pSMB->LastWriteTime = 0xFFFFFFFF; 2607 pSMB->LastWriteTime = 0xFFFFFFFF;
2588 pSMB->ByteCount = 0; 2608 pSMB->ByteCount = 0;
2589 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 2609 rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
2590 cifs_stats_inc(&tcon->num_closes); 2610 cifs_stats_inc(&tcon->num_closes);
2591 if (rc) { 2611 if (rc) {
2592 if (rc != -EINTR) { 2612 if (rc != -EINTR) {
@@ -2615,7 +2635,7 @@ CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, int smb_file_id)
 
 	pSMB->FileID = (__u16) smb_file_id;
 	pSMB->ByteCount = 0;
-	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	cifs_stats_inc(&tcon->num_flushes);
 	if (rc)
 		cERROR(1, "Send error in Flush = %d", rc);
@@ -3872,13 +3892,12 @@ CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid,
 	int rc = 0;
 	int bytes_returned = 0;
 	SET_SEC_DESC_REQ *pSMB = NULL;
-	NTRANSACT_RSP *pSMBr = NULL;
+	void *pSMBr;
 
setCifsAclRetry:
-	rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB,
-			(void **) &pSMBr);
+	rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, &pSMBr);
 	if (rc)
-		return (rc);
+		return rc;
 
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
@@ -3906,9 +3925,8 @@ setCifsAclRetry:
 	pSMB->AclFlags = cpu_to_le32(aclflag);
 
 	if (pntsd && acllen) {
-		memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
-			(char *) pntsd,
-			acllen);
+		memcpy((char *)pSMBr + offsetof(struct smb_hdr, Protocol) +
+			data_offset, pntsd, acllen);
 		inc_rfc1001_len(pSMB, byte_count + data_count);
 	} else
 		inc_rfc1001_len(pSMB, byte_count);
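Both memcpy destinations land on the same byte; the new form just avoids dereferencing pSMBr through a response type it may no longer be declared as. A standalone sketch of the equivalence, with a toy header type so it compiles on its own (the field names are illustrative, not the real smb_hdr layout):

#include <stddef.h>
#include <assert.h>

struct hdr { int len; char Protocol[4]; };

int main(void)
{
	struct hdr h;
	void *p = &h;

	/* typed-pointer form vs. offsetof arithmetic on an opaque buffer */
	char *a = (char *)&h.Protocol[0];
	char *b = (char *)p + offsetof(struct hdr, Protocol);

	assert(a == b);
	return 0;
}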
@@ -4623,7 +4641,7 @@ CIFSFindClose(const int xid, struct cifs_tcon *tcon,
4623 4641
4624 pSMB->FileID = searchHandle; 4642 pSMB->FileID = searchHandle;
4625 pSMB->ByteCount = 0; 4643 pSMB->ByteCount = 0;
4626 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4644 rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
4627 if (rc) 4645 if (rc)
4628 cERROR(1, "Send error in FindClose = %d", rc); 4646 cERROR(1, "Send error in FindClose = %d", rc);
4629 4647
@@ -4826,8 +4844,12 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
 		max_len = data_end - temp;
 		node->node_name = cifs_strndup_from_utf16(temp, max_len,
						is_unicode, nls_codepage);
-		if (!node->node_name)
+		if (!node->node_name) {
 			rc = -ENOMEM;
+			goto parse_DFS_referrals_exit;
+		}
+
+		ref++;
 	}
 
parse_DFS_referrals_exit:
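The hunk above fixes two problems at once: an allocation failure now exits the loop instead of silently continuing, and the referral cursor is advanced so each pass reads the next entry rather than re-parsing the first one. A reduced, userspace sketch of the corrected loop shape; parse_one() is a hypothetical stand-in for cifs_strndup_from_utf16() and the referral structures:

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for cifs_strndup_from_utf16(). */
static char *parse_one(const char *src)
{
	return strdup(src);
}

/* Reduced sketch of the referral walk fixed above. */
static int walk_referrals(const char **ref, int nr)
{
	int rc = 0, i;

	for (i = 0; i < nr; i++) {
		char *name = parse_one(*ref);
		if (!name) {
			rc = -ENOMEM;
			goto out;	/* bail out, don't keep looping */
		}
		free(name);
		ref++;		/* advance, or every pass re-reads ref[0] */
	}
out:
	return rc;
}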
@@ -5644,7 +5666,7 @@ CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, __u64 size,
 	pSMB->Reserved4 = 0;
 	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
-	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc) {
 		cFYI(1, "Send error in SetFileInfo (SetFileSize) = %d", rc);
 	}
@@ -5688,7 +5710,8 @@ CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
 	param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
 	offset = param_offset + params;
 
-	data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
+	data_offset = (char *)pSMB +
+			offsetof(struct smb_hdr, Protocol) + offset;
 
 	count = sizeof(FILE_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
@@ -5713,7 +5736,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
 	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
-	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
 		cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
 
@@ -5772,7 +5795,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon,
 	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	*data_offset = delete_file ? 1 : 0;
-	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
 		cFYI(1, "Send error in SetFileDisposition = %d", rc);
 
@@ -5957,7 +5980,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
 		       u16 fid, u32 pid_of_opener)
 {
 	struct smb_com_transaction2_sfi_req *pSMB = NULL;
-	FILE_UNIX_BASIC_INFO *data_offset;
+	char *data_offset;
 	int rc = 0;
 	u16 params, param_offset, offset, byte_count, count;
 
@@ -5979,8 +6002,9 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
 	param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
 	offset = param_offset + params;
 
-	data_offset = (FILE_UNIX_BASIC_INFO *)
-			((char *)(&pSMB->hdr.Protocol) + offset);
+	data_offset = (char *)pSMB +
+			offsetof(struct smb_hdr, Protocol) + offset;
+
 	count = sizeof(FILE_UNIX_BASIC_INFO);
 
 	pSMB->MaxParameterCount = cpu_to_le16(2);
@@ -6002,9 +6026,9 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
 	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
-	cifs_fill_unix_set_info(data_offset, args);
+	cifs_fill_unix_set_info((FILE_UNIX_BASIC_INFO *)data_offset, args);
 
-	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
 		cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 602f77c304c..e0b56d7a19c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -40,6 +40,8 @@
 #include <linux/module.h>
 #include <keys/user-type.h>
 #include <net/ipv6.h>
+#include <linux/parser.h>
+
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
@@ -63,6 +65,202 @@ extern mempool_t *cifs_req_poolp;
 #define TLINK_ERROR_EXPIRE	(1 * HZ)
 #define TLINK_IDLE_EXPIRE	(600 * HZ)
 
+enum {
+
+	/* Mount options that take no arguments */
+	Opt_user_xattr, Opt_nouser_xattr,
+	Opt_forceuid, Opt_noforceuid,
+	Opt_noblocksend, Opt_noautotune,
+	Opt_hard, Opt_soft, Opt_perm, Opt_noperm,
+	Opt_mapchars, Opt_nomapchars, Opt_sfu,
+	Opt_nosfu, Opt_nodfs, Opt_posixpaths,
+	Opt_noposixpaths, Opt_nounix,
+	Opt_nocase,
+	Opt_brl, Opt_nobrl,
+	Opt_forcemandatorylock, Opt_setuids,
+	Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
+	Opt_nohard, Opt_nosoft,
+	Opt_nointr, Opt_intr,
+	Opt_nostrictsync, Opt_strictsync,
+	Opt_serverino, Opt_noserverino,
+	Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl,
+	Opt_acl, Opt_noacl, Opt_locallease,
+	Opt_sign, Opt_seal, Opt_direct,
+	Opt_strictcache, Opt_noac,
+	Opt_fsc, Opt_mfsymlinks,
+	Opt_multiuser, Opt_sloppy,
+
+	/* Mount options which take numeric value */
+	Opt_backupuid, Opt_backupgid, Opt_uid,
+	Opt_cruid, Opt_gid, Opt_file_mode,
+	Opt_dirmode, Opt_port,
+	Opt_rsize, Opt_wsize, Opt_actimeo,
+
+	/* Mount options which take string value */
+	Opt_user, Opt_pass, Opt_ip,
+	Opt_unc, Opt_domain,
+	Opt_srcaddr, Opt_prefixpath,
+	Opt_iocharset, Opt_sockopt,
+	Opt_netbiosname, Opt_servern,
+	Opt_ver, Opt_sec,
+
+	/* Mount options to be ignored */
+	Opt_ignore,
+
+	/* Options which could be blank */
+	Opt_blank_pass,
+	Opt_blank_user,
+	Opt_blank_ip,
+
+	Opt_err
+};
+
+static const match_table_t cifs_mount_option_tokens = {
+
+	{ Opt_user_xattr, "user_xattr" },
+	{ Opt_nouser_xattr, "nouser_xattr" },
+	{ Opt_forceuid, "forceuid" },
+	{ Opt_noforceuid, "noforceuid" },
+	{ Opt_noblocksend, "noblocksend" },
+	{ Opt_noautotune, "noautotune" },
+	{ Opt_hard, "hard" },
+	{ Opt_soft, "soft" },
+	{ Opt_perm, "perm" },
+	{ Opt_noperm, "noperm" },
+	{ Opt_mapchars, "mapchars" },
+	{ Opt_nomapchars, "nomapchars" },
+	{ Opt_sfu, "sfu" },
+	{ Opt_nosfu, "nosfu" },
+	{ Opt_nodfs, "nodfs" },
+	{ Opt_posixpaths, "posixpaths" },
+	{ Opt_noposixpaths, "noposixpaths" },
+	{ Opt_nounix, "nounix" },
+	{ Opt_nounix, "nolinux" },
+	{ Opt_nocase, "nocase" },
+	{ Opt_nocase, "ignorecase" },
+	{ Opt_brl, "brl" },
+	{ Opt_nobrl, "nobrl" },
+	{ Opt_nobrl, "nolock" },
+	{ Opt_forcemandatorylock, "forcemandatorylock" },
+	{ Opt_forcemandatorylock, "forcemand" },
+	{ Opt_setuids, "setuids" },
+	{ Opt_nosetuids, "nosetuids" },
+	{ Opt_dynperm, "dynperm" },
+	{ Opt_nodynperm, "nodynperm" },
+	{ Opt_nohard, "nohard" },
+	{ Opt_nosoft, "nosoft" },
+	{ Opt_nointr, "nointr" },
+	{ Opt_intr, "intr" },
+	{ Opt_nostrictsync, "nostrictsync" },
+	{ Opt_strictsync, "strictsync" },
+	{ Opt_serverino, "serverino" },
+	{ Opt_noserverino, "noserverino" },
+	{ Opt_rwpidforward, "rwpidforward" },
+	{ Opt_cifsacl, "cifsacl" },
+	{ Opt_nocifsacl, "nocifsacl" },
+	{ Opt_acl, "acl" },
+	{ Opt_noacl, "noacl" },
+	{ Opt_locallease, "locallease" },
+	{ Opt_sign, "sign" },
+	{ Opt_seal, "seal" },
+	{ Opt_direct, "direct" },
+	{ Opt_direct, "directio" },
+	{ Opt_direct, "forcedirectio" },
+	{ Opt_strictcache, "strictcache" },
+	{ Opt_noac, "noac" },
+	{ Opt_fsc, "fsc" },
+	{ Opt_mfsymlinks, "mfsymlinks" },
+	{ Opt_multiuser, "multiuser" },
+	{ Opt_sloppy, "sloppy" },
+
+	{ Opt_backupuid, "backupuid=%s" },
+	{ Opt_backupgid, "backupgid=%s" },
+	{ Opt_uid, "uid=%s" },
+	{ Opt_cruid, "cruid=%s" },
+	{ Opt_gid, "gid=%s" },
+	{ Opt_file_mode, "file_mode=%s" },
+	{ Opt_dirmode, "dirmode=%s" },
+	{ Opt_dirmode, "dir_mode=%s" },
+	{ Opt_port, "port=%s" },
+	{ Opt_rsize, "rsize=%s" },
+	{ Opt_wsize, "wsize=%s" },
+	{ Opt_actimeo, "actimeo=%s" },
+
+	{ Opt_blank_user, "user=" },
+	{ Opt_blank_user, "username=" },
+	{ Opt_user, "user=%s" },
+	{ Opt_user, "username=%s" },
+	{ Opt_blank_pass, "pass=" },
+	{ Opt_pass, "pass=%s" },
+	{ Opt_pass, "password=%s" },
+	{ Opt_blank_ip, "ip=" },
+	{ Opt_blank_ip, "addr=" },
+	{ Opt_ip, "ip=%s" },
+	{ Opt_ip, "addr=%s" },
+	{ Opt_unc, "unc=%s" },
+	{ Opt_unc, "target=%s" },
+	{ Opt_unc, "path=%s" },
+	{ Opt_domain, "dom=%s" },
+	{ Opt_domain, "domain=%s" },
+	{ Opt_domain, "workgroup=%s" },
+	{ Opt_srcaddr, "srcaddr=%s" },
+	{ Opt_prefixpath, "prefixpath=%s" },
+	{ Opt_iocharset, "iocharset=%s" },
+	{ Opt_sockopt, "sockopt=%s" },
+	{ Opt_netbiosname, "netbiosname=%s" },
+	{ Opt_servern, "servern=%s" },
+	{ Opt_ver, "ver=%s" },
+	{ Opt_ver, "vers=%s" },
+	{ Opt_ver, "version=%s" },
+	{ Opt_sec, "sec=%s" },
+
+	{ Opt_ignore, "cred" },
+	{ Opt_ignore, "credentials" },
+	{ Opt_ignore, "cred=%s" },
+	{ Opt_ignore, "credentials=%s" },
+	{ Opt_ignore, "guest" },
+	{ Opt_ignore, "rw" },
+	{ Opt_ignore, "ro" },
+	{ Opt_ignore, "suid" },
+	{ Opt_ignore, "nosuid" },
+	{ Opt_ignore, "exec" },
+	{ Opt_ignore, "noexec" },
+	{ Opt_ignore, "nodev" },
+	{ Opt_ignore, "noauto" },
+	{ Opt_ignore, "dev" },
+	{ Opt_ignore, "mand" },
+	{ Opt_ignore, "nomand" },
+	{ Opt_ignore, "_netdev" },
+
+	{ Opt_err, NULL }
+};
+
+enum {
+	Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
+	Opt_sec_ntlmsspi, Opt_sec_ntlmssp,
+	Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2i,
+	Opt_sec_nontlm, Opt_sec_lanman,
+	Opt_sec_none,
+
+	Opt_sec_err
+};
+
+static const match_table_t cifs_secflavor_tokens = {
+	{ Opt_sec_krb5, "krb5" },
+	{ Opt_sec_krb5i, "krb5i" },
+	{ Opt_sec_krb5p, "krb5p" },
+	{ Opt_sec_ntlmsspi, "ntlmsspi" },
+	{ Opt_sec_ntlmssp, "ntlmssp" },
+	{ Opt_ntlm, "ntlm" },
+	{ Opt_sec_ntlmi, "ntlmi" },
+	{ Opt_sec_ntlmv2i, "ntlmv2i" },
+	{ Opt_sec_nontlm, "nontlm" },
+	{ Opt_sec_lanman, "lanman" },
+	{ Opt_sec_none, "none" },
+
+	{ Opt_sec_err, NULL }
+};
+
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
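The two match_table_t tables above drive the kernel's generic token parser from <linux/parser.h>. A minimal, userspace-flavored sketch of the same pattern, since the kernel helper only runs in-tree; the table layout mirrors match_token() (literal entries match whole, "name=%s" entries match a prefix and capture the argument), though the real matcher supports more pattern types:

#include <stdio.h>
#include <string.h>

enum { Opt_hard, Opt_soft, Opt_uid, Opt_err };

struct match_tok { int token; const char *pattern; };

static const struct match_tok tokens[] = {
	{ Opt_hard, "hard" },
	{ Opt_soft, "soft" },
	{ Opt_uid,  "uid=%s" },
	{ Opt_err,  NULL }
};

/* Exact literals match the whole string; "name=%s" matches a prefix. */
static int match_token_sketch(const char *s, const char **arg)
{
	const struct match_tok *t;

	for (t = tokens; t->pattern; t++) {
		size_t n = strcspn(t->pattern, "%");
		if (n == strlen(t->pattern)) {		/* no %s: literal */
			if (strcmp(s, t->pattern) == 0)
				return t->token;
		} else if (strncmp(s, t->pattern, n) == 0) {
			*arg = s + n;			/* text after "=" */
			return t->token;
		}
	}
	return Opt_err;
}

int main(void)
{
	const char *arg = NULL;
	printf("%d\n", match_token_sketch("hard", &arg));		/* 0 */
	printf("%d %s\n", match_token_sketch("uid=1000", &arg), arg);	/* 2 1000 */
	return 0;
}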
@@ -143,8 +341,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	spin_lock(&GlobalMid_Lock);
 	list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		if (mid_entry->midState == MID_REQUEST_SUBMITTED)
-			mid_entry->midState = MID_RETRY_NEEDED;
+		if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
+			mid_entry->mid_state = MID_RETRY_NEEDED;
 		list_move(&mid_entry->qhead, &retry_list);
 	}
 	spin_unlock(&GlobalMid_Lock);
@@ -183,8 +381,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	-EINVAL = invalid transact2
 
  */
-static int check2ndT2(struct smb_hdr *pSMB)
+static int check2ndT2(char *buf)
 {
+	struct smb_hdr *pSMB = (struct smb_hdr *)buf;
 	struct smb_t2_rsp *pSMBt;
 	int remaining;
 	__u16 total_data_size, data_in_this_rsp;
@@ -224,10 +423,10 @@ static int check2ndT2(struct smb_hdr *pSMB)
 	return remaining;
 }
 
-static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
+static int coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
 {
-	struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)psecond;
-	struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB;
+	struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)second_buf;
+	struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)target_hdr;
 	char *data_area_of_tgt;
 	char *data_area_of_src;
 	int remaining;
@@ -280,23 +479,23 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
 	put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount);
 
 	/* fix up the BCC */
-	byte_count = get_bcc(pTargetSMB);
+	byte_count = get_bcc(target_hdr);
 	byte_count += total_in_src;
 	/* is the result too big for the field? */
 	if (byte_count > USHRT_MAX) {
 		cFYI(1, "coalesced BCC too large (%u)", byte_count);
 		return -EPROTO;
 	}
-	put_bcc(byte_count, pTargetSMB);
+	put_bcc(byte_count, target_hdr);
 
-	byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
+	byte_count = be32_to_cpu(target_hdr->smb_buf_length);
 	byte_count += total_in_src;
 	/* don't allow buffer to overflow */
 	if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
 		cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count);
 		return -ENOBUFS;
 	}
-	pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
+	target_hdr->smb_buf_length = cpu_to_be32(byte_count);
 
 	/* copy second buffer into end of first buffer */
 	memcpy(data_area_of_tgt, data_area_of_src, total_in_src);
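The two guards above are plain range checks: after coalescing, the byte count must still fit in the 16-bit BCC field and in the preallocated receive buffer. A self-contained sketch of the same pattern (the buffer limit is illustrative, not the real CIFSMaxBufSize arithmetic):

#include <errno.h>
#include <limits.h>
#include <stdint.h>

#define BUF_LIMIT 16384u	/* stand-in for CIFSMaxBufSize + hdr - 4 */

/* Grow a 16-bit byte count, refusing anything the field or the buffer
 * cannot hold; mirrors the coalesce_t2() guards above. */
static int grow_bcc(uint16_t *bcc, unsigned int extra)
{
	unsigned int total = *bcc + extra;

	if (total > USHRT_MAX)
		return -EPROTO;		/* doesn't fit in the BCC field */
	if (total > BUF_LIMIT)
		return -ENOBUFS;	/* doesn't fit in the buffer */
	*bcc = (uint16_t)total;
	return 0;
}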
@@ -334,7 +533,7 @@ cifs_echo_request(struct work_struct *work)
 			server->hostname);
 
 requeue_echo:
-	queue_delayed_work(system_nrt_wq, &server->echo, SMB_ECHO_INTERVAL);
+	queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
 }
 
 static bool
340static bool 539static bool
@@ -350,7 +549,7 @@ allocate_buffers(struct TCP_Server_Info *server)
350 } 549 }
351 } else if (server->large_buf) { 550 } else if (server->large_buf) {
352 /* we are reusing a dirty large buf, clear its start */ 551 /* we are reusing a dirty large buf, clear its start */
353 memset(server->bigbuf, 0, sizeof(struct smb_hdr)); 552 memset(server->bigbuf, 0, header_size());
354 } 553 }
355 554
356 if (!server->smallbuf) { 555 if (!server->smallbuf) {
@@ -364,7 +563,7 @@ allocate_buffers(struct TCP_Server_Info *server)
364 /* beginning of smb buffer is cleared in our buf_get */ 563 /* beginning of smb buffer is cleared in our buf_get */
365 } else { 564 } else {
366 /* if existing small buf clear beginning */ 565 /* if existing small buf clear beginning */
367 memset(server->smallbuf, 0, sizeof(struct smb_hdr)); 566 memset(server->smallbuf, 0, header_size());
368 } 567 }
369 568
370 return true; 569 return true;
@@ -373,12 +572,22 @@ allocate_buffers(struct TCP_Server_Info *server)
 static bool
 server_unresponsive(struct TCP_Server_Info *server)
 {
-	if (echo_retries > 0 && server->tcpStatus == CifsGood &&
-	    time_after(jiffies, server->lstrp +
-				(echo_retries * SMB_ECHO_INTERVAL))) {
+	/*
+	 * We need to wait 2 echo intervals to make sure we handle such
+	 * situations right:
+	 * 1s  client sends a normal SMB request
+	 * 2s  client gets a response
+	 * 30s echo workqueue job pops, and decides we got a response recently
+	 *     and don't need to send another
+	 * ...
+	 * 65s kernel_recvmsg times out, and we see that we haven't gotten
+	 *     a response in >60s.
+	 */
+	if (server->tcpStatus == CifsGood &&
+	    time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
 		cERROR(1, "Server %s has not responded in %d seconds. "
 			  "Reconnecting...", server->hostname,
-			  (echo_retries * SMB_ECHO_INTERVAL / HZ));
+			  (2 * SMB_ECHO_INTERVAL) / HZ);
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
 		return true;
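Comparisons against jiffies must go through time_after() so counter wraparound is handled correctly. A small sketch of the idle-detection pattern used above, with hypothetical names (last_rsp stands in for server->lstrp, and the interval is illustrative):

#include <linux/jiffies.h>
#include <linux/types.h>

#define ECHO_INTERVAL	(60 * HZ)	/* SMB_ECHO_INTERVAL in cifs */

/* True when no traffic has been seen for two echo intervals. */
static bool idle_too_long(unsigned long last_rsp)
{
	return time_after(jiffies, last_rsp + 2 * ECHO_INTERVAL);
}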
@@ -556,15 +765,16 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
 }
 
 static struct mid_q_entry *
-find_mid(struct TCP_Server_Info *server, struct smb_hdr *buf)
+find_mid(struct TCP_Server_Info *server, char *buffer)
 {
+	struct smb_hdr *buf = (struct smb_hdr *)buffer;
 	struct mid_q_entry *mid;
 
 	spin_lock(&GlobalMid_Lock);
 	list_for_each_entry(mid, &server->pending_mid_q, qhead) {
 		if (mid->mid == buf->Mid &&
-		    mid->midState == MID_REQUEST_SUBMITTED &&
-		    mid->command == buf->Command) {
+		    mid->mid_state == MID_REQUEST_SUBMITTED &&
+		    le16_to_cpu(mid->command) == buf->Command) {
 			spin_unlock(&GlobalMid_Lock);
 			return mid;
 		}
@@ -581,16 +791,16 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
 #endif
 	spin_lock(&GlobalMid_Lock);
 	if (!malformed)
-		mid->midState = MID_RESPONSE_RECEIVED;
+		mid->mid_state = MID_RESPONSE_RECEIVED;
 	else
-		mid->midState = MID_RESPONSE_MALFORMED;
+		mid->mid_state = MID_RESPONSE_MALFORMED;
 	list_del_init(&mid->qhead);
 	spin_unlock(&GlobalMid_Lock);
 }
 
 static void
 handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server,
-	   struct smb_hdr *buf, int malformed)
+	   char *buf, int malformed)
 {
 	if (malformed == 0 && check2ndT2(buf) > 0) {
 		mid->multiRsp = true;
@@ -610,13 +820,13 @@ handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 		} else {
 			/* Have first buffer */
 			mid->resp_buf = buf;
-			mid->largeBuf = true;
+			mid->large_buf = true;
 			server->bigbuf = NULL;
 		}
 		return;
 	}
 	mid->resp_buf = buf;
-	mid->largeBuf = server->large_buf;
+	mid->large_buf = server->large_buf;
 	/* Was previous buf put in mpx struct for multi-rsp? */
 	if (!mid->multiRsp) {
 		/* smb buffer will be freed by user thread */
@@ -642,19 +852,11 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
 	spin_unlock(&GlobalMid_Lock);
 	wake_up_all(&server->response_q);
 
-	/*
-	 * Check if we have blocked requests that need to free. Note that
-	 * cifs_max_pending is normally 50, but can be set at module install
-	 * time to as little as two.
-	 */
-	spin_lock(&GlobalMid_Lock);
-	if (atomic_read(&server->inFlight) >= cifs_max_pending)
-		atomic_set(&server->inFlight, cifs_max_pending - 1);
-	/*
-	 * We do not want to set the max_pending too low or we could end up
-	 * with the counter going negative.
-	 */
-	spin_unlock(&GlobalMid_Lock);
+	/* check if we have blocked requests that need to free */
+	spin_lock(&server->req_lock);
+	if (server->credits <= 0)
+		server->credits = 1;
+	spin_unlock(&server->req_lock);
 	/*
 	 * Although there should not be any requests blocked on this queue it
 	 * can not hurt to be paranoid and try to wake up requests that may
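The global inFlight counter and its module-tunable ceiling give way here to a per-server credit count under its own spinlock; on teardown the count is simply clamped to at least one credit so any blocked sender can wake up. A reduced sketch of that reset, with a stand-in server type carrying only the two fields this hunk touches:

#include <linux/spinlock.h>

/* Minimal stand-in for the TCP_Server_Info fields used above. */
struct example_server {
	spinlock_t req_lock;
	int credits;
};

/* On connection teardown, guarantee at least one send credit so any
 * waiter in the request path can proceed and observe the dead socket. */
static void reset_credits(struct example_server *server)
{
	spin_lock(&server->req_lock);
	if (server->credits <= 0)
		server->credits = 1;
	spin_unlock(&server->req_lock);
}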
@@ -680,8 +882,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
 	spin_lock(&GlobalMid_Lock);
 	list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		cFYI(1, "Clearing mid 0x%x", mid_entry->mid);
-		mid_entry->midState = MID_SHUTDOWN;
+		cFYI(1, "Clearing mid 0x%llx", mid_entry->mid);
+		mid_entry->mid_state = MID_SHUTDOWN;
 		list_move(&mid_entry->qhead, &dispose_list);
 	}
 	spin_unlock(&GlobalMid_Lock);
@@ -689,7 +891,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
 	/* now walk dispose list and issue callbacks */
 	list_for_each_safe(tmp, tmp2, &dispose_list) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		cFYI(1, "Callback mid 0x%x", mid_entry->mid);
+		cFYI(1, "Callback mid 0x%llx", mid_entry->mid);
 		list_del_init(&mid_entry->qhead);
 		mid_entry->callback(mid_entry);
 	}
@@ -729,11 +931,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
 	int length;
 	char *buf = server->smallbuf;
-	struct smb_hdr *smb_buffer = (struct smb_hdr *)buf;
-	unsigned int pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
+	unsigned int pdu_length = get_rfc1002_length(buf);
 
 	/* make sure this will fit in a large buffer */
-	if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+	if (pdu_length > CIFSMaxBufSize + max_header_size() - 4) {
 		cERROR(1, "SMB response too long (%u bytes)",
 			pdu_length);
 		cifs_reconnect(server);
@@ -744,20 +945,18 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	/* switch to large buffer if too big for a small one */
 	if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) {
 		server->large_buf = true;
-		memcpy(server->bigbuf, server->smallbuf, server->total_read);
+		memcpy(server->bigbuf, buf, server->total_read);
 		buf = server->bigbuf;
-		smb_buffer = (struct smb_hdr *)buf;
 	}
 
 	/* now read the rest */
-	length = cifs_read_from_socket(server,
-				       buf + sizeof(struct smb_hdr) - 1,
-				       pdu_length - sizeof(struct smb_hdr) + 1 + 4);
+	length = cifs_read_from_socket(server, buf + header_size() - 1,
+				       pdu_length - header_size() + 1 + 4);
 	if (length < 0)
 		return length;
 	server->total_read += length;
 
-	dump_smb(smb_buffer, server->total_read);
+	dump_smb(buf, server->total_read);
 
 	/*
 	 * We know that we received enough to get to the MID as we
@@ -768,7 +967,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	 * 48 bytes is enough to display the header and a little bit
 	 * into the payload for debugging purposes.
	 */
-	length = checkSMB(smb_buffer, smb_buffer->Mid, server->total_read);
+	length = checkSMB(buf, server->total_read);
 	if (length != 0)
 		cifs_dump_mem("Bad SMB: ", buf,
 			      min_t(unsigned int, server->total_read, 48));
@@ -776,7 +975,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	if (!mid)
 		return length;
 
-	handle_mid(mid, server, smb_buffer, length);
+	handle_mid(mid, server, buf, length);
 	return 0;
 }
 
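get_rfc1002_length() replaces the open-coded be32_to_cpu() on the header's first field: the four-byte RFC 1002 prefix at the start of every frame carries the PDU length in network byte order. A self-contained sketch of that decode; the 24-bit mask is an assumption that the type/flags byte should be stripped, whereas the real helper lives in the cifs headers:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

/* Decode the 4-byte RFC 1002 length prefix at the start of a frame.
 * Assumes a session-message frame where the length occupies the low
 * 24 bits. */
static uint32_t rfc1002_length(const unsigned char *buf)
{
	uint32_t be;

	memcpy(&be, buf, sizeof(be));		/* avoid unaligned loads */
	return ntohl(be) & 0x00FFFFFF;		/* strip the type byte */
}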
782 981
@@ -787,7 +986,6 @@ cifs_demultiplex_thread(void *p)
787 struct TCP_Server_Info *server = p; 986 struct TCP_Server_Info *server = p;
788 unsigned int pdu_length; 987 unsigned int pdu_length;
789 char *buf = NULL; 988 char *buf = NULL;
790 struct smb_hdr *smb_buffer = NULL;
791 struct task_struct *task_to_wake = NULL; 989 struct task_struct *task_to_wake = NULL;
792 struct mid_q_entry *mid_entry; 990 struct mid_q_entry *mid_entry;
793 991
@@ -808,7 +1006,6 @@ cifs_demultiplex_thread(void *p)
808 continue; 1006 continue;
809 1007
810 server->large_buf = false; 1008 server->large_buf = false;
811 smb_buffer = (struct smb_hdr *)server->smallbuf;
812 buf = server->smallbuf; 1009 buf = server->smallbuf;
813 pdu_length = 4; /* enough to get RFC1001 header */ 1010 pdu_length = 4; /* enough to get RFC1001 header */
814 1011
@@ -821,14 +1018,14 @@ cifs_demultiplex_thread(void *p)
 		 * The right amount was read from socket - 4 bytes,
 		 * so we can now interpret the length field.
 		 */
-		pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
+		pdu_length = get_rfc1002_length(buf);
 
 		cFYI(1, "RFC1002 header 0x%x", pdu_length);
 		if (!is_smb_response(server, buf[0]))
 			continue;
 
 		/* make sure we have enough to get to the MID */
-		if (pdu_length < sizeof(struct smb_hdr) - 1 - 4) {
+		if (pdu_length < header_size() - 1 - 4) {
 			cERROR(1, "SMB response too short (%u bytes)",
 				pdu_length);
 			cifs_reconnect(server);
@@ -838,12 +1035,12 @@ cifs_demultiplex_thread(void *p)
 
 		/* read down to the MID */
 		length = cifs_read_from_socket(server, buf + 4,
-					       sizeof(struct smb_hdr) - 1 - 4);
+					       header_size() - 1 - 4);
 		if (length < 0)
 			continue;
 		server->total_read += length;
 
-		mid_entry = find_mid(server, smb_buffer);
+		mid_entry = find_mid(server, buf);
 
 		if (!mid_entry || !mid_entry->receive)
 			length = standard_receive3(server, mid_entry);
@@ -853,22 +1050,19 @@ cifs_demultiplex_thread(void *p)
 		if (length < 0)
 			continue;
 
-		if (server->large_buf) {
+		if (server->large_buf)
 			buf = server->bigbuf;
-			smb_buffer = (struct smb_hdr *)buf;
-		}
 
 		server->lstrp = jiffies;
 		if (mid_entry != NULL) {
 			if (!mid_entry->multiRsp || mid_entry->multiEnd)
 				mid_entry->callback(mid_entry);
-		} else if (!is_valid_oplock_break(smb_buffer, server)) {
+		} else if (!is_valid_oplock_break(buf, server)) {
 			cERROR(1, "No task to wake, unknown frame received! "
 				  "NumMids %d", atomic_read(&midCount));
-			cifs_dump_mem("Received Data is: ", buf,
-				      sizeof(struct smb_hdr));
+			cifs_dump_mem("Received Data is: ", buf, header_size());
 #ifdef CONFIG_CIFS_DEBUG2
-			cifs_dump_detail(smb_buffer);
+			cifs_dump_detail(buf);
 			cifs_dump_mids(server);
 #endif /* CIFS_DEBUG2 */
 
@@ -924,23 +1118,95 @@ extract_hostname(const char *unc)
 	return dst;
 }
 
+static int get_option_ul(substring_t args[], unsigned long *option)
+{
+	int rc;
+	char *string;
+
+	string = match_strdup(args);
+	if (string == NULL)
+		return -ENOMEM;
+	rc = kstrtoul(string, 0, option);
+	kfree(string);
+
+	return rc;
+}
+
+
+static int cifs_parse_security_flavors(char *value,
+				       struct smb_vol *vol)
+{
+
+	substring_t args[MAX_OPT_ARGS];
+
+	switch (match_token(value, cifs_secflavor_tokens, args)) {
+	case Opt_sec_krb5:
+		vol->secFlg |= CIFSSEC_MAY_KRB5;
+		break;
+	case Opt_sec_krb5i:
+		vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN;
+		break;
+	case Opt_sec_krb5p:
+		/* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */
+		cERROR(1, "Krb5 cifs privacy not supported");
+		break;
+	case Opt_sec_ntlmssp:
+		vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
+		break;
+	case Opt_sec_ntlmsspi:
+		vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN;
+		break;
+	case Opt_ntlm:
+		/* ntlm is default so can be turned off too */
+		vol->secFlg |= CIFSSEC_MAY_NTLM;
+		break;
+	case Opt_sec_ntlmi:
+		vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN;
+		break;
+	case Opt_sec_nontlm:
+		vol->secFlg |= CIFSSEC_MAY_NTLMV2;
+		break;
+	case Opt_sec_ntlmv2i:
+		vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN;
+		break;
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+	case Opt_sec_lanman:
+		vol->secFlg |= CIFSSEC_MAY_LANMAN;
+		break;
+#endif
+	case Opt_sec_none:
+		vol->nullauth = 1;
+		break;
+	default:
+		cERROR(1, "bad security option: %s", value);
+		return 1;
+	}
+
+	return 0;
+}
+
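get_option_ul() wraps the duplicate/parse/free dance every numeric option needs: copy the matched substring, parse it in any base (0 auto-detects 0x and 0 prefixes), free the copy, and propagate the parse error. A self-contained userspace sketch of the same pattern, with strdup()/strtoul() standing in for match_strdup()/kstrtoul():

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Userspace sketch of get_option_ul(): *option is only written on
 * success, and trailing junk in the string is rejected. */
static int parse_option_ul(const char *arg, unsigned long *option)
{
	char *string, *end;
	unsigned long val;

	string = strdup(arg);
	if (string == NULL)
		return -ENOMEM;
	errno = 0;
	val = strtoul(string, &end, 0);
	if (errno || end == string || *end != '\0') {
		free(string);
		return -EINVAL;
	}
	free(string);
	*option = val;
	return 0;
}

int main(void)
{
	unsigned long uid;
	return parse_option_ul("0x3e8", &uid);	/* parses as 1000 */
}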
 static int
 cifs_parse_mount_options(const char *mountdata, const char *devname,
 			 struct smb_vol *vol)
 {
-	char *value, *data, *end;
+	char *data, *end;
 	char *mountdata_copy = NULL, *options;
-	int err;
 	unsigned int temp_len, i, j;
 	char separator[2];
 	short int override_uid = -1;
 	short int override_gid = -1;
 	bool uid_specified = false;
 	bool gid_specified = false;
+	bool sloppy = false;
+	char *invalid = NULL;
 	char *nodename = utsname()->nodename;
+	char *string = NULL;
+	char *tmp_end, *value;
+	char delim;
 
 	separator[0] = ',';
 	separator[1] = 0;
+	delim = separator[0];
 
 	/*
 	 * does not have to be perfect mapping since field is
@@ -979,6 +1245,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 	options = mountdata_copy;
 	end = options + strlen(options);
+
 	if (strncmp(options, "sep=", 4) == 0) {
 		if (options[4] != 0) {
 			separator[0] = options[4];
@@ -991,609 +1258,603 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	vol->backupgid_specified = false; /* no backup intent for a group */
 
 	while ((data = strsep(&options, separator)) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		unsigned long option;
+		int token;
+
 		if (!*data)
 			continue;
-		if ((value = strchr(data, '=')) != NULL)
-			*value++ = '\0';
 
-		/* Have to parse this before we parse for "user" */
-		if (strnicmp(data, "user_xattr", 10) == 0) {
+		token = match_token(data, cifs_mount_option_tokens, args);
+
+		switch (token) {
+
+		/* Ignore the following */
+		case Opt_ignore:
+			break;
+
+		/* Boolean values */
+		case Opt_user_xattr:
 			vol->no_xattr = 0;
-		} else if (strnicmp(data, "nouser_xattr", 12) == 0) {
+			break;
+		case Opt_nouser_xattr:
 			vol->no_xattr = 1;
-		} else if (strnicmp(data, "user", 4) == 0) {
-			if (!value) {
-				printk(KERN_WARNING
-				       "CIFS: invalid or missing username\n");
-				goto cifs_parse_mount_err;
-			} else if (!*value) {
-				/* null user, ie anonymous, authentication */
-				vol->nullauth = 1;
-			}
-			if (strnlen(value, MAX_USERNAME_SIZE) <
-						MAX_USERNAME_SIZE) {
-				vol->username = kstrdup(value, GFP_KERNEL);
-				if (!vol->username) {
-					printk(KERN_WARNING "CIFS: no memory "
-							    "for username\n");
-					goto cifs_parse_mount_err;
-				}
-			} else {
-				printk(KERN_WARNING "CIFS: username too long\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "pass", 4) == 0) {
-			if (!value) {
-				vol->password = NULL;
-				continue;
-			} else if (value[0] == 0) {
-				/* check if string begins with double comma
-				   since that would mean the password really
-				   does start with a comma, and would not
-				   indicate an empty string */
-				if (value[1] != separator[0]) {
-					vol->password = NULL;
-					continue;
-				}
-			}
-			temp_len = strlen(value);
-			/* removed password length check, NTLM passwords
-			   can be arbitrarily long */
-
-			/* if comma in password, the string will be
-			   prematurely null terminated.  Commas in password are
-			   specified across the cifs mount interface by a double
-			   comma ie ,, and a comma used as in other cases ie ','
-			   as a parameter delimiter/separator is single and due
-			   to the strsep above is temporarily zeroed. */
-
-			/* NB: password legally can have multiple commas and
-			   the only illegal character in a password is null */
-
-			if ((value[temp_len] == 0) &&
-			    (value + temp_len < end) &&
-			    (value[temp_len+1] == separator[0])) {
-				/* reinsert comma */
-				value[temp_len] = separator[0];
-				temp_len += 2;  /* move after second comma */
-				while (value[temp_len] != 0)  {
-					if (value[temp_len] == separator[0]) {
-						if (value[temp_len+1] ==
-						     separator[0]) {
-							/* skip second comma */
-							temp_len++;
-						} else {
-							/* single comma indicating start
-							   of next parm */
-							break;
-						}
-					}
-					temp_len++;
-				}
-				if (value[temp_len] == 0) {
-					options = NULL;
-				} else {
-					value[temp_len] = 0;
-					/* point option to start of next parm */
-					options = value + temp_len + 1;
-				}
-				/* go from value to value + temp_len condensing
-				   double commas to singles. Note that this ends up
-				   allocating a few bytes too many, which is ok */
-				vol->password = kzalloc(temp_len, GFP_KERNEL);
-				if (vol->password == NULL) {
-					printk(KERN_WARNING "CIFS: no memory "
-							    "for password\n");
-					goto cifs_parse_mount_err;
-				}
-				for (i = 0, j = 0; i < temp_len; i++, j++) {
-					vol->password[j] = value[i];
-					if (value[i] == separator[0]
-						&& value[i+1] == separator[0]) {
-						/* skip second comma */
-						i++;
-					}
-				}
-				vol->password[j] = 0;
-			} else {
-				vol->password = kzalloc(temp_len+1, GFP_KERNEL);
-				if (vol->password == NULL) {
-					printk(KERN_WARNING "CIFS: no memory "
-							    "for password\n");
-					goto cifs_parse_mount_err;
-				}
-				strcpy(vol->password, value);
-			}
-		} else if (!strnicmp(data, "ip", 2) ||
-			   !strnicmp(data, "addr", 4)) {
-			if (!value || !*value) {
-				vol->UNCip = NULL;
-			} else if (strnlen(value, INET6_ADDRSTRLEN) <
-							INET6_ADDRSTRLEN) {
-				vol->UNCip = kstrdup(value, GFP_KERNEL);
-				if (!vol->UNCip) {
-					printk(KERN_WARNING "CIFS: no memory "
-							    "for UNC IP\n");
-					goto cifs_parse_mount_err;
-				}
-			} else {
-				printk(KERN_WARNING "CIFS: ip address "
-						    "too long\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "sec", 3) == 0) {
-			if (!value || !*value) {
-				cERROR(1, "no security value specified");
-				continue;
-			} else if (strnicmp(value, "krb5i", 5) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_KRB5 |
-					CIFSSEC_MUST_SIGN;
-			} else if (strnicmp(value, "krb5p", 5) == 0) {
-				/* vol->secFlg |= CIFSSEC_MUST_SEAL |
-				   CIFSSEC_MAY_KRB5; */
-				cERROR(1, "Krb5 cifs privacy not supported");
-				goto cifs_parse_mount_err;
-			} else if (strnicmp(value, "krb5", 4) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_KRB5;
-			} else if (strnicmp(value, "ntlmsspi", 8) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
-					CIFSSEC_MUST_SIGN;
-			} else if (strnicmp(value, "ntlmssp", 7) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
-			} else if (strnicmp(value, "ntlmv2i", 7) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
-					CIFSSEC_MUST_SIGN;
-			} else if (strnicmp(value, "ntlmv2", 6) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_NTLMV2;
-			} else if (strnicmp(value, "ntlmi", 5) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_NTLM |
-					CIFSSEC_MUST_SIGN;
-			} else if (strnicmp(value, "ntlm", 4) == 0) {
-				/* ntlm is default so can be turned off too */
-				vol->secFlg |= CIFSSEC_MAY_NTLM;
-			} else if (strnicmp(value, "nontlm", 6) == 0) {
-				/* BB is there a better way to do this? */
-				vol->secFlg |= CIFSSEC_MAY_NTLMV2;
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-			} else if (strnicmp(value, "lanman", 6) == 0) {
-				vol->secFlg |= CIFSSEC_MAY_LANMAN;
-#endif
-			} else if (strnicmp(value, "none", 4) == 0) {
-				vol->nullauth = 1;
-			} else {
-				cERROR(1, "bad security option: %s", value);
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "vers", 3) == 0) {
-			if (!value || !*value) {
-				cERROR(1, "no protocol version specified"
-					  " after vers= mount option");
-			} else if ((strnicmp(value, "cifs", 4) == 0) ||
-				   (strnicmp(value, "1", 1) == 0)) {
-				/* this is the default */
-				continue;
-			}
-		} else if ((strnicmp(data, "unc", 3) == 0)
-			   || (strnicmp(data, "target", 6) == 0)
-			   || (strnicmp(data, "path", 4) == 0)) {
-			if (!value || !*value) {
-				printk(KERN_WARNING "CIFS: invalid path to "
-						    "network resource\n");
-				goto cifs_parse_mount_err;
-			}
-			if ((temp_len = strnlen(value, 300)) < 300) {
-				vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
-				if (vol->UNC == NULL)
-					goto cifs_parse_mount_err;
-				strcpy(vol->UNC, value);
-				if (strncmp(vol->UNC, "//", 2) == 0) {
-					vol->UNC[0] = '\\';
-					vol->UNC[1] = '\\';
-				} else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
-					printk(KERN_WARNING
-					       "CIFS: UNC Path does not begin "
-					       "with // or \\\\ \n");
-					goto cifs_parse_mount_err;
-				}
-			} else {
-				printk(KERN_WARNING "CIFS: UNC name too long\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if ((strnicmp(data, "domain", 3) == 0)
-			   || (strnicmp(data, "workgroup", 5) == 0)) {
-			if (!value || !*value) {
-				printk(KERN_WARNING "CIFS: invalid domain name\n");
-				goto cifs_parse_mount_err;
-			}
-			/* BB are there cases in which a comma can be valid in
-			   a domain name and need special handling? */
-			if (strnlen(value, 256) < 256) {
-				vol->domainname = kstrdup(value, GFP_KERNEL);
-				if (!vol->domainname) {
-					printk(KERN_WARNING "CIFS: no memory "
-							    "for domainname\n");
-					goto cifs_parse_mount_err;
-				}
-				cFYI(1, "Domain name set");
-			} else {
-				printk(KERN_WARNING "CIFS: domain name too "
-						    "long\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "srcaddr", 7) == 0) {
-			vol->srcaddr.ss_family = AF_UNSPEC;
-
-			if (!value || !*value) {
-				printk(KERN_WARNING "CIFS: srcaddr value"
-				       " not specified.\n");
-				goto cifs_parse_mount_err;
-			}
-			i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
-						 value, strlen(value));
-			if (i == 0) {
-				printk(KERN_WARNING "CIFS:  Could not parse"
-				       " srcaddr: %s\n",
-				       value);
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "prefixpath", 10) == 0) {
-			if (!value || !*value) {
-				printk(KERN_WARNING
-				       "CIFS: invalid path prefix\n");
-				goto cifs_parse_mount_err;
-			}
-			if ((temp_len = strnlen(value, 1024)) < 1024) {
-				if (value[0] != '/')
-					temp_len++;  /* missing leading slash */
-				vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
-				if (vol->prepath == NULL)
-					goto cifs_parse_mount_err;
-				if (value[0] != '/') {
-					vol->prepath[0] = '/';
-					strcpy(vol->prepath+1, value);
-				} else
-					strcpy(vol->prepath, value);
-				cFYI(1, "prefix path %s", vol->prepath);
-			} else {
-				printk(KERN_WARNING "CIFS: prefix too long\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if (strnicmp(data, "iocharset", 9) == 0) {
-			if (!value || !*value) {
-				printk(KERN_WARNING "CIFS: invalid iocharset "
-						    "specified\n");
-				goto cifs_parse_mount_err;
-			}
-			if (strnlen(value, 65) < 65) {
-				if (strnicmp(value, "default", 7)) {
-					vol->iocharset = kstrdup(value,
-								 GFP_KERNEL);
-
-					if (!vol->iocharset) {
-						printk(KERN_WARNING "CIFS: no "
-								   "memory for"
-								   "charset\n");
-						goto cifs_parse_mount_err;
-					}
-				}
-				/* if iocharset not set then load_nls_default
-				   is used by caller */
-				cFYI(1, "iocharset set to %s", value);
-			} else {
-				printk(KERN_WARNING "CIFS: iocharset name "
-						    "too long.\n");
-				goto cifs_parse_mount_err;
-			}
-		} else if (!strnicmp(data, "uid", 3) && value && *value) {
-			vol->linux_uid = simple_strtoul(value, &value, 0);
-			uid_specified = true;
-		} else if (!strnicmp(data, "cruid", 5) && value && *value) {
-			vol->cred_uid = simple_strtoul(value, &value, 0);
-		} else if (!strnicmp(data, "forceuid", 8)) {
+			break;
+		case Opt_forceuid:
 			override_uid = 1;
-		} else if (!strnicmp(data, "noforceuid", 10)) {
+			break;
+		case Opt_noforceuid:
 			override_uid = 0;
-		} else if (!strnicmp(data, "gid", 3) && value && *value) {
-			vol->linux_gid = simple_strtoul(value, &value, 0);
-			gid_specified = true;
-		} else if (!strnicmp(data, "forcegid", 8)) {
-			override_gid = 1;
-		} else if (!strnicmp(data, "noforcegid", 10)) {
-			override_gid = 0;
-		} else if (strnicmp(data, "file_mode", 4) == 0) {
-			if (value && *value) {
-				vol->file_mode =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "dir_mode", 4) == 0) {
-			if (value && *value) {
-				vol->dir_mode =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "dirmode", 4) == 0) {
-			if (value && *value) {
-				vol->dir_mode =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "port", 4) == 0) {
-			if (value && *value) {
-				vol->port =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "rsize", 5) == 0) {
-			if (value && *value) {
-				vol->rsize =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "wsize", 5) == 0) {
-			if (value && *value) {
-				vol->wsize =
-					simple_strtoul(value, &value, 0);
-			}
-		} else if (strnicmp(data, "sockopt", 5) == 0) {
-			if (!value || !*value) {
-				cERROR(1, "no socket option specified");
-				continue;
-			} else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
-				vol->sockopt_tcp_nodelay = 1;
-			}
-		} else if (strnicmp(data, "netbiosname", 4) == 0) {
-			if (!value || !*value || (*value == ' ')) {
-				cFYI(1, "invalid (empty) netbiosname");
-			} else {
-				memset(vol->source_rfc1001_name, 0x20,
-					RFC1001_NAME_LEN);
-				/*
-				 * FIXME: are there cases in which a comma can
-				 * be valid in workstation netbios name (and
-				 * need special handling)?
-				 */
-				for (i = 0; i < RFC1001_NAME_LEN; i++) {
-					/* don't ucase netbiosname for user */
-					if (value[i] == 0)
-						break;
-					vol->source_rfc1001_name[i] = value[i];
-				}
-				/* The string has 16th byte zero still from
-				   set at top of the function  */
-				if (i == RFC1001_NAME_LEN && value[i] != 0)
-					printk(KERN_WARNING "CIFS: netbiosname"
-					       " longer than 15 truncated.\n");
-			}
-		} else if (strnicmp(data, "servern", 7) == 0) {
-			/* servernetbiosname specified override *SMBSERVER */
-			if (!value || !*value || (*value == ' ')) {
-				cFYI(1, "empty server netbiosname specified");
-			} else {
-				/* last byte, type, is 0x20 for servr type */
-				memset(vol->target_rfc1001_name, 0x20,
-					RFC1001_NAME_LEN_WITH_NULL);
-
-				for (i = 0; i < 15; i++) {
-					/* BB are there cases in which a comma can be
-					   valid in this workstation netbios name
-					   (and need special handling)? */
-
-					/* user or mount helper must uppercase
-					   the netbiosname */
-					if (value[i] == 0)
-						break;
-					else
-						vol->target_rfc1001_name[i] =
-								value[i];
-				}
-				/* The string has 16th byte zero still from
-				   set at top of the function  */
-				if (i == RFC1001_NAME_LEN && value[i] != 0)
-					printk(KERN_WARNING "CIFS: server net"
-					       "biosname longer than 15 truncated.\n");
-			}
-		} else if (strnicmp(data, "actimeo", 7) == 0) {
-			if (value && *value) {
-				vol->actimeo = HZ * simple_strtoul(value,
-								   &value, 0);
-				if (vol->actimeo > CIFS_MAX_ACTIMEO) {
-					cERROR(1, "CIFS: attribute cache"
-						  "timeout too large");
-					goto cifs_parse_mount_err;
-				}
-			}
-		} else if (strnicmp(data, "credentials", 4) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "version", 3) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "guest", 5) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) {
-			/* ignore */
-		} else if (strnicmp(data, "ro", 2) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "noblocksend", 11) == 0) {
+			break;
+		case Opt_noblocksend:
 			vol->noblocksnd = 1;
-		} else if (strnicmp(data, "noautotune", 10) == 0) {
+			break;
+		case Opt_noautotune:
 			vol->noautotune = 1;
-		} else if ((strnicmp(data, "suid", 4) == 0) ||
-			   (strnicmp(data, "nosuid", 6) == 0) ||
-			   (strnicmp(data, "exec", 4) == 0) ||
-			   (strnicmp(data, "noexec", 6) == 0) ||
-			   (strnicmp(data, "nodev", 5) == 0) ||
-			   (strnicmp(data, "noauto", 6) == 0) ||
-			   (strnicmp(data, "dev", 3) == 0)) {
-			/*  The mount tool or mount.cifs helper (if present)
-			    uses these opts to set flags, and the flags are read
-			    by the kernel vfs layer before we get here (ie
-			    before read super) so there is no point trying to
-			    parse these options again and set anything and it
-			    is ok to just ignore them */
-			continue;
-		} else if (strnicmp(data, "hard", 4) == 0) {
+			break;
+		case Opt_hard:
 			vol->retry = 1;
-		} else if (strnicmp(data, "soft", 4) == 0) {
+			break;
+		case Opt_soft:
 			vol->retry = 0;
-		} else if (strnicmp(data, "perm", 4) == 0) {
+			break;
+		case Opt_perm:
 			vol->noperm = 0;
-		} else if (strnicmp(data, "noperm", 6) == 0) {
+			break;
+		case Opt_noperm:
 			vol->noperm = 1;
-		} else if (strnicmp(data, "mapchars", 8) == 0) {
+			break;
+		case Opt_mapchars:
 			vol->remap = 1;
-		} else if (strnicmp(data, "nomapchars", 10) == 0) {
+			break;
+		case Opt_nomapchars:
 			vol->remap = 0;
-		} else if (strnicmp(data, "sfu", 3) == 0) {
+			break;
+		case Opt_sfu:
 			vol->sfu_emul = 1;
-		} else if (strnicmp(data, "nosfu", 5) == 0) {
+			break;
+		case Opt_nosfu:
 			vol->sfu_emul = 0;
-		} else if (strnicmp(data, "nodfs", 5) == 0) {
+			break;
+		case Opt_nodfs:
 			vol->nodfs = 1;
-		} else if (strnicmp(data, "posixpaths", 10) == 0) {
+			break;
+		case Opt_posixpaths:
 			vol->posix_paths = 1;
-		} else if (strnicmp(data, "noposixpaths", 12) == 0) {
+			break;
+		case Opt_noposixpaths:
 			vol->posix_paths = 0;
-		} else if (strnicmp(data, "nounix", 6) == 0) {
-			vol->no_linux_ext = 1;
-		} else if (strnicmp(data, "nolinux", 7) == 0) {
+			break;
+		case Opt_nounix:
 			vol->no_linux_ext = 1;
-		} else if ((strnicmp(data, "nocase", 6) == 0) ||
-			   (strnicmp(data, "ignorecase", 10) == 0)) {
+			break;
+		case Opt_nocase:
 			vol->nocase = 1;
-		} else if (strnicmp(data, "mand", 4) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "nomand", 6) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "_netdev", 7) == 0) {
-			/* ignore */
-		} else if (strnicmp(data, "brl", 3) == 0) {
+			break;
+		case Opt_brl:
 			vol->nobrl = 0;
-		} else if ((strnicmp(data, "nobrl", 5) == 0) ||
-			   (strnicmp(data, "nolock", 6) == 0)) {
+			break;
+		case Opt_nobrl:
 			vol->nobrl = 1;
-			/* turn off mandatory locking in mode
-			   if remote locking is turned off since the
-			   local vfs will do advisory */
+			/*
+			 * turn off mandatory locking in mode
+			 * if remote locking is turned off since the
+			 * local vfs will do advisory
+			 */
 			if (vol->file_mode ==
 				(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
 				vol->file_mode = S_IALLUGO;
-		} else if (strnicmp(data, "forcemandatorylock", 9) == 0) {
-			/* will take the shorter form "forcemand" as well */
-			/* This mount option will force use of mandatory
-			   (DOS/Windows style) byte range locks, instead of
-			   using posix advisory byte range locks, even if the
-			   Unix extensions are available and posix locks would
-			   be supported otherwise. If Unix extensions are not
-			   negotiated this has no effect since mandatory locks
-			   would be used (mandatory locks is all that those
-			   those servers support) */
+			break;
+		case Opt_forcemandatorylock:
 			vol->mand_lock = 1;
-		} else if (strnicmp(data, "setuids", 7) == 0) {
+			break;
+		case Opt_setuids:
 			vol->setuids = 1;
-		} else if (strnicmp(data, "nosetuids", 9) == 0) {
+			break;
+		case Opt_nosetuids:
 			vol->setuids = 0;
-		} else if (strnicmp(data, "dynperm", 7) == 0) {
+			break;
+		case Opt_dynperm:
 			vol->dynperm = true;
-		} else if (strnicmp(data, "nodynperm", 9) == 0) {
+			break;
+		case Opt_nodynperm:
 			vol->dynperm = false;
-		} else if (strnicmp(data, "nohard", 6) == 0) {
+			break;
+		case Opt_nohard:
 			vol->retry = 0;
-		} else if (strnicmp(data, "nosoft", 6) == 0) {
+			break;
+		case Opt_nosoft:
 			vol->retry = 1;
-		} else if (strnicmp(data, "nointr", 6) == 0) {
+			break;
+		case Opt_nointr:
 			vol->intr = 0;
-		} else if (strnicmp(data, "intr", 4) == 0) {
+			break;
+		case Opt_intr:
 			vol->intr = 1;
-		} else if (strnicmp(data, "nostrictsync", 12) == 0) {
+			break;
+		case Opt_nostrictsync:
 			vol->nostrictsync = 1;
-		} else if (strnicmp(data, "strictsync", 10) == 0) {
+			break;
+		case Opt_strictsync:
 			vol->nostrictsync = 0;
-		} else if (strnicmp(data, "serverino", 7) == 0) {
+			break;
+		case Opt_serverino:
 			vol->server_ino = 1;
-		} else if (strnicmp(data, "noserverino", 9) == 0) {
+			break;
+		case Opt_noserverino:
 			vol->server_ino = 0;
-		} else if (strnicmp(data, "rwpidforward", 12) == 0) {
+			break;
+		case Opt_rwpidforward:
 			vol->rwpidforward = 1;
-		} else if (strnicmp(data, "cifsacl", 7) == 0) {
+			break;
+		case Opt_cifsacl:
 			vol->cifs_acl = 1;
-		} else if (strnicmp(data, "nocifsacl", 9) == 0) {
+			break;
+		case Opt_nocifsacl:
 			vol->cifs_acl = 0;
-		} else if (strnicmp(data, "acl", 3) == 0) {
+			break;
+		case Opt_acl:
 			vol->no_psx_acl = 0;
-		} else if (strnicmp(data, "noacl", 5) == 0) {
+			break;
+		case Opt_noacl:
 			vol->no_psx_acl = 1;
-		} else if (strnicmp(data, "locallease", 6) == 0) {
+			break;
+		case Opt_locallease:
 			vol->local_lease = 1;
-		} else if (strnicmp(data, "sign", 4) == 0) {
+			break;
+		case Opt_sign:
 			vol->secFlg |= CIFSSEC_MUST_SIGN;
-		} else if (strnicmp(data, "seal", 4) == 0) {
+			break;
+		case Opt_seal:
 			/* we do not do the following in secFlags because seal
-			   is a per tree connection (mount) not a per socket
-			   or per-smb connection option in the protocol */
-			/* vol->secFlg |= CIFSSEC_MUST_SEAL; */
+			 * is a per tree connection (mount) not a per socket
+			 * or per-smb connection option in the protocol
+			 * vol->secFlg |= CIFSSEC_MUST_SEAL;
+			 */
 			vol->seal = 1;
-		} else if (strnicmp(data, "direct", 6) == 0) {
-			vol->direct_io = 1;
-		} else if (strnicmp(data, "forcedirectio", 13) == 0) {
+			break;
+		case Opt_direct:
 			vol->direct_io = 1;
-		} else if (strnicmp(data, "strictcache", 11) == 0) {
+			break;
+		case Opt_strictcache:
 			vol->strict_io = 1;
-		} else if (strnicmp(data, "noac", 4) == 0) {
+			break;
+		case Opt_noac:
 			printk(KERN_WARNING "CIFS: Mount option noac not "
 			       "supported. Instead set "
 			       "/proc/fs/cifs/LookupCacheEnabled to 0\n");
-		} else if (strnicmp(data, "fsc", 3) == 0) {
+			break;
+		case Opt_fsc:
 #ifndef CONFIG_CIFS_FSCACHE
 			cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE "
 				  "kernel config option set");
 			goto cifs_parse_mount_err;
 #endif
 			vol->fsc = true;
-		} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
+			break;
+		case Opt_mfsymlinks:
 			vol->mfsymlinks = true;
-		} else if (strnicmp(data, "multiuser", 8) == 0) {
+			break;
+		case Opt_multiuser:
 			vol->multiuser = true;
-		} else if (!strnicmp(data, "backupuid", 9) && value && *value) {
-			err = kstrtouint(value, 0, &vol->backupuid);
-			if (err < 0) {
+			break;
+		case Opt_sloppy:
+			sloppy = true;
+			break;
+
+		/* Numeric Values */
+		case Opt_backupuid:
+			if (get_option_ul(args, &option)) {
 				cERROR(1, "%s: Invalid backupuid value",
 					__func__);
 				goto cifs_parse_mount_err;
 			}
+			vol->backupuid = option;
 			vol->backupuid_specified = true;
1559 } else if (!strnicmp(data, "backupgid", 9) && value && *value) { 1454 break;
1560 err = kstrtouint(value, 0, &vol->backupgid); 1455 case Opt_backupgid:
1561 if (err < 0) { 1456 if (get_option_ul(args, &option)) {
1562 cERROR(1, "%s: Invalid backupgid value", 1457 cERROR(1, "%s: Invalid backupgid value",
1563 __func__); 1458 __func__);
1564 goto cifs_parse_mount_err; 1459 goto cifs_parse_mount_err;
1565 } 1460 }
1461 vol->backupgid = option;
1566 vol->backupgid_specified = true; 1462 vol->backupgid_specified = true;
-		} else
-			printk(KERN_WARNING "CIFS: Unknown mount option %s\n",
-						data);
-	}
-	if (vol->UNC == NULL) {
-		if (devname == NULL) {
-			printk(KERN_WARNING "CIFS: Missing UNC name for mount "
-						"target\n");
-			goto cifs_parse_mount_err;
-		}
-		if ((temp_len = strnlen(devname, 300)) < 300) {
+			break;
+		case Opt_uid:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid uid value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->linux_uid = option;
+			uid_specified = true;
+			break;
+		case Opt_cruid:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid cruid value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->cred_uid = option;
+			break;
+		case Opt_gid:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid gid value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->linux_gid = option;
+			gid_specified = true;
+			break;
+		case Opt_file_mode:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid file_mode value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->file_mode = option;
+			break;
+		case Opt_dirmode:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid dir_mode value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->dir_mode = option;
+			break;
+		case Opt_port:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid port value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->port = option;
+			break;
+		case Opt_rsize:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid rsize value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->rsize = option;
+			break;
+		case Opt_wsize:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid wsize value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->wsize = option;
+			break;
+		case Opt_actimeo:
+			if (get_option_ul(args, &option)) {
+				cERROR(1, "%s: Invalid actimeo value",
+					__func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->actimeo = HZ * option;
+			if (vol->actimeo > CIFS_MAX_ACTIMEO) {
+				cERROR(1, "CIFS: attribute cache"
+					  "timeout too large");
+				goto cifs_parse_mount_err;
+			}
+			break;
+
+		/* String Arguments */
+
+		case Opt_blank_user:
+			/* null user, ie. anonymous authentication */
+			vol->nullauth = 1;
+			vol->username = NULL;
+			break;
+		case Opt_user:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnlen(string, MAX_USERNAME_SIZE) >
+							MAX_USERNAME_SIZE) {
+				printk(KERN_WARNING "CIFS: username too long\n");
+				goto cifs_parse_mount_err;
+			}
+			vol->username = kstrdup(string, GFP_KERNEL);
+			if (!vol->username) {
+				printk(KERN_WARNING "CIFS: no memory "
+						    "for username\n");
+				goto cifs_parse_mount_err;
+			}
+			break;
+		case Opt_blank_pass:
+			vol->password = NULL;
+			break;
+		case Opt_pass:
+			/* passwords have to be handled differently
+			 * to allow the character used for deliminator
+			 * to be passed within them
+			 */
+
+			/* Obtain the value string */
+			value = strchr(data, '=');
+			value++;
+
+			/* Set tmp_end to end of the string */
+			tmp_end = (char *) value + strlen(value);
+
+			/* Check if following character is the deliminator
+			 * If yes, we have encountered a double deliminator
+			 * reset the NULL character to the deliminator
+			 */
+			if (tmp_end < end && tmp_end[1] == delim)
+				tmp_end[0] = delim;
+
+			/* Keep iterating until we get to a single deliminator
+			 * OR the end
+			 */
+			while ((tmp_end = strchr(tmp_end, delim)) != NULL &&
+			       (tmp_end[1] == delim)) {
+				tmp_end = (char *) &tmp_end[2];
+			}
+
+			/* Reset var options to point to next element */
+			if (tmp_end) {
+				tmp_end[0] = '\0';
+				options = (char *) &tmp_end[1];
+			} else
+				/* Reached the end of the mount option string */
+				options = end;
+
+			/* Now build new password string */
+			temp_len = strlen(value);
+			vol->password = kzalloc(temp_len+1, GFP_KERNEL);
+			if (vol->password == NULL) {
+				printk(KERN_WARNING "CIFS: no memory "
+						    "for password\n");
+				goto cifs_parse_mount_err;
+			}
+
+			for (i = 0, j = 0; i < temp_len; i++, j++) {
+				vol->password[j] = value[i];
+				if ((value[i] == delim) &&
+				     value[i+1] == delim)
+					/* skip the second deliminator */
+					i++;
+			}
+			vol->password[j] = '\0';
+			break;
+		case Opt_blank_ip:
+			vol->UNCip = NULL;
+			break;
+		case Opt_ip:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnlen(string, INET6_ADDRSTRLEN) >
+						INET6_ADDRSTRLEN) {
+				printk(KERN_WARNING "CIFS: ip address "
+						    "too long\n");
+				goto cifs_parse_mount_err;
+			}
+			vol->UNCip = kstrdup(string, GFP_KERNEL);
+			if (!vol->UNCip) {
+				printk(KERN_WARNING "CIFS: no memory "
+						    "for UNC IP\n");
+				goto cifs_parse_mount_err;
+			}
+			break;
+		case Opt_unc:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			temp_len = strnlen(string, 300);
+			if (temp_len == 300) {
+				printk(KERN_WARNING "CIFS: UNC name too long\n");
+				goto cifs_parse_mount_err;
+			}
+
-			vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
-			if (vol->UNC == NULL)
-				goto cifs_parse_mount_err;
-			strcpy(vol->UNC, devname);
-			if (strncmp(vol->UNC, "//", 2) == 0) {
-				vol->UNC[0] = '\\';
-				vol->UNC[1] = '\\';
-			} else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
-				printk(KERN_WARNING "CIFS: UNC Path does not "
-						    "begin with // or \\\\ \n");
-				goto cifs_parse_mount_err;
-			}
-			value = strpbrk(vol->UNC+2, "/\\");
-			if (value)
-				*value = '\\';
-		} else {
-			printk(KERN_WARNING "CIFS: UNC name too long\n");
-			goto cifs_parse_mount_err;
-		}
-	}
+			vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
+			if (vol->UNC == NULL) {
+				printk(KERN_WARNING "CIFS: no memory for UNC\n");
+				goto cifs_parse_mount_err;
+			}
+			strcpy(vol->UNC, string);
+
+			if (strncmp(string, "//", 2) == 0) {
+				vol->UNC[0] = '\\';
+				vol->UNC[1] = '\\';
+			} else if (strncmp(string, "\\\\", 2) != 0) {
+				printk(KERN_WARNING "CIFS: UNC Path does not "
+						    "begin with // or \\\\\n");
+				goto cifs_parse_mount_err;
+			}
+
+			break;
+		case Opt_domain:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnlen(string, 256) == 256) {
+				printk(KERN_WARNING "CIFS: domain name too"
+						    " long\n");
+				goto cifs_parse_mount_err;
+			}
+
+			vol->domainname = kstrdup(string, GFP_KERNEL);
+			if (!vol->domainname) {
+				printk(KERN_WARNING "CIFS: no memory "
+						    "for domainname\n");
+				goto cifs_parse_mount_err;
+			}
+			cFYI(1, "Domain name set");
+			break;
+		case Opt_srcaddr:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (!cifs_convert_address(
+					(struct sockaddr *)&vol->srcaddr,
+					string, strlen(string))) {
+				printk(KERN_WARNING "CIFS: Could not parse"
+						    " srcaddr: %s\n", string);
+				goto cifs_parse_mount_err;
+			}
+			break;
+		case Opt_prefixpath:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			temp_len = strnlen(string, 1024);
+			if (string[0] != '/')
+				temp_len++; /* missing leading slash */
+			if (temp_len > 1024) {
+				printk(KERN_WARNING "CIFS: prefix too long\n");
+				goto cifs_parse_mount_err;
+			}
+
+			vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
+			if (vol->prepath == NULL) {
+				printk(KERN_WARNING "CIFS: no memory "
+						    "for path prefix\n");
+				goto cifs_parse_mount_err;
+			}
+
+			if (string[0] != '/') {
+				vol->prepath[0] = '/';
+				strcpy(vol->prepath+1, string);
+			} else
+				strcpy(vol->prepath, string);
+
+			break;
+		case Opt_iocharset:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnlen(string, 1024) >= 65) {
+				printk(KERN_WARNING "CIFS: iocharset name "
+						    "too long.\n");
+				goto cifs_parse_mount_err;
+			}
+
+			if (strnicmp(string, "default", 7) != 0) {
+				vol->iocharset = kstrdup(string,
+							 GFP_KERNEL);
+				if (!vol->iocharset) {
+					printk(KERN_WARNING "CIFS: no memory"
+							    "for charset\n");
+					goto cifs_parse_mount_err;
+				}
+			}
+			/* if iocharset not set then load_nls_default
+			 * is used by caller
+			 */
+			cFYI(1, "iocharset set to %s", string);
+			break;
+		case Opt_sockopt:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnicmp(string, "TCP_NODELAY", 11) == 0)
+				vol->sockopt_tcp_nodelay = 1;
+			break;
+		case Opt_netbiosname:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			memset(vol->source_rfc1001_name, 0x20,
+				RFC1001_NAME_LEN);
+			/*
+			 * FIXME: are there cases in which a comma can
+			 * be valid in workstation netbios name (and
+			 * need special handling)?
+			 */
+			for (i = 0; i < RFC1001_NAME_LEN; i++) {
+				/* don't ucase netbiosname for user */
+				if (string[i] == 0)
+					break;
+				vol->source_rfc1001_name[i] = string[i];
+			}
+			/* The string has 16th byte zero still from
+			 * set at top of the function
+			 */
+			if (i == RFC1001_NAME_LEN && string[i] != 0)
+				printk(KERN_WARNING "CIFS: netbiosname"
+				       " longer than 15 truncated.\n");
+
+			break;
+		case Opt_servern:
+			/* servernetbiosname specified override *SMBSERVER */
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			/* last byte, type, is 0x20 for servr type */
+			memset(vol->target_rfc1001_name, 0x20,
+				RFC1001_NAME_LEN_WITH_NULL);
+
+			/* BB are there cases in which a comma can be
+			   valid in this workstation netbios name
+			   (and need special handling)? */
+
+			/* user or mount helper must uppercase the
+			   netbios name */
+			for (i = 0; i < 15; i++) {
+				if (string[i] == 0)
+					break;
+				vol->target_rfc1001_name[i] = string[i];
+			}
+			/* The string has 16th byte zero still from
+			   set at top of the function */
+			if (i == RFC1001_NAME_LEN && string[i] != 0)
+				printk(KERN_WARNING "CIFS: server net"
+				       "biosname longer than 15 truncated.\n");
+			break;
+		case Opt_ver:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (strnicmp(string, "cifs", 4) == 0 ||
+			    strnicmp(string, "1", 1) == 0) {
+				/* This is the default */
+				break;
+			}
+			/* For all other value, error */
+			printk(KERN_WARNING "CIFS: Invalid version"
+					    " specified\n");
+			goto cifs_parse_mount_err;
+		case Opt_sec:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+
+			if (cifs_parse_security_flavors(string, vol) != 0)
+				goto cifs_parse_mount_err;
+			break;
+		default:
+			/*
+			 * An option we don't recognize. Save it off for later
+			 * if we haven't already found one
+			 */
+			if (!invalid)
+				invalid = data;
+			break;
+		}
+		/* Free up any allocated string */
+		kfree(string);
+		string = NULL;
+	}
+
+	if (!sloppy && invalid) {
+		printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid);
+		goto cifs_parse_mount_err;
+	}
 
 #ifndef CONFIG_KEYS
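
[Editor's note: the rewrite above converts the option parser from an open-coded strnicmp() chain to a token table dispatched through a switch, the pattern the kernel's match_token()/substring_t API is built around. As a rough, self-contained userspace sketch of the same idea only — the table entries, option names, and helper below are invented for illustration and are not the kernel API:]

    #include <stdio.h>
    #include <string.h>

    /* minimal stand-in for a kernel match_table_t */
    enum { Opt_nocase, Opt_brl, Opt_nobrl, Opt_err };

    static const struct { int token; const char *pattern; } opts[] = {
    	{ Opt_nocase, "nocase" },
    	{ Opt_brl,    "brl"    },
    	{ Opt_nobrl,  "nobrl"  },
    	{ Opt_err,    NULL     },
    };

    static int match_opt(const char *s)
    {
    	int i;
    	for (i = 0; opts[i].pattern; i++)
    		if (strcmp(s, opts[i].pattern) == 0)
    			return opts[i].token;
    	return Opt_err;
    }

    int main(void)
    {
    	char data[] = "nocase,nobrl";
    	char *p = data, *tok;

    	/* split the option string on commas, then dispatch per token */
    	while ((tok = strsep(&p, ",")) != NULL) {
    		switch (match_opt(tok)) {
    		case Opt_nocase: printf("nocase set\n"); break;
    		case Opt_brl:    printf("brl set\n");    break;
    		case Opt_nobrl:  printf("nobrl set\n");  break;
    		default:         printf("unknown: %s\n", tok);
    		}
    	}
    	return 0;
    }

[One advantage of this shape, visible in the patch, is that unknown options can be collected and rejected in one place after the loop instead of in a trailing else branch.]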
@@ -1623,7 +1884,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	kfree(mountdata_copy);
 	return 0;
 
+out_nomem:
+	printk(KERN_WARNING "Could not allocate temporary buffer\n");
 cifs_parse_mount_err:
+	kfree(string);
 	kfree(mountdata_copy);
 	return 1;
 }
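
[Editor's note: the out_nomem label added above follows the usual kernel idiom of funnelling every failure path through one cleanup block so each allocation is released exactly once. A generic sketch of the idiom, with invented names:]

    #include <stdlib.h>

    int do_work(void)
    {
    	char *a = NULL, *b = NULL;
    	int rc = -1;

    	a = malloc(64);
    	if (!a)
    		goto out;
    	b = malloc(64);
    	if (!b)
    		goto out;

    	/* ... use a and b ... */
    	rc = 0;
    out:
    	/* free(NULL) is a no-op, so one exit path covers all cases */
    	free(b);
    	free(a);
    	return rc;
    }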
@@ -1909,7 +2173,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	tcp_ses->noblocksnd = volume_info->noblocksnd;
 	tcp_ses->noautotune = volume_info->noautotune;
 	tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
-	atomic_set(&tcp_ses->inFlight, 0);
+	tcp_ses->in_flight = 0;
+	tcp_ses->credits = 1;
 	init_waitqueue_head(&tcp_ses->response_q);
 	init_waitqueue_head(&tcp_ses->request_q);
 	INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
@@ -1921,6 +2186,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	tcp_ses->session_estab = false;
 	tcp_ses->sequence_number = 0;
 	tcp_ses->lstrp = jiffies;
+	spin_lock_init(&tcp_ses->req_lock);
 	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
 	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
 	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
@@ -1974,7 +2240,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	cifs_fscache_get_client_cookie(tcp_ses);
 
 	/* queue echo request delayed work */
-	queue_delayed_work(system_nrt_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+	queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
 
 	return tcp_ses;
 
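
[Editor's note: this series moves CIFS deferred work off the shared system_nrt_wq onto a driver-private workqueue. The creation of cifsiod_wq itself lives elsewhere in the series (in cifsfs.c module init, not shown in this hunk); it most likely resembles the sketch below, but the exact flags here are an assumption, not something this diff shows:]

    /* sketch: a private workqueue, freezable and usable under memory reclaim */
    cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
    if (!cifsiod_wq)
    	return -ENOMEM;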
@@ -2966,10 +3232,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 
 	cifs_sb->mnt_uid = pvolume_info->linux_uid;
 	cifs_sb->mnt_gid = pvolume_info->linux_gid;
-	if (pvolume_info->backupuid_specified)
-		cifs_sb->mnt_backupuid = pvolume_info->backupuid;
-	if (pvolume_info->backupgid_specified)
-		cifs_sb->mnt_backupgid = pvolume_info->backupgid;
 	cifs_sb->mnt_file_mode = pvolume_info->file_mode;
 	cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
 	cFYI(1, "file mode: 0x%hx dir mode: 0x%hx",
@@ -3000,10 +3262,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
 	if (pvolume_info->cifs_acl)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
-	if (pvolume_info->backupuid_specified)
+	if (pvolume_info->backupuid_specified) {
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
-	if (pvolume_info->backupgid_specified)
+		cifs_sb->mnt_backupuid = pvolume_info->backupuid;
+	}
+	if (pvolume_info->backupgid_specified) {
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
+		cifs_sb->mnt_backupgid = pvolume_info->backupgid;
+	}
 	if (pvolume_info->override_uid)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
 	if (pvolume_info->override_gid)
@@ -3352,26 +3618,10 @@ cifs_get_volume_info(char *mount_data, const char *devname)
 	return volume_info;
 }
 
-/* make sure ra_pages is a multiple of rsize */
-static inline unsigned int
-cifs_ra_pages(struct cifs_sb_info *cifs_sb)
-{
-	unsigned int reads;
-	unsigned int rsize_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
-
-	if (rsize_pages >= default_backing_dev_info.ra_pages)
-		return default_backing_dev_info.ra_pages;
-	else if (rsize_pages == 0)
-		return rsize_pages;
-
-	reads = default_backing_dev_info.ra_pages / rsize_pages;
-	return reads * rsize_pages;
-}
-
 int
 cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 {
-	int rc = 0;
+	int rc;
 	int xid;
 	struct cifs_ses *pSesInfo;
 	struct cifs_tcon *tcon;
@@ -3398,6 +3648,7 @@ try_mount_again:
 		FreeXid(xid);
 	}
 #endif
+	rc = 0;
 	tcon = NULL;
 	pSesInfo = NULL;
 	srvTcp = NULL;
@@ -3454,7 +3705,7 @@ try_mount_again:
 	cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info);
 
 	/* tune readahead according to rsize */
-	cifs_sb->bdi.ra_pages = cifs_ra_pages(cifs_sb);
+	cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
 
 remote_path_check:
 #ifdef CONFIG_CIFS_DFS_UPCALL
@@ -3539,7 +3790,7 @@ remote_path_check:
 		tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
 	spin_unlock(&cifs_sb->tlink_tree_lock);
 
-	queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
+	queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks,
 				TLINK_IDLE_EXPIRE);
 
 mount_fail_check:
@@ -3759,9 +4010,11 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
 	if (server->maxBuf != 0)
 		return 0;
 
+	cifs_set_credits(server, 1);
 	rc = CIFSSMBNegotiate(xid, ses);
 	if (rc == -EAGAIN) {
 		/* retry only once on 1st time connection */
+		cifs_set_credits(server, 1);
 		rc = CIFSSMBNegotiate(xid, ses);
 		if (rc == -EAGAIN)
 			rc = -EHOSTDOWN;
@@ -4091,6 +4344,6 @@ cifs_prune_tlinks(struct work_struct *work)
 	}
 	spin_unlock(&cifs_sb->tlink_tree_lock);
 
-	queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
+	queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks,
 				TLINK_IDLE_EXPIRE);
 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index bc7e24420ac..ec4e9a2a12f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	}
 	tcon = tlink_tcon(tlink);
 
-	if (enable_oplocks)
+	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
 
 	if (nd)
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 {
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
-	__u32 oplock = enable_oplocks ? REQ_OPLOCK : 0;
+	__u32 oplock;
 	__u16 fileHandle = 0;
 	bool posix_open = false;
 	struct cifs_sb_info *cifs_sb;
@@ -518,6 +518,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	}
 	pTcon = tlink_tcon(tlink);
 
+	oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
+
 	/*
 	 * Don't allow the separator character in a path component.
 	 * The VFS will not allow "/", but "\" is allowed by posix.
@@ -666,12 +668,19 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 		return 0;
 	else {
 		/*
-		 * Forcibly invalidate automounting directory inodes
-		 * (remote DFS directories) so to have them
-		 * instantiated again for automount
+		 * If the inode wasn't known to be a dfs entry when
+		 * the dentry was instantiated, such as when created
+		 * via ->readdir(), it needs to be set now since the
+		 * attributes will have been updated by
+		 * cifs_revalidate_dentry().
 		 */
-		if (IS_AUTOMOUNT(direntry->d_inode))
-			return 0;
+		if (IS_AUTOMOUNT(direntry->d_inode) &&
+		    !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
+			spin_lock(&direntry->d_lock);
+			direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+			spin_unlock(&direntry->d_lock);
+		}
+
 		return 1;
 	}
 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5e64748a291..81725e9286e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
 		inode, file->f_flags, full_path);
 
-	if (enable_oplocks)
+	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
 		oplock = 0;
@@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
 	cFYI(1, "inode = 0x%p file flags 0x%x for %s",
 		inode, pCifsFile->f_flags, full_path);
 
-	if (enable_oplocks)
+	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
 		oplock = 0;
@@ -835,13 +835,21 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 	if ((flock->fl_flags & FL_POSIX) == 0)
 		return rc;
 
+try_again:
 	mutex_lock(&cinode->lock_mutex);
 	if (!cinode->can_cache_brlcks) {
 		mutex_unlock(&cinode->lock_mutex);
 		return rc;
 	}
-	rc = posix_lock_file_wait(file, flock);
+
+	rc = posix_lock_file(file, flock, NULL);
 	mutex_unlock(&cinode->lock_mutex);
+	if (rc == FILE_LOCK_DEFERRED) {
+		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
+		if (!rc)
+			goto try_again;
+		locks_delete_block(flock);
+	}
 	return rc;
 }
 
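
[Editor's note: the change above replaces posix_lock_file_wait() with a non-blocking posix_lock_file() plus an explicit wait, so the sleep happens only after lock_mutex has been dropped, and the whole attempt is retried from the top. The same try/wait/retry shape expressed in plain userspace terms — illustrative only, fcntl() stands in for the VFS lock call and a sleep stands in for waiting on fl_wait:]

    #include <fcntl.h>
    #include <unistd.h>
    #include <errno.h>

    /* Try a non-blocking byte-range lock; if it would block, wait and retry. */
    static int lock_with_retry(int fd, struct flock *fl)
    {
    	for (;;) {
    		if (fcntl(fd, F_SETLK, fl) == 0)
    			return 0;		/* got the lock */
    		if (errno != EACCES && errno != EAGAIN)
    			return -1;		/* real error, give up */
    		usleep(10000);			/* stand-in for blocking on fl_wait */
    	}
    }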
@@ -960,9 +968,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 	INIT_LIST_HEAD(&locks_to_send);
 
 	/*
-	 * Allocating count locks is enough because no locks can be added to
-	 * the list while we are holding cinode->lock_mutex that protects
-	 * locking operations of this inode.
+	 * Allocating count locks is enough because no FL_POSIX locks can be
+	 * added to the list while we are holding cinode->lock_mutex that
+	 * protects locking operations of this inode.
 	 */
 	for (; i < count; i++) {
 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
@@ -973,18 +981,20 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		list_add_tail(&lck->llist, &locks_to_send);
 	}
 
-	i = 0;
 	el = locks_to_send.next;
 	lock_flocks();
 	cifs_for_each_lock(cfile->dentry->d_inode, before) {
+		flock = *before;
+		if ((flock->fl_flags & FL_POSIX) == 0)
+			continue;
 		if (el == &locks_to_send) {
-			/* something is really wrong */
+			/*
+			 * The list ended. We don't have enough allocated
+			 * structures - something is really wrong.
+			 */
 			cERROR(1, "Can't push all brlocks!");
 			break;
 		}
-		flock = *before;
-		if ((flock->fl_flags & FL_POSIX) == 0)
-			continue;
 		length = 1 + flock->fl_end - flock->fl_start;
 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
 			type = CIFS_RDLCK;
@@ -996,7 +1006,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		lck->length = length;
 		lck->type = type;
 		lck->offset = flock->fl_start;
-		i++;
 		el = el->next;
 	}
 	unlock_flocks();
@@ -1398,7 +1407,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
 	return rc;
 }
 
-/* update the file size (if needed) after a write */
+/*
+ * update the file size (if needed) after a write. Should be called with
+ * the inode->i_lock held
+ */
 void
 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 		unsigned int bytes_written)
@@ -1470,7 +1482,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
 				return rc;
 			}
 		} else {
+			spin_lock(&dentry->d_inode->i_lock);
 			cifs_update_eof(cifsi, *poffset, bytes_written);
+			spin_unlock(&dentry->d_inode->i_lock);
 			*poffset += bytes_written;
 		}
 	}
@@ -1647,6 +1661,27 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	return rc;
 }
 
+/*
+ * Marshal up the iov array, reserving the first one for the header. Also,
+ * set wdata->bytes.
+ */
+static void
+cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
+{
+	int i;
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+	loff_t size = i_size_read(inode);
+
+	/* marshal up the pages into iov array */
+	wdata->bytes = 0;
+	for (i = 0; i < wdata->nr_pages; i++) {
+		iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
+					(loff_t)PAGE_CACHE_SIZE);
+		iov[i + 1].iov_base = kmap(wdata->pages[i]);
+		wdata->bytes += iov[i + 1].iov_len;
+	}
+}
+
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
@@ -1683,7 +1718,8 @@ retry:
 		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
 				end - index) + 1;
 
-		wdata = cifs_writedata_alloc((unsigned int)tofind);
+		wdata = cifs_writedata_alloc((unsigned int)tofind,
+					     cifs_writev_complete);
 		if (!wdata) {
 			rc = -ENOMEM;
 			break;
@@ -1790,6 +1826,7 @@ retry:
 		wdata->sync_mode = wbc->sync_mode;
 		wdata->nr_pages = nr_pages;
 		wdata->offset = page_offset(wdata->pages[0]);
+		wdata->marshal_iov = cifs_writepages_marshal_iov;
 
 		do {
 			if (wdata->cfile != NULL)
@@ -1801,6 +1838,7 @@ retry:
 				rc = -EBADF;
 				break;
 			}
+			wdata->pid = wdata->cfile->pid;
 			rc = cifs_async_writev(wdata);
 		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
 
@@ -2042,7 +2080,7 @@ cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
 	unsigned long i;
 
 	for (i = 0; i < num_pages; i++) {
-		pages[i] = alloc_page(__GFP_HIGHMEM);
+		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
 		if (!pages[i]) {
 			/*
 			 * save number of pages we have already allocated and
@@ -2050,15 +2088,14 @@ cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
 			 */
 			num_pages = i;
 			rc = -ENOMEM;
-			goto error;
+			break;
 		}
 	}
 
-	return rc;
-
-error:
-	for (i = 0; i < num_pages; i++)
-		put_page(pages[i]);
+	if (rc) {
+		for (i = 0; i < num_pages; i++)
+			put_page(pages[i]);
+	}
 	return rc;
 }
 
@@ -2069,9 +2106,7 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
 	size_t clen;
 
 	clen = min_t(const size_t, len, wsize);
-	num_pages = clen / PAGE_CACHE_SIZE;
-	if (clen % PAGE_CACHE_SIZE)
-		num_pages++;
+	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
 
 	if (cur_len)
 		*cur_len = clen;
@@ -2079,24 +2114,79 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
 	return num_pages;
 }
 
+static void
+cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
+{
+	int i;
+	size_t bytes = wdata->bytes;
+
+	/* marshal up the pages into iov array */
+	for (i = 0; i < wdata->nr_pages; i++) {
+		iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
+		iov[i + 1].iov_base = kmap(wdata->pages[i]);
+		bytes -= iov[i + 1].iov_len;
+	}
+}
+
+static void
+cifs_uncached_writev_complete(struct work_struct *work)
+{
+	int i;
+	struct cifs_writedata *wdata = container_of(work,
+					struct cifs_writedata, work);
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
+	if (cifsi->server_eof > inode->i_size)
+		i_size_write(inode, cifsi->server_eof);
+	spin_unlock(&inode->i_lock);
+
+	complete(&wdata->done);
+
+	if (wdata->result != -EAGAIN) {
+		for (i = 0; i < wdata->nr_pages; i++)
+			put_page(wdata->pages[i]);
+	}
+
+	kref_put(&wdata->refcount, cifs_writedata_release);
+}
+
+/* attempt to send write to server, retry on any -EAGAIN errors */
+static int
+cifs_uncached_retry_writev(struct cifs_writedata *wdata)
+{
+	int rc;
+
+	do {
+		if (wdata->cfile->invalidHandle) {
+			rc = cifs_reopen_file(wdata->cfile, false);
+			if (rc != 0)
+				continue;
+		}
+		rc = cifs_async_writev(wdata);
+	} while (rc == -EAGAIN);
+
+	return rc;
+}
+
 static ssize_t
 cifs_iovec_write(struct file *file, const struct iovec *iov,
 		 unsigned long nr_segs, loff_t *poffset)
 {
-	unsigned int written;
-	unsigned long num_pages, npages, i;
+	unsigned long nr_pages, i;
 	size_t copied, len, cur_len;
 	ssize_t total_written = 0;
-	struct kvec *to_send;
-	struct page **pages;
+	loff_t offset;
 	struct iov_iter it;
-	struct inode *inode;
 	struct cifsFileInfo *open_file;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *tcon;
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_io_parms io_parms;
-	int xid, rc;
-	__u32 pid;
+	struct cifs_writedata *wdata, *tmp;
+	struct list_head wdata_list;
+	int rc;
+	pid_t pid;
 
 	len = iov_length(iov, nr_segs);
 	if (!len)
@@ -2106,103 +2196,104 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 	if (rc)
 		return rc;
 
+	INIT_LIST_HEAD(&wdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
-
-	pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
-	if (!to_send) {
-		kfree(pages);
-		return -ENOMEM;
-	}
-
-	rc = cifs_write_allocate_pages(pages, num_pages);
-	if (rc) {
-		kfree(pages);
-		kfree(to_send);
-		return rc;
-	}
-
-	xid = GetXid();
 	open_file = file->private_data;
+	tcon = tlink_tcon(open_file->tlink);
+	offset = *poffset;
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
 		pid = open_file->pid;
 	else
 		pid = current->tgid;
 
-	pTcon = tlink_tcon(open_file->tlink);
-	inode = file->f_path.dentry->d_inode;
-
 	iov_iter_init(&it, iov, nr_segs, len, 0);
-	npages = num_pages;
-
 	do {
-		size_t save_len = cur_len;
-		for (i = 0; i < npages; i++) {
-			copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
-			copied = iov_iter_copy_from_user(pages[i], &it, 0,
-							 copied);
+		size_t save_len;
+
+		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+		wdata = cifs_writedata_alloc(nr_pages,
+					     cifs_uncached_writev_complete);
+		if (!wdata) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+		if (rc) {
+			kfree(wdata);
+			break;
+		}
+
+		save_len = cur_len;
+		for (i = 0; i < nr_pages; i++) {
+			copied = min_t(const size_t, cur_len, PAGE_SIZE);
+			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
+							 0, copied);
 			cur_len -= copied;
 			iov_iter_advance(&it, copied);
-			to_send[i+1].iov_base = kmap(pages[i]);
-			to_send[i+1].iov_len = copied;
 		}
-
 		cur_len = save_len - cur_len;
 
-		do {
-			if (open_file->invalidHandle) {
-				rc = cifs_reopen_file(open_file, false);
-				if (rc != 0)
-					break;
-			}
-			io_parms.netfid = open_file->netfid;
-			io_parms.pid = pid;
-			io_parms.tcon = pTcon;
-			io_parms.offset = *poffset;
-			io_parms.length = cur_len;
-			rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
-					   npages, 0);
-		} while (rc == -EAGAIN);
-
-		for (i = 0; i < npages; i++)
-			kunmap(pages[i]);
-
-		if (written) {
-			len -= written;
-			total_written += written;
-			cifs_update_eof(CIFS_I(inode), *poffset, written);
-			*poffset += written;
-		} else if (rc < 0) {
-			if (!total_written)
-				total_written = rc;
+		wdata->sync_mode = WB_SYNC_ALL;
+		wdata->nr_pages = nr_pages;
+		wdata->offset = (__u64)offset;
+		wdata->cfile = cifsFileInfo_get(open_file);
+		wdata->pid = pid;
+		wdata->bytes = cur_len;
+		wdata->marshal_iov = cifs_uncached_marshal_iov;
+		rc = cifs_uncached_retry_writev(wdata);
+		if (rc) {
+			kref_put(&wdata->refcount, cifs_writedata_release);
 			break;
 		}
 
-		/* get length and number of kvecs of the next write */
-		npages = get_numpages(cifs_sb->wsize, len, &cur_len);
+		list_add_tail(&wdata->list, &wdata_list);
+		offset += cur_len;
+		len -= cur_len;
 	} while (len > 0);
 
-	if (total_written > 0) {
-		spin_lock(&inode->i_lock);
-		if (*poffset > inode->i_size)
-			i_size_write(inode, *poffset);
-		spin_unlock(&inode->i_lock);
+	/*
+	 * If at least one write was successfully sent, then discard any rc
+	 * value from the later writes. If the other write succeeds, then
+	 * we'll end up returning whatever was written. If it fails, then
+	 * we'll get a new rc value from that.
+	 */
+	if (!list_empty(&wdata_list))
+		rc = 0;
+
+	/*
+	 * Wait for and collect replies for any successful sends in order of
+	 * increasing offset. Once an error is hit or we get a fatal signal
+	 * while waiting, then return without waiting for any more replies.
+	 */
+restart_loop:
+	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
+		if (!rc) {
+			/* FIXME: freezable too? */
+			rc = wait_for_completion_killable(&wdata->done);
+			if (rc)
+				rc = -EINTR;
+			else if (wdata->result)
+				rc = wdata->result;
+			else
+				total_written += wdata->bytes;
+
+			/* resend call if it's a retryable error */
+			if (rc == -EAGAIN) {
+				rc = cifs_uncached_retry_writev(wdata);
+				goto restart_loop;
+			}
+		}
+		list_del_init(&wdata->list);
+		kref_put(&wdata->refcount, cifs_writedata_release);
 	}
 
-	cifs_stats_bytes_written(pTcon, total_written);
-	mark_inode_dirty_sync(inode);
+	if (total_written > 0)
+		*poffset += total_written;
 
-	for (i = 0; i < num_pages; i++)
-		put_page(pages[i]);
-	kfree(to_send);
-	kfree(pages);
-	FreeXid(xid);
-	return total_written;
+	cifs_stats_bytes_written(tcon, total_written);
+	return total_written ? total_written : (ssize_t)rc;
 }
 
 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
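
[Editor's note: the rewrite above pipelines uncached writes: each chunk is issued as an async request, queued on wdata_list, and replies are reaped afterwards in increasing-offset order, with the first error stopping the byte accounting. A stripped-down sketch of that issue-then-reap structure — the struct and values below are invented for illustration, not CIFS types:]

    #include <stdio.h>

    struct req { long offset; long bytes; int result; };

    int main(void)
    {
    	struct req reqs[3] = { {0, 4096, 0}, {4096, 4096, 0}, {8192, 1000, 0} };
    	long total = 0;
    	int i, rc = 0;

    	/* phase 1: "send" every chunk without waiting for replies */
    	for (i = 0; i < 3; i++)
    		printf("sent %ld bytes at offset %ld\n",
    		       reqs[i].bytes, reqs[i].offset);

    	/* phase 2: reap replies in increasing-offset order; the first
    	 * error stops the accounting, mirroring the list walk above */
    	for (i = 0; i < 3 && !rc; i++) {
    		rc = reqs[i].result;
    		if (!rc)
    			total += reqs[i].bytes;
    	}
    	printf("total written: %ld, rc: %d\n", total, rc);
    	return 0;
    }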
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 703ef5c6fdb..c29d1aa2c54 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -213,55 +213,62 @@ cifs_small_buf_release(void *buf_to_free)
 }
 
 /*
- Find a free multiplex id (SMB mid). Otherwise there could be
- mid collisions which might cause problems, demultiplexing the
- wrong response to this request. Multiplex ids could collide if
- one of a series requests takes much longer than the others, or
- if a very large number of long lived requests (byte range
- locks or FindNotify requests) are pending. No more than
- 64K-1 requests can be outstanding at one time. If no
- mids are available, return zero. A future optimization
- could make the combination of mids and uid the key we use
- to demultiplex on (rather than mid alone).
- In addition to the above check, the cifs demultiplex
- code already used the command code as a secondary
- check of the frame and if signing is negotiated the
- response would be discarded if the mid were the same
- but the signature was wrong. Since the mid is not put in the
- pending queue until later (when it is about to be dispatched)
- we do have to limit the number of outstanding requests
- to somewhat less than 64K-1 although it is hard to imagine
- so many threads being in the vfs at one time.
-*/
-__u16 GetNextMid(struct TCP_Server_Info *server)
+ * Find a free multiplex id (SMB mid). Otherwise there could be
+ * mid collisions which might cause problems, demultiplexing the
+ * wrong response to this request. Multiplex ids could collide if
+ * one of a series requests takes much longer than the others, or
+ * if a very large number of long lived requests (byte range
+ * locks or FindNotify requests) are pending. No more than
+ * 64K-1 requests can be outstanding at one time. If no
+ * mids are available, return zero. A future optimization
+ * could make the combination of mids and uid the key we use
+ * to demultiplex on (rather than mid alone).
+ * In addition to the above check, the cifs demultiplex
+ * code already used the command code as a secondary
+ * check of the frame and if signing is negotiated the
+ * response would be discarded if the mid were the same
+ * but the signature was wrong. Since the mid is not put in the
+ * pending queue until later (when it is about to be dispatched)
+ * we do have to limit the number of outstanding requests
+ * to somewhat less than 64K-1 although it is hard to imagine
+ * so many threads being in the vfs at one time.
+ */
+__u64 GetNextMid(struct TCP_Server_Info *server)
 {
-	__u16 mid = 0;
-	__u16 last_mid;
+	__u64 mid = 0;
+	__u16 last_mid, cur_mid;
 	bool collision;
 
 	spin_lock(&GlobalMid_Lock);
-	last_mid = server->CurrentMid; /* we do not want to loop forever */
-	server->CurrentMid++;
-	/* This nested loop looks more expensive than it is.
-	In practice the list of pending requests is short,
-	fewer than 50, and the mids are likely to be unique
-	on the first pass through the loop unless some request
-	takes longer than the 64 thousand requests before it
-	(and it would also have to have been a request that
-	did not time out) */
-	while (server->CurrentMid != last_mid) {
+
+	/* mid is 16 bit only for CIFS/SMB */
+	cur_mid = (__u16)((server->CurrentMid) & 0xffff);
+	/* we do not want to loop forever */
+	last_mid = cur_mid;
+	cur_mid++;
+
+	/*
+	 * This nested loop looks more expensive than it is.
+	 * In practice the list of pending requests is short,
+	 * fewer than 50, and the mids are likely to be unique
+	 * on the first pass through the loop unless some request
+	 * takes longer than the 64 thousand requests before it
+	 * (and it would also have to have been a request that
+	 * did not time out).
+	 */
+	while (cur_mid != last_mid) {
 		struct mid_q_entry *mid_entry;
 		unsigned int num_mids;
 
 		collision = false;
-		if (server->CurrentMid == 0)
-			server->CurrentMid++;
+		if (cur_mid == 0)
+			cur_mid++;
 
 		num_mids = 0;
 		list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
 			++num_mids;
-			if (mid_entry->mid == server->CurrentMid &&
-			    mid_entry->midState == MID_REQUEST_SUBMITTED) {
+			if (mid_entry->mid == cur_mid &&
+			    mid_entry->mid_state == MID_REQUEST_SUBMITTED) {
 				/* This mid is in use, try a different one */
 				collision = true;
 				break;
@@ -282,10 +289,11 @@ __u64 GetNextMid(struct TCP_Server_Info *server)
 			server->tcpStatus = CifsNeedReconnect;
 
 		if (!collision) {
-			mid = server->CurrentMid;
+			mid = (__u64)cur_mid;
+			server->CurrentMid = mid;
 			break;
 		}
-		server->CurrentMid++;
+		cur_mid++;
 	}
 	spin_unlock(&GlobalMid_Lock);
 	return mid;
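
[Editor's note: GetNextMid() now returns a 64-bit mid while still allocating 16-bit values, since SMB1 mids are 16 bits on the wire (the wider type anticipates later dialects). The collision scan itself is a simple wrap-around search over in-flight ids. A self-contained model of that search, with a bitmap standing in for the pending_mid_q walk — illustrative only:]

    #include <stdint.h>
    #include <stdbool.h>

    /* model: find the next free 16-bit mid, skipping 0 and in-use ids */
    static uint16_t next_mid(uint16_t cur, const bool in_use[65536])
    {
    	uint16_t last = cur;	/* remember start so we never loop forever */

    	for (cur = cur + 1; cur != last; cur++) {
    		if (cur == 0)
    			continue;	/* mid 0 is reserved */
    		if (!in_use[cur])
    			return cur;	/* first free id wins */
    	}
    	return 0;			/* none available */
    }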
@@ -420,8 +428,10 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
 }
 
 int
-checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read)
 {
+	struct smb_hdr *smb = (struct smb_hdr *)buf;
+	__u16 mid = smb->Mid;
 	__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
 	__u32 clc_len;  /* calculated length */
 	cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x",
@@ -502,8 +512,9 @@ checkSMB(char *buf, unsigned int total_read)
 }
 
 bool
-is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
+is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 {
+	struct smb_hdr *buf = (struct smb_hdr *)buffer;
 	struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
 	struct list_head *tmp, *tmp1, *tmp2;
 	struct cifs_ses *ses;
@@ -584,7 +595,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 
 				cifs_set_oplock_level(pCifsInode,
 					pSMB->OplockLevel ? OPLOCK_READ : 0);
-				queue_work(system_nrt_wq,
+				queue_work(cifsiod_wq,
 					   &netfile->oplock_break);
 				netfile->oplock_break_cancelled = false;
 
@@ -604,16 +615,15 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 }
 
 void
-dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
+dump_smb(void *buf, int smb_buf_length)
 {
 	int i, j;
 	char debug_line[17];
-	unsigned char *buffer;
+	unsigned char *buffer = buf;
 
 	if (traceSMB == 0)
 		return;
 
-	buffer = (unsigned char *) smb_buf;
 	for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
 		if (i % 8 == 0) {
 			/* have reached the beginning of line */
@@ -690,3 +700,22 @@ backup_cred(struct cifs_sb_info *cifs_sb)
 
 	return false;
 }
+
+void
+cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add)
+{
+	spin_lock(&server->req_lock);
+	server->credits += add;
+	server->in_flight--;
+	spin_unlock(&server->req_lock);
+	wake_up(&server->request_q);
+}
+
+void
+cifs_set_credits(struct TCP_Server_Info *server, const int val)
+{
+	spin_lock(&server->req_lock);
+	server->credits = val;
+	server->oplocks = val > 1 ? enable_oplocks : false;
+	spin_unlock(&server->req_lock);
+}
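
[Editor's note: these two helpers are the core of the new credit-based throttling that replaces the old inFlight atomic: credits says how many more requests may be sent, in_flight counts requests on the wire, and both are serialized by req_lock, with waiters on request_q woken when a response returns credits. A pthread analog of the same accounting — a simplified sketch, not the kernel code:]

    #include <pthread.h>

    static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t request_q = PTHREAD_COND_INITIALIZER;
    static int credits = 1, in_flight;

    /* response received: return its credits and wake any waiting sender */
    static void add_credits(int add)
    {
    	pthread_mutex_lock(&req_lock);
    	credits += add;
    	in_flight--;
    	pthread_mutex_unlock(&req_lock);
    	pthread_cond_broadcast(&request_q);
    }

    /* sender side: wait until a credit is available, then consume one */
    static void get_credit(void)
    {
    	pthread_mutex_lock(&req_lock);
    	while (credits <= 0)
    		pthread_cond_wait(&request_q, &req_lock);
    	credits--;
    	in_flight++;
    	pthread_mutex_unlock(&req_lock);
    }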
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 73e47e84b61..581c225f7f5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -197,8 +197,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
 		memcpy(scope_id, pct + 1, slen);
 		scope_id[slen] = '\0';
 
-		rc = strict_strtoul(scope_id, 0,
-					(unsigned long *)&s6->sin6_scope_id);
+		rc = kstrtouint(scope_id, 0, &s6->sin6_scope_id);
 		rc = (rc == 0) ? 1 : 0;
 	}
 
@@ -836,8 +835,9 @@ ntstatus_to_dos(__u32 ntstatus, __u8 *eclass, __u16 *ecode)
 }
 
 int
-map_smb_to_linux_error(struct smb_hdr *smb, bool logErr)
+map_smb_to_linux_error(char *buf, bool logErr)
 {
+	struct smb_hdr *smb = (struct smb_hdr *)buf;
 	unsigned int i;
 	int rc = -EIO;	/* if transport error smb error may not be set */
 	__u8 smberrclass;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0cc9584f588..0961336513d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -60,8 +60,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
 	memset(temp, 0, sizeof(struct mid_q_entry));
 	temp->mid = smb_buffer->Mid;	/* always LE */
 	temp->pid = current->pid;
-	temp->command = smb_buffer->Command;
-	cFYI(1, "For smb_command %d", temp->command);
+	temp->command = cpu_to_le16(smb_buffer->Command);
+	cFYI(1, "For smb_command %d", smb_buffer->Command);
 	/*	do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
 	/* when mid allocated can be before when sent */
 	temp->when_alloc = jiffies;
@@ -75,7 +75,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
 	}
 
 	atomic_inc(&midCount);
-	temp->midState = MID_REQUEST_ALLOCATED;
+	temp->mid_state = MID_REQUEST_ALLOCATED;
 	return temp;
 }
 
@@ -85,9 +85,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 #ifdef CONFIG_CIFS_STATS2
 	unsigned long now;
 #endif
-	midEntry->midState = MID_FREE;
+	midEntry->mid_state = MID_FREE;
 	atomic_dec(&midCount);
-	if (midEntry->largeBuf)
+	if (midEntry->large_buf)
 		cifs_buf_release(midEntry->resp_buf);
 	else
 		cifs_small_buf_release(midEntry->resp_buf);
@@ -97,8 +97,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	   something is wrong, unless it is quite a slow link or server */
 	if ((now - midEntry->when_alloc) > HZ) {
 		if ((cifsFYI & CIFS_TIMER) &&
-		    (midEntry->command != SMB_COM_LOCKING_ANDX)) {
-			printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %d",
+		    (midEntry->command != cpu_to_le16(SMB_COM_LOCKING_ANDX))) {
+			printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu",
 			       midEntry->command, midEntry->mid);
 			printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
 			       now - midEntry->when_alloc,
@@ -126,11 +126,11 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	int rc = 0;
 	int i = 0;
 	struct msghdr smb_msg;
-	struct smb_hdr *smb_buffer = iov[0].iov_base;
+	__be32 *buf_len = (__be32 *)(iov[0].iov_base);
 	unsigned int len = iov[0].iov_len;
 	unsigned int total_len;
 	int first_vec = 0;
-	unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
+	unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
 	struct socket *ssocket = server->ssocket;
 
 	if (ssocket == NULL)
@@ -150,7 +150,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 		total_len += iov[i].iov_len;
 
 	cFYI(1, "Sending smb: total_len %d", total_len);
-	dump_smb(smb_buffer, len);
+	dump_smb(iov[0].iov_base, len);
 
 	i = 0;
 	while (total_len) {
@@ -158,24 +158,24 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 				    n_vec - first_vec, total_len);
 		if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
 			i++;
-			/* if blocking send we try 3 times, since each can block
-			   for 5 seconds. For nonblocking we have to try more
-			   but wait increasing amounts of time allowing time for
-			   socket to clear. The overall time we wait in either
-			   case to send on the socket is about 15 seconds.
-			   Similarly we wait for 15 seconds for
-			   a response from the server in SendReceive[2]
-			   for the server to send a response back for
-			   most types of requests (except SMB Write
-			   past end of file which can be slow, and
-			   blocking lock operations). NFS waits slightly longer
-			   than CIFS, but this can make it take longer for
-			   nonresponsive servers to be detected and 15 seconds
-			   is more than enough time for modern networks to
-			   send a packet. In most cases if we fail to send
-			   after the retries we will kill the socket and
-			   reconnect which may clear the network problem.
+			/*
+			 * If blocking send we try 3 times, since each can block
+			 * for 5 seconds. For nonblocking we have to try more
+			 * but wait increasing amounts of time allowing time for
+			 * socket to clear. The overall time we wait in either
+			 * case to send on the socket is about 15 seconds.
+			 * Similarly we wait for 15 seconds for a response from
+			 * the server in SendReceive[2] for the server to send
+			 * a response back for most types of requests (except
+			 * SMB Write past end of file which can be slow, and
+			 * blocking lock operations). NFS waits slightly longer
+			 * than CIFS, but this can make it take longer for
+			 * nonresponsive servers to be detected and 15 seconds
+			 * is more than enough time for modern networks to
+			 * send a packet. In most cases if we fail to send
+			 * after the retries we will kill the socket and
+			 * reconnect which may clear the network problem.
 			 */
 			if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
 				cERROR(1, "sends on sock %p stuck for 15 seconds",
 					ssocket);
@@ -235,9 +235,8 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
235 else 235 else
236 rc = 0; 236 rc = 0;
237 237
238 /* Don't want to modify the buffer as a 238 /* Don't want to modify the buffer as a side effect of this call. */
239 side effect of this call. */ 239 *buf_len = cpu_to_be32(smb_buf_length);
240 smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
241 240
242 return rc; 241 return rc;
243} 242}
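
From this point on, smb_sendv() and the SendReceive paths stop casting iov[0].iov_base to struct smb_hdr just to read the frame length, calling get_rfc1002_length() on the opaque buffer instead. The conversions imply that the helper simply decodes the big-endian RFC 1002 length prefix that opens every SMB frame; a userspace sketch of that assumed behaviour, not the cifsglob.h definition:

#include <arpa/inet.h>	/* ntohl() */
#include <stdint.h>
#include <string.h>

/* Assumed shape of get_rfc1002_length(): the first four bytes of an
 * SMB frame carry its length, big-endian on the wire. */
static unsigned int get_rfc1002_length(const void *buf)
{
	uint32_t len;

	memcpy(&len, buf, sizeof(len));	/* no alignment assumption */
	return ntohl(len);
}

The "+ 4" the callers keep adding is the prefix itself: the encoded length does not count the four bytes that carry it.
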
@@ -254,44 +253,60 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
254 return smb_sendv(server, &iov, 1); 253 return smb_sendv(server, &iov, 1);
255} 254}
256 255
257static int wait_for_free_request(struct TCP_Server_Info *server, 256static int
258 const int long_op) 257wait_for_free_credits(struct TCP_Server_Info *server, const int optype,
258 int *credits)
259{ 259{
260 if (long_op == CIFS_ASYNC_OP) { 260 int rc;
261
262 spin_lock(&server->req_lock);
263 if (optype == CIFS_ASYNC_OP) {
261 /* oplock breaks must not be held up */ 264 /* oplock breaks must not be held up */
262 atomic_inc(&server->inFlight); 265 server->in_flight++;
266 *credits -= 1;
267 spin_unlock(&server->req_lock);
263 return 0; 268 return 0;
264 } 269 }
265 270
266 spin_lock(&GlobalMid_Lock);
267 while (1) { 271 while (1) {
268 if (atomic_read(&server->inFlight) >= cifs_max_pending) { 272 if (*credits <= 0) {
269 spin_unlock(&GlobalMid_Lock); 273 spin_unlock(&server->req_lock);
270 cifs_num_waiters_inc(server); 274 cifs_num_waiters_inc(server);
271 wait_event(server->request_q, 275 rc = wait_event_killable(server->request_q,
272 atomic_read(&server->inFlight) 276 has_credits(server, credits));
273 < cifs_max_pending);
274 cifs_num_waiters_dec(server); 277 cifs_num_waiters_dec(server);
275 spin_lock(&GlobalMid_Lock); 278 if (rc)
279 return rc;
280 spin_lock(&server->req_lock);
276 } else { 281 } else {
277 if (server->tcpStatus == CifsExiting) { 282 if (server->tcpStatus == CifsExiting) {
278 spin_unlock(&GlobalMid_Lock); 283 spin_unlock(&server->req_lock);
279 return -ENOENT; 284 return -ENOENT;
280 } 285 }
281 286
282 /* can not count locking commands against total 287 /*
283 as they are allowed to block on server */ 288 * Can not count locking commands against total
289 * as they are allowed to block on server.
290 */
284 291
285 /* update # of requests on the wire to server */ 292 /* update # of requests on the wire to server */
286 if (long_op != CIFS_BLOCKING_OP) 293 if (optype != CIFS_BLOCKING_OP) {
287 atomic_inc(&server->inFlight); 294 *credits -= 1;
288 spin_unlock(&GlobalMid_Lock); 295 server->in_flight++;
296 }
297 spin_unlock(&server->req_lock);
289 break; 298 break;
290 } 299 }
291 } 300 }
292 return 0; 301 return 0;
293} 302}
294 303
304static int
305wait_for_free_request(struct TCP_Server_Info *server, const int optype)
306{
307 return wait_for_free_credits(server, optype, get_credits_field(server));
308}
309
295static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, 310static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
296 struct mid_q_entry **ppmidQ) 311 struct mid_q_entry **ppmidQ)
297{ 312{
@@ -326,13 +341,40 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
326 int error; 341 int error;
327 342
328 error = wait_event_freezekillable(server->response_q, 343 error = wait_event_freezekillable(server->response_q,
329 midQ->midState != MID_REQUEST_SUBMITTED); 344 midQ->mid_state != MID_REQUEST_SUBMITTED);
330 if (error < 0) 345 if (error < 0)
331 return -ERESTARTSYS; 346 return -ERESTARTSYS;
332 347
333 return 0; 348 return 0;
334} 349}
335 350
351static int
352cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
353 unsigned int nvec, struct mid_q_entry **ret_mid)
354{
355 int rc;
356 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
357 struct mid_q_entry *mid;
358
359 /* enable signing if server requires it */
360 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
361 hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
362
363 mid = AllocMidQEntry(hdr, server);
364 if (mid == NULL)
365 return -ENOMEM;
366
367 /* put it on the pending_mid_q */
368 spin_lock(&GlobalMid_Lock);
369 list_add_tail(&mid->qhead, &server->pending_mid_q);
370 spin_unlock(&GlobalMid_Lock);
371
372 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
373 if (rc)
374 delete_mid(mid);
375 *ret_mid = mid;
376 return rc;
377}
336 378
337/* 379/*
338 * Send a SMB request and set the callback function in the mid to handle 380 * Send a SMB request and set the callback function in the mid to handle
@@ -345,40 +387,24 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
345{ 387{
346 int rc; 388 int rc;
347 struct mid_q_entry *mid; 389 struct mid_q_entry *mid;
348 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
349 390
350 rc = wait_for_free_request(server, ignore_pend ? CIFS_ASYNC_OP : 0); 391 rc = wait_for_free_request(server, ignore_pend ? CIFS_ASYNC_OP : 0);
351 if (rc) 392 if (rc)
352 return rc; 393 return rc;
353 394
354 /* enable signing if server requires it */
355 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
356 hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
357
358 mutex_lock(&server->srv_mutex); 395 mutex_lock(&server->srv_mutex);
359 mid = AllocMidQEntry(hdr, server); 396 rc = cifs_setup_async_request(server, iov, nvec, &mid);
360 if (mid == NULL) {
361 mutex_unlock(&server->srv_mutex);
362 atomic_dec(&server->inFlight);
363 wake_up(&server->request_q);
364 return -ENOMEM;
365 }
366
367 /* put it on the pending_mid_q */
368 spin_lock(&GlobalMid_Lock);
369 list_add_tail(&mid->qhead, &server->pending_mid_q);
370 spin_unlock(&GlobalMid_Lock);
371
372 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
373 if (rc) { 397 if (rc) {
374 mutex_unlock(&server->srv_mutex); 398 mutex_unlock(&server->srv_mutex);
375 goto out_err; 399 cifs_add_credits(server, 1);
400 wake_up(&server->request_q);
401 return rc;
376 } 402 }
377 403
378 mid->receive = receive; 404 mid->receive = receive;
379 mid->callback = callback; 405 mid->callback = callback;
380 mid->callback_data = cbdata; 406 mid->callback_data = cbdata;
381 mid->midState = MID_REQUEST_SUBMITTED; 407 mid->mid_state = MID_REQUEST_SUBMITTED;
382 408
383 cifs_in_send_inc(server); 409 cifs_in_send_inc(server);
384 rc = smb_sendv(server, iov, nvec); 410 rc = smb_sendv(server, iov, nvec);
@@ -392,7 +418,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
392 return rc; 418 return rc;
393out_err: 419out_err:
394 delete_mid(mid); 420 delete_mid(mid);
395 atomic_dec(&server->inFlight); 421 cifs_add_credits(server, 1);
396 wake_up(&server->request_q); 422 wake_up(&server->request_q);
397 return rc; 423 return rc;
398} 424}
@@ -408,14 +434,14 @@ out_err:
408 */ 434 */
409int 435int
410SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, 436SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
411 struct smb_hdr *in_buf, int flags) 437 char *in_buf, int flags)
412{ 438{
413 int rc; 439 int rc;
414 struct kvec iov[1]; 440 struct kvec iov[1];
415 int resp_buf_type; 441 int resp_buf_type;
416 442
417 iov[0].iov_base = (char *)in_buf; 443 iov[0].iov_base = in_buf;
418 iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4; 444 iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
419 flags |= CIFS_NO_RESP; 445 flags |= CIFS_NO_RESP;
420 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 446 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
421 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); 447 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -428,11 +454,11 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
428{ 454{
429 int rc = 0; 455 int rc = 0;
430 456
431 cFYI(1, "%s: cmd=%d mid=%d state=%d", __func__, mid->command, 457 cFYI(1, "%s: cmd=%d mid=%llu state=%d", __func__,
432 mid->mid, mid->midState); 458 le16_to_cpu(mid->command), mid->mid, mid->mid_state);
433 459
434 spin_lock(&GlobalMid_Lock); 460 spin_lock(&GlobalMid_Lock);
435 switch (mid->midState) { 461 switch (mid->mid_state) {
436 case MID_RESPONSE_RECEIVED: 462 case MID_RESPONSE_RECEIVED:
437 spin_unlock(&GlobalMid_Lock); 463 spin_unlock(&GlobalMid_Lock);
438 return rc; 464 return rc;
@@ -447,8 +473,8 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
447 break; 473 break;
448 default: 474 default:
449 list_del_init(&mid->qhead); 475 list_del_init(&mid->qhead);
450 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 476 cERROR(1, "%s: invalid mid state mid=%llu state=%d", __func__,
451 mid->mid, mid->midState); 477 mid->mid, mid->mid_state);
452 rc = -EIO; 478 rc = -EIO;
453 } 479 }
454 spin_unlock(&GlobalMid_Lock); 480 spin_unlock(&GlobalMid_Lock);
@@ -498,7 +524,7 @@ int
498cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, 524cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
499 bool log_error) 525 bool log_error)
500{ 526{
501 unsigned int len = be32_to_cpu(mid->resp_buf->smb_buf_length) + 4; 527 unsigned int len = get_rfc1002_length(mid->resp_buf) + 4;
502 528
503 dump_smb(mid->resp_buf, min_t(u32, 92, len)); 529 dump_smb(mid->resp_buf, min_t(u32, 92, len));
504 530
@@ -518,6 +544,24 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
518 return map_smb_to_linux_error(mid->resp_buf, log_error); 544 return map_smb_to_linux_error(mid->resp_buf, log_error);
519} 545}
520 546
547static int
548cifs_setup_request(struct cifs_ses *ses, struct kvec *iov,
549 unsigned int nvec, struct mid_q_entry **ret_mid)
550{
551 int rc;
552 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
553 struct mid_q_entry *mid;
554
555 rc = allocate_mid(ses, hdr, &mid);
556 if (rc)
557 return rc;
558 rc = cifs_sign_smb2(iov, nvec, ses->server, &mid->sequence_number);
559 if (rc)
560 delete_mid(mid);
561 *ret_mid = mid;
562 return rc;
563}
564
521int 565int
522SendReceive2(const unsigned int xid, struct cifs_ses *ses, 566SendReceive2(const unsigned int xid, struct cifs_ses *ses,
523 struct kvec *iov, int n_vec, int *pRespBufType /* ret */, 567 struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
@@ -526,56 +570,53 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
526 int rc = 0; 570 int rc = 0;
527 int long_op; 571 int long_op;
528 struct mid_q_entry *midQ; 572 struct mid_q_entry *midQ;
529 struct smb_hdr *in_buf = iov[0].iov_base; 573 char *buf = iov[0].iov_base;
530 574
531 long_op = flags & CIFS_TIMEOUT_MASK; 575 long_op = flags & CIFS_TIMEOUT_MASK;
532 576
533 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ 577 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
534 578
535 if ((ses == NULL) || (ses->server == NULL)) { 579 if ((ses == NULL) || (ses->server == NULL)) {
536 cifs_small_buf_release(in_buf); 580 cifs_small_buf_release(buf);
537 cERROR(1, "Null session"); 581 cERROR(1, "Null session");
538 return -EIO; 582 return -EIO;
539 } 583 }
540 584
541 if (ses->server->tcpStatus == CifsExiting) { 585 if (ses->server->tcpStatus == CifsExiting) {
542 cifs_small_buf_release(in_buf); 586 cifs_small_buf_release(buf);
543 return -ENOENT; 587 return -ENOENT;
544 } 588 }
545 589
546 /* Ensure that we do not send more than 50 overlapping requests 590 /*
547 to the same server. We may make this configurable later or 591 * Ensure that we do not send more than 50 overlapping requests
548 use ses->maxReq */ 592 * to the same server. We may make this configurable later or
593 * use ses->maxReq.
594 */
549 595
550 rc = wait_for_free_request(ses->server, long_op); 596 rc = wait_for_free_request(ses->server, long_op);
551 if (rc) { 597 if (rc) {
552 cifs_small_buf_release(in_buf); 598 cifs_small_buf_release(buf);
553 return rc; 599 return rc;
554 } 600 }
555 601
556 /* make sure that we sign in the same order that we send on this socket 602 /*
557 and avoid races inside tcp sendmsg code that could cause corruption 603 * Make sure that we sign in the same order that we send on this socket
558 of smb data */ 604 * and avoid races inside tcp sendmsg code that could cause corruption
605 * of smb data.
606 */
559 607
560 mutex_lock(&ses->server->srv_mutex); 608 mutex_lock(&ses->server->srv_mutex);
561 609
562 rc = allocate_mid(ses, in_buf, &midQ); 610 rc = cifs_setup_request(ses, iov, n_vec, &midQ);
563 if (rc) { 611 if (rc) {
564 mutex_unlock(&ses->server->srv_mutex); 612 mutex_unlock(&ses->server->srv_mutex);
565 cifs_small_buf_release(in_buf); 613 cifs_small_buf_release(buf);
566 /* Update # of requests on wire to server */ 614 /* Update # of requests on wire to server */
567 atomic_dec(&ses->server->inFlight); 615 cifs_add_credits(ses->server, 1);
568 wake_up(&ses->server->request_q);
569 return rc; 616 return rc;
570 } 617 }
571 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
572 if (rc) {
573 mutex_unlock(&ses->server->srv_mutex);
574 cifs_small_buf_release(in_buf);
575 goto out;
576 }
577 618
578 midQ->midState = MID_REQUEST_SUBMITTED; 619 midQ->mid_state = MID_REQUEST_SUBMITTED;
579 cifs_in_send_inc(ses->server); 620 cifs_in_send_inc(ses->server);
580 rc = smb_sendv(ses->server, iov, n_vec); 621 rc = smb_sendv(ses->server, iov, n_vec);
581 cifs_in_send_dec(ses->server); 622 cifs_in_send_dec(ses->server);
@@ -584,48 +625,47 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
584 mutex_unlock(&ses->server->srv_mutex); 625 mutex_unlock(&ses->server->srv_mutex);
585 626
586 if (rc < 0) { 627 if (rc < 0) {
587 cifs_small_buf_release(in_buf); 628 cifs_small_buf_release(buf);
588 goto out; 629 goto out;
589 } 630 }
590 631
591 if (long_op == CIFS_ASYNC_OP) { 632 if (long_op == CIFS_ASYNC_OP) {
592 cifs_small_buf_release(in_buf); 633 cifs_small_buf_release(buf);
593 goto out; 634 goto out;
594 } 635 }
595 636
596 rc = wait_for_response(ses->server, midQ); 637 rc = wait_for_response(ses->server, midQ);
597 if (rc != 0) { 638 if (rc != 0) {
598 send_nt_cancel(ses->server, in_buf, midQ); 639 send_nt_cancel(ses->server, (struct smb_hdr *)buf, midQ);
599 spin_lock(&GlobalMid_Lock); 640 spin_lock(&GlobalMid_Lock);
600 if (midQ->midState == MID_REQUEST_SUBMITTED) { 641 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
601 midQ->callback = DeleteMidQEntry; 642 midQ->callback = DeleteMidQEntry;
602 spin_unlock(&GlobalMid_Lock); 643 spin_unlock(&GlobalMid_Lock);
603 cifs_small_buf_release(in_buf); 644 cifs_small_buf_release(buf);
604 atomic_dec(&ses->server->inFlight); 645 cifs_add_credits(ses->server, 1);
605 wake_up(&ses->server->request_q);
606 return rc; 646 return rc;
607 } 647 }
608 spin_unlock(&GlobalMid_Lock); 648 spin_unlock(&GlobalMid_Lock);
609 } 649 }
610 650
611 cifs_small_buf_release(in_buf); 651 cifs_small_buf_release(buf);
612 652
613 rc = cifs_sync_mid_result(midQ, ses->server); 653 rc = cifs_sync_mid_result(midQ, ses->server);
614 if (rc != 0) { 654 if (rc != 0) {
615 atomic_dec(&ses->server->inFlight); 655 cifs_add_credits(ses->server, 1);
616 wake_up(&ses->server->request_q);
617 return rc; 656 return rc;
618 } 657 }
619 658
620 if (!midQ->resp_buf || midQ->midState != MID_RESPONSE_RECEIVED) { 659 if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) {
621 rc = -EIO; 660 rc = -EIO;
622 cFYI(1, "Bad MID state?"); 661 cFYI(1, "Bad MID state?");
623 goto out; 662 goto out;
624 } 663 }
625 664
626 iov[0].iov_base = (char *)midQ->resp_buf; 665 buf = (char *)midQ->resp_buf;
627 iov[0].iov_len = be32_to_cpu(midQ->resp_buf->smb_buf_length) + 4; 666 iov[0].iov_base = buf;
628 if (midQ->largeBuf) 667 iov[0].iov_len = get_rfc1002_length(buf) + 4;
668 if (midQ->large_buf)
629 *pRespBufType = CIFS_LARGE_BUFFER; 669 *pRespBufType = CIFS_LARGE_BUFFER;
630 else 670 else
631 *pRespBufType = CIFS_SMALL_BUFFER; 671 *pRespBufType = CIFS_SMALL_BUFFER;
@@ -637,8 +677,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
637 midQ->resp_buf = NULL; 677 midQ->resp_buf = NULL;
638out: 678out:
639 delete_mid(midQ); 679 delete_mid(midQ);
640 atomic_dec(&ses->server->inFlight); 680 cifs_add_credits(ses->server, 1);
641 wake_up(&ses->server->request_q);
642 681
643 return rc; 682 return rc;
644} 683}
@@ -688,8 +727,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
688 if (rc) { 727 if (rc) {
689 mutex_unlock(&ses->server->srv_mutex); 728 mutex_unlock(&ses->server->srv_mutex);
690 /* Update # of requests on wire to server */ 729 /* Update # of requests on wire to server */
691 atomic_dec(&ses->server->inFlight); 730 cifs_add_credits(ses->server, 1);
692 wake_up(&ses->server->request_q);
693 return rc; 731 return rc;
694 } 732 }
695 733
@@ -699,7 +737,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
699 goto out; 737 goto out;
700 } 738 }
701 739
702 midQ->midState = MID_REQUEST_SUBMITTED; 740 midQ->mid_state = MID_REQUEST_SUBMITTED;
703 741
704 cifs_in_send_inc(ses->server); 742 cifs_in_send_inc(ses->server);
705 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 743 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
@@ -717,12 +755,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
717 if (rc != 0) { 755 if (rc != 0) {
718 send_nt_cancel(ses->server, in_buf, midQ); 756 send_nt_cancel(ses->server, in_buf, midQ);
719 spin_lock(&GlobalMid_Lock); 757 spin_lock(&GlobalMid_Lock);
720 if (midQ->midState == MID_REQUEST_SUBMITTED) { 758 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
721 /* no longer considered to be "in-flight" */ 759 /* no longer considered to be "in-flight" */
722 midQ->callback = DeleteMidQEntry; 760 midQ->callback = DeleteMidQEntry;
723 spin_unlock(&GlobalMid_Lock); 761 spin_unlock(&GlobalMid_Lock);
724 atomic_dec(&ses->server->inFlight); 762 cifs_add_credits(ses->server, 1);
725 wake_up(&ses->server->request_q);
726 return rc; 763 return rc;
727 } 764 }
728 spin_unlock(&GlobalMid_Lock); 765 spin_unlock(&GlobalMid_Lock);
@@ -730,25 +767,23 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
730 767
731 rc = cifs_sync_mid_result(midQ, ses->server); 768 rc = cifs_sync_mid_result(midQ, ses->server);
732 if (rc != 0) { 769 if (rc != 0) {
733 atomic_dec(&ses->server->inFlight); 770 cifs_add_credits(ses->server, 1);
734 wake_up(&ses->server->request_q);
735 return rc; 771 return rc;
736 } 772 }
737 773
738 if (!midQ->resp_buf || !out_buf || 774 if (!midQ->resp_buf || !out_buf ||
739 midQ->midState != MID_RESPONSE_RECEIVED) { 775 midQ->mid_state != MID_RESPONSE_RECEIVED) {
740 rc = -EIO; 776 rc = -EIO;
741 cERROR(1, "Bad MID state?"); 777 cERROR(1, "Bad MID state?");
742 goto out; 778 goto out;
743 } 779 }
744 780
745 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length); 781 *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
746 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); 782 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
747 rc = cifs_check_receive(midQ, ses->server, 0); 783 rc = cifs_check_receive(midQ, ses->server, 0);
748out: 784out:
749 delete_mid(midQ); 785 delete_mid(midQ);
750 atomic_dec(&ses->server->inFlight); 786 cifs_add_credits(ses->server, 1);
751 wake_up(&ses->server->request_q);
752 787
753 return rc; 788 return rc;
754} 789}
@@ -836,7 +871,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
836 return rc; 871 return rc;
837 } 872 }
838 873
839 midQ->midState = MID_REQUEST_SUBMITTED; 874 midQ->mid_state = MID_REQUEST_SUBMITTED;
840 cifs_in_send_inc(ses->server); 875 cifs_in_send_inc(ses->server);
841 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 876 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
842 cifs_in_send_dec(ses->server); 877 cifs_in_send_dec(ses->server);
@@ -850,13 +885,13 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
850 885
851 /* Wait for a reply - allow signals to interrupt. */ 886 /* Wait for a reply - allow signals to interrupt. */
852 rc = wait_event_interruptible(ses->server->response_q, 887 rc = wait_event_interruptible(ses->server->response_q,
853 (!(midQ->midState == MID_REQUEST_SUBMITTED)) || 888 (!(midQ->mid_state == MID_REQUEST_SUBMITTED)) ||
854 ((ses->server->tcpStatus != CifsGood) && 889 ((ses->server->tcpStatus != CifsGood) &&
855 (ses->server->tcpStatus != CifsNew))); 890 (ses->server->tcpStatus != CifsNew)));
856 891
857 /* Were we interrupted by a signal ? */ 892 /* Were we interrupted by a signal ? */
858 if ((rc == -ERESTARTSYS) && 893 if ((rc == -ERESTARTSYS) &&
859 (midQ->midState == MID_REQUEST_SUBMITTED) && 894 (midQ->mid_state == MID_REQUEST_SUBMITTED) &&
860 ((ses->server->tcpStatus == CifsGood) || 895 ((ses->server->tcpStatus == CifsGood) ||
861 (ses->server->tcpStatus == CifsNew))) { 896 (ses->server->tcpStatus == CifsNew))) {
862 897
@@ -886,7 +921,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
886 if (rc) { 921 if (rc) {
887 send_nt_cancel(ses->server, in_buf, midQ); 922 send_nt_cancel(ses->server, in_buf, midQ);
888 spin_lock(&GlobalMid_Lock); 923 spin_lock(&GlobalMid_Lock);
889 if (midQ->midState == MID_REQUEST_SUBMITTED) { 924 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
890 /* no longer considered to be "in-flight" */ 925 /* no longer considered to be "in-flight" */
891 midQ->callback = DeleteMidQEntry; 926 midQ->callback = DeleteMidQEntry;
892 spin_unlock(&GlobalMid_Lock); 927 spin_unlock(&GlobalMid_Lock);
@@ -904,13 +939,13 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
904 return rc; 939 return rc;
905 940
906 /* rcvd frame is ok */ 941 /* rcvd frame is ok */
907 if (out_buf == NULL || midQ->midState != MID_RESPONSE_RECEIVED) { 942 if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_RECEIVED) {
908 rc = -EIO; 943 rc = -EIO;
909 cERROR(1, "Bad MID state?"); 944 cERROR(1, "Bad MID state?");
910 goto out; 945 goto out;
911 } 946 }
912 947
913 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length); 948 *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
914 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); 949 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
915 rc = cifs_check_receive(midQ, ses->server, 0); 950 rc = cifs_check_receive(midQ, ses->server, 0);
916out: 951out:
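
Taken together, the transport.c hunks retire the global atomic inFlight counter and its GlobalMid_Lock protection in favour of per-server credits under server->req_lock, and fold every open-coded atomic_dec()/wake_up() pair into cifs_add_credits(). The wait also becomes killable, so a fatal signal can now break a thread out of waiting for a free slot, which the plain wait_event() it replaces never allowed. A userspace analogue of the wait/consume/release pattern, with a pthread mutex and condition variable standing in for req_lock and the request_q wait queue; a sketch under those assumptions, not the kernel implementation:

#include <pthread.h>

struct server {
	pthread_mutex_t req_lock;	/* protects credits and in_flight */
	pthread_cond_t request_q;	/* waiters for a free credit */
	int credits;			/* like *get_credits_field(server) */
	int in_flight;			/* requests currently on the wire */
};

/* wait_for_free_credits(): sleep until a credit is free, then take it. */
static void take_credit(struct server *srv)
{
	pthread_mutex_lock(&srv->req_lock);
	while (srv->credits <= 0)
		pthread_cond_wait(&srv->request_q, &srv->req_lock);
	srv->credits--;
	srv->in_flight++;
	pthread_mutex_unlock(&srv->req_lock);
}

/* cifs_add_credits(server, 1): give the credit back and wake a waiter,
 * replacing the removed atomic_dec()/wake_up() pairs. */
static void add_credits(struct server *srv, int add)
{
	pthread_mutex_lock(&srv->req_lock);
	srv->credits += add;
	srv->in_flight--;
	pthread_mutex_unlock(&srv->req_lock);
	pthread_cond_signal(&srv->request_q);
}
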
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5e2e1b3f068..2870597b5c9 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,7 +21,6 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23 23
24#include <asm/system.h>
25#include <asm/uaccess.h> 24#include <asm/uaccess.h>
26 25
27#include <linux/fs.h> 26#include <linux/fs.h>
@@ -208,13 +207,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
208 if (IS_ERR(root)) { 207 if (IS_ERR(root)) {
209 error = PTR_ERR(root); 208 error = PTR_ERR(root);
210 printk("Failure of coda_cnode_make for root: error %d\n", error); 209 printk("Failure of coda_cnode_make for root: error %d\n", error);
211 root = NULL;
212 goto error; 210 goto error;
213 } 211 }
214 212
215 printk("coda_read_super: rootinode is %ld dev %s\n", 213 printk("coda_read_super: rootinode is %ld dev %s\n",
216 root->i_ino, root->i_sb->s_id); 214 root->i_ino, root->i_sb->s_id);
217 sb->s_root = d_alloc_root(root); 215 sb->s_root = d_make_root(root);
218 if (!sb->s_root) { 216 if (!sb->s_root) {
219 error = -EINVAL; 217 error = -EINVAL;
220 goto error; 218 goto error;
@@ -222,9 +220,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
222 return 0; 220 return 0;
223 221
224error: 222error:
225 if (root)
226 iput(root);
227
228 mutex_lock(&vc->vc_mutex); 223 mutex_lock(&vc->vc_mutex);
229 bdi_destroy(&vc->bdi); 224 bdi_destroy(&vc->bdi);
230 vc->vc_sb = NULL; 225 vc->vc_sb = NULL;
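
The shrunken error path above is the d_alloc_root() to d_make_root() conversion at work: the new helper consumes the inode reference even when it fails, so callers no longer keep a root inode pointer around just to iput() it. The cramfs, configfs and dcache hunks below repeat the pattern. Reconstructed from the d_alloc_root() body removed in the dcache.c hunk plus that consume-on-failure contract, the helper behaves roughly like this sketch, not the fs/dcache.c text:

struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = __d_alloc(root_inode->i_sb, &name);
		if (res)
			d_instantiate(res, root_inode);
		else
			iput(root_inode);	/* consume the ref on failure too */
	}
	return res;
}
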
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 8f616e0e252..761d5b31b18 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -38,7 +38,6 @@
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <asm/io.h> 40#include <asm/io.h>
41#include <asm/system.h>
42#include <asm/poll.h> 41#include <asm/poll.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
44 43
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 9727e0c5257..0c68fd31fbf 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -14,7 +14,6 @@
14 * improvements to the Coda project. Contact Peter Braam <coda@cs.cmu.edu>. 14 * improvements to the Coda project. Contact Peter Braam <coda@cs.cmu.edu>.
15 */ 15 */
16 16
17#include <asm/system.h>
18#include <linux/signal.h> 17#include <linux/signal.h>
19#include <linux/sched.h> 18#include <linux/sched.h>
20#include <linux/types.h> 19#include <linux/types.h>
diff --git a/fs/compat.c b/fs/compat.c
index 07880bae28a..f2944ace7a7 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -33,7 +33,6 @@
33#include <linux/nfs4_mount.h> 33#include <linux/nfs4_mount.h>
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/ctype.h> 35#include <linux/ctype.h>
36#include <linux/module.h>
37#include <linux/dirent.h> 36#include <linux/dirent.h>
38#include <linux/fsnotify.h> 37#include <linux/fsnotify.h>
39#include <linux/highuid.h> 38#include <linux/highuid.h>
@@ -1171,10 +1170,9 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1171} 1170}
1172 1171
1173asmlinkage ssize_t 1172asmlinkage ssize_t
1174compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, 1173compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
1175 unsigned long vlen, u32 pos_low, u32 pos_high) 1174 unsigned long vlen, loff_t pos)
1176{ 1175{
1177 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1178 struct file *file; 1176 struct file *file;
1179 int fput_needed; 1177 int fput_needed;
1180 ssize_t ret; 1178 ssize_t ret;
@@ -1191,6 +1189,14 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1191 return ret; 1189 return ret;
1192} 1190}
1193 1191
1192asmlinkage ssize_t
1193compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1194 unsigned long vlen, u32 pos_low, u32 pos_high)
1195{
1196 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1197 return compat_sys_preadv64(fd, vec, vlen, pos);
1198}
1199
1194static size_t compat_writev(struct file *file, 1200static size_t compat_writev(struct file *file,
1195 const struct compat_iovec __user *vec, 1201 const struct compat_iovec __user *vec,
1196 unsigned long vlen, loff_t *pos) 1202 unsigned long vlen, loff_t *pos)
@@ -1230,10 +1236,9 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1230} 1236}
1231 1237
1232asmlinkage ssize_t 1238asmlinkage ssize_t
1233compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, 1239compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
1234 unsigned long vlen, u32 pos_low, u32 pos_high) 1240 unsigned long vlen, loff_t pos)
1235{ 1241{
1236 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1237 struct file *file; 1242 struct file *file;
1238 int fput_needed; 1243 int fput_needed;
1239 ssize_t ret; 1244 ssize_t ret;
@@ -1250,6 +1255,14 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1250 return ret; 1255 return ret;
1251} 1256}
1252 1257
1258asmlinkage ssize_t
1259compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1260 unsigned long vlen, u32 pos_low, u32 pos_high)
1261{
1262 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1263 return compat_sys_pwritev64(fd, vec, vlen, pos);
1264}
1265
1253asmlinkage long 1266asmlinkage long
1254compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32, 1267compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1255 unsigned int nr_segs, unsigned int flags) 1268 unsigned int nr_segs, unsigned int flags)
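
compat.c now exposes compat_sys_preadv64() and compat_sys_pwritev64(), which take an already assembled loff_t, while the original entry points shrink to wrappers that splice the 64-bit offset back together from the pos_low/pos_high halves that 32-bit userspace passes in separate registers. A self-contained check of that splice:

#include <stdint.h>
#include <stdio.h>

/* The wrappers above compute: pos = ((loff_t)pos_high << 32) | pos_low. */
static int64_t join_pos(uint32_t pos_low, uint32_t pos_high)
{
	return ((int64_t)pos_high << 32) | pos_low;
}

int main(void)
{
	/* pos_high = 0x1, pos_low = 0x80000000: a 6 GiB offset. */
	printf("%lld\n", (long long)join_pos(0x80000000u, 1u));	/* 6442450944 */
	return 0;
}

Splitting the functions this way presumably lets ABIs that already hand over a 64-bit offset call the *64 variants directly instead of going through the split-and-join dance.
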
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a26bea10e81..debdfe0fc80 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -34,7 +34,7 @@
34#include <linux/fs.h> 34#include <linux/fs.h>
35#include <linux/file.h> 35#include <linux/file.h>
36#include <linux/ppp_defs.h> 36#include <linux/ppp_defs.h>
37#include <linux/if_ppp.h> 37#include <linux/ppp-ioctl.h>
38#include <linux/if_pppox.h> 38#include <linux/if_pppox.h>
39#include <linux/mtio.h> 39#include <linux/mtio.h>
40#include <linux/auto_fs.h> 40#include <linux/auto_fs.h>
@@ -49,7 +49,6 @@
49#include <linux/elevator.h> 49#include <linux/elevator.h>
50#include <linux/rtc.h> 50#include <linux/rtc.h>
51#include <linux/pci.h> 51#include <linux/pci.h>
52#include <linux/module.h>
53#include <linux/serial.h> 52#include <linux/serial.h>
54#include <linux/if_tun.h> 53#include <linux/if_tun.h>
55#include <linux/ctype.h> 54#include <linux/ctype.h>
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index ede857d20a0..b5f0a3b91f1 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -58,12 +58,11 @@ struct configfs_dirent {
58extern struct mutex configfs_symlink_mutex; 58extern struct mutex configfs_symlink_mutex;
59extern spinlock_t configfs_dirent_lock; 59extern spinlock_t configfs_dirent_lock;
60 60
61extern struct vfsmount * configfs_mount;
62extern struct kmem_cache *configfs_dir_cachep; 61extern struct kmem_cache *configfs_dir_cachep;
63 62
64extern int configfs_is_root(struct config_item *item); 63extern int configfs_is_root(struct config_item *item);
65 64
66extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *); 65extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *);
67extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); 66extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
68extern int configfs_inode_init(void); 67extern int configfs_inode_init(void);
69extern void configfs_inode_exit(void); 68extern void configfs_inode_exit(void);
@@ -80,15 +79,15 @@ extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
80extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); 79extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
81extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr); 80extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
82 81
83extern int configfs_pin_fs(void); 82extern struct dentry *configfs_pin_fs(void);
84extern void configfs_release_fs(void); 83extern void configfs_release_fs(void);
85 84
86extern struct rw_semaphore configfs_rename_sem; 85extern struct rw_semaphore configfs_rename_sem;
87extern struct super_block * configfs_sb;
88extern const struct file_operations configfs_dir_operations; 86extern const struct file_operations configfs_dir_operations;
89extern const struct file_operations configfs_file_operations; 87extern const struct file_operations configfs_file_operations;
90extern const struct file_operations bin_fops; 88extern const struct file_operations bin_fops;
91extern const struct inode_operations configfs_dir_inode_operations; 89extern const struct inode_operations configfs_dir_inode_operations;
90extern const struct inode_operations configfs_root_inode_operations;
92extern const struct inode_operations configfs_symlink_inode_operations; 91extern const struct inode_operations configfs_symlink_inode_operations;
93extern const struct dentry_operations configfs_dentry_ops; 92extern const struct dentry_operations configfs_dentry_ops;
94 93
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5ddd7ebd9dc..7e6c52d8a20 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -264,11 +264,13 @@ static int init_symlink(struct inode * inode)
264 return 0; 264 return 0;
265} 265}
266 266
267static int create_dir(struct config_item * k, struct dentry * p, 267static int create_dir(struct config_item *k, struct dentry *d)
268 struct dentry * d)
269{ 268{
270 int error; 269 int error;
271 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 270 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
271 struct dentry *p = d->d_parent;
272
273 BUG_ON(!k);
272 274
273 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 275 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
274 if (!error) 276 if (!error)
@@ -304,19 +306,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
304 306
305static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 307static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
306{ 308{
307 struct dentry * parent; 309 int error = create_dir(item, dentry);
308 int error = 0;
309
310 BUG_ON(!item);
311
312 if (item->ci_parent)
313 parent = item->ci_parent->ci_dentry;
314 else if (configfs_mount)
315 parent = configfs_mount->mnt_root;
316 else
317 return -EFAULT;
318
319 error = create_dir(item,parent,dentry);
320 if (!error) 310 if (!error)
321 item->ci_dentry = dentry; 311 item->ci_dentry = dentry;
322 return error; 312 return error;
@@ -1079,23 +1069,24 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1079 int ret; 1069 int ret;
1080 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; 1070 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
1081 struct config_item *s_item = &subsys->su_group.cg_item; 1071 struct config_item *s_item = &subsys->su_group.cg_item;
1072 struct dentry *root;
1082 1073
1083 /* 1074 /*
1084 * Pin the configfs filesystem. This means we can safely access 1075 * Pin the configfs filesystem. This means we can safely access
1085 * the root of the configfs filesystem. 1076 * the root of the configfs filesystem.
1086 */ 1077 */
1087 ret = configfs_pin_fs(); 1078 root = configfs_pin_fs();
1088 if (ret) 1079 if (IS_ERR(root))
1089 return ret; 1080 return PTR_ERR(root);
1090 1081
1091 /* 1082 /*
1092 * Next, lock the root directory. We're going to check that the 1083 * Next, lock the root directory. We're going to check that the
1093 * subsystem is really registered, and so we need to lock out 1084 * subsystem is really registered, and so we need to lock out
1094 * configfs_[un]register_subsystem(). 1085 * configfs_[un]register_subsystem().
1095 */ 1086 */
1096 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); 1087 mutex_lock(&root->d_inode->i_mutex);
1097 1088
1098 root_sd = configfs_sb->s_root->d_fsdata; 1089 root_sd = root->d_fsdata;
1099 1090
1100 list_for_each_entry(p, &root_sd->s_children, s_sibling) { 1091 list_for_each_entry(p, &root_sd->s_children, s_sibling) {
1101 if (p->s_type & CONFIGFS_DIR) { 1092 if (p->s_type & CONFIGFS_DIR) {
@@ -1129,7 +1120,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1129out_unlock_dirent_lock: 1120out_unlock_dirent_lock:
1130 spin_unlock(&configfs_dirent_lock); 1121 spin_unlock(&configfs_dirent_lock);
1131out_unlock_fs: 1122out_unlock_fs:
1132 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1123 mutex_unlock(&root->d_inode->i_mutex);
1133 1124
1134 /* 1125 /*
1135 * If we succeeded, the fs is pinned via other methods. If not, 1126 * If we succeeded, the fs is pinned via other methods. If not,
@@ -1183,11 +1174,6 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
1183 struct module *subsys_owner = NULL, *new_item_owner = NULL; 1174 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1184 char *name; 1175 char *name;
1185 1176
1186 if (dentry->d_parent == configfs_sb->s_root) {
1187 ret = -EPERM;
1188 goto out;
1189 }
1190
1191 sd = dentry->d_parent->d_fsdata; 1177 sd = dentry->d_parent->d_fsdata;
1192 1178
1193 /* 1179 /*
@@ -1359,9 +1345,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1359 struct module *subsys_owner = NULL, *dead_item_owner = NULL; 1345 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1360 int ret; 1346 int ret;
1361 1347
1362 if (dentry->d_parent == configfs_sb->s_root)
1363 return -EPERM;
1364
1365 sd = dentry->d_fsdata; 1348 sd = dentry->d_fsdata;
1366 if (sd->s_type & CONFIGFS_USET_DEFAULT) 1349 if (sd->s_type & CONFIGFS_USET_DEFAULT)
1367 return -EPERM; 1350 return -EPERM;
@@ -1459,6 +1442,11 @@ const struct inode_operations configfs_dir_inode_operations = {
1459 .setattr = configfs_setattr, 1442 .setattr = configfs_setattr,
1460}; 1443};
1461 1444
1445const struct inode_operations configfs_root_inode_operations = {
1446 .lookup = configfs_lookup,
1447 .setattr = configfs_setattr,
1448};
1449
1462#if 0 1450#if 0
1463int configfs_rename_dir(struct config_item * item, const char *new_name) 1451int configfs_rename_dir(struct config_item * item, const char *new_name)
1464{ 1452{
@@ -1546,6 +1534,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd)
1546static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 1534static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1547{ 1535{
1548 struct dentry *dentry = filp->f_path.dentry; 1536 struct dentry *dentry = filp->f_path.dentry;
1537 struct super_block *sb = dentry->d_sb;
1549 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1538 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1550 struct configfs_dirent *cursor = filp->private_data; 1539 struct configfs_dirent *cursor = filp->private_data;
1551 struct list_head *p, *q = &cursor->s_sibling; 1540 struct list_head *p, *q = &cursor->s_sibling;
@@ -1608,7 +1597,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1608 ino = inode->i_ino; 1597 ino = inode->i_ino;
1609 spin_unlock(&configfs_dirent_lock); 1598 spin_unlock(&configfs_dirent_lock);
1610 if (!inode) 1599 if (!inode)
1611 ino = iunique(configfs_sb, 2); 1600 ino = iunique(sb, 2);
1612 1601
1613 if (filldir(dirent, name, len, filp->f_pos, ino, 1602 if (filldir(dirent, name, len, filp->f_pos, ino,
1614 dt_type(next)) < 0) 1603 dt_type(next)) < 0)
@@ -1680,27 +1669,27 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1680 struct config_group *group = &subsys->su_group; 1669 struct config_group *group = &subsys->su_group;
1681 struct qstr name; 1670 struct qstr name;
1682 struct dentry *dentry; 1671 struct dentry *dentry;
1672 struct dentry *root;
1683 struct configfs_dirent *sd; 1673 struct configfs_dirent *sd;
1684 1674
1685 err = configfs_pin_fs(); 1675 root = configfs_pin_fs();
1686 if (err) 1676 if (IS_ERR(root))
1687 return err; 1677 return PTR_ERR(root);
1688 1678
1689 if (!group->cg_item.ci_name) 1679 if (!group->cg_item.ci_name)
1690 group->cg_item.ci_name = group->cg_item.ci_namebuf; 1680 group->cg_item.ci_name = group->cg_item.ci_namebuf;
1691 1681
1692 sd = configfs_sb->s_root->d_fsdata; 1682 sd = root->d_fsdata;
1693 link_group(to_config_group(sd->s_element), group); 1683 link_group(to_config_group(sd->s_element), group);
1694 1684
1695 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1685 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
1696 I_MUTEX_PARENT);
1697 1686
1698 name.name = group->cg_item.ci_name; 1687 name.name = group->cg_item.ci_name;
1699 name.len = strlen(name.name); 1688 name.len = strlen(name.name);
1700 name.hash = full_name_hash(name.name, name.len); 1689 name.hash = full_name_hash(name.name, name.len);
1701 1690
1702 err = -ENOMEM; 1691 err = -ENOMEM;
1703 dentry = d_alloc(configfs_sb->s_root, &name); 1692 dentry = d_alloc(root, &name);
1704 if (dentry) { 1693 if (dentry) {
1705 d_add(dentry, NULL); 1694 d_add(dentry, NULL);
1706 1695
@@ -1717,7 +1706,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1717 } 1706 }
1718 } 1707 }
1719 1708
1720 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1709 mutex_unlock(&root->d_inode->i_mutex);
1721 1710
1722 if (err) { 1711 if (err) {
1723 unlink_group(group); 1712 unlink_group(group);
@@ -1731,13 +1720,14 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1731{ 1720{
1732 struct config_group *group = &subsys->su_group; 1721 struct config_group *group = &subsys->su_group;
1733 struct dentry *dentry = group->cg_item.ci_dentry; 1722 struct dentry *dentry = group->cg_item.ci_dentry;
1723 struct dentry *root = dentry->d_sb->s_root;
1734 1724
1735 if (dentry->d_parent != configfs_sb->s_root) { 1725 if (dentry->d_parent != root) {
1736 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); 1726 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
1737 return; 1727 return;
1738 } 1728 }
1739 1729
1740 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1730 mutex_lock_nested(&root->d_inode->i_mutex,
1741 I_MUTEX_PARENT); 1731 I_MUTEX_PARENT);
1742 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1732 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1743 mutex_lock(&configfs_symlink_mutex); 1733 mutex_lock(&configfs_symlink_mutex);
@@ -1754,7 +1744,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1754 1744
1755 d_delete(dentry); 1745 d_delete(dentry);
1756 1746
1757 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1747 mutex_unlock(&root->d_inode->i_mutex);
1758 1748
1759 dput(dentry); 1749 dput(dentry);
1760 1750
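
The deleted dentry->d_parent == configfs_sb->s_root checks in configfs_mkdir() and configfs_rmdir() (and in configfs_symlink() below) are not lost behaviour: the new configfs_root_inode_operations table installed on the root inode simply omits .mkdir, .rmdir and .symlink, and the VFS refuses an operation whose method is absent with -EPERM before the filesystem is ever called. A simplified model of that VFS-side gate, patterned on vfs_mkdir() but not quoting fs/namei.c:

/* With no .mkdir method on the root inode's operations table, the
 * caller gets -EPERM without configfs seeing the request. */
int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	if (!dir->i_op->mkdir)
		return -EPERM;
	/* ... permission checks elided ... */
	return dir->i_op->mkdir(dir, dentry, mode);
}
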
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 3ee36d41886..0074362d9f7 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -44,8 +44,6 @@
44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; 44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
45#endif 45#endif
46 46
47extern struct super_block * configfs_sb;
48
49static const struct address_space_operations configfs_aops = { 47static const struct address_space_operations configfs_aops = {
50 .readpage = simple_readpage, 48 .readpage = simple_readpage,
51 .write_begin = simple_write_begin, 49 .write_begin = simple_write_begin,
@@ -132,9 +130,10 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
132 inode->i_ctime = iattr->ia_ctime; 130 inode->i_ctime = iattr->ia_ctime;
133} 131}
134 132
135struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd) 133struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
134 struct super_block *s)
136{ 135{
137 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(s);
138 if (inode) { 137 if (inode) {
139 inode->i_ino = get_next_ino(); 138 inode->i_ino = get_next_ino();
140 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
@@ -188,36 +187,35 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
188int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *)) 187int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
189{ 188{
190 int error = 0; 189 int error = 0;
191 struct inode * inode = NULL; 190 struct inode *inode = NULL;
192 if (dentry) { 191 struct configfs_dirent *sd;
193 if (!dentry->d_inode) { 192 struct inode *p_inode;
194 struct configfs_dirent *sd = dentry->d_fsdata; 193
195 if ((inode = configfs_new_inode(mode, sd))) { 194 if (!dentry)
196 if (dentry->d_parent && dentry->d_parent->d_inode) { 195 return -ENOENT;
197 struct inode *p_inode = dentry->d_parent->d_inode; 196
198 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; 197 if (dentry->d_inode)
199 } 198 return -EEXIST;
200 configfs_set_inode_lock_class(sd, inode);
201 goto Proceed;
202 }
203 else
204 error = -ENOMEM;
205 } else
206 error = -EEXIST;
207 } else
208 error = -ENOENT;
209 goto Done;
210 199
211 Proceed: 200 sd = dentry->d_fsdata;
212 if (init) 201 inode = configfs_new_inode(mode, sd, dentry->d_sb);
202 if (!inode)
203 return -ENOMEM;
204
205 p_inode = dentry->d_parent->d_inode;
206 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
207 configfs_set_inode_lock_class(sd, inode);
208
209 if (init) {
213 error = init(inode); 210 error = init(inode);
214 if (!error) { 211 if (error) {
215 d_instantiate(dentry, inode); 212 iput(inode);
216 if (S_ISDIR(mode) || S_ISLNK(mode)) 213 return error;
217 dget(dentry); /* pin link and directory dentries in core */ 214 }
218 } else 215 }
219 iput(inode); 216 d_instantiate(dentry, inode);
220 Done: 217 if (S_ISDIR(mode) || S_ISLNK(mode))
218 dget(dentry); /* pin link and directory dentries in core */
221 return error; 219 return error;
222} 220}
223 221
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 276e15cafd5..aee0a7ebbd8 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -37,8 +37,7 @@
37/* Random magic number */ 37/* Random magic number */
38#define CONFIGFS_MAGIC 0x62656570 38#define CONFIGFS_MAGIC 0x62656570
39 39
40struct vfsmount * configfs_mount = NULL; 40static struct vfsmount *configfs_mount = NULL;
41struct super_block * configfs_sb = NULL;
42struct kmem_cache *configfs_dir_cachep; 41struct kmem_cache *configfs_dir_cachep;
43static int configfs_mnt_count = 0; 42static int configfs_mnt_count = 0;
44 43
@@ -77,12 +76,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
77 sb->s_magic = CONFIGFS_MAGIC; 76 sb->s_magic = CONFIGFS_MAGIC;
78 sb->s_op = &configfs_ops; 77 sb->s_op = &configfs_ops;
79 sb->s_time_gran = 1; 78 sb->s_time_gran = 1;
80 configfs_sb = sb;
81 79
82 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 80 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
83 &configfs_root); 81 &configfs_root, sb);
84 if (inode) { 82 if (inode) {
85 inode->i_op = &configfs_dir_inode_operations; 83 inode->i_op = &configfs_root_inode_operations;
86 inode->i_fop = &configfs_dir_operations; 84 inode->i_fop = &configfs_dir_operations;
87 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 85 /* directory inodes start off with i_nlink == 2 (for "." entry) */
88 inc_nlink(inode); 86 inc_nlink(inode);
@@ -91,10 +89,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
91 return -ENOMEM; 89 return -ENOMEM;
92 } 90 }
93 91
94 root = d_alloc_root(inode); 92 root = d_make_root(inode);
95 if (!root) { 93 if (!root) {
96 pr_debug("%s: could not get root dentry!\n",__func__); 94 pr_debug("%s: could not get root dentry!\n",__func__);
97 iput(inode);
98 return -ENOMEM; 95 return -ENOMEM;
99 } 96 }
100 config_group_init(&configfs_root_group); 97 config_group_init(&configfs_root_group);
@@ -118,10 +115,11 @@ static struct file_system_type configfs_fs_type = {
118 .kill_sb = kill_litter_super, 115 .kill_sb = kill_litter_super,
119}; 116};
120 117
121int configfs_pin_fs(void) 118struct dentry *configfs_pin_fs(void)
122{ 119{
123 return simple_pin_fs(&configfs_fs_type, &configfs_mount, 120 int err = simple_pin_fs(&configfs_fs_type, &configfs_mount,
124 &configfs_mnt_count); 121 &configfs_mnt_count);
122 return err ? ERR_PTR(err) : configfs_mount->mnt_root;
125} 123}
126 124
127void configfs_release_fs(void) 125void configfs_release_fs(void)
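
With the configfs_sb global gone, configfs_pin_fs() hands callers the pinned root dentry directly and signals failure through the pointer itself via ERR_PTR(). A userspace model of the ERR_PTR/IS_ERR/PTR_ERR convention the dir.c callers above decode; the kernel's versions live in err.h, and this sketch assumes the usual encoding of small negative errnos into the top, never-valid page of the address space:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *root = ERR_PTR(-2);	/* like configfs_pin_fs() failing */

	if (IS_ERR(root))
		printf("pin failed: %ld\n", PTR_ERR(root));	/* -2 */
	return 0;
}
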
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0f3eb41d920..cc9f2546ea4 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -110,13 +110,13 @@ out:
110 110
111 111
112static int get_target(const char *symname, struct path *path, 112static int get_target(const char *symname, struct path *path,
113 struct config_item **target) 113 struct config_item **target, struct super_block *sb)
114{ 114{
115 int ret; 115 int ret;
116 116
117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); 117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
118 if (!ret) { 118 if (!ret) {
119 if (path->dentry->d_sb == configfs_sb) { 119 if (path->dentry->d_sb == sb) {
120 *target = configfs_get_config_item(path->dentry); 120 *target = configfs_get_config_item(path->dentry);
121 if (!*target) { 121 if (!*target) {
122 ret = -ENOENT; 122 ret = -ENOENT;
@@ -141,10 +141,6 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 struct config_item *target_item = NULL; 141 struct config_item *target_item = NULL;
142 struct config_item_type *type; 142 struct config_item_type *type;
143 143
144 ret = -EPERM; /* What lack-of-symlink returns */
145 if (dentry->d_parent == configfs_sb->s_root)
146 goto out;
147
148 sd = dentry->d_parent->d_fsdata; 144 sd = dentry->d_parent->d_fsdata;
149 /* 145 /*
150 * Fake invisibility if dir belongs to a group/default groups hierarchy 146 * Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -162,7 +158,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
162 !type->ct_item_ops->allow_link) 158 !type->ct_item_ops->allow_link)
163 goto out_put; 159 goto out_put;
164 160
165 ret = get_target(symname, &path, &target_item); 161 ret = get_target(symname, &path, &target_item, dentry->d_sb);
166 if (ret) 162 if (ret)
167 goto out_put; 163 goto out_put;
168 164
@@ -198,8 +194,6 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
198 if (!(sd->s_type & CONFIGFS_ITEM_LINK)) 194 if (!(sd->s_type & CONFIGFS_ITEM_LINK))
199 goto out; 195 goto out;
200 196
201 BUG_ON(dentry->d_parent == configfs_sb->s_root);
202
203 sl = sd->s_element; 197 sl = sd->s_element;
204 198
205 parent_item = configfs_get_config_item(dentry->d_parent); 199 parent_item = configfs_get_config_item(dentry->d_parent);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a2ee8f9f5a3..d013c46402e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
257 257
258 /* Do sanity checks on the superblock */ 258 /* Do sanity checks on the superblock */
259 if (super.magic != CRAMFS_MAGIC) { 259 if (super.magic != CRAMFS_MAGIC) {
260 /* check for wrong endianess */ 260 /* check for wrong endianness */
261 if (super.magic == CRAMFS_MAGIC_WEND) { 261 if (super.magic == CRAMFS_MAGIC_WEND) {
262 if (!silent) 262 if (!silent)
263 printk(KERN_ERR "cramfs: wrong endianess\n"); 263 printk(KERN_ERR "cramfs: wrong endianness\n");
264 goto out; 264 goto out;
265 } 265 }
266 266
@@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
270 mutex_unlock(&read_mutex); 270 mutex_unlock(&read_mutex);
271 if (super.magic != CRAMFS_MAGIC) { 271 if (super.magic != CRAMFS_MAGIC) {
272 if (super.magic == CRAMFS_MAGIC_WEND && !silent) 272 if (super.magic == CRAMFS_MAGIC_WEND && !silent)
273 printk(KERN_ERR "cramfs: wrong endianess\n"); 273 printk(KERN_ERR "cramfs: wrong endianness\n");
274 else if (!silent) 274 else if (!silent)
275 printk(KERN_ERR "cramfs: wrong magic\n"); 275 printk(KERN_ERR "cramfs: wrong magic\n");
276 goto out; 276 goto out;
@@ -318,11 +318,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
318 root = get_cramfs_inode(sb, &super.root, 0); 318 root = get_cramfs_inode(sb, &super.root, 0);
319 if (IS_ERR(root)) 319 if (IS_ERR(root))
320 goto out; 320 goto out;
321 sb->s_root = d_alloc_root(root); 321 sb->s_root = d_make_root(root);
322 if (!sb->s_root) { 322 if (!sb->s_root)
323 iput(root);
324 goto out; 323 goto out;
325 }
326 return 0; 324 return 0;
327out: 325out:
328 kfree(sbi); 326 kfree(sbi);
diff --git a/fs/dcache.c b/fs/dcache.c
index bcbdb33fcc2..b80531c9177 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -23,7 +23,7 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/hash.h> 24#include <linux/hash.h>
25#include <linux/cache.h> 25#include <linux/cache.h>
26#include <linux/module.h> 26#include <linux/export.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/file.h> 28#include <linux/file.h>
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
@@ -105,10 +105,10 @@ static unsigned int d_hash_shift __read_mostly;
105static struct hlist_bl_head *dentry_hashtable __read_mostly; 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 106
107static inline struct hlist_bl_head *d_hash(const struct dentry *parent, 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 unsigned long hash) 108 unsigned int hash)
109{ 109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 110 hash += (unsigned long) parent / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 111 hash = hash + (hash >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK); 112 return dentry_hashtable + (hash & D_HASHMASK);
113} 113}
114 114
@@ -141,6 +141,45 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
141 * Compare 2 name strings, return 0 if they match, otherwise non-zero. 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142 * The strings are both count bytes long, and count is non-zero. 142 * The strings are both count bytes long, and count is non-zero.
143 */ 143 */
144#ifdef CONFIG_DCACHE_WORD_ACCESS
145
146#include <asm/word-at-a-time.h>
147/*
148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
149 * aligned allocation for this particular component. We don't
150 * strictly need the load_unaligned_zeropad() safety, but it
151 * doesn't hurt either.
152 *
153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
154 * need the careful unaligned handling.
155 */
156static inline int dentry_cmp(const unsigned char *cs, size_t scount,
157 const unsigned char *ct, size_t tcount)
158{
159 unsigned long a,b,mask;
160
161 if (unlikely(scount != tcount))
162 return 1;
163
164 for (;;) {
165 a = load_unaligned_zeropad(cs);
166 b = load_unaligned_zeropad(ct);
167 if (tcount < sizeof(unsigned long))
168 break;
169 if (unlikely(a != b))
170 return 1;
171 cs += sizeof(unsigned long);
172 ct += sizeof(unsigned long);
173 tcount -= sizeof(unsigned long);
174 if (!tcount)
175 return 0;
176 }
177 mask = ~(~0ul << tcount*8);
178 return unlikely(!!((a ^ b) & mask));
179}
180
181#else
182
144static inline int dentry_cmp(const unsigned char *cs, size_t scount, 183static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount) 184 const unsigned char *ct, size_t tcount)
146{ 185{
@@ -157,6 +196,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
157 return 0; 196 return 0;
158} 197}
159 198
199#endif
200
160static void __d_free(struct rcu_head *head) 201static void __d_free(struct rcu_head *head)
161{ 202{
162 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 203 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
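
The word-at-a-time dentry_cmp() added above compares whole unsigned longs and, on the final partial word, masks away the bytes past the end of the name with mask = ~(~0ul << tcount*8) before testing (a ^ b). A standalone check of that tail masking; little-endian byte order is assumed, as CONFIG_DCACHE_WORD_ACCESS implies here, and tcount stays below sizeof(unsigned long), which the loop guarantees:

#include <stdio.h>
#include <string.h>

/* Final-word test from dentry_cmp(): keep only the low tcount bytes of
 * each word, then look for a mismatch. */
static int tail_differs(unsigned long a, unsigned long b, size_t tcount)
{
	unsigned long mask = ~(~0ul << tcount * 8);	/* low tcount*8 bits set */

	return !!((a ^ b) & mask);
}

int main(void)
{
	unsigned long a = 0, b = 0;

	memcpy(&a, "fooAAAAA", sizeof(a));
	memcpy(&b, "fooBBBBB", sizeof(b));
	printf("%d\n", tail_differs(a, b, 3));	/* 0: padding masked away */
	memcpy(&b, "foxBBBBB", sizeof(b));
	printf("%d\n", tail_differs(a, b, 3));	/* 1: real mismatch */
	return 0;
}
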
@@ -1443,30 +1484,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1443 1484
1444EXPORT_SYMBOL(d_instantiate_unique); 1485EXPORT_SYMBOL(d_instantiate_unique);
1445 1486
1446/**
1447 * d_alloc_root - allocate root dentry
1448 * @root_inode: inode to allocate the root for
1449 *
1450 * Allocate a root ("/") dentry for the inode given. The inode is
1451 * instantiated and returned. %NULL is returned if there is insufficient
1452 * memory or the inode passed is %NULL.
1453 */
1454
1455struct dentry * d_alloc_root(struct inode * root_inode)
1456{
1457 struct dentry *res = NULL;
1458
1459 if (root_inode) {
1460 static const struct qstr name = { .name = "/", .len = 1 };
1461
1462 res = __d_alloc(root_inode->i_sb, &name);
1463 if (res)
1464 d_instantiate(res, root_inode);
1465 }
1466 return res;
1467}
1468EXPORT_SYMBOL(d_alloc_root);
1469
1470struct dentry *d_make_root(struct inode *root_inode) 1487struct dentry *d_make_root(struct inode *root_inode)
1471{ 1488{
1472 struct dentry *res = NULL; 1489 struct dentry *res = NULL;
@@ -1714,7 +1731,7 @@ EXPORT_SYMBOL(d_add_ci);
1714 * __d_lookup_rcu - search for a dentry (racy, store-free) 1731 * __d_lookup_rcu - search for a dentry (racy, store-free)
1715 * @parent: parent dentry 1732 * @parent: parent dentry
1716 * @name: qstr of name we wish to find 1733 * @name: qstr of name we wish to find
1717 * @seq: returns d_seq value at the point where the dentry was found 1734 * @seqp: returns d_seq value at the point where the dentry was found
1718 * @inode: returns dentry->d_inode when the inode was found valid. 1735 * @inode: returns dentry->d_inode when the inode was found valid.
1719 * Returns: dentry, or NULL 1736 * Returns: dentry, or NULL
1720 * 1737 *
@@ -2405,6 +2422,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2405 if (d_ancestor(alias, dentry)) { 2422 if (d_ancestor(alias, dentry)) {
2406 /* Check for loops */ 2423 /* Check for loops */
2407 actual = ERR_PTR(-ELOOP); 2424 actual = ERR_PTR(-ELOOP);
2425 spin_unlock(&inode->i_lock);
2408 } else if (IS_ROOT(alias)) { 2426 } else if (IS_ROOT(alias)) {
2409 /* Is this an anonymous mountpoint that we 2427 /* Is this an anonymous mountpoint that we
2410 * could splice into our tree? */ 2428 * could splice into our tree? */
@@ -2414,7 +2432,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2414 goto found; 2432 goto found;
2415 } else { 2433 } else {
2416 /* Nope, but we must(!) avoid directory 2434 /* Nope, but we must(!) avoid directory
2417 * aliasing */ 2435 * aliasing. This drops inode->i_lock */
2418 actual = __d_unalias(inode, dentry, alias); 2436 actual = __d_unalias(inode, dentry, alias);
2419 } 2437 }
2420 write_sequnlock(&rename_lock); 2438 write_sequnlock(&rename_lock);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index dda0dc702d1..17c77996782 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -13,7 +13,7 @@
13 */ 13 */
14 14
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/module.h> 16#include <linux/export.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ef023eef046..5dfafdd1dbd 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -33,18 +33,10 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
33 return count; 33 return count;
34} 34}
35 35
36static int default_open(struct inode *inode, struct file *file)
37{
38 if (inode->i_private)
39 file->private_data = inode->i_private;
40
41 return 0;
42}
43
44const struct file_operations debugfs_file_operations = { 36const struct file_operations debugfs_file_operations = {
45 .read = default_read_file, 37 .read = default_read_file,
46 .write = default_write_file, 38 .write = default_write_file,
47 .open = default_open, 39 .open = simple_open,
48 .llseek = noop_llseek, 40 .llseek = noop_llseek,
49}; 41};
50 42
@@ -447,7 +439,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
447static const struct file_operations fops_bool = { 439static const struct file_operations fops_bool = {
448 .read = read_file_bool, 440 .read = read_file_bool,
449 .write = write_file_bool, 441 .write = write_file_bool,
450 .open = default_open, 442 .open = simple_open,
451 .llseek = default_llseek, 443 .llseek = default_llseek,
452}; 444};
453 445
@@ -492,7 +484,7 @@ static ssize_t read_file_blob(struct file *file, char __user *user_buf,
492 484
493static const struct file_operations fops_blob = { 485static const struct file_operations fops_blob = {
494 .read = read_file_blob, 486 .read = read_file_blob,
495 .open = default_open, 487 .open = simple_open,
496 .llseek = default_llseek, 488 .llseek = default_llseek,
497}; 489};
498 490
@@ -611,7 +603,7 @@ static const struct file_operations fops_regset32 = {
611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 603 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
612 * code. 604 * code.
613 */ 605 */
614struct dentry *debugfs_create_regset32(const char *name, mode_t mode, 606struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
615 struct dentry *parent, 607 struct dentry *parent,
616 struct debugfs_regset32 *regset) 608 struct debugfs_regset32 *regset)
617{ 609{
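simple_open() does exactly what the removed default_open() did, copy inode->i_private into file->private_data, so converted users need no other change. A typical debugfs user then looks like this (foo_dev and the file name are hypothetical):

/* Sketch: a debugfs file built on simple_open(). */
static ssize_t foo_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	struct foo_dev *foo = file->private_data;	/* set by simple_open() */

	/* ... format foo's state into buf ... */
	return 0;
}

static const struct file_operations foo_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= foo_read,
	.llseek	= default_llseek,
};

/* registration: debugfs_create_file("foo", 0444, parent, foo, &foo_fops); */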
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 956d5ddddf6..b80bc846a15 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,9 +23,13 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/seq_file.h>
27#include <linux/parser.h>
26#include <linux/magic.h> 28#include <linux/magic.h>
27#include <linux/slab.h> 29#include <linux/slab.h>
28 30
31#define DEBUGFS_DEFAULT_MODE 0755
32
29static struct vfsmount *debugfs_mount; 33static struct vfsmount *debugfs_mount;
30static int debugfs_mount_count; 34static int debugfs_mount_count;
31static bool debugfs_registered; 35static bool debugfs_registered;
@@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry)
125 return dentry->d_inode && !d_unhashed(dentry); 129 return dentry->d_inode && !d_unhashed(dentry);
126} 130}
127 131
132struct debugfs_mount_opts {
133 uid_t uid;
134 gid_t gid;
135 umode_t mode;
136};
137
138enum {
139 Opt_uid,
140 Opt_gid,
141 Opt_mode,
142 Opt_err
143};
144
145static const match_table_t tokens = {
146 {Opt_uid, "uid=%u"},
147 {Opt_gid, "gid=%u"},
148 {Opt_mode, "mode=%o"},
149 {Opt_err, NULL}
150};
151
152struct debugfs_fs_info {
153 struct debugfs_mount_opts mount_opts;
154};
155
156static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
157{
158 substring_t args[MAX_OPT_ARGS];
159 int option;
160 int token;
161 char *p;
162
163 opts->mode = DEBUGFS_DEFAULT_MODE;
164
165 while ((p = strsep(&data, ",")) != NULL) {
166 if (!*p)
167 continue;
168
169 token = match_token(p, tokens, args);
170 switch (token) {
171 case Opt_uid:
172 if (match_int(&args[0], &option))
173 return -EINVAL;
174 opts->uid = option;
175 break;
176 case Opt_gid:
177 if (match_int(&args[0], &option))
178 return -EINVAL;
179 opts->gid = option;
180 break;
181 case Opt_mode:
182 if (match_octal(&args[0], &option))
183 return -EINVAL;
184 opts->mode = option & S_IALLUGO;
185 break;
186 /*
187 * We might like to report bad mount options here;
188 * but traditionally debugfs has ignored all mount options
189 */
190 }
191 }
192
193 return 0;
194}
195
196static int debugfs_apply_options(struct super_block *sb)
197{
198 struct debugfs_fs_info *fsi = sb->s_fs_info;
199 struct inode *inode = sb->s_root->d_inode;
200 struct debugfs_mount_opts *opts = &fsi->mount_opts;
201
202 inode->i_mode &= ~S_IALLUGO;
203 inode->i_mode |= opts->mode;
204
205 inode->i_uid = opts->uid;
206 inode->i_gid = opts->gid;
207
208 return 0;
209}
210
211static int debugfs_remount(struct super_block *sb, int *flags, char *data)
212{
213 int err;
214 struct debugfs_fs_info *fsi = sb->s_fs_info;
215
216 err = debugfs_parse_options(data, &fsi->mount_opts);
217 if (err)
218 goto fail;
219
220 debugfs_apply_options(sb);
221
222fail:
223 return err;
224}
225
226static int debugfs_show_options(struct seq_file *m, struct dentry *root)
227{
228 struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
229 struct debugfs_mount_opts *opts = &fsi->mount_opts;
230
231 if (opts->uid != 0)
232 seq_printf(m, ",uid=%u", opts->uid);
233 if (opts->gid != 0)
234 seq_printf(m, ",gid=%u", opts->gid);
235 if (opts->mode != DEBUGFS_DEFAULT_MODE)
236 seq_printf(m, ",mode=%o", opts->mode);
237
238 return 0;
239}
240
241static const struct super_operations debugfs_super_operations = {
242 .statfs = simple_statfs,
243 .remount_fs = debugfs_remount,
244 .show_options = debugfs_show_options,
245};
246
128static int debug_fill_super(struct super_block *sb, void *data, int silent) 247static int debug_fill_super(struct super_block *sb, void *data, int silent)
129{ 248{
130 static struct tree_descr debug_files[] = {{""}}; 249 static struct tree_descr debug_files[] = {{""}};
250 struct debugfs_fs_info *fsi;
251 int err;
252
253 save_mount_options(sb, data);
254
255 fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
256 sb->s_fs_info = fsi;
257 if (!fsi) {
258 err = -ENOMEM;
259 goto fail;
260 }
261
262 err = debugfs_parse_options(data, &fsi->mount_opts);
263 if (err)
264 goto fail;
265
266 err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
267 if (err)
268 goto fail;
269
270 sb->s_op = &debugfs_super_operations;
271
272 debugfs_apply_options(sb);
273
274 return 0;
131 275
132 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 276fail:
277 kfree(fsi);
278 sb->s_fs_info = NULL;
279 return err;
133} 280}
134 281
135static struct dentry *debug_mount(struct file_system_type *fs_type, 282static struct dentry *debug_mount(struct file_system_type *fs_type,
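With these options in place, access to the debugfs tree can be restricted at mount time; uid= and gid= take decimal values and mode= takes octal, per the match_table above. A minimal sketch using mount(2) (run as root, with the conventional debugfs mount point):

/* Sketch: mount debugfs with the new ownership options. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("none", "/sys/kernel/debug", "debugfs", 0,
		  "uid=1000,gid=1000,mode=0700") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}

A later mount(2) call with MS_REMOUNT and a new option string goes through debugfs_remount() above.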
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c4e2a58a2e8..10f5e0b484d 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -36,7 +36,61 @@
36#define DEVPTS_DEFAULT_PTMX_MODE 0000 36#define DEVPTS_DEFAULT_PTMX_MODE 0000
37#define PTMX_MINOR 2 37#define PTMX_MINOR 2
38 38
39extern int pty_limit; /* Config limit on Unix98 ptys */ 39/*
40 * sysctl support for setting limits on the number of Unix98 ptys allocated.
41 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
42 */
43static int pty_limit = NR_UNIX98_PTY_DEFAULT;
44static int pty_reserve = NR_UNIX98_PTY_RESERVE;
45static int pty_limit_min;
46static int pty_limit_max = INT_MAX;
47static int pty_count;
48
49static struct ctl_table pty_table[] = {
50 {
51 .procname = "max",
52 .maxlen = sizeof(int),
53 .mode = 0644,
54 .data = &pty_limit,
55 .proc_handler = proc_dointvec_minmax,
56 .extra1 = &pty_limit_min,
57 .extra2 = &pty_limit_max,
58 }, {
59 .procname = "reserve",
60 .maxlen = sizeof(int),
61 .mode = 0644,
62 .data = &pty_reserve,
63 .proc_handler = proc_dointvec_minmax,
64 .extra1 = &pty_limit_min,
65 .extra2 = &pty_limit_max,
66 }, {
67 .procname = "nr",
68 .maxlen = sizeof(int),
69 .mode = 0444,
70 .data = &pty_count,
71 .proc_handler = proc_dointvec,
72 },
73 {}
74};
75
76static struct ctl_table pty_kern_table[] = {
77 {
78 .procname = "pty",
79 .mode = 0555,
80 .child = pty_table,
81 },
82 {}
83};
84
85static struct ctl_table pty_root_table[] = {
86 {
87 .procname = "kernel",
88 .mode = 0555,
89 .child = pty_kern_table,
90 },
91 {}
92};
93
40static DEFINE_MUTEX(allocated_ptys_lock); 94static DEFINE_MUTEX(allocated_ptys_lock);
41 95
42static struct vfsmount *devpts_mnt; 96static struct vfsmount *devpts_mnt;
@@ -49,10 +103,11 @@ struct pts_mount_opts {
49 umode_t mode; 103 umode_t mode;
50 umode_t ptmxmode; 104 umode_t ptmxmode;
51 int newinstance; 105 int newinstance;
106 int max;
52}; 107};
53 108
54enum { 109enum {
55 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, 110 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_max,
56 Opt_err 111 Opt_err
57}; 112};
58 113
@@ -63,6 +118,7 @@ static const match_table_t tokens = {
63#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 118#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
64 {Opt_ptmxmode, "ptmxmode=%o"}, 119 {Opt_ptmxmode, "ptmxmode=%o"},
65 {Opt_newinstance, "newinstance"}, 120 {Opt_newinstance, "newinstance"},
121 {Opt_max, "max=%d"},
66#endif 122#endif
67 {Opt_err, NULL} 123 {Opt_err, NULL}
68}; 124};
@@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
109 opts->gid = 0; 165 opts->gid = 0;
110 opts->mode = DEVPTS_DEFAULT_MODE; 166 opts->mode = DEVPTS_DEFAULT_MODE;
111 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; 167 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
168 opts->max = NR_UNIX98_PTY_MAX;
112 169
113 /* newinstance makes sense only on initial mount */ 170 /* newinstance makes sense only on initial mount */
114 if (op == PARSE_MOUNT) 171 if (op == PARSE_MOUNT)
@@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
152 if (op == PARSE_MOUNT) 209 if (op == PARSE_MOUNT)
153 opts->newinstance = 1; 210 opts->newinstance = 1;
154 break; 211 break;
212 case Opt_max:
213 if (match_int(&args[0], &option) ||
214 option < 0 || option > NR_UNIX98_PTY_MAX)
215 return -EINVAL;
216 opts->max = option;
217 break;
155#endif 218#endif
156 default: 219 default:
157 printk(KERN_ERR "devpts: called with bogus options\n"); 220 printk(KERN_ERR "devpts: called with bogus options\n");
@@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
258 seq_printf(seq, ",mode=%03o", opts->mode); 321 seq_printf(seq, ",mode=%03o", opts->mode);
259#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 322#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
260 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); 323 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
324 if (opts->max < NR_UNIX98_PTY_MAX)
325 seq_printf(seq, ",max=%d", opts->max);
261#endif 326#endif
262 327
263 return 0; 328 return 0;
@@ -309,12 +374,11 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
309 inode->i_fop = &simple_dir_operations; 374 inode->i_fop = &simple_dir_operations;
310 set_nlink(inode, 2); 375 set_nlink(inode, 2);
311 376
312 s->s_root = d_alloc_root(inode); 377 s->s_root = d_make_root(inode);
313 if (s->s_root) 378 if (s->s_root)
314 return 0; 379 return 0;
315 380
316 printk(KERN_ERR "devpts: get root dentry failed\n"); 381 printk(KERN_ERR "devpts: get root dentry failed\n");
317 iput(inode);
318 382
319fail: 383fail:
320 return -ENOMEM; 384 return -ENOMEM;
@@ -438,6 +502,12 @@ retry:
438 return -ENOMEM; 502 return -ENOMEM;
439 503
440 mutex_lock(&allocated_ptys_lock); 504 mutex_lock(&allocated_ptys_lock);
505 if (pty_count >= pty_limit -
506 (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
507 mutex_unlock(&allocated_ptys_lock);
508 return -ENOSPC;
509 }
510
441 ida_ret = ida_get_new(&fsi->allocated_ptys, &index); 511 ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
442 if (ida_ret < 0) { 512 if (ida_ret < 0) {
443 mutex_unlock(&allocated_ptys_lock); 513 mutex_unlock(&allocated_ptys_lock);
@@ -446,11 +516,12 @@ retry:
446 return -EIO; 516 return -EIO;
447 } 517 }
448 518
449 if (index >= pty_limit) { 519 if (index >= fsi->mount_opts.max) {
450 ida_remove(&fsi->allocated_ptys, index); 520 ida_remove(&fsi->allocated_ptys, index);
451 mutex_unlock(&allocated_ptys_lock); 521 mutex_unlock(&allocated_ptys_lock);
452 return -EIO; 522 return -ENOSPC;
453 } 523 }
524 pty_count++;
454 mutex_unlock(&allocated_ptys_lock); 525 mutex_unlock(&allocated_ptys_lock);
455 return index; 526 return index;
456} 527}
@@ -462,6 +533,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
462 533
463 mutex_lock(&allocated_ptys_lock); 534 mutex_lock(&allocated_ptys_lock);
464 ida_remove(&fsi->allocated_ptys, idx); 535 ida_remove(&fsi->allocated_ptys, idx);
536 pty_count--;
465 mutex_unlock(&allocated_ptys_lock); 537 mutex_unlock(&allocated_ptys_lock);
466} 538}
467 539
@@ -558,11 +630,15 @@ void devpts_pty_kill(struct tty_struct *tty)
558static int __init init_devpts_fs(void) 630static int __init init_devpts_fs(void)
559{ 631{
560 int err = register_filesystem(&devpts_fs_type); 632 int err = register_filesystem(&devpts_fs_type);
633 struct ctl_table_header *table;
634
561 if (!err) { 635 if (!err) {
636 table = register_sysctl_table(pty_root_table);
562 devpts_mnt = kern_mount(&devpts_fs_type); 637 devpts_mnt = kern_mount(&devpts_fs_type);
563 if (IS_ERR(devpts_mnt)) { 638 if (IS_ERR(devpts_mnt)) {
564 err = PTR_ERR(devpts_mnt); 639 err = PTR_ERR(devpts_mnt);
565 unregister_filesystem(&devpts_fs_type); 640 unregister_filesystem(&devpts_fs_type);
641 unregister_sysctl_table(table);
566 } 642 }
567 } 643 }
568 return err; 644 return err;
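The limits are now visible in two places: globally via the kernel.pty sysctls and per-instance via the max= mount option (the latter only with CONFIG_DEVPTS_MULTIPLE_INSTANCES). A sketch, with /mnt/pts as a hypothetical mount point:

/* Sketch: read the pty counters and mount a capped devpts instance. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	int nr = 0;
	FILE *f = fopen("/proc/sys/kernel/pty/nr", "r");

	if (f) {
		if (fscanf(f, "%d", &nr) != 1)	/* ptys currently allocated */
			nr = -1;
		fclose(f);
	}
	printf("ptys currently allocated: %d\n", nr);

	/* Private instance limited to 64 ptys. */
	if (mount("devpts", "/mnt/pts", "devpts", 0,
		  "newinstance,max=64") < 0)
		perror("mount");
	return 0;
}

Once pty_count reaches the applicable limit, devpts_new_index() above fails with -ENOSPC instead of the old, misleading -EIO.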
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 3dca2b39e83..1c9b08095f9 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -609,13 +609,6 @@ static const struct file_operations format3_fops = {
609/* 609/*
610 * dump lkb's on the ls_waiters list 610 * dump lkb's on the ls_waiters list
611 */ 611 */
612
613static int waiters_open(struct inode *inode, struct file *file)
614{
615 file->private_data = inode->i_private;
616 return 0;
617}
618
619static ssize_t waiters_read(struct file *file, char __user *userbuf, 612static ssize_t waiters_read(struct file *file, char __user *userbuf,
620 size_t count, loff_t *ppos) 613 size_t count, loff_t *ppos)
621{ 614{
@@ -644,7 +637,7 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
644 637
645static const struct file_operations waiters_fops = { 638static const struct file_operations waiters_fops = {
646 .owner = THIS_MODULE, 639 .owner = THIS_MODULE,
647 .open = waiters_open, 640 .open = simple_open,
648 .read = waiters_read, 641 .read = waiters_read,
649 .llseek = default_llseek, 642 .llseek = default_llseek,
650}; 643};
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 83641574b01..dc5eb598b81 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -351,11 +351,28 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) 351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
352{ 352{
353 struct dlm_rsb *r; 353 struct dlm_rsb *r;
354 uint32_t hash, bucket;
355 int rv;
356
357 hash = jhash(name, len, 0);
358 bucket = hash & (ls->ls_rsbtbl_size - 1);
359
360 spin_lock(&ls->ls_rsbtbl[bucket].lock);
361 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r);
362 if (rv)
363 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
364 name, len, 0, &r);
365 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
366
367 if (!rv)
368 return r;
354 369
355 down_read(&ls->ls_root_sem); 370 down_read(&ls->ls_root_sem);
356 list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 371 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
357 if (len == r->res_length && !memcmp(name, r->res_name, len)) { 372 if (len == r->res_length && !memcmp(name, r->res_name, len)) {
358 up_read(&ls->ls_root_sem); 373 up_read(&ls->ls_root_sem);
374 log_error(ls, "find_rsb_root revert to root_list %s",
375 r->res_name);
359 return r; 376 return r;
360 } 377 }
361 } 378 }
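find_rsb_root() can now go straight to the right hash bucket instead of walking ls_root_list. The bucket arithmetic relies on ls_rsbtbl_size being a power of two, so the AND is equivalent to a modulo; a small illustration with a toy stand-in for the kernel's jhash():

/* Sketch: hash-to-bucket mapping for a power-of-two table. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Toy FNV-1a mix standing in for jhash(name, len, 0). */
static uint32_t toy_hash(const char *name, size_t len, uint32_t seed)
{
	uint32_t h = seed ^ 2166136261u;

	while (len--)
		h = (h ^ (unsigned char)*name++) * 16777619u;
	return h;
}

int main(void)
{
	const uint32_t table_size = 1024;	/* ls_rsbtbl_size analogue */
	uint32_t hash = toy_hash("my-resource", strlen("my-resource"), 0);
	uint32_t bucket = hash & (table_size - 1);	/* == hash % 1024 */

	printf("hash=%08x bucket=%u\n", hash, bucket);
	return 0;
}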
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d47183043c5..4c58d4a3adc 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -411,8 +411,8 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); 411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
412} 412}
413 413
414static int search_rsb_tree(struct rb_root *tree, char *name, int len, 414int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
415 unsigned int flags, struct dlm_rsb **r_ret) 415 unsigned int flags, struct dlm_rsb **r_ret)
416{ 416{
417 struct rb_node *node = tree->rb_node; 417 struct rb_node *node = tree->rb_node;
418 struct dlm_rsb *r; 418 struct dlm_rsb *r;
@@ -474,12 +474,12 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
474 struct dlm_rsb *r; 474 struct dlm_rsb *r;
475 int error; 475 int error;
476 476
477 error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); 477 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
478 if (!error) { 478 if (!error) {
479 kref_get(&r->res_ref); 479 kref_get(&r->res_ref);
480 goto out; 480 goto out;
481 } 481 }
482 error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 482 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
483 if (error) 483 if (error)
484 goto out; 484 goto out;
485 485
@@ -1737,6 +1737,18 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1737 return 1; 1737 return 1;
1738 1738
1739 /* 1739 /*
1740 * Even if the convert is compat with all granted locks,
1741 * QUECVT forces it behind other locks on the convert queue.
1742 */
1743
1744 if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) {
1745 if (list_empty(&r->res_convertqueue))
1746 return 1;
1747 else
1748 goto out;
1749 }
1750
1751 /*
1740 * The NOORDER flag is set to avoid the standard vms rules on grant 1752 * The NOORDER flag is set to avoid the standard vms rules on grant
1741 * order. 1753 * order.
1742 */ 1754 */
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 265017a7c3e..1a255307f6f 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -28,6 +28,9 @@ void dlm_scan_waiters(struct dlm_ls *ls);
28void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
29void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
30 30
31int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
32 unsigned int flags, struct dlm_rsb **r_ret);
33
31int dlm_purge_locks(struct dlm_ls *ls); 34int dlm_purge_locks(struct dlm_ls *ls);
32void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 35void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
33void dlm_grant_after_purge(struct dlm_ls *ls); 36void dlm_grant_after_purge(struct dlm_ls *ls);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0b3109ee425..133ef6dc7cb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,6 +52,7 @@
52#include <linux/mutex.h> 52#include <linux/mutex.h>
53#include <linux/sctp.h> 53#include <linux/sctp.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <net/sctp/sctp.h>
55#include <net/sctp/user.h> 56#include <net/sctp/user.h>
56#include <net/ipv6.h> 57#include <net/ipv6.h>
57 58
@@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con,
474 int prim_len, ret; 475 int prim_len, ret;
475 int addr_len; 476 int addr_len;
476 struct connection *new_con; 477 struct connection *new_con;
477 sctp_peeloff_arg_t parg;
478 int parglen = sizeof(parg);
479 int err;
480 478
481 /* 479 /*
482 * We get this before any data for an association. 480 * We get this before any data for an association.
@@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con,
525 return; 523 return;
526 524
527 /* Peel off a new sock */ 525 /* Peel off a new sock */
528 parg.associd = sn->sn_assoc_change.sac_assoc_id; 526 sctp_lock_sock(con->sock->sk);
529 ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, 527 ret = sctp_do_peeloff(con->sock->sk,
530 SCTP_SOCKOPT_PEELOFF, 528 sn->sn_assoc_change.sac_assoc_id,
531 (void *)&parg, &parglen); 529 &new_con->sock);
530 sctp_release_sock(con->sock->sk);
532 if (ret < 0) { 531 if (ret < 0) {
533 log_print("Can't peel off a socket for " 532 log_print("Can't peel off a socket for "
534 "connection %d to node %d: err=%d", 533 "connection %d to node %d: err=%d",
535 parg.associd, nodeid, ret); 534 (int)sn->sn_assoc_change.sac_assoc_id,
536 return; 535 nodeid, ret);
537 }
538 new_con->sock = sockfd_lookup(parg.sd, &err);
539 if (!new_con->sock) {
540 log_print("sockfd_lookup error %d", err);
541 return; 536 return;
542 } 537 }
543 add_sock(new_con->sock, new_con); 538 add_sock(new_con->sock, new_con);
544 sockfd_put(new_con->sock);
545 539
546 log_print("connecting to %d sctp association %d", 540 log_print("connecting to %d sctp association %d",
547 nodeid, (int)sn->sn_assoc_change.sac_assoc_id); 541 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
@@ -1082,7 +1076,7 @@ static void init_local(void)
1082 int i; 1076 int i;
1083 1077
1084 dlm_local_count = 0; 1078 dlm_local_count = 0;
1085 for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { 1079 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
1086 if (dlm_our_addr(&sas, i)) 1080 if (dlm_our_addr(&sas, i))
1087 break; 1081 break;
1088 1082
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d3f95f941c4..2b17f2f9b12 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -48,8 +48,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
48 unsigned long nr_segs, loff_t pos) 48 unsigned long nr_segs, loff_t pos)
49{ 49{
50 ssize_t rc; 50 ssize_t rc;
51 struct dentry *lower_dentry; 51 struct path lower;
52 struct vfsmount *lower_vfsmount;
53 struct file *file = iocb->ki_filp; 52 struct file *file = iocb->ki_filp;
54 53
55 rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 54 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
@@ -60,9 +59,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
60 if (-EIOCBQUEUED == rc) 59 if (-EIOCBQUEUED == rc)
61 rc = wait_on_sync_kiocb(iocb); 60 rc = wait_on_sync_kiocb(iocb);
62 if (rc >= 0) { 61 if (rc >= 0) {
63 lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); 62 lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
64 lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); 63 lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
65 touch_atime(lower_vfsmount, lower_dentry); 64 touch_atime(&lower);
66 } 65 }
67 return rc; 66 return rc;
68} 67}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index b4a6befb121..68954937a07 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -550,9 +550,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
550 if (IS_ERR(inode)) 550 if (IS_ERR(inode))
551 goto out_free; 551 goto out_free;
552 552
553 s->s_root = d_alloc_root(inode); 553 s->s_root = d_make_root(inode);
554 if (!s->s_root) { 554 if (!s->s_root) {
555 iput(inode);
556 rc = -ENOMEM; 555 rc = -ENOMEM;
557 goto out_free; 556 goto out_free;
558 } 557 }
@@ -795,15 +794,10 @@ static int __init ecryptfs_init(void)
795 "Failed to allocate one or more kmem_cache objects\n"); 794 "Failed to allocate one or more kmem_cache objects\n");
796 goto out; 795 goto out;
797 } 796 }
798 rc = register_filesystem(&ecryptfs_fs_type);
799 if (rc) {
800 printk(KERN_ERR "Failed to register filesystem\n");
801 goto out_free_kmem_caches;
802 }
803 rc = do_sysfs_registration(); 797 rc = do_sysfs_registration();
804 if (rc) { 798 if (rc) {
805 printk(KERN_ERR "sysfs registration failed\n"); 799 printk(KERN_ERR "sysfs registration failed\n");
806 goto out_unregister_filesystem; 800 goto out_free_kmem_caches;
807 } 801 }
808 rc = ecryptfs_init_kthread(); 802 rc = ecryptfs_init_kthread();
809 if (rc) { 803 if (rc) {
@@ -824,19 +818,24 @@ static int __init ecryptfs_init(void)
824 "rc = [%d]\n", rc); 818 "rc = [%d]\n", rc);
825 goto out_release_messaging; 819 goto out_release_messaging;
826 } 820 }
821 rc = register_filesystem(&ecryptfs_fs_type);
822 if (rc) {
823 printk(KERN_ERR "Failed to register filesystem\n");
824 goto out_destroy_crypto;
825 }
827 if (ecryptfs_verbosity > 0) 826 if (ecryptfs_verbosity > 0)
828 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values " 827 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values "
829 "will be written to the syslog!\n", ecryptfs_verbosity); 828 "will be written to the syslog!\n", ecryptfs_verbosity);
830 829
831 goto out; 830 goto out;
831out_destroy_crypto:
832 ecryptfs_destroy_crypto();
832out_release_messaging: 833out_release_messaging:
833 ecryptfs_release_messaging(); 834 ecryptfs_release_messaging();
834out_destroy_kthread: 835out_destroy_kthread:
835 ecryptfs_destroy_kthread(); 836 ecryptfs_destroy_kthread();
836out_do_sysfs_unregistration: 837out_do_sysfs_unregistration:
837 do_sysfs_unregistration(); 838 do_sysfs_unregistration();
838out_unregister_filesystem:
839 unregister_filesystem(&ecryptfs_fs_type);
840out_free_kmem_caches: 839out_free_kmem_caches:
841 ecryptfs_free_kmem_caches(); 840 ecryptfs_free_kmem_caches();
842out: 841out:
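Moving register_filesystem() to the end closes a race: as soon as it returns, a mount request can arrive, so everything the mount path depends on (sysfs entries, kthreads, messaging, crypto) must already be initialized, and the error labels unwind in exactly the reverse order. The general shape, with hypothetical helper names:

/* Sketch: register the filesystem only after all other setup. */
static int __init myfs_init(void)
{
	int rc;

	rc = myfs_setup_internals();		/* hypothetical helper */
	if (rc)
		return rc;

	rc = register_filesystem(&myfs_fs_type);	/* now mountable */
	if (rc)
		goto out_teardown;
	return 0;

out_teardown:
	myfs_teardown_internals();		/* hypothetical helper */
	return rc;
}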
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index cf152823bbf..2dd946b636d 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -184,7 +184,6 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
184const struct super_operations ecryptfs_sops = { 184const struct super_operations ecryptfs_sops = {
185 .alloc_inode = ecryptfs_alloc_inode, 185 .alloc_inode = ecryptfs_alloc_inode,
186 .destroy_inode = ecryptfs_destroy_inode, 186 .destroy_inode = ecryptfs_destroy_inode,
187 .drop_inode = generic_drop_inode,
188 .statfs = ecryptfs_statfs, 187 .statfs = ecryptfs_statfs,
189 .remount_fs = NULL, 188 .remount_fs = NULL,
190 .evict_inode = ecryptfs_evict_inode, 189 .evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 981106429a9..e755ec746c6 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -317,10 +317,9 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
317 goto out_no_fs; 317 goto out_no_fs;
318 } 318 }
319 319
320 s->s_root = d_alloc_root(root); 320 s->s_root = d_make_root(root);
321 if (!(s->s_root)) { 321 if (!(s->s_root)) {
322 printk(KERN_ERR "EFS: get root dentry failed\n"); 322 printk(KERN_ERR "EFS: get root dentry failed\n");
323 iput(root);
324 ret = -ENOMEM; 323 ret = -ENOMEM;
325 goto out_no_fs; 324 goto out_no_fs;
326 } 325 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d9a59177391..dba15fecf23 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,7 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/anon_inodes.h> 17#include <linux/anon_inodes.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/module.h> 19#include <linux/export.h>
20#include <linux/kref.h> 20#include <linux/kref.h>
21#include <linux/eventfd.h> 21#include <linux/eventfd.h>
22 22
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4d9d3a45e35..c0b3c70ee87 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,6 @@
34#include <linux/mutex.h> 34#include <linux/mutex.h>
35#include <linux/anon_inodes.h> 35#include <linux/anon_inodes.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/io.h> 37#include <asm/io.h>
39#include <asm/mman.h> 38#include <asm/mman.h>
40#include <linux/atomic.h> 39#include <linux/atomic.h>
@@ -427,6 +426,31 @@ out_unlock:
427 return error; 426 return error;
428} 427}
429 428
429/*
430 * As described in commit 0ccf831cb ("lockdep: annotate epoll"),
431 * the use of wait queues used by epoll is done in a very controlled
432 * manner. Wake ups can nest inside each other, but are never done
433 * with the same locking. For example:
434 *
435 * dfd = socket(...);
436 * efd1 = epoll_create();
437 * efd2 = epoll_create();
438 * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
439 * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
440 *
441 * When a packet arrives to the device underneath "dfd", the net code will
442 * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
443 * callback wakeup entry on that queue, and the wake_up() performed by the
444 * "dfd" net code will end up in ep_poll_callback(). At this point epoll
445 * (efd1) notices that it may have some event ready, so it needs to wake up
446 * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
447 * that ends up in another wake_up(), after having checked the
448 * recursion constraints. That is, no more than EP_MAX_POLLWAKE_NESTS, to
449 * avoid stack blasting.
450 *
451 * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle
452 * this special case of epoll.
453 */
430#ifdef CONFIG_DEBUG_LOCK_ALLOC 454#ifdef CONFIG_DEBUG_LOCK_ALLOC
431static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, 455static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
432 unsigned long events, int subclass) 456 unsigned long events, int subclass)
@@ -699,9 +723,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
699 void *priv) 723 void *priv)
700{ 724{
701 struct epitem *epi, *tmp; 725 struct epitem *epi, *tmp;
726 poll_table pt;
702 727
728 init_poll_funcptr(&pt, NULL);
703 list_for_each_entry_safe(epi, tmp, head, rdllink) { 729 list_for_each_entry_safe(epi, tmp, head, rdllink) {
704 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 730 pt._key = epi->event.events;
731 if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
705 epi->event.events) 732 epi->event.events)
706 return POLLIN | POLLRDNORM; 733 return POLLIN | POLLRDNORM;
707 else { 734 else {
@@ -1049,13 +1076,11 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
1049 */ 1076 */
1050static int reverse_path_check(void) 1077static int reverse_path_check(void)
1051{ 1078{
1052 int length = 0;
1053 int error = 0; 1079 int error = 0;
1054 struct file *current_file; 1080 struct file *current_file;
1055 1081
1056 /* let's call this for all tfiles */ 1082 /* let's call this for all tfiles */
1057 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { 1083 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
1058 length++;
1059 path_count_init(); 1084 path_count_init();
1060 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1085 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1061 reverse_path_check_proc, current_file, 1086 reverse_path_check_proc, current_file,
@@ -1097,6 +1122,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1097 /* Initialize the poll table using the queue callback */ 1122 /* Initialize the poll table using the queue callback */
1098 epq.epi = epi; 1123 epq.epi = epi;
1099 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); 1124 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
1125 epq.pt._key = event->events;
1100 1126
1101 /* 1127 /*
1102 * Attach the item to the poll hooks and get current event bits. 1128 * Attach the item to the poll hooks and get current event bits.
@@ -1191,6 +1217,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1191{ 1217{
1192 int pwake = 0; 1218 int pwake = 0;
1193 unsigned int revents; 1219 unsigned int revents;
1220 poll_table pt;
1221
1222 init_poll_funcptr(&pt, NULL);
1194 1223
1195 /* 1224 /*
1196 * Set the new event interest mask before calling f_op->poll(); 1225 * Set the new event interest mask before calling f_op->poll();
@@ -1198,13 +1227,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1198 * f_op->poll() call and the new event set registering. 1227 * f_op->poll() call and the new event set registering.
1199 */ 1228 */
1200 epi->event.events = event->events; 1229 epi->event.events = event->events;
1230 pt._key = event->events;
1201 epi->event.data = event->data; /* protected by mtx */ 1231 epi->event.data = event->data; /* protected by mtx */
1202 1232
1203 /* 1233 /*
1204 * Get current event bits. We can safely use the file* here because 1234 * Get current event bits. We can safely use the file* here because
1205 * its usage count has been increased by the caller of this function. 1235 * its usage count has been increased by the caller of this function.
1206 */ 1236 */
1207 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 1237 revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);
1208 1238
1209 /* 1239 /*
1210 * If the item is "hot" and it is not registered inside the ready 1240 * If the item is "hot" and it is not registered inside the ready
@@ -1239,6 +1269,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1239 unsigned int revents; 1269 unsigned int revents;
1240 struct epitem *epi; 1270 struct epitem *epi;
1241 struct epoll_event __user *uevent; 1271 struct epoll_event __user *uevent;
1272 poll_table pt;
1273
1274 init_poll_funcptr(&pt, NULL);
1242 1275
1243 /* 1276 /*
1244 * We can loop without lock because we are passed a task private list. 1277 * We can loop without lock because we are passed a task private list.
@@ -1251,7 +1284,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1251 1284
1252 list_del_init(&epi->rdllink); 1285 list_del_init(&epi->rdllink);
1253 1286
1254 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 1287 pt._key = epi->event.events;
1288 revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
1255 epi->event.events; 1289 epi->event.events;
1256 1290
1257 /* 1291 /*
@@ -1629,8 +1663,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1629 if (op == EPOLL_CTL_ADD) { 1663 if (op == EPOLL_CTL_ADD) {
1630 if (is_file_epoll(tfile)) { 1664 if (is_file_epoll(tfile)) {
1631 error = -ELOOP; 1665 error = -ELOOP;
1632 if (ep_loop_check(ep, tfile) != 0) 1666 if (ep_loop_check(ep, tfile) != 0) {
1667 clear_tfile_check_list();
1633 goto error_tgt_fput; 1668 goto error_tgt_fput;
1669 }
1634 } else 1670 } else
1635 list_add(&tfile->f_tfile_llink, &tfile_check_list); 1671 list_add(&tfile->f_tfile_llink, &tfile_check_list);
1636 } 1672 }
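The clear_tfile_check_list() added above runs on the error path taken when the new watch would create a cycle of epoll descriptors; the failure itself is easy to observe from userspace:

/* Sketch: a cycle between two epoll fds is rejected with ELOOP. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
	int efd1 = epoll_create1(0);
	int efd2 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	/* One direction is fine... */
	if (epoll_ctl(efd1, EPOLL_CTL_ADD, efd2, &ev) < 0)
		perror("first EPOLL_CTL_ADD");

	/* ...but closing the loop must fail. */
	if (epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, &ev) < 0)
		printf("second EPOLL_CTL_ADD: %s\n", strerror(errno));
	return 0;
}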
diff --git a/fs/exec.c b/fs/exec.c
index 153dee14fe5..b1fd2025e59 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,10 +59,13 @@
59#include <asm/uaccess.h> 59#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
61#include <asm/tlb.h> 61#include <asm/tlb.h>
62#include <asm/exec.h>
62 63
63#include <trace/events/task.h> 64#include <trace/events/task.h>
64#include "internal.h" 65#include "internal.h"
65 66
67#include <trace/events/sched.h>
68
66int core_uses_pid; 69int core_uses_pid;
67char core_pattern[CORENAME_MAX_SIZE] = "core"; 70char core_pattern[CORENAME_MAX_SIZE] = "core";
68unsigned int core_pipe_limit; 71unsigned int core_pipe_limit;
@@ -79,15 +82,13 @@ static atomic_t call_count = ATOMIC_INIT(1);
79static LIST_HEAD(formats); 82static LIST_HEAD(formats);
80static DEFINE_RWLOCK(binfmt_lock); 83static DEFINE_RWLOCK(binfmt_lock);
81 84
82int __register_binfmt(struct linux_binfmt * fmt, int insert) 85void __register_binfmt(struct linux_binfmt * fmt, int insert)
83{ 86{
84 if (!fmt) 87 BUG_ON(!fmt);
85 return -EINVAL;
86 write_lock(&binfmt_lock); 88 write_lock(&binfmt_lock);
87 insert ? list_add(&fmt->lh, &formats) : 89 insert ? list_add(&fmt->lh, &formats) :
88 list_add_tail(&fmt->lh, &formats); 90 list_add_tail(&fmt->lh, &formats);
89 write_unlock(&binfmt_lock); 91 write_unlock(&binfmt_lock);
90 return 0;
91} 92}
92 93
93EXPORT_SYMBOL(__register_binfmt); 94EXPORT_SYMBOL(__register_binfmt);
@@ -822,7 +823,7 @@ static int exec_mmap(struct mm_struct *mm)
822 /* Notify parent that we're no longer interested in the old VM */ 823 /* Notify parent that we're no longer interested in the old VM */
823 tsk = current; 824 tsk = current;
824 old_mm = current->mm; 825 old_mm = current->mm;
825 sync_mm_rss(tsk, old_mm); 826 sync_mm_rss(old_mm);
826 mm_release(tsk, old_mm); 827 mm_release(tsk, old_mm);
827 828
828 if (old_mm) { 829 if (old_mm) {
@@ -848,6 +849,7 @@ static int exec_mmap(struct mm_struct *mm)
848 if (old_mm) { 849 if (old_mm) {
849 up_read(&old_mm->mmap_sem); 850 up_read(&old_mm->mmap_sem);
850 BUG_ON(active_mm != old_mm); 851 BUG_ON(active_mm != old_mm);
852 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
851 mm_update_next_owner(old_mm); 853 mm_update_next_owner(old_mm);
852 mmput(old_mm); 854 mmput(old_mm);
853 return 0; 855 return 0;
@@ -975,8 +977,8 @@ static int de_thread(struct task_struct *tsk)
975 sig->notify_count = 0; 977 sig->notify_count = 0;
976 978
977no_thread_group: 979no_thread_group:
978 if (current->mm) 980 /* we have changed execution domain */
979 setmax_mm_hiwater_rss(&sig->maxrss, current->mm); 981 tsk->exit_signal = SIGCHLD;
980 982
981 exit_itimers(sig); 983 exit_itimers(sig);
982 flush_itimer_signals(); 984 flush_itimer_signals();
@@ -1026,10 +1028,10 @@ static void flush_old_files(struct files_struct * files)
1026 fdt = files_fdtable(files); 1028 fdt = files_fdtable(files);
1027 if (i >= fdt->max_fds) 1029 if (i >= fdt->max_fds)
1028 break; 1030 break;
1029 set = fdt->close_on_exec->fds_bits[j]; 1031 set = fdt->close_on_exec[j];
1030 if (!set) 1032 if (!set)
1031 continue; 1033 continue;
1032 fdt->close_on_exec->fds_bits[j] = 0; 1034 fdt->close_on_exec[j] = 0;
1033 spin_unlock(&files->file_lock); 1035 spin_unlock(&files->file_lock);
1034 for ( ; set ; i++,set >>= 1) { 1036 for ( ; set ; i++,set >>= 1) {
1035 if (set & 1) { 1037 if (set & 1) {
@@ -1112,7 +1114,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1112 bprm->mm = NULL; /* We're using it now */ 1114 bprm->mm = NULL; /* We're using it now */
1113 1115
1114 set_fs(USER_DS); 1116 set_fs(USER_DS);
1115 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1117 current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
1116 flush_thread(); 1118 flush_thread();
1117 current->personality &= ~bprm->per_clear; 1119 current->personality &= ~bprm->per_clear;
1118 1120
@@ -1339,13 +1341,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
1339 ret = -EFAULT; 1341 ret = -EFAULT;
1340 goto out; 1342 goto out;
1341 } 1343 }
1342 kaddr = kmap_atomic(page, KM_USER0); 1344 kaddr = kmap_atomic(page);
1343 1345
1344 for (; offset < PAGE_SIZE && kaddr[offset]; 1346 for (; offset < PAGE_SIZE && kaddr[offset];
1345 offset++, bprm->p++) 1347 offset++, bprm->p++)
1346 ; 1348 ;
1347 1349
1348 kunmap_atomic(kaddr, KM_USER0); 1350 kunmap_atomic(kaddr);
1349 put_arg_page(page); 1351 put_arg_page(page);
1350 1352
1351 if (offset == PAGE_SIZE) 1353 if (offset == PAGE_SIZE)
@@ -1369,7 +1371,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1369 unsigned int depth = bprm->recursion_depth; 1371 unsigned int depth = bprm->recursion_depth;
1370 int try,retval; 1372 int try,retval;
1371 struct linux_binfmt *fmt; 1373 struct linux_binfmt *fmt;
1372 pid_t old_pid; 1374 pid_t old_pid, old_vpid;
1373 1375
1374 retval = security_bprm_check(bprm); 1376 retval = security_bprm_check(bprm);
1375 if (retval) 1377 if (retval)
@@ -1380,8 +1382,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1380 return retval; 1382 return retval;
1381 1383
1382 /* Need to fetch pid before load_binary changes it */ 1384 /* Need to fetch pid before load_binary changes it */
1385 old_pid = current->pid;
1383 rcu_read_lock(); 1386 rcu_read_lock();
1384 old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); 1387 old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1385 rcu_read_unlock(); 1388 rcu_read_unlock();
1386 1389
1387 retval = -ENOENT; 1390 retval = -ENOENT;
@@ -1402,9 +1405,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1402 */ 1405 */
1403 bprm->recursion_depth = depth; 1406 bprm->recursion_depth = depth;
1404 if (retval >= 0) { 1407 if (retval >= 0) {
1405 if (depth == 0) 1408 if (depth == 0) {
1406 ptrace_event(PTRACE_EVENT_EXEC, 1409 trace_sched_process_exec(current, old_pid, bprm);
1407 old_pid); 1410 ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1411 }
1408 put_binfmt(fmt); 1412 put_binfmt(fmt);
1409 allow_write_access(bprm->file); 1413 allow_write_access(bprm->file);
1410 if (bprm->file) 1414 if (bprm->file)
@@ -2064,8 +2068,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2064 fd_install(0, rp); 2068 fd_install(0, rp);
2065 spin_lock(&cf->file_lock); 2069 spin_lock(&cf->file_lock);
2066 fdt = files_fdtable(cf); 2070 fdt = files_fdtable(cf);
2067 FD_SET(0, fdt->open_fds); 2071 __set_open_fd(0, fdt);
2068 FD_CLR(0, fdt->close_on_exec); 2072 __clear_close_on_exec(0, fdt);
2069 spin_unlock(&cf->file_lock); 2073 spin_unlock(&cf->file_lock);
2070 2074
2071 /* and disallow core files too */ 2075 /* and disallow core files too */
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 80405836ba6..c61e62ac231 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
597 goto fail; 597 goto fail;
598 } 598 }
599 599
600 kaddr = kmap_atomic(page, KM_USER0); 600 kaddr = kmap_atomic(page);
601 de = (struct exofs_dir_entry *)kaddr; 601 de = (struct exofs_dir_entry *)kaddr;
602 de->name_len = 1; 602 de->name_len = 1;
603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); 603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
@@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
611 de->inode_no = cpu_to_le64(parent->i_ino); 611 de->inode_no = cpu_to_le64(parent->i_ino);
612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); 612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
613 exofs_set_de_type(de, inode); 613 exofs_set_de_type(de, inode);
614 kunmap_atomic(kaddr, KM_USER0); 614 kunmap_atomic(kaddr);
615 err = exofs_commit_chunk(page, 0, chunk_size); 615 err = exofs_commit_chunk(page, 0, chunk_size);
616fail: 616fail:
617 page_cache_release(page); 617 page_cache_release(page);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 9dbf0c30103..fc7161d6bf6 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
143{ 143{
144 struct inode *inode = old_dentry->d_inode; 144 struct inode *inode = old_dentry->d_inode;
145 145
146 if (inode->i_nlink >= EXOFS_LINK_MAX)
147 return -EMLINK;
148
149 inode->i_ctime = CURRENT_TIME; 146 inode->i_ctime = CURRENT_TIME;
150 inode_inc_link_count(inode); 147 inode_inc_link_count(inode);
151 ihold(inode); 148 ihold(inode);
@@ -156,10 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 153static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
157{ 154{
158 struct inode *inode; 155 struct inode *inode;
159 int err = -EMLINK; 156 int err;
160
161 if (dir->i_nlink >= EXOFS_LINK_MAX)
162 goto out;
163 157
164 inode_inc_link_count(dir); 158 inode_inc_link_count(dir);
165 159
@@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
275 if (err) 269 if (err)
276 goto out_dir; 270 goto out_dir;
277 } else { 271 } else {
278 if (dir_de) {
279 err = -EMLINK;
280 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
281 goto out_dir;
282 }
283 err = exofs_add_link(new_dentry, old_inode); 272 err = exofs_add_link(new_dentry, old_inode);
284 if (err) 273 if (err)
285 goto out_dir; 274 goto out_dir;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index d22cd168c6e..735ca06430a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -389,7 +389,7 @@ static int exofs_sync_fs(struct super_block *sb, int wait)
389 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 389 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
390 memset(fscb, 0, ios->length); 390 memset(fscb, 0, ios->length);
391 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 391 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
392 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 392 fscb->s_numfiles = cpu_to_le64(sbi->s_numfiles);
393 fscb->s_magic = cpu_to_le16(sb->s_magic); 393 fscb->s_magic = cpu_to_le16(sb->s_magic);
394 fscb->s_newfs = 0; 394 fscb->s_newfs = 0;
395 fscb->s_version = EXOFS_FSCB_VER; 395 fscb->s_version = EXOFS_FSCB_VER;
@@ -529,7 +529,8 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
529 struct osd_dev_info *odi) 529 struct osd_dev_info *odi)
530{ 530{
531 odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); 531 odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
532 memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); 532 if (likely(odi->systemid_len))
533 memcpy(odi->systemid, dt_dev->systemid, OSD_SYSTEMID_LEN);
533 534
534 odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); 535 odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
535 odi->osdname = dt_dev->osdname; 536 odi->osdname = dt_dev->osdname;
@@ -565,7 +566,7 @@ int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
565 566
566 aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); 567 aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
567 if (unlikely(!aoded)) { 568 if (unlikely(!aoded)) {
568 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", 569 EXOFS_ERR("ERROR: failed allocating Device array[%d]\n",
569 numdevs); 570 numdevs);
570 return -ENOMEM; 571 return -ENOMEM;
571 } 572 }
@@ -754,6 +755,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
754 sb->s_blocksize = EXOFS_BLKSIZE; 755 sb->s_blocksize = EXOFS_BLKSIZE;
755 sb->s_blocksize_bits = EXOFS_BLKSHIFT; 756 sb->s_blocksize_bits = EXOFS_BLKSHIFT;
756 sb->s_maxbytes = MAX_LFS_FILESIZE; 757 sb->s_maxbytes = MAX_LFS_FILESIZE;
758 sb->s_max_links = EXOFS_LINK_MAX;
757 atomic_set(&sbi->s_curr_pending, 0); 759 atomic_set(&sbi->s_curr_pending, 0);
758 sb->s_bdev = NULL; 760 sb->s_bdev = NULL;
759 sb->s_dev = 0; 761 sb->s_dev = 0;
@@ -818,9 +820,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
818 ret = PTR_ERR(root); 820 ret = PTR_ERR(root);
819 goto free_sbi; 821 goto free_sbi;
820 } 822 }
821 sb->s_root = d_alloc_root(root); 823 sb->s_root = d_make_root(root);
822 if (!sb->s_root) { 824 if (!sb->s_root) {
823 iput(root);
824 EXOFS_ERR("ERROR: get root inode failed\n"); 825 EXOFS_ERR("ERROR: get root inode failed\n");
825 ret = -ENOMEM; 826 ret = -ENOMEM;
826 goto free_sbi; 827 goto free_sbi;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d37df352d32..0f4f5c92925 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -645,7 +645,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
645 unlock_page(page); 645 unlock_page(page);
646 goto fail; 646 goto fail;
647 } 647 }
648 kaddr = kmap_atomic(page, KM_USER0); 648 kaddr = kmap_atomic(page);
649 memset(kaddr, 0, chunk_size); 649 memset(kaddr, 0, chunk_size);
650 de = (struct ext2_dir_entry_2 *)kaddr; 650 de = (struct ext2_dir_entry_2 *)kaddr;
651 de->name_len = 1; 651 de->name_len = 1;
@@ -660,7 +660,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
660 de->inode = cpu_to_le32(parent->i_ino); 660 de->inode = cpu_to_le32(parent->i_ino);
661 memcpy (de->name, "..\0", 4); 661 memcpy (de->name, "..\0", 4);
662 ext2_set_de_type (de, inode); 662 ext2_set_de_type (de, inode);
663 kunmap_atomic(kaddr, KM_USER0); 663 kunmap_atomic(kaddr);
664 err = ext2_commit_chunk(page, 0, chunk_size); 664 err = ext2_commit_chunk(page, 0, chunk_size);
665fail: 665fail:
666 page_cache_release(page); 666 page_cache_release(page);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 75ad433c669..0b2b4db5bdc 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -1,5 +1,636 @@
1/*
2 * Copyright (C) 1992, 1993, 1994, 1995
3 * Remy Card (card@masi.ibp.fr)
4 * Laboratoire MASI - Institut Blaise Pascal
5 * Universite Pierre et Marie Curie (Paris VI)
6 *
7 * from
8 *
9 * linux/include/linux/minix_fs.h
10 *
11 * Copyright (C) 1991, 1992 Linus Torvalds
12 */
1#include <linux/fs.h> 13#include <linux/fs.h>
2#include <linux/ext2_fs.h> 14#include <linux/ext2_fs.h>
15#include <linux/blockgroup_lock.h>
16#include <linux/percpu_counter.h>
17#include <linux/rbtree.h>
18
19/* XXX Here for now... not interested in restructuring headers JUST now */
20
21/* data type for block offset of block group */
22typedef int ext2_grpblk_t;
23
24/* data type for filesystem-wide blocks number */
25typedef unsigned long ext2_fsblk_t;
26
27#define E2FSBLK "%lu"
28
29struct ext2_reserve_window {
30 ext2_fsblk_t _rsv_start; /* First byte reserved */
31 ext2_fsblk_t _rsv_end; /* Last byte reserved or 0 */
32};
33
34struct ext2_reserve_window_node {
35 struct rb_node rsv_node;
36 __u32 rsv_goal_size;
37 __u32 rsv_alloc_hit;
38 struct ext2_reserve_window rsv_window;
39};
40
41struct ext2_block_alloc_info {
42 /* information about reservation window */
43 struct ext2_reserve_window_node rsv_window_node;
44 /*
45 * was i_next_alloc_block in ext2_inode_info
46 * is the logical (file-relative) number of the
47 * most-recently-allocated block in this file.
48 * We use this for detecting linearly ascending allocation requests.
49 */
50 __u32 last_alloc_logical_block;
51 /*
52 * Was i_next_alloc_goal in ext2_inode_info
53 * is the *physical* companion to i_next_alloc_block.
54 * it is the physical block number of the block which was most-recently
55 * allocated to this file. This gives us the goal (target) for the next
56 * allocation when we detect linearly ascending requests.
57 */
58 ext2_fsblk_t last_alloc_physical_block;
59};
60
61#define rsv_start rsv_window._rsv_start
62#define rsv_end rsv_window._rsv_end
63
64/*
65 * second extended-fs super-block data in memory
66 */
67struct ext2_sb_info {
68 unsigned long s_frag_size; /* Size of a fragment in bytes */
69 unsigned long s_frags_per_block;/* Number of fragments per block */
70 unsigned long s_inodes_per_block;/* Number of inodes per block */
71 unsigned long s_frags_per_group;/* Number of fragments in a group */
72 unsigned long s_blocks_per_group;/* Number of blocks in a group */
73 unsigned long s_inodes_per_group;/* Number of inodes in a group */
74 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
75 unsigned long s_gdb_count; /* Number of group descriptor blocks */
76 unsigned long s_desc_per_block; /* Number of group descriptors per block */
77 unsigned long s_groups_count; /* Number of groups in the fs */
78 unsigned long s_overhead_last; /* Last calculated overhead */
79 unsigned long s_blocks_last; /* Last seen block count */
80 struct buffer_head * s_sbh; /* Buffer containing the super block */
81 struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */
82 struct buffer_head ** s_group_desc;
83 unsigned long s_mount_opt;
84 unsigned long s_sb_block;
85 uid_t s_resuid;
86 gid_t s_resgid;
87 unsigned short s_mount_state;
88 unsigned short s_pad;
89 int s_addr_per_block_bits;
90 int s_desc_per_block_bits;
91 int s_inode_size;
92 int s_first_ino;
93 spinlock_t s_next_gen_lock;
94 u32 s_next_generation;
95 unsigned long s_dir_count;
96 u8 *s_debts;
97 struct percpu_counter s_freeblocks_counter;
98 struct percpu_counter s_freeinodes_counter;
99 struct percpu_counter s_dirs_counter;
100 struct blockgroup_lock *s_blockgroup_lock;
101 /* root of the per fs reservation window tree */
102 spinlock_t s_rsv_window_lock;
103 struct rb_root s_rsv_window_root;
104 struct ext2_reserve_window_node s_rsv_window_head;
105 /*
106 * s_lock protects against concurrent modifications of s_mount_state,
107 * s_blocks_last, s_overhead_last and the content of superblock's
108 * buffer pointed to by sbi->s_es.
109 *
110 * Note: It is used in ext2_show_options() to provide a consistent view
111 * of the mount options.
112 */
113 spinlock_t s_lock;
114};
115
116static inline spinlock_t *
117sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
118{
119 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
120}
121
122/*
123 * Define EXT2FS_DEBUG to produce debug messages
124 */
125#undef EXT2FS_DEBUG
126
127/*
128 * Define EXT2_RESERVATION to reserve data blocks for expanding files
129 */
130#define EXT2_DEFAULT_RESERVE_BLOCKS 8
131/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
132#define EXT2_MAX_RESERVE_BLOCKS 1027
133#define EXT2_RESERVE_WINDOW_NOT_ALLOCATED 0
134/*
135 * The second extended file system version
136 */
137#define EXT2FS_DATE "95/08/09"
138#define EXT2FS_VERSION "0.5b"
139
140/*
141 * Debug code
142 */
143#ifdef EXT2FS_DEBUG
144# define ext2_debug(f, a...) { \
145 printk ("EXT2-fs DEBUG (%s, %d): %s:", \
146 __FILE__, __LINE__, __func__); \
147 printk (f, ## a); \
148 }
149#else
150# define ext2_debug(f, a...) /**/
151#endif
152
153/*
154 * Special inode numbers
155 */
156#define EXT2_BAD_INO 1 /* Bad blocks inode */
157#define EXT2_ROOT_INO 2 /* Root inode */
158#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
159#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
160
161/* First non-reserved inode for old ext2 filesystems */
162#define EXT2_GOOD_OLD_FIRST_INO 11
163
164static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
165{
166 return sb->s_fs_info;
167}
168
169/*
170 * Macro-instructions used to manage several block sizes
171 */
172#define EXT2_MIN_BLOCK_SIZE 1024
173#define EXT2_MAX_BLOCK_SIZE 4096
174#define EXT2_MIN_BLOCK_LOG_SIZE 10
175#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize)
176#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
177#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
178#define EXT2_ADDR_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_addr_per_block_bits)
179#define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size)
180#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino)
181
182/*
183 * Macro-instructions used to manage fragments
184 */
185#define EXT2_MIN_FRAG_SIZE 1024
186#define EXT2_MAX_FRAG_SIZE 4096
187#define EXT2_MIN_FRAG_LOG_SIZE 10
188#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size)
189#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block)
190
191/*
192 * Structure of a blocks group descriptor
193 */
194struct ext2_group_desc
195{
196 __le32 bg_block_bitmap; /* Blocks bitmap block */
197 __le32 bg_inode_bitmap; /* Inodes bitmap block */
198 __le32 bg_inode_table; /* Inodes table block */
199 __le16 bg_free_blocks_count; /* Free blocks count */
200 __le16 bg_free_inodes_count; /* Free inodes count */
201 __le16 bg_used_dirs_count; /* Directories count */
202 __le16 bg_pad;
203 __le32 bg_reserved[3];
204};
205
206/*
207 * Macro-instructions used to manage group descriptors
208 */
209#define EXT2_BLOCKS_PER_GROUP(s) (EXT2_SB(s)->s_blocks_per_group)
210#define EXT2_DESC_PER_BLOCK(s) (EXT2_SB(s)->s_desc_per_block)
211#define EXT2_INODES_PER_GROUP(s) (EXT2_SB(s)->s_inodes_per_group)
212#define EXT2_DESC_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_desc_per_block_bits)
213
214/*
215 * Constants relative to the data blocks
216 */
217#define EXT2_NDIR_BLOCKS 12
218#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS
219#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1)
220#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1)
221#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
222
223/*
224 * Inode flags (GETFLAGS/SETFLAGS)
225 */
226#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */
227#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */
228#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */
229#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
230#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
231#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
232#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
233#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
234/* Reserved for compression usage... */
235#define EXT2_DIRTY_FL FS_DIRTY_FL
236#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
237#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
238#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
239/* End compression flags --- maybe not all used */
240#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */
241#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
242#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
243#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
244#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
245#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
246#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
247#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
248
249#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
250#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
251
252/* Flags that should be inherited by new inodes from their parent. */
253#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
254 EXT2_SYNC_FL | EXT2_NODUMP_FL |\
255 EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\
256 EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
257 EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
258
259/* Flags that are appropriate for regular files (all but dir-specific ones). */
260#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL))
261
262/* Flags that are appropriate for non-directories/regular files. */
263#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL)
264
265/* Mask out flags that are inappropriate for the given type of inode. */
266static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags)
267{
268 if (S_ISDIR(mode))
269 return flags;
270 else if (S_ISREG(mode))
271 return flags & EXT2_REG_FLMASK;
272 else
273 return flags & EXT2_OTHER_FLMASK;
274}
275
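A typical consumer is the inode-creation path: the child starts from the parent directory's inheritable flags, then drops whatever is meaningless for its own file type. A hedged sketch (EXT2_I and ei are assumed from the surrounding ext2 code, not defined in this excerpt):

	/* Illustrative inheritance at create time: */
	__u32 flags = EXT2_I(dir)->i_flags & EXT2_FL_INHERITED;

	ei->i_flags = ext2_mask_flags(inode->i_mode, flags);
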
276/*
277 * ioctl commands
278 */
279#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS
280#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS
281#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION
282#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION
283#define EXT2_IOC_GETRSVSZ _IOR('f', 5, long)
284#define EXT2_IOC_SETRSVSZ _IOW('f', 6, long)
285
286/*
287 * ioctl commands in 32 bit emulation
288 */
289#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
290#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
291#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION
292#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION
293
294/*
295 * Structure of an inode on the disk
296 */
297struct ext2_inode {
298 __le16 i_mode; /* File mode */
299 __le16 i_uid; /* Low 16 bits of Owner Uid */
300 __le32 i_size; /* Size in bytes */
301 __le32 i_atime; /* Access time */
 302	__le32	i_ctime;	/* Inode change time */
303 __le32 i_mtime; /* Modification time */
304 __le32 i_dtime; /* Deletion Time */
305 __le16 i_gid; /* Low 16 bits of Group Id */
306 __le16 i_links_count; /* Links count */
307 __le32 i_blocks; /* Blocks count */
308 __le32 i_flags; /* File flags */
309 union {
310 struct {
311 __le32 l_i_reserved1;
312 } linux1;
313 struct {
314 __le32 h_i_translator;
315 } hurd1;
316 struct {
317 __le32 m_i_reserved1;
318 } masix1;
319 } osd1; /* OS dependent 1 */
320 __le32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
321 __le32 i_generation; /* File version (for NFS) */
322 __le32 i_file_acl; /* File ACL */
323 __le32 i_dir_acl; /* Directory ACL */
324 __le32 i_faddr; /* Fragment address */
325 union {
326 struct {
327 __u8 l_i_frag; /* Fragment number */
328 __u8 l_i_fsize; /* Fragment size */
329 __u16 i_pad1;
330 __le16 l_i_uid_high; /* these 2 fields */
331 __le16 l_i_gid_high; /* were reserved2[0] */
332 __u32 l_i_reserved2;
333 } linux2;
334 struct {
335 __u8 h_i_frag; /* Fragment number */
336 __u8 h_i_fsize; /* Fragment size */
337 __le16 h_i_mode_high;
338 __le16 h_i_uid_high;
339 __le16 h_i_gid_high;
340 __le32 h_i_author;
341 } hurd2;
342 struct {
343 __u8 m_i_frag; /* Fragment number */
344 __u8 m_i_fsize; /* Fragment size */
345 __u16 m_pad1;
346 __u32 m_i_reserved2[2];
347 } masix2;
348 } osd2; /* OS dependent 2 */
349};
350
351#define i_size_high i_dir_acl
352
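For regular files on filesystems with the LARGE_FILE ro-compat feature, i_dir_acl is reinterpreted as the high 32 bits of the file size, which is what the i_size_high alias above expresses. A runnable sketch of the reassembly (the helper name is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Combine the two on-disk halves of a large file's size. */
	static uint64_t ext2_full_size(uint32_t i_size, uint32_t i_size_high)
	{
		return ((uint64_t)i_size_high << 32) | i_size;
	}

	int main(void)
	{
		/* 0x1_80000000 bytes = 6 GiB */
		printf("%llu\n", (unsigned long long)
		       ext2_full_size(0x80000000u, 0x1u));
		return 0;
	}
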
353#define i_reserved1 osd1.linux1.l_i_reserved1
354#define i_frag osd2.linux2.l_i_frag
355#define i_fsize osd2.linux2.l_i_fsize
356#define i_uid_low i_uid
357#define i_gid_low i_gid
358#define i_uid_high osd2.linux2.l_i_uid_high
359#define i_gid_high osd2.linux2.l_i_gid_high
360#define i_reserved2 osd2.linux2.l_i_reserved2
361
362/*
363 * File system states
364 */
365#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */
366#define EXT2_ERROR_FS 0x0002 /* Errors detected */
367
368/*
369 * Mount flags
370 */
371#define EXT2_MOUNT_CHECK 0x000001 /* Do mount-time checks */
372#define EXT2_MOUNT_OLDALLOC 0x000002 /* Don't use the new Orlov allocator */
373#define EXT2_MOUNT_GRPID 0x000004 /* Create files with directory's group */
374#define EXT2_MOUNT_DEBUG 0x000008 /* Some debugging messages */
375#define EXT2_MOUNT_ERRORS_CONT 0x000010 /* Continue on errors */
376#define EXT2_MOUNT_ERRORS_RO 0x000020 /* Remount fs ro on errors */
377#define EXT2_MOUNT_ERRORS_PANIC 0x000040 /* Panic on errors */
378#define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */
379#define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */
380#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */
381#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */
382#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */
383#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */
384#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */
385#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */
386#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */
387
388
389#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
390#define set_opt(o, opt) o |= EXT2_MOUNT_##opt
391#define test_opt(sb, opt) (EXT2_SB(sb)->s_mount_opt & \
392 EXT2_MOUNT_##opt)
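These helpers keep every mount-option test phrased in terms of the EXT2_MOUNT_* bit names. A hypothetical parse/consume pair:

	/* Illustrative only: */
	set_opt(sbi->s_mount_opt, GRPID);	/* "grpid" seen while parsing */
	if (test_opt(sb, GRPID))
		inode->i_gid = dir->i_gid;	/* new file gets directory's group */
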
393/*
394 * Maximal mount counts between two filesystem checks
395 */
396#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
397#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */
398
399/*
400 * Behaviour when detecting errors
401 */
402#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */
403#define EXT2_ERRORS_RO 2 /* Remount fs read-only */
404#define EXT2_ERRORS_PANIC 3 /* Panic */
405#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE
406
407/*
408 * Structure of the super block
409 */
410struct ext2_super_block {
411 __le32 s_inodes_count; /* Inodes count */
412 __le32 s_blocks_count; /* Blocks count */
413 __le32 s_r_blocks_count; /* Reserved blocks count */
414 __le32 s_free_blocks_count; /* Free blocks count */
415 __le32 s_free_inodes_count; /* Free inodes count */
416 __le32 s_first_data_block; /* First Data Block */
417 __le32 s_log_block_size; /* Block size */
418 __le32 s_log_frag_size; /* Fragment size */
419 __le32 s_blocks_per_group; /* # Blocks per group */
420 __le32 s_frags_per_group; /* # Fragments per group */
421 __le32 s_inodes_per_group; /* # Inodes per group */
422 __le32 s_mtime; /* Mount time */
423 __le32 s_wtime; /* Write time */
424 __le16 s_mnt_count; /* Mount count */
425 __le16 s_max_mnt_count; /* Maximal mount count */
426 __le16 s_magic; /* Magic signature */
427 __le16 s_state; /* File system state */
428 __le16 s_errors; /* Behaviour when detecting errors */
429 __le16 s_minor_rev_level; /* minor revision level */
430 __le32 s_lastcheck; /* time of last check */
431 __le32 s_checkinterval; /* max. time between checks */
432 __le32 s_creator_os; /* OS */
433 __le32 s_rev_level; /* Revision level */
434 __le16 s_def_resuid; /* Default uid for reserved blocks */
435 __le16 s_def_resgid; /* Default gid for reserved blocks */
436 /*
437 * These fields are for EXT2_DYNAMIC_REV superblocks only.
438 *
439 * Note: the difference between the compatible feature set and
440 * the incompatible feature set is that if there is a bit set
441 * in the incompatible feature set that the kernel doesn't
442 * know about, it should refuse to mount the filesystem.
443 *
444 * e2fsck's requirements are more strict; if it doesn't know
445 * about a feature in either the compatible or incompatible
446 * feature set, it must abort and not try to meddle with
447 * things it doesn't understand...
448 */
449 __le32 s_first_ino; /* First non-reserved inode */
450 __le16 s_inode_size; /* size of inode structure */
451 __le16 s_block_group_nr; /* block group # of this superblock */
452 __le32 s_feature_compat; /* compatible feature set */
453 __le32 s_feature_incompat; /* incompatible feature set */
454 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
455 __u8 s_uuid[16]; /* 128-bit uuid for volume */
456 char s_volume_name[16]; /* volume name */
457 char s_last_mounted[64]; /* directory where last mounted */
458 __le32 s_algorithm_usage_bitmap; /* For compression */
459 /*
460 * Performance hints. Directory preallocation should only
461 * happen if the EXT2_COMPAT_PREALLOC flag is on.
462 */
463 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
464 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
465 __u16 s_padding1;
466 /*
467 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
468 */
469 __u8 s_journal_uuid[16]; /* uuid of journal superblock */
470 __u32 s_journal_inum; /* inode number of journal file */
471 __u32 s_journal_dev; /* device number of journal file */
472 __u32 s_last_orphan; /* start of list of inodes to delete */
473 __u32 s_hash_seed[4]; /* HTREE hash seed */
474 __u8 s_def_hash_version; /* Default hash version to use */
475 __u8 s_reserved_char_pad;
476 __u16 s_reserved_word_pad;
477 __le32 s_default_mount_opts;
478 __le32 s_first_meta_bg; /* First metablock block group */
479 __u32 s_reserved[190]; /* Padding to the end of the block */
480};
481
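Several mount-time quantities are pure arithmetic over these fields; for example, the block-group count is the number of blocks past the first data block, rounded up by blocks-per-group. A runnable sketch with illustrative names:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t ext2_group_count(uint32_t blocks_count,
					 uint32_t first_data_block,
					 uint32_t blocks_per_group)
	{
		return (blocks_count - first_data_block +
			blocks_per_group - 1) / blocks_per_group;
	}

	int main(void)
	{
		/* 4 KiB blocks: 262144 blocks, 32768 per group, first data block 0 */
		printf("%u groups\n", ext2_group_count(262144, 0, 32768)); /* 8 */
		return 0;
	}
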
482/*
483 * Codes for operating systems
484 */
485#define EXT2_OS_LINUX 0
486#define EXT2_OS_HURD 1
487#define EXT2_OS_MASIX 2
488#define EXT2_OS_FREEBSD 3
489#define EXT2_OS_LITES 4
490
491/*
492 * Revision levels
493 */
494#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */
495#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
496
497#define EXT2_CURRENT_REV EXT2_GOOD_OLD_REV
498#define EXT2_MAX_SUPP_REV EXT2_DYNAMIC_REV
499
500#define EXT2_GOOD_OLD_INODE_SIZE 128
501
502/*
503 * Feature set definitions
504 */
505
506#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \
507 ( EXT2_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
508#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \
509 ( EXT2_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
510#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \
511 ( EXT2_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
512#define EXT2_SET_COMPAT_FEATURE(sb,mask) \
513 EXT2_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
514#define EXT2_SET_RO_COMPAT_FEATURE(sb,mask) \
515 EXT2_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
516#define EXT2_SET_INCOMPAT_FEATURE(sb,mask) \
517 EXT2_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
518#define EXT2_CLEAR_COMPAT_FEATURE(sb,mask) \
519 EXT2_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
520#define EXT2_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
521 EXT2_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
522#define EXT2_CLEAR_INCOMPAT_FEATURE(sb,mask) \
523 EXT2_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
524
525#define EXT2_FEATURE_COMPAT_DIR_PREALLOC 0x0001
526#define EXT2_FEATURE_COMPAT_IMAGIC_INODES 0x0002
527#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
528#define EXT2_FEATURE_COMPAT_EXT_ATTR 0x0008
529#define EXT2_FEATURE_COMPAT_RESIZE_INO 0x0010
530#define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020
531#define EXT2_FEATURE_COMPAT_ANY 0xffffffff
532
533#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
534#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
535#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
536#define EXT2_FEATURE_RO_COMPAT_ANY 0xffffffff
537
538#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001
539#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002
540#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004
541#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008
542#define EXT2_FEATURE_INCOMPAT_META_BG 0x0010
543#define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff
544
545#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
546#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
547 EXT2_FEATURE_INCOMPAT_META_BG)
548#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
549 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
550 EXT2_FEATURE_RO_COMPAT_BTREE_DIR)
551#define EXT2_FEATURE_RO_COMPAT_UNSUPPORTED ~EXT2_FEATURE_RO_COMPAT_SUPP
552#define EXT2_FEATURE_INCOMPAT_UNSUPPORTED ~EXT2_FEATURE_INCOMPAT_SUPP
553
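The SUPP/UNSUPPORTED masks are what the mount path tests: an unknown incompat bit must fail the mount outright, while an unknown ro-compat bit only forbids writing. A hedged sketch of that gate (illustrative, not the exact kernel code; error reporting omitted):

	if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_UNSUPPORTED))
		return -EINVAL;		/* refuse the mount entirely */
	if (!(sb->s_flags & MS_RDONLY) &&
	    EXT2_HAS_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_UNSUPPORTED))
		return -EROFS;		/* writable mount not allowed */
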
554/*
555 * Default values for user and/or group using reserved blocks
556 */
557#define EXT2_DEF_RESUID 0
558#define EXT2_DEF_RESGID 0
559
560/*
561 * Default mount options
562 */
563#define EXT2_DEFM_DEBUG 0x0001
564#define EXT2_DEFM_BSDGROUPS 0x0002
565#define EXT2_DEFM_XATTR_USER 0x0004
566#define EXT2_DEFM_ACL 0x0008
567#define EXT2_DEFM_UID16 0x0010
568 /* Not used by ext2, but reserved for use by ext3 */
569#define EXT3_DEFM_JMODE 0x0060
570#define EXT3_DEFM_JMODE_DATA 0x0020
571#define EXT3_DEFM_JMODE_ORDERED 0x0040
572#define EXT3_DEFM_JMODE_WBACK 0x0060
573
574/*
575 * Structure of a directory entry
576 */
577
578struct ext2_dir_entry {
579 __le32 inode; /* Inode number */
580 __le16 rec_len; /* Directory entry length */
581 __le16 name_len; /* Name length */
582 char name[]; /* File name, up to EXT2_NAME_LEN */
583};
584
585/*
586 * The new version of the directory entry. Since EXT2 structures are
 587 * stored in Intel byte order, and the name_len field could never be
588 * bigger than 255 chars, it's safe to reclaim the extra byte for the
589 * file_type field.
590 */
591struct ext2_dir_entry_2 {
592 __le32 inode; /* Inode number */
593 __le16 rec_len; /* Directory entry length */
594 __u8 name_len; /* Name length */
595 __u8 file_type;
596 char name[]; /* File name, up to EXT2_NAME_LEN */
597};
598
599/*
600 * Ext2 directory file types. Only the low 3 bits are used. The
601 * other bits are reserved for now.
602 */
603enum {
604 EXT2_FT_UNKNOWN = 0,
605 EXT2_FT_REG_FILE = 1,
606 EXT2_FT_DIR = 2,
607 EXT2_FT_CHRDEV = 3,
608 EXT2_FT_BLKDEV = 4,
609 EXT2_FT_FIFO = 5,
610 EXT2_FT_SOCK = 6,
611 EXT2_FT_SYMLINK = 7,
612 EXT2_FT_MAX
613};
614
615/*
616 * EXT2_DIR_PAD defines the directory entries boundaries
617 *
618 * NOTE: It must be a multiple of 4
619 */
620#define EXT2_DIR_PAD 4
621#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1)
622#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \
623 ~EXT2_DIR_ROUND)
624#define EXT2_MAX_REC_LEN ((1<<16)-1)
625
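Each entry occupies an 8-byte fixed header (inode, rec_len, name_len, file_type) plus the name, rounded up to the 4-byte boundary EXT2_DIR_PAD demands; rec_len may be larger still to absorb slack at the end of a block. A runnable check of the macro's arithmetic:

	#include <stdio.h>

	#define EXT2_DIR_PAD		4
	#define EXT2_DIR_ROUND		(EXT2_DIR_PAD - 1)
	#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \
					    ~EXT2_DIR_ROUND)

	int main(void)
	{
		printf("%d %d %d\n",
		       EXT2_DIR_REC_LEN(1),	/* 12 */
		       EXT2_DIR_REC_LEN(5),	/* 16 */
		       EXT2_DIR_REC_LEN(255));	/* 264 */
		return 0;
	}
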
626static inline void verify_offsets(void)
627{
628#define A(x,y) BUILD_BUG_ON(x != offsetof(struct ext2_super_block, y));
629 A(EXT2_SB_MAGIC_OFFSET, s_magic);
630 A(EXT2_SB_BLOCKS_OFFSET, s_blocks_count);
631 A(EXT2_SB_BSIZE_OFFSET, s_log_block_size);
632#undef A
633}
 634
 635/*
 636 * ext2 mount options
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 080419814ba..dffb8653628 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -195,9 +195,6 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
195 struct inode *inode = old_dentry->d_inode; 195 struct inode *inode = old_dentry->d_inode;
196 int err; 196 int err;
197 197
198 if (inode->i_nlink >= EXT2_LINK_MAX)
199 return -EMLINK;
200
201 dquot_initialize(dir); 198 dquot_initialize(dir);
202 199
203 inode->i_ctime = CURRENT_TIME_SEC; 200 inode->i_ctime = CURRENT_TIME_SEC;
@@ -217,10 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 214static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
218{ 215{
219 struct inode * inode; 216 struct inode * inode;
220 int err = -EMLINK; 217 int err;
221
222 if (dir->i_nlink >= EXT2_LINK_MAX)
223 goto out;
224 218
225 dquot_initialize(dir); 219 dquot_initialize(dir);
226 220
@@ -346,11 +340,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
346 drop_nlink(new_inode); 340 drop_nlink(new_inode);
347 inode_dec_link_count(new_inode); 341 inode_dec_link_count(new_inode);
348 } else { 342 } else {
349 if (dir_de) {
350 err = -EMLINK;
351 if (new_dir->i_nlink >= EXT2_LINK_MAX)
352 goto out_dir;
353 }
354 err = ext2_add_link(new_dentry, old_inode); 343 err = ext2_add_link(new_dentry, old_inode);
355 if (err) 344 if (err)
356 goto out_dir; 345 goto out_dir;
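All three removed checks are instances of the same test; the super.c hunk below sets sb->s_max_links so the VFS can perform it once, centrally, before calling into the filesystem. A hedged sketch of the generic check this relies on (the real enforcement lives in the VFS link/mkdir/rename paths):

	/* Illustrative VFS-side replacement for the removed tests: */
	if (dir->i_sb->s_max_links &&
	    dir->i_nlink >= dir->i_sb->s_max_links)
		return -EMLINK;
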
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0090595beb2..e1025c7a437 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -919,6 +919,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
919 } 919 }
920 920
921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); 921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
922 sb->s_max_links = EXT2_LINK_MAX;
922 923
923 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { 924 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
924 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; 925 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
@@ -1087,9 +1088,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1087 goto failed_mount3; 1088 goto failed_mount3;
1088 } 1089 }
1089 1090
1090 sb->s_root = d_alloc_root(root); 1091 sb->s_root = d_make_root(root);
1091 if (!sb->s_root) { 1092 if (!sb->s_root) {
1092 iput(root);
1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed"); 1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1094 ret = -ENOMEM; 1094 ret = -ENOMEM;
1095 goto failed_mount3; 1095 goto failed_mount3;
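d_make_root() differs from the old d_alloc_root() in that it consumes the inode reference even when dentry allocation fails, which is why the error path above no longer needs its own iput(). The resulting idiom, sketched:

	/* d_make_root() drops the inode itself on failure: */
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;		/* no iput(root) needed here */
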
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index be7a8d02c9a..cfedb2cb0d8 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -3,10 +3,7 @@
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6#include <linux/slab.h> 6#include "ext2.h"
7#include <linux/string.h>
8#include <linux/fs.h>
9#include <linux/ext2_fs.h>
10#include <linux/security.h> 7#include <linux/security.h>
11#include "xattr.h" 8#include "xattr.h"
12 9
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 2989467d359..7e192574c00 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -5,10 +5,7 @@
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/string.h> 8#include "ext2.h"
9#include <linux/capability.h>
10#include <linux/fs.h>
11#include <linux/ext2_fs.h>
12#include "xattr.h" 9#include "xattr.h"
13 10
14static size_t 11static size_t
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 322a56b2dfb..1c3312858fc 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -9,8 +9,6 @@
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/genhd.h> 10#include <linux/genhd.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/ext2_fs_sb.h>
13#include <linux/ext2_fs.h>
14#include <linux/blkdev.h> 12#include <linux/blkdev.h>
15#include "ext2.h" 13#include "ext2.h"
16#include "xip.h" 14#include "xip.h"
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 3091f62e55b..c76832c8d19 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -4,13 +4,7 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/init.h> 7#include "ext3.h"
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/capability.h>
11#include <linux/fs.h>
12#include <linux/ext3_jbd.h>
13#include <linux/ext3_fs.h>
14#include "xattr.h" 8#include "xattr.h"
15#include "acl.h" 9#include "acl.h"
16 10
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a2038928f9a..baac1b129fb 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -11,17 +11,9 @@
11 * David S. Miller (davem@caip.rutgers.edu), 1995 11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */ 12 */
13 13
14#include <linux/time.h>
15#include <linux/capability.h>
16#include <linux/fs.h>
17#include <linux/slab.h>
18#include <linux/jbd.h>
19#include <linux/ext3_fs.h>
20#include <linux/ext3_jbd.h>
21#include <linux/quotaops.h> 14#include <linux/quotaops.h>
22#include <linux/buffer_head.h>
23#include <linux/blkdev.h> 15#include <linux/blkdev.h>
24#include <trace/events/ext3.h> 16#include "ext3.h"
25 17
26/* 18/*
27 * balloc.c contains the blocks allocation and deallocation routines 19 * balloc.c contains the blocks allocation and deallocation routines
@@ -1743,8 +1735,11 @@ allocated:
1743 1735
1744 *errp = 0; 1736 *errp = 0;
1745 brelse(bitmap_bh); 1737 brelse(bitmap_bh);
1746 dquot_free_block(inode, *count-num); 1738
1747 *count = num; 1739 if (num < *count) {
1740 dquot_free_block(inode, *count-num);
1741 *count = num;
1742 }
1748 1743
1749 trace_ext3_allocate_blocks(inode, goal, num, 1744 trace_ext3_allocate_blocks(inode, goal, num,
1750 (unsigned long long)ret_block); 1745 (unsigned long long)ret_block);
@@ -1970,7 +1965,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1970 sbi = EXT3_SB(sb); 1965 sbi = EXT3_SB(sb);
1971 1966
1972 /* Walk through the whole group */ 1967 /* Walk through the whole group */
1973 while (start < max) { 1968 while (start <= max) {
1974 start = bitmap_search_next_usable_block(start, bitmap_bh, max); 1969 start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1975 if (start < 0) 1970 if (start < 0)
1976 break; 1971 break;
@@ -1980,7 +1975,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1980 * Allocate contiguous free extents by setting bits in the 1975 * Allocate contiguous free extents by setting bits in the
1981 * block bitmap 1976 * block bitmap
1982 */ 1977 */
1983 while (next < max 1978 while (next <= max
1984 && claim_block(sb_bgl_lock(sbi, group), 1979 && claim_block(sb_bgl_lock(sbi, group),
1985 next, bitmap_bh)) { 1980 next, bitmap_bh)) {
1986 next++; 1981 next++;
@@ -2091,73 +2086,74 @@ err_out:
2091 */ 2086 */
2092int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) 2087int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2093{ 2088{
2094 ext3_grpblk_t last_block, first_block, free_blocks; 2089 ext3_grpblk_t last_block, first_block;
2095 unsigned long first_group, last_group; 2090 unsigned long group, first_group, last_group;
2096 unsigned long group, ngroups;
2097 struct ext3_group_desc *gdp; 2091 struct ext3_group_desc *gdp;
2098 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 2092 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2099 uint64_t start, len, minlen, trimmed; 2093 uint64_t start, minlen, end, trimmed = 0;
2094 ext3_fsblk_t first_data_blk =
2095 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
2100 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); 2096 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2101 int ret = 0; 2097 int ret = 0;
2102 2098
2103 start = (range->start >> sb->s_blocksize_bits) + 2099 start = range->start >> sb->s_blocksize_bits;
2104 le32_to_cpu(es->s_first_data_block); 2100 end = start + (range->len >> sb->s_blocksize_bits) - 1;
2105 len = range->len >> sb->s_blocksize_bits;
2106 minlen = range->minlen >> sb->s_blocksize_bits; 2101 minlen = range->minlen >> sb->s_blocksize_bits;
2107 trimmed = 0;
2108 2102
2109 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) 2103 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
2104 unlikely(start >= max_blks))
2110 return -EINVAL; 2105 return -EINVAL;
2111 if (start >= max_blks) 2106 if (end >= max_blks)
2112 return -EINVAL; 2107 end = max_blks - 1;
2113 if (start + len > max_blks) 2108 if (end <= first_data_blk)
2114 len = max_blks - start; 2109 goto out;
2110 if (start < first_data_blk)
2111 start = first_data_blk;
2115 2112
2116 ngroups = EXT3_SB(sb)->s_groups_count;
2117 smp_rmb(); 2113 smp_rmb();
2118 2114
2119 /* Determine first and last group to examine based on start and len */ 2115 /* Determine first and last group to examine based on start and len */
2120 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, 2116 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2121 &first_group, &first_block); 2117 &first_group, &first_block);
2122 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len), 2118 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
2123 &last_group, &last_block); 2119 &last_group, &last_block);
2124 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
2125 last_block = EXT3_BLOCKS_PER_GROUP(sb);
2126 2120
2127 if (first_group > last_group) 2121 /* end now represents the last block to discard in this group */
2128 return -EINVAL; 2122 end = EXT3_BLOCKS_PER_GROUP(sb) - 1;
2129 2123
2130 for (group = first_group; group <= last_group; group++) { 2124 for (group = first_group; group <= last_group; group++) {
2131 gdp = ext3_get_group_desc(sb, group, NULL); 2125 gdp = ext3_get_group_desc(sb, group, NULL);
2132 if (!gdp) 2126 if (!gdp)
2133 break; 2127 break;
2134 2128
2135 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
2136 if (free_blocks < minlen)
2137 continue;
2138
2139 /* 2129 /*
2140 * For all the groups except the last one, last block will 2130 * For all the groups except the last one, last block will
2141 * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to 2131 * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
2142 * change it for the last group in which case first_block + 2132 * change it for the last group, note that last_block is
2143 * len < EXT3_BLOCKS_PER_GROUP(sb). 2133 * already computed earlier by ext3_get_group_no_and_offset()
2144 */ 2134 */
2145 if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb)) 2135 if (group == last_group)
2146 last_block = first_block + len; 2136 end = last_block;
2147 len -= last_block - first_block;
2148 2137
2149 ret = ext3_trim_all_free(sb, group, first_block, 2138 if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
2150 last_block, minlen); 2139 ret = ext3_trim_all_free(sb, group, first_block,
2151 if (ret < 0) 2140 end, minlen);
2152 break; 2141 if (ret < 0)
2142 break;
2143 trimmed += ret;
2144 }
2153 2145
2154 trimmed += ret; 2146 /*
2147 * For every group except the first one, we are sure
2148 * that the first block to discard will be block #0.
2149 */
2155 first_block = 0; 2150 first_block = 0;
2156 } 2151 }
2157 2152
2158 if (ret >= 0) 2153 if (ret > 0)
2159 ret = 0; 2154 ret = 0;
2160 range->len = trimmed * sb->s_blocksize;
2161 2155
2156out:
2157 range->len = trimmed * sb->s_blocksize;
2162 return ret; 2158 return ret;
2163} 2159}
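The rewritten loop clamps [start, end] to the filesystem's data blocks and then walks whole groups, trimming [first_block, end] within each. A runnable userspace analogue of the group/offset split it leans on (names are illustrative, not ext3_get_group_no_and_offset itself):

	#include <stdint.h>
	#include <stdio.h>

	static void block_to_group(uint64_t blk, uint32_t first_data_block,
				   uint32_t blocks_per_group,
				   uint64_t *group, uint32_t *offset)
	{
		*group  = (blk - first_data_block) / blocks_per_group;
		*offset = (blk - first_data_block) % blocks_per_group;
	}

	int main(void)
	{
		uint64_t group;
		uint32_t off;

		/* 1 KiB blocks: first data block 1, 8192 blocks per group */
		block_to_group(10000, 1, 8192, &group, &off);
		printf("block 10000 -> group %llu, offset %u\n",
		       (unsigned long long)group, off);	/* group 1, offset 1807 */
		return 0;
	}
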
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 6afc39d8025..909d13e2656 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,9 +7,7 @@
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10#include <linux/buffer_head.h> 10#include "ext3.h"
11#include <linux/jbd.h>
12#include <linux/ext3_fs.h>
13 11
14#ifdef EXT3FS_DEBUG 12#ifdef EXT3FS_DEBUG
15 13
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 34f0a072b93..cc761ad8fa5 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,12 +21,7 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/fs.h> 24#include "ext3.h"
25#include <linux/jbd.h>
26#include <linux/ext3_fs.h>
27#include <linux/buffer_head.h>
28#include <linux/slab.h>
29#include <linux/rbtree.h>
30 25
31static unsigned char ext3_filetype_table[] = { 26static unsigned char ext3_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 27 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
new file mode 100644
index 00000000000..b6515fd7e56
--- /dev/null
+++ b/fs/ext3/ext3.h
@@ -0,0 +1,1322 @@
1/*
2 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
3 *
4 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
5 *
6 * This file is part of the Linux kernel and is made available under
7 * the terms of the GNU General Public License, version 2, or at your
8 * option, any later version, incorporated herein by reference.
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * from
16 *
17 * linux/include/linux/minix_fs.h
18 *
19 * Copyright (C) 1991, 1992 Linus Torvalds
20 */
21
22#include <linux/fs.h>
23#include <linux/jbd.h>
24#include <linux/magic.h>
25#include <linux/bug.h>
26#include <linux/blockgroup_lock.h>
27
28/*
29 * The second extended filesystem constants/structures
30 */
31
32/*
33 * Define EXT3FS_DEBUG to produce debug messages
34 */
35#undef EXT3FS_DEBUG
36
37/*
38 * Define EXT3_RESERVATION to reserve data blocks for expanding files
39 */
40#define EXT3_DEFAULT_RESERVE_BLOCKS 8
 41/* max window size: 1024 (direct blocks) + 3 ([t,d]indirect blocks) */
42#define EXT3_MAX_RESERVE_BLOCKS 1027
43#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0
44
45/*
46 * Debug code
47 */
48#ifdef EXT3FS_DEBUG
49#define ext3_debug(f, a...) \
50 do { \
51 printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \
52 __FILE__, __LINE__, __func__); \
53 printk (KERN_DEBUG f, ## a); \
54 } while (0)
55#else
56#define ext3_debug(f, a...) do {} while (0)
57#endif
58
59/*
60 * Special inodes numbers
61 */
62#define EXT3_BAD_INO 1 /* Bad blocks inode */
63#define EXT3_ROOT_INO 2 /* Root inode */
64#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
65#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
66#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
67#define EXT3_JOURNAL_INO 8 /* Journal inode */
68
69/* First non-reserved inode for old ext3 filesystems */
70#define EXT3_GOOD_OLD_FIRST_INO 11
71
72/*
73 * Maximal count of links to a file
74 */
75#define EXT3_LINK_MAX 32000
76
77/*
78 * Macro-instructions used to manage several block sizes
79 */
80#define EXT3_MIN_BLOCK_SIZE 1024
81#define EXT3_MAX_BLOCK_SIZE 65536
82#define EXT3_MIN_BLOCK_LOG_SIZE 10
83#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
84#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
85#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
86#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
87#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
88#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
89
90/*
91 * Macro-instructions used to manage fragments
92 */
93#define EXT3_MIN_FRAG_SIZE 1024
94#define EXT3_MAX_FRAG_SIZE 4096
95#define EXT3_MIN_FRAG_LOG_SIZE 10
96#define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
97#define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
98
99/*
100 * Structure of a blocks group descriptor
101 */
102struct ext3_group_desc
103{
104 __le32 bg_block_bitmap; /* Blocks bitmap block */
105 __le32 bg_inode_bitmap; /* Inodes bitmap block */
106 __le32 bg_inode_table; /* Inodes table block */
107 __le16 bg_free_blocks_count; /* Free blocks count */
108 __le16 bg_free_inodes_count; /* Free inodes count */
109 __le16 bg_used_dirs_count; /* Directories count */
110 __u16 bg_pad;
111 __le32 bg_reserved[3];
112};
113
114/*
115 * Macro-instructions used to manage group descriptors
116 */
117#define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
118#define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
119#define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
120#define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
121
122/*
123 * Constants relative to the data blocks
124 */
125#define EXT3_NDIR_BLOCKS 12
126#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS
127#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1)
128#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1)
129#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1)
130
131/*
132 * Inode flags
133 */
134#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
135#define EXT3_UNRM_FL 0x00000002 /* Undelete */
136#define EXT3_COMPR_FL 0x00000004 /* Compress file */
137#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
138#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
139#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
140#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
141#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
142/* Reserved for compression usage... */
143#define EXT3_DIRTY_FL 0x00000100
144#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
145#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
146#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
147/* End compression flags --- maybe not all used */
148#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
149#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
150#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
151#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
152#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
153#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
154#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
155
156#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
157#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
158
159/* Flags that should be inherited by new inodes from their parent. */
160#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
161 EXT3_SYNC_FL | EXT3_NODUMP_FL |\
162 EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
163 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
164 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
165
166/* Flags that are appropriate for regular files (all but dir-specific ones). */
167#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
168
169/* Flags that are appropriate for non-directories/regular files. */
170#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
171
172/* Mask out flags that are inappropriate for the given type of inode. */
173static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
174{
175 if (S_ISDIR(mode))
176 return flags;
177 else if (S_ISREG(mode))
178 return flags & EXT3_REG_FLMASK;
179 else
180 return flags & EXT3_OTHER_FLMASK;
181}
182
183/* Used to pass group descriptor data when online resize is done */
184struct ext3_new_group_input {
185 __u32 group; /* Group number for this data */
186 __u32 block_bitmap; /* Absolute block number of block bitmap */
187 __u32 inode_bitmap; /* Absolute block number of inode bitmap */
188 __u32 inode_table; /* Absolute block number of inode table start */
189 __u32 blocks_count; /* Total number of blocks in this group */
190 __u16 reserved_blocks; /* Number of reserved blocks in this group */
191 __u16 unused;
192};
193
194/* The struct ext3_new_group_input in kernel space, with free_blocks_count */
195struct ext3_new_group_data {
196 __u32 group;
197 __u32 block_bitmap;
198 __u32 inode_bitmap;
199 __u32 inode_table;
200 __u32 blocks_count;
201 __u16 reserved_blocks;
202 __u16 unused;
203 __u32 free_blocks_count;
204};
205
206
207/*
208 * ioctl commands
209 */
210#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
211#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
212#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
213#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
214#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
215#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
216#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
217#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
218#ifdef CONFIG_JBD_DEBUG
219#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
220#endif
221#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
222#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
223
224/*
225 * ioctl commands in 32 bit emulation
226 */
227#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
228#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
229#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
230#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
231#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
232#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
233#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
234#ifdef CONFIG_JBD_DEBUG
235#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
236#endif
237#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
238#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
239
240
241/*
242 * Mount options
243 */
244struct ext3_mount_options {
245 unsigned long s_mount_opt;
246 uid_t s_resuid;
247 gid_t s_resgid;
248 unsigned long s_commit_interval;
249#ifdef CONFIG_QUOTA
250 int s_jquota_fmt;
251 char *s_qf_names[MAXQUOTAS];
252#endif
253};
254
255/*
256 * Structure of an inode on the disk
257 */
258struct ext3_inode {
259 __le16 i_mode; /* File mode */
260 __le16 i_uid; /* Low 16 bits of Owner Uid */
261 __le32 i_size; /* Size in bytes */
262 __le32 i_atime; /* Access time */
 263	__le32	i_ctime;	/* Inode change time */
264 __le32 i_mtime; /* Modification time */
265 __le32 i_dtime; /* Deletion Time */
266 __le16 i_gid; /* Low 16 bits of Group Id */
267 __le16 i_links_count; /* Links count */
268 __le32 i_blocks; /* Blocks count */
269 __le32 i_flags; /* File flags */
270 union {
271 struct {
272 __u32 l_i_reserved1;
273 } linux1;
274 struct {
275 __u32 h_i_translator;
276 } hurd1;
277 struct {
278 __u32 m_i_reserved1;
279 } masix1;
280 } osd1; /* OS dependent 1 */
281 __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
282 __le32 i_generation; /* File version (for NFS) */
283 __le32 i_file_acl; /* File ACL */
284 __le32 i_dir_acl; /* Directory ACL */
285 __le32 i_faddr; /* Fragment address */
286 union {
287 struct {
288 __u8 l_i_frag; /* Fragment number */
289 __u8 l_i_fsize; /* Fragment size */
290 __u16 i_pad1;
291 __le16 l_i_uid_high; /* these 2 fields */
292 __le16 l_i_gid_high; /* were reserved2[0] */
293 __u32 l_i_reserved2;
294 } linux2;
295 struct {
296 __u8 h_i_frag; /* Fragment number */
297 __u8 h_i_fsize; /* Fragment size */
298 __u16 h_i_mode_high;
299 __u16 h_i_uid_high;
300 __u16 h_i_gid_high;
301 __u32 h_i_author;
302 } hurd2;
303 struct {
304 __u8 m_i_frag; /* Fragment number */
305 __u8 m_i_fsize; /* Fragment size */
306 __u16 m_pad1;
307 __u32 m_i_reserved2[2];
308 } masix2;
309 } osd2; /* OS dependent 2 */
310 __le16 i_extra_isize;
311 __le16 i_pad1;
312};
313
314#define i_size_high i_dir_acl
315
316#define i_reserved1 osd1.linux1.l_i_reserved1
317#define i_frag osd2.linux2.l_i_frag
318#define i_fsize osd2.linux2.l_i_fsize
319#define i_uid_low i_uid
320#define i_gid_low i_gid
321#define i_uid_high osd2.linux2.l_i_uid_high
322#define i_gid_high osd2.linux2.l_i_gid_high
323#define i_reserved2 osd2.linux2.l_i_reserved2
324
325/*
326 * File system states
327 */
328#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */
329#define EXT3_ERROR_FS 0x0002 /* Errors detected */
330#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */
331
332/*
333 * Misc. filesystem flags
334 */
335#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
336#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
337#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
338
339/*
340 * Mount flags
341 */
342#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
343/* EXT3_MOUNT_OLDALLOC was there */
344#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
345#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
346#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
347#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
348#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
349#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
350#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
351#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */
352#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
353#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
354#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
355#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
356#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
357#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
358#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
359#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
360#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
361#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
362#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
363#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
364#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
365#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
366 * error in ordered mode */
367
368/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
369#ifndef _LINUX_EXT2_FS_H
370#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
371#define set_opt(o, opt) o |= EXT3_MOUNT_##opt
372#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
373 EXT3_MOUNT_##opt)
374#else
375#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
376#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT
377#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
378#endif
379
380#define ext3_set_bit __set_bit_le
381#define ext3_set_bit_atomic ext2_set_bit_atomic
382#define ext3_clear_bit __clear_bit_le
383#define ext3_clear_bit_atomic ext2_clear_bit_atomic
384#define ext3_test_bit test_bit_le
385#define ext3_find_next_zero_bit find_next_zero_bit_le
386
387/*
388 * Maximal mount counts between two filesystem checks
389 */
390#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
391#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */
392
393/*
394 * Behaviour when detecting errors
395 */
396#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */
397#define EXT3_ERRORS_RO 2 /* Remount fs read-only */
398#define EXT3_ERRORS_PANIC 3 /* Panic */
399#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE
400
401/*
402 * Structure of the super block
403 */
404struct ext3_super_block {
405/*00*/ __le32 s_inodes_count; /* Inodes count */
406 __le32 s_blocks_count; /* Blocks count */
407 __le32 s_r_blocks_count; /* Reserved blocks count */
408 __le32 s_free_blocks_count; /* Free blocks count */
409/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
410 __le32 s_first_data_block; /* First Data Block */
411 __le32 s_log_block_size; /* Block size */
412 __le32 s_log_frag_size; /* Fragment size */
413/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
414 __le32 s_frags_per_group; /* # Fragments per group */
415 __le32 s_inodes_per_group; /* # Inodes per group */
416 __le32 s_mtime; /* Mount time */
417/*30*/ __le32 s_wtime; /* Write time */
418 __le16 s_mnt_count; /* Mount count */
419 __le16 s_max_mnt_count; /* Maximal mount count */
420 __le16 s_magic; /* Magic signature */
421 __le16 s_state; /* File system state */
422 __le16 s_errors; /* Behaviour when detecting errors */
423 __le16 s_minor_rev_level; /* minor revision level */
424/*40*/ __le32 s_lastcheck; /* time of last check */
425 __le32 s_checkinterval; /* max. time between checks */
426 __le32 s_creator_os; /* OS */
427 __le32 s_rev_level; /* Revision level */
428/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
429 __le16 s_def_resgid; /* Default gid for reserved blocks */
430 /*
431 * These fields are for EXT3_DYNAMIC_REV superblocks only.
432 *
433 * Note: the difference between the compatible feature set and
434 * the incompatible feature set is that if there is a bit set
435 * in the incompatible feature set that the kernel doesn't
436 * know about, it should refuse to mount the filesystem.
437 *
438 * e2fsck's requirements are more strict; if it doesn't know
439 * about a feature in either the compatible or incompatible
440 * feature set, it must abort and not try to meddle with
441 * things it doesn't understand...
442 */
443 __le32 s_first_ino; /* First non-reserved inode */
444 __le16 s_inode_size; /* size of inode structure */
445 __le16 s_block_group_nr; /* block group # of this superblock */
446 __le32 s_feature_compat; /* compatible feature set */
447/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
448 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
449/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
450/*78*/ char s_volume_name[16]; /* volume name */
451/*88*/ char s_last_mounted[64]; /* directory where last mounted */
452/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
453 /*
454 * Performance hints. Directory preallocation should only
455 * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
456 */
457 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
458 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
459 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
460 /*
461 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
462 */
463/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
464/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
465 __le32 s_journal_dev; /* device number of journal file */
466 __le32 s_last_orphan; /* start of list of inodes to delete */
467 __le32 s_hash_seed[4]; /* HTREE hash seed */
468 __u8 s_def_hash_version; /* Default hash version to use */
469 __u8 s_reserved_char_pad;
470 __u16 s_reserved_word_pad;
471 __le32 s_default_mount_opts;
472 __le32 s_first_meta_bg; /* First metablock block group */
473 __le32 s_mkfs_time; /* When the filesystem was created */
474 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
475 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
476/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
477 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
478 __le32 s_free_blocks_count_hi; /* Free blocks count */
479 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
480 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
481 __le32 s_flags; /* Miscellaneous flags */
482 __le16 s_raid_stride; /* RAID stride */
483 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
484 __le64 s_mmp_block; /* Block for multi-mount protection */
485 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
486 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
487 __u8 s_reserved_char_pad2;
488 __le16 s_reserved_pad;
489 __u32 s_reserved[162]; /* Padding to the end of the block */
490};
491
492/* data type for block offset of block group */
493typedef int ext3_grpblk_t;
494
495/* data type for filesystem-wide blocks number */
496typedef unsigned long ext3_fsblk_t;
497
498#define E3FSBLK "%lu"
499
500struct ext3_reserve_window {
 501	ext3_fsblk_t	_rsv_start;	/* First block reserved */
 502	ext3_fsblk_t	_rsv_end;	/* Last block reserved or 0 */
503};
504
505struct ext3_reserve_window_node {
506 struct rb_node rsv_node;
507 __u32 rsv_goal_size;
508 __u32 rsv_alloc_hit;
509 struct ext3_reserve_window rsv_window;
510};
511
512struct ext3_block_alloc_info {
513 /* information about reservation window */
514 struct ext3_reserve_window_node rsv_window_node;
515 /*
 516	 * Was i_next_alloc_block in ext3_inode_info:
 517	 * the logical (file-relative) number of the
 518	 * most-recently-allocated block in this file.
519 * We use this for detecting linearly ascending allocation requests.
520 */
521 __u32 last_alloc_logical_block;
522 /*
 523	 * Was i_next_alloc_goal in ext3_inode_info:
 524	 * the *physical* companion to i_next_alloc_block.
 525	 * It is the physical block number of the block which was most recently
 526	 * allocated to this file. This gives us the goal (target) for the next
527 * allocation when we detect linearly ascending requests.
528 */
529 ext3_fsblk_t last_alloc_physical_block;
530};
531
532#define rsv_start rsv_window._rsv_start
533#define rsv_end rsv_window._rsv_end
534
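A window reserves the inclusive block range [_rsv_start, _rsv_end], with EXT3_RESERVE_WINDOW_NOT_ALLOCATED (0) in _rsv_end marking an empty window. An illustrative containment test (not a kernel helper):

	static int rsv_window_contains(const struct ext3_reserve_window *rsv,
				       ext3_fsblk_t blk)
	{
		return blk >= rsv->_rsv_start && blk <= rsv->_rsv_end;
	}
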
535/*
536 * third extended file system inode data in memory
537 */
538struct ext3_inode_info {
539 __le32 i_data[15]; /* unconverted */
540 __u32 i_flags;
541#ifdef EXT3_FRAGMENTS
542 __u32 i_faddr;
543 __u8 i_frag_no;
544 __u8 i_frag_size;
545#endif
546 ext3_fsblk_t i_file_acl;
547 __u32 i_dir_acl;
548 __u32 i_dtime;
549
550 /*
551 * i_block_group is the number of the block group which contains
552 * this file's inode. Constant across the lifetime of the inode,
 553	 * it is used for making block allocation decisions - we try to
554 * place a file's data blocks near its inode block, and new inodes
555 * near to their parent directory's inode.
556 */
557 __u32 i_block_group;
558 unsigned long i_state_flags; /* Dynamic state flags for ext3 */
559
560 /* block reservation info */
561 struct ext3_block_alloc_info *i_block_alloc_info;
562
563 __u32 i_dir_start_lookup;
564#ifdef CONFIG_EXT3_FS_XATTR
565 /*
566 * Extended attributes can be read independently of the main file
567 * data. Taking i_mutex even when reading would cause contention
568 * between readers of EAs and writers of regular file data, so
569 * instead we synchronize on xattr_sem when reading or changing
570 * EAs.
571 */
572 struct rw_semaphore xattr_sem;
573#endif
574
575 struct list_head i_orphan; /* unlinked but open inodes */
576
577 /*
578 * i_disksize keeps track of what the inode size is ON DISK, not
579 * in memory. During truncate, i_size is set to the new size by
580 * the VFS prior to calling ext3_truncate(), but the filesystem won't
581 * set i_disksize to 0 until the truncate is actually under way.
582 *
583 * The intent is that i_disksize always represents the blocks which
584 * are used by this file. This allows recovery to restart truncate
585 * on orphans if we crash during truncate. We actually write i_disksize
586 * into the on-disk inode when writing inodes out, instead of i_size.
587 *
588 * The only time when i_disksize and i_size may be different is when
589 * a truncate is in progress. The only things which change i_disksize
 590	 * are ext3_get_block (growth) and ext3_truncate (shrink).
591 */
592 loff_t i_disksize;
593
594 /* on-disk additional length */
595 __u16 i_extra_isize;
596
597 /*
598 * truncate_mutex is for serialising ext3_truncate() against
 599	 * ext3_getblock(). In the 2.4 ext2 design, great chunks of the inode's
600 * data tree are chopped off during truncate. We can't do that in
601 * ext3 because whenever we perform intermediate commits during
602 * truncate, the inode and all the metadata blocks *must* be in a
603 * consistent state which allows truncation of the orphans to restart
604 * during recovery. Hence we must fix the get_block-vs-truncate race
605 * by other means, so we have truncate_mutex.
606 */
607 struct mutex truncate_mutex;
608
609 /*
610 * Transactions that contain inode's metadata needed to complete
611 * fsync and fdatasync, respectively.
612 */
613 atomic_t i_sync_tid;
614 atomic_t i_datasync_tid;
615
616 struct inode vfs_inode;
617};
618
619/*
620 * third extended-fs super-block data in memory
621 */
622struct ext3_sb_info {
623 unsigned long s_frag_size; /* Size of a fragment in bytes */
624 unsigned long s_frags_per_block;/* Number of fragments per block */
625 unsigned long s_inodes_per_block;/* Number of inodes per block */
626 unsigned long s_frags_per_group;/* Number of fragments in a group */
627 unsigned long s_blocks_per_group;/* Number of blocks in a group */
628 unsigned long s_inodes_per_group;/* Number of inodes in a group */
629 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
630 unsigned long s_gdb_count; /* Number of group descriptor blocks */
631 unsigned long s_desc_per_block; /* Number of group descriptors per block */
632 unsigned long s_groups_count; /* Number of groups in the fs */
633 unsigned long s_overhead_last; /* Last calculated overhead */
634 unsigned long s_blocks_last; /* Last seen block count */
635 struct buffer_head * s_sbh; /* Buffer containing the super block */
636 struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
637 struct buffer_head ** s_group_desc;
638 unsigned long s_mount_opt;
639 ext3_fsblk_t s_sb_block;
640 uid_t s_resuid;
641 gid_t s_resgid;
642 unsigned short s_mount_state;
643 unsigned short s_pad;
644 int s_addr_per_block_bits;
645 int s_desc_per_block_bits;
646 int s_inode_size;
647 int s_first_ino;
648 spinlock_t s_next_gen_lock;
649 u32 s_next_generation;
650 u32 s_hash_seed[4];
651 int s_def_hash_version;
 652	int s_hash_unsigned;	/* 3 if hash should be unsigned, 0 if not */
653 struct percpu_counter s_freeblocks_counter;
654 struct percpu_counter s_freeinodes_counter;
655 struct percpu_counter s_dirs_counter;
656 struct blockgroup_lock *s_blockgroup_lock;
657
658 /* root of the per fs reservation window tree */
659 spinlock_t s_rsv_window_lock;
660 struct rb_root s_rsv_window_root;
661 struct ext3_reserve_window_node s_rsv_window_head;
662
663 /* Journaling */
664 struct inode * s_journal_inode;
665 struct journal_s * s_journal;
666 struct list_head s_orphan;
667 struct mutex s_orphan_lock;
668 struct mutex s_resize_lock;
669 unsigned long s_commit_interval;
670 struct block_device *journal_bdev;
671#ifdef CONFIG_QUOTA
672 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
673 int s_jquota_fmt; /* Format of quota to use */
674#endif
675};
676
677static inline spinlock_t *
678sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
679{
680 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
681}
682
683static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
684{
685 return sb->s_fs_info;
686}
687static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
688{
689 return container_of(inode, struct ext3_inode_info, vfs_inode);
690}
691
692static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
693{
694 return ino == EXT3_ROOT_INO ||
695 ino == EXT3_JOURNAL_INO ||
696 ino == EXT3_RESIZE_INO ||
697 (ino >= EXT3_FIRST_INO(sb) &&
698 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
699}
700
701/*
702 * Inode dynamic state flags
703 */
704enum {
705 EXT3_STATE_JDATA, /* journaled data exists */
706 EXT3_STATE_NEW, /* inode is newly created */
707 EXT3_STATE_XATTR, /* has in-inode xattrs */
708 EXT3_STATE_FLUSH_ON_CLOSE, /* flush dirty pages on close */
709};
710
711static inline int ext3_test_inode_state(struct inode *inode, int bit)
712{
713 return test_bit(bit, &EXT3_I(inode)->i_state_flags);
714}
715
716static inline void ext3_set_inode_state(struct inode *inode, int bit)
717{
718 set_bit(bit, &EXT3_I(inode)->i_state_flags);
719}
720
721static inline void ext3_clear_inode_state(struct inode *inode, int bit)
722{
723 clear_bit(bit, &EXT3_I(inode)->i_state_flags);
724}
725
726#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
727
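On disk, unlinked-but-open inodes form a singly linked list headed by the superblock's s_last_orphan and threaded through each inode's i_dtime field, which is what NEXT_ORPHAN expresses. A hedged sketch of the recovery-time walk (ext3_orphan_get is the helper in ialloc.c; the per-inode processing is elided):

	unsigned long ino = le32_to_cpu(es->s_last_orphan);

	while (ino) {
		struct inode *inode = ext3_orphan_get(sb, ino);

		if (IS_ERR(inode))
			break;
		ino = NEXT_ORPHAN(inode);	/* i_dtime doubles as next pointer */
		/* truncate or delete the orphan here, then: */
		iput(inode);
	}
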
728/*
729 * Codes for operating systems
730 */
731#define EXT3_OS_LINUX 0
732#define EXT3_OS_HURD 1
733#define EXT3_OS_MASIX 2
734#define EXT3_OS_FREEBSD 3
735#define EXT3_OS_LITES 4
736
737/*
738 * Revision levels
739 */
740#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */
741#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
742
743#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV
744#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV
745
746#define EXT3_GOOD_OLD_INODE_SIZE 128
747
748/*
749 * Feature set definitions
750 */
751
752#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \
753 ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
754#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \
755 ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
756#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \
757 ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
758#define EXT3_SET_COMPAT_FEATURE(sb,mask) \
759 EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
760#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \
761 EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
762#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \
763 EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
764#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \
765 EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
766#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
767 EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
768#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \
769 EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
770
771#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001
772#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002
773#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
774#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
775#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
776#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
777
778#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
779#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
780#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
781
782#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
783#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
784#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
785#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
786#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
787
788#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
789#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
790 EXT3_FEATURE_INCOMPAT_RECOVER| \
791 EXT3_FEATURE_INCOMPAT_META_BG)
792#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
793 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
794 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
795
796/*
797 * Default values for user and/or group using reserved blocks
798 */
799#define EXT3_DEF_RESUID 0
800#define EXT3_DEF_RESGID 0
801
802/*
803 * Default mount options
804 */
805#define EXT3_DEFM_DEBUG 0x0001
806#define EXT3_DEFM_BSDGROUPS 0x0002
807#define EXT3_DEFM_XATTR_USER 0x0004
808#define EXT3_DEFM_ACL 0x0008
809#define EXT3_DEFM_UID16 0x0010
810#define EXT3_DEFM_JMODE 0x0060
811#define EXT3_DEFM_JMODE_DATA 0x0020
812#define EXT3_DEFM_JMODE_ORDERED 0x0040
813#define EXT3_DEFM_JMODE_WBACK 0x0060
814
815/*
816 * Structure of a directory entry
817 */
818#define EXT3_NAME_LEN 255
819
820struct ext3_dir_entry {
821 __le32 inode; /* Inode number */
822 __le16 rec_len; /* Directory entry length */
823 __le16 name_len; /* Name length */
824 char name[EXT3_NAME_LEN]; /* File name */
825};
826
827/*
828 * The new version of the directory entry. Since EXT3 structures are
829 * stored in Intel byte order, and the name_len field could never be
830 * bigger than 255 chars, it's safe to reclaim the extra byte for the
831 * file_type field.
832 */
833struct ext3_dir_entry_2 {
834 __le32 inode; /* Inode number */
835 __le16 rec_len; /* Directory entry length */
836 __u8 name_len; /* Name length */
837 __u8 file_type;
838 char name[EXT3_NAME_LEN]; /* File name */
839};
840
841/*
842 * Ext3 directory file types. Only the low 3 bits are used. The
843 * other bits are reserved for now.
844 */
845#define EXT3_FT_UNKNOWN 0
846#define EXT3_FT_REG_FILE 1
847#define EXT3_FT_DIR 2
848#define EXT3_FT_CHRDEV 3
849#define EXT3_FT_BLKDEV 4
850#define EXT3_FT_FIFO 5
851#define EXT3_FT_SOCK 6
852#define EXT3_FT_SYMLINK 7
853
854#define EXT3_FT_MAX 8
855
856/*
857 * EXT3_DIR_PAD defines the directory entry boundaries
858 *
859 * NOTE: It must be a multiple of 4
860 */
861#define EXT3_DIR_PAD 4
862#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
863#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
864 ~EXT3_DIR_ROUND)
865#define EXT3_MAX_REC_LEN ((1<<16)-1)
866
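/*
 * Editor's note -- a worked example of the macros above (illustrative,
 * not part of the original header): a directory entry for a 5-character
 * name needs EXT3_DIR_REC_LEN(5) = (5 + 8 + 3) & ~3 = 16 bytes, i.e.
 * the 8-byte fixed header (inode, rec_len, name_len, file_type) plus
 * the name, rounded up to the next 4-byte boundary.
 */
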
867/*
868 * Tests against MAX_REC_LEN etc were put in place for 64k block
869 * sizes; if that is not possible on this arch, we can skip
870 * those tests and speed things up.
871 */
872static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
873{
874 unsigned len = le16_to_cpu(dlen);
875
876#if (PAGE_CACHE_SIZE >= 65536)
877 if (len == EXT3_MAX_REC_LEN)
878 return 1 << 16;
879#endif
880 return len;
881}
882
883static inline __le16 ext3_rec_len_to_disk(unsigned len)
884{
885#if (PAGE_CACHE_SIZE >= 65536)
886 if (len == (1 << 16))
887 return cpu_to_le16(EXT3_MAX_REC_LEN);
888 else if (len > (1 << 16))
889 BUG();
890#endif
891 return cpu_to_le16(len);
892}
893
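/*
 * Editor's note on the two helpers above: a rec_len of exactly 65536
 * (the single entry spanning an empty 64KB directory block) does not
 * fit in the on-disk __le16 field, so ext3_rec_len_to_disk(1 << 16)
 * stores the sentinel EXT3_MAX_REC_LEN (65535), and
 * ext3_rec_len_from_disk() maps that sentinel back to 1 << 16 when
 * PAGE_CACHE_SIZE >= 65536.
 */
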
894/*
895 * Hash Tree Directory indexing
896 * (c) Daniel Phillips, 2001
897 */
898
899#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
900 EXT3_FEATURE_COMPAT_DIR_INDEX) && \
901 (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
902#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
903#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
904
905/* Legal values for the dx_root hash_version field: */
906
907#define DX_HASH_LEGACY 0
908#define DX_HASH_HALF_MD4 1
909#define DX_HASH_TEA 2
910#define DX_HASH_LEGACY_UNSIGNED 3
911#define DX_HASH_HALF_MD4_UNSIGNED 4
912#define DX_HASH_TEA_UNSIGNED 5
913
914/* hash info structure used by the directory hash */
915struct dx_hash_info
916{
917 u32 hash;
918 u32 minor_hash;
919 int hash_version;
920 u32 *seed;
921};
922
923#define EXT3_HTREE_EOF 0x7fffffff
924
925/*
926 * Control parameters used by ext3_htree_next_block
927 */
928#define HASH_NB_ALWAYS 1
929
930
931/*
932 * Describe an inode's exact location on disk and in memory
933 */
934struct ext3_iloc
935{
936 struct buffer_head *bh;
937 unsigned long offset;
938 unsigned long block_group;
939};
940
941static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
942{
943 return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
944}
945
946/*
947 * This structure is stuffed into the struct file's private_data field
948 * for directories. It is where we put information so that we can do
949 * readdir operations in hash tree order.
950 */
951struct dir_private_info {
952 struct rb_root root;
953 struct rb_node *curr_node;
954 struct fname *extra_fname;
955 loff_t last_pos;
956 __u32 curr_hash;
957 __u32 curr_minor_hash;
958 __u32 next_hash;
959};
960
961/* calculate the first block number of the group */
962static inline ext3_fsblk_t
963ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
964{
965 return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) +
966 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
967}
968
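/*
 * Editor's note -- a worked example under assumed (not shown here)
 * parameters: with EXT3_BLOCKS_PER_GROUP(sb) == 32768 and
 * s_first_data_block == 1 (1KB blocks), ext3_group_first_block_no()
 * returns 1 for group 0, 32769 for group 1 and 65537 for group 2.
 */
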
969/*
970 * Special error return code only used by dx_probe() and its callers.
971 */
972#define ERR_BAD_DX_DIR -75000
973
974/*
975 * Function prototypes
976 */
977
978/*
979 * Ok, these declarations are also in <linux/kernel.h> but none of the
980 * ext3 source programs needs to include it so they are duplicated here.
981 */
982# define NORET_TYPE /**/
983# define ATTRIB_NORET __attribute__((noreturn))
984# define NORET_AND noreturn,
985
986/* balloc.c */
987extern int ext3_bg_has_super(struct super_block *sb, int group);
988extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
989extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
990 ext3_fsblk_t goal, int *errp);
991extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
992 ext3_fsblk_t goal, unsigned long *count, int *errp);
993extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
994 ext3_fsblk_t block, unsigned long count);
995extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
996 ext3_fsblk_t block, unsigned long count,
997 unsigned long *pdquot_freed_blocks);
998extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *);
999extern void ext3_check_blocks_bitmap (struct super_block *);
1000extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1001 unsigned int block_group,
1002 struct buffer_head ** bh);
1003extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
1004extern void ext3_init_block_alloc_info(struct inode *);
1005extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
1006extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
1007
1008/* dir.c */
1009extern int ext3_check_dir_entry(const char *, struct inode *,
1010 struct ext3_dir_entry_2 *,
1011 struct buffer_head *, unsigned long);
1012extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
1013 __u32 minor_hash,
1014 struct ext3_dir_entry_2 *dirent);
1015extern void ext3_htree_free_dir_info(struct dir_private_info *p);
1016
1017/* fsync.c */
1018extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
1019
1020/* hash.c */
1021extern int ext3fs_dirhash(const char *name, int len, struct
1022 dx_hash_info *hinfo);
1023
1024/* ialloc.c */
1025extern struct inode * ext3_new_inode (handle_t *, struct inode *,
1026 const struct qstr *, umode_t);
1027extern void ext3_free_inode (handle_t *, struct inode *);
1028extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
1029extern unsigned long ext3_count_free_inodes (struct super_block *);
1030extern unsigned long ext3_count_dirs (struct super_block *);
1031extern void ext3_check_inodes_bitmap (struct super_block *);
1032extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
1033
1034
1035/* inode.c */
1036int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
1037 struct buffer_head *bh, ext3_fsblk_t blocknr);
1038struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
1039struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
1040int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
1041 sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
1042 int create);
1043
1044extern struct inode *ext3_iget(struct super_block *, unsigned long);
1045extern int ext3_write_inode (struct inode *, struct writeback_control *);
1046extern int ext3_setattr (struct dentry *, struct iattr *);
1047extern void ext3_evict_inode (struct inode *);
1048extern int ext3_sync_inode (handle_t *, struct inode *);
1049extern void ext3_discard_reservation (struct inode *);
1050extern void ext3_dirty_inode(struct inode *, int);
1051extern int ext3_change_inode_journal_flag(struct inode *, int);
1052extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
1053extern int ext3_can_truncate(struct inode *inode);
1054extern void ext3_truncate(struct inode *inode);
1055extern void ext3_set_inode_flags(struct inode *);
1056extern void ext3_get_inode_flags(struct ext3_inode_info *);
1057extern void ext3_set_aops(struct inode *inode);
1058extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1059 u64 start, u64 len);
1060
1061/* ioctl.c */
1062extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
1063extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
1064
1065/* namei.c */
1066extern int ext3_orphan_add(handle_t *, struct inode *);
1067extern int ext3_orphan_del(handle_t *, struct inode *);
1068extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1069 __u32 start_minor_hash, __u32 *next_hash);
1070
1071/* resize.c */
1072extern int ext3_group_add(struct super_block *sb,
1073 struct ext3_new_group_data *input);
1074extern int ext3_group_extend(struct super_block *sb,
1075 struct ext3_super_block *es,
1076 ext3_fsblk_t n_blocks_count);
1077
1078/* super.c */
1079extern __printf(3, 4)
1080void ext3_error(struct super_block *, const char *, const char *, ...);
1081extern void __ext3_std_error (struct super_block *, const char *, int);
1082extern __printf(3, 4)
1083void ext3_abort(struct super_block *, const char *, const char *, ...);
1084extern __printf(3, 4)
1085void ext3_warning(struct super_block *, const char *, const char *, ...);
1086extern __printf(3, 4)
1087void ext3_msg(struct super_block *, const char *, const char *, ...);
1088extern void ext3_update_dynamic_rev (struct super_block *sb);
1089
1090#define ext3_std_error(sb, errno) \
1091do { \
1092 if ((errno)) \
1093 __ext3_std_error((sb), __func__, (errno)); \
1094} while (0)
1095
1096/*
1097 * Inodes and files operations
1098 */
1099
1100/* dir.c */
1101extern const struct file_operations ext3_dir_operations;
1102
1103/* file.c */
1104extern const struct inode_operations ext3_file_inode_operations;
1105extern const struct file_operations ext3_file_operations;
1106
1107/* namei.c */
1108extern const struct inode_operations ext3_dir_inode_operations;
1109extern const struct inode_operations ext3_special_inode_operations;
1110
1111/* symlink.c */
1112extern const struct inode_operations ext3_symlink_inode_operations;
1113extern const struct inode_operations ext3_fast_symlink_inode_operations;
1114
1115#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal)
1116
1117/* Define the number of blocks we need to account to a transaction to
1118 * modify one block of data.
1119 *
1120 * We may have to touch one inode, one bitmap buffer, up to three
1121 * indirection blocks, the group and superblock summaries, and the data
1122 * block to complete the transaction. */
1123
1124#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
1125
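/*
 * Editor's note, spelling out the count of 8 from the comment above:
 * inode (1) + block bitmap (1) + up to three indirection blocks (3) +
 * group descriptor (1) + superblock (1) + the data block itself (1).
 */
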
1126/* Extended attribute operations touch at most two data buffers,
1127 * two bitmap buffers, and two group summaries, in addition to the inode
1128 * and the superblock, which are already accounted for. */
1129
1130#define EXT3_XATTR_TRANS_BLOCKS 6U
1131
1132/* Define the minimum size for a transaction which modifies data. This
1133 * needs to take into account the fact that we may end up modifying two
1134 * quota files too (one for the group, one for the user quota). The
1135 * superblock only gets updated once, of course, so don't bother
1136 * counting that again for the quota updates. */
1137
1138#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
1139 EXT3_XATTR_TRANS_BLOCKS - 2 + \
1140 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
1141
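/*
 * Editor's note -- worked arithmetic, assuming user and group quota
 * are both enabled (MAXQUOTAS == 2, so EXT3_MAXQUOTAS_TRANS_BLOCKS(sb)
 * is 4): EXT3_DATA_TRANS_BLOCKS(sb) = 8 + 6 - 2 + 4 = 16 credits per
 * modified data block; with quota compiled out it is 8 + 6 - 2 = 12.
 */
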
1142/* Delete operations potentially hit one directory's namespace plus an
1143 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
1144 * generous. We can grow the delete transaction later if necessary. */
1145
1146#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
1147
1148/* Define an arbitrary limit for the amount of data we will anticipate
1149 * writing to any given transaction. For unbounded transactions such as
1150 * write(2) and truncate(2) we can write more than this, but we always
1151 * start off at the maximum transaction size and grow the transaction
1152 * optimistically as we go. */
1153
1154#define EXT3_MAX_TRANS_DATA 64U
1155
1156/* We break up a large truncate or write transaction once the handle's
1157 * buffer credits get this low; at that point we need either to extend
1158 * the transaction or to start a new one. Reserve enough space here for
1159 * inode, bitmap, superblock, group and indirection updates for at least
1160 * one block, plus two quota updates. Quota allocations are not
1161 * needed. */
1162
1163#define EXT3_RESERVE_TRANS_BLOCKS 12U
1164
1165#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1166
1167#ifdef CONFIG_QUOTA
1168/* Amount of blocks needed for quota update - we know that the structure was
1169 * allocated so we need to update only inode+data */
1170#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
1171/* Amount of blocks needed for quota insert/delete - we do some block writes
1172 * but inode, sb and group updates are done only once */
1173#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
1174 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
1175#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
1176 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
1177#else
1178#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
1179#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
1180#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
1181#endif
1182#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
1183#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
1184#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
1185
1186int
1187ext3_mark_iloc_dirty(handle_t *handle,
1188 struct inode *inode,
1189 struct ext3_iloc *iloc);
1190
1191/*
1192 * On success, we end up with an outstanding reference count against
1193 * iloc->bh. This _must_ be cleaned up later.
1194 */
1195
1196int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
1197 struct ext3_iloc *iloc);
1198
1199int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
1200
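/*
 * Editor's sketch (illustrative only, error handling trimmed) of the
 * pairing implied by the comment above: ext3_reserve_inode_write()
 * takes the reference against iloc->bh, and ext3_mark_iloc_dirty() is
 * what drops it again -- the same sequence ext3_mark_inode_dirty()
 * wraps for the common case:
 *
 *	struct ext3_iloc iloc;
 *	int err = ext3_reserve_inode_write(handle, inode, &iloc);
 *	if (!err) {
 *		// ... modify ext3_raw_inode(&iloc) ...
 *		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
 *	}
 */
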
1201/*
1202 * Wrapper functions with which ext3 calls into JBD. The intent here is
1203 * to allow these to be turned into appropriate stubs so ext3 can control
1204 * ext2 filesystems, so ext2+ext3 systems only need one fs. This work hasn't
1205 * been done yet.
1206 */
1207
1208static inline void ext3_journal_release_buffer(handle_t *handle,
1209 struct buffer_head *bh)
1210{
1211 journal_release_buffer(handle, bh);
1212}
1213
1214void ext3_journal_abort_handle(const char *caller, const char *err_fn,
1215 struct buffer_head *bh, handle_t *handle, int err);
1216
1217int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
1218 struct buffer_head *bh);
1219
1220int __ext3_journal_get_write_access(const char *where, handle_t *handle,
1221 struct buffer_head *bh);
1222
1223int __ext3_journal_forget(const char *where, handle_t *handle,
1224 struct buffer_head *bh);
1225
1226int __ext3_journal_revoke(const char *where, handle_t *handle,
1227 unsigned long blocknr, struct buffer_head *bh);
1228
1229int __ext3_journal_get_create_access(const char *where,
1230 handle_t *handle, struct buffer_head *bh);
1231
1232int __ext3_journal_dirty_metadata(const char *where,
1233 handle_t *handle, struct buffer_head *bh);
1234
1235#define ext3_journal_get_undo_access(handle, bh) \
1236 __ext3_journal_get_undo_access(__func__, (handle), (bh))
1237#define ext3_journal_get_write_access(handle, bh) \
1238 __ext3_journal_get_write_access(__func__, (handle), (bh))
1239#define ext3_journal_revoke(handle, blocknr, bh) \
1240 __ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
1241#define ext3_journal_get_create_access(handle, bh) \
1242 __ext3_journal_get_create_access(__func__, (handle), (bh))
1243#define ext3_journal_dirty_metadata(handle, bh) \
1244 __ext3_journal_dirty_metadata(__func__, (handle), (bh))
1245#define ext3_journal_forget(handle, bh) \
1246 __ext3_journal_forget(__func__, (handle), (bh))
1247
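/*
 * Editor's sketch of the canonical use of these wrappers (illustrative
 * only; real callers add error handling): metadata buffers are declared
 * to the journal before being modified, then dirtied under the same
 * handle:
 *
 *	handle_t *handle = ext3_journal_start(inode, credits);
 *	err = ext3_journal_get_write_access(handle, bh);
 *	// ... modify bh->b_data ...
 *	err = ext3_journal_dirty_metadata(handle, bh);
 *	err = ext3_journal_stop(handle);
 */
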
1248int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
1249
1250handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
1251int __ext3_journal_stop(const char *where, handle_t *handle);
1252
1253static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
1254{
1255 return ext3_journal_start_sb(inode->i_sb, nblocks);
1256}
1257
1258#define ext3_journal_stop(handle) \
1259 __ext3_journal_stop(__func__, (handle))
1260
1261static inline handle_t *ext3_journal_current_handle(void)
1262{
1263 return journal_current_handle();
1264}
1265
1266static inline int ext3_journal_extend(handle_t *handle, int nblocks)
1267{
1268 return journal_extend(handle, nblocks);
1269}
1270
1271static inline int ext3_journal_restart(handle_t *handle, int nblocks)
1272{
1273 return journal_restart(handle, nblocks);
1274}
1275
1276static inline int ext3_journal_blocks_per_page(struct inode *inode)
1277{
1278 return journal_blocks_per_page(inode);
1279}
1280
1281static inline int ext3_journal_force_commit(journal_t *journal)
1282{
1283 return journal_force_commit(journal);
1284}
1285
1286/* super.c */
1287int ext3_force_commit(struct super_block *sb);
1288
1289static inline int ext3_should_journal_data(struct inode *inode)
1290{
1291 if (!S_ISREG(inode->i_mode))
1292 return 1;
1293 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
1294 return 1;
1295 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1296 return 1;
1297 return 0;
1298}
1299
1300static inline int ext3_should_order_data(struct inode *inode)
1301{
1302 if (!S_ISREG(inode->i_mode))
1303 return 0;
1304 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1305 return 0;
1306 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
1307 return 1;
1308 return 0;
1309}
1310
1311static inline int ext3_should_writeback_data(struct inode *inode)
1312{
1313 if (!S_ISREG(inode->i_mode))
1314 return 0;
1315 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1316 return 0;
1317 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
1318 return 1;
1319 return 0;
1320}
1321
1322#include <trace/events/ext3.h>
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
index d401f148d74..785a3261a26 100644
--- a/fs/ext3/ext3_jbd.c
+++ b/fs/ext3/ext3_jbd.c
@@ -2,7 +2,7 @@
  * Interface between ext3 and JBD
  */
 
-#include <linux/ext3_jbd.h>
+#include "ext3.h"
 
 int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
 				struct buffer_head *bh)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 724df69847d..25cb413277e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -18,12 +18,8 @@
  * (jj@sunsite.ms.mff.cuni.cz)
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd.h>
 #include <linux/quotaops.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 1860ed35632..d4dff278cbd 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -22,15 +22,9 @@
  * we can depend on generic_block_fdatasync() to sync the data blocks.
  */
 
-#include <linux/time.h>
 #include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
 #include <linux/writeback.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <trace/events/ext3.h>
+#include "ext3.h"
 
 /*
  * akpm: A new design for ext3_sync_file().
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 7d215b4d4f2..d10231ddcf8 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -9,9 +9,7 @@
  * License.
  */
 
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
 #include <linux/cryptohash.h>
 
 #define DELTA 0x9E3779B9
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1cde2843801..e3c39e4cec1 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -12,21 +12,10 @@
  * David S. Miller (davem@caip.rutgers.edu), 1995
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/stat.h>
-#include <linux/string.h>
 #include <linux/quotaops.h>
-#include <linux/buffer_head.h>
 #include <linux/random.h>
-#include <linux/bitops.h>
-#include <trace/events/ext3.h>
-
-#include <asm/byteorder.h>
 
+#include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2d0afeca0b4..10d7812f602 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -22,22 +22,12 @@
  * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
  */
 
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/ext3_jbd.h>
-#include <linux/jbd.h>
 #include <linux/highuid.h>
-#include <linux/pagemap.h>
 #include <linux/quotaops.h>
-#include <linux/string.h>
-#include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/mpage.h>
-#include <linux/uio.h>
-#include <linux/bio.h>
-#include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <trace/events/ext3.h>
+#include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -756,6 +746,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	struct ext3_block_alloc_info *block_i;
 	ext3_fsblk_t current_block;
 	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct timespec now;
 
 	block_i = ei->i_block_alloc_info;
 	/*
@@ -795,9 +786,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
-
-	inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, inode);
+	now = CURRENT_TIME_SEC;
+	if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) {
+		inode->i_ctime = now;
+		ext3_mark_inode_dirty(handle, inode);
+	}
 	/* ext3_mark_inode_dirty already updated i_sync_tid */
 	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
 
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 4af574ce4a4..677a5c27dc6 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -7,15 +7,10 @@
  * Universite Pierre et Marie Curie (Paris VI)
  */
 
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/capability.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
 #include <linux/mount.h>
-#include <linux/time.h>
 #include <linux/compat.h>
 #include <asm/uaccess.h>
+#include "ext3.h"
 
 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index e8e211795e9..d7940b24cf6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -24,20 +24,8 @@
  * Theodore Ts'o, 2002
  */
 
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/jbd.h>
-#include <linux/time.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/string.h>
 #include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/bio.h>
-#include <trace/events/ext3.h>
-
+#include "ext3.h"
 #include "namei.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 7916e4ce166..0f814f3450d 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -11,10 +11,7 @@
 
 #define EXT3FS_DEBUG
 
-#include <linux/ext3_jbd.h>
-
-#include <linux/errno.h>
-#include <linux/slab.h>
+#include "ext3.h"
 
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 726c7ef6cdf..cf0b5921cf0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -17,22 +17,12 @@
  */
 
 #include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/parser.h>
-#include <linux/buffer_head.h>
 #include <linux/exportfs.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
 #include <linux/random.h>
 #include <linux/mount.h>
-#include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
@@ -40,13 +30,13 @@
 
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+
+#include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/ext3.h>
-
 #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
   #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
 #else
@@ -2046,10 +2036,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
 		goto failed_mount3;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
-		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount3;
 	}
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 7c489820777..6b01c3eab1f 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -17,10 +17,8 @@
  * ext3 symlink handling code
  */
 
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/namei.h>
+#include "ext3.h"
 #include "xattr.h"
 
 static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d565759d82e..d22ebb7a4f5 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -50,14 +50,9 @@
  * by the buffer lock.
  */
 
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
-#include <linux/rwsem.h>
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ea26f2acab9..3387664ad70 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -3,12 +3,8 @@
  * Handler for storing security labels as extended attributes.
  */
 
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/security.h>
+#include "ext3.h"
 #include "xattr.h"
 
 static size_t
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 2526a8829de..d75727cc67f 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -5,11 +5,7 @@
  * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
  */
 
-#include <linux/string.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
 #include "xattr.h"
 
 static size_t
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index b32e473a1e3..5612af3567e 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -5,10 +5,7 @@
  * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
  */
 
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
 #include "xattr.h"
 
 static size_t
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9e2cd8cf71..4bbd07a6fa1 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -336,10 +336,10 @@ err_out:
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 {
 	struct ext4_group_desc *desc;
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh;
 	ext4_fsblk_t bitmap_blk;
 
 	desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	bitmap_blk = ext4_block_bitmap(sb, desc);
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
-		ext4_error(sb, "Cannot read block bitmap - "
+		ext4_error(sb, "Cannot get buffer for block bitmap - "
 			   "block_group = %u, block_bitmap = %llu",
 			   block_group, bitmap_blk);
 		return NULL;
 	}
 
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	/*
-	 * submit the buffer_head for read. We can
-	 * safely mark the bitmap as uptodate now.
-	 * We do it here so the bitmap uptodate bit
-	 * get set with buffer lock held.
+	 * submit the buffer_head for reading
 	 */
+	set_buffer_new(bh);
 	trace_ext4_read_block_bitmap_load(sb, block_group);
-	set_bitmap_uptodate(bh);
-	if (bh_submit_read(bh) < 0) {
-		put_bh(bh);
+	bh->b_end_io = ext4_end_bitmap_read;
+	get_bh(bh);
+	submit_bh(READ, bh);
+	return bh;
+}
+
+/* Returns 0 on success, 1 on error */
+int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
+			   struct buffer_head *bh)
+{
+	struct ext4_group_desc *desc;
+
+	if (!buffer_new(bh))
+		return 0;
+	desc = ext4_get_group_desc(sb, block_group, NULL);
+	if (!desc)
+		return 1;
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
 		ext4_error(sb, "Cannot read block bitmap - "
 			   "block_group = %u, block_bitmap = %llu",
-			   block_group, bitmap_blk);
-		return NULL;
+			   block_group, (unsigned long long) bh->b_blocknr);
+		return 1;
 	}
+	clear_buffer_new(bh);
+	/* Panic or remount fs read-only if block bitmap is invalid */
 	ext4_valid_block_bitmap(sb, desc, block_group, bh);
-	/*
-	 * file system mounted not to panic on error,
-	 * continue with corrupt bitmap
-	 */
+	return 0;
+}
+
+struct buffer_head *
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+{
+	struct buffer_head *bh;
+
+	bh = ext4_read_block_bitmap_nowait(sb, block_group);
+	if (ext4_wait_block_bitmap(sb, block_group, bh)) {
+		put_bh(bh);
+		return NULL;
+	}
 	return bh;
 }
 
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 164c56092e5..b8678620264 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
-static int ext4_readdir(struct file *, void *, filldir_t);
 static int ext4_dx_readdir(struct file *filp,
 			   void *dirent, filldir_t filldir);
-static int ext4_release_dir(struct inode *inode,
-			    struct file *filp);
-
-const struct file_operations ext4_dir_operations = {
-	.llseek		= ext4_llseek,
-	.read		= generic_read_dir,
-	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
-	.unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext4_compat_ioctl,
-#endif
-	.fsync		= ext4_sync_file,
-	.release	= ext4_release_dir,
-};
-
 
 static unsigned char get_dtype(struct super_block *sb, int filetype)
 {
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
 	return (ext4_filetype_table[filetype]);
 }
 
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which could potentially get converted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
+	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+		return 1;
+
+	return 0;
+}
+
 /*
  * Return 0 if the directory entry is OK, and 1 if there is a problem
  *
@@ -91,17 +95,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 		return 0;
 
 	if (filp)
-		ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+		ext4_error_file(filp, function, line, bh->b_blocknr,
 				"bad entry in directory: %s - offset=%u(%u), "
 				"inode=%u, rec_len=%d, name_len=%d",
-				error_msg, (unsigned) (offset%bh->b_size),
+				error_msg, (unsigned) (offset % bh->b_size),
 				offset, le32_to_cpu(de->inode),
 				rlen, de->name_len);
 	else
-		ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+		ext4_error_inode(dir, function, line, bh->b_blocknr,
 				"bad entry in directory: %s - offset=%u(%u), "
 				"inode=%u, rec_len=%d, name_len=%d",
-				error_msg, (unsigned) (offset%bh->b_size),
+				error_msg, (unsigned) (offset % bh->b_size),
 				offset, le32_to_cpu(de->inode),
 				rlen, de->name_len);
 
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
 	unsigned int offset;
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
-	struct super_block *sb;
 	int err;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
 
-	sb = inode->i_sb;
-
-	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
-		    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
-	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
-	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+	if (is_dx_dir(inode)) {
 		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
 			ret = err;
@@ -254,22 +253,134 @@ out:
 	return ret;
 }
 
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+	return is_compat_task();
+#else
+	return (BITS_PER_LONG == 32);
+#endif
+}
+
 /*
  * These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
+ *
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
+ * directly on both 32-bit and 64-bit nodes, under such case, neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
+ */
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return major >> 1;
+	else
+		return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return (pos << 1) & 0xffffffff;
+	else
+		return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return 0;
+	else
+		return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext4_get_htree_eof(struct file *filp)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return EXT4_HTREE_EOF_32BIT;
+	else
+		return EXT4_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext4_dir_llseek() based on generic_file_llseek() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
  *
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie. Sigh.
+ * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
+ *       will be invalid once the directory was converted into a dx directory
  */
-#define hash2pos(major, minor)	(major >> 1)
-#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos)	(0)
+loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	loff_t ret = -EINVAL;
+	int dx_dir = is_dx_dir(inode);
+
+	mutex_lock(&inode->i_mutex);
+
+	/* NOTE: relative offsets with dx directories might not work
+	 *       as expected, as it is difficult to figure out the
+	 *       correct offset between dx hashes */
+
+	switch (origin) {
+	case SEEK_END:
+		if (unlikely(offset > 0))
+			goto out_err;	/* not supported for directories */
+
+		/* so only negative offsets are left, does that have a
+		 * meaning for directories at all? */
+		if (dx_dir)
+			offset += ext4_get_htree_eof(file);
+		else
+			offset += inode->i_size;
+		break;
+	case SEEK_CUR:
+		/*
+		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
+		 * position-querying operation. Avoid rewriting the "same"
+		 * f_pos value back to the file because a concurrent read(),
+		 * write() or lseek() might have altered it
+		 */
+		if (offset == 0) {
+			offset = file->f_pos;
+			goto out_ok;
+		}
+
+		offset += file->f_pos;
+		break;
+	}
+
+	if (unlikely(offset < 0))
+		goto out_err;
+
+	if (!dx_dir) {
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_err;
+	} else if (offset > ext4_get_htree_eof(file))
+		goto out_err;
+
+	/* Special lock needed here? */
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+
+out_ok:
+	ret = offset;
+out_err:
+	mutex_unlock(&inode->i_mutex);
+
+	return ret;
+}
 
 /*
  * This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
 }
 
 
-static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
+							   loff_t pos)
 {
 	struct dir_private_info *p;
 
 	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
 	if (!p)
 		return NULL;
-	p->curr_hash = pos2maj_hash(pos);
-	p->curr_minor_hash = pos2min_hash(pos);
+	p->curr_hash = pos2maj_hash(filp, pos);
+	p->curr_minor_hash = pos2min_hash(filp, pos);
 	return p;
 }
 
@@ -425,11 +537,12 @@ static int call_filldir(struct file *filp, void *dirent,
 	sb = inode->i_sb;
 
 	if (!fname) {
-		printk(KERN_ERR "EXT4-fs: call_filldir: called with "
-		       "null fname?!?\n");
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
+			 "called with null fname?!?", __func__, __LINE__,
+			 inode->i_ino, current->comm);
 		return 0;
 	}
-	curr_pos = hash2pos(fname->hash, fname->minor_hash);
+	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
 	while (fname) {
 		error = filldir(dirent, fname->name,
 				fname->name_len, curr_pos,
@@ -454,13 +567,13 @@ static int ext4_dx_readdir(struct file *filp,
 	int	ret;
 
 	if (!info) {
-		info = ext4_htree_create_dir_info(filp->f_pos);
+		info = ext4_htree_create_dir_info(filp, filp->f_pos);
 		if (!info)
 			return -ENOMEM;
 		filp->private_data = info;
 	}
 
-	if (filp->f_pos == EXT4_HTREE_EOF)
+	if (filp->f_pos == ext4_get_htree_eof(filp))
 		return 0;	/* EOF */
 
 	/* Some one has messed with f_pos; reset the world */
@@ -468,8 +581,8 @@ static int ext4_dx_readdir(struct file *filp,
 		free_rb_tree_fname(&info->root);
 		info->curr_node = NULL;
 		info->extra_fname = NULL;
-		info->curr_hash = pos2maj_hash(filp->f_pos);
-		info->curr_minor_hash = pos2min_hash(filp->f_pos);
+		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
+		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
 	}
 
 	/*
@@ -501,7 +614,7 @@ static int ext4_dx_readdir(struct file *filp,
 		if (ret < 0)
 			return ret;
 		if (ret == 0) {
-			filp->f_pos = EXT4_HTREE_EOF;
+			filp->f_pos = ext4_get_htree_eof(filp);
 			break;
 		}
 		info->curr_node = rb_first(&info->root);
@@ -521,7 +634,7 @@
 		info->curr_minor_hash = fname->minor_hash;
 	} else {
 		if (info->next_hash == ~0) {
-			filp->f_pos = EXT4_HTREE_EOF;
+			filp->f_pos = ext4_get_htree_eof(filp);
 			break;
 		}
 		info->curr_hash = info->next_hash;
@@ -540,3 +653,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
 
 	return 0;
 }
+
+const struct file_operations ext4_dir_operations = {
+	.llseek		= ext4_dir_llseek,
+	.read		= generic_read_dir,
+	.readdir	= ext4_readdir,
+	.unlocked_ioctl = ext4_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext4_compat_ioctl,
+#endif
+	.fsync		= ext4_sync_file,
+	.release	= ext4_release_dir,
+};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 513004fc3d8..0e01e90add8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -53,7 +53,7 @@
53 printk(KERN_DEBUG f, ## a); \ 53 printk(KERN_DEBUG f, ## a); \
54 } while (0) 54 } while (0)
55#else 55#else
56#define ext4_debug(f, a...) do {} while (0) 56#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
57#endif 57#endif
58 58
59#define EXT4_ERROR_INODE(inode, fmt, a...) \ 59#define EXT4_ERROR_INODE(inode, fmt, a...) \
@@ -184,6 +184,8 @@ struct mpage_da_data {
184#define EXT4_IO_END_UNWRITTEN 0x0001 184#define EXT4_IO_END_UNWRITTEN 0x0001
185#define EXT4_IO_END_ERROR 0x0002 185#define EXT4_IO_END_ERROR 0x0002
186#define EXT4_IO_END_QUEUED 0x0004 186#define EXT4_IO_END_QUEUED 0x0004
187#define EXT4_IO_END_DIRECT 0x0008
188#define EXT4_IO_END_IN_FSYNC 0x0010
187 189
188struct ext4_io_page { 190struct ext4_io_page {
189 struct page *p_page; 191 struct page *p_page;
@@ -192,18 +194,25 @@ struct ext4_io_page {
192 194
193#define MAX_IO_PAGES 128 195#define MAX_IO_PAGES 128
194 196
197/*
198 * For converting uninitialized extents on a work queue.
199 *
200 * 'page' is only used from the writepage() path; 'pages' is only used for
201 * buffered writes; they are used to keep page references until conversion
202 * takes place. For AIO/DIO, neither field is filled in.
203 */
195typedef struct ext4_io_end { 204typedef struct ext4_io_end {
196 struct list_head list; /* per-file finished IO list */ 205 struct list_head list; /* per-file finished IO list */
197 struct inode *inode; /* file being written to */ 206 struct inode *inode; /* file being written to */
198 unsigned int flag; /* unwritten or not */ 207 unsigned int flag; /* unwritten or not */
199 struct page *page; /* page struct for buffer write */ 208 struct page *page; /* for writepage() path */
200 loff_t offset; /* offset in the file */ 209 loff_t offset; /* offset in the file */
201 ssize_t size; /* size of the extent */ 210 ssize_t size; /* size of the extent */
202 struct work_struct work; /* data work queue */ 211 struct work_struct work; /* data work queue */
203 struct kiocb *iocb; /* iocb struct for AIO */ 212 struct kiocb *iocb; /* iocb struct for AIO */
204 int result; /* error value for AIO */ 213 int result; /* error value for AIO */
205 int num_io_pages; 214 int num_io_pages; /* for writepages() */
206 struct ext4_io_page *pages[MAX_IO_PAGES]; 215 struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
207} ext4_io_end_t; 216} ext4_io_end_t;
208 217
209struct ext4_io_submit { 218struct ext4_io_submit {
@@ -923,6 +932,7 @@ struct ext4_inode_info {
923#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 932#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
924#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ 933#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
925#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ 934#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
935#define EXT4_MOUNT_ERRORS_MASK 0x00070
926#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 936#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
 #define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
@@ -941,7 +951,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_I_VERSION		0x2000000 /* i_version support */
 #define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
@@ -1142,6 +1151,7 @@ struct ext4_sb_info {
 	unsigned int s_mount_opt;
 	unsigned int s_mount_opt2;
 	unsigned int s_mount_flags;
+	unsigned int s_def_mount_opt;
 	ext4_fsblk_t s_sb_block;
 	uid_t s_resuid;
 	gid_t s_resgid;
@@ -1193,9 +1203,6 @@ struct ext4_sb_info {
 	unsigned long s_ext_blocks;
 	unsigned long s_ext_extents;
 #endif
-	/* ext4 extent cache stats */
-	unsigned long extent_cache_hits;
-	unsigned long extent_cache_misses;
 
 	/* for buddy allocator */
 	struct ext4_group_info ***s_group_info;
@@ -1420,8 +1427,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
 #define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
-#define EXT4_FEATURE_INCOMPAT_INLINEDATA	0x2000 /* data in inode */
+#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM	0x2000 /* use crc32c for bg */
 #define EXT4_FEATURE_INCOMPAT_LARGEDIR		0x4000 /* >2GB or 3-lvl htree */
+#define EXT4_FEATURE_INCOMPAT_INLINEDATA	0x8000 /* data in inode */
 
 #define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
 #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1612,7 +1620,11 @@ struct dx_hash_info
 	u32 *seed;
 };
 
-#define EXT4_HTREE_EOF	0x7fffffff
+
+/* 32 and 64 bit signed EOF for dx directories */
+#define EXT4_HTREE_EOF_32BIT   ((1UL  << (32 - 1)) - 1)
+#define EXT4_HTREE_EOF_64BIT   ((1ULL << (64 - 1)) - 1)
+
 
 /*
  * Control parameters used by ext4_htree_next_block
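[editor's note] The two new constants replace the single EXT4_HTREE_EOF define removed above. A quick standalone check (plain user-space C, not kernel code) confirms that the 32-bit value is exactly the old hard-coded 0x7fffffff, which the hash.c clamp further down still relies on:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* same expressions as the new macros */
        uint32_t eof32 = (1UL  << (32 - 1)) - 1;
        uint64_t eof64 = (1ULL << (64 - 1)) - 1;

        assert(eof32 == 0x7fffffffUL);          /* old EXT4_HTREE_EOF */
        assert(eof64 == 0x7fffffffffffffffULL); /* 64-bit signed max */
        printf("EOF_32BIT=%#x EOF_64BIT=%#llx\n",
               eof32, (unsigned long long)eof64);
        return 0;
}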
@@ -1794,8 +1806,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
 						    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-						ext4_group_t block_group);
+
+extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+						ext4_group_t block_group);
+extern int ext4_wait_block_bitmap(struct super_block *sb,
+				  ext4_group_t block_group,
+				  struct buffer_head *bh);
+extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+						  ext4_group_t block_group);
 extern void ext4_init_block_bitmap(struct super_block *sb,
 				   struct buffer_head *bh,
 				   ext4_group_t group,
@@ -1841,6 +1859,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 extern int ext4_init_inode_table(struct super_block *sb,
 				 ext4_group_t group, int barrier);
+extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a52db3a69a3..0f58b86e3a0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -47,9 +47,9 @@
  */
 #define EXT_DEBUG__
 #ifdef EXT_DEBUG
-#define ext_debug(a...)		printk(a)
+#define ext_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
 #else
-#define ext_debug(a...)
+#define ext_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /*
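[editor's note] The point of using no_printk() for the !EXT_DEBUG case rather than an empty macro: the arguments remain visible to the compiler, so format strings stay type-checked in non-debug builds and "unused variable" warnings go away, while the call compiles to nothing. A minimal user-space model of the same trick (printf standing in for printk):

#include <stdio.h>

/* models the kernel's no_printk(): never runs, but the compiler
 * still type-checks the format string against the arguments */
#define no_printk(fmt, ...) \
        do { if (0) printf(fmt, ##__VA_ARGS__); } while (0)

#ifdef EXT_DEBUG
#define ext_debug(fmt, ...)     printf(fmt, ##__VA_ARGS__)
#else
#define ext_debug(fmt, ...)     no_printk(fmt, ##__VA_ARGS__)
#endif

int main(void)
{
        unsigned block = 42;
        ext_debug("truncate since %u\n", block); /* silent unless -DEXT_DEBUG */
        return 0;
}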
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 5802fa1dab1..83b20fcf940 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -104,6 +104,78 @@
 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
+/**
+ * struct ext4_journal_cb_entry - Base structure for callback information.
+ *
+ * This struct is a 'seed' structure for use with your own callback
+ * structs. If you are using callbacks you must allocate one of these
+ * or another struct of your own definition which has this struct
+ * as its first element and pass it to ext4_journal_callback_add().
+ */
+struct ext4_journal_cb_entry {
+	/* list information for other callbacks attached to the same handle */
+	struct list_head jce_list;
+
+	/*  Function to call with this callback structure */
+	void (*jce_func)(struct super_block *sb,
+			 struct ext4_journal_cb_entry *jce, int error);
+
+	/* user data goes here */
+};
+
+/**
+ * ext4_journal_callback_add: add a function to call after transaction commit
+ * @handle: active journal transaction handle to register callback on
+ * @func: callback function to call after the transaction has committed:
+ *        @sb: superblock of current filesystem for transaction
+ *        @jce: journal callback data, as passed to ext4_journal_callback_add()
+ *        @rc: journal state at commit (0 = transaction committed properly)
+ * @jce: journal callback data (internal and function private data struct)
+ *
+ * The registered function will be called in the context of the journal thread
+ * after the transaction for which the handle was created has completed.
+ *
+ * No locks are held when the callback function is called, so it is safe to
+ * call blocking functions from within the callback, but the callback should
+ * not block or run for too long, or the filesystem will be blocked waiting for
+ * the next transaction to commit. No journaling functions can be used, or
+ * there is a risk of deadlock.
+ *
+ * There is no guaranteed calling order of multiple registered callbacks on
+ * the same transaction.
+ */
+static inline void ext4_journal_callback_add(handle_t *handle,
+			void (*func)(struct super_block *sb,
+				     struct ext4_journal_cb_entry *jce,
+				     int rc),
+			struct ext4_journal_cb_entry *jce)
+{
+	struct ext4_sb_info *sbi =
+			EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+	/* Add the jce to transaction's private list */
+	jce->jce_func = func;
+	spin_lock(&sbi->s_md_lock);
+	list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
+	spin_unlock(&sbi->s_md_lock);
+}
+
+/**
+ * ext4_journal_callback_del: delete a registered callback
+ * @handle: active journal transaction handle on which callback was registered
+ * @jce: registered journal callback entry to unregister
+ */
+static inline void ext4_journal_callback_del(handle_t *handle,
+					     struct ext4_journal_cb_entry *jce)
+{
+	struct ext4_sb_info *sbi =
+			EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+	spin_lock(&sbi->s_md_lock);
+	list_del_init(&jce->jce_list);
+	spin_unlock(&sbi->s_md_lock);
+}
+
 int
 ext4_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
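[editor's note] The intended usage pattern, per the struct's own comment, is to embed ext4_journal_cb_entry as the first member of a caller-private record and recover the container in the callback — the ext4_free_data_callback() declared in the mballoc.c hunks below has exactly this shape. A skeletal user-space model of the embedding (all names here are illustrative, not from the patch):

#include <stddef.h>
#include <stdio.h>

struct jce {                    /* stands in for struct ext4_journal_cb_entry */
        void (*func)(struct jce *jce, int rc);
};

struct my_commit_info {         /* caller-private callback record */
        struct jce jce;         /* MUST be the first member */
        unsigned long freed_blocks;
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void my_callback(struct jce *jce, int rc)
{
        struct my_commit_info *ci =
                container_of(jce, struct my_commit_info, jce);
        printf("commit rc=%d, freed=%lu\n", rc, ci->freed_blocks);
}

int main(void)
{
        struct my_commit_info ci = { .jce.func = my_callback,
                                     .freed_blocks = 128 };
        ci.jce.func(&ci.jce, 0);        /* the journal thread would do this */
        return 0;
}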
@@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
 /* super.c */
 int ext4_force_commit(struct super_block *sb);
 
-static inline int ext4_should_journal_data(struct inode *inode)
+/*
+ * Ext4 inode journal modes
+ */
+#define EXT4_INODE_JOURNAL_DATA_MODE	0x01 /* journal data mode */
+#define EXT4_INODE_ORDERED_DATA_MODE	0x02 /* ordered data mode */
+#define EXT4_INODE_WRITEBACK_DATA_MODE	0x04 /* writeback data mode */
+
+static inline int ext4_inode_journal_mode(struct inode *inode)
 {
 	if (EXT4_JOURNAL(inode) == NULL)
-		return 0;
-	if (!S_ISREG(inode->i_mode))
-		return 1;
-	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
-		return 1;
-	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
-		return 1;
-	return 0;
+		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
+	/* We do not support data journalling with delayed allocation */
+	if (!S_ISREG(inode->i_mode) ||
+	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
+	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+	    !test_opt(inode->i_sb, DELALLOC))
+		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+		return EXT4_INODE_ORDERED_DATA_MODE;	/* ordered */
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
+	else
+		BUG();
+}
+
+static inline int ext4_should_journal_data(struct inode *inode)
+{
+	return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
 }
 
 static inline int ext4_should_order_data(struct inode *inode)
 {
-	if (EXT4_JOURNAL(inode) == NULL)
-		return 0;
-	if (!S_ISREG(inode->i_mode))
-		return 0;
-	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
-		return 0;
-	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
-		return 1;
-	return 0;
+	return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
 }
 
 static inline int ext4_should_writeback_data(struct inode *inode)
 {
-	if (EXT4_JOURNAL(inode) == NULL)
-		return 1;
-	if (!S_ISREG(inode->i_mode))
-		return 0;
-	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
-		return 0;
-	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
-		return 1;
-	return 0;
+	return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
 }
 
 /*
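[editor's note] The modes are distinct bits rather than an enum so each ext4_should_*() helper reduces to a single AND, while callers can still switch on the exact value (as the inode.c hunks below do). A toy model of that design choice, outside the kernel (test_opt and the ext4 types are stubbed away):

#include <assert.h>

#define JOURNAL_DATA_MODE    0x01  /* models EXT4_INODE_JOURNAL_DATA_MODE */
#define ORDERED_DATA_MODE    0x02  /* models EXT4_INODE_ORDERED_DATA_MODE */
#define WRITEBACK_DATA_MODE  0x04  /* models EXT4_INODE_WRITEBACK_DATA_MODE */

static int inode_journal_mode(int has_journal, int ordered)
{
        if (!has_journal)
                return WRITEBACK_DATA_MODE;
        return ordered ? ORDERED_DATA_MODE : WRITEBACK_DATA_MODE;
}

int main(void)
{
        int mode = inode_journal_mode(1, 1);

        /* the should_*() helpers reduce to single bit tests */
        assert(mode & ORDERED_DATA_MODE);
        assert(!(mode & JOURNAL_DATA_MODE));
        return 0;
}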
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74f23c292e1..abcdeab67f5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,14 @@
 
 #include <trace/events/ext4.h>
 
+/*
+ * used by extent splitting.
+ */
+#define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
+					due to ENOSPC */
+#define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
+#define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
+
 static int ext4_split_extent(handle_t *handle,
 				struct inode *inode,
 				struct ext4_ext_path *path,
@@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle,
 				int split_flag,
 				int flags);
 
+static int ext4_split_extent_at(handle_t *handle,
+			     struct inode *inode,
+			     struct ext4_ext_path *path,
+			     ext4_lblk_t split,
+			     int split_flag,
+			     int flags);
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -300,6 +315,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 	ext4_fsblk_t block = ext4_ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
 
+	if (len == 0)
+		return 0;
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
 
@@ -2049,10 +2066,6 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 		ret = 1;
 	}
 errout:
-	if (!ret)
-		sbi->extent_cache_misses++;
-	else
-		sbi->extent_cache_hits++;
 	trace_ext4_ext_in_cache(inode, block, ret);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 	return ret;
@@ -2308,7 +2321,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	struct ext4_extent *ex;
 
 	/* the header must be checked already in ext4_ext_remove_space() */
-	ext_debug("truncate since %u in leaf\n", start);
+	ext_debug("truncate since %u in leaf to %u\n", start, end);
 	if (!path[depth].p_hdr)
 		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
 	eh = path[depth].p_hdr;
@@ -2343,14 +2356,17 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		ext_debug("  border %u:%u\n", a, b);
 
 		/* If this extent is beyond the end of the hole, skip it */
-		if (end <= ex_ee_block) {
+		if (end < ex_ee_block) {
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
 			ex_ee_len = ext4_ext_get_actual_len(ex);
 			continue;
 		} else if (b != ex_ee_block + ex_ee_len - 1) {
-			EXT4_ERROR_INODE(inode,"  bad truncate %u:%u\n",
-					 start, end);
+			EXT4_ERROR_INODE(inode,
+					 "can not handle truncate %u:%u "
+					 "on extent %u:%u",
+					 start, end, ex_ee_block,
+					 ex_ee_block + ex_ee_len - 1);
 			err = -EIO;
 			goto out;
 		} else if (a != ex_ee_block) {
@@ -2482,7 +2498,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 	return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
+static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t end)
 {
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
@@ -2491,7 +2508,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 	handle_t *handle;
 	int i, err;
 
-	ext_debug("truncate since %u\n", start);
+	ext_debug("truncate since %u to %u\n", start, end);
 
 	/* probably first extent we're gonna free will be last in block */
 	handle = ext4_journal_start(inode, depth + 1);
@@ -2504,6 +2521,61 @@ again:
 	trace_ext4_ext_remove_space(inode, start, depth);
 
 	/*
+	 * Check if we are removing extents inside the extent tree. If that
+	 * is the case, we are going to punch a hole inside the extent tree
+	 * so we have to check whether we need to split the extent covering
+	 * the last block to remove so we can easily remove the part of it
+	 * in ext4_ext_rm_leaf().
+	 */
+	if (end < EXT_MAX_BLOCKS - 1) {
+		struct ext4_extent *ex;
+		ext4_lblk_t ee_block;
+
+		/* find extent for this block */
+		path = ext4_ext_find_extent(inode, end, NULL);
+		if (IS_ERR(path)) {
+			ext4_journal_stop(handle);
+			return PTR_ERR(path);
+		}
+		depth = ext_depth(inode);
+		ex = path[depth].p_ext;
+		if (!ex)
+			goto cont;
+
+		ee_block = le32_to_cpu(ex->ee_block);
+
+		/*
+		 * See if the last block is inside the extent, if so split
+		 * the extent at 'end' block so we can easily remove the
+		 * tail of the first part of the split extent in
+		 * ext4_ext_rm_leaf().
+		 */
+		if (end >= ee_block &&
+		    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
+			int split_flag = 0;
+
+			if (ext4_ext_is_uninitialized(ex))
+				split_flag = EXT4_EXT_MARK_UNINIT1 |
+					     EXT4_EXT_MARK_UNINIT2;
+
+			/*
+			 * Split the extent in two so that 'end' is the last
+			 * block in the first new extent
+			 */
+			err = ext4_split_extent_at(handle, inode, path,
+						end + 1, split_flag,
+						EXT4_GET_BLOCKS_PRE_IO |
+						EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
+
+			if (err < 0)
+				goto out;
+		}
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+cont:
+
+	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
 	 */
@@ -2515,6 +2587,7 @@ again:
 	}
 	path[0].p_depth = depth;
 	path[0].p_hdr = ext_inode_hdr(inode);
+
 	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
 		err = -EIO;
 		goto out;
@@ -2526,7 +2599,7 @@ again:
 			/* this is leaf block */
 			err = ext4_ext_rm_leaf(handle, inode, path,
 					       &partial_cluster, start,
-					       EXT_MAX_BLOCKS - 1);
+					       end);
 			/* root level has  p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
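[editor's note] A worked example of the new end-boundary handling (numbers are illustrative, not from the patch): suppose a leaf holds an extent covering logical blocks 10..19 and a hole is punched with end = 15. The predicate end >= ee_block && end < ee_block + len - 1 (15 >= 10 && 15 < 19) holds, so the extent is first split at end + 1 = 16 into [10..15] and [16..19]; ext4_ext_rm_leaf() then removes only the first piece and [16..19] survives. The sketch below just replays that arithmetic:

#include <assert.h>

int main(void)
{
        unsigned ee_block = 10, ee_len = 10;    /* extent covers 10..19 */
        unsigned end = 15;                      /* last block to punch out */

        /* same predicate as ext4_ext_remove_space() */
        if (end >= ee_block && end < ee_block + ee_len - 1) {
                unsigned split = end + 1;       /* ext4_split_extent_at() point */

                assert(split == 16);
                /* first extent becomes 10..15, second 16..19 */
                assert(split - ee_block == 6);
                assert(ee_block + ee_len - split == 4);
        }
        return 0;
}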
@@ -2651,17 +2724,17 @@ void ext4_ext_init(struct super_block *sb)
 
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
-		printk(KERN_INFO "EXT4-fs: file extents enabled");
+		printk(KERN_INFO "EXT4-fs: file extents enabled"
 #ifdef AGGRESSIVE_TEST
-		printk(", aggressive tests");
+		       ", aggressive tests"
 #endif
 #ifdef CHECK_BINSEARCH
-		printk(", check binsearch");
+		       ", check binsearch"
 #endif
 #ifdef EXTENTS_STATS
-		printk(", stats");
+		       ", stats"
 #endif
-		printk("\n");
+		       "\n");
 #endif
 #ifdef EXTENTS_STATS
 		spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
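[editor's note] This rewrite leans on C's adjacent string-literal concatenation: the preprocessor leaves at most one literal per #ifdef branch and the compiler glues the survivors into a single printk() call, so the message can no longer be torn apart by interleaved log output (the old continuation printk()s also carried no KERN_ level of their own). A user-space sketch of the same construction:

#include <stdio.h>

#define AGGRESSIVE_TEST                 /* pretend this config is on */

int main(void)
{
        /* one call, one line: the literals concatenate at compile time */
        printf("EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
               ", aggressive tests"
#endif
#ifdef CHECK_BINSEARCH
               ", check binsearch"
#endif
               "\n");
        return 0;
}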
@@ -2709,14 +2782,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 }
 
 /*
- * used by extent splitting.
- */
-#define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
-					due to ENOSPC */
-#define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
-#define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
-
-/*
  * ext4_split_extent_at() splits an extent at given block.
 *
  * @handle: the journal handle
@@ -2813,7 +2878,7 @@ static int ext4_split_extent_at(handle_t *handle,
 	if (err)
 		goto fix_extent_len;
 	/* update the extent length and mark as initialized */
-	ex->ee_len = cpu_to_le32(ee_len);
+	ex->ee_len = cpu_to_le16(ee_len);
 	ext4_ext_try_to_merge(inode, path, ex);
 	err = ext4_ext_dirty(handle, inode, path + depth);
 	goto out;
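[editor's note] The cpu_to_le32() here was a genuine endianness bug, not a style fix: in the on-disk struct ext4_extent the length field ee_len is only 16 bits wide, so the 32-bit conversion was truncated into it — harmless on little-endian hosts, but on big-endian hosts the swapped bytes land in the discarded half and the stored length silently becomes 0. A standalone demonstration with stand-in byte-swap helpers (modelling a big-endian CPU):

#include <assert.h>
#include <stdint.h>

static uint32_t cpu_to_le32_be(uint32_t x)   /* byte swap, as on big-endian */
{
        return ((x & 0xff) << 24) | ((x & 0xff00) << 8) |
               ((x >> 8) & 0xff00) | (x >> 24);
}

static uint16_t cpu_to_le16_be(uint16_t x)
{
        return (uint16_t)((x << 8) | (x >> 8));
}

int main(void)
{
        uint16_t ee_len;

        /* the old code: 32-bit conversion truncated into a 16-bit field */
        ee_len = (uint16_t)cpu_to_le32_be(100);
        assert(ee_len == 0);            /* length silently becomes 0 */

        /* the fixed code */
        ee_len = cpu_to_le16_be(100);
        assert(ee_len == 0x6400);       /* correct LE representation */
        return 0;
}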
@@ -3224,11 +3289,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 
-	if (unlikely(!eh->eh_entries)) {
-		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
-				 "EOFBLOCKS_FL set");
-		return -EIO;
-	}
+	/*
+	 * We're going to remove EOFBLOCKS_FL entirely in future so we
+	 * do not care for this case anymore. Simply remove the flag
+	 * if there are no extents.
+	 */
+	if (unlikely(!eh->eh_entries))
+		goto out;
 	last_ex = EXT_LAST_EXTENT(eh);
 	/*
 	 * We should clear the EOFBLOCKS_FL flag if we are writing the
@@ -3252,6 +3319,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	for (i = depth-1; i >= 0; i--)
 		if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
 			return 0;
+out:
 	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 	return ext4_mark_inode_dirty(handle, inode);
 }
@@ -3710,8 +3778,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	int free_on_err = 0, err = 0, depth, ret;
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
-	unsigned int punched_out = 0;
-	unsigned int result = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 	ext4_lblk_t cluster_offset;
@@ -3721,8 +3787,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* check in cache */
-	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
-		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((sbi->s_cluster_ratio > 1) &&
 			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
@@ -3790,113 +3855,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* if found extent covers block, simply return it */
 	if (in_range(map->m_lblk, ee_block, ee_len)) {
-		struct ext4_map_blocks punch_map;
-		ext4_fsblk_t partial_cluster = 0;
-
 		newblock = map->m_lblk - ee_block + ee_start;
 		/* number of remaining blocks in the extent */
 		allocated = ee_len - (map->m_lblk - ee_block);
 		ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
 			  ee_block, ee_len, newblock);
 
-		if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) {
-			/*
-			 * Do not put uninitialized extent
-			 * in the cache
-			 */
-			if (!ext4_ext_is_uninitialized(ex)) {
-				ext4_ext_put_in_cache(inode, ee_block,
-					ee_len, ee_start);
-				goto out;
-			}
-			ret = ext4_ext_handle_uninitialized_extents(
-				handle, inode, map, path, flags,
-				allocated, newblock);
-			return ret;
-		}
-
-		/*
-		 * Punch out the map length, but only to the
-		 * end of the extent
-		 */
-		punched_out = allocated < map->m_len ?
-			allocated : map->m_len;
-
 		/*
-		 * Sense extents need to be converted to
-		 * uninitialized, they must fit in an
-		 * uninitialized extent
+		 * Do not put uninitialized extent
+		 * in the cache
 		 */
-		if (punched_out > EXT_UNINIT_MAX_LEN)
-			punched_out = EXT_UNINIT_MAX_LEN;
-
-		punch_map.m_lblk = map->m_lblk;
-		punch_map.m_pblk = newblock;
-		punch_map.m_len = punched_out;
-		punch_map.m_flags = 0;
-
-		/* Check to see if the extent needs to be split */
-		if (punch_map.m_len != ee_len ||
-		    punch_map.m_lblk != ee_block) {
-
-			ret = ext4_split_extent(handle, inode,
-					path, &punch_map, 0,
-					EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
-					EXT4_GET_BLOCKS_PRE_IO);
-
-			if (ret < 0) {
-				err = ret;
-				goto out2;
-			}
-			/*
-			 * find extent for the block at
-			 * the start of the hole
-			 */
-			ext4_ext_drop_refs(path);
-			kfree(path);
-
-			path = ext4_ext_find_extent(inode,
-				map->m_lblk, NULL);
-			if (IS_ERR(path)) {
-				err = PTR_ERR(path);
-				path = NULL;
-				goto out2;
-			}
-
-			depth = ext_depth(inode);
-			ex = path[depth].p_ext;
-			ee_len = ext4_ext_get_actual_len(ex);
-			ee_block = le32_to_cpu(ex->ee_block);
-			ee_start = ext4_ext_pblock(ex);
-
-		}
-
-		ext4_ext_mark_uninitialized(ex);
-
-		ext4_ext_invalidate_cache(inode);
-
-		err = ext4_ext_rm_leaf(handle, inode, path,
-				       &partial_cluster, map->m_lblk,
-				       map->m_lblk + punched_out);
-
-		if (!err && path->p_hdr->eh_entries == 0) {
-			/*
-			 * Punch hole freed all of this sub tree,
-			 * so we need to correct eh_depth
-			 */
-			err = ext4_ext_get_access(handle, inode, path);
-			if (err == 0) {
-				ext_inode_hdr(inode)->eh_depth = 0;
-				ext_inode_hdr(inode)->eh_max =
-				cpu_to_le16(ext4_ext_space_root(
-						inode, 0));
-
-				err = ext4_ext_dirty(
-					handle, inode, path);
-			}
+		if (!ext4_ext_is_uninitialized(ex)) {
+			ext4_ext_put_in_cache(inode, ee_block,
+					      ee_len, ee_start);
+			goto out;
 		}
-
-		goto out2;
+		ret = ext4_ext_handle_uninitialized_extents(
+			handle, inode, map, path, flags,
+			allocated, newblock);
+		return ret;
 	}
 	}
 
@@ -4165,13 +4142,11 @@ out2:
 		ext4_ext_drop_refs(path);
 		kfree(path);
 	}
-	result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
-			punched_out : allocated;
 
 	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
-		newblock, map->m_len, err ? err : result);
+		newblock, map->m_len, err ? err : allocated);
 
-	return err ? err : result;
+	return err ? err : allocated;
 }
 
 void ext4_ext_truncate(struct inode *inode)
@@ -4228,7 +4203,7 @@ void ext4_ext_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
-	err = ext4_ext_remove_space(inode, last_block);
+	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
 
 	/* In a multi-transaction truncate, we only make the final
 	 * transaction synchronous.
@@ -4436,10 +4411,11 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 					  EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 		if (ret <= 0) {
 			WARN_ON(ret <= 0);
-			printk(KERN_ERR "%s: ext4_ext_map_blocks "
-				    "returned error inode#%lu, block=%u, "
-				    "max_blocks=%u", __func__,
-				    inode->i_ino, map.m_lblk, map.m_len);
+			ext4_msg(inode->i_sb, KERN_ERR,
+				 "%s:%d: inode #%lu: block %u: len %u: "
+				 "ext4_ext_map_blocks returned %d",
+				 __func__, __LINE__, inode->i_ino, map.m_lblk,
+				 map.m_len, ret);
 		}
 		ext4_mark_inode_dirty(handle, inode);
 		ret2 = ext4_journal_stop(handle);
@@ -4705,14 +4681,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
-	struct ext4_ext_cache cache_ex;
-	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
+	ext4_lblk_t first_block, stop_block;
 	struct address_space *mapping = inode->i_mapping;
-	struct ext4_map_blocks map;
 	handle_t *handle;
 	loff_t first_page, last_page, page_len;
 	loff_t first_page_offset, last_page_offset;
-	int ret, credits, blocks_released, err = 0;
+	int credits, err = 0;
 
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
@@ -4728,10 +4702,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 			   offset;
 	}
 
-	first_block = (offset + sb->s_blocksize - 1) >>
-		EXT4_BLOCK_SIZE_BITS(sb);
-	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
 	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
 
@@ -4810,7 +4780,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 		}
 	}
 
-
 	/*
 	 * If i_size is contained in the last page, we need to
 	 * unmap and zero the partial page after i_size
@@ -4830,73 +4799,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 		}
 	}
 
+	first_block = (offset + sb->s_blocksize - 1) >>
+		EXT4_BLOCK_SIZE_BITS(sb);
+	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
 	/* If there are no blocks to remove, return now */
-	if (first_block >= last_block)
+	if (first_block >= stop_block)
 		goto out;
 
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_ext_invalidate_cache(inode);
 	ext4_discard_preallocations(inode);
 
-	/*
-	 * Loop over all the blocks and identify blocks
-	 * that need to be punched out
-	 */
-	iblock = first_block;
-	blocks_released = 0;
-	while (iblock < last_block) {
-		max_blocks = last_block - iblock;
-		num_blocks = 1;
-		memset(&map, 0, sizeof(map));
-		map.m_lblk = iblock;
-		map.m_len = max_blocks;
-		ret = ext4_ext_map_blocks(handle, inode, &map,
-					  EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
-
-		if (ret > 0) {
-			blocks_released += ret;
-			num_blocks = ret;
-		} else if (ret == 0) {
-			/*
-			 * If map blocks could not find the block,
-			 * then it is in a hole. If the hole was
-			 * not already cached, then map blocks should
-			 * put it in the cache. So we can get the hole
-			 * out of the cache
-			 */
-			memset(&cache_ex, 0, sizeof(cache_ex));
-			if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
-				!cache_ex.ec_start) {
-
-				/* The hole is cached */
-				num_blocks = cache_ex.ec_block +
-				cache_ex.ec_len - iblock;
-
-			} else {
-				/* The block could not be identified */
-				err = -EIO;
-				break;
-			}
-		} else {
-			/* Map blocks error */
-			err = ret;
-			break;
-		}
-
-		if (num_blocks == 0) {
-			/* This condition should never happen */
-			ext_debug("Block lookup failed");
-			err = -EIO;
-			break;
-		}
-
-		iblock += num_blocks;
-	}
+	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
 
-	if (blocks_released > 0) {
-		ext4_ext_invalidate_cache(inode);
-		ext4_discard_preallocations(inode);
-	}
+	ext4_ext_invalidate_cache(inode);
+	ext4_discard_preallocations(inode);
 
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
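[editor's note] The first_block/stop_block rounding is easy to get backwards: the start of the hole is rounded up to a whole block (a partial head block must be zeroed in the page cache, not freed) while the end is rounded down. A standalone check of that arithmetic, assuming 4 KiB blocks for the example:

#include <assert.h>
#include <stdint.h>

#define BLOCK_SIZE_BITS 12                      /* 4096-byte blocks */
#define BLOCK_SIZE      (1UL << BLOCK_SIZE_BITS)

int main(void)
{
        uint64_t offset = 5000, length = 10000; /* punch bytes 5000..15000 */

        /* same arithmetic as ext4_ext_punch_hole() */
        uint64_t first_block = (offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
        uint64_t stop_block  = (offset + length) >> BLOCK_SIZE_BITS;

        assert(first_block == 2);   /* block 1 is only partly punched */
        assert(stop_block == 3);    /* block 3 extends past byte 15000 */
        /* whole blocks removed: first_block .. stop_block - 1, i.e. block 2 */
        assert(first_block < stop_block);
        return 0;
}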
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 00a2cb753ef..bb6c7d81131 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode)
 		io = list_entry(ei->i_completed_io_list.next,
 				ext4_io_end_t, list);
 		list_del_init(&io->list);
+		io->flag |= EXT4_IO_END_IN_FSYNC;
 		/*
 		 * Calling ext4_end_io_nolock() to convert completed
 		 * IO to written.
@@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode)
 		if (ret < 0)
 			ret2 = ret;
 		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+		io->flag &= ~EXT4_IO_END_IN_FSYNC;
 	}
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index ac8f168c8ab..fa8e4911d35 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
 		return -1;
 	}
 	hash = hash & ~1;
-	if (hash == (EXT4_HTREE_EOF << 1))
-		hash = (EXT4_HTREE_EOF-1) << 1;
+	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
+		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
 	hinfo->hash = hash;
 	hinfo->minor_hash = minor_hash;
 	return 0;
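[editor's note] The clamp keeps a legitimately computed name hash from colliding with the end-of-directory sentinel: major hashes are stored shifted left by one, and (EXT4_HTREE_EOF_32BIT << 1) is reserved to mean "no more entries". A standalone replay of the clamp (the constant is spelled out since the header isn't available here):

#include <assert.h>
#include <stdint.h>

#define EXT4_HTREE_EOF_32BIT    ((1UL << (32 - 1)) - 1)   /* 0x7fffffff */

static uint32_t clamp_hash(uint32_t hash)
{
        hash = hash & ~1;                               /* low bit is a flag */
        if (hash == (uint32_t)(EXT4_HTREE_EOF_32BIT << 1))
                hash = (EXT4_HTREE_EOF_32BIT - 1) << 1; /* step below EOF */
        return hash;
}

int main(void)
{
        assert(clamp_hash(0xfffffffe) == 0xfffffffc);   /* would equal EOF<<1 */
        assert(clamp_hash(0x12345679) == 0x12345678);   /* only low bit cleared */
        return 0;
}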
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad..409c2ee7750 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 	return EXT4_INODES_PER_GROUP(sb);
 }
 
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+		set_bitmap_uptodate(bh);
+	}
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
 /*
  * Read the inode allocation bitmap for a given block_group, reading
  * into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	/*
-	 * submit the buffer_head for read. We can
-	 * safely mark the bitmap as uptodate now.
-	 * We do it here so the bitmap uptodate bit
-	 * get set with buffer lock held.
+	 * submit the buffer_head for reading
 	 */
 	trace_ext4_load_inode_bitmap(sb, block_group);
-	set_bitmap_uptodate(bh);
-	if (bh_submit_read(bh) < 0) {
+	bh->b_end_io = ext4_end_bitmap_read;
+	get_bh(bh);
+	submit_bh(READ, bh);
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
 		put_bh(bh);
 		ext4_error(sb, "Cannot read inode bitmap - "
 			   "block_group = %u, inode_bitmap = %llu",
 			   block_group, bitmap_blk);
 		return NULL;
 	}
 	return bh;
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	struct ext4_sb_info *sbi;
 	int fatal = 0, err, count, cleared;
 
-	if (atomic_read(&inode->i_count) > 1) {
-		printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
-		       atomic_read(&inode->i_count));
+	if (!sb) {
+		printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+		       "nonexistent device\n", __func__, __LINE__);
 		return;
 	}
-	if (inode->i_nlink) {
-		printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
-		       inode->i_nlink);
+	if (atomic_read(&inode->i_count) > 1) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+			 __func__, __LINE__, inode->i_ino,
+			 atomic_read(&inode->i_count));
 		return;
 	}
-	if (!sb) {
-		printk(KERN_ERR "ext4_free_inode: inode on "
-		       "nonexistent device\n");
+	if (inode->i_nlink) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
 		return;
 	}
 	sbi = EXT4_SB(sb);
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 }
 
 /*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
-			struct buffer_head *inode_bitmap_bh,
-			unsigned long ino, ext4_group_t group, umode_t mode)
-{
-	int free = 0, retval = 0, count;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
-	/*
-	 * We have to be sure that new inode allocation does not race with
-	 * inode table initialization, because otherwise we may end up
-	 * allocating and writing new inode right before sb_issue_zeroout
-	 * takes place and overwriting our new inode with zeroes. So we
-	 * take alloc_sem to prevent it.
-	 */
-	down_read(&grp->alloc_sem);
-	ext4_lock_group(sb, group);
-	if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
-		/* not a free inode */
-		retval = 1;
-		goto err_ret;
-	}
-	ino++;
-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-			ino > EXT4_INODES_PER_GROUP(sb)) {
-		ext4_unlock_group(sb, group);
-		up_read(&grp->alloc_sem);
-		ext4_error(sb, "reserved inode or inode > inodes count - "
-			   "block_group = %u, inode=%lu", group,
-			   ino + group * EXT4_INODES_PER_GROUP(sb));
-		return 1;
-	}
-	/* If we didn't allocate from within the initialized part of the inode
-	 * table then we need to initialize up to this inode. */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-			/* When marking the block group with
-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unused even though
-			 * mke2fs could have initialized the same for us.
-			 * Instead we calculated the value below
-			 */
-
-			free = 0;
-		} else {
-			free = EXT4_INODES_PER_GROUP(sb) -
-				ext4_itable_unused_count(sb, gdp);
-		}
-
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to update the bg_itable_unused count
-		 *
-		 */
-		if (ino > free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - ino));
-	}
-	count = ext4_free_inodes_count(sb, gdp) - 1;
-	ext4_free_inodes_set(sb, gdp, count);
-	if (S_ISDIR(mode)) {
-		count = ext4_used_dirs_count(sb, gdp) + 1;
-		ext4_used_dirs_set(sb, gdp, count);
-		if (sbi->s_log_groups_per_flex) {
-			ext4_group_t f = ext4_flex_group(sbi, group);
-
-			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
-		}
-	}
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
-	ext4_unlock_group(sb, group);
-	up_read(&grp->alloc_sem);
-	return retval;
-}
-
-/*
  * There are two policies for allocating an inode. If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +664,11 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
+	/*
+	 * Normally we will only go through one pass of this loop,
+	 * unless we get unlucky and it turns out the group we selected
+	 * had its last inode grabbed by someone else.
+	 */
 	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
@@ -757,51 +685,24 @@ repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
 					      EXT4_INODES_PER_GROUP(sb), ino);
-
-		if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
-			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-							    inode_bitmap_bh);
-			if (err)
-				goto fail;
-
-			BUFFER_TRACE(group_desc_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-							    group_desc_bh);
-			if (err)
-				goto fail;
-			if (!ext4_claim_inode(sb, inode_bitmap_bh,
-						ino, group, mode)) {
-				/* we won it */
-				BUFFER_TRACE(inode_bitmap_bh,
-					"call ext4_handle_dirty_metadata");
-				err = ext4_handle_dirty_metadata(handle,
-								 NULL,
-							inode_bitmap_bh);
-				if (err)
-					goto fail;
-				/* zero bit is inode number 1*/
-				ino++;
-				goto got;
-			}
-			/* we lost it */
-			ext4_handle_release_buffer(handle, inode_bitmap_bh);
-			ext4_handle_release_buffer(handle, group_desc_bh);
-
-			if (++ino < EXT4_INODES_PER_GROUP(sb))
-				goto repeat_in_this_group;
+		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+			if (++group == ngroups)
+				group = 0;
+			continue;
 		}
-
-		/*
-		 * This case is possible in concurrent environment.  It is very
-		 * rare.  We cannot repeat the find_group_xxx() call because
-		 * that will simply return the same blockgroup, because the
-		 * group descriptor metadata has not yet been updated.
-		 * So we just go onto the next blockgroup.
-		 */
-		if (++group == ngroups)
-			group = 0;
+		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+			ext4_error(sb, "reserved inode found cleared - "
+				   "inode=%lu", ino + 1);
+			continue;
+		}
+		ext4_lock_group(sb, group);
+		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+		ext4_unlock_group(sb, group);
+		ino++;		/* the inode bitmap is zero-based */
+		if (!ret2)
+			goto got; /* we grabbed the inode! */
+		if (ino < EXT4_INODES_PER_GROUP(sb))
+			goto repeat_in_this_group;
 	}
 	err = -ENOSPC;
 	goto out;
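[editor's note] The rewritten loop replaces the heavyweight ext4_claim_inode() path with a plain test-and-set under the group lock: whoever flips the bitmap bit first owns the inode, and a loser simply advances to the next bit. A user-space model of that claim protocol using a C11 atomic in place of the group spinlock (names and sizes are illustrative):

#include <assert.h>
#include <stdatomic.h>

#define INODES_PER_GROUP 64

static atomic_ulong bitmap;     /* models one group's inode bitmap word */

/* returns the claimed 1-based inode number, or 0 if the group is full */
static unsigned long claim_inode(void)
{
        unsigned long ino = 0, old;

        while (ino < INODES_PER_GROUP) {
                old = atomic_fetch_or(&bitmap, 1UL << ino);
                if (!(old & (1UL << ino)))
                        return ino + 1; /* bitmap is zero-based; we won */
                ino++;                  /* lost the race, try the next bit */
        }
        return 0;
}

int main(void)
{
        assert(claim_inode() == 1);
        assert(claim_inode() == 2);     /* second caller gets the next bit */
        return 0;
}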
@@ -838,6 +739,59 @@ got:
 		if (err)
 			goto fail;
 	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	if (err)
+		goto fail;
+
+	/* Update the relevant bg descriptor fields */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		int free;
+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+		ext4_lock_group(sb, group); /* while we modify the bg desc */
+		free = EXT4_INODES_PER_GROUP(sb) -
+			ext4_itable_unused_count(sb, gdp);
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+			free = 0;
+		}
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to update the bg_itable_unused count
+		 */
+		if (ino > free)
+			ext4_itable_unused_set(sb, gdp,
+					(EXT4_INODES_PER_GROUP(sb) - ino));
+		up_read(&grp->alloc_sem);
+	}
+	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+	if (S_ISDIR(mode)) {
+		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+		if (sbi->s_log_groups_per_flex) {
+			ext4_group_t f = ext4_flex_group(sbi, group);
+
+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+		}
+	}
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+		ext4_unlock_group(sb, group);
+	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
 	if (err)
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
 * where it is called from on active part of filesystem is ext4lazyinit
 * thread, so we do not need any special locks, however we have to prevent
 * inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
 */
 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
				 int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			    sbi->s_inodes_per_block);
 
 	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
-		ext4_error(sb, "Something is wrong with group %u\n"
-			   "Used itable blocks: %d"
-			   "itable unused count: %u\n",
+		ext4_error(sb, "Something is wrong with group %u: "
+			   "used itable blocks: %d; "
+			   "itable unused count: %u",
 			   group, used_blks,
 			   ext4_itable_unused_count(sb, gdp));
 		ret = 1;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index feaa82fe629..c77b0bd2c71 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
 		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
-			 "with only %d reserved data blocks\n",
+			 "with only %d reserved data blocks",
 			 __func__, inode->i_ino, used,
 			 ei->i_reserved_data_blocks);
 		WARN_ON(1);
@@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 */
 		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
 			 "ino %lu, to_free %d with only %d reserved "
-			 "data blocks\n", inode->i_ino, to_free,
+			 "data blocks", inode->i_ino, to_free,
 			 ei->i_reserved_data_blocks);
 		WARN_ON(1);
 		to_free = ei->i_reserved_data_blocks;
@@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 static void ext4_print_free_blocks(struct inode *inode)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	printk(KERN_CRIT "Total free blocks count %lld\n",
+	struct super_block *sb = inode->i_sb;
+
+	ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
 	       EXT4_C2B(EXT4_SB(inode->i_sb),
 			ext4_count_free_clusters(inode->i_sb)));
-	printk(KERN_CRIT "Free/Dirty block details\n");
-	printk(KERN_CRIT "free_blocks=%lld\n",
+	ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
+	ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
 	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
 		percpu_counter_sum(&sbi->s_freeclusters_counter)));
-	printk(KERN_CRIT "dirty_blocks=%lld\n",
+	ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
 	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
 		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
-	printk(KERN_CRIT "Block reservation details\n");
-	printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
+	ext4_msg(sb, KERN_CRIT, "Block reservation details");
+	ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
 	       EXT4_I(inode)->i_reserved_data_blocks);
-	printk(KERN_CRIT "i_reserved_meta_blocks=%u\n",
+	ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
 	       EXT4_I(inode)->i_reserved_meta_blocks);
 	return;
 }
@@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file,
 	int write_mode = (int)(unsigned long)fsdata;
 
 	if (write_mode == FALL_BACK_TO_NONDELALLOC) {
-		if (ext4_should_order_data(inode)) {
+		switch (ext4_inode_journal_mode(inode)) {
+		case EXT4_INODE_ORDERED_DATA_MODE:
 			return ext4_ordered_write_end(file, mapping, pos,
 					len, copied, page, fsdata);
-		} else if (ext4_should_writeback_data(inode)) {
+		case EXT4_INODE_WRITEBACK_DATA_MODE:
 			return ext4_writeback_write_end(file, mapping, pos,
 					len, copied, page, fsdata);
-		} else {
+		default:
 			BUG();
 		}
 	}
@@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
-		  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
+		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
 		  iocb->private, io_end->inode->i_ino, iocb, offset,
 		  size);
 
@@ -2795,9 +2798,6 @@ out:
 
 	/* queue the work to convert unwritten extents to written */
 	queue_work(wq, &io_end->work);
-
-	/* XXX: probably should move into the real I/O completion handler */
-	inode_dio_done(inode);
 }
 
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -2811,8 +2811,9 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 		goto out;
 
 	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
-		printk("sb umounted, discard end_io request for inode %lu\n",
-		       io_end->inode->i_ino);
+		ext4_msg(io_end->inode->i_sb, KERN_INFO,
+			 "sb umounted, discard end_io request for inode %lu",
+			 io_end->inode->i_ino);
 		ext4_free_io_end(io_end);
 		goto out;
 	}
@@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		iocb->private = NULL;
 		EXT4_I(inode)->cur_aio_dio = NULL;
 		if (!is_sync_kiocb(iocb)) {
-			iocb->private = ext4_init_io_end(inode, GFP_NOFS);
-			if (!iocb->private)
+			ext4_io_end_t *io_end =
+				ext4_init_io_end(inode, GFP_NOFS);
+			if (!io_end)
 				return -ENOMEM;
+			io_end->flag |= EXT4_IO_END_DIRECT;
+			iocb->private = io_end;
 			/*
 			 * we save the io structure for current async
 			 * direct IO, so that later ext4_map_blocks()
@@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 					 ext4_get_block_write,
 					 ext4_end_io_dio,
 					 NULL,
-					 DIO_LOCKING | DIO_SKIP_HOLES);
+					 DIO_LOCKING);
 		if (iocb->private)
 			EXT4_I(inode)->cur_aio_dio = NULL;
 		/*
@@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = {
 
 void ext4_set_aops(struct inode *inode)
 {
-	if (ext4_should_order_data(inode) &&
-		test_opt(inode->i_sb, DELALLOC))
-		inode->i_mapping->a_ops = &ext4_da_aops;
-	else if (ext4_should_order_data(inode))
-		inode->i_mapping->a_ops = &ext4_ordered_aops;
-	else if (ext4_should_writeback_data(inode) &&
-		 test_opt(inode->i_sb, DELALLOC))
-		inode->i_mapping->a_ops = &ext4_da_aops;
-	else if (ext4_should_writeback_data(inode))
-		inode->i_mapping->a_ops = &ext4_writeback_aops;
-	else
+	switch (ext4_inode_journal_mode(inode)) {
+	case EXT4_INODE_ORDERED_DATA_MODE:
+		if (test_opt(inode->i_sb, DELALLOC))
+			inode->i_mapping->a_ops = &ext4_da_aops;
+		else
+			inode->i_mapping->a_ops = &ext4_ordered_aops;
+		break;
+	case EXT4_INODE_WRITEBACK_DATA_MODE:
+		if (test_opt(inode->i_sb, DELALLOC))
+			inode->i_mapping->a_ops = &ext4_da_aops;
+		else
+			inode->i_mapping->a_ops = &ext4_writeback_aops;
+		break;
+	case EXT4_INODE_JOURNAL_DATA_MODE:
 		inode->i_mapping->a_ops = &ext4_journalled_aops;
+		break;
+	default:
+		BUG();
+	}
 }
 
 
@@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	if (!S_ISREG(inode->i_mode))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		/* TODO: Add support for non extent hole punching */
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	return ext4_ext_punch_hole(file, offset, length);
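[editor's note] The errno swap matters to userspace: ENOTSUPP (524) is a kernel-internal value with no definition in <errno.h>, so applications that got it back from fallocate() saw "Unknown error 524", while EOPNOTSUPP (95) is the exported "Operation not supported" code. Easy to confirm from userspace:

#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        printf("EOPNOTSUPP=%d -> %s\n", EOPNOTSUPP, strerror(EOPNOTSUPP));
        printf("524 -> %s\n", strerror(524));   /* kernel-only ENOTSUPP */
        return 0;
}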
@@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle,
 			ext4_update_dynamic_rev(sb);
 			EXT4_SET_RO_COMPAT_FEATURE(sb,
 					EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
-			sb->s_dirt = 1;
 			ext4_handle_sync(handle);
-			err = ext4_handle_dirty_metadata(handle, NULL,
-					EXT4_SB(sb)->s_sbh);
+			err = ext4_handle_dirty_super(handle, sb);
 		}
 	}
 	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
+		if (attr->ia_size != i_size_read(inode))
 			truncate_setsize(inode, attr->ia_size);
-			ext4_truncate(inode);
-		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
-			ext4_truncate(inode);
+		ext4_truncate(inode);
 	}
 
 	if (!rc) {
@@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 {
 	int err = 0;
 
-	if (test_opt(inode->i_sb, I_VERSION))
+	if (IS_I_VERSION(inode))
 		inode_inc_iversion(inode);
 
 	/* the do_update_inode consumes one bh->b_count */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cb990b21c69..99ab428bcfa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include "ext4_jbd2.h"
24#include "mballoc.h" 25#include "mballoc.h"
25#include <linux/debugfs.h> 26#include <linux/debugfs.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -339,7 +340,7 @@
339 */ 340 */
340static struct kmem_cache *ext4_pspace_cachep; 341static struct kmem_cache *ext4_pspace_cachep;
341static struct kmem_cache *ext4_ac_cachep; 342static struct kmem_cache *ext4_ac_cachep;
342static struct kmem_cache *ext4_free_ext_cachep; 343static struct kmem_cache *ext4_free_data_cachep;
343 344
344/* We create slab caches for groupinfo data structures based on the 345/* We create slab caches for groupinfo data structures based on the
345 * superblock block size. There will be one per mounted filesystem for 346 * superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
357 ext4_group_t group); 358 ext4_group_t group);
358static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 359static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
359 ext4_group_t group); 360 ext4_group_t group);
360static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 361static void ext4_free_data_callback(struct super_block *sb,
362 struct ext4_journal_cb_entry *jce, int rc);
361 363
362static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 364static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
363{ 365{
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
425{ 427{
426 char *bb; 428 char *bb;
427 429
428 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 430 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
429 BUG_ON(max == NULL); 431 BUG_ON(max == NULL);
430 432
431 if (order > e4b->bd_blkbits + 1) { 433 if (order > e4b->bd_blkbits + 1) {
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
436 /* at order 0 we see each particular block */ 438 /* at order 0 we see each particular block */
437 if (order == 0) { 439 if (order == 0) {
438 *max = 1 << (e4b->bd_blkbits + 3); 440 *max = 1 << (e4b->bd_blkbits + 3);
439 return EXT4_MB_BITMAP(e4b); 441 return e4b->bd_bitmap;
440 } 442 }
441 443
442 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 444 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
443 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 445 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
444 446
445 return bb; 447 return bb;
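
Reading this hunk: the order-0 case returns the block bitmap itself and sets *max to 1 << (bd_blkbits + 3), i.e. the number of bits in one block-sized bitmap (block size in bytes times 8); higher orders index into the buddy through the precomputed s_mb_offsets/s_mb_maxs arrays. The arithmetic worked through for an assumed 4 KiB block size (the numbers below are derived, not quoted from the kernel):

    #include <stdio.h>

    int main(void)
    {
            int blkbits = 12;                     /* 4096-byte blocks */
            int order0_bits = 1 << (blkbits + 3); /* 4096 * 8 = 32768 bits */

            /* One bit per block at order 0; each higher order tracks
             * pairs of buddies, so the bit count halves every level. */
            for (int order = 0; order <= 3; order++)
                    printf("order %d: %d bits\n", order, order0_bits >> order);
            return 0;
    }
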
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
588 for (j = 0; j < (1 << order); j++) { 590 for (j = 0; j < (1 << order); j++) {
589 k = (i * (1 << order)) + j; 591 k = (i * (1 << order)) + j;
590 MB_CHECK_ASSERT( 592 MB_CHECK_ASSERT(
591 !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); 593 !mb_test_bit(k, e4b->bd_bitmap));
592 } 594 }
593 count++; 595 count++;
594 } 596 }
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
782 int groups_per_page; 784 int groups_per_page;
783 int err = 0; 785 int err = 0;
784 int i; 786 int i;
785 ext4_group_t first_group; 787 ext4_group_t first_group, group;
786 int first_block; 788 int first_block;
787 struct super_block *sb; 789 struct super_block *sb;
788 struct buffer_head *bhs; 790 struct buffer_head *bhs;
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
806 808
807 /* allocate buffer_heads to read bitmaps */ 809 /* allocate buffer_heads to read bitmaps */
808 if (groups_per_page > 1) { 810 if (groups_per_page > 1) {
809 err = -ENOMEM;
810 i = sizeof(struct buffer_head *) * groups_per_page; 811 i = sizeof(struct buffer_head *) * groups_per_page;
811 bh = kzalloc(i, GFP_NOFS); 812 bh = kzalloc(i, GFP_NOFS);
812 if (bh == NULL) 813 if (bh == NULL) {
814 err = -ENOMEM;
813 goto out; 815 goto out;
816 }
814 } else 817 } else
815 bh = &bhs; 818 bh = &bhs;
816 819
817 first_group = page->index * blocks_per_page / 2; 820 first_group = page->index * blocks_per_page / 2;
818 821
819 /* read all groups the page covers into the cache */ 822 /* read all groups the page covers into the cache */
820 for (i = 0; i < groups_per_page; i++) { 823 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
821 struct ext4_group_desc *desc; 824 if (group >= ngroups)
822
823 if (first_group + i >= ngroups)
824 break; 825 break;
825 826
826 grinfo = ext4_get_group_info(sb, first_group + i); 827 grinfo = ext4_get_group_info(sb, group);
827 /* 828 /*
828 * If page is uptodate then we came here after online resize 829 * If page is uptodate then we came here after online resize
829 * which added some new uninitialized group info structs, so 830 * which added some new uninitialized group info structs, so
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
834 bh[i] = NULL; 835 bh[i] = NULL;
835 continue; 836 continue;
836 } 837 }
837 838 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
838 err = -EIO; 839 err = -ENOMEM;
839 desc = ext4_get_group_desc(sb, first_group + i, NULL);
840 if (desc == NULL)
841 goto out;
842
843 err = -ENOMEM;
844 bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
845 if (bh[i] == NULL)
846 goto out; 840 goto out;
847
848 if (bitmap_uptodate(bh[i]))
849 continue;
850
851 lock_buffer(bh[i]);
852 if (bitmap_uptodate(bh[i])) {
853 unlock_buffer(bh[i]);
854 continue;
855 }
856 ext4_lock_group(sb, first_group + i);
857 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
858 ext4_init_block_bitmap(sb, bh[i],
859 first_group + i, desc);
860 set_bitmap_uptodate(bh[i]);
861 set_buffer_uptodate(bh[i]);
862 ext4_unlock_group(sb, first_group + i);
863 unlock_buffer(bh[i]);
864 continue;
865 } 841 }
866 ext4_unlock_group(sb, first_group + i); 842 mb_debug(1, "read bitmap for group %u\n", group);
867 if (buffer_uptodate(bh[i])) {
868 /*
869 * if not uninit if bh is uptodate,
870 * bitmap is also uptodate
871 */
872 set_bitmap_uptodate(bh[i]);
873 unlock_buffer(bh[i]);
874 continue;
875 }
876 get_bh(bh[i]);
877 /*
878 * submit the buffer_head for read. We can
879 * safely mark the bitmap as uptodate now.
880 * We do it here so the bitmap uptodate bit
881 * get set with buffer lock held.
882 */
883 set_bitmap_uptodate(bh[i]);
884 bh[i]->b_end_io = end_buffer_read_sync;
885 submit_bh(READ, bh[i]);
886 mb_debug(1, "read bitmap for group %u\n", first_group + i);
887 } 843 }
888 844
889 /* wait for I/O completion */ 845 /* wait for I/O completion */
890 for (i = 0; i < groups_per_page; i++) 846 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
891 if (bh[i]) 847 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
892 wait_on_buffer(bh[i]); 848 err = -EIO;
893
894 err = -EIO;
895 for (i = 0; i < groups_per_page; i++)
896 if (bh[i] && !buffer_uptodate(bh[i]))
897 goto out; 849 goto out;
850 }
851 }
898 852
899 err = 0;
900 first_block = page->index * blocks_per_page; 853 first_block = page->index * blocks_per_page;
901 for (i = 0; i < blocks_per_page; i++) { 854 for (i = 0; i < blocks_per_page; i++) {
902 int group; 855 int group;
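
The rewritten loop above drops roughly fifty lines of open-coded buffer_head handling in favor of an ext4_read_block_bitmap_nowait()/ext4_wait_block_bitmap() pair: every bitmap read on the page is submitted first and only then waited on, so the I/O proceeds in parallel instead of serializing group by group. A generic model of that submit-all-then-wait shape, with threads standing in for asynchronous block I/O (the helper names here are invented for illustration):

    #include <pthread.h>
    #include <stdio.h>

    #define NGROUPS 4

    struct req { int id; pthread_t tid; };

    static void *read_bitmap(void *arg) /* stands in for the nowait read */
    {
            struct req *r = arg;
            printf("reading bitmap for group %d\n", r->id);
            return NULL;
    }

    int main(void)
    {
            struct req reqs[NGROUPS];

            /* Pass 1: fire off every read without blocking ("nowait"). */
            for (int i = 0; i < NGROUPS; i++) {
                    reqs[i].id = i;
                    pthread_create(&reqs[i].tid, NULL, read_bitmap, &reqs[i]);
            }
            /* Pass 2: wait for each completion and check it ("wait"). */
            for (int i = 0; i < NGROUPS; i++)
                    pthread_join(reqs[i].tid, NULL);
            return 0;
    }
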
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1250 int order = 1; 1203 int order = 1;
1251 void *bb; 1204 void *bb;
1252 1205
1253 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 1206 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1254 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); 1207 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1255 1208
1256 bb = EXT4_MB_BUDDY(e4b); 1209 bb = e4b->bd_buddy;
1257 while (order <= e4b->bd_blkbits + 1) { 1210 while (order <= e4b->bd_blkbits + 1) {
1258 block = block >> 1; 1211 block = block >> 1;
1259 if (!mb_test_bit(block, bb)) { 1212 if (!mb_test_bit(block, bb)) {
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1323 1276
1324 /* let's maintain fragments counter */ 1277 /* let's maintain fragments counter */
1325 if (first != 0) 1278 if (first != 0)
1326 block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); 1279 block = !mb_test_bit(first - 1, e4b->bd_bitmap);
1327 if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) 1280 if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
1328 max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); 1281 max = !mb_test_bit(first + count, e4b->bd_bitmap);
1329 if (block && max) 1282 if (block && max)
1330 e4b->bd_info->bb_fragments--; 1283 e4b->bd_info->bb_fragments--;
1331 else if (!block && !max) 1284 else if (!block && !max)
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1336 block = first++; 1289 block = first++;
1337 order = 0; 1290 order = 0;
1338 1291
1339 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1292 if (!mb_test_bit(block, e4b->bd_bitmap)) {
1340 ext4_fsblk_t blocknr; 1293 ext4_fsblk_t blocknr;
1341 1294
1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1295 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1347 "freeing already freed block " 1300 "freeing already freed block "
1348 "(bit %u)", block); 1301 "(bit %u)", block);
1349 } 1302 }
1350 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1303 mb_clear_bit(block, e4b->bd_bitmap);
1351 e4b->bd_info->bb_counters[order]++; 1304 e4b->bd_info->bb_counters[order]++;
1352 1305
1353 /* start of the buddy */ 1306 /* start of the buddy */
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1429 break; 1382 break;
1430 1383
1431 next = (block + 1) * (1 << order); 1384 next = (block + 1) * (1 << order);
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1385 if (mb_test_bit(next, e4b->bd_bitmap))
1433 break; 1386 break;
1434 1387
1435 order = mb_find_order_for_block(e4b, next); 1388 order = mb_find_order_for_block(e4b, next);
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1466 1419
1467 /* let's maintain fragments counter */ 1420 /* let's maintain fragments counter */
1468 if (start != 0) 1421 if (start != 0)
1469 mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); 1422 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1470 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) 1423 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1471 max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); 1424 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1472 if (mlen && max) 1425 if (mlen && max)
1473 e4b->bd_info->bb_fragments++; 1426 e4b->bd_info->bb_fragments++;
1474 else if (!mlen && !max) 1427 else if (!mlen && !max)
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1511 } 1464 }
1512 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1465 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1513 1466
1514 ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); 1467 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1515 mb_check_buddy(e4b); 1468 mb_check_buddy(e4b);
1516 1469
1517 return ret; 1470 return ret;
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1810 struct ext4_buddy *e4b) 1763 struct ext4_buddy *e4b)
1811{ 1764{
1812 struct super_block *sb = ac->ac_sb; 1765 struct super_block *sb = ac->ac_sb;
1813 void *bitmap = EXT4_MB_BITMAP(e4b); 1766 void *bitmap = e4b->bd_bitmap;
1814 struct ext4_free_extent ex; 1767 struct ext4_free_extent ex;
1815 int i; 1768 int i;
1816 int free; 1769 int free;
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1870{ 1823{
1871 struct super_block *sb = ac->ac_sb; 1824 struct super_block *sb = ac->ac_sb;
1872 struct ext4_sb_info *sbi = EXT4_SB(sb); 1825 struct ext4_sb_info *sbi = EXT4_SB(sb);
1873 void *bitmap = EXT4_MB_BITMAP(e4b); 1826 void *bitmap = e4b->bd_bitmap;
1874 struct ext4_free_extent ex; 1827 struct ext4_free_extent ex;
1875 ext4_fsblk_t first_group_block; 1828 ext4_fsblk_t first_group_block;
1876 ext4_fsblk_t a; 1829 ext4_fsblk_t a;
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2224 EXT4_DESC_PER_BLOCK_BITS(sb); 2177 EXT4_DESC_PER_BLOCK_BITS(sb);
2225 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2178 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2226 if (meta_group_info == NULL) { 2179 if (meta_group_info == NULL) {
2227 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " 2180 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2228 "for a buddy group"); 2181 "for a buddy group");
2229 goto exit_meta_group_info; 2182 goto exit_meta_group_info;
2230 } 2183 }
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2238 2191
2239 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2192 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2240 if (meta_group_info[i] == NULL) { 2193 if (meta_group_info[i] == NULL) {
2241 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); 2194 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2242 goto exit_group_info; 2195 goto exit_group_info;
2243 } 2196 }
2244 memset(meta_group_info[i], 0, kmem_cache_size(cachep)); 2197 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2475 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2523 &ext4_mb_seq_groups_fops, sb); 2476 &ext4_mb_seq_groups_fops, sb);
2524 2477
2525 if (sbi->s_journal)
2526 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2527
2528 return 0; 2478 return 0;
2529 2479
2530out_free_locality_groups: 2480out_free_locality_groups:
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
2637 * This function is called by the jbd2 layer once the commit has finished, 2587 * This function is called by the jbd2 layer once the commit has finished,
2638 * so we know we can free the blocks that were released with that commit. 2588 * so we know we can free the blocks that were released with that commit.
2639 */ 2589 */
2640static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2590static void ext4_free_data_callback(struct super_block *sb,
2591 struct ext4_journal_cb_entry *jce,
2592 int rc)
2641{ 2593{
2642 struct super_block *sb = journal->j_private; 2594 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2643 struct ext4_buddy e4b; 2595 struct ext4_buddy e4b;
2644 struct ext4_group_info *db; 2596 struct ext4_group_info *db;
2645 int err, count = 0, count2 = 0; 2597 int err, count = 0, count2 = 0;
2646 struct ext4_free_data *entry;
2647 struct list_head *l, *ltmp;
2648 2598
2649 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2599 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2650 entry = list_entry(l, struct ext4_free_data, list); 2600 entry->efd_count, entry->efd_group, entry);
2651 2601
2652 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2602 if (test_opt(sb, DISCARD))
2653 entry->count, entry->group, entry); 2603 ext4_issue_discard(sb, entry->efd_group,
2604 entry->efd_start_cluster, entry->efd_count);
2654 2605
2655 if (test_opt(sb, DISCARD)) 2606 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2656 ext4_issue_discard(sb, entry->group, 2607 /* we expect to find existing buddy because it's pinned */
2657 entry->start_cluster, entry->count); 2608 BUG_ON(err != 0);
2658 2609
2659 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2660 /* we expect to find existing buddy because it's pinned */
2661 BUG_ON(err != 0);
2662 2610
2663 db = e4b.bd_info; 2611 db = e4b.bd_info;
2664 /* there are blocks to put in buddy to make them really free */ 2612 /* there are blocks to put in buddy to make them really free */
2665 count += entry->count; 2613 count += entry->efd_count;
2666 count2++; 2614 count2++;
2667 ext4_lock_group(sb, entry->group); 2615 ext4_lock_group(sb, entry->efd_group);
2668 /* Take it out of per group rb tree */ 2616 /* Take it out of per group rb tree */
2669 rb_erase(&entry->node, &(db->bb_free_root)); 2617 rb_erase(&entry->efd_node, &(db->bb_free_root));
2670 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); 2618 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2671 2619
2672 /* 2620 /*
2673 * Clear the trimmed flag for the group so that the next 2621 * Clear the trimmed flag for the group so that the next
2674 * ext4_trim_fs can trim it. 2622 * ext4_trim_fs can trim it.
2675 * If the volume is mounted with -o discard, online discard 2623 * If the volume is mounted with -o discard, online discard
2676 * is supported and the free blocks will be trimmed online. 2624 * is supported and the free blocks will be trimmed online.
2677 */ 2625 */
2678 if (!test_opt(sb, DISCARD)) 2626 if (!test_opt(sb, DISCARD))
2679 EXT4_MB_GRP_CLEAR_TRIMMED(db); 2627 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2680 2628
2681 if (!db->bb_free_root.rb_node) { 2629 if (!db->bb_free_root.rb_node) {
2682 /* No more items in the per group rb tree 2630 /* No more items in the per group rb tree
2683 * balance refcounts from ext4_mb_free_metadata() 2631 * balance refcounts from ext4_mb_free_metadata()
2684 */ 2632 */
2685 page_cache_release(e4b.bd_buddy_page); 2633 page_cache_release(e4b.bd_buddy_page);
2686 page_cache_release(e4b.bd_bitmap_page); 2634 page_cache_release(e4b.bd_bitmap_page);
2687 }
2688 ext4_unlock_group(sb, entry->group);
2689 kmem_cache_free(ext4_free_ext_cachep, entry);
2690 ext4_mb_unload_buddy(&e4b);
2691 } 2635 }
2636 ext4_unlock_group(sb, entry->efd_group);
2637 kmem_cache_free(ext4_free_data_cachep, entry);
2638 ext4_mb_unload_buddy(&e4b);
2692 2639
2693 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2640 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2694} 2641}
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void)
2741 return -ENOMEM; 2688 return -ENOMEM;
2742 } 2689 }
2743 2690
2744 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, 2691 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2745 SLAB_RECLAIM_ACCOUNT); 2692 SLAB_RECLAIM_ACCOUNT);
2746 if (ext4_free_ext_cachep == NULL) { 2693 if (ext4_free_data_cachep == NULL) {
2747 kmem_cache_destroy(ext4_pspace_cachep); 2694 kmem_cache_destroy(ext4_pspace_cachep);
2748 kmem_cache_destroy(ext4_ac_cachep); 2695 kmem_cache_destroy(ext4_ac_cachep);
2749 return -ENOMEM; 2696 return -ENOMEM;
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void)
2761 rcu_barrier(); 2708 rcu_barrier();
2762 kmem_cache_destroy(ext4_pspace_cachep); 2709 kmem_cache_destroy(ext4_pspace_cachep);
2763 kmem_cache_destroy(ext4_ac_cachep); 2710 kmem_cache_destroy(ext4_ac_cachep);
2764 kmem_cache_destroy(ext4_free_ext_cachep); 2711 kmem_cache_destroy(ext4_free_data_cachep);
2765 ext4_groupinfo_destroy_slabs(); 2712 ext4_groupinfo_destroy_slabs();
2766 ext4_remove_debugfs_entry(); 2713 ext4_remove_debugfs_entry();
2767} 2714}
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 2762 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2816 if (!ext4_data_block_valid(sbi, block, len)) { 2763 if (!ext4_data_block_valid(sbi, block, len)) {
2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2764 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2818 "fs metadata\n", block, block+len); 2765 "fs metadata", block, block+len);
2819 /* File system mounted not to panic on error 2766 /* File system mounted not to panic on error
2820 * Fix the bitmap and repeat the block allocation 2767 * Fix the bitmap and repeat the block allocation
2821 * We leak some of the blocks here. 2768 * We leak some of the blocks here.
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2858 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2912 int bsbits, max; 2859 int bsbits, max;
2913 ext4_lblk_t end; 2860 ext4_lblk_t end;
2914 loff_t size, orig_size, start_off; 2861 loff_t size, start_off;
2862 loff_t orig_size __maybe_unused;
2915 ext4_lblk_t start; 2863 ext4_lblk_t start;
2916 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2864 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2917 struct ext4_prealloc_space *pa; 2865 struct ext4_prealloc_space *pa;
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3321 n = rb_first(&(grp->bb_free_root)); 3269 n = rb_first(&(grp->bb_free_root));
3322 3270
3323 while (n) { 3271 while (n) {
3324 entry = rb_entry(n, struct ext4_free_data, node); 3272 entry = rb_entry(n, struct ext4_free_data, efd_node);
3325 ext4_set_bits(bitmap, entry->start_cluster, entry->count); 3273 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3326 n = rb_next(n); 3274 n = rb_next(n);
3327 } 3275 }
3328 return; 3276 return;
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3864 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3917 return; 3865 return;
3918 3866
3919 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" 3867 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
3920 " Allocation context details:"); 3868 " Allocation context details:");
3921 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", 3869 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
3922 ac->ac_status, ac->ac_flags); 3870 ac->ac_status, ac->ac_flags);
3923 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " 3871 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
3924 "goal %lu/%lu/%lu@%lu, " 3872 "goal %lu/%lu/%lu@%lu, "
3925 "best %lu/%lu/%lu@%lu cr %d", 3873 "best %lu/%lu/%lu@%lu cr %d",
3926 (unsigned long)ac->ac_o_ex.fe_group, 3874 (unsigned long)ac->ac_o_ex.fe_group,
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3936 (unsigned long)ac->ac_b_ex.fe_len, 3884 (unsigned long)ac->ac_b_ex.fe_len,
3937 (unsigned long)ac->ac_b_ex.fe_logical, 3885 (unsigned long)ac->ac_b_ex.fe_logical,
3938 (int)ac->ac_criteria); 3886 (int)ac->ac_criteria);
3939 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", 3887 ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
3940 ac->ac_ex_scanned, ac->ac_found); 3888 ac->ac_ex_scanned, ac->ac_found);
3941 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); 3889 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
3942 ngroups = ext4_get_groups_count(sb); 3890 ngroups = ext4_get_groups_count(sb);
3943 for (i = 0; i < ngroups; i++) { 3891 for (i = 0; i < ngroups; i++) {
3944 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 3892 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4428,9 +4376,9 @@ out:
4428static int can_merge(struct ext4_free_data *entry1, 4376static int can_merge(struct ext4_free_data *entry1,
4429 struct ext4_free_data *entry2) 4377 struct ext4_free_data *entry2)
4430{ 4378{
4431 if ((entry1->t_tid == entry2->t_tid) && 4379 if ((entry1->efd_tid == entry2->efd_tid) &&
4432 (entry1->group == entry2->group) && 4380 (entry1->efd_group == entry2->efd_group) &&
4433 ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) 4381 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4434 return 1; 4382 return 1;
4435 return 0; 4383 return 0;
4436} 4384}
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4452 BUG_ON(e4b->bd_bitmap_page == NULL); 4400 BUG_ON(e4b->bd_bitmap_page == NULL);
4453 BUG_ON(e4b->bd_buddy_page == NULL); 4401 BUG_ON(e4b->bd_buddy_page == NULL);
4454 4402
4455 new_node = &new_entry->node; 4403 new_node = &new_entry->efd_node;
4456 cluster = new_entry->start_cluster; 4404 cluster = new_entry->efd_start_cluster;
4457 4405
4458 if (!*n) { 4406 if (!*n) {
4459 /* first free block extent. We need to 4407
@@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4466 } 4414 }
4467 while (*n) { 4415 while (*n) {
4468 parent = *n; 4416 parent = *n;
4469 entry = rb_entry(parent, struct ext4_free_data, node); 4417 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4470 if (cluster < entry->start_cluster) 4418 if (cluster < entry->efd_start_cluster)
4471 n = &(*n)->rb_left; 4419 n = &(*n)->rb_left;
4472 else if (cluster >= (entry->start_cluster + entry->count)) 4420 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4473 n = &(*n)->rb_right; 4421 n = &(*n)->rb_right;
4474 else { 4422 else {
4475 ext4_grp_locked_error(sb, group, 0, 4423 ext4_grp_locked_error(sb, group, 0,
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4486 /* Now try to see the extent can be merged to left and right */ 4434 /* Now try to see the extent can be merged to left and right */
4487 node = rb_prev(new_node); 4435 node = rb_prev(new_node);
4488 if (node) { 4436 if (node) {
4489 entry = rb_entry(node, struct ext4_free_data, node); 4437 entry = rb_entry(node, struct ext4_free_data, efd_node);
4490 if (can_merge(entry, new_entry)) { 4438 if (can_merge(entry, new_entry)) {
4491 new_entry->start_cluster = entry->start_cluster; 4439 new_entry->efd_start_cluster = entry->efd_start_cluster;
4492 new_entry->count += entry->count; 4440 new_entry->efd_count += entry->efd_count;
4493 rb_erase(node, &(db->bb_free_root)); 4441 rb_erase(node, &(db->bb_free_root));
4494 spin_lock(&sbi->s_md_lock); 4442 ext4_journal_callback_del(handle, &entry->efd_jce);
4495 list_del(&entry->list); 4443 kmem_cache_free(ext4_free_data_cachep, entry);
4496 spin_unlock(&sbi->s_md_lock);
4497 kmem_cache_free(ext4_free_ext_cachep, entry);
4498 } 4444 }
4499 } 4445 }
4500 4446
4501 node = rb_next(new_node); 4447 node = rb_next(new_node);
4502 if (node) { 4448 if (node) {
4503 entry = rb_entry(node, struct ext4_free_data, node); 4449 entry = rb_entry(node, struct ext4_free_data, efd_node);
4504 if (can_merge(new_entry, entry)) { 4450 if (can_merge(new_entry, entry)) {
4505 new_entry->count += entry->count; 4451 new_entry->efd_count += entry->efd_count;
4506 rb_erase(node, &(db->bb_free_root)); 4452 rb_erase(node, &(db->bb_free_root));
4507 spin_lock(&sbi->s_md_lock); 4453 ext4_journal_callback_del(handle, &entry->efd_jce);
4508 list_del(&entry->list); 4454 kmem_cache_free(ext4_free_data_cachep, entry);
4509 spin_unlock(&sbi->s_md_lock);
4510 kmem_cache_free(ext4_free_ext_cachep, entry);
4511 } 4455 }
4512 } 4456 }
4513 /* Add the extent to transaction's private list */ 4457 /* Add the extent to transaction's private list */
4514 spin_lock(&sbi->s_md_lock); 4458 ext4_journal_callback_add(handle, ext4_free_data_callback,
4515 list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4459 &new_entry->efd_jce);
4516 spin_unlock(&sbi->s_md_lock);
4517 return 0; 4460 return 0;
4518} 4461}
4519 4462
@@ -4691,15 +4634,15 @@ do_more:
4691 * blocks being freed are metadata. these blocks shouldn't 4634 * blocks being freed are metadata. these blocks shouldn't
4692 * be used until this transaction is committed 4635 * be used until this transaction is committed
4693 */ 4636 */
4694 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4637 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4695 if (!new_entry) { 4638 if (!new_entry) {
4696 err = -ENOMEM; 4639 err = -ENOMEM;
4697 goto error_return; 4640 goto error_return;
4698 } 4641 }
4699 new_entry->start_cluster = bit; 4642 new_entry->efd_start_cluster = bit;
4700 new_entry->group = block_group; 4643 new_entry->efd_group = block_group;
4701 new_entry->count = count_clusters; 4644 new_entry->efd_count = count_clusters;
4702 new_entry->t_tid = handle->h_transaction->t_tid; 4645 new_entry->efd_tid = handle->h_transaction->t_tid;
4703 4646
4704 ext4_lock_group(sb, block_group); 4647 ext4_lock_group(sb, block_group);
4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4648 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4971 start = (e4b.bd_info->bb_first_free > start) ? 4914 start = (e4b.bd_info->bb_first_free > start) ?
4972 e4b.bd_info->bb_first_free : start; 4915 e4b.bd_info->bb_first_free : start;
4973 4916
4974 while (start < max) { 4917 while (start <= max) {
4975 start = mb_find_next_zero_bit(bitmap, max, start); 4918 start = mb_find_next_zero_bit(bitmap, max + 1, start);
4976 if (start >= max) 4919 if (start > max)
4977 break; 4920 break;
4978 next = mb_find_next_bit(bitmap, max, start); 4921 next = mb_find_next_bit(bitmap, max + 1, start);
4979 4922
4980 if ((next - start) >= minblocks) { 4923 if ((next - start) >= minblocks) {
4981 ext4_trim_extent(sb, start, 4924 ext4_trim_extent(sb, start,
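
The bound changes in this hunk make max an inclusive last-cluster index: the scan runs while start <= max and hands max + 1 to the find-bit helpers, which take an exclusive size. Getting that off-by-one wrong either skips the final cluster or reads one bit past the range. A self-contained model of the corrected loop, with naive scans standing in for mb_find_next_zero_bit()/mb_find_next_bit():

    #include <stdio.h>

    /* Return the first index >= start whose bit equals want,
     * or size (the exclusive limit) if none is found. */
    static int find_next(const int *bm, int size, int start, int want)
    {
            while (start < size && bm[start] != want)
                    start++;
            return start;
    }

    int main(void)
    {
            int bitmap[8] = { 1, 0, 0, 1, 0, 0, 0, 1 }; /* 1 = in use */
            int max = 7;                                /* inclusive bound */
            int start = 0;

            while (start <= max) {
                    start = find_next(bitmap, max + 1, start, 0);
                    if (start > max)
                            break;
                    int next = find_next(bitmap, max + 1, start, 1);
                    printf("free extent [%d, %d)\n", start, next);
                    start = next;
            }
            return 0;
    }
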
@@ -5027,37 +4970,36 @@ out:
5027int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) 4970int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5028{ 4971{
5029 struct ext4_group_info *grp; 4972 struct ext4_group_info *grp;
5030 ext4_group_t first_group, last_group; 4973 ext4_group_t group, first_group, last_group;
5031 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
5032 ext4_grpblk_t cnt = 0, first_cluster, last_cluster; 4974 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
5033 uint64_t start, len, minlen, trimmed = 0; 4975 uint64_t start, end, minlen, trimmed = 0;
5034 ext4_fsblk_t first_data_blk = 4976 ext4_fsblk_t first_data_blk =
5035 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 4977 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4978 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
5036 int ret = 0; 4979 int ret = 0;
5037 4980
5038 start = range->start >> sb->s_blocksize_bits; 4981 start = range->start >> sb->s_blocksize_bits;
5039 len = range->len >> sb->s_blocksize_bits; 4982 end = start + (range->len >> sb->s_blocksize_bits) - 1;
5040 minlen = range->minlen >> sb->s_blocksize_bits; 4983 minlen = range->minlen >> sb->s_blocksize_bits;
5041 4984
5042 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) 4985 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
4986 unlikely(start >= max_blks))
5043 return -EINVAL; 4987 return -EINVAL;
5044 if (start + len <= first_data_blk) 4988 if (end >= max_blks)
4989 end = max_blks - 1;
4990 if (end <= first_data_blk)
5045 goto out; 4991 goto out;
5046 if (start < first_data_blk) { 4992 if (start < first_data_blk)
5047 len -= first_data_blk - start;
5048 start = first_data_blk; 4993 start = first_data_blk;
5049 }
5050 4994
5051 /* Determine first and last group to examine based on start and len */ 4995 /* Determine first and last group to examine based on start and end */
5052 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 4996 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5053 &first_group, &first_cluster); 4997 &first_group, &first_cluster);
5054 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 4998 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5055 &last_group, &last_cluster); 4999 &last_group, &last_cluster);
5056 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
5057 last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
5058 5000
5059 if (first_group > last_group) 5001 /* end now represents the last cluster to discard in this group */
5060 return -EINVAL; 5002 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5061 5003
5062 for (group = first_group; group <= last_group; group++) { 5004 for (group = first_group; group <= last_group; group++) {
5063 grp = ext4_get_group_info(sb, group); 5005 grp = ext4_get_group_info(sb, group);
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5069 } 5011 }
5070 5012
5071 /* 5013 /*
5072 * For all the groups except the last one, last block will 5014 * For all the groups except the last one, last cluster will
5073 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to 5015 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
5074 * change it for the last group in which case start + 5016 * change it for the last group, note that last_cluster is
5075 * len < EXT4_BLOCKS_PER_GROUP(sb). 5017 * already computed earlier by ext4_get_group_no_and_offset()
5076 */ 5018 */
5077 if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) 5019 if (group == last_group)
5078 last_cluster = first_cluster + len; 5020 end = last_cluster;
5079 len -= last_cluster - first_cluster;
5080 5021
5081 if (grp->bb_free >= minlen) { 5022 if (grp->bb_free >= minlen) {
5082 cnt = ext4_trim_all_free(sb, group, first_cluster, 5023 cnt = ext4_trim_all_free(sb, group, first_cluster,
5083 last_cluster, minlen); 5024 end, minlen);
5084 if (cnt < 0) { 5025 if (cnt < 0) {
5085 ret = cnt; 5026 ret = cnt;
5086 break; 5027 break;
5087 } 5028 }
5029 trimmed += cnt;
5088 } 5030 }
5089 trimmed += cnt; 5031
5032 /*
5033 * For every group except the first one, we are sure
5034 * that the first cluster to discard will be cluster #0.
5035 */
5090 first_cluster = 0; 5036 first_cluster = 0;
5091 } 5037 }
5092 range->len = trimmed * sb->s_blocksize;
5093 5038
5094 if (!ret) 5039 if (!ret)
5095 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5040 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5096 5041
5097out: 5042out:
5043 range->len = trimmed * sb->s_blocksize;
5098 return ret; 5044 return ret;
5099} 5045}
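
ext4_trim_fs() now computes an inclusive end block (start + len - 1, clamped to the last block of the filesystem) and maps both endpoints to (group, cluster) pairs via ext4_get_group_no_and_offset(), which removes the old len bookkeeping inside the loop. That mapping is plain division and remainder; a worked example with made-up geometry:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t per_group = 32768;      /* example blocks-per-group */
            uint64_t start = 100000, len = 500000;
            uint64_t end = start + len - 1;  /* inclusive, as in the patch */

            uint64_t first_group   = start / per_group;  /* 3 */
            uint64_t first_cluster = start % per_group;  /* 1696 */
            uint64_t last_group    = end / per_group;    /* 18 */
            uint64_t last_cluster  = end % per_group;    /* 10175 */

            printf("groups %llu..%llu, cluster %llu through %llu\n",
                   (unsigned long long)first_group,
                   (unsigned long long)last_group,
                   (unsigned long long)first_cluster,
                   (unsigned long long)last_cluster);
            return 0;
    }
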
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 47705f3285e..c070618c21c 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
96 96
97 97
98struct ext4_free_data { 98struct ext4_free_data {
99 /* this links the free block information from group_info */ 99 /* MUST be the first member */
100 struct rb_node node; 100 struct ext4_journal_cb_entry efd_jce;
101
102 /* ext4_free_data private data starts from here */
101 103
102 /* this links the free block information from ext4_sb_info */ 104 /* this links the free block information from group_info */
103 struct list_head list; 105 struct rb_node efd_node;
104 106
105 /* group which free block extent belongs */ 107 /* group which free block extent belongs */
106 ext4_group_t group; 108 ext4_group_t efd_group;
107 109
108 /* free block extent */ 110 /* free block extent */
109 ext4_grpblk_t start_cluster; 111 ext4_grpblk_t efd_start_cluster;
110 ext4_grpblk_t count; 112 ext4_grpblk_t efd_count;
111 113
112 /* transaction which freed this extent */ 114 /* transaction which freed this extent */
113 tid_t t_tid; 115 tid_t efd_tid;
114}; 116};
115 117
116struct ext4_prealloc_space { 118struct ext4_prealloc_space {
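
The "MUST be the first member" comment is load-bearing: because efd_jce sits at offset zero inside struct ext4_free_data, the commit callback can receive a struct ext4_journal_cb_entry * and ext4_free_data_callback() can cast it straight back to the containing structure, exactly as the mballoc.c hunk above does. The same embedding trick in miniature (the types below are stand-ins):

    #include <stdio.h>

    struct cb_entry {                    /* generic callback header */
            void (*func)(struct cb_entry *);
    };

    struct free_data {                   /* header MUST stay first */
            struct cb_entry cb;
            int start, count;
    };

    static void free_cb(struct cb_entry *jce)
    {
            /* Valid only because cb is at offset 0 in free_data;
             * the kernel's container_of() generalizes this to any offset. */
            struct free_data *fd = (struct free_data *)jce;
            printf("freeing %d clusters starting at %d\n", fd->count, fd->start);
    }

    int main(void)
    {
            struct free_data fd = { { free_cb }, 42, 8 };
            fd.cb.func(&fd.cb);          /* caller sees only the header */
            return 0;
    }
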
@@ -210,8 +212,6 @@ struct ext4_buddy {
210 __u16 bd_blkbits; 212 __u16 bd_blkbits;
211 ext4_group_t bd_group; 213 ext4_group_t bd_group;
212}; 214};
213#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
214#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
215 215
216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
217 struct ext4_free_extent *fex) 217 struct ext4_free_extent *fex)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index e7d6bb0acfa..f39f80f8f2c 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
471 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 471 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
472 S_IFREG, NULL, goal, owner); 472 S_IFREG, NULL, goal, owner);
473 if (IS_ERR(tmp_inode)) { 473 if (IS_ERR(tmp_inode)) {
474 retval = PTR_ERR(inode); 474 retval = PTR_ERR(tmp_inode);
475 ext4_journal_stop(handle); 475 ext4_journal_stop(handle);
476 return retval; 476 return retval;
477 } 477 }
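
The one-line migrate.c fix repairs a copy-paste bug: on failure the errno must be decoded from tmp_inode, the pointer that actually carries it, not from the perfectly valid inode, which would yield a garbage return value. The ERR_PTR convention encodes small negative errnos in the top page of the pointer range; a simplified user-space re-implementation of the idiom (the real macros live in include/linux/err.h):

    #include <stdio.h>

    #define MAX_ERRNO 4095

    static void *err_ptr(long err)      { return (void *)err; }
    static long  ptr_err(const void *p) { return (long)p; }
    static int   is_err(const void *p)
    {
            return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    static void *new_inode_or_err(int fail) /* hypothetical allocator */
    {
            static int real_inode;
            return fail ? err_ptr(-12 /* -ENOMEM */) : (void *)&real_inode;
    }

    int main(void)
    {
            void *tmp_inode = new_inode_or_err(1);
            if (is_err(tmp_inode))
                    /* Decode the failing pointer, not some other one. */
                    printf("allocation failed: %ld\n", ptr_err(tmp_inode));
            return 0;
    }
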
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 7ea4ba4eff2..ed6548d8916 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb,
257 * If check_interval in MMP block is larger, use that instead of 257 * If check_interval in MMP block is larger, use that instead of
258 * update_interval from the superblock. 258 * update_interval from the superblock.
259 */ 259 */
260 if (mmp->mmp_check_interval > mmp_check_interval) 260 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
261 mmp_check_interval = mmp->mmp_check_interval; 261 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
262 262
263 seq = le32_to_cpu(mmp->mmp_seq); 263 seq = le32_to_cpu(mmp->mmp_seq);
264 if (seq == EXT4_MMP_SEQ_CLEAN) 264 if (seq == EXT4_MMP_SEQ_CLEAN)
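
The mmp.c fix adds the missing endianness conversion: mmp_check_interval lives on disk as a little-endian __le16, so comparing or assigning it raw only works by accident on little-endian CPUs, while a big-endian host would see the bytes swapped and pick the wrong interval. What le16_to_cpu() amounts to, written portably:

    #include <stdio.h>
    #include <stdint.h>

    /* Byte-order-independent decode of a little-endian u16; the moral
     * equivalent of le16_to_cpu() on any host architecture. */
    static uint16_t decode_le16(const uint8_t b[2])
    {
            return (uint16_t)(b[0] | (b[1] << 8));
    }

    int main(void)
    {
            uint8_t on_disk[2] = { 0x2c, 0x01 }; /* 300, stored LE */
            printf("check interval = %u seconds\n", decode_le16(on_disk));
            return 0;
    }
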
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2043f482375..349d7b3671c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -468,7 +468,7 @@ fail2:
468fail: 468fail:
469 if (*err == ERR_BAD_DX_DIR) 469 if (*err == ERR_BAD_DX_DIR)
470 ext4_warning(dir->i_sb, 470 ext4_warning(dir->i_sb,
471 "Corrupt dir inode %ld, running e2fsck is " 471 "Corrupt dir inode %lu, running e2fsck is "
472 "recommended.", dir->i_ino); 472 "recommended.", dir->i_ino);
473 return NULL; 473 return NULL;
474} 474}
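
The namei.c change swaps %ld for %lu because dir->i_ino is an unsigned long: with %ld, inode numbers above LONG_MAX print as negative, and a signedness mismatch in a printf-style format is exactly what -Wformat exists to catch. The effect in isolation:

    #include <stdio.h>

    int main(void)
    {
            unsigned long ino = 0xfffffffffffffff5UL; /* a very large inode */

            printf("%%lu prints %lu\n", ino);
            printf("%%ld prints %ld\n", (long)ino); /* -11: misleading */
            return 0;
    }
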
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 47585189651..dcdeef169a6 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -110,6 +110,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
110 if (io->iocb) 110 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0); 111 aio_complete(io->iocb, io->result, 0);
112 112
113 if (io->flag & EXT4_IO_END_DIRECT)
114 inode_dio_done(inode);
113 /* Wake up anyone waiting on unwritten extent conversion */ 115 /* Wake up anyone waiting on unwritten extent conversion */
114 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) 116 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
115 wake_up_all(ext4_ioend_wq(io->inode)); 117 wake_up_all(ext4_ioend_wq(io->inode));
@@ -127,12 +129,18 @@ static void ext4_end_io_work(struct work_struct *work)
127 unsigned long flags; 129 unsigned long flags;
128 130
129 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 131 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
132 if (io->flag & EXT4_IO_END_IN_FSYNC)
133 goto requeue;
130 if (list_empty(&io->list)) { 134 if (list_empty(&io->list)) {
131 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 135 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
132 goto free; 136 goto free;
133 } 137 }
134 138
135 if (!mutex_trylock(&inode->i_mutex)) { 139 if (!mutex_trylock(&inode->i_mutex)) {
140 bool was_queued;
141requeue:
142 was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
143 io->flag |= EXT4_IO_END_QUEUED;
136 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 144 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
137 /* 145 /*
138 * Requeue the work instead of waiting so that the work 146 * Requeue the work instead of waiting so that the work
@@ -145,9 +153,8 @@ static void ext4_end_io_work(struct work_struct *work)
145 * yield the cpu if it sees an end_io request that has already 153 * yield the cpu if it sees an end_io request that has already
146 * been requeued. 154 * been requeued.
147 */ 155 */
148 if (io->flag & EXT4_IO_END_QUEUED) 156 if (was_queued)
149 yield(); 157 yield();
150 io->flag |= EXT4_IO_END_QUEUED;
151 return; 158 return;
152 } 159 }
153 list_del_init(&io->list); 160 list_del_init(&io->list);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f9d948f0eb8..59fa0be2725 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb,
1163 do_div(reserved_blocks, 100); 1163 do_div(reserved_blocks, 100);
1164 1164
1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); 1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
1166 ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
1166 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1167 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1167 flex_gd->count); 1168 flex_gd->count);
1169 le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1170 flex_gd->count);
1168 1171
1169 /* 1172 /*
1170 * We need to protect s_groups_count against other CPUs seeing 1173 * We need to protect s_groups_count against other CPUs seeing
@@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
1465 } 1468 }
1466 1469
1467 ext4_blocks_count_set(es, o_blocks_count + add); 1470 ext4_blocks_count_set(es, o_blocks_count + add);
1471 ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
1468 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1472 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1469 o_blocks_count + add); 1473 o_blocks_count + add);
1470 /* We add the blocks to the bitmap and set the group need init bit */ 1474 /* We add the blocks to the bitmap and set the group need init bit */
@@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1512 o_blocks_count = ext4_blocks_count(es); 1516 o_blocks_count = ext4_blocks_count(es);
1513 1517
1514 if (test_opt(sb, DEBUG)) 1518 if (test_opt(sb, DEBUG))
1515 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", 1519 ext4_msg(sb, KERN_DEBUG,
1516 o_blocks_count, n_blocks_count); 1520 "extending last group from %llu to %llu blocks",
1521 o_blocks_count, n_blocks_count);
1517 1522
1518 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1523 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1519 return 0; 1524 return 0;
1520 1525
1521 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1526 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1522 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 1527 ext4_msg(sb, KERN_ERR,
1523 " too large to resize to %llu blocks safely\n", 1528 "filesystem too large to resize to %llu blocks safely",
1524 sb->s_id, n_blocks_count); 1529 n_blocks_count);
1525 if (sizeof(sector_t) < 8) 1530 if (sizeof(sector_t) < 8)
1526 ext4_warning(sb, "CONFIG_LBDAF not enabled"); 1531 ext4_warning(sb, "CONFIG_LBDAF not enabled");
1527 return -EINVAL; 1532 return -EINVAL;
@@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1582 ext4_fsblk_t o_blocks_count; 1587 ext4_fsblk_t o_blocks_count;
1583 ext4_group_t o_group; 1588 ext4_group_t o_group;
1584 ext4_group_t n_group; 1589 ext4_group_t n_group;
1585 ext4_grpblk_t offset; 1590 ext4_grpblk_t offset, add;
1586 unsigned long n_desc_blocks; 1591 unsigned long n_desc_blocks;
1587 unsigned long o_desc_blocks; 1592 unsigned long o_desc_blocks;
1588 unsigned long desc_blocks; 1593 unsigned long desc_blocks;
@@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1591 o_blocks_count = ext4_blocks_count(es); 1596 o_blocks_count = ext4_blocks_count(es);
1592 1597
1593 if (test_opt(sb, DEBUG)) 1598 if (test_opt(sb, DEBUG))
1594 printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " 1599 ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu "
1595 "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1600 "to %llu blocks", o_blocks_count, n_blocks_count);
1596 1601
1597 if (n_blocks_count < o_blocks_count) { 1602 if (n_blocks_count < o_blocks_count) {
1598 /* On-line shrinking not supported */ 1603 /* On-line shrinking not supported */
@@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1605 return 0; 1610 return 0;
1606 1611
1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1612 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1608 ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); 1613 ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
1609 1614
1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1615 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
1611 EXT4_DESC_PER_BLOCK(sb); 1616 EXT4_DESC_PER_BLOCK(sb);
@@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1634 } 1639 }
1635 brelse(bh); 1640 brelse(bh);
1636 1641
1637 if (offset != 0) { 1642 /* extend the last group */
1638 /* extend the last group */ 1643 if (n_group == o_group)
1639 ext4_grpblk_t add; 1644 add = n_blocks_count - o_blocks_count;
1640 add = EXT4_BLOCKS_PER_GROUP(sb) - offset; 1645 else
1646 add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1);
1647 if (add > 0) {
1641 err = ext4_group_extend_no_check(sb, o_blocks_count, add); 1648 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1642 if (err) 1649 if (err)
1643 goto out; 1650 goto out;
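
The last-group arithmetic above is the subtle part of the resize fix: feeding o_blocks_count - 1 (the last existing block) to ext4_get_group_no_and_offset() makes offset the index of that block within its group, so the group already holds offset + 1 blocks and has room for EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1) more; when the old and new sizes fall in the same group, the growth is just their difference. Worked numbers under an assumed 32768-blocks-per-group layout:

    #include <stdio.h>

    int main(void)
    {
            unsigned long per_group = 32768;
            unsigned long o_blocks = 100000, n_blocks = 200000;

            unsigned long o_group = (o_blocks - 1) / per_group; /* 3 */
            unsigned long offset  = (o_blocks - 1) % per_group; /* 1695 */
            unsigned long n_group = (n_blocks - 1) / per_group; /* 6 */

            unsigned long add = (n_group == o_group)
                    ? n_blocks - o_blocks
                    : per_group - (offset + 1); /* top up the old group */

            printf("extend last group by %lu blocks\n", add); /* 31072 */
            return 0;
    }
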
@@ -1674,7 +1681,7 @@ out:
1674 1681
1675 iput(resize_inode); 1682 iput(resize_inode);
1676 if (test_opt(sb, DEBUG)) 1683 if (test_opt(sb, DEBUG))
1677 printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " 1684 ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
1678 "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1685 "upto %llu blocks", o_blocks_count, n_blocks_count);
1679 return err; 1686 return err;
1680} 1687}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61fd739..e1fb1d5de58 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -62,6 +62,7 @@ static struct ext4_features *ext4_feat;
62 62
63static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 63static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
64 unsigned long journal_devnum); 64 unsigned long journal_devnum);
65static int ext4_show_options(struct seq_file *seq, struct dentry *root);
65static int ext4_commit_super(struct super_block *sb, int sync); 66static int ext4_commit_super(struct super_block *sb, int sync);
66static void ext4_mark_recovery_complete(struct super_block *sb, 67static void ext4_mark_recovery_complete(struct super_block *sb,
67 struct ext4_super_block *es); 68 struct ext4_super_block *es);
@@ -375,7 +376,7 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
375 if (is_handle_aborted(handle)) 376 if (is_handle_aborted(handle))
376 return; 377 return;
377 378
378 printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 379 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
379 caller, line, errstr, err_fn); 380 caller, line, errstr, err_fn);
380 381
381 jbd2_journal_abort_handle(handle); 382 jbd2_journal_abort_handle(handle);
@@ -431,6 +432,22 @@ static int block_device_ejected(struct super_block *sb)
431 return bdi->dev == NULL; 432 return bdi->dev == NULL;
432} 433}
433 434
435static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
436{
437 struct super_block *sb = journal->j_private;
438 struct ext4_sb_info *sbi = EXT4_SB(sb);
439 int error = is_journal_aborted(journal);
440 struct ext4_journal_cb_entry *jce, *tmp;
441
442 spin_lock(&sbi->s_md_lock);
443 list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
444 list_del_init(&jce->jce_list);
445 spin_unlock(&sbi->s_md_lock);
446 jce->jce_func(sb, jce, error);
447 spin_lock(&sbi->s_md_lock);
448 }
449 spin_unlock(&sbi->s_md_lock);
450}
434 451
435/* Deal with the reporting of failure conditions on a filesystem such as 452/* Deal with the reporting of failure conditions on a filesystem such as
436 * inconsistencies detected or read IO failures. 453 * inconsistencies detected or read IO failures.
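
The new ext4_journal_commit_callback() walks the transaction's private list under s_md_lock but drops the lock around each jce_func() call, because callbacks such as ext4_free_data_callback() can sleep while loading buddy pages or issuing discards; list_del_init() before the unlock keeps the traversal safe. A compact pthread model of that drain loop (the kernel passes sb and an error code to each callback; the list plumbing is reduced to a singly linked list here):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct jce {
            struct jce *next;
            void (*func)(struct jce *);
    };

    static pthread_mutex_t md_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct jce *private_list;

    static void commit_callback(void)
    {
            pthread_mutex_lock(&md_lock);
            while (private_list) {
                    struct jce *jce = private_list;
                    private_list = jce->next;      /* unlink while locked */
                    pthread_mutex_unlock(&md_lock);
                    jce->func(jce);                /* may sleep: unlocked */
                    pthread_mutex_lock(&md_lock);
            }
            pthread_mutex_unlock(&md_lock);
    }

    static void release(struct jce *j)
    {
            printf("callback for %p\n", (void *)j);
            free(j);
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++) {
                    struct jce *j = malloc(sizeof(*j));
                    j->func = release;
                    j->next = private_list;
                    private_list = j;
            }
            commit_callback();
            return 0;
    }
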
@@ -498,11 +515,16 @@ void ext4_error_inode(struct inode *inode, const char *function,
498 va_start(args, fmt); 515 va_start(args, fmt);
499 vaf.fmt = fmt; 516 vaf.fmt = fmt;
500 vaf.va = &args; 517 vaf.va = &args;
501 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
502 inode->i_sb->s_id, function, line, inode->i_ino);
503 if (block) 518 if (block)
504 printk(KERN_CONT "block %llu: ", block); 519 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
505 printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 520 "inode #%lu: block %llu: comm %s: %pV\n",
521 inode->i_sb->s_id, function, line, inode->i_ino,
522 block, current->comm, &vaf);
523 else
524 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
525 "inode #%lu: comm %s: %pV\n",
526 inode->i_sb->s_id, function, line, inode->i_ino,
527 current->comm, &vaf);
506 va_end(args); 528 va_end(args);
507 529
508 ext4_handle_error(inode->i_sb); 530 ext4_handle_error(inode->i_sb);
@@ -524,15 +546,21 @@ void ext4_error_file(struct file *file, const char *function,
524 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 546 path = d_path(&(file->f_path), pathname, sizeof(pathname));
525 if (IS_ERR(path)) 547 if (IS_ERR(path))
526 path = "(unknown)"; 548 path = "(unknown)";
527 printk(KERN_CRIT
528 "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
529 inode->i_sb->s_id, function, line, inode->i_ino);
530 if (block)
531 printk(KERN_CONT "block %llu: ", block);
532 va_start(args, fmt); 549 va_start(args, fmt);
533 vaf.fmt = fmt; 550 vaf.fmt = fmt;
534 vaf.va = &args; 551 vaf.va = &args;
535 printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 552 if (block)
553 printk(KERN_CRIT
554 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
555 "block %llu: comm %s: path %s: %pV\n",
556 inode->i_sb->s_id, function, line, inode->i_ino,
557 block, current->comm, path, &vaf);
558 else
559 printk(KERN_CRIT
560 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
561 "comm %s: path %s: %pV\n",
562 inode->i_sb->s_id, function, line, inode->i_ino,
563 current->comm, path, &vaf);
536 va_end(args); 564 va_end(args);
537 565
538 ext4_handle_error(inode->i_sb); 566 ext4_handle_error(inode->i_sb);
@@ -808,9 +836,6 @@ static void ext4_put_super(struct super_block *sb)
808 destroy_workqueue(sbi->dio_unwritten_wq); 836 destroy_workqueue(sbi->dio_unwritten_wq);
809 837
810 lock_super(sb); 838 lock_super(sb);
811 if (sb->s_dirt)
812 ext4_commit_super(sb, 1);
813
814 if (sbi->s_journal) { 839 if (sbi->s_journal) {
815 err = jbd2_journal_destroy(sbi->s_journal); 840 err = jbd2_journal_destroy(sbi->s_journal);
816 sbi->s_journal = NULL; 841 sbi->s_journal = NULL;
@@ -827,9 +852,12 @@ static void ext4_put_super(struct super_block *sb)
827 if (!(sb->s_flags & MS_RDONLY)) { 852 if (!(sb->s_flags & MS_RDONLY)) {
828 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 853 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
829 es->s_state = cpu_to_le16(sbi->s_mount_state); 854 es->s_state = cpu_to_le16(sbi->s_mount_state);
830 ext4_commit_super(sb, 1);
831 } 855 }
856 if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
857 ext4_commit_super(sb, 1);
858
832 if (sbi->s_proc) { 859 if (sbi->s_proc) {
860 remove_proc_entry("options", sbi->s_proc);
833 remove_proc_entry(sb->s_id, ext4_proc_root); 861 remove_proc_entry(sb->s_id, ext4_proc_root);
834 } 862 }
835 kobject_del(&sbi->s_kobj); 863 kobject_del(&sbi->s_kobj);
@@ -990,180 +1018,6 @@ void ext4_clear_inode(struct inode *inode)
990 } 1018 }
991} 1019}
992 1020
993static inline void ext4_show_quota_options(struct seq_file *seq,
994 struct super_block *sb)
995{
996#if defined(CONFIG_QUOTA)
997 struct ext4_sb_info *sbi = EXT4_SB(sb);
998
999 if (sbi->s_jquota_fmt) {
1000 char *fmtname = "";
1001
1002 switch (sbi->s_jquota_fmt) {
1003 case QFMT_VFS_OLD:
1004 fmtname = "vfsold";
1005 break;
1006 case QFMT_VFS_V0:
1007 fmtname = "vfsv0";
1008 break;
1009 case QFMT_VFS_V1:
1010 fmtname = "vfsv1";
1011 break;
1012 }
1013 seq_printf(seq, ",jqfmt=%s", fmtname);
1014 }
1015
1016 if (sbi->s_qf_names[USRQUOTA])
1017 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
1018
1019 if (sbi->s_qf_names[GRPQUOTA])
1020 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
1021
1022 if (test_opt(sb, USRQUOTA))
1023 seq_puts(seq, ",usrquota");
1024
1025 if (test_opt(sb, GRPQUOTA))
1026 seq_puts(seq, ",grpquota");
1027#endif
1028}
1029
1030/*
1031 * Show an option if
1032 * - it's set to a non-default value OR
1033 * - if the per-sb default is different from the global default
1034 */
-static int ext4_show_options(struct seq_file *seq, struct dentry *root)
-{
-	int def_errors;
-	unsigned long def_mount_opts;
-	struct super_block *sb = root->d_sb;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_super_block *es = sbi->s_es;
-
-	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
-	def_errors = le16_to_cpu(es->s_errors);
-
-	if (sbi->s_sb_block != 1)
-		seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
-	if (test_opt(sb, MINIX_DF))
-		seq_puts(seq, ",minixdf");
-	if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
-		seq_puts(seq, ",grpid");
-	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
-		seq_puts(seq, ",nogrpid");
-	if (sbi->s_resuid != EXT4_DEF_RESUID ||
-	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
-		seq_printf(seq, ",resuid=%u", sbi->s_resuid);
-	}
-	if (sbi->s_resgid != EXT4_DEF_RESGID ||
-	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
-		seq_printf(seq, ",resgid=%u", sbi->s_resgid);
-	}
-	if (test_opt(sb, ERRORS_RO)) {
-		if (def_errors == EXT4_ERRORS_PANIC ||
-		    def_errors == EXT4_ERRORS_CONTINUE) {
-			seq_puts(seq, ",errors=remount-ro");
-		}
-	}
-	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
-		seq_puts(seq, ",errors=continue");
-	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
-		seq_puts(seq, ",errors=panic");
-	if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
-		seq_puts(seq, ",nouid32");
-	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
-		seq_puts(seq, ",debug");
-#ifdef CONFIG_EXT4_FS_XATTR
-	if (test_opt(sb, XATTR_USER))
-		seq_puts(seq, ",user_xattr");
-	if (!test_opt(sb, XATTR_USER))
-		seq_puts(seq, ",nouser_xattr");
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
-		seq_puts(seq, ",acl");
-	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
-		seq_puts(seq, ",noacl");
-#endif
-	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
-		seq_printf(seq, ",commit=%u",
-			   (unsigned) (sbi->s_commit_interval / HZ));
-	}
-	if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
-		seq_printf(seq, ",min_batch_time=%u",
-			   (unsigned) sbi->s_min_batch_time);
-	}
-	if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
-		seq_printf(seq, ",max_batch_time=%u",
-			   (unsigned) sbi->s_max_batch_time);
-	}
-
-	/*
-	 * We're changing the default of barrier mount option, so
-	 * let's always display its mount state so it's clear what its
-	 * status is.
-	 */
-	seq_puts(seq, ",barrier=");
-	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
-	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
-		seq_puts(seq, ",journal_async_commit");
-	else if (test_opt(sb, JOURNAL_CHECKSUM))
-		seq_puts(seq, ",journal_checksum");
-	if (test_opt(sb, I_VERSION))
-		seq_puts(seq, ",i_version");
-	if (!test_opt(sb, DELALLOC) &&
-	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
-		seq_puts(seq, ",nodelalloc");
-
-	if (!test_opt(sb, MBLK_IO_SUBMIT))
-		seq_puts(seq, ",nomblk_io_submit");
-	if (sbi->s_stripe)
-		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
-	/*
-	 * journal mode get enabled in different ways
-	 * So just print the value even if we didn't specify it
-	 */
-	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
-		seq_puts(seq, ",data=journal");
-	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
-		seq_puts(seq, ",data=ordered");
-	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
-		seq_puts(seq, ",data=writeback");
-
-	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
-		seq_printf(seq, ",inode_readahead_blks=%u",
-			   sbi->s_inode_readahead_blks);
-
-	if (test_opt(sb, DATA_ERR_ABORT))
-		seq_puts(seq, ",data_err=abort");
-
-	if (test_opt(sb, NO_AUTO_DA_ALLOC))
-		seq_puts(seq, ",noauto_da_alloc");
-
-	if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
-		seq_puts(seq, ",discard");
-
-	if (test_opt(sb, NOLOAD))
-		seq_puts(seq, ",norecovery");
-
-	if (test_opt(sb, DIOREAD_NOLOCK))
-		seq_puts(seq, ",dioread_nolock");
-
-	if (test_opt(sb, BLOCK_VALIDITY) &&
-	    !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
-		seq_puts(seq, ",block_validity");
-
-	if (!test_opt(sb, INIT_INODE_TABLE))
-		seq_puts(seq, ",noinit_itable");
-	else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
-		seq_printf(seq, ",init_itable=%u",
-			   (unsigned) sbi->s_li_wait_mult);
-
-	ext4_show_quota_options(seq, sb);
-
-	return 0;
-}
-
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
					 u64 ino, u32 generation)
 {
@@ -1316,18 +1170,17 @@ static const struct export_operations ext4_export_ops = {
 enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-	Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
+	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
+	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
-	Opt_journal_update, Opt_journal_dev,
-	Opt_journal_checksum, Opt_journal_async_commit,
+	Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
-	Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
+	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
+	Opt_usrquota, Opt_grpquota, Opt_i_version,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1350,20 +1203,19 @@ static const match_table_t tokens = {
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_nouid32, "nouid32"},
	{Opt_debug, "debug"},
-	{Opt_oldalloc, "oldalloc"},
-	{Opt_orlov, "orlov"},
+	{Opt_removed, "oldalloc"},
+	{Opt_removed, "orlov"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
-	{Opt_noload, "noload"},
	{Opt_noload, "norecovery"},
-	{Opt_nobh, "nobh"},
-	{Opt_bh, "bh"},
+	{Opt_noload, "noload"},
+	{Opt_removed, "nobh"},
+	{Opt_removed, "bh"},
	{Opt_commit, "commit=%u"},
	{Opt_min_batch_time, "min_batch_time=%u"},
	{Opt_max_batch_time, "max_batch_time=%u"},
-	{Opt_journal_update, "journal=update"},
	{Opt_journal_dev, "journal_dev=%u"},
	{Opt_journal_checksum, "journal_checksum"},
	{Opt_journal_async_commit, "journal_async_commit"},
@@ -1389,7 +1241,6 @@ static const match_table_t tokens = {
	{Opt_nobarrier, "nobarrier"},
	{Opt_i_version, "i_version"},
	{Opt_stripe, "stripe=%u"},
-	{Opt_resize, "resize"},
	{Opt_delalloc, "delalloc"},
	{Opt_nodelalloc, "nodelalloc"},
	{Opt_mblk_io_submit, "mblk_io_submit"},
@@ -1408,6 +1259,11 @@ static const match_table_t tokens = {
	{Opt_init_itable, "init_itable=%u"},
	{Opt_init_itable, "init_itable"},
	{Opt_noinit_itable, "noinit_itable"},
+	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
+	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
+	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
+	{Opt_removed, "noreservation"},	/* mount option from ext2/3 */
+	{Opt_removed, "journal=%u"},	/* mount option from ext2/3 */
	{Opt_err, NULL},
 };
 
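
For readers following the token table above: these {token, pattern} pairs feed the kernel's generic option parser in <linux/parser.h>. A minimal, illustrative sketch of how such a table is consumed (simplified from the ext4 code, not part of this patch; demo_parse() is a hypothetical name):

	#include <linux/parser.h>
	#include <linux/string.h>

	/* illustrative only: a cut-down loop in the style of parse_options() */
	static int demo_parse(char *options)
	{
		substring_t args[MAX_OPT_ARGS];
		char *p;
		int token, arg;

		while ((p = strsep(&options, ",")) != NULL) {
			if (!*p)
				continue;
			args[0].to = args[0].from = NULL;
			/* "tokens" is the match_table_t shown above */
			token = match_token(p, tokens, args);
			if (token == Opt_commit &&
			    args[0].from && !match_int(&args[0], &arg))
				pr_info("commit interval: %d\n", arg);
		}
		return 0;
	}
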
@@ -1449,20 +1305,20 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
			ext4_msg(sb, KERN_ERR,
				"Cannot change journaled "
				"quota options when quota turned on");
-			return 0;
+			return -1;
		}
		qname = match_strdup(args);
		if (!qname) {
			ext4_msg(sb, KERN_ERR,
				"Not enough memory for storing quotafile name");
-			return 0;
+			return -1;
		}
		if (sbi->s_qf_names[qtype] &&
		    strcmp(sbi->s_qf_names[qtype], qname)) {
			ext4_msg(sb, KERN_ERR,
				"%s quota file already specified", QTYPE2NAME(qtype));
			kfree(qname);
-			return 0;
+			return -1;
		}
		sbi->s_qf_names[qtype] = qname;
		if (strchr(sbi->s_qf_names[qtype], '/')) {
@@ -1470,7 +1326,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
				"quotafile must be on filesystem root");
			kfree(sbi->s_qf_names[qtype]);
			sbi->s_qf_names[qtype] = NULL;
-			return 0;
+			return -1;
		}
		set_opt(sb, QUOTA);
		return 1;
@@ -1485,7 +1341,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
	    sbi->s_qf_names[qtype]) {
		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
			" when quota turned on");
-		return 0;
+		return -1;
	}
	/*
	 * The space will be released later when all options are confirmed
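
A note on the 0 -> -1 return changes in set_qf_name()/clear_qf_name() above: in the rewritten parser below, the per-option handler uses a three-way result, so these helpers' return values can now be passed straight through as handle_mount_opt()'s own return. Roughly (convention inferred from this patch, stated here for orientation only):

	/*
	 * handle_mount_opt() return convention, as used in this patch:
	 *   1  - option recognized and applied
	 *  -1  - option recognized but invalid; abort mount/remount
	 * Returning 0 for errors, as the quota helpers used to, would no
	 * longer be distinguishable from success, hence the switch to -1.
	 */
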
@@ -1496,420 +1352,273 @@ static int clear_qf_name(struct super_block *sb, int qtype)
 }
 #endif
 
-static int parse_options(char *options, struct super_block *sb,
-			 unsigned long *journal_devnum,
-			 unsigned int *journal_ioprio,
-			 ext4_fsblk_t *n_blocks_count, int is_remount)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	char *p;
-	substring_t args[MAX_OPT_ARGS];
-	int data_opt = 0;
-	int option;
+#define MOPT_SET	0x0001
+#define MOPT_CLEAR	0x0002
+#define MOPT_NOSUPPORT	0x0004
+#define MOPT_EXPLICIT	0x0008
+#define MOPT_CLEAR_ERR	0x0010
+#define MOPT_GTE0	0x0020
 #ifdef CONFIG_QUOTA
-	int qfmt;
+#define MOPT_Q		0
+#define MOPT_QFMT	0x0040
+#else
+#define MOPT_Q		MOPT_NOSUPPORT
+#define MOPT_QFMT	MOPT_NOSUPPORT
 #endif
-
-	if (!options)
-		return 1;
-
-	while ((p = strsep(&options, ",")) != NULL) {
-		int token;
-		if (!*p)
-			continue;
-
-		/*
-		 * Initialize args struct so we know whether arg was
-		 * found; some options take optional arguments.
-		 */
-		args[0].to = args[0].from = NULL;
-		token = match_token(p, tokens, args);
-		switch (token) {
-		case Opt_bsd_df:
-			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			clear_opt(sb, MINIX_DF);
-			break;
-		case Opt_minix_df:
-			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			set_opt(sb, MINIX_DF);
-
-			break;
-		case Opt_grpid:
-			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			set_opt(sb, GRPID);
-
-			break;
-		case Opt_nogrpid:
-			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			clear_opt(sb, GRPID);
-
-			break;
-		case Opt_resuid:
-			if (match_int(&args[0], &option))
-				return 0;
-			sbi->s_resuid = option;
-			break;
-		case Opt_resgid:
-			if (match_int(&args[0], &option))
-				return 0;
-			sbi->s_resgid = option;
-			break;
-		case Opt_sb:
-			/* handled by get_sb_block() instead of here */
-			/* *sb_block = match_int(&args[0]); */
-			break;
-		case Opt_err_panic:
-			clear_opt(sb, ERRORS_CONT);
-			clear_opt(sb, ERRORS_RO);
-			set_opt(sb, ERRORS_PANIC);
-			break;
-		case Opt_err_ro:
-			clear_opt(sb, ERRORS_CONT);
-			clear_opt(sb, ERRORS_PANIC);
-			set_opt(sb, ERRORS_RO);
-			break;
-		case Opt_err_cont:
-			clear_opt(sb, ERRORS_RO);
-			clear_opt(sb, ERRORS_PANIC);
-			set_opt(sb, ERRORS_CONT);
-			break;
-		case Opt_nouid32:
-			set_opt(sb, NO_UID32);
-			break;
-		case Opt_debug:
-			set_opt(sb, DEBUG);
-			break;
-		case Opt_oldalloc:
-			ext4_msg(sb, KERN_WARNING,
-				 "Ignoring deprecated oldalloc option");
-			break;
-		case Opt_orlov:
-			ext4_msg(sb, KERN_WARNING,
-				 "Ignoring deprecated orlov option");
-			break;
+#define MOPT_DATAJ	0x0080
+
+static const struct mount_opts {
+	int	token;
+	int	mount_opt;
+	int	flags;
+} ext4_mount_opts[] = {
+	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
+	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
+	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
+	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
+	{Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
+	{Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
+	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
+	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
+	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
+	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
+	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
+	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
+	{Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT},
+	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT},
+	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
+	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
+				    EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
+	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
+	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
+	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
+	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
+	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
+	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
+	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
+	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
+	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
+	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
+	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
+	{Opt_commit, 0, MOPT_GTE0},
+	{Opt_max_batch_time, 0, MOPT_GTE0},
+	{Opt_min_batch_time, 0, MOPT_GTE0},
+	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
+	{Opt_init_itable, 0, MOPT_GTE0},
+	{Opt_stripe, 0, MOPT_GTE0},
+	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
+	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
+	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
 #ifdef CONFIG_EXT4_FS_XATTR
-		case Opt_user_xattr:
-			set_opt(sb, XATTR_USER);
-			break;
-		case Opt_nouser_xattr:
-			clear_opt(sb, XATTR_USER);
-			break;
+	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
+	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
 #else
-		case Opt_user_xattr:
-		case Opt_nouser_xattr:
-			ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
-			break;
+	{Opt_user_xattr, 0, MOPT_NOSUPPORT},
+	{Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
-		case Opt_acl:
-			set_opt(sb, POSIX_ACL);
-			break;
-		case Opt_noacl:
-			clear_opt(sb, POSIX_ACL);
-			break;
+	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
+	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
 #else
-		case Opt_acl:
-		case Opt_noacl:
-			ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
-			break;
+	{Opt_acl, 0, MOPT_NOSUPPORT},
+	{Opt_noacl, 0, MOPT_NOSUPPORT},
 #endif
-		case Opt_journal_update:
-			/* @@@ FIXME */
-			/* Eventually we will want to be able to create
-			   a journal file here.  For now, only allow the
-			   user to specify an existing inode to be the
-			   journal file. */
-			if (is_remount) {
-				ext4_msg(sb, KERN_ERR,
-					 "Cannot specify journal on remount");
-				return 0;
-			}
-			set_opt(sb, UPDATE_JOURNAL);
-			break;
-		case Opt_journal_dev:
-			if (is_remount) {
+	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
+	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
+	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
+	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
+	 MOPT_SET | MOPT_Q},
+	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
+	 MOPT_SET | MOPT_Q},
+	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
+		       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+	{Opt_usrjquota, 0, MOPT_Q},
+	{Opt_grpjquota, 0, MOPT_Q},
+	{Opt_offusrjquota, 0, MOPT_Q},
+	{Opt_offgrpjquota, 0, MOPT_Q},
+	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
+	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
+	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+	{Opt_err, 0, 0}
+};
+
+static int handle_mount_opt(struct super_block *sb, char *opt, int token,
+			    substring_t *args, unsigned long *journal_devnum,
+			    unsigned int *journal_ioprio, int is_remount)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	const struct mount_opts *m;
+	int arg = 0;
+
+#ifdef CONFIG_QUOTA
+	if (token == Opt_usrjquota)
+		return set_qf_name(sb, USRQUOTA, &args[0]);
+	else if (token == Opt_grpjquota)
+		return set_qf_name(sb, GRPQUOTA, &args[0]);
+	else if (token == Opt_offusrjquota)
+		return clear_qf_name(sb, USRQUOTA);
+	else if (token == Opt_offgrpjquota)
+		return clear_qf_name(sb, GRPQUOTA);
+#endif
+	if (args->from && match_int(args, &arg))
+		return -1;
+	switch (token) {
+	case Opt_noacl:
+	case Opt_nouser_xattr:
+		ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
+		break;
+	case Opt_sb:
+		return 1;	/* handled by get_sb_block() */
+	case Opt_removed:
+		ext4_msg(sb, KERN_WARNING,
+			 "Ignoring removed %s option", opt);
+		return 1;
+	case Opt_resuid:
+		sbi->s_resuid = arg;
+		return 1;
+	case Opt_resgid:
+		sbi->s_resgid = arg;
+		return 1;
+	case Opt_abort:
+		sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+		return 1;
+	case Opt_i_version:
+		sb->s_flags |= MS_I_VERSION;
+		return 1;
+	case Opt_journal_dev:
+		if (is_remount) {
			ext4_msg(sb, KERN_ERR,
-				 "Cannot specify journal on remount");
-			return 0;
+				 "Cannot specify journal on remount");
+			return -1;
		}
-			if (match_int(&args[0], &option))
-				return 0;
-			*journal_devnum = option;
-			break;
-		case Opt_journal_checksum:
-			set_opt(sb, JOURNAL_CHECKSUM);
-			break;
-		case Opt_journal_async_commit:
-			set_opt(sb, JOURNAL_ASYNC_COMMIT);
-			set_opt(sb, JOURNAL_CHECKSUM);
-			break;
-		case Opt_noload:
-			set_opt(sb, NOLOAD);
-			break;
-		case Opt_commit:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			if (option == 0)
-				option = JBD2_DEFAULT_MAX_COMMIT_AGE;
-			sbi->s_commit_interval = HZ * option;
-			break;
-		case Opt_max_batch_time:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			if (option == 0)
-				option = EXT4_DEF_MAX_BATCH_TIME;
-			sbi->s_max_batch_time = option;
-			break;
-		case Opt_min_batch_time:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			sbi->s_min_batch_time = option;
-			break;
-		case Opt_data_journal:
-			data_opt = EXT4_MOUNT_JOURNAL_DATA;
-			goto datacheck;
-		case Opt_data_ordered:
-			data_opt = EXT4_MOUNT_ORDERED_DATA;
-			goto datacheck;
-		case Opt_data_writeback:
-			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
-		datacheck:
+		*journal_devnum = arg;
+		return 1;
+	case Opt_journal_ioprio:
+		if (arg < 0 || arg > 7)
+			return -1;
+		*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
+		return 1;
+	}
+
+	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+		if (token != m->token)
+			continue;
+		if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
+			return -1;
+		if (m->flags & MOPT_EXPLICIT)
+			set_opt2(sb, EXPLICIT_DELALLOC);
+		if (m->flags & MOPT_CLEAR_ERR)
+			clear_opt(sb, ERRORS_MASK);
+		if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
+			ext4_msg(sb, KERN_ERR, "Cannot change quota "
+				 "options when quota turned on");
+			return -1;
+		}
+
+		if (m->flags & MOPT_NOSUPPORT) {
+			ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
+		} else if (token == Opt_commit) {
+			if (arg == 0)
+				arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
+			sbi->s_commit_interval = HZ * arg;
+		} else if (token == Opt_max_batch_time) {
+			if (arg == 0)
+				arg = EXT4_DEF_MAX_BATCH_TIME;
+			sbi->s_max_batch_time = arg;
+		} else if (token == Opt_min_batch_time) {
+			sbi->s_min_batch_time = arg;
+		} else if (token == Opt_inode_readahead_blks) {
+			if (arg > (1 << 30))
+				return -1;
+			if (arg && !is_power_of_2(arg)) {
			ext4_msg(sb, KERN_ERR,
-				 "Cannot specify journal on remount");
-			return 0;
+				 "EXT4-fs: inode_readahead_blks"
+				 " must be a power of 2");
+			return -1;
			}
-			if (match_int(&args[0], &option))
-				return 0;
-			*journal_devnum = option;
-			break;
-		case Opt_journal_checksum:
-			set_opt(sb, JOURNAL_CHECKSUM);
-			break;
-		case Opt_journal_async_commit:
-			set_opt(sb, JOURNAL_ASYNC_COMMIT);
-			set_opt(sb, JOURNAL_CHECKSUM);
-			break;
-		case Opt_noload:
-			set_opt(sb, NOLOAD);
-			break;
-		case Opt_commit:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			if (option == 0)
-				option = JBD2_DEFAULT_MAX_COMMIT_AGE;
-			sbi->s_commit_interval = HZ * option;
-			break;
-		case Opt_max_batch_time:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			if (option == 0)
-				option = EXT4_DEF_MAX_BATCH_TIME;
-			sbi->s_max_batch_time = option;
-			break;
-		case Opt_min_batch_time:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			sbi->s_min_batch_time = option;
-			break;
-		case Opt_data_journal:
-			data_opt = EXT4_MOUNT_JOURNAL_DATA;
-			goto datacheck;
-		case Opt_data_ordered:
-			data_opt = EXT4_MOUNT_ORDERED_DATA;
-			goto datacheck;
-		case Opt_data_writeback:
-			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
-		datacheck:
+			sbi->s_inode_readahead_blks = arg;
+		} else if (token == Opt_init_itable) {
+			set_opt(sb, INIT_INODE_TABLE);
+			if (!args->from)
+				arg = EXT4_DEF_LI_WAIT_MULT;
+			sbi->s_li_wait_mult = arg;
+		} else if (token == Opt_stripe) {
+			sbi->s_stripe = arg;
+		} else if (m->flags & MOPT_DATAJ) {
			if (is_remount) {
				if (!sbi->s_journal)
					ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
-				else if (test_opt(sb, DATA_FLAGS) != data_opt) {
+				else if (test_opt(sb, DATA_FLAGS) !=
+					 m->mount_opt) {
					ext4_msg(sb, KERN_ERR,
						"Cannot change data mode on remount");
-					return 0;
+					return -1;
				}
			} else {
				clear_opt(sb, DATA_FLAGS);
-				sbi->s_mount_opt |= data_opt;
+				sbi->s_mount_opt |= m->mount_opt;
			}
-			break;
-		case Opt_data_err_abort:
-			set_opt(sb, DATA_ERR_ABORT);
-			break;
-		case Opt_data_err_ignore:
-			clear_opt(sb, DATA_ERR_ABORT);
-			break;
 #ifdef CONFIG_QUOTA
-		case Opt_usrjquota:
-			if (!set_qf_name(sb, USRQUOTA, &args[0]))
-				return 0;
-			break;
-		case Opt_grpjquota:
-			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
-				return 0;
-			break;
-		case Opt_offusrjquota:
-			if (!clear_qf_name(sb, USRQUOTA))
-				return 0;
-			break;
-		case Opt_offgrpjquota:
-			if (!clear_qf_name(sb, GRPQUOTA))
-				return 0;
-			break;
-
-		case Opt_jqfmt_vfsold:
-			qfmt = QFMT_VFS_OLD;
-			goto set_qf_format;
-		case Opt_jqfmt_vfsv0:
-			qfmt = QFMT_VFS_V0;
-			goto set_qf_format;
-		case Opt_jqfmt_vfsv1:
-			qfmt = QFMT_VFS_V1;
-set_qf_format:
+		} else if (m->flags & MOPT_QFMT) {
			if (sb_any_quota_loaded(sb) &&
-			    sbi->s_jquota_fmt != qfmt) {
-				ext4_msg(sb, KERN_ERR, "Cannot change "
-					"journaled quota options when "
-					"quota turned on");
-				return 0;
+			    sbi->s_jquota_fmt != m->mount_opt) {
+				ext4_msg(sb, KERN_ERR, "Cannot "
+					 "change journaled quota options "
+					 "when quota turned on");
+				return -1;
			}
-			sbi->s_jquota_fmt = qfmt;
-			break;
-		case Opt_quota:
-		case Opt_usrquota:
-			set_opt(sb, QUOTA);
-			set_opt(sb, USRQUOTA);
-			break;
-		case Opt_grpquota:
-			set_opt(sb, QUOTA);
-			set_opt(sb, GRPQUOTA);
-			break;
-		case Opt_noquota:
-			if (sb_any_quota_loaded(sb)) {
-				ext4_msg(sb, KERN_ERR, "Cannot change quota "
-					"options when quota turned on");
-				return 0;
-			}
-			clear_opt(sb, QUOTA);
-			clear_opt(sb, USRQUOTA);
-			clear_opt(sb, GRPQUOTA);
-			break;
-#else
-		case Opt_quota:
-		case Opt_usrquota:
-		case Opt_grpquota:
-			ext4_msg(sb, KERN_ERR,
-				"quota options not supported");
-			break;
-		case Opt_usrjquota:
-		case Opt_grpjquota:
-		case Opt_offusrjquota:
-		case Opt_offgrpjquota:
-		case Opt_jqfmt_vfsold:
-		case Opt_jqfmt_vfsv0:
-		case Opt_jqfmt_vfsv1:
-			ext4_msg(sb, KERN_ERR,
-				"journaled quota options not supported");
-			break;
-		case Opt_noquota:
-			break;
+			sbi->s_jquota_fmt = m->mount_opt;
 #endif
-		case Opt_abort:
-			sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
-			break;
-		case Opt_nobarrier:
-			clear_opt(sb, BARRIER);
-			break;
-		case Opt_barrier:
-			if (args[0].from) {
-				if (match_int(&args[0], &option))
-					return 0;
-			} else
-				option = 1;	/* No argument, default to 1 */
-			if (option)
-				set_opt(sb, BARRIER);
-			else
-				clear_opt(sb, BARRIER);
-			break;
-		case Opt_ignore:
-			break;
-		case Opt_resize:
-			if (!is_remount) {
-				ext4_msg(sb, KERN_ERR,
-					"resize option only available "
-					"for remount");
-				return 0;
-			}
-			if (match_int(&args[0], &option) != 0)
-				return 0;
-			*n_blocks_count = option;
-			break;
-		case Opt_nobh:
-			ext4_msg(sb, KERN_WARNING,
-				 "Ignoring deprecated nobh option");
-			break;
-		case Opt_bh:
-			ext4_msg(sb, KERN_WARNING,
-				 "Ignoring deprecated bh option");
-			break;
-		case Opt_i_version:
-			set_opt(sb, I_VERSION);
-			sb->s_flags |= MS_I_VERSION;
-			break;
-		case Opt_nodelalloc:
-			clear_opt(sb, DELALLOC);
-			clear_opt2(sb, EXPLICIT_DELALLOC);
-			break;
-		case Opt_mblk_io_submit:
-			set_opt(sb, MBLK_IO_SUBMIT);
-			break;
-		case Opt_nomblk_io_submit:
-			clear_opt(sb, MBLK_IO_SUBMIT);
-			break;
-		case Opt_stripe:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			sbi->s_stripe = option;
-			break;
-		case Opt_delalloc:
-			set_opt(sb, DELALLOC);
-			set_opt2(sb, EXPLICIT_DELALLOC);
-			break;
-		case Opt_block_validity:
-			set_opt(sb, BLOCK_VALIDITY);
-			break;
-		case Opt_noblock_validity:
-			clear_opt(sb, BLOCK_VALIDITY);
-			break;
-		case Opt_inode_readahead_blks:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0 || option > (1 << 30))
-				return 0;
-			if (option && !is_power_of_2(option)) {
-				ext4_msg(sb, KERN_ERR,
-					 "EXT4-fs: inode_readahead_blks"
-					 " must be a power of 2");
-				return 0;
+		} else {
+			if (!args->from)
+				arg = 1;
+			if (m->flags & MOPT_CLEAR)
+				arg = !arg;
+			else if (unlikely(!(m->flags & MOPT_SET))) {
+				ext4_msg(sb, KERN_WARNING,
+					 "buggy handling of option %s", opt);
+				WARN_ON(1);
+				return -1;
			}
-			sbi->s_inode_readahead_blks = option;
-			break;
-		case Opt_journal_ioprio:
-			if (match_int(&args[0], &option))
-				return 0;
-			if (option < 0 || option > 7)
-				break;
-			*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
-							    option);
-			break;
-		case Opt_noauto_da_alloc:
-			set_opt(sb, NO_AUTO_DA_ALLOC);
-			break;
-		case Opt_auto_da_alloc:
-			if (args[0].from) {
-				if (match_int(&args[0], &option))
-					return 0;
-			} else
-				option = 1;	/* No argument, default to 1 */
-			if (option)
-				clear_opt(sb, NO_AUTO_DA_ALLOC);
+			if (arg != 0)
+				sbi->s_mount_opt |= m->mount_opt;
			else
-				set_opt(sb,NO_AUTO_DA_ALLOC);
-			break;
-		case Opt_discard:
-			set_opt(sb, DISCARD);
-			break;
-		case Opt_nodiscard:
-			clear_opt(sb, DISCARD);
-			break;
-		case Opt_dioread_nolock:
-			set_opt(sb, DIOREAD_NOLOCK);
-			break;
-		case Opt_dioread_lock:
-			clear_opt(sb, DIOREAD_NOLOCK);
-			break;
-		case Opt_init_itable:
-			set_opt(sb, INIT_INODE_TABLE);
-			if (args[0].from) {
-				if (match_int(&args[0], &option))
-					return 0;
-			} else
-				option = EXT4_DEF_LI_WAIT_MULT;
-			if (option < 0)
-				return 0;
-			sbi->s_li_wait_mult = option;
-			break;
-		case Opt_noinit_itable:
-			clear_opt(sb, INIT_INODE_TABLE);
-			break;
-		default:
-			ext4_msg(sb, KERN_ERR,
-			       "Unrecognized mount option \"%s\" "
-			       "or missing value", p);
-			return 0;
+				sbi->s_mount_opt &= ~m->mount_opt;
		}
+		return 1;
+	}
+	ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
+		 "or missing value", opt);
+	return -1;
+}
+
+static int parse_options(char *options, struct super_block *sb,
+			 unsigned long *journal_devnum,
+			 unsigned int *journal_ioprio,
+			 int is_remount)
+{
+#ifdef CONFIG_QUOTA
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+#endif
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int token;
+
+	if (!options)
+		return 1;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		if (!*p)
+			continue;
+		/*
+		 * Initialize args struct so we know whether arg was
+		 * found; some options take optional arguments.
+		 */
+		args[0].to = args[0].from = 0;
+		token = match_token(p, tokens, args);
+		if (handle_mount_opt(sb, p, token, args, journal_devnum,
+				     journal_ioprio, is_remount) < 0)
+			return 0;
	}
 #ifdef CONFIG_QUOTA
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
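
The heart of the rewrite above is that most boolean options shrink to one-line ext4_mount_opts[] entries, applied by a single loop instead of per-option switch cases. A condensed sketch of the set/clear logic, mirroring the MOPT_SET/MOPT_CLEAR branch at the end of handle_mount_opt() (illustrative helper, not code from the patch):

	/* illustrative: how one boolean ext4_mount_opts[] entry is applied */
	static void apply_bool_opt(struct ext4_sb_info *sbi,
				   const struct mount_opts *m, int arg)
	{
		if (m->flags & MOPT_CLEAR)	/* "nofoo" entries invert the sense */
			arg = !arg;
		if (arg != 0)
			sbi->s_mount_opt |= m->mount_opt;
		else
			sbi->s_mount_opt &= ~m->mount_opt;
	}
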
@@ -1942,6 +1651,160 @@ set_qf_format:
	return 1;
 }
 
+static inline void ext4_show_quota_options(struct seq_file *seq,
+					   struct super_block *sb)
+{
+#if defined(CONFIG_QUOTA)
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	if (sbi->s_jquota_fmt) {
+		char *fmtname = "";
+
+		switch (sbi->s_jquota_fmt) {
+		case QFMT_VFS_OLD:
+			fmtname = "vfsold";
+			break;
+		case QFMT_VFS_V0:
+			fmtname = "vfsv0";
+			break;
+		case QFMT_VFS_V1:
+			fmtname = "vfsv1";
+			break;
+		}
+		seq_printf(seq, ",jqfmt=%s", fmtname);
+	}
+
+	if (sbi->s_qf_names[USRQUOTA])
+		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+
+	if (sbi->s_qf_names[GRPQUOTA])
+		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+
+	if (test_opt(sb, USRQUOTA))
+		seq_puts(seq, ",usrquota");
+
+	if (test_opt(sb, GRPQUOTA))
+		seq_puts(seq, ",grpquota");
+#endif
+}
+
+static const char *token2str(int token)
+{
+	static const struct match_token *t;
+
+	for (t = tokens; t->token != Opt_err; t++)
+		if (t->token == token && !strchr(t->pattern, '='))
+			break;
+	return t->pattern;
+}
+
+/*
+ * Show an option if
+ *  - it's set to a non-default value OR
+ *  - if the per-sb default is different from the global default
+ */
+static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
+			      int nodefs)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+	const struct mount_opts *m;
+	char sep = nodefs ? '\n' : ',';
+
+#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
+#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
+
+	if (sbi->s_sb_block != 1)
+		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
+
+	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+		int want_set = m->flags & MOPT_SET;
+		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
+		    (m->flags & MOPT_CLEAR_ERR))
+			continue;
+		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
+			continue; /* skip if same as the default */
+		if ((want_set &&
+		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
+		    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
+			continue; /* select Opt_noFoo vs Opt_Foo */
+		SEQ_OPTS_PRINT("%s", token2str(m->token));
+	}
+
+	if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID ||
+	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+		SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid);
+	if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID ||
+	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+		SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid);
+	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
+	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
+		SEQ_OPTS_PUTS("errors=remount-ro");
+	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
+		SEQ_OPTS_PUTS("errors=continue");
+	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
+		SEQ_OPTS_PUTS("errors=panic");
+	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
+	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
+		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
+	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
+		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
+	if (sb->s_flags & MS_I_VERSION)
+		SEQ_OPTS_PUTS("i_version");
+	if (nodefs || sbi->s_stripe)
+		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
+	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+			SEQ_OPTS_PUTS("data=journal");
+		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+			SEQ_OPTS_PUTS("data=ordered");
+		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+			SEQ_OPTS_PUTS("data=writeback");
+	}
+	if (nodefs ||
+	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
+			       sbi->s_inode_readahead_blks);
+
+	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
+		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+
+	ext4_show_quota_options(seq, sb);
+	return 0;
+}
+
+static int ext4_show_options(struct seq_file *seq, struct dentry *root)
+{
+	return _ext4_show_options(seq, root->d_sb, 0);
+}
+
+static int options_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	int rc;
+
+	seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
+	rc = _ext4_show_options(seq, sb, 1);
+	seq_puts(seq, "\n");
+	return rc;
+}
+
+static int options_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, options_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations ext4_seq_options_fops = {
+	.owner = THIS_MODULE,
+	.open = options_open_fs,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
			    int read_only)
 {
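
Two details of the new show-options code are worth spelling out. First, the test m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt) is non-zero exactly when a flag differs from the mount-time default, so options left at their defaults are skipped. Second, this hunk also adds the handlers behind a per-filesystem procfs file; assuming a device named sda1, reading /proc/fs/ext4/sda1/options should now list every effective option, one per line (the nodefs path uses '\n' as the separator and prints all values, prefixed by rw or ro). A worked example of the XOR test, written as a comment:

	/* illustrative: suppose s_mount_opt = DELALLOC|BARRIER and the
	 * default def_mount_opt = DELALLOC; only BARRIER differs:
	 *
	 *   s_mount_opt ^ def_mount_opt                   == BARRIER
	 *   BARRIER  & (s_mount_opt ^ def_mount_opt) != 0 -> shown
	 *   DELALLOC & (s_mount_opt ^ def_mount_opt) == 0 -> skipped
	 */
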
@@ -2503,18 +2366,6 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
-static ssize_t extent_cache_hits_show(struct ext4_attr *a,
-				      struct ext4_sb_info *sbi, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
-}
-
-static ssize_t extent_cache_misses_show(struct ext4_attr *a,
-					struct ext4_sb_info *sbi, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
-}
-
 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
					  struct ext4_sb_info *sbi,
					  const char *buf, size_t count)
@@ -2572,8 +2423,6 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 EXT4_RO_ATTR(delayed_allocation_blocks);
 EXT4_RO_ATTR(session_write_kbytes);
 EXT4_RO_ATTR(lifetime_write_kbytes);
-EXT4_RO_ATTR(extent_cache_hits);
-EXT4_RO_ATTR(extent_cache_misses);
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
		 inode_readahead_blks_store, s_inode_readahead_blks);
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2589,8 +2438,6 @@ static struct attribute *ext4_attrs[] = {
	ATTR_LIST(delayed_allocation_blocks),
	ATTR_LIST(session_write_kbytes),
	ATTR_LIST(lifetime_write_kbytes),
-	ATTR_LIST(extent_cache_hits),
-	ATTR_LIST(extent_cache_misses),
	ATTR_LIST(inode_readahead_blks),
	ATTR_LIST(inode_goal),
	ATTR_LIST(mb_stats),
@@ -2945,7 +2792,7 @@ static int ext4_run_lazyinit_thread(void)
		ext4_clear_request_list();
		kfree(ext4_li_info);
		ext4_li_info = NULL;
-		printk(KERN_CRIT "EXT4: error %d creating inode table "
+		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
			 "initialization thread\n",
			 err);
		return err;
@@ -3183,11 +3030,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
		set_opt(sb, INIT_INODE_TABLE);
	if (def_mount_opts & EXT4_DEFM_DEBUG)
		set_opt(sb, DEBUG);
-	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
-		ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
-			"2.6.38");
+	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
		set_opt(sb, GRPID);
-	}
	if (def_mount_opts & EXT4_DEFM_UID16)
		set_opt(sb, NO_UID32);
	/* xattr user namespace & acls are now defaulted on */
@@ -3240,13 +3084,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
 
	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
-			   &journal_devnum, &journal_ioprio, NULL, 0)) {
+			   &journal_devnum, &journal_ioprio, 0)) {
		ext4_msg(sb, KERN_WARNING,
			 "failed to parse options in superblock: %s",
			 sbi->s_es->s_mount_opts);
	}
+	sbi->s_def_mount_opt = sbi->s_mount_opt;
	if (!parse_options((char *) data, sb, &journal_devnum,
-			   &journal_ioprio, NULL, 0))
+			   &journal_ioprio, 0))
		goto failed_mount;
 
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
@@ -3416,7 +3261,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #else
		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
 #endif
-		sb->s_dirt = 1;
	}
 
	/* Handle clustersize */
@@ -3540,6 +3384,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
	if (ext4_proc_root)
		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
 
+	if (sbi->s_proc)
+		proc_create_data("options", S_IRUGO, sbi->s_proc,
+				 &ext4_seq_options_fops, sb);
+
	bgl_lock_init(sbi->s_blockgroup_lock);
 
	for (i = 0; i < db_count; i++) {
@@ -3694,6 +3542,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
	}
	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
+	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
+
	/*
	 * The journal may have updated the bg summary counts, so we
	 * need to update the global counters.
@@ -3735,9 +3585,8 @@ no_journal:
		iput(root);
		goto failed_mount4;
	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
-		iput(root);
		ext4_msg(sb, KERN_ERR, "get root dentry failed");
		ret = -ENOMEM;
		goto failed_mount4;
@@ -3862,6 +3711,7 @@ failed_mount2:
	ext4_kvfree(sbi->s_group_desc);
 failed_mount:
	if (sbi->s_proc) {
+		remove_proc_entry("options", sbi->s_proc);
		remove_proc_entry(sb->s_id, ext4_proc_root);
	}
 #ifdef CONFIG_QUOTA
@@ -4091,15 +3941,6 @@ static int ext4_load_journal(struct super_block *sb,
	if (!(journal->j_flags & JBD2_BARRIER))
		ext4_msg(sb, KERN_INFO, "barriers disabled");
 
-	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
-		err = jbd2_journal_update_format(journal);
-		if (err) {
-			ext4_msg(sb, KERN_ERR, "error updating journal");
-			jbd2_journal_destroy(journal);
-			return err;
-		}
-	}
-
	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
		err = jbd2_journal_wipe(journal, !really_read_only);
	if (!err) {
@@ -4386,7 +4227,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 {
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_fsblk_t n_blocks_count = 0;
	unsigned long old_sb_flags;
	struct ext4_mount_options old_opts;
	int enable_quota = 0;
@@ -4419,8 +4259,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
	/*
	 * Allow the "check" option to be passed as a remount option.
	 */
-	if (!parse_options(data, sb, NULL, &journal_ioprio,
-			   &n_blocks_count, 1)) {
+	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
		err = -EINVAL;
		goto restore_opts;
	}
@@ -4438,8 +4277,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
	}
 
-	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
-	    n_blocks_count > ext4_blocks_count(es)) {
+	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
		if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
			err = -EROFS;
			goto restore_opts;
@@ -4514,8 +4352,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
			if (sbi->s_journal)
				ext4_clear_journal_err(sb, es);
			sbi->s_mount_state = le16_to_cpu(es->s_state);
-			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
-				goto restore_opts;
			if (!ext4_setup_super(sb, es, 0))
				sb->s_flags &= ~MS_RDONLY;
			if (EXT4_HAS_INCOMPAT_FEATURE(sb,
@@ -5056,6 +4892,9 @@ static int __init ext4_init_fs(void)
 {
	int i, err;
 
+	ext4_li_info = NULL;
+	mutex_init(&ext4_li_mtx);
+
	ext4_check_flag_values();
 
	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5094,8 +4933,6 @@ static int __init ext4_init_fs(void)
	if (err)
		goto out;
 
-	ext4_li_info = NULL;
-	mutex_init(&ext4_li_mtx);
	return 0;
 out:
	unregister_as_ext2();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93a00d89a22..e88748e55c0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -82,8 +82,8 @@
		printk("\n"); \
	} while (0)
 #else
-# define ea_idebug(f...)
-# define ea_bdebug(f...)
+# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 static void ext4_xattr_cache_insert(struct buffer_head *);
@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
 static inline int
 ext4_xattr_check_block(struct buffer_head *bh)
 {
-	int error;
-
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
	    BHDR(bh)->h_blocks != cpu_to_le32(1))
		return -EIO;
-	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
-	return error;
+	return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
 }
 
 static inline int
@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
	error = -ENODATA;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
-	ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+	ea_idebug(inode, "reading block %llu",
+		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	if (!bh)
		goto cleanup;
@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
	error = 0;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
-	ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+	ea_idebug(inode, "reading block %llu",
+		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
+		unlock_buffer(bh);
	} else {
		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+		if (ce)
+			mb_cache_entry_release(ce);
+		unlock_buffer(bh);
		error = ext4_handle_dirty_metadata(handle, inode, bh);
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		dquot_free_block(inode, 1);
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
-		if (ce)
-			mb_cache_entry_release(ce);
	}
-	unlock_buffer(bh);
 out:
	ext4_std_error(inode->i_sb, error);
	return;
@@ -834,7 +834,8 @@ inserted:
			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
 
-			ea_idebug(inode, "creating block %d", block);
+			ea_idebug(inode, "creating block %llu",
+				  (unsigned long long)block);
 
			new_bh = sb_getblk(sb, block);
			if (!new_bh) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3ab841054d5..21687e31acc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1496,11 +1496,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
	root_inode->i_ino = MSDOS_ROOT_INO;
	root_inode->i_version = 1;
	error = fat_read_root(root_inode);
-	if (error < 0)
+	if (error < 0) {
+		iput(root_inode);
		goto out_fail;
+	}
	error = -ENOMEM;
	insert_inode_hash(root_inode);
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root) {
		fat_msg(sb, KERN_ERR, "get root inode failed");
		goto out_fail;
@@ -1516,8 +1518,6 @@ out_invalid:
 out_fail:
	if (fat_inode)
		iput(fat_inode);
-	if (root_inode)
-		iput(root_inode);
	unload_nls(sbi->nls_io);
	unload_nls(sbi->nls_disk);
	if (sbi->options.iocharset != fat_default_iocharset)
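
The fat_fill_super() changes follow the same d_alloc_root() to d_make_root() conversion as the ext4 hunk earlier: d_make_root() consumes the inode reference it is given and drops it itself on failure, so callers must no longer iput() that inode in their error paths (here the only remaining explicit iput(root_inode) moves to the fat_read_root() failure case, which runs before d_make_root() has taken ownership). A minimal sketch of the resulting pattern, under those semantics:

	/* illustrative fill_super tail after the d_make_root() conversion */
	sb->s_root = d_make_root(root_inode);	/* consumes root_inode */
	if (!sb->s_root)
		return -ENOMEM;	/* no iput(root_inode): d_make_root() did it */
	return 0;
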
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a81eb2367d3..98ae804f527 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -521,57 +521,46 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 
		op = &outname[*outlen * sizeof(wchar_t)];
	} else {
-		if (nls) {
-			for (i = 0, ip = name, op = outname, *outlen = 0;
-			     i < len && *outlen <= FAT_LFN_LEN;
-			     *outlen += 1)
-			{
-				if (escape && (*ip == ':')) {
-					if (i > len - 5)
-						return -EINVAL;
-					ec = 0;
-					for (k = 1; k < 5; k++) {
-						nc = ip[k];
-						ec <<= 4;
-						if (nc >= '0' && nc <= '9') {
-							ec |= nc - '0';
-							continue;
-						}
-						if (nc >= 'a' && nc <= 'f') {
-							ec |= nc - ('a' - 10);
-							continue;
-						}
-						if (nc >= 'A' && nc <= 'F') {
-							ec |= nc - ('A' - 10);
-							continue;
-						}
-						return -EINVAL;
-					}
-					*op++ = ec & 0xFF;
-					*op++ = ec >> 8;
-					ip += 5;
-					i += 5;
-				} else {
-					if ((charlen = nls->char2uni(ip, len - i, (wchar_t *)op)) < 0)
-						return -EINVAL;
-					ip += charlen;
-					i += charlen;
-					op += 2;
-				}
-			}
-			if (i < len)
-				return -ENAMETOOLONG;
-		} else {
-			for (i = 0, ip = name, op = outname, *outlen = 0;
-			     i < len && *outlen <= FAT_LFN_LEN;
-			     i++, *outlen += 1)
-			{
-				*op++ = *ip++;
-				*op++ = 0;
-			}
-			if (i < len)
-				return -ENAMETOOLONG;
-		}
+		for (i = 0, ip = name, op = outname, *outlen = 0;
+		     i < len && *outlen < FAT_LFN_LEN;
+		     *outlen += 1) {
+			if (escape && (*ip == ':')) {
+				if (i > len - 5)
+					return -EINVAL;
+				ec = 0;
+				for (k = 1; k < 5; k++) {
+					nc = ip[k];
+					ec <<= 4;
+					if (nc >= '0' && nc <= '9') {
+						ec |= nc - '0';
+						continue;
+					}
+					if (nc >= 'a' && nc <= 'f') {
+						ec |= nc - ('a' - 10);
+						continue;
+					}
+					if (nc >= 'A' && nc <= 'F') {
+						ec |= nc - ('A' - 10);
+						continue;
+					}
+					return -EINVAL;
+				}
+				*op++ = ec & 0xFF;
+				*op++ = ec >> 8;
+				ip += 5;
+				i += 5;
+			} else {
+				charlen = nls->char2uni(ip, len - i,
+							(wchar_t *)op);
+				if (charlen < 0)
+					return -EINVAL;
+				ip += charlen;
+				i += charlen;
+				op += 2;
+			}
+		}
+		if (i < len)
+			return -ENAMETOOLONG;
	}
 
	*longlen = *outlen;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 22764c7c838..75e7c1f3a08 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -32,20 +32,20 @@ void set_close_on_exec(unsigned int fd, int flag)
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
-		FD_SET(fd, fdt->close_on_exec);
+		__set_close_on_exec(fd, fdt);
	else
-		FD_CLR(fd, fdt->close_on_exec);
+		__clear_close_on_exec(fd, fdt);
	spin_unlock(&files->file_lock);
 }
 
-static int get_close_on_exec(unsigned int fd)
+static bool get_close_on_exec(unsigned int fd)
 {
	struct files_struct *files = current->files;
	struct fdtable *fdt;
-	int res;
+	bool res;
	rcu_read_lock();
	fdt = files_fdtable(files);
-	res = FD_ISSET(fd, fdt->close_on_exec);
+	res = close_on_exec(fd, fdt);
	rcu_read_unlock();
	return res;
 }
@@ -90,15 +90,15 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
-	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
+	if (!tofree && fd_is_open(newfd, fdt))
		goto out_unlock;
	get_file(file);
	rcu_assign_pointer(fdt->fd[newfd], file);
-	FD_SET(newfd, fdt->open_fds);
+	__set_open_fd(newfd, fdt);
	if (flags & O_CLOEXEC)
-		FD_SET(newfd, fdt->close_on_exec);
+		__set_close_on_exec(newfd, fdt);
	else
-		FD_CLR(newfd, fdt->close_on_exec);
+		__clear_close_on_exec(newfd, fdt);
	spin_unlock(&files->file_lock);
 
	if (tofree)
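
The FD_SET()/FD_CLR()/FD_ISSET() replacements above and in fs/file.c below come from dropping fd_set in favour of plain unsigned-long bitmaps in struct fdtable. The new helpers are thin wrappers over the bitops API; roughly as they appear in include/linux/fdtable.h in this series (quoted from memory, so treat the exact bodies as an approximation):

	static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
	{
		__set_bit(fd, fdt->close_on_exec);
	}

	static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
	{
		__clear_bit(fd, fdt->close_on_exec);
	}

	static inline bool close_on_exec(int fd, const struct fdtable *fdt)
	{
		return test_bit(fd, fdt->close_on_exec);
	}
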
diff --git a/fs/file.c b/fs/file.c
index 4c6992d8f3b..ba3f6053025 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,7 +6,7 @@
  *  Manage the dynamic fd arrays in the process files_struct.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
  */
 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
-static void *alloc_fdmem(unsigned int size)
+static void *alloc_fdmem(size_t size)
 {
	/*
	 * Very large allocations can stress page reclaim, so fall back to
@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 static struct fdtable * alloc_fdtable(unsigned int nr)
 {
	struct fdtable *fdt;
-	char *data;
+	void *data;
 
	/*
	 * Figure out how many fds we actually want to support in this fdtable.
@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
	data = alloc_fdmem(nr * sizeof(struct file *));
	if (!data)
		goto out_fdt;
-	fdt->fd = (struct file **)data;
-	data = alloc_fdmem(max_t(unsigned int,
+	fdt->fd = data;
+
+	data = alloc_fdmem(max_t(size_t,
				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
	if (!data)
		goto out_arr;
-	fdt->open_fds = (fd_set *)data;
+	fdt->open_fds = data;
	data += nr / BITS_PER_BYTE;
-	fdt->close_on_exec = (fd_set *)data;
+	fdt->close_on_exec = data;
	fdt->next = NULL;
 
	return fdt;
@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt)
275 int i; 276 int i;
276 277
277 /* Find the last open fd */ 278 /* Find the last open fd */
278 for (i = size/(8*sizeof(long)); i > 0; ) { 279 for (i = size / BITS_PER_LONG; i > 0; ) {
279 if (fdt->open_fds->fds_bits[--i]) 280 if (fdt->open_fds[--i])
280 break; 281 break;
281 } 282 }
282 i = (i+1) * 8 * sizeof(long); 283 i = (i + 1) * BITS_PER_LONG;
283 return i; 284 return i;
284} 285}
285 286
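With the fd_set wrapper gone, count_open_files() indexes unsigned longs directly and can use BITS_PER_LONG instead of spelling out 8 * sizeof(long). A standalone model of the backward scan, showing why the result is rounded up to a whole word of fds:

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

/* Find the last word with any bit set, then report a whole word's
 * worth of fds; the caller only needs an upper bound to copy. */
static unsigned count_open_fds(const unsigned long *open_fds, unsigned size)
{
	unsigned i;

	for (i = size / BITS_PER_LONG; i > 0; ) {
		if (open_fds[--i])
			break;
	}
	return (i + 1) * BITS_PER_LONG;
}

int main(void)
{
	unsigned long map[4] = { ~0UL, 0, 1UL << 5, 0 };

	/* Highest set bit lives in word 2, so this prints 192 on LP64. */
	printf("%u\n", count_open_fds(map, 4 * BITS_PER_LONG));
	return 0;
}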
@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
306 newf->next_fd = 0; 307 newf->next_fd = 0;
307 new_fdt = &newf->fdtab; 308 new_fdt = &newf->fdtab;
308 new_fdt->max_fds = NR_OPEN_DEFAULT; 309 new_fdt->max_fds = NR_OPEN_DEFAULT;
309 new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; 310 new_fdt->close_on_exec = newf->close_on_exec_init;
310 new_fdt->open_fds = (fd_set *)&newf->open_fds_init; 311 new_fdt->open_fds = newf->open_fds_init;
311 new_fdt->fd = &newf->fd_array[0]; 312 new_fdt->fd = &newf->fd_array[0];
312 new_fdt->next = NULL; 313 new_fdt->next = NULL;
313 314
@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
350 old_fds = old_fdt->fd; 351 old_fds = old_fdt->fd;
351 new_fds = new_fdt->fd; 352 new_fds = new_fdt->fd;
352 353
353 memcpy(new_fdt->open_fds->fds_bits, 354 memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
354 old_fdt->open_fds->fds_bits, open_files/8); 355 memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
355 memcpy(new_fdt->close_on_exec->fds_bits,
356 old_fdt->close_on_exec->fds_bits, open_files/8);
357 356
358 for (i = open_files; i != 0; i--) { 357 for (i = open_files; i != 0; i--) {
359 struct file *f = *old_fds++; 358 struct file *f = *old_fds++;
@@ -366,7 +365,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
366 * is partway through open(). So make sure that this 365 * is partway through open(). So make sure that this
367 * fd is available to the new process. 366 * fd is available to the new process.
368 */ 367 */
369 FD_CLR(open_files - i, new_fdt->open_fds); 368 __clear_open_fd(open_files - i, new_fdt);
370 } 369 }
371 rcu_assign_pointer(*new_fds++, f); 370 rcu_assign_pointer(*new_fds++, f);
372 } 371 }
@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
379 memset(new_fds, 0, size); 378 memset(new_fds, 0, size);
380 379
381 if (new_fdt->max_fds > open_files) { 380 if (new_fdt->max_fds > open_files) {
382 int left = (new_fdt->max_fds-open_files)/8; 381 int left = (new_fdt->max_fds - open_files) / 8;
383 int start = open_files / (8 * sizeof(unsigned long)); 382 int start = open_files / BITS_PER_LONG;
384 383
385 memset(&new_fdt->open_fds->fds_bits[start], 0, left); 384 memset(&new_fdt->open_fds[start], 0, left);
386 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 385 memset(&new_fdt->close_on_exec[start], 0, left);
387 } 386 }
388 387
389 rcu_assign_pointer(newf->fdt, new_fdt); 388 rcu_assign_pointer(newf->fdt, new_fdt);
@@ -419,8 +418,8 @@ struct files_struct init_files = {
419 .fdtab = { 418 .fdtab = {
420 .max_fds = NR_OPEN_DEFAULT, 419 .max_fds = NR_OPEN_DEFAULT,
421 .fd = &init_files.fd_array[0], 420 .fd = &init_files.fd_array[0],
422 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, 421 .close_on_exec = init_files.close_on_exec_init,
423 .open_fds = (fd_set *)&init_files.open_fds_init, 422 .open_fds = init_files.open_fds_init,
424 }, 423 },
425 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
426}; 425};
@@ -443,8 +442,7 @@ repeat:
443 fd = files->next_fd; 442 fd = files->next_fd;
444 443
445 if (fd < fdt->max_fds) 444 if (fd < fdt->max_fds)
446 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
447 fdt->max_fds, fd);
448 446
449 error = expand_files(files, fd); 447 error = expand_files(files, fd);
450 if (error < 0) 448 if (error < 0)
@@ -460,11 +458,11 @@ repeat:
460 if (start <= files->next_fd) 458 if (start <= files->next_fd)
461 files->next_fd = fd + 1; 459 files->next_fd = fd + 1;
462 460
463 FD_SET(fd, fdt->open_fds); 461 __set_open_fd(fd, fdt);
464 if (flags & O_CLOEXEC) 462 if (flags & O_CLOEXEC)
465 FD_SET(fd, fdt->close_on_exec); 463 __set_close_on_exec(fd, fdt);
466 else 464 else
467 FD_CLR(fd, fdt->close_on_exec); 465 __clear_close_on_exec(fd, fdt);
468 error = fd; 466 error = fd;
469#if 1 467#if 1
470 /* Sanity check */ 468 /* Sanity check */
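The __set_open_fd()/__set_close_on_exec()/__clear_close_on_exec() helpers replacing FD_SET/FD_CLR throughout this file reduce to non-atomic single-word bit operations on the new bitmaps (in the kernel they wrap __set_bit()/__clear_bit()). A userspace sketch of the equivalent operations:

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

static void set_fd_bit(unsigned fd, unsigned long *map)
{
	map[fd / BITS_PER_LONG] |= 1UL << (fd % BITS_PER_LONG);
}

static void clear_fd_bit(unsigned fd, unsigned long *map)
{
	map[fd / BITS_PER_LONG] &= ~(1UL << (fd % BITS_PER_LONG));
}

int main(void)
{
	unsigned long open_fds[2] = { 0 }, close_on_exec[2] = { 0 };

	/* The moral equivalent of allocating fd 3 with O_CLOEXEC... */
	set_fd_bit(3, open_fds);
	set_fd_bit(3, close_on_exec);
	/* ...and of a later dup3(oldfd, 3, 0) clearing the flag. */
	clear_fd_bit(3, close_on_exec);
	printf("open=%lx cloexec=%lx\n", open_fds[0], close_on_exec[0]);
	return 0;
}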
diff --git a/fs/file_table.c b/fs/file_table.c
index 20002e39754..70f2a0fd6ae 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(alloc_file);
204 * to write to @file, along with access to write through 204 * to write to @file, along with access to write through
205 * its vfsmount. 205 * its vfsmount.
206 */ 206 */
207void drop_file_write_access(struct file *file) 207static void drop_file_write_access(struct file *file)
208{ 208{
209 struct vfsmount *mnt = file->f_path.mnt; 209 struct vfsmount *mnt = file->f_path.mnt;
210 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -219,7 +219,6 @@ void drop_file_write_access(struct file *file)
219 mnt_drop_write(mnt); 219 mnt_drop_write(mnt);
220 file_release_write(file); 220 file_release_write(file);
221} 221}
222EXPORT_SYMBOL_GPL(drop_file_write_access);
223 222
224/* the real guts of fput() - releasing the last reference to file 223/* the real guts of fput() - releasing the last reference to file
225 */ 224 */
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 9d1c9955838..d4fabd26084 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -224,9 +224,8 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
224 ret = PTR_ERR(root); 224 ret = PTR_ERR(root);
225 goto out; 225 goto out;
226 } 226 }
227 sbp->s_root = d_alloc_root(root); 227 sbp->s_root = d_make_root(root);
228 if (!sbp->s_root) { 228 if (!sbp->s_root) {
229 iput(root);
230 printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); 229 printk(KERN_WARNING "vxfs: unable to get root dentry.\n");
231 goto out_free_ilist; 230 goto out_free_ilist;
232 } 231 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5b4a9362d5a..539f36cf3e4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -14,7 +14,7 @@
14 */ 14 */
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/module.h> 17#include <linux/export.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
@@ -256,7 +256,8 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
256} 256}
257 257
258/* 258/*
259 * Move expired dirty inodes from @delaying_queue to @dispatch_queue. 259 * Move expired (dirtied after work->older_than_this) dirty inodes from
260 * @delaying_queue to @dispatch_queue.
260 */ 261 */
261static int move_expired_inodes(struct list_head *delaying_queue, 262static int move_expired_inodes(struct list_head *delaying_queue,
262 struct list_head *dispatch_queue, 263 struct list_head *dispatch_queue,
@@ -1148,23 +1149,6 @@ out_unlock_inode:
1148} 1149}
1149EXPORT_SYMBOL(__mark_inode_dirty); 1150EXPORT_SYMBOL(__mark_inode_dirty);
1150 1151
1151/*
1152 * Write out a superblock's list of dirty inodes. A wait will be performed
1153 * upon no inodes, all inodes or the final one, depending upon sync_mode.
1154 *
1155 * If older_than_this is non-NULL, then only write out inodes which
1156 * had their first dirtying at a time earlier than *older_than_this.
1157 *
1158 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
1159 * This function assumes that the blockdev superblock's inodes are backed by
1160 * a variety of queues, so all inodes are searched. For other superblocks,
1161 * assume that all inodes are backed by the same queue.
1162 *
1163 * The inodes to be written are parked on bdi->b_io. They are moved back onto
1164 * bdi->b_dirty as they are selected for writing. This way, none can be missed
1165 * on the writer throttling path, and we get decent balancing between many
1166 * throttled threads: we don't want them all piling up on inode_sync_wait.
1167 */
1168static void wait_sb_inodes(struct super_block *sb) 1152static void wait_sb_inodes(struct super_block *sb)
1169{ 1153{
1170 struct inode *inode, *old_inode = NULL; 1154 struct inode *inode, *old_inode = NULL;
@@ -1284,7 +1268,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
1284EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1268EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 1269
1286/** 1270/**
1287 * writeback_inodes_sb_if_idle - start writeback if none underway 1271 * writeback_inodes_sb_nr_if_idle - start writeback if none underway
1288 * @sb: the superblock 1272 * @sb: the superblock
1289 * @nr: the number of pages to write 1273 * @nr: the number of pages to write
1290 * @reason: reason why some writeback work was initiated 1274 * @reason: reason why some writeback work was initiated
@@ -1364,8 +1348,6 @@ int write_inode_now(struct inode *inode, int sync)
1364 ret = writeback_single_inode(inode, wb, &wbc); 1348 ret = writeback_single_inode(inode, wb, &wbc);
1365 spin_unlock(&inode->i_lock); 1349 spin_unlock(&inode->i_lock);
1366 spin_unlock(&wb->list_lock); 1350 spin_unlock(&wb->list_lock);
1367 if (sync)
1368 inode_sync_wait(inode);
1369 return ret; 1351 return ret;
1370} 1352}
1371EXPORT_SYMBOL(write_inode_now); 1353EXPORT_SYMBOL(write_inode_now);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 78b519c1353..e159e682ad4 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -1,4 +1,4 @@
1#include <linux/module.h> 1#include <linux/export.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/path.h> 4#include <linux/path.h>
@@ -26,11 +26,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
26{ 26{
27 struct path old_root; 27 struct path old_root;
28 28
29 path_get_longterm(path);
29 spin_lock(&fs->lock); 30 spin_lock(&fs->lock);
30 write_seqcount_begin(&fs->seq); 31 write_seqcount_begin(&fs->seq);
31 old_root = fs->root; 32 old_root = fs->root;
32 fs->root = *path; 33 fs->root = *path;
33 path_get_longterm(path);
34 write_seqcount_end(&fs->seq); 34 write_seqcount_end(&fs->seq);
35 spin_unlock(&fs->lock); 35 spin_unlock(&fs->lock);
36 if (old_root.dentry) 36 if (old_root.dentry)
@@ -45,11 +45,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
45{ 45{
46 struct path old_pwd; 46 struct path old_pwd;
47 47
48 path_get_longterm(path);
48 spin_lock(&fs->lock); 49 spin_lock(&fs->lock);
49 write_seqcount_begin(&fs->seq); 50 write_seqcount_begin(&fs->seq);
50 old_pwd = fs->pwd; 51 old_pwd = fs->pwd;
51 fs->pwd = *path; 52 fs->pwd = *path;
52 path_get_longterm(path);
53 write_seqcount_end(&fs->seq); 53 write_seqcount_end(&fs->seq);
54 spin_unlock(&fs->lock); 54 spin_unlock(&fs->lock);
55 55
@@ -57,6 +57,14 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
57 path_put_longterm(&old_pwd); 57 path_put_longterm(&old_pwd);
58} 58}
59 59
60static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
61{
62 if (likely(p->dentry != old->dentry || p->mnt != old->mnt))
63 return 0;
64 *p = *new;
65 return 1;
66}
67
60void chroot_fs_refs(struct path *old_root, struct path *new_root) 68void chroot_fs_refs(struct path *old_root, struct path *new_root)
61{ 69{
62 struct task_struct *g, *p; 70 struct task_struct *g, *p;
@@ -68,21 +76,16 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
68 task_lock(p); 76 task_lock(p);
69 fs = p->fs; 77 fs = p->fs;
70 if (fs) { 78 if (fs) {
79 int hits = 0;
71 spin_lock(&fs->lock); 80 spin_lock(&fs->lock);
72 write_seqcount_begin(&fs->seq); 81 write_seqcount_begin(&fs->seq);
73 if (fs->root.dentry == old_root->dentry 82 hits += replace_path(&fs->root, old_root, new_root);
74 && fs->root.mnt == old_root->mnt) { 83 hits += replace_path(&fs->pwd, old_root, new_root);
75 path_get_longterm(new_root); 84 write_seqcount_end(&fs->seq);
76 fs->root = *new_root; 85 while (hits--) {
77 count++; 86 count++;
78 }
79 if (fs->pwd.dentry == old_root->dentry
80 && fs->pwd.mnt == old_root->mnt) {
81 path_get_longterm(new_root); 87 path_get_longterm(new_root);
82 fs->pwd = *new_root;
83 count++;
84 } 88 }
85 write_seqcount_end(&fs->seq);
86 spin_unlock(&fs->lock); 89 spin_unlock(&fs->lock);
87 } 90 }
88 task_unlock(p); 91 task_unlock(p);
@@ -107,10 +110,8 @@ void exit_fs(struct task_struct *tsk)
107 int kill; 110 int kill;
108 task_lock(tsk); 111 task_lock(tsk);
109 spin_lock(&fs->lock); 112 spin_lock(&fs->lock);
110 write_seqcount_begin(&fs->seq);
111 tsk->fs = NULL; 113 tsk->fs = NULL;
112 kill = !--fs->users; 114 kill = !--fs->users;
113 write_seqcount_end(&fs->seq);
114 spin_unlock(&fs->lock); 115 spin_unlock(&fs->lock);
115 task_unlock(tsk); 116 task_unlock(tsk);
116 if (kill) 117 if (kill)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5f3368ab0fa..7df2b5e8fbe 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -838,10 +838,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
838 } 838 }
839 } 839 }
840 if (page) { 840 if (page) {
841 void *mapaddr = kmap_atomic(page, KM_USER0); 841 void *mapaddr = kmap_atomic(page);
842 void *buf = mapaddr + offset; 842 void *buf = mapaddr + offset;
843 offset += fuse_copy_do(cs, &buf, &count); 843 offset += fuse_copy_do(cs, &buf, &count);
844 kunmap_atomic(mapaddr, KM_USER0); 844 kunmap_atomic(mapaddr);
845 } else 845 } else
846 offset += fuse_copy_do(cs, NULL, &count); 846 offset += fuse_copy_do(cs, NULL, &count);
847 } 847 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 206632887bb..df5ac048dc7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
387 if (fc->no_create) 387 if (fc->no_create)
388 return -ENOSYS; 388 return -ENOSYS;
389 389
390 if (flags & O_DIRECT)
391 return -EINVAL;
392
393 forget = fuse_alloc_forget(); 390 forget = fuse_alloc_forget();
394 if (!forget) 391 if (!forget)
395 return -ENOMEM; 392 return -ENOMEM;
@@ -644,13 +641,12 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
644 fuse_put_request(fc, req); 641 fuse_put_request(fc, req);
645 if (!err) { 642 if (!err) {
646 struct inode *inode = entry->d_inode; 643 struct inode *inode = entry->d_inode;
644 struct fuse_inode *fi = get_fuse_inode(inode);
647 645
648 /* 646 spin_lock(&fc->lock);
649 * Set nlink to zero so the inode can be cleared, if the inode 647 fi->attr_version = ++fc->attr_version;
650 * does have more links this will be discovered at the next 648 drop_nlink(inode);
651 * lookup/getattr. 649 spin_unlock(&fc->lock);
652 */
653 clear_nlink(inode);
654 fuse_invalidate_attr(inode); 650 fuse_invalidate_attr(inode);
655 fuse_invalidate_attr(dir); 651 fuse_invalidate_attr(dir);
656 fuse_invalidate_entry_cache(entry); 652 fuse_invalidate_entry_cache(entry);
@@ -762,8 +758,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
762 will reflect changes in the backing inode (link count, 758 will reflect changes in the backing inode (link count,
763 etc.) 759 etc.)
764 */ 760 */
765 if (!err || err == -EINTR) 761 if (!err) {
762 struct fuse_inode *fi = get_fuse_inode(inode);
763
764 spin_lock(&fc->lock);
765 fi->attr_version = ++fc->attr_version;
766 inc_nlink(inode);
767 spin_unlock(&fc->lock);
768 fuse_invalidate_attr(inode);
769 } else if (err == -EINTR) {
766 fuse_invalidate_attr(inode); 770 fuse_invalidate_attr(inode);
771 }
767 return err; 772 return err;
768} 773}
769 774
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd93fb..504e61b7fd7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
194 struct fuse_conn *fc = get_fuse_conn(inode); 194 struct fuse_conn *fc = get_fuse_conn(inode);
195 int err; 195 int err;
196 196
197 /* VFS checks this, but only _after_ ->open() */
198 if (file->f_flags & O_DIRECT)
199 return -EINVAL;
200
201 err = generic_file_open(inode, file); 197 err = generic_file_open(inode, file);
202 if (err) 198 if (err)
203 return err; 199 return err;
@@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
932 struct file *file = iocb->ki_filp; 928 struct file *file = iocb->ki_filp;
933 struct address_space *mapping = file->f_mapping; 929 struct address_space *mapping = file->f_mapping;
934 size_t count = 0; 930 size_t count = 0;
931 size_t ocount = 0;
935 ssize_t written = 0; 932 ssize_t written = 0;
933 ssize_t written_buffered = 0;
936 struct inode *inode = mapping->host; 934 struct inode *inode = mapping->host;
937 ssize_t err; 935 ssize_t err;
938 struct iov_iter i; 936 struct iov_iter i;
937 loff_t endbyte = 0;
939 938
940 WARN_ON(iocb->ki_pos != pos); 939 WARN_ON(iocb->ki_pos != pos);
941 940
942 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); 941 ocount = 0;
942 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
943 if (err) 943 if (err)
944 return err; 944 return err;
945 945
946 count = ocount;
947
946 mutex_lock(&inode->i_mutex); 948 mutex_lock(&inode->i_mutex);
947 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 949 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
948 950
@@ -962,11 +964,41 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
962 964
963 file_update_time(file); 965 file_update_time(file);
964 966
965 iov_iter_init(&i, iov, nr_segs, count, 0); 967 if (file->f_flags & O_DIRECT) {
966 written = fuse_perform_write(file, mapping, &i, pos); 968 written = generic_file_direct_write(iocb, iov, &nr_segs,
967 if (written >= 0) 969 pos, &iocb->ki_pos,
968 iocb->ki_pos = pos + written; 970 count, ocount);
971 if (written < 0 || written == count)
972 goto out;
973
974 pos += written;
975 count -= written;
969 976
977 iov_iter_init(&i, iov, nr_segs, count, written);
978 written_buffered = fuse_perform_write(file, mapping, &i, pos);
979 if (written_buffered < 0) {
980 err = written_buffered;
981 goto out;
982 }
983 endbyte = pos + written_buffered - 1;
984
985 err = filemap_write_and_wait_range(file->f_mapping, pos,
986 endbyte);
987 if (err)
988 goto out;
989
990 invalidate_mapping_pages(file->f_mapping,
991 pos >> PAGE_CACHE_SHIFT,
992 endbyte >> PAGE_CACHE_SHIFT);
993
994 written += written_buffered;
995 iocb->ki_pos = pos + written_buffered;
996 } else {
997 iov_iter_init(&i, iov, nr_segs, count, 0);
998 written = fuse_perform_write(file, mapping, &i, pos);
999 if (written >= 0)
1000 iocb->ki_pos = pos + written;
1001 }
970out: 1002out:
971 current->backing_dev_info = NULL; 1003 current->backing_dev_info = NULL;
972 mutex_unlock(&inode->i_mutex); 1004 mutex_unlock(&inode->i_mutex);
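The new O_DIRECT branch follows the same recipe as the generic path in generic_file_aio_write(): attempt direct I/O, push any short remainder through the page cache, then flush and invalidate that byte range so stale cached pages cannot shadow the directly written data. A rough userspace analogue of the tail handling, using Linux-specific calls purely for illustration:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Write the tail through the cache, force it to storage, then drop
 * the cached pages so later direct reads see the on-disk bytes. */
static ssize_t buffered_tail(int fd, const void *buf, size_t len, off_t pos)
{
	ssize_t n = pwrite(fd, buf, len, pos);

	if (n > 0) {
		sync_file_range(fd, pos, n, SYNC_FILE_RANGE_WRITE |
					    SYNC_FILE_RANGE_WAIT_AFTER);
		posix_fadvise(fd, pos, n, POSIX_FADV_DONTNEED);
	}
	return n;
}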
@@ -1101,30 +1133,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1101 return res; 1133 return res;
1102} 1134}
1103 1135
1104static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1136static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1105 size_t count, loff_t *ppos) 1137 size_t count, loff_t *ppos)
1106{ 1138{
1107 struct inode *inode = file->f_path.dentry->d_inode; 1139 struct inode *inode = file->f_path.dentry->d_inode;
1108 ssize_t res; 1140 ssize_t res;
1109 1141
1110 if (is_bad_inode(inode))
1111 return -EIO;
1112
1113 /* Don't allow parallel writes to the same file */
1114 mutex_lock(&inode->i_mutex);
1115 res = generic_write_checks(file, ppos, &count, 0); 1142 res = generic_write_checks(file, ppos, &count, 0);
1116 if (!res) { 1143 if (!res) {
1117 res = fuse_direct_io(file, buf, count, ppos, 1); 1144 res = fuse_direct_io(file, buf, count, ppos, 1);
1118 if (res > 0) 1145 if (res > 0)
1119 fuse_write_update_size(inode, *ppos); 1146 fuse_write_update_size(inode, *ppos);
1120 } 1147 }
1121 mutex_unlock(&inode->i_mutex);
1122 1148
1123 fuse_invalidate_attr(inode); 1149 fuse_invalidate_attr(inode);
1124 1150
1125 return res; 1151 return res;
1126} 1152}
1127 1153
1154static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1155 size_t count, loff_t *ppos)
1156{
1157 struct inode *inode = file->f_path.dentry->d_inode;
1158 ssize_t res;
1159
1160 if (is_bad_inode(inode))
1161 return -EIO;
1162
1163 /* Don't allow parallel writes to the same file */
1164 mutex_lock(&inode->i_mutex);
1165 res = __fuse_direct_write(file, buf, count, ppos);
1166 mutex_unlock(&inode->i_mutex);
1167
1168 return res;
1169}
1170
1128static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1171static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1129{ 1172{
1130 __free_page(req->pages[0]); 1173 __free_page(req->pages[0]);
@@ -1887,11 +1930,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1887 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 1930 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1888 goto out; 1931 goto out;
1889 1932
1890 vaddr = kmap_atomic(pages[0], KM_USER0); 1933 vaddr = kmap_atomic(pages[0]);
1891 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 1934 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1892 transferred, in_iovs + out_iovs, 1935 transferred, in_iovs + out_iovs,
1893 (flags & FUSE_IOCTL_COMPAT) != 0); 1936 (flags & FUSE_IOCTL_COMPAT) != 0);
1894 kunmap_atomic(vaddr, KM_USER0); 1937 kunmap_atomic(vaddr);
1895 if (err) 1938 if (err)
1896 goto out; 1939 goto out;
1897 1940
@@ -2077,6 +2120,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2077 return 0; 2120 return 0;
2078} 2121}
2079 2122
2123static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2124 unsigned long nr_segs, loff_t *ppos, int rw)
2125{
2126 const struct iovec *vector = iov;
2127 ssize_t ret = 0;
2128
2129 while (nr_segs > 0) {
2130 void __user *base;
2131 size_t len;
2132 ssize_t nr;
2133
2134 base = vector->iov_base;
2135 len = vector->iov_len;
2136 vector++;
2137 nr_segs--;
2138
2139 if (rw == WRITE)
2140 nr = __fuse_direct_write(filp, base, len, ppos);
2141 else
2142 nr = fuse_direct_read(filp, base, len, ppos);
2143
2144 if (nr < 0) {
2145 if (!ret)
2146 ret = nr;
2147 break;
2148 }
2149 ret += nr;
2150 if (nr != len)
2151 break;
2152 }
2153
2154 return ret;
2155}
2156
2157
2158static ssize_t
2159fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2160 loff_t offset, unsigned long nr_segs)
2161{
2162 ssize_t ret = 0;
2163 struct file *file = NULL;
2164 loff_t pos = 0;
2165
2166 file = iocb->ki_filp;
2167 pos = offset;
2168
2169 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
2170
2171 return ret;
2172}
2173
2080static const struct file_operations fuse_file_operations = { 2174static const struct file_operations fuse_file_operations = {
2081 .llseek = fuse_file_llseek, 2175 .llseek = fuse_file_llseek,
2082 .read = do_sync_read, 2176 .read = do_sync_read,
@@ -2120,6 +2214,7 @@ static const struct address_space_operations fuse_file_aops = {
2120 .readpages = fuse_readpages, 2214 .readpages = fuse_readpages,
2121 .set_page_dirty = __set_page_dirty_nobuffers, 2215 .set_page_dirty = __set_page_dirty_nobuffers,
2122 .bmap = fuse_bmap, 2216 .bmap = fuse_bmap,
2217 .direct_IO = fuse_direct_IO,
2123}; 2218};
2124 2219
2125void fuse_init_file_inode(struct inode *inode) 2220void fuse_init_file_inode(struct inode *inode)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 64cf8d07393..26783eb2b1f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -947,6 +947,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
947 sb->s_magic = FUSE_SUPER_MAGIC; 947 sb->s_magic = FUSE_SUPER_MAGIC;
948 sb->s_op = &fuse_super_operations; 948 sb->s_op = &fuse_super_operations;
949 sb->s_maxbytes = MAX_LFS_FILESIZE; 949 sb->s_maxbytes = MAX_LFS_FILESIZE;
950 sb->s_time_gran = 1;
950 sb->s_export_op = &fuse_export_operations; 951 sb->s_export_op = &fuse_export_operations;
951 952
952 file = fget(d.fd); 953 file = fget(d.fd);
@@ -988,14 +989,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
988 989
989 err = -ENOMEM; 990 err = -ENOMEM;
990 root = fuse_get_root_inode(sb, d.rootmode); 991 root = fuse_get_root_inode(sb, d.rootmode);
991 if (!root) 992 root_dentry = d_make_root(root);
993 if (!root_dentry)
992 goto err_put_conn; 994 goto err_put_conn;
993
994 root_dentry = d_alloc_root(root);
995 if (!root_dentry) {
996 iput(root);
997 goto err_put_conn;
998 }
999 /* only now - we want root dentry with NULL ->d_op */ 995 /* only now - we want root dentry with NULL ->d_op */
1000 sb->s_d_op = &fuse_dentry_operations; 996 sb->s_d_op = &fuse_dentry_operations;
1001 997
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index c465ae066c6..eb08c9e43c2 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,10 +1,6 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on (64BIT || LBDAF) 3 depends on (64BIT || LBDAF)
4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM
7 select IP_SCTP if DLM_SCTP
8 select FS_POSIX_ACL 4 select FS_POSIX_ACL
9 select CRC32 5 select CRC32
10 select QUOTACTL 6 select QUOTACTL
@@ -29,7 +25,8 @@ config GFS2_FS
29 25
30config GFS2_FS_LOCKING_DLM 26config GFS2_FS_LOCKING_DLM
31 bool "GFS2 DLM locking" 27 bool "GFS2 DLM locking"
32 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG 28 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
29 HOTPLUG && DLM && CONFIGFS_FS && SYSFS
33 help 30 help
34 Multiple node locking module for GFS2 31 Multiple node locking module for GFS2
35 32
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 501e5cba09b..9b2ff0e851b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -434,12 +434,12 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
434 if (error) 434 if (error)
435 return error; 435 return error;
436 436
437 kaddr = kmap_atomic(page, KM_USER0); 437 kaddr = kmap_atomic(page);
438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) 438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); 439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); 441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
442 kunmap_atomic(kaddr, KM_USER0); 442 kunmap_atomic(kaddr);
443 flush_dcache_page(page); 443 flush_dcache_page(page);
444 brelse(dibh); 444 brelse(dibh);
445 SetPageUptodate(page); 445 SetPageUptodate(page);
@@ -542,9 +542,9 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL); 542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
543 if (IS_ERR(page)) 543 if (IS_ERR(page))
544 return PTR_ERR(page); 544 return PTR_ERR(page);
545 p = kmap_atomic(page, KM_USER0); 545 p = kmap_atomic(page);
546 memcpy(buf + copied, p + offset, amt); 546 memcpy(buf + copied, p + offset, amt);
547 kunmap_atomic(p, KM_USER0); 547 kunmap_atomic(p);
548 mark_page_accessed(page); 548 mark_page_accessed(page);
549 page_cache_release(page); 549 page_cache_release(page);
550 copied += amt; 550 copied += amt;
@@ -788,11 +788,11 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
789 789
790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); 790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
791 kaddr = kmap_atomic(page, KM_USER0); 791 kaddr = kmap_atomic(page);
792 memcpy(buf + pos, kaddr + pos, copied); 792 memcpy(buf + pos, kaddr + pos, copied);
793 memset(kaddr + pos + copied, 0, len - copied); 793 memset(kaddr + pos + copied, 0, len - copied);
794 flush_dcache_page(page); 794 flush_dcache_page(page);
795 kunmap_atomic(kaddr, KM_USER0); 795 kunmap_atomic(kaddr);
796 796
797 if (!PageUptodate(page)) 797 if (!PageUptodate(page))
798 SetPageUptodate(page); 798 SetPageUptodate(page);
@@ -807,7 +807,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
807 807
808 if (inode == sdp->sd_rindex) { 808 if (inode == sdp->sd_rindex) {
809 adjust_fs_space(inode); 809 adjust_fs_space(inode);
810 ip->i_gh.gh_flags |= GL_NOCACHE; 810 sdp->sd_rindex_uptodate = 0;
811 } 811 }
812 812
813 brelse(dibh); 813 brelse(dibh);
@@ -873,7 +873,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
873 873
874 if (inode == sdp->sd_rindex) { 874 if (inode == sdp->sd_rindex) {
875 adjust_fs_space(inode); 875 adjust_fs_space(inode);
876 ip->i_gh.gh_flags |= GL_NOCACHE; 876 sdp->sd_rindex_uptodate = 0;
877 } 877 }
878 878
879 brelse(dibh); 879 brelse(dibh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 14a70401597..03c04febe26 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -60,7 +60,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
60 int release = 0; 60 int release = 0;
61 61
62 if (!page || page->index) { 62 if (!page || page->index) {
63 page = grab_cache_page(inode->i_mapping, 0); 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
64 if (!page) 64 if (!page)
65 return -ENOMEM; 65 return -ENOMEM;
66 release = 1; 66 release = 1;
@@ -724,7 +724,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
724 int metadata; 724 int metadata;
725 unsigned int revokes = 0; 725 unsigned int revokes = 0;
726 int x; 726 int x;
727 int error = 0; 727 int error;
728
729 error = gfs2_rindex_update(sdp);
730 if (error)
731 return error;
728 732
729 if (!*top) 733 if (!*top)
730 sm->sm_first = 0; 734 sm->sm_first = 0;
@@ -930,7 +934,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
930 struct page *page; 934 struct page *page;
931 int err; 935 int err;
932 936
933 page = grab_cache_page(mapping, index); 937 page = find_or_create_page(mapping, index, GFP_NOFS);
934 if (!page) 938 if (!page)
935 return 0; 939 return 0;
936 940
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c35573abd37..a836056343f 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1844,6 +1844,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1844 unsigned int x, size = len * sizeof(u64); 1844 unsigned int x, size = len * sizeof(u64);
1845 int error; 1845 int error;
1846 1846
1847 error = gfs2_rindex_update(sdp);
1848 if (error)
1849 return error;
1850
1847 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1851 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1848 1852
1849 ht = kzalloc(size, GFP_NOFS); 1853 ht = kzalloc(size, GFP_NOFS);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c5fb3597f69..a3d2c9ee8d6 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -18,7 +18,6 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/ext2_fs.h>
22#include <linux/falloc.h> 21#include <linux/falloc.h>
23#include <linux/swap.h> 22#include <linux/swap.h>
24#include <linux/crc32.h> 23#include <linux/crc32.h>
@@ -313,6 +312,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
313 return gfs2_get_flags(filp, (u32 __user *)arg); 312 return gfs2_get_flags(filp, (u32 __user *)arg);
314 case FS_IOC_SETFLAGS: 313 case FS_IOC_SETFLAGS:
315 return gfs2_set_flags(filp, (u32 __user *)arg); 314 return gfs2_set_flags(filp, (u32 __user *)arg);
315 case FITRIM:
316 return gfs2_fitrim(filp, (void __user *)arg);
316 } 317 }
317 return -ENOTTY; 318 return -ENOTTY;
318} 319}
@@ -674,6 +675,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
674 struct gfs2_inode *ip = GFS2_I(inode); 675 struct gfs2_inode *ip = GFS2_I(inode);
675 struct buffer_head *dibh; 676 struct buffer_head *dibh;
676 int error; 677 int error;
678 loff_t size = len;
677 unsigned int nr_blks; 679 unsigned int nr_blks;
678 sector_t lblock = offset >> inode->i_blkbits; 680 sector_t lblock = offset >> inode->i_blkbits;
679 681
@@ -707,8 +709,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
707 goto out; 709 goto out;
708 } 710 }
709 } 711 }
710 if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) 712 if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
711 i_size_write(inode, offset + len); 713 i_size_write(inode, offset + size);
712 714
713 mark_inode_dirty(inode); 715 mark_inode_dirty(inode);
714 716
@@ -777,12 +779,14 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
777 if (unlikely(error)) 779 if (unlikely(error))
778 goto out_uninit; 780 goto out_uninit;
779 781
780 if (!gfs2_write_alloc_required(ip, offset, len))
781 goto out_unlock;
782
783 while (len > 0) { 782 while (len > 0) {
784 if (len < bytes) 783 if (len < bytes)
785 bytes = len; 784 bytes = len;
785 if (!gfs2_write_alloc_required(ip, offset, bytes)) {
786 len -= bytes;
787 offset += bytes;
788 continue;
789 }
786 qa = gfs2_qadata_get(ip); 790 qa = gfs2_qadata_get(ip);
787 if (!qa) { 791 if (!qa) {
788 error = -ENOMEM; 792 error = -ENOMEM;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 351a3e79778..dab2526071c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -29,6 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h>
32 33
33#include "gfs2.h" 34#include "gfs2.h"
34#include "incore.h" 35#include "incore.h"
@@ -543,6 +544,11 @@ __acquires(&gl->gl_spin)
543 do_error(gl, 0); /* Fail queued try locks */ 544 do_error(gl, 0); /* Fail queued try locks */
544 } 545 }
545 gl->gl_req = target; 546 gl->gl_req = target;
547 set_bit(GLF_BLOCKING, &gl->gl_flags);
548 if ((gl->gl_req == LM_ST_UNLOCKED) ||
549 (gl->gl_state == LM_ST_EXCLUSIVE) ||
550 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
551 clear_bit(GLF_BLOCKING, &gl->gl_flags);
546 spin_unlock(&gl->gl_spin); 552 spin_unlock(&gl->gl_spin);
547 if (glops->go_xmote_th) 553 if (glops->go_xmote_th)
548 glops->go_xmote_th(gl); 554 glops->go_xmote_th(gl);
@@ -744,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
744 return -ENOMEM; 750 return -ENOMEM;
745 751
746 atomic_inc(&sdp->sd_glock_disposal); 752 atomic_inc(&sdp->sd_glock_disposal);
753 gl->gl_sbd = sdp;
747 gl->gl_flags = 0; 754 gl->gl_flags = 0;
748 gl->gl_name = name; 755 gl->gl_name = name;
749 atomic_set(&gl->gl_ref, 1); 756 atomic_set(&gl->gl_ref, 1);
@@ -752,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
752 gl->gl_demote_state = LM_ST_EXCLUSIVE; 759 gl->gl_demote_state = LM_ST_EXCLUSIVE;
753 gl->gl_hash = hash; 760 gl->gl_hash = hash;
754 gl->gl_ops = glops; 761 gl->gl_ops = glops;
755 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); 762 gl->gl_dstamp = ktime_set(0, 0);
763 preempt_disable();
764 /* We use the global stats to estimate the initial per-glock stats */
765 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
766 preempt_enable();
767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
756 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 769 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
757 gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 770 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
758 gl->gl_tchange = jiffies; 771 gl->gl_tchange = jiffies;
759 gl->gl_object = NULL; 772 gl->gl_object = NULL;
760 gl->gl_sbd = sdp;
761 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 773 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
762 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 774 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
763 INIT_WORK(&gl->gl_delete, delete_work_func); 775 INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -999,6 +1011,8 @@ fail:
999 } 1011 }
1000 set_bit(GLF_QUEUED, &gl->gl_flags); 1012 set_bit(GLF_QUEUED, &gl->gl_flags);
1001 trace_gfs2_glock_queue(gh, 1); 1013 trace_gfs2_glock_queue(gh, 1);
1014 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1015 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1002 if (likely(insert_pt == NULL)) { 1016 if (likely(insert_pt == NULL)) {
1003 list_add_tail(&gh->gh_list, &gl->gl_holders); 1017 list_add_tail(&gh->gh_list, &gl->gl_holders);
1004 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1018 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1658,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1658 *p++ = 'L'; 1672 *p++ = 'L';
1659 if (gl->gl_object) 1673 if (gl->gl_object)
1660 *p++ = 'o'; 1674 *p++ = 'o';
1675 if (test_bit(GLF_BLOCKING, gflags))
1676 *p++ = 'b';
1661 *p = 0; 1677 *p = 0;
1662 return buf; 1678 return buf;
1663} 1679}
@@ -1714,8 +1730,78 @@ out:
1714 return error; 1730 return error;
1715} 1731}
1716 1732
1733static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1734{
1735 struct gfs2_glock *gl = iter_ptr;
1736
1737 seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
1738 gl->gl_name.ln_type,
1739 (unsigned long long)gl->gl_name.ln_number,
1740 (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1741 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1742 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1743 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1744 (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1745 (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1746 (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1747 (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1748 return 0;
1749}
1750
1751static const char *gfs2_gltype[] = {
1752 "type",
1753 "reserved",
1754 "nondisk",
1755 "inode",
1756 "rgrp",
1757 "meta",
1758 "iopen",
1759 "flock",
1760 "plock",
1761 "quota",
1762 "journal",
1763};
1764
1765static const char *gfs2_stype[] = {
1766 [GFS2_LKS_SRTT] = "srtt",
1767 [GFS2_LKS_SRTTVAR] = "srttvar",
1768 [GFS2_LKS_SRTTB] = "srttb",
1769 [GFS2_LKS_SRTTVARB] = "srttvarb",
1770 [GFS2_LKS_SIRT] = "sirt",
1771 [GFS2_LKS_SIRTVAR] = "sirtvar",
1772 [GFS2_LKS_DCOUNT] = "dlm",
1773 [GFS2_LKS_QCOUNT] = "queue",
1774};
1775
1776#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1777
1778static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1779{
1780 struct gfs2_glock_iter *gi = seq->private;
1781 struct gfs2_sbd *sdp = gi->sdp;
1782 unsigned index = gi->hash >> 3;
1783 unsigned subindex = gi->hash & 0x07;
1784 s64 value;
1785 int i;
1786
1787 if (index == 0 && subindex != 0)
1788 return 0;
1717 1789
1790 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1791 (index == 0) ? "cpu": gfs2_stype[subindex]);
1718 1792
1793 for_each_possible_cpu(i) {
1794 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1795 if (index == 0) {
1796 value = i;
1797 } else {
1798 value = lkstats->lkstats[index - 1].stats[subindex];
1799 }
1800 seq_printf(seq, " %15lld", (long long)value);
1801 }
1802 seq_putc(seq, '\n');
1803 return 0;
1804}
1719 1805
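The sbstats iterator packs a (glock type, stat) coordinate into gi->hash: the low three bits select the gfs2_stype column and the remaining bits the gfs2_gltype row, which is why GFS2_NR_SBSTATS multiplies the two array sizes. Row 0 is the cpu-id header and rows 1..10 index lkstats[index - 1]. A trivial standalone check of the decomposition:

#include <stdio.h>

#define NR_GLTYPES 11 /* ARRAY_SIZE(gfs2_gltype) */
#define NR_STYPES 8   /* ARRAY_SIZE(gfs2_stype) */

int main(void)
{
	unsigned hash;

	for (hash = 0; hash < NR_GLTYPES * NR_STYPES; hash++) {
		unsigned index = hash >> 3;   /* gfs2_gltype row */
		unsigned subindex = hash & 7; /* gfs2_stype column */

		/* The show function bails for row 0 unless subindex is 0,
		 * so the cpu-id header is printed exactly once. */
		if (index == 0 && subindex != 0)
			continue;
		printf("hash=%2u -> row=%2u col=%u\n", hash, index, subindex);
	}
	return 0;
}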
1720int __init gfs2_glock_init(void) 1806int __init gfs2_glock_init(void)
1721{ 1807{
@@ -1828,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
1828 return dump_glock(seq, iter_ptr); 1914 return dump_glock(seq, iter_ptr);
1829} 1915}
1830 1916
1917static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
1918{
1919 struct gfs2_glock_iter *gi = seq->private;
1920
1921 gi->hash = *pos;
1922 if (*pos >= GFS2_NR_SBSTATS)
1923 return NULL;
1924 preempt_disable();
1925 return SEQ_START_TOKEN;
1926}
1927
1928static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
1929 loff_t *pos)
1930{
1931 struct gfs2_glock_iter *gi = seq->private;
1932 (*pos)++;
1933 gi->hash++;
1934 if (gi->hash >= GFS2_NR_SBSTATS) {
1935 preempt_enable();
1936 return NULL;
1937 }
1938 return SEQ_START_TOKEN;
1939}
1940
1941static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
1942{
1943 preempt_enable();
1944}
1945
1831static const struct seq_operations gfs2_glock_seq_ops = { 1946static const struct seq_operations gfs2_glock_seq_ops = {
1832 .start = gfs2_glock_seq_start, 1947 .start = gfs2_glock_seq_start,
1833 .next = gfs2_glock_seq_next, 1948 .next = gfs2_glock_seq_next,
@@ -1835,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = {
1835 .show = gfs2_glock_seq_show, 1950 .show = gfs2_glock_seq_show,
1836}; 1951};
1837 1952
1838static int gfs2_debugfs_open(struct inode *inode, struct file *file) 1953static const struct seq_operations gfs2_glstats_seq_ops = {
1954 .start = gfs2_glock_seq_start,
1955 .next = gfs2_glock_seq_next,
1956 .stop = gfs2_glock_seq_stop,
1957 .show = gfs2_glstats_seq_show,
1958};
1959
1960static const struct seq_operations gfs2_sbstats_seq_ops = {
1961 .start = gfs2_sbstats_seq_start,
1962 .next = gfs2_sbstats_seq_next,
1963 .stop = gfs2_sbstats_seq_stop,
1964 .show = gfs2_sbstats_seq_show,
1965};
1966
1967static int gfs2_glocks_open(struct inode *inode, struct file *file)
1839{ 1968{
1840 int ret = seq_open_private(file, &gfs2_glock_seq_ops, 1969 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
1841 sizeof(struct gfs2_glock_iter)); 1970 sizeof(struct gfs2_glock_iter));
@@ -1847,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file)
1847 return ret; 1976 return ret;
1848} 1977}
1849 1978
1850static const struct file_operations gfs2_debug_fops = { 1979static int gfs2_glstats_open(struct inode *inode, struct file *file)
1980{
1981 int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
1982 sizeof(struct gfs2_glock_iter));
1983 if (ret == 0) {
1984 struct seq_file *seq = file->private_data;
1985 struct gfs2_glock_iter *gi = seq->private;
1986 gi->sdp = inode->i_private;
1987 }
1988 return ret;
1989}
1990
1991static int gfs2_sbstats_open(struct inode *inode, struct file *file)
1992{
1993 int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
1994 sizeof(struct gfs2_glock_iter));
1995 if (ret == 0) {
1996 struct seq_file *seq = file->private_data;
1997 struct gfs2_glock_iter *gi = seq->private;
1998 gi->sdp = inode->i_private;
1999 }
2000 return ret;
2001}
2002
2003static const struct file_operations gfs2_glocks_fops = {
2004 .owner = THIS_MODULE,
2005 .open = gfs2_glocks_open,
2006 .read = seq_read,
2007 .llseek = seq_lseek,
2008 .release = seq_release_private,
2009};
2010
2011static const struct file_operations gfs2_glstats_fops = {
1851 .owner = THIS_MODULE, 2012 .owner = THIS_MODULE,
1852 .open = gfs2_debugfs_open, 2013 .open = gfs2_glstats_open,
2014 .read = seq_read,
2015 .llseek = seq_lseek,
2016 .release = seq_release_private,
2017};
2018
2019static const struct file_operations gfs2_sbstats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = gfs2_sbstats_open,
1853 .read = seq_read, 2022 .read = seq_read,
1854 .llseek = seq_lseek, 2023 .llseek = seq_lseek,
1855 .release = seq_release_private, 2024 .release = seq_release_private,
@@ -1863,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
1863 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2032 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
1864 S_IFREG | S_IRUGO, 2033 S_IFREG | S_IRUGO,
1865 sdp->debugfs_dir, sdp, 2034 sdp->debugfs_dir, sdp,
1866 &gfs2_debug_fops); 2035 &gfs2_glocks_fops);
1867 if (!sdp->debugfs_dentry_glocks) 2036 if (!sdp->debugfs_dentry_glocks)
1868 return -ENOMEM; 2037 goto fail;
2038
2039 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
2040 S_IFREG | S_IRUGO,
2041 sdp->debugfs_dir, sdp,
2042 &gfs2_glstats_fops);
2043 if (!sdp->debugfs_dentry_glstats)
2044 goto fail;
2045
2046 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
2047 S_IFREG | S_IRUGO,
2048 sdp->debugfs_dir, sdp,
2049 &gfs2_sbstats_fops);
2050 if (!sdp->debugfs_dentry_sbstats)
2051 goto fail;
1869 2052
1870 return 0; 2053 return 0;
2054fail:
2055 gfs2_delete_debugfs_file(sdp);
2056 return -ENOMEM;
1871} 2057}
1872 2058
1873void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2059void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
1874{ 2060{
1875 if (sdp && sdp->debugfs_dir) { 2061 if (sdp->debugfs_dir) {
1876 if (sdp->debugfs_dentry_glocks) { 2062 if (sdp->debugfs_dentry_glocks) {
1877 debugfs_remove(sdp->debugfs_dentry_glocks); 2063 debugfs_remove(sdp->debugfs_dentry_glocks);
1878 sdp->debugfs_dentry_glocks = NULL; 2064 sdp->debugfs_dentry_glocks = NULL;
1879 } 2065 }
2066 if (sdp->debugfs_dentry_glstats) {
2067 debugfs_remove(sdp->debugfs_dentry_glstats);
2068 sdp->debugfs_dentry_glstats = NULL;
2069 }
2070 if (sdp->debugfs_dentry_sbstats) {
2071 debugfs_remove(sdp->debugfs_dentry_sbstats);
2072 sdp->debugfs_dentry_sbstats = NULL;
2073 }
1880 debugfs_remove(sdp->debugfs_dir); 2074 debugfs_remove(sdp->debugfs_dir);
1881 sdp->debugfs_dir = NULL; 2075 sdp->debugfs_dir = NULL;
1882 } 2076 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 97742a7ea9c..47d0bda5ac2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -19,6 +19,8 @@
19#include <linux/rculist_bl.h> 19#include <linux/rculist_bl.h>
20#include <linux/completion.h> 20#include <linux/completion.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <linux/ktime.h>
23#include <linux/percpu.h>
22 24
23#define DIO_WAIT 0x00000010 25#define DIO_WAIT 0x00000010
24#define DIO_METADATA 0x00000020 26#define DIO_METADATA 0x00000020
@@ -205,6 +207,22 @@ struct gfs2_glock_operations {
205}; 207};
206 208
207enum { 209enum {
 210 GFS2_LKS_SRTT = 0, /* Non-blocking smoothed round trip time */
 211 GFS2_LKS_SRTTVAR = 1, /* Non-blocking smoothed variance */
212 GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */
213 GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */
214 GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */
215 GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */
216 GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */
217 GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */
218 GFS2_NR_LKSTATS
219};
220
221struct gfs2_lkstats {
222 s64 stats[GFS2_NR_LKSTATS];
223};
224
225enum {
208 /* States */ 226 /* States */
209 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ 227 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
210 HIF_FIRST = 7, 228 HIF_FIRST = 7,
@@ -238,10 +256,12 @@ enum {
238 GLF_QUEUED = 12, 256 GLF_QUEUED = 12,
239 GLF_LRU = 13, 257 GLF_LRU = 13,
240 GLF_OBJECT = 14, /* Used only for tracing */ 258 GLF_OBJECT = 14, /* Used only for tracing */
259 GLF_BLOCKING = 15,
241}; 260};
242 261
243struct gfs2_glock { 262struct gfs2_glock {
244 struct hlist_bl_node gl_list; 263 struct hlist_bl_node gl_list;
264 struct gfs2_sbd *gl_sbd;
245 unsigned long gl_flags; /* GLF_... */ 265 unsigned long gl_flags; /* GLF_... */
246 struct lm_lockname gl_name; 266 struct lm_lockname gl_name;
247 atomic_t gl_ref; 267 atomic_t gl_ref;
@@ -261,16 +281,14 @@ struct gfs2_glock {
261 struct list_head gl_holders; 281 struct list_head gl_holders;
262 282
263 const struct gfs2_glock_operations *gl_ops; 283 const struct gfs2_glock_operations *gl_ops;
264 char gl_strname[GDLM_STRNAME_BYTES]; 284 ktime_t gl_dstamp;
285 struct gfs2_lkstats gl_stats;
265 struct dlm_lksb gl_lksb; 286 struct dlm_lksb gl_lksb;
266 char gl_lvb[32]; 287 char gl_lvb[32];
267 unsigned long gl_tchange; 288 unsigned long gl_tchange;
268 void *gl_object; 289 void *gl_object;
269 290
270 struct list_head gl_lru; 291 struct list_head gl_lru;
271
272 struct gfs2_sbd *gl_sbd;
273
274 struct list_head gl_ail_list; 292 struct list_head gl_ail_list;
275 atomic_t gl_ail_count; 293 atomic_t gl_ail_count;
276 atomic_t gl_revokes; 294 atomic_t gl_revokes;
@@ -560,8 +578,14 @@ struct lm_lockstruct {
560 uint32_t *ls_recover_result; /* result of last jid recovery */ 578 uint32_t *ls_recover_result; /* result of last jid recovery */
561}; 579};
562 580
581struct gfs2_pcpu_lkstats {
582 /* One struct for each glock type */
583 struct gfs2_lkstats lkstats[10];
584};
585
563struct gfs2_sbd { 586struct gfs2_sbd {
564 struct super_block *sd_vfs; 587 struct super_block *sd_vfs;
588 struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
565 struct kobject sd_kobj; 589 struct kobject sd_kobj;
566 unsigned long sd_flags; /* SDF_... */ 590 unsigned long sd_flags; /* SDF_... */
567 struct gfs2_sb_host sd_sb; 591 struct gfs2_sb_host sd_sb;
@@ -620,7 +644,6 @@ struct gfs2_sbd {
620 644
621 int sd_rindex_uptodate; 645 int sd_rindex_uptodate;
622 spinlock_t sd_rindex_spin; 646 spinlock_t sd_rindex_spin;
623 struct mutex sd_rindex_mutex;
624 struct rb_root sd_rindex_tree; 647 struct rb_root sd_rindex_tree;
625 unsigned int sd_rgrps; 648 unsigned int sd_rgrps;
626 unsigned int sd_max_rg_data; 649 unsigned int sd_max_rg_data;
@@ -725,8 +748,23 @@ struct gfs2_sbd {
725 748
726 unsigned long sd_last_warning; 749 unsigned long sd_last_warning;
727 struct dentry *debugfs_dir; /* debugfs directory */ 750 struct dentry *debugfs_dir; /* debugfs directory */
728 struct dentry *debugfs_dentry_glocks; /* for debugfs */ 751 struct dentry *debugfs_dentry_glocks;
752 struct dentry *debugfs_dentry_glstats;
753 struct dentry *debugfs_dentry_sbstats;
729}; 754};
730 755
756static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
757{
758 gl->gl_stats.stats[which]++;
759}
760
761static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
762{
763 const struct gfs2_sbd *sdp = gl->gl_sbd;
764 preempt_disable();
765 this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
766 preempt_enable();
767}
768
731#endif /* __INCORE_DOT_H__ */ 769#endif /* __INCORE_DOT_H__ */
732 770
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 56987460cda..a9ba2444e07 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1031,12 +1031,18 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1031 struct buffer_head *bh; 1031 struct buffer_head *bh;
1032 struct gfs2_holder ghs[3]; 1032 struct gfs2_holder ghs[3];
1033 struct gfs2_rgrpd *rgd; 1033 struct gfs2_rgrpd *rgd;
1034 int error = -EROFS; 1034 int error;
1035
1036 error = gfs2_rindex_update(sdp);
1037 if (error)
1038 return error;
1039
1040 error = -EROFS;
1035 1041
1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1042 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1043 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1038 1044
1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1045 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1040 if (!rgd) 1046 if (!rgd)
1041 goto out_inodes; 1047 goto out_inodes;
1042 1048
@@ -1224,6 +1230,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1224 return 0; 1230 return 0;
1225 } 1231 }
1226 1232
1233 error = gfs2_rindex_update(sdp);
1234 if (error)
1235 return error;
1236
1227 if (odip != ndip) { 1237 if (odip != ndip) {
1228 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 1238 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1229 0, &r_gh); 1239 0, &r_gh);
@@ -1255,7 +1265,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1255 * this is the case of the target file already existing 1265 * this is the case of the target file already existing
1256 * so we unlink before doing the rename 1266 * so we unlink before doing the rename
1257 */ 1267 */
1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); 1268 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
1259 if (nrgd) 1269 if (nrgd)
1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 1270 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1261 } 1271 }
@@ -1345,7 +1355,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1345 error = alloc_required; 1355 error = alloc_required;
1346 if (error < 0) 1356 if (error < 0)
1347 goto out_gunlock; 1357 goto out_gunlock;
1348 error = 0;
1349 1358
1350 if (alloc_required) { 1359 if (alloc_required) {
1351 struct gfs2_qadata *qa = gfs2_qadata_get(ndip); 1360 struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8944d1e32ab..5f5e70e047d 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -18,14 +18,106 @@
18#include "glock.h" 18#include "glock.h"
19#include "util.h" 19#include "util.h"
20#include "sys.h" 20#include "sys.h"
21#include "trace_gfs2.h"
21 22
22extern struct workqueue_struct *gfs2_control_wq; 23extern struct workqueue_struct *gfs2_control_wq;
23 24
25/**
26 * gfs2_update_stats - Update time based stats
 27 * @s: Pointer to the mean/variance structure to update
28 * @sample: New data to include
29 *
30 * @delta is the difference between the current rtt sample and the
31 * running average srtt. We add 1/8 of that to the srtt in order to
 32 * update the current srtt estimate. The variance estimate is a bit
 33 * more complicated: we subtract the current variance estimate from
 34 * the absolute value of @delta, and add 1/4 of the difference to the
 35 * running estimate.
36 *
37 * Note that the index points at the array entry containing the smoothed
38 * mean value, and the variance is always in the following entry
39 *
40 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
41 * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
42 * they are not scaled fixed point.
43 */
44
45static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
46 s64 sample)
47{
48 s64 delta = sample - s->stats[index];
49 s->stats[index] += (delta >> 3);
50 index++;
51 s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
52}
53
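gfs2_update_stats() is the classic Jacobson/Karels smoothed estimator the TCP/IP Illustrated reference points at: mean plus 1/8 of the error, variance plus 1/4 of its error, all in integer arithmetic. A self-contained userspace rendering for watching the numbers converge (assumes arithmetic right shift of negative values, as the kernel does):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* stats[index] holds the smoothed mean, stats[index + 1] the smoothed
 * variance estimate, mirroring the layout described above. */
static void update_stats(int64_t *stats, unsigned index, int64_t sample)
{
	int64_t delta = sample - stats[index];

	stats[index] += delta >> 3;                         /* srtt += delta/8 */
	index++;
	stats[index] += (llabs(delta) - stats[index]) >> 2; /* var += (|delta| - var)/4 */
}

int main(void)
{
	int64_t stats[2] = { 0, 0 };
	int64_t samples[] = { 800, 1200, 1000, 4000, 900 }; /* ns, made up */
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_stats(stats, 0, samples[i]);
		printf("sample=%lld srtt=%lld var=%lld\n",
		       (long long)samples[i], (long long)stats[0],
		       (long long)stats[1]);
	}
	return 0;
}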
54/**
55 * gfs2_update_reply_times - Update locking statistics
56 * @gl: The glock to update
57 *
58 * This assumes that gl->gl_dstamp has been set earlier.
59 *
60 * The rtt (lock round trip time) is an estimate of the time
61 * taken to perform a dlm lock request. We update it on each
62 * reply from the dlm.
63 *
64 * The blocking flag is set on the glock for all dlm requests
65 * which may potentially block due to lock requests from other nodes.
66 * DLM requests where the current lock state is exclusive, the
67 * requested state is null (or unlocked) or where the TRY or
68 * TRY_1CB flags are set are classified as non-blocking. All
69 * other DLM requests are counted as (potentially) blocking.
70 */
71static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
72{
73 struct gfs2_pcpu_lkstats *lks;
74 const unsigned gltype = gl->gl_name.ln_type;
75 unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
76 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
77 s64 rtt;
78
79 preempt_disable();
80 rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
81 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
82 gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
83 gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
84 preempt_enable();
85
86 trace_gfs2_glock_lock_time(gl, rtt);
87}
88
89/**
90 * gfs2_update_request_times - Update locking statistics
91 * @gl: The glock to update
92 *
93 * The irt (lock inter-request times) measures the average time
94 * between requests to the dlm. It is updated immediately before
95 * each dlm call.
96 */
97
98static inline void gfs2_update_request_times(struct gfs2_glock *gl)
99{
100 struct gfs2_pcpu_lkstats *lks;
101 const unsigned gltype = gl->gl_name.ln_type;
102 ktime_t dstamp;
103 s64 irt;
104
105 preempt_disable();
106 dstamp = gl->gl_dstamp;
107 gl->gl_dstamp = ktime_get_real();
108 irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
109 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
110 gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
111 gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
112 preempt_enable();
113}
114
24static void gdlm_ast(void *arg) 115static void gdlm_ast(void *arg)
25{ 116{
26 struct gfs2_glock *gl = arg; 117 struct gfs2_glock *gl = arg;
27 unsigned ret = gl->gl_state; 118 unsigned ret = gl->gl_state;
28 119
120 gfs2_update_reply_times(gl);
29 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
30 122
31 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 123 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
@@ -108,10 +200,11 @@ static int make_mode(const unsigned int lmstate)
108 return -1; 200 return -1;
109} 201}
110 202
111static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, 203static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
112 const int req) 204 const int req)
113{ 205{
114 u32 lkf = 0; 206 u32 lkf = DLM_LKF_VALBLK;
207 u32 lkid = gl->gl_lksb.sb_lkid;
115 208
116 if (gfs_flags & LM_FLAG_TRY) 209 if (gfs_flags & LM_FLAG_TRY)
117 lkf |= DLM_LKF_NOQUEUE; 210 lkf |= DLM_LKF_NOQUEUE;
@@ -135,29 +228,49 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
135 BUG(); 228 BUG();
136 } 229 }
137 230
138 if (lkid != 0) 231 if (lkid != 0) {
139 lkf |= DLM_LKF_CONVERT; 232 lkf |= DLM_LKF_CONVERT;
140 233 if (test_bit(GLF_BLOCKING, &gl->gl_flags))
141 lkf |= DLM_LKF_VALBLK; 234 lkf |= DLM_LKF_QUECVT;
235 }
142 236
143 return lkf; 237 return lkf;
144} 238}
145 239
240static void gfs2_reverse_hex(char *c, u64 value)
241{
242 while (value) {
243 *c-- = hex_asc[value & 0x0f];
244 value >>= 4;
245 }
246}
247
146static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, 248static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
147 unsigned int flags) 249 unsigned int flags)
148{ 250{
149 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 251 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
150 int req; 252 int req;
151 u32 lkf; 253 u32 lkf;
254 char strname[GDLM_STRNAME_BYTES] = "";
152 255
153 req = make_mode(req_state); 256 req = make_mode(req_state);
154 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 257 lkf = make_flags(gl, flags, req);
155 258 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
259 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
260 if (gl->gl_lksb.sb_lkid) {
261 gfs2_update_request_times(gl);
262 } else {
263 memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
264 strname[GDLM_STRNAME_BYTES - 1] = '\0';
265 gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
266 gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
267 gl->gl_dstamp = ktime_get_real();
268 }
156 /* 269 /*
157 * Submit the actual lock request. 270 * Submit the actual lock request.
158 */ 271 */
159 272
160 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 273 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
161 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 274 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
162} 275}
163 276
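The strname built above replaces the pre-formatted gl_strname the old code passed in: a space-padded, 24-character name with the lock type ending at offset 7 and the lock number at offset 23, matching the existing "%8x%16x" name format. A standalone sketch of the layout (this assumes GDLM_STRNAME_BYTES is 25, as in lock_dlm.h, and supplies a local hex_asc since the kernel's lives in kernel.h):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static const char hex_asc[] = "0123456789abcdef";

    static void reverse_hex(char *c, uint64_t value)
    {
            while (value) {                 /* writes nothing for value == 0 */
                    *c-- = hex_asc[value & 0x0f];
                    value >>= 4;
            }
    }

    int main(void)
    {
            char strname[25];

            memset(strname, ' ', sizeof(strname) - 1);
            strname[sizeof(strname) - 1] = '\0';
            reverse_hex(strname + 7, 2);            /* ln_type: 8-char field */
            reverse_hex(strname + 23, 0x1234);      /* ln_number: 16-char field */
            printf("\"%s\"\n", strname);
            return 0;
    }

For ln_type 2 and ln_number 0x1234 this prints "       2            1234". Note the name is only computed for the initial lock request; conversions are looked up by lkid, so the name passed then is unused.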
@@ -172,6 +285,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
172 return; 285 return;
173 } 286 }
174 287
288 clear_bit(GLF_BLOCKING, &gl->gl_flags);
289 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
290 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
291 gfs2_update_request_times(gl);
175 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 292 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
176 NULL, gl); 293 NULL, gl);
177 if (error) { 294 if (error) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 756fae9eaf8..4752eadc7f6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -19,6 +19,7 @@
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/list_sort.h>
22 23
23#include "gfs2.h" 24#include "gfs2.h"
24#include "incore.h" 25#include "incore.h"
@@ -358,7 +359,7 @@ retry:
358 return 0; 359 return 0;
359} 360}
360 361
361static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 362u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
362{ 363{
363 struct gfs2_journal_extent *je; 364 struct gfs2_journal_extent *je;
364 365
@@ -467,8 +468,8 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
467 468
468void gfs2_log_incr_head(struct gfs2_sbd *sdp) 469void gfs2_log_incr_head(struct gfs2_sbd *sdp)
469{ 470{
470 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 471 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
471 BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); 472 (sdp->sd_log_flush_head != sdp->sd_log_head));
472 473
473 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 474 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
474 sdp->sd_log_flush_head = 0; 475 sdp->sd_log_flush_head = 0;
@@ -476,99 +477,6 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp)
476 } 477 }
477} 478}
478 479
479/**
480 * gfs2_log_write_endio - End of I/O for a log buffer
481 * @bh: The buffer head
482 * @uptodate: I/O Status
483 *
484 */
485
486static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
487{
488 struct gfs2_sbd *sdp = bh->b_private;
489 bh->b_private = NULL;
490
491 end_buffer_write_sync(bh, uptodate);
492 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
493 wake_up(&sdp->sd_log_flush_wait);
494}
495
496/**
497 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
498 * @sdp: The GFS2 superblock
499 *
500 * Returns: the buffer_head
501 */
502
503struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
504{
505 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
506 struct buffer_head *bh;
507
508 bh = sb_getblk(sdp->sd_vfs, blkno);
509 lock_buffer(bh);
510 memset(bh->b_data, 0, bh->b_size);
511 set_buffer_uptodate(bh);
512 clear_buffer_dirty(bh);
513 gfs2_log_incr_head(sdp);
514 atomic_inc(&sdp->sd_log_in_flight);
515 bh->b_private = sdp;
516 bh->b_end_io = gfs2_log_write_endio;
517
518 return bh;
519}
520
521/**
522 * gfs2_fake_write_endio -
523 * @bh: The buffer head
524 * @uptodate: The I/O Status
525 *
526 */
527
528static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
529{
530 struct buffer_head *real_bh = bh->b_private;
531 struct gfs2_bufdata *bd = real_bh->b_private;
532 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
533
534 end_buffer_write_sync(bh, uptodate);
535 free_buffer_head(bh);
536 unlock_buffer(real_bh);
537 brelse(real_bh);
538 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
539 wake_up(&sdp->sd_log_flush_wait);
540}
541
542/**
543 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
544 * @sdp: the filesystem
545 * @data: the data the buffer_head should point to
546 *
547 * Returns: the log buffer descriptor
548 */
549
550struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
551 struct buffer_head *real)
552{
553 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
554 struct buffer_head *bh;
555
556 bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
557 atomic_set(&bh->b_count, 1);
558 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
559 set_bh_page(bh, real->b_page, bh_offset(real));
560 bh->b_blocknr = blkno;
561 bh->b_size = sdp->sd_sb.sb_bsize;
562 bh->b_bdev = sdp->sd_vfs->s_bdev;
563 bh->b_private = real;
564 bh->b_end_io = gfs2_fake_write_endio;
565
566 gfs2_log_incr_head(sdp);
567 atomic_inc(&sdp->sd_log_in_flight);
568
569 return bh;
570}
571
572static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) 480static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
573{ 481{
574 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 482 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -583,66 +491,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
583 sdp->sd_log_tail = new_tail; 491 sdp->sd_log_tail = new_tail;
584} 492}
585 493
586/**
587 * log_write_header - Get and initialize a journal header buffer
588 * @sdp: The GFS2 superblock
589 *
590 * Returns: the initialized log buffer descriptor
591 */
592 494
593static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) 495static void log_flush_wait(struct gfs2_sbd *sdp)
594{
595 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
596 struct buffer_head *bh;
597 struct gfs2_log_header *lh;
598 unsigned int tail;
599 u32 hash;
600
601 bh = sb_getblk(sdp->sd_vfs, blkno);
602 lock_buffer(bh);
603 memset(bh->b_data, 0, bh->b_size);
604 set_buffer_uptodate(bh);
605 clear_buffer_dirty(bh);
606
607 gfs2_ail1_empty(sdp);
608 tail = current_tail(sdp);
609
610 lh = (struct gfs2_log_header *)bh->b_data;
611 memset(lh, 0, sizeof(struct gfs2_log_header));
612 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
613 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
614 lh->lh_header.__pad0 = cpu_to_be64(0);
615 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
616 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
617 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
618 lh->lh_flags = cpu_to_be32(flags);
619 lh->lh_tail = cpu_to_be32(tail);
620 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
621 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
622 lh->lh_hash = cpu_to_be32(hash);
623
624 bh->b_end_io = end_buffer_write_sync;
625 get_bh(bh);
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
630 wait_on_buffer(bh);
631
632 if (!buffer_uptodate(bh))
633 gfs2_io_error_bh(sdp, bh);
634 brelse(bh);
635
636 if (sdp->sd_log_tail != tail)
637 log_pull_tail(sdp, tail);
638 else
639 gfs2_assert_withdraw(sdp, !pull);
640
641 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
642 gfs2_log_incr_head(sdp);
643}
644
645static void log_flush_commit(struct gfs2_sbd *sdp)
646{ 496{
647 DEFINE_WAIT(wait); 497 DEFINE_WAIT(wait);
648 498
@@ -655,8 +505,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
655 } while(atomic_read(&sdp->sd_log_in_flight)); 505 } while(atomic_read(&sdp->sd_log_in_flight));
656 finish_wait(&sdp->sd_log_flush_wait, &wait); 506 finish_wait(&sdp->sd_log_flush_wait, &wait);
657 } 507 }
508}
509
510static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
511{
512 struct gfs2_bufdata *bda, *bdb;
658 513
659 log_write_header(sdp, 0, 0); 514 bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list);
515 bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list);
516
517 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
518 return -1;
519 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
520 return 1;
521 return 0;
660} 522}
661 523
662static void gfs2_ordered_write(struct gfs2_sbd *sdp) 524static void gfs2_ordered_write(struct gfs2_sbd *sdp)
@@ -666,6 +528,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
666 LIST_HEAD(written); 528 LIST_HEAD(written);
667 529
668 gfs2_log_lock(sdp); 530 gfs2_log_lock(sdp);
531 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
669 while (!list_empty(&sdp->sd_log_le_ordered)) { 532 while (!list_empty(&sdp->sd_log_le_ordered)) {
670 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); 533 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
671 list_move(&bd->bd_le.le_list, &written); 534 list_move(&bd->bd_le.le_list, &written);
@@ -711,6 +574,68 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
711} 574}
712 575
713/** 576/**
577 * log_write_header - Get and initialize a journal header buffer
578 * @sdp: The GFS2 superblock
579 *
580 * Returns: the initialized log buffer descriptor
581 */
582
583static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
584{
585 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
586 struct buffer_head *bh;
587 struct gfs2_log_header *lh;
588 unsigned int tail;
589 u32 hash;
590
591 bh = sb_getblk(sdp->sd_vfs, blkno);
592 lock_buffer(bh);
593 memset(bh->b_data, 0, bh->b_size);
594 set_buffer_uptodate(bh);
595 clear_buffer_dirty(bh);
596
597 gfs2_ail1_empty(sdp);
598 tail = current_tail(sdp);
599
600 lh = (struct gfs2_log_header *)bh->b_data;
601 memset(lh, 0, sizeof(struct gfs2_log_header));
602 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
603 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
604 lh->lh_header.__pad0 = cpu_to_be64(0);
605 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
606 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
607 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
608 lh->lh_flags = cpu_to_be32(flags);
609 lh->lh_tail = cpu_to_be32(tail);
610 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
611 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
612 lh->lh_hash = cpu_to_be32(hash);
613
614 bh->b_end_io = end_buffer_write_sync;
615 get_bh(bh);
616 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
617 gfs2_ordered_wait(sdp);
618 log_flush_wait(sdp);
619 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
620 } else {
621 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
622 }
623 wait_on_buffer(bh);
624
625 if (!buffer_uptodate(bh))
626 gfs2_io_error_bh(sdp, bh);
627 brelse(bh);
628
629 if (sdp->sd_log_tail != tail)
630 log_pull_tail(sdp, tail);
631 else
632 gfs2_assert_withdraw(sdp, !pull);
633
634 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
635 gfs2_log_incr_head(sdp);
636}
637
638/**
714 * gfs2_log_flush - flush incore transaction(s) 639 * gfs2_log_flush - flush incore transaction(s)
715 * @sdp: the filesystem 640 * @sdp: the filesystem
716 * @gl: The glock structure to flush. If NULL, flush the whole incore log 641 * @gl: The glock structure to flush. If NULL, flush the whole incore log
@@ -753,11 +678,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
753 678
754 gfs2_ordered_write(sdp); 679 gfs2_ordered_write(sdp);
755 lops_before_commit(sdp); 680 lops_before_commit(sdp);
756 gfs2_ordered_wait(sdp);
757 681
758 if (sdp->sd_log_head != sdp->sd_log_flush_head) 682 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
759 log_flush_commit(sdp); 683 log_write_header(sdp, 0, 0);
760 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 684 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
761 gfs2_log_lock(sdp); 685 gfs2_log_lock(sdp);
762 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 686 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
763 trace_gfs2_log_blocks(sdp, -1); 687 trace_gfs2_log_blocks(sdp, -1);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ab0621698b7..ff07454b582 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -53,10 +53,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
53 53
54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); 55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56 56extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
57extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 57extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 58extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 59extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0301be655b1..6b1efb594d9 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/mempool.h>
15#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
16#include <linux/bio.h> 17#include <linux/bio.h>
17#include <linux/fs.h> 18#include <linux/fs.h>
@@ -76,7 +77,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
76 if (bi->bi_clone == 0) 77 if (bi->bi_clone == 0)
77 return; 78 return;
78 if (sdp->sd_args.ar_discard) 79 if (sdp->sd_args.ar_discard)
79 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); 80 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
80 memcpy(bi->bi_clone + bi->bi_offset, 81 memcpy(bi->bi_clone + bi->bi_offset,
81 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); 82 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
82 clear_bit(GBF_FULL, &bi->bi_flags); 83 clear_bit(GBF_FULL, &bi->bi_flags);
@@ -143,6 +144,98 @@ static inline __be64 *bh_ptr_end(struct buffer_head *bh)
143 return (__force __be64 *)(bh->b_data + bh->b_size); 144 return (__force __be64 *)(bh->b_data + bh->b_size);
144} 145}
145 146
147/**
148 * gfs2_log_write_endio - End of I/O for a log buffer
149 * @bh: The buffer head
150 * @uptodate: I/O Status
151 *
152 */
153
154static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
155{
156 struct gfs2_sbd *sdp = bh->b_private;
157 bh->b_private = NULL;
158
159 end_buffer_write_sync(bh, uptodate);
160 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
161 wake_up(&sdp->sd_log_flush_wait);
162}
163
164/**
165 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
166 * @sdp: The GFS2 superblock
167 *
168 * Returns: the buffer_head
169 */
170
171static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
172{
173 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
174 struct buffer_head *bh;
175
176 bh = sb_getblk(sdp->sd_vfs, blkno);
177 lock_buffer(bh);
178 memset(bh->b_data, 0, bh->b_size);
179 set_buffer_uptodate(bh);
180 clear_buffer_dirty(bh);
181 gfs2_log_incr_head(sdp);
182 atomic_inc(&sdp->sd_log_in_flight);
183 bh->b_private = sdp;
184 bh->b_end_io = gfs2_log_write_endio;
185
186 return bh;
187}
188
189/**
190 * gfs2_fake_write_endio - End of I/O for a fake log buffer
191 * @bh: The buffer head
192 * @uptodate: The I/O Status
193 *
194 */
195
196static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
197{
198 struct buffer_head *real_bh = bh->b_private;
199 struct gfs2_bufdata *bd = real_bh->b_private;
200 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
201
202 end_buffer_write_sync(bh, uptodate);
203 mempool_free(bh, gfs2_bh_pool);
204 unlock_buffer(real_bh);
205 brelse(real_bh);
206 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
207 wake_up(&sdp->sd_log_flush_wait);
208}
209
210/**
211 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
212 * @sdp: the filesystem
213 * @real: the real buffer head whose data is to be written to the log
214 *
215 * Returns: the log buffer descriptor
216 */
217
218static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
219 struct buffer_head *real)
220{
221 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
222 struct buffer_head *bh;
223
224 bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
225 atomic_set(&bh->b_count, 1);
226 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
227 set_bh_page(bh, real->b_page, bh_offset(real));
228 bh->b_blocknr = blkno;
229 bh->b_size = sdp->sd_sb.sb_bsize;
230 bh->b_bdev = sdp->sd_vfs->s_bdev;
231 bh->b_private = real;
232 bh->b_end_io = gfs2_fake_write_endio;
233
234 gfs2_log_incr_head(sdp);
235 atomic_inc(&sdp->sd_log_in_flight);
236
237 return bh;
238}
146 239
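The switch to mempool_alloc() here replaces the old alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL) call: the pool of 1024 buffer heads created in main.c below guarantees forward progress for log writes without resorting to __GFP_NOFAIL.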
147static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) 240static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
148{ 241{
@@ -553,11 +646,11 @@ static void gfs2_check_magic(struct buffer_head *bh)
553 __be32 *ptr; 646 __be32 *ptr;
554 647
555 clear_buffer_escaped(bh); 648 clear_buffer_escaped(bh);
556 kaddr = kmap_atomic(bh->b_page, KM_USER0); 649 kaddr = kmap_atomic(bh->b_page);
557 ptr = kaddr + bh_offset(bh); 650 ptr = kaddr + bh_offset(bh);
558 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 651 if (*ptr == cpu_to_be32(GFS2_MAGIC))
559 set_buffer_escaped(bh); 652 set_buffer_escaped(bh);
560 kunmap_atomic(kaddr, KM_USER0); 653 kunmap_atomic(kaddr);
561} 654}
562 655
563static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, 656static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -594,10 +687,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
594 if (buffer_escaped(bd->bd_bh)) { 687 if (buffer_escaped(bd->bd_bh)) {
595 void *kaddr; 688 void *kaddr;
596 bh1 = gfs2_log_get_buf(sdp); 689 bh1 = gfs2_log_get_buf(sdp);
597 kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); 690 kaddr = kmap_atomic(bd->bd_bh->b_page);
598 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), 691 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
599 bh1->b_size); 692 bh1->b_size);
600 kunmap_atomic(kaddr, KM_USER0); 693 kunmap_atomic(kaddr);
601 *(__be32 *)bh1->b_data = 0; 694 *(__be32 *)bh1->b_data = 0;
602 clear_buffer_escaped(bd->bd_bh); 695 clear_buffer_escaped(bd->bd_bh);
603 unlock_buffer(bd->bd_bh); 696 unlock_buffer(bd->bd_bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a8d9bcd0e19..754426b1e52 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -17,6 +17,7 @@
17#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h> 18#include <linux/rculist_bl.h>
19#include <linux/atomic.h> 19#include <linux/atomic.h>
20#include <linux/mempool.h>
20 21
21#include "gfs2.h" 22#include "gfs2.h"
22#include "incore.h" 23#include "incore.h"
@@ -69,6 +70,16 @@ static void gfs2_init_gl_aspace_once(void *foo)
69 address_space_init_once(mapping); 70 address_space_init_once(mapping);
70} 71}
71 72
73static void *gfs2_bh_alloc(gfp_t mask, void *data)
74{
75 return alloc_buffer_head(mask);
76}
77
78static void gfs2_bh_free(void *ptr, void *data)
79{
80 return free_buffer_head(ptr);
81}
82
72/** 83/**
73 * init_gfs2_fs - Register GFS2 as a filesystem 84 * init_gfs2_fs - Register GFS2 as a filesystem
74 * 85 *
@@ -151,6 +162,10 @@ static int __init init_gfs2_fs(void)
151 gfs2_control_wq = alloc_workqueue("gfs2_control", 162 gfs2_control_wq = alloc_workqueue("gfs2_control",
152 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); 163 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
153 if (!gfs2_control_wq) 164 if (!gfs2_control_wq)
165 goto fail_recovery;
166
167 gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL);
168 if (!gfs2_bh_pool)
154 goto fail_control; 169 goto fail_control;
155 170
156 gfs2_register_debugfs(); 171 gfs2_register_debugfs();
@@ -160,6 +175,8 @@ static int __init init_gfs2_fs(void)
160 return 0; 175 return 0;
161 176
162fail_control: 177fail_control:
178 destroy_workqueue(gfs2_control_wq);
179fail_recovery:
163 destroy_workqueue(gfs_recovery_wq); 180 destroy_workqueue(gfs_recovery_wq);
164fail_wq: 181fail_wq:
165 unregister_filesystem(&gfs2meta_fs_type); 182 unregister_filesystem(&gfs2meta_fs_type);
@@ -208,6 +225,7 @@ static void __exit exit_gfs2_fs(void)
208 225
209 rcu_barrier(); 226 rcu_barrier();
210 227
228 mempool_destroy(gfs2_bh_pool);
211 kmem_cache_destroy(gfs2_quotad_cachep); 229 kmem_cache_destroy(gfs2_quotad_cachep);
212 kmem_cache_destroy(gfs2_rgrpd_cachep); 230 kmem_cache_destroy(gfs2_rgrpd_cachep);
213 kmem_cache_destroy(gfs2_bufdata_cachep); 231 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 24f609c9ef9..6f3a18f9e17 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
68 68
69 sb->s_fs_info = sdp; 69 sb->s_fs_info = sdp;
70 sdp->sd_vfs = sb; 70 sdp->sd_vfs = sb;
71 sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
72 if (!sdp->sd_lkstats) {
73 kfree(sdp);
74 return NULL;
75 }
76
71 set_bit(SDF_NOJOURNALID, &sdp->sd_flags); 77 set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
72 gfs2_tune_init(&sdp->sd_tune); 78 gfs2_tune_init(&sdp->sd_tune);
73 79
@@ -77,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
77 spin_lock_init(&sdp->sd_statfs_spin); 83 spin_lock_init(&sdp->sd_statfs_spin);
78 84
79 spin_lock_init(&sdp->sd_rindex_spin); 85 spin_lock_init(&sdp->sd_rindex_spin);
80 mutex_init(&sdp->sd_rindex_mutex);
81 sdp->sd_rindex_tree.rb_node = NULL; 86 sdp->sd_rindex_tree.rb_node = NULL;
82 87
83 INIT_LIST_HEAD(&sdp->sd_jindex_list); 88 INIT_LIST_HEAD(&sdp->sd_jindex_list);
@@ -431,10 +436,9 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
431 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 436 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
432 return PTR_ERR(inode); 437 return PTR_ERR(inode);
433 } 438 }
434 dentry = d_alloc_root(inode); 439 dentry = d_make_root(inode);
435 if (!dentry) { 440 if (!dentry) {
436 fs_err(sdp, "can't alloc %s dentry\n", name); 441 fs_err(sdp, "can't alloc %s dentry\n", name);
437 iput(inode);
438 return -ENOMEM; 442 return -ENOMEM;
439 } 443 }
440 *dptr = dentry; 444 *dptr = dentry;
@@ -1221,6 +1225,7 @@ fail_sys:
1221 gfs2_sys_fs_del(sdp); 1225 gfs2_sys_fs_del(sdp);
1222fail: 1226fail:
1223 gfs2_delete_debugfs_file(sdp); 1227 gfs2_delete_debugfs_file(sdp);
1228 free_percpu(sdp->sd_lkstats);
1224 kfree(sdp); 1229 kfree(sdp);
1225 sb->s_fs_info = NULL; 1230 sb->s_fs_info = NULL;
1226 return error; 1231 return error;
@@ -1393,6 +1398,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1393 shrink_dcache_sb(sb); 1398 shrink_dcache_sb(sb);
1394 kill_block_super(sb); 1399 kill_block_super(sb);
1395 gfs2_delete_debugfs_file(sdp); 1400 gfs2_delete_debugfs_file(sdp);
1401 free_percpu(sdp->sd_lkstats);
1396 kfree(sdp); 1402 kfree(sdp);
1397} 1403}
1398 1404
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a45b21b0391..6019da3dcae 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -681,7 +681,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
681 ptr = qp; 681 ptr = qp;
682 nbytes = sizeof(struct gfs2_quota); 682 nbytes = sizeof(struct gfs2_quota);
683get_a_page: 683get_a_page:
684 page = grab_cache_page(mapping, index); 684 page = find_or_create_page(mapping, index, GFP_NOFS);
685 if (!page) 685 if (!page)
686 return -ENOMEM; 686 return -ENOMEM;
687 687
@@ -720,12 +720,12 @@ get_a_page:
720 720
721 gfs2_trans_add_bh(ip->i_gl, bh, 0); 721 gfs2_trans_add_bh(ip->i_gl, bh, 0);
722 722
723 kaddr = kmap_atomic(page, KM_USER0); 723 kaddr = kmap_atomic(page);
724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
725 nbytes = PAGE_CACHE_SIZE - offset; 725 nbytes = PAGE_CACHE_SIZE - offset;
726 memcpy(kaddr + offset, ptr, nbytes); 726 memcpy(kaddr + offset, ptr, nbytes);
727 flush_dcache_page(page); 727 flush_dcache_page(page);
728 kunmap_atomic(kaddr, KM_USER0); 728 kunmap_atomic(kaddr);
729 unlock_page(page); 729 unlock_page(page);
730 page_cache_release(page); 730 page_cache_release(page);
731 731
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 49ada95209d..3df65c9ab73 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -327,23 +327,31 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
327 * Returns: The resource group, or NULL if not found 327 * Returns: The resource group, or NULL if not found
328 */ 328 */
329 329
330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) 330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
331{ 331{
332 struct rb_node **newn; 332 struct rb_node *n, *next;
333 struct gfs2_rgrpd *cur; 333 struct gfs2_rgrpd *cur;
334 334
335 spin_lock(&sdp->sd_rindex_spin); 335 spin_lock(&sdp->sd_rindex_spin);
336 newn = &sdp->sd_rindex_tree.rb_node; 336 n = sdp->sd_rindex_tree.rb_node;
337 while (*newn) { 337 while (n) {
338 cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); 338 cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
339 next = NULL;
339 if (blk < cur->rd_addr) 340 if (blk < cur->rd_addr)
340 newn = &((*newn)->rb_left); 341 next = n->rb_left;
341 else if (blk >= cur->rd_data0 + cur->rd_data) 342 else if (blk >= cur->rd_data0 + cur->rd_data)
342 newn = &((*newn)->rb_right); 343 next = n->rb_right;
343 else { 344 if (next == NULL) {
344 spin_unlock(&sdp->sd_rindex_spin); 345 spin_unlock(&sdp->sd_rindex_spin);
346 if (exact) {
347 if (blk < cur->rd_addr)
348 return NULL;
349 if (blk >= cur->rd_data0 + cur->rd_data)
350 return NULL;
351 }
345 return cur; 352 return cur;
346 } 353 }
354 n = next;
347 } 355 }
348 spin_unlock(&sdp->sd_rindex_spin); 356 spin_unlock(&sdp->sd_rindex_spin);
349 357
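The new exact argument changes the lookup contract: the descent now remembers the last node visited, so for exact == 0 a block that falls in a gap between resource groups returns the neighbouring rgrp on the search path (gfs2_fitrim() below uses this to clamp an arbitrary FITRIM range to real rgrps), while exact == 1 keeps the old behaviour of returning NULL for an unmapped block.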
@@ -532,7 +540,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
532 struct file_ra_state ra_state; 540 struct file_ra_state ra_state;
533 int error, rgrps; 541 int error, rgrps;
534 542
535 mutex_lock(&sdp->sd_rindex_mutex);
536 file_ra_state_init(&ra_state, inode->i_mapping); 543 file_ra_state_init(&ra_state, inode->i_mapping);
537 for (rgrps = 0;; rgrps++) { 544 for (rgrps = 0;; rgrps++) {
538 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 545 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -545,11 +552,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
545 break; 552 break;
546 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 553 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
547 } 554 }
548 mutex_unlock(&sdp->sd_rindex_mutex);
549 return total_data; 555 return total_data;
550} 556}
551 557
552static void rgd_insert(struct gfs2_rgrpd *rgd) 558static int rgd_insert(struct gfs2_rgrpd *rgd)
553{ 559{
554 struct gfs2_sbd *sdp = rgd->rd_sbd; 560 struct gfs2_sbd *sdp = rgd->rd_sbd;
555 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 561 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -565,11 +571,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
565 else if (rgd->rd_addr > cur->rd_addr) 571 else if (rgd->rd_addr > cur->rd_addr)
566 newn = &((*newn)->rb_right); 572 newn = &((*newn)->rb_right);
567 else 573 else
568 return; 574 return -EEXIST;
569 } 575 }
570 576
571 rb_link_node(&rgd->rd_node, parent, newn); 577 rb_link_node(&rgd->rd_node, parent, newn);
572 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 578 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
579 sdp->sd_rgrps++;
580 return 0;
573} 581}
574 582
575/** 583/**
@@ -623,10 +631,13 @@ static int read_rindex_entry(struct gfs2_inode *ip,
623 if (rgd->rd_data > sdp->sd_max_rg_data) 631 if (rgd->rd_data > sdp->sd_max_rg_data)
624 sdp->sd_max_rg_data = rgd->rd_data; 632 sdp->sd_max_rg_data = rgd->rd_data;
625 spin_lock(&sdp->sd_rindex_spin); 633 spin_lock(&sdp->sd_rindex_spin);
626 rgd_insert(rgd); 634 error = rgd_insert(rgd);
627 sdp->sd_rgrps++;
628 spin_unlock(&sdp->sd_rindex_spin); 635 spin_unlock(&sdp->sd_rindex_spin);
629 return error; 636 if (!error)
637 return 0;
638
639 error = 0; /* someone else read in the rgrp; free it and ignore it */
640 gfs2_glock_put(rgd->rd_gl);
630 641
631fail: 642fail:
632 kfree(rgd->rd_bits); 643 kfree(rgd->rd_bits);
@@ -687,7 +698,6 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
687 698
688 /* Read new copy from disk if we don't have the latest */ 699 /* Read new copy from disk if we don't have the latest */
689 if (!sdp->sd_rindex_uptodate) { 700 if (!sdp->sd_rindex_uptodate) {
690 mutex_lock(&sdp->sd_rindex_mutex);
691 if (!gfs2_glock_is_locked_by_me(gl)) { 701 if (!gfs2_glock_is_locked_by_me(gl)) {
692 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 702 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
693 if (error) 703 if (error)
@@ -698,10 +708,8 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
698 error = gfs2_ri_update(ip); 708 error = gfs2_ri_update(ip);
699 if (unlock_required) 709 if (unlock_required)
700 gfs2_glock_dq_uninit(&ri_gh); 710 gfs2_glock_dq_uninit(&ri_gh);
701 mutex_unlock(&sdp->sd_rindex_mutex);
702 } 711 }
703 712
704
705 return error; 713 return error;
706} 714}
707 715
@@ -810,9 +818,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
810 818
811} 819}
812 820
813void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 821int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
814 struct buffer_head *bh, 822 struct buffer_head *bh,
815 const struct gfs2_bitmap *bi) 823 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
816{ 824{
817 struct super_block *sb = sdp->sd_vfs; 825 struct super_block *sb = sdp->sd_vfs;
818 struct block_device *bdev = sb->s_bdev; 826 struct block_device *bdev = sb->s_bdev;
@@ -823,11 +831,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
823 sector_t nr_sects = 0; 831 sector_t nr_sects = 0;
824 int rv; 832 int rv;
825 unsigned int x; 833 unsigned int x;
834 u32 trimmed = 0;
835 u8 diff;
826 836
827 for (x = 0; x < bi->bi_len; x++) { 837 for (x = 0; x < bi->bi_len; x++) {
828 const u8 *orig = bh->b_data + bi->bi_offset + x; 838 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
829 const u8 *clone = bi->bi_clone + bi->bi_offset + x; 839 clone += bi->bi_offset;
830 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 840 clone += x;
841 if (bh) {
842 const u8 *orig = bh->b_data + bi->bi_offset + x;
843 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
844 } else {
845 diff = ~(*clone | (*clone >> 1));
846 }
831 diff &= 0x55; 847 diff &= 0x55;
832 if (diff == 0) 848 if (diff == 0)
833 continue; 849 continue;
@@ -838,11 +854,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
838 if (nr_sects == 0) 854 if (nr_sects == 0)
839 goto start_new_extent; 855 goto start_new_extent;
840 if ((start + nr_sects) != blk) { 856 if ((start + nr_sects) != blk) {
841 rv = blkdev_issue_discard(bdev, start, 857 if (nr_sects >= minlen) {
842 nr_sects, GFP_NOFS, 858 rv = blkdev_issue_discard(bdev,
843 0); 859 start, nr_sects,
844 if (rv) 860 GFP_NOFS, 0);
845 goto fail; 861 if (rv)
862 goto fail;
863 trimmed += nr_sects;
864 }
846 nr_sects = 0; 865 nr_sects = 0;
847start_new_extent: 866start_new_extent:
848 start = blk; 867 start = blk;
@@ -853,15 +872,108 @@ start_new_extent:
853 blk += sects_per_blk; 872 blk += sects_per_blk;
854 } 873 }
855 } 874 }
856 if (nr_sects) { 875 if (nr_sects >= minlen) {
857 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 876 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
858 if (rv) 877 if (rv)
859 goto fail; 878 goto fail;
879 trimmed += nr_sects;
860 } 880 }
861 return; 881 if (ptrimmed)
882 *ptrimmed = trimmed;
883 return 0;
884
862fail: 885fail:
863 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 886 if (sdp->sd_args.ar_discard)
887 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
864 sdp->sd_args.ar_discard = 0; 888 sdp->sd_args.ar_discard = 0;
889 return -EIO;
890}
891
892/**
893 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
894 * @filp: Any file on the filesystem
895 * @argp: Pointer to the arguments (also used to pass result)
896 *
897 * Returns: 0 on success, otherwise error code
898 */
899
900int gfs2_fitrim(struct file *filp, void __user *argp)
901{
902 struct inode *inode = filp->f_dentry->d_inode;
903 struct gfs2_sbd *sdp = GFS2_SB(inode);
904 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
905 struct buffer_head *bh;
906 struct gfs2_rgrpd *rgd;
907 struct gfs2_rgrpd *rgd_end;
908 struct gfs2_holder gh;
909 struct fstrim_range r;
910 int ret = 0;
911 u64 amt;
912 u64 trimmed = 0;
913 unsigned int x;
914
915 if (!capable(CAP_SYS_ADMIN))
916 return -EPERM;
917
918 if (!blk_queue_discard(q))
919 return -EOPNOTSUPP;
920
921 if (argp == NULL) {
922 r.start = 0;
923 r.len = ULLONG_MAX;
924 r.minlen = 0;
925 } else if (copy_from_user(&r, argp, sizeof(r)))
926 return -EFAULT;
927
928 ret = gfs2_rindex_update(sdp);
929 if (ret)
930 return ret;
931
932 rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
933 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
934
935 while (1) {
936
937 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
938 if (ret)
939 goto out;
940
941 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
942 /* Trim each bitmap in the rgrp */
943 for (x = 0; x < rgd->rd_length; x++) {
944 struct gfs2_bitmap *bi = rgd->rd_bits + x;
945 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
946 if (ret) {
947 gfs2_glock_dq_uninit(&gh);
948 goto out;
949 }
950 trimmed += amt;
951 }
952
953 /* Mark rgrp as having been trimmed */
954 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
955 if (ret == 0) {
956 bh = rgd->rd_bits[0].bi_bh;
957 rgd->rd_flags |= GFS2_RGF_TRIMMED;
958 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
959 gfs2_rgrp_out(rgd, bh->b_data);
960 gfs2_trans_end(sdp);
961 }
962 }
963 gfs2_glock_dq_uninit(&gh);
964
965 if (rgd == rgd_end)
966 break;
967
968 rgd = gfs2_rgrpd_get_next(rgd);
969 }
970
971out:
972 r.len = trimmed << 9;
973 if (argp && copy_to_user(argp, &r, sizeof(r)))
974 return -EFAULT;
975
976 return ret;
865} 977}
866 978
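With this, GFS2 can service the generic FITRIM ioctl. A minimal userspace sketch of driving it, assuming the FITRIM case is wired up to gfs2_fitrim() in the file ioctl handler (not shown in this hunk); the interface itself is the standard one from linux/fs.h:

    #include <stdio.h>
    #include <limits.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>           /* FITRIM, struct fstrim_range */

    int main(int argc, char **argv)
    {
            /* start 0, len ULLONG_MAX, minlen 0: trim the whole filesystem */
            struct fstrim_range r = { .start = 0, .len = ULLONG_MAX, .minlen = 0 };
            int fd;

            if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                    return 1;       /* argv[1]: any path on the mounted fs */
            if (ioctl(fd, FITRIM, &r) < 0) {
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)r.len);
            return 0;
    }

On return, r.len carries the number of bytes trimmed (trimmed << 9 in the code above).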
867/** 979/**
@@ -1008,7 +1120,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1008 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1120 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
1009 rgd = begin = ip->i_rgd; 1121 rgd = begin = ip->i_rgd;
1010 else 1122 else
1011 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); 1123 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1012 1124
1013 if (rgd == NULL) 1125 if (rgd == NULL)
1014 return -EBADSLT; 1126 return -EBADSLT;
@@ -1293,7 +1405,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1293 u32 length, rgrp_blk, buf_blk; 1405 u32 length, rgrp_blk, buf_blk;
1294 unsigned int buf; 1406 unsigned int buf;
1295 1407
1296 rgd = gfs2_blk2rgrpd(sdp, bstart); 1408 rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1297 if (!rgd) { 1409 if (!rgd) {
1298 if (gfs2_consist(sdp)) 1410 if (gfs2_consist(sdp))
1299 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1411 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
@@ -1474,7 +1586,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1474 return; 1586 return;
1475 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1587 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1476 rgd->rd_free += blen; 1588 rgd->rd_free += blen;
1477 1589 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1478 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1590 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1479 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1591 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1480 1592
@@ -1560,14 +1672,9 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1560{ 1672{
1561 struct gfs2_rgrpd *rgd; 1673 struct gfs2_rgrpd *rgd;
1562 struct gfs2_holder rgd_gh; 1674 struct gfs2_holder rgd_gh;
1563 int error; 1675 int error = -EINVAL;
1564
1565 error = gfs2_rindex_update(sdp);
1566 if (error)
1567 return error;
1568 1676
1569 error = -EINVAL; 1677 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
1570 rgd = gfs2_blk2rgrpd(sdp, no_addr);
1571 if (!rgd) 1678 if (!rgd)
1572 goto fail; 1679 goto fail;
1573 1680
@@ -1610,7 +1717,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1610 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 1717 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
1611 rgd = ip->i_rgd; 1718 rgd = ip->i_rgd;
1612 else 1719 else
1613 rgd = gfs2_blk2rgrpd(sdp, block); 1720 rgd = gfs2_blk2rgrpd(sdp, block, 1);
1614 if (!rgd) { 1721 if (!rgd) {
1615 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 1722 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
1616 return; 1723 return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ceec9106cdf..b4b10f4de25 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -11,6 +11,7 @@
11#define __RGRP_DOT_H__ 11#define __RGRP_DOT_H__
12 12
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/uaccess.h>
14 15
15struct gfs2_rgrpd; 16struct gfs2_rgrpd;
16struct gfs2_sbd; 17struct gfs2_sbd;
@@ -18,7 +19,7 @@ struct gfs2_holder;
18 19
19extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); 20extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
20 21
21extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); 22extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
22extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); 23extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
23extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); 24extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
24 25
@@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
62extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 63extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
63extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); 64extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
64extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); 65extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
65extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 66extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
66 struct buffer_head *bh, 67 struct buffer_head *bh,
67 const struct gfs2_bitmap *bi); 68 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
69extern int gfs2_fitrim(struct file *filp, void __user *argp);
68 70
69#endif /* __RGRP_DOT_H__ */ 71#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4553ce515f6..6172fa77ad5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1417 if (error) 1417 if (error)
1418 goto out; 1418 goto out;
1419 1419
1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1421 if (!rgd) { 1421 if (!rgd) {
1422 gfs2_consist_inode(ip); 1422 gfs2_consist_inode(ip);
1423 error = -EIO; 1423 error = -EIO;
@@ -1557,6 +1557,7 @@ out:
1557 end_writeback(inode); 1557 end_writeback(inode);
1558 gfs2_dir_hash_inval(ip); 1558 gfs2_dir_hash_inval(ip);
1559 ip->i_gl->gl_object = NULL; 1559 ip->i_gl->gl_object = NULL;
1560 flush_delayed_work_sync(&ip->i_gl->gl_work);
1560 gfs2_glock_add_to_lru(ip->i_gl); 1561 gfs2_glock_add_to_lru(ip->i_gl);
1561 gfs2_glock_put(ip->i_gl); 1562 gfs2_glock_put(ip->i_gl);
1562 ip->i_gl = NULL; 1563 ip->i_gl = NULL;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 5d07609ec57..dfa89cd7553 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -11,6 +11,7 @@
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h> 13#include <linux/writeback.h>
14#include <linux/ktime.h>
14#include "incore.h" 15#include "incore.h"
15#include "glock.h" 16#include "glock.h"
16 17
@@ -43,7 +44,8 @@
43 {(1UL << GLF_FROZEN), "F" }, \ 44 {(1UL << GLF_FROZEN), "F" }, \
44 {(1UL << GLF_QUEUED), "q" }, \ 45 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \ 46 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" }) 47 {(1UL << GLF_OBJECT), "o" }, \
48 {(1UL << GLF_BLOCKING), "b" })
47 49
48#ifndef NUMPTY 50#ifndef NUMPTY
49#define NUMPTY 51#define NUMPTY
@@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue,
236 glock_trace_name(__entry->state)) 238 glock_trace_name(__entry->state))
237); 239);
238 240
241/* DLM sends a reply to GFS2 */
242TRACE_EVENT(gfs2_glock_lock_time,
243
244 TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
245
246 TP_ARGS(gl, tdiff),
247
248 TP_STRUCT__entry(
249 __field( dev_t, dev )
250 __field( u64, glnum )
251 __field( u32, gltype )
252 __field( int, status )
253 __field( char, flags )
254 __field( s64, tdiff )
255 __field( s64, srtt )
256 __field( s64, srttvar )
257 __field( s64, srttb )
258 __field( s64, srttvarb )
259 __field( s64, sirt )
260 __field( s64, sirtvar )
261 __field( s64, dcount )
262 __field( s64, qcount )
263 ),
264
265 TP_fast_assign(
266 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
267 __entry->glnum = gl->gl_name.ln_number;
268 __entry->gltype = gl->gl_name.ln_type;
269 __entry->status = gl->gl_lksb.sb_status;
270 __entry->flags = gl->gl_lksb.sb_flags;
271 __entry->tdiff = tdiff;
272 __entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT];
273 __entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
274 __entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
275 __entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
276 __entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT];
277 __entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
278 __entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
279 __entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT];
280 ),
281
282 TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
283 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
284 (unsigned long long)__entry->glnum,
285 __entry->status, __entry->flags,
286 (long long)__entry->tdiff,
287 (long long)__entry->srtt,
288 (long long)__entry->srttvar,
289 (long long)__entry->srttb,
290 (long long)__entry->srttvarb,
291 (long long)__entry->sirt,
292 (long long)__entry->sirtvar,
293 (long long)__entry->dcount,
294 (long long)__entry->qcount)
295);
296
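Once this event is enabled (e.g. through /sys/kernel/debug/tracing/events/gfs2/gfs2_glock_lock_time/enable), every dlm reply emits one record with the raw round-trip time alongside the current smoothed estimates and counters, so the per-glock statistics can be observed live from tracefs.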
239/* Section 2 - Log/journal 297/* Section 2 - Log/journal
240 * 298 *
241 * Objectives: 299 * Objectives:
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 53511291fe3..9e7765e8e7b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
25struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 25struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
27struct kmem_cache *gfs2_quotad_cachep __read_mostly; 27struct kmem_cache *gfs2_quotad_cachep __read_mostly;
28mempool_t *gfs2_bh_pool __read_mostly;
28 29
29void gfs2_assert_i(struct gfs2_sbd *sdp) 30void gfs2_assert_i(struct gfs2_sbd *sdp)
30{ 31{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index b432e04600d..a4ce76c67db 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -10,6 +10,8 @@
10#ifndef __UTIL_DOT_H__ 10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__ 11#define __UTIL_DOT_H__
12 12
13#include <linux/mempool.h>
14
13#include "incore.h" 15#include "incore.h"
14 16
15#define fs_printk(level, fs, fmt, arg...) \ 17#define fs_printk(level, fs, fmt, arg...) \
@@ -150,6 +152,7 @@ extern struct kmem_cache *gfs2_inode_cachep;
150extern struct kmem_cache *gfs2_bufdata_cachep; 152extern struct kmem_cache *gfs2_bufdata_cachep;
151extern struct kmem_cache *gfs2_rgrpd_cachep; 153extern struct kmem_cache *gfs2_rgrpd_cachep;
152extern struct kmem_cache *gfs2_quotad_cachep; 154extern struct kmem_cache *gfs2_quotad_cachep;
155extern mempool_t *gfs2_bh_pool;
153 156
154static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 157static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
155 unsigned int *p) 158 unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index e9636591b5d..927f4df874a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -238,6 +238,10 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
238 unsigned int x; 238 unsigned int x;
239 int error; 239 int error;
240 240
241 error = gfs2_rindex_update(sdp);
242 if (error)
243 return error;
244
241 if (GFS2_EA_IS_STUFFED(ea)) 245 if (GFS2_EA_IS_STUFFED(ea))
242 return 0; 246 return 0;
243 247
@@ -251,7 +255,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
251 if (!blks) 255 if (!blks)
252 return 0; 256 return 0;
253 257
254 rgd = gfs2_blk2rgrpd(sdp, bn); 258 rgd = gfs2_blk2rgrpd(sdp, bn, 1);
255 if (!rgd) { 259 if (!rgd) {
256 gfs2_consist_inode(ip); 260 gfs2_consist_inode(ip);
257 return -EIO; 261 return -EIO;
@@ -1330,6 +1334,10 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1330 unsigned int x; 1334 unsigned int x;
1331 int error; 1335 int error;
1332 1336
1337 error = gfs2_rindex_update(sdp);
1338 if (error)
1339 return error;
1340
1333 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1341 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1334 1342
1335 error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh); 1343 error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh);
@@ -1439,7 +1447,11 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1439 struct gfs2_holder gh; 1447 struct gfs2_holder gh;
1440 int error; 1448 int error;
1441 1449
1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1450 error = gfs2_rindex_update(sdp);
1451 if (error)
1452 return error;
1453
1454 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
1443 if (!rgd) { 1455 if (!rgd) {
1444 gfs2_consist_inode(ip); 1456 gfs2_consist_inode(ip);
1445 return -EIO; 1457 return -EIO;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8137fb3e678..7b4c537d6e1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -430,15 +430,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
430 430
431 sb->s_d_op = &hfs_dentry_operations; 431 sb->s_d_op = &hfs_dentry_operations;
432 res = -ENOMEM; 432 res = -ENOMEM;
433 sb->s_root = d_alloc_root(root_inode); 433 sb->s_root = d_make_root(root_inode);
434 if (!sb->s_root) 434 if (!sb->s_root)
435 goto bail_iput; 435 goto bail_no_root;
436 436
437 /* everything's okay */ 437 /* everything's okay */
438 return 0; 438 return 0;
439 439
440bail_iput:
441 iput(root_inode);
442bail_no_root: 440bail_no_root:
443 printk(KERN_ERR "hfs: get root inode failed.\n"); 441 printk(KERN_ERR "hfs: get root inode failed.\n");
444bail: 442bail:
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 4dfbfec357e..ec2a9c23f0c 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -366,6 +366,10 @@ int hfsplus_rename_cat(u32 cnid,
366 err = hfs_brec_find(&src_fd); 366 err = hfs_brec_find(&src_fd);
367 if (err) 367 if (err)
368 goto out; 368 goto out;
369 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
370 err = -EIO;
371 goto out;
372 }
369 373
370 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, 374 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
371 src_fd.entrylength); 375 src_fd.entrylength);
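The added bounds check matters because src_fd.entrylength is read from the on-disk b-tree node: without it, a corrupted or crafted catalog record could make hfs_bnode_read() copy past the end of the local entry buffer. The readdir path below gains the same guard at both of its read sites.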
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 88e155f895c..26b53fb09f6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -150,6 +150,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
150 filp->f_pos++; 150 filp->f_pos++;
151 /* fall through */ 151 /* fall through */
152 case 1: 152 case 1:
153 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
154 err = -EIO;
155 goto out;
156 }
157
153 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, 158 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
154 fd.entrylength); 159 fd.entrylength);
155 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { 160 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
@@ -181,6 +186,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
181 err = -EIO; 186 err = -EIO;
182 goto out; 187 goto out;
183 } 188 }
189
190 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
191 err = -EIO;
192 goto out;
193 }
194
184 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, 195 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
185 fd.entrylength); 196 fd.entrylength);
186 type = be16_to_cpu(entry.type); 197 type = be16_to_cpu(entry.type);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 21a5b7fc6db..4e75ac646fe 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -317,6 +317,11 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
317 317
318 318
319/* 319/*
320 * hfs+-specific ioctl for making the filesystem bootable
321 */
322#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
323
324/*
320 * Functions in any *.c used in other files 325 * Functions in any *.c used in other files
321 */ 326 */
322 327
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 927cdd6d5bf..921967e5abb 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -117,7 +117,7 @@ struct hfsplus_vh {
117 __be32 write_count; 117 __be32 write_count;
118 __be64 encodings_bmp; 118 __be64 encodings_bmp;
119 119
120 u8 finder_info[32]; 120 u32 finder_info[8];
121 121
122 struct hfsplus_fork_raw alloc_file; 122 struct hfsplus_fork_raw alloc_file;
123 struct hfsplus_fork_raw ext_file; 123 struct hfsplus_fork_raw ext_file;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6643b242bdd..82b69ee4dac 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -193,6 +193,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
193 mutex_init(&hip->extents_lock); 193 mutex_init(&hip->extents_lock);
194 hip->extent_state = 0; 194 hip->extent_state = 0;
195 hip->flags = 0; 195 hip->flags = 0;
196 hip->userflags = 0;
196 set_bit(HFSPLUS_I_RSRC, &hip->flags); 197 set_bit(HFSPLUS_I_RSRC, &hip->flags);
197 198
198 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 199 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
@@ -400,6 +401,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
400 atomic_set(&hip->opencnt, 0); 401 atomic_set(&hip->opencnt, 0);
401 hip->extent_state = 0; 402 hip->extent_state = 0;
402 hip->flags = 0; 403 hip->flags = 0;
404 hip->userflags = 0;
403 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); 405 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
404 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 406 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
405 hip->alloc_blocks = 0; 407 hip->alloc_blocks = 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index f66c7655b3f..c640ba57074 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -20,6 +20,38 @@
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 21#include "hfsplus_fs.h"
22 22
23/*
24 * "Blessing" an HFS+ filesystem writes metadata to the superblock informing
25 * the platform firmware which file to boot from
26 */
27static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
28{
29 struct dentry *dentry = file->f_path.dentry;
30 struct inode *inode = dentry->d_inode;
31 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
32 struct hfsplus_vh *vh = sbi->s_vhdr;
33 struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
34
35 if (!capable(CAP_SYS_ADMIN))
36 return -EPERM;
37
38 mutex_lock(&sbi->vh_mutex);
39
40 /* Directory containing the bootable system */
41 vh->finder_info[0] = bvh->finder_info[0] =
42 cpu_to_be32(parent_ino(dentry));
43
44 /* Bootloader */
45 vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino);
46
47 /* Per spec, the OS X system folder - same as finder_info[0] here */
48 vh->finder_info[5] = bvh->finder_info[5] =
49 cpu_to_be32(parent_ino(dentry));
50
51 mutex_unlock(&sbi->vh_mutex);
52 return 0;
53}
54
23static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) 55static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
24{ 56{
25 struct inode *inode = file->f_path.dentry->d_inode; 57 struct inode *inode = file->f_path.dentry->d_inode;
@@ -108,6 +140,8 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
108 return hfsplus_ioctl_getflags(file, argp); 140 return hfsplus_ioctl_getflags(file, argp);
109 case HFSPLUS_IOC_EXT2_SETFLAGS: 141 case HFSPLUS_IOC_EXT2_SETFLAGS:
110 return hfsplus_ioctl_setflags(file, argp); 142 return hfsplus_ioctl_setflags(file, argp);
143 case HFSPLUS_IOC_BLESS:
144 return hfsplus_ioctl_bless(file, argp);
111 default: 145 default:
112 return -ENOTTY; 146 return -ENOTTY;
113 } 147 }
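A minimal userspace sketch of invoking the new bless ioctl (the request number mirrors the HFSPLUS_IOC_BLESS definition added to hfsplus_fs.h above; the caller needs CAP_SYS_ADMIN, and the argument is ignored by the handler):

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>

    #define HFSPLUS_IOC_BLESS  _IO('h', 0x80)       /* as in hfsplus_fs.h */

    int main(int argc, char **argv)
    {
            /* argv[1]: the bootloader file on the hfs+ filesystem to bless */
            int fd;

            if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                    return 1;
            if (ioctl(fd, HFSPLUS_IOC_BLESS, NULL) < 0) {
                    perror("HFSPLUS_IOC_BLESS");
                    return 1;
            }
            return 0;
    }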
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 427682ca9e4..ceb1c281eef 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -465,6 +465,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
465 goto out_put_alloc_file; 465 goto out_put_alloc_file;
466 } 466 }
467 467
468 sb->s_d_op = &hfsplus_dentry_operations;
469 sb->s_root = d_make_root(root);
470 if (!sb->s_root) {
471 err = -ENOMEM;
472 goto out_put_alloc_file;
473 }
474
468 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 475 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
469 str.name = HFSP_HIDDENDIR_NAME; 476 str.name = HFSP_HIDDENDIR_NAME;
470 err = hfs_find_init(sbi->cat_tree, &fd); 477 err = hfs_find_init(sbi->cat_tree, &fd);
@@ -515,13 +522,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
515 } 522 }
516 } 523 }
517 524
518 sb->s_d_op = &hfsplus_dentry_operations;
519 sb->s_root = d_alloc_root(root);
520 if (!sb->s_root) {
521 err = -ENOMEM;
522 goto out_put_hidden_dir;
523 }
524
525 unload_nls(sbi->nls); 525 unload_nls(sbi->nls);
526 sbi->nls = nls; 526 sbi->nls = nls;
527 return 0; 527 return 0;
@@ -529,7 +529,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
529out_put_hidden_dir: 529out_put_hidden_dir:
530 iput(sbi->hidden_dir); 530 iput(sbi->hidden_dir);
531out_put_root: 531out_put_root:
532 iput(root); 532 dput(sb->s_root);
533 sb->s_root = NULL;
533out_put_alloc_file: 534out_put_alloc_file:
534 iput(sbi->alloc_file); 535 iput(sbi->alloc_file);
535out_close_cat_tree: 536out_close_cat_tree:
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 3cbfa93cd78..1fe731337f0 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -67,7 +67,8 @@ extern int access_file(char *path, int r, int w, int x);
67extern int open_file(char *path, int r, int w, int append); 67extern int open_file(char *path, int r, int w, int append);
68extern void *open_dir(char *path, int *err_out); 68extern void *open_dir(char *path, int *err_out);
69extern char *read_dir(void *stream, unsigned long long *pos, 69extern char *read_dir(void *stream, unsigned long long *pos,
70 unsigned long long *ino_out, int *len_out); 70 unsigned long long *ino_out, int *len_out,
71 unsigned int *type_out);
71extern void close_file(void *stream); 72extern void close_file(void *stream);
72extern int replace_file(int oldfd, int fd); 73extern int replace_file(int oldfd, int fd);
73extern void close_dir(void *stream); 74extern void close_dir(void *stream);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e130bd46d67..07c516bfea7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -283,6 +283,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
283 char *name; 283 char *name;
284 unsigned long long next, ino; 284 unsigned long long next, ino;
285 int error, len; 285 int error, len;
286 unsigned int type;
286 287
287 name = dentry_name(file->f_path.dentry); 288 name = dentry_name(file->f_path.dentry);
288 if (name == NULL) 289 if (name == NULL)
@@ -292,9 +293,9 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
292 if (dir == NULL) 293 if (dir == NULL)
293 return -error; 294 return -error;
294 next = file->f_pos; 295 next = file->f_pos;
295 while ((name = read_dir(dir, &next, &ino, &len)) != NULL) { 296 while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) {
296 error = (*filldir)(ent, name, len, file->f_pos, 297 error = (*filldir)(ent, name, len, file->f_pos,
297 ino, DT_UNKNOWN); 298 ino, type);
298 if (error) break; 299 if (error) break;
299 file->f_pos = next; 300 file->f_pos = next;
300 } 301 }
@@ -966,9 +967,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
966 } 967 }
967 968
968 err = -ENOMEM; 969 err = -ENOMEM;
969 sb->s_root = d_alloc_root(root_inode); 970 sb->s_root = d_make_root(root_inode);
970 if (sb->s_root == NULL) 971 if (sb->s_root == NULL)
971 goto out_put; 972 goto out;
972 973
973 return 0; 974 return 0;
974 975
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index dd7bc38a382..a74ad0d371c 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -98,7 +98,8 @@ void *open_dir(char *path, int *err_out)
98} 98}
99 99
100char *read_dir(void *stream, unsigned long long *pos, 100char *read_dir(void *stream, unsigned long long *pos,
101 unsigned long long *ino_out, int *len_out) 101 unsigned long long *ino_out, int *len_out,
102 unsigned int *type_out)
102{ 103{
103 DIR *dir = stream; 104 DIR *dir = stream;
104 struct dirent *ent; 105 struct dirent *ent;
@@ -109,6 +110,7 @@ char *read_dir(void *stream, unsigned long long *pos,
109 return NULL; 110 return NULL;
110 *len_out = strlen(ent->d_name); 111 *len_out = strlen(ent->d_name);
111 *ino_out = ent->d_ino; 112 *ino_out = ent->d_ino;
113 *type_out = ent->d_type;
112 *pos = telldir(dir); 114 *pos = telldir(dir);
113 return ent->d_name; 115 return ent->d_name;
114} 116}
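The reason type_out can be passed straight through to filldir is that the host's d_type field already uses the DT_* encoding the VFS expects (with DT_UNKNOWN still possible on host filesystems that do not fill it in). A small userspace illustration of the values being relied on:

    #include <stdio.h>
    #include <dirent.h>

    int main(void)
    {
        DIR *dir = opendir(".");
        struct dirent *ent;

        if (!dir)
            return 1;
        /* d_type carries the same DT_* constants filldir consumes. */
        while ((ent = readdir(dir)) != NULL)
            printf("%-24s d_type=%d (DT_REG=%d, DT_DIR=%d, DT_UNKNOWN=%d)\n",
                   ent->d_name, ent->d_type, DT_REG, DT_DIR, DT_UNKNOWN);
        closedir(dir);
        return 0;
    }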
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 3690467c944..54f6eccb79d 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -625,11 +625,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
625 hpfs_init_inode(root); 625 hpfs_init_inode(root);
626 hpfs_read_inode(root); 626 hpfs_read_inode(root);
627 unlock_new_inode(root); 627 unlock_new_inode(root);
628 s->s_root = d_alloc_root(root); 628 s->s_root = d_make_root(root);
629 if (!s->s_root) { 629 if (!s->s_root)
630 iput(root);
631 goto bail0; 630 goto bail0;
632 }
633 631
634 /* 632 /*
635 * find the root directory's . pointer & finish filling in the inode 633 * find the root directory's . pointer & finish filling in the inode
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d92f4ce8092..a80e45a690a 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -726,17 +726,12 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
726 726
727 err = -ENOMEM; 727 err = -ENOMEM;
728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root)); 728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
729 if (!root_inode) 729 sb->s_root = d_make_root(root_inode);
730 goto out_mntput;
731
732 sb->s_root = d_alloc_root(root_inode);
733 if (!sb->s_root) 730 if (!sb->s_root)
734 goto out_iput; 731 goto out_mntput;
735 732
736 return 0; 733 return 0;
737 734
738 out_iput:
739 iput(root_inode);
740 out_mntput: 735 out_mntput:
741 mntput(proc_mnt); 736 mntput(proc_mnt);
742 out: 737 out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e85a7ac021..001ef01d2fe 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations;
41static const struct inode_operations hugetlbfs_dir_inode_operations; 41static const struct inode_operations hugetlbfs_dir_inode_operations;
42static const struct inode_operations hugetlbfs_inode_operations; 42static const struct inode_operations hugetlbfs_inode_operations;
43 43
44struct hugetlbfs_config {
45 uid_t uid;
46 gid_t gid;
47 umode_t mode;
48 long nr_blocks;
49 long nr_inodes;
50 struct hstate *hstate;
51};
52
53struct hugetlbfs_inode_info {
54 struct shared_policy policy;
55 struct inode vfs_inode;
56};
57
58static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
59{
60 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
61}
62
44static struct backing_dev_info hugetlbfs_backing_dev_info = { 63static struct backing_dev_info hugetlbfs_backing_dev_info = {
45 .name = "hugetlbfs", 64 .name = "hugetlbfs",
46 .ra_pages = 0, /* No readahead */ 65 .ra_pages = 0, /* No readahead */
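The HUGETLBFS_I() helper moved above is the standard container_of pattern for recovering a filesystem-private structure from the VFS inode embedded inside it. A standalone toy version of the same trick (the struct names here are illustrative):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct vfs_inode { long i_ino; };

    struct my_inode_info {
        int private_flags;
        struct vfs_inode vfs_inode;   /* embedded, as in hugetlbfs_inode_info */
    };

    int main(void)
    {
        struct my_inode_info info = { .private_flags = 42 };
        struct vfs_inode *inode = &info.vfs_inode;
        /* Recover the containing structure from the embedded member. */
        struct my_inode_info *back =
                container_of(inode, struct my_inode_info, vfs_inode);

        printf("%d\n", back->private_flags);  /* prints 42 */
        return 0;
    }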
@@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
154 return addr; 173 return addr;
155 } 174 }
156 175
157 start_addr = mm->free_area_cache; 176 if (len > mm->cached_hole_size)
158 177 start_addr = mm->free_area_cache;
159 if (len <= mm->cached_hole_size) 178 else {
160 start_addr = TASK_UNMAPPED_BASE; 179 start_addr = TASK_UNMAPPED_BASE;
180 mm->cached_hole_size = 0;
181 }
161 182
162full_search: 183full_search:
163 addr = ALIGN(start_addr, huge_page_size(h)); 184 addr = ALIGN(start_addr, huge_page_size(h));
@@ -171,13 +192,18 @@ full_search:
171 */ 192 */
172 if (start_addr != TASK_UNMAPPED_BASE) { 193 if (start_addr != TASK_UNMAPPED_BASE) {
173 start_addr = TASK_UNMAPPED_BASE; 194 start_addr = TASK_UNMAPPED_BASE;
195 mm->cached_hole_size = 0;
174 goto full_search; 196 goto full_search;
175 } 197 }
176 return -ENOMEM; 198 return -ENOMEM;
177 } 199 }
178 200
179 if (!vma || addr + len <= vma->vm_start) 201 if (!vma || addr + len <= vma->vm_start) {
202 mm->free_area_cache = addr + len;
180 return addr; 203 return addr;
204 }
205 if (addr + mm->cached_hole_size < vma->vm_start)
206 mm->cached_hole_size = vma->vm_start - addr;
181 addr = ALIGN(vma->vm_end, huge_page_size(h)); 207 addr = ALIGN(vma->vm_end, huge_page_size(h));
182 } 208 }
183} 209}
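The hunk above brings hugetlb_get_unmapped_area() in line with the generic first-fit search cache: free_area_cache remembers where the last successful search ended, and cached_hole_size tracks the largest hole skipped below it, so a request that could fit in a skipped hole restarts from TASK_UNMAPPED_BASE instead of walking past it again. A toy model of just the start-address decision (the struct and constant are stand-ins, not kernel definitions):

    #include <stdio.h>

    #define TASK_UNMAPPED_BASE 0x40000000UL

    struct toy_mm {
        unsigned long free_area_cache;    /* where the last search ended */
        unsigned long cached_hole_size;   /* largest hole skipped below it */
    };

    static unsigned long pick_start(struct toy_mm *mm, unsigned long len)
    {
        if (len > mm->cached_hole_size)
            return mm->free_area_cache;   /* no known hole fits: resume */
        mm->cached_hole_size = 0;         /* a hole may fit: rescan, rebuild */
        return TASK_UNMAPPED_BASE;
    }

    int main(void)
    {
        struct toy_mm mm = { .free_area_cache = 0x50000000UL,
                             .cached_hole_size = 0x200000UL };

        /* 4 MB request: larger than any cached hole, resume at the cache. */
        printf("%#lx\n", pick_start(&mm, 0x400000UL));
        /* 1 MB request: a skipped hole may fit, restart from the base. */
        printf("%#lx\n", pick_start(&mm, 0x100000UL));
        return 0;
    }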
@@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
238 loff_t isize; 264 loff_t isize;
239 ssize_t retval = 0; 265 ssize_t retval = 0;
240 266
241 mutex_lock(&inode->i_mutex);
242
243 /* validate length */ 267 /* validate length */
244 if (len == 0) 268 if (len == 0)
245 goto out; 269 goto out;
246 270
247 isize = i_size_read(inode);
248 if (!isize)
249 goto out;
250
251 end_index = (isize - 1) >> huge_page_shift(h);
252 for (;;) { 271 for (;;) {
253 struct page *page; 272 struct page *page;
254 unsigned long nr, ret; 273 unsigned long nr, ret;
@@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
256 275
257 /* nr is the maximum number of bytes to copy from this page */ 276 /* nr is the maximum number of bytes to copy from this page */
258 nr = huge_page_size(h); 277 nr = huge_page_size(h);
278 isize = i_size_read(inode);
279 if (!isize)
280 goto out;
281 end_index = (isize - 1) >> huge_page_shift(h);
259 if (index >= end_index) { 282 if (index >= end_index) {
260 if (index > end_index) 283 if (index > end_index)
261 goto out; 284 goto out;
262 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 285 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
263 if (nr <= offset) { 286 if (nr <= offset)
264 goto out; 287 goto out;
265 }
266 } 288 }
267 nr = nr - offset; 289 nr = nr - offset;
268 290
269 /* Find the page */ 291 /* Find the page */
270 page = find_get_page(mapping, index); 292 page = find_lock_page(mapping, index);
271 if (unlikely(page == NULL)) { 293 if (unlikely(page == NULL)) {
272 /* 294 /*
273 * We have a HOLE, zero out the user-buffer for the 295 * We have a HOLE, zero out the user-buffer for the
@@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
279 else 301 else
280 ra = 0; 302 ra = 0;
281 } else { 303 } else {
304 unlock_page(page);
305
282 /* 306 /*
283 * We have the page, copy it to user space buffer. 307 * We have the page, copy it to user space buffer.
284 */ 308 */
285 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 309 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
286 ret = ra; 310 ret = ra;
311 page_cache_release(page);
287 } 312 }
288 if (ra < 0) { 313 if (ra < 0) {
289 if (retval == 0) 314 if (retval == 0)
290 retval = ra; 315 retval = ra;
291 if (page)
292 page_cache_release(page);
293 goto out; 316 goto out;
294 } 317 }
295 318
@@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
299 index += offset >> huge_page_shift(h); 322 index += offset >> huge_page_shift(h);
300 offset &= ~huge_page_mask(h); 323 offset &= ~huge_page_mask(h);
301 324
302 if (page)
303 page_cache_release(page);
304
305 /* short read or no more work */ 325 /* short read or no more work */
306 if ((ret != nr) || (len == 0)) 326 if ((ret != nr) || (len == 0))
307 break; 327 break;
308 } 328 }
309out: 329out:
310 *ppos = ((loff_t)index << huge_page_shift(h)) + offset; 330 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
311 mutex_unlock(&inode->i_mutex);
312 return retval; 331 return retval;
313} 332}
314 333
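The rework above removes i_mutex from the read path; correctness against concurrent truncation instead comes from re-reading i_size on every iteration and pinning the page with find_lock_page(), since truncation locks each page before dropping it. A condensed kernel-style sketch of the new loop shape (the copy and zero-fill steps are reduced to comments):

    #include <linux/fs.h>
    #include <linux/pagemap.h>
    #include <linux/hugetlb.h>

    static void read_loop_sketch(struct inode *inode,
                                 struct address_space *mapping,
                                 struct hstate *h, pgoff_t index)
    {
        for (;;) {
            struct page *page;
            /* Re-read i_size each pass: without i_mutex it can
             * shrink under us, so a value cached before the loop
             * must not be trusted. */
            loff_t isize = i_size_read(inode);

            if (!isize || index > (isize - 1) >> huge_page_shift(h))
                break;

            page = find_lock_page(mapping, index);
            if (page) {
                unlock_page(page);        /* pinned; safe to copy */
                /* ... hugetlbfs_read_actor() copies to user space ... */
                page_cache_release(page);
            } else {
                /* ... hole: zero-fill the user buffer instead ... */
            }
            index++;
        }
    }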
@@ -466,6 +485,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
466 inode->i_fop = &simple_dir_operations; 485 inode->i_fop = &simple_dir_operations;
467 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 486 /* directory inodes start off with i_nlink == 2 (for "." entry) */
468 inc_nlink(inode); 487 inc_nlink(inode);
488 lockdep_annotate_inode_mutex_key(inode);
469 } 489 }
470 return inode; 490 return inode;
471} 491}
@@ -607,9 +627,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
607 spin_lock(&sbinfo->stat_lock); 627 spin_lock(&sbinfo->stat_lock);
608 /* If no limits set, just report 0 for max/free/used 628 /* If no limits set, just report 0 for max/free/used
609 * blocks, like simple_statfs() */ 629 * blocks, like simple_statfs() */
610 if (sbinfo->max_blocks >= 0) { 630 if (sbinfo->spool) {
611 buf->f_blocks = sbinfo->max_blocks; 631 long free_pages;
612 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 632
633 spin_lock(&sbinfo->spool->lock);
634 buf->f_blocks = sbinfo->spool->max_hpages;
635 free_pages = sbinfo->spool->max_hpages
636 - sbinfo->spool->used_hpages;
637 buf->f_bavail = buf->f_bfree = free_pages;
638 spin_unlock(&sbinfo->spool->lock);
613 buf->f_files = sbinfo->max_inodes; 639 buf->f_files = sbinfo->max_inodes;
614 buf->f_ffree = sbinfo->free_inodes; 640 buf->f_ffree = sbinfo->free_inodes;
615 } 641 }
@@ -625,6 +651,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
625 651
626 if (sbi) { 652 if (sbi) {
627 sb->s_fs_info = NULL; 653 sb->s_fs_info = NULL;
654
655 if (sbi->spool)
656 hugepage_put_subpool(sbi->spool);
657
628 kfree(sbi); 658 kfree(sbi);
629 } 659 }
630} 660}
@@ -831,8 +861,6 @@ bad_val:
831static int 861static int
832hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 862hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833{ 863{
834 struct inode * inode;
835 struct dentry * root;
836 int ret; 864 int ret;
837 struct hugetlbfs_config config; 865 struct hugetlbfs_config config;
838 struct hugetlbfs_sb_info *sbinfo; 866 struct hugetlbfs_sb_info *sbinfo;
@@ -855,60 +883,31 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
855 sb->s_fs_info = sbinfo; 883 sb->s_fs_info = sbinfo;
856 sbinfo->hstate = config.hstate; 884 sbinfo->hstate = config.hstate;
857 spin_lock_init(&sbinfo->stat_lock); 885 spin_lock_init(&sbinfo->stat_lock);
858 sbinfo->max_blocks = config.nr_blocks;
859 sbinfo->free_blocks = config.nr_blocks;
860 sbinfo->max_inodes = config.nr_inodes; 886 sbinfo->max_inodes = config.nr_inodes;
861 sbinfo->free_inodes = config.nr_inodes; 887 sbinfo->free_inodes = config.nr_inodes;
888 sbinfo->spool = NULL;
889 if (config.nr_blocks != -1) {
890 sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
891 if (!sbinfo->spool)
892 goto out_free;
893 }
862 sb->s_maxbytes = MAX_LFS_FILESIZE; 894 sb->s_maxbytes = MAX_LFS_FILESIZE;
863 sb->s_blocksize = huge_page_size(config.hstate); 895 sb->s_blocksize = huge_page_size(config.hstate);
864 sb->s_blocksize_bits = huge_page_shift(config.hstate); 896 sb->s_blocksize_bits = huge_page_shift(config.hstate);
865 sb->s_magic = HUGETLBFS_MAGIC; 897 sb->s_magic = HUGETLBFS_MAGIC;
866 sb->s_op = &hugetlbfs_ops; 898 sb->s_op = &hugetlbfs_ops;
867 sb->s_time_gran = 1; 899 sb->s_time_gran = 1;
868 inode = hugetlbfs_get_root(sb, &config); 900 sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
869 if (!inode) 901 if (!sb->s_root)
870 goto out_free;
871
872 root = d_alloc_root(inode);
873 if (!root) {
874 iput(inode);
875 goto out_free; 902 goto out_free;
876 }
877 sb->s_root = root;
878 return 0; 903 return 0;
879out_free: 904out_free:
905 if (sbinfo->spool)
906 kfree(sbinfo->spool);
880 kfree(sbinfo); 907 kfree(sbinfo);
881 return -ENOMEM; 908 return -ENOMEM;
882} 909}
883 910
884int hugetlb_get_quota(struct address_space *mapping, long delta)
885{
886 int ret = 0;
887 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
888
889 if (sbinfo->free_blocks > -1) {
890 spin_lock(&sbinfo->stat_lock);
891 if (sbinfo->free_blocks - delta >= 0)
892 sbinfo->free_blocks -= delta;
893 else
894 ret = -ENOMEM;
895 spin_unlock(&sbinfo->stat_lock);
896 }
897
898 return ret;
899}
900
901void hugetlb_put_quota(struct address_space *mapping, long delta)
902{
903 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
904
905 if (sbinfo->free_blocks > -1) {
906 spin_lock(&sbinfo->stat_lock);
907 sbinfo->free_blocks += delta;
908 spin_unlock(&sbinfo->stat_lock);
909 }
910}
911
912static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, 911static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
913 int flags, const char *dev_name, void *data) 912 int flags, const char *dev_name, void *data)
914{ 913{
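The removed hugetlb_get_quota()/hugetlb_put_quota() pair is replaced by a hugepage subpool: one small counter object per mount, created from the nr_blocks option and shared by all of its inodes. A toy model of the accounting it provides (these are illustrative structs, not the kernel's hugepage_subpool API):

    #include <stdio.h>

    struct toy_subpool {
        long max_hpages;    /* limit from the nr_blocks mount option */
        long used_hpages;   /* huge pages currently charged */
    };

    static int subpool_get(struct toy_subpool *sp, long delta)
    {
        if (sp->used_hpages + delta > sp->max_hpages)
            return -1;                    /* would exceed the mount limit */
        sp->used_hpages += delta;
        return 0;
    }

    static void subpool_put(struct toy_subpool *sp, long delta)
    {
        sp->used_hpages -= delta;
    }

    int main(void)
    {
        struct toy_subpool sp = { .max_hpages = 4, .used_hpages = 0 };

        printf("reserve 3: %d\n", subpool_get(&sp, 3));   /* ok */
        printf("reserve 2: %d\n", subpool_get(&sp, 2));   /* fails: 5 > 4 */
        subpool_put(&sp, 3);
        printf("free: %ld of %ld\n",
               sp.max_hpages - sp.used_hpages, sp.max_hpages);
        return 0;
    }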
@@ -928,8 +927,8 @@ static int can_do_hugetlb_shm(void)
928 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 927 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
929} 928}
930 929
931struct file *hugetlb_file_setup(const char *name, size_t size, 930struct file *hugetlb_file_setup(const char *name, unsigned long addr,
932 vm_flags_t acctflag, 931 size_t size, vm_flags_t acctflag,
933 struct user_struct **user, int creat_flags) 932 struct user_struct **user, int creat_flags)
934{ 933{
935 int error = -ENOMEM; 934 int error = -ENOMEM;
@@ -938,6 +937,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
938 struct path path; 937 struct path path;
939 struct dentry *root; 938 struct dentry *root;
940 struct qstr quick_string; 939 struct qstr quick_string;
940 struct hstate *hstate;
941 unsigned long num_pages;
941 942
942 *user = NULL; 943 *user = NULL;
943 if (!hugetlbfs_vfsmount) 944 if (!hugetlbfs_vfsmount)
@@ -946,7 +947,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
946 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { 947 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
947 *user = current_user(); 948 *user = current_user();
948 if (user_shm_lock(size, *user)) { 949 if (user_shm_lock(size, *user)) {
949 printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); 950 task_lock(current);
951 printk_once(KERN_WARNING
952 "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
953 current->comm, current->pid);
954 task_unlock(current);
950 } else { 955 } else {
951 *user = NULL; 956 *user = NULL;
952 return ERR_PTR(-EPERM); 957 return ERR_PTR(-EPERM);
@@ -967,10 +972,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
967 if (!inode) 972 if (!inode)
968 goto out_dentry; 973 goto out_dentry;
969 974
975 hstate = hstate_inode(inode);
976 size += addr & ~huge_page_mask(hstate);
977 num_pages = ALIGN(size, huge_page_size(hstate)) >>
978 huge_page_shift(hstate);
970 error = -ENOMEM; 979 error = -ENOMEM;
971 if (hugetlb_reserve_pages(inode, 0, 980 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
972 size >> huge_page_shift(hstate_inode(inode)), NULL,
973 acctflag))
974 goto out_inode; 981 goto out_inode;
975 982
976 d_instantiate(path.dentry, inode); 983 d_instantiate(path.dentry, inode);
@@ -1006,6 +1013,7 @@ static int __init init_hugetlbfs_fs(void)
1006 if (error) 1013 if (error)
1007 return error; 1014 return error;
1008 1015
1016 error = -ENOMEM;
1009 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 1017 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
1010 sizeof(struct hugetlbfs_inode_info), 1018 sizeof(struct hugetlbfs_inode_info),
1011 0, 0, init_once); 1019 0, 0, init_once);
@@ -1026,8 +1034,7 @@ static int __init init_hugetlbfs_fs(void)
1026 error = PTR_ERR(vfsmount); 1034 error = PTR_ERR(vfsmount);
1027 1035
1028 out: 1036 out:
1029 if (error) 1037 kmem_cache_destroy(hugetlbfs_inode_cachep);
1030 kmem_cache_destroy(hugetlbfs_inode_cachep);
1031 out2: 1038 out2:
1032 bdi_destroy(&hugetlbfs_backing_dev_info); 1039 bdi_destroy(&hugetlbfs_backing_dev_info);
1033 return error; 1040 return error;
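The addr-aware reservation added to hugetlb_file_setup() rounds the request out to whole huge pages after accounting for the mapping's offset within its first huge page. A worked example of the arithmetic, assuming 2 MB huge pages (the mask and shift helpers below are simplified stand-ins):

    #include <stdio.h>

    #define HPAGE_SHIFT 21
    #define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
    #define HPAGE_MASK  (~(HPAGE_SIZE - 1))
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        unsigned long addr = 0x60100000UL;  /* 1 MB into a huge page */
        unsigned long size = 3UL << 20;     /* 3 MB request */
        unsigned long num_pages;

        /* Fold in the offset of addr within its huge page... */
        size += addr & ~HPAGE_MASK;         /* 3 MB + 1 MB = 4 MB */
        /* ...then round up to whole huge pages. */
        num_pages = ALIGN(size, HPAGE_SIZE) >> HPAGE_SHIFT;
        printf("reserve %lu huge pages\n", num_pages);  /* prints 2 */
        return 0;
    }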
diff --git a/fs/inode.c b/fs/inode.c
index 83ab215baab..9f4f5fecc09 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2,29 +2,19 @@
2 * (C) 1997 Linus Torvalds 2 * (C) 1997 Linus Torvalds
3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) 3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
4 */ 4 */
5#include <linux/export.h>
5#include <linux/fs.h> 6#include <linux/fs.h>
6#include <linux/mm.h> 7#include <linux/mm.h>
7#include <linux/dcache.h>
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/writeback.h>
11#include <linux/module.h>
12#include <linux/backing-dev.h> 8#include <linux/backing-dev.h>
13#include <linux/wait.h>
14#include <linux/rwsem.h>
15#include <linux/hash.h> 9#include <linux/hash.h>
16#include <linux/swap.h> 10#include <linux/swap.h>
17#include <linux/security.h> 11#include <linux/security.h>
18#include <linux/pagemap.h>
19#include <linux/cdev.h> 12#include <linux/cdev.h>
20#include <linux/bootmem.h> 13#include <linux/bootmem.h>
21#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
22#include <linux/mount.h> 15#include <linux/mount.h>
23#include <linux/async.h>
24#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
25#include <linux/prefetch.h> 17#include <linux/prefetch.h>
26#include <linux/ima.h>
27#include <linux/cred.h>
28#include <linux/buffer_head.h> /* for inode_has_buffers */ 18#include <linux/buffer_head.h> /* for inode_has_buffers */
29#include <linux/ratelimit.h> 19#include <linux/ratelimit.h>
30#include "internal.h" 20#include "internal.h"
@@ -1369,17 +1359,6 @@ int generic_delete_inode(struct inode *inode)
1369EXPORT_SYMBOL(generic_delete_inode); 1359EXPORT_SYMBOL(generic_delete_inode);
1370 1360
1371/* 1361/*
1372 * Normal UNIX filesystem behaviour: delete the
1373 * inode when the usage count drops to zero, and
1374 * i_nlink is zero.
1375 */
1376int generic_drop_inode(struct inode *inode)
1377{
1378 return !inode->i_nlink || inode_unhashed(inode);
1379}
1380EXPORT_SYMBOL_GPL(generic_drop_inode);
1381
1382/*
1383 * Called when we're dropping the last reference 1362 * Called when we're dropping the last reference
1384 * to an inode. 1363 * to an inode.
1385 * 1364 *
@@ -1510,9 +1489,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1510 * This function automatically handles read only file systems and media, 1489 * This function automatically handles read only file systems and media,
1511 * as well as the "noatime" flag and inode specific "noatime" markers. 1490 * as well as the "noatime" flag and inode specific "noatime" markers.
1512 */ 1491 */
1513void touch_atime(struct vfsmount *mnt, struct dentry *dentry) 1492void touch_atime(struct path *path)
1514{ 1493{
1515 struct inode *inode = dentry->d_inode; 1494 struct vfsmount *mnt = path->mnt;
1495 struct inode *inode = path->dentry->d_inode;
1516 struct timespec now; 1496 struct timespec now;
1517 1497
1518 if (inode->i_flags & S_NOATIME) 1498 if (inode->i_flags & S_NOATIME)
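With touch_atime() now taking a struct path, callers that previously unpacked a (vfsmount, dentry) pair can hand over the path they already hold. A caller-side sketch of the conversion (illustrative, not a hunk from this series):

    #include <linux/fs.h>

    static inline void file_accessed_sketch(struct file *file)
    {
        /* Before: touch_atime(file->f_path.mnt, file->f_path.dentry); */
        touch_atime(&file->f_path);   /* the path carries mount and dentry */
    }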
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 066836e8184..29167bebe87 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -10,7 +10,7 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/security.h> 12#include <linux/security.h>
13#include <linux/module.h> 13#include <linux/export.h>
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bd62c76fb5d..29037c365ba 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -947,9 +947,8 @@ root_found:
947 s->s_d_op = &isofs_dentry_ops[table]; 947 s->s_d_op = &isofs_dentry_ops[table];
948 948
949 /* get the root dentry */ 949 /* get the root dentry */
950 s->s_root = d_alloc_root(inode); 950 s->s_root = d_make_root(inode);
951 if (!(s->s_root)) { 951 if (!(s->s_root)) {
952 iput(inode);
953 error = -ENOMEM; 952 error = -ENOMEM;
954 goto out_no_inode; 953 goto out_no_inode;
955 } 954 }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 59c09f9541b..0971e921780 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -129,6 +129,8 @@ static int kjournald(void *arg)
129 setup_timer(&journal->j_commit_timer, commit_timeout, 129 setup_timer(&journal->j_commit_timer, commit_timeout,
130 (unsigned long)current); 130 (unsigned long)current);
131 131
132 set_freezable();
133
132 /* Record that the journal thread is running */ 134 /* Record that the journal thread is running */
133 journal->j_task = current; 135 journal->j_task = current;
134 wake_up(&journal->j_wait_done_commit); 136 wake_up(&journal->j_wait_done_commit);
@@ -328,7 +330,7 @@ repeat:
328 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 330 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
329 } 331 }
330 332
331 mapped_data = kmap_atomic(new_page, KM_USER0); 333 mapped_data = kmap_atomic(new_page);
332 /* 334 /*
333 * Check for escaping 335 * Check for escaping
334 */ 336 */
@@ -337,7 +339,7 @@ repeat:
337 need_copy_out = 1; 339 need_copy_out = 1;
338 do_escape = 1; 340 do_escape = 1;
339 } 341 }
340 kunmap_atomic(mapped_data, KM_USER0); 342 kunmap_atomic(mapped_data);
341 343
342 /* 344 /*
343 * Do we need to do a data copy? 345 * Do we need to do a data copy?
@@ -354,9 +356,9 @@ repeat:
354 } 356 }
355 357
356 jh_in->b_frozen_data = tmp; 358 jh_in->b_frozen_data = tmp;
357 mapped_data = kmap_atomic(new_page, KM_USER0); 359 mapped_data = kmap_atomic(new_page);
358 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 360 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
359 kunmap_atomic(mapped_data, KM_USER0); 361 kunmap_atomic(mapped_data);
360 362
361 new_page = virt_to_page(tmp); 363 new_page = virt_to_page(tmp);
362 new_offset = offset_in_page(tmp); 364 new_offset = offset_in_page(tmp);
@@ -368,9 +370,9 @@ repeat:
368 * copying, we can finally do so. 370 * copying, we can finally do so.
369 */ 371 */
370 if (do_escape) { 372 if (do_escape) {
371 mapped_data = kmap_atomic(new_page, KM_USER0); 373 mapped_data = kmap_atomic(new_page);
372 *((unsigned int *)(mapped_data + new_offset)) = 0; 374 *((unsigned int *)(mapped_data + new_offset)) = 0;
373 kunmap_atomic(mapped_data, KM_USER0); 375 kunmap_atomic(mapped_data);
374 } 376 }
375 377
376 set_bh_page(new_bh, new_page, new_offset); 378 set_bh_page(new_bh, new_page, new_offset);
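The KM_USER0 edits here follow the tree-wide removal of the kmap slot argument: kmap_atomic()/kunmap_atomic() now pick and release stacked slots internally. The conversion pattern, in brief:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void copy_from_page(struct page *page, void *dst, size_t len)
    {
        /* Old: addr = kmap_atomic(page, KM_USER0); */
        char *addr = kmap_atomic(page);   /* slot chosen internally */

        memcpy(dst, addr, len);

        /* Old: kunmap_atomic(addr, KM_USER0); */
        kunmap_atomic(addr);
    }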
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7fce94b04bc..b2a7e5244e3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -718,9 +718,9 @@ done:
718 "Possible IO failure.\n"); 718 "Possible IO failure.\n");
719 page = jh2bh(jh)->b_page; 719 page = jh2bh(jh)->b_page;
720 offset = offset_in_page(jh2bh(jh)->b_data); 720 offset = offset_in_page(jh2bh(jh)->b_data);
721 source = kmap_atomic(page, KM_USER0); 721 source = kmap_atomic(page);
722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
723 kunmap_atomic(source, KM_USER0); 723 kunmap_atomic(source);
724 } 724 }
725 jbd_unlock_bh_state(bh); 725 jbd_unlock_bh_state(bh);
726 726
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index d49d202903f..c78841ee81c 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh)
88 * whole transaction. 88 * whole transaction.
89 * 89 *
90 * Requires j_list_lock 90 * Requires j_list_lock
91 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
92 */ 91 */
93static int __try_to_free_cp_buf(struct journal_head *jh) 92static int __try_to_free_cp_buf(struct journal_head *jh)
94{ 93{
95 int ret = 0; 94 int ret = 0;
96 struct buffer_head *bh = jh2bh(jh); 95 struct buffer_head *bh = jh2bh(jh);
97 96
98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 97 if (jh->b_transaction == NULL && !buffer_locked(bh) &&
99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100 /* 99 /*
101 * Get our reference so that bh cannot be freed before 100 * Get our reference so that bh cannot be freed before
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
104 get_bh(bh); 103 get_bh(bh);
105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 104 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 105 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
107 jbd_unlock_bh_state(bh);
108 BUFFER_TRACE(bh, "release"); 106 BUFFER_TRACE(bh, "release");
109 __brelse(bh); 107 __brelse(bh);
110 } else {
111 jbd_unlock_bh_state(bh);
112 } 108 }
113 return ret; 109 return ret;
114} 110}
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
180} 176}
181 177
182/* 178/*
183 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
184 * The caller must restart a list walk. Wait for someone else to run
185 * jbd_unlock_bh_state().
186 */
187static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
188 __releases(journal->j_list_lock)
189{
190 get_bh(bh);
191 spin_unlock(&journal->j_list_lock);
192 jbd_lock_bh_state(bh);
193 jbd_unlock_bh_state(bh);
194 put_bh(bh);
195}
196
197/*
198 * Clean up transaction's list of buffers submitted for io. 179 * Clean up transaction's list of buffers submitted for io.
199 * We wait for any pending IO to complete and remove any clean 180 * We wait for any pending IO to complete and remove any clean
200 * buffers. Note that we take the buffers in the opposite ordering 181 * buffers. Note that we take the buffers in the opposite ordering
@@ -222,15 +203,9 @@ restart:
222 while (!released && transaction->t_checkpoint_io_list) { 203 while (!released && transaction->t_checkpoint_io_list) {
223 jh = transaction->t_checkpoint_io_list; 204 jh = transaction->t_checkpoint_io_list;
224 bh = jh2bh(jh); 205 bh = jh2bh(jh);
225 if (!jbd_trylock_bh_state(bh)) {
226 jbd_sync_bh(journal, bh);
227 spin_lock(&journal->j_list_lock);
228 goto restart;
229 }
230 get_bh(bh); 206 get_bh(bh);
231 if (buffer_locked(bh)) { 207 if (buffer_locked(bh)) {
232 spin_unlock(&journal->j_list_lock); 208 spin_unlock(&journal->j_list_lock);
233 jbd_unlock_bh_state(bh);
234 wait_on_buffer(bh); 209 wait_on_buffer(bh);
235 /* the journal_head may have gone by now */ 210 /* the journal_head may have gone by now */
236 BUFFER_TRACE(bh, "brelse"); 211 BUFFER_TRACE(bh, "brelse");
@@ -246,7 +221,6 @@ restart:
246 * it has been written out and so we can drop it from the list 221 * it has been written out and so we can drop it from the list
247 */ 222 */
248 released = __jbd2_journal_remove_checkpoint(jh); 223 released = __jbd2_journal_remove_checkpoint(jh);
249 jbd_unlock_bh_state(bh);
250 __brelse(bh); 224 __brelse(bh);
251 } 225 }
252 226
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count)
266 240
267 for (i = 0; i < *batch_count; i++) { 241 for (i = 0; i < *batch_count; i++) {
268 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 242 struct buffer_head *bh = journal->j_chkpt_bhs[i];
269 clear_buffer_jwrite(bh);
270 BUFFER_TRACE(bh, "brelse"); 243 BUFFER_TRACE(bh, "brelse");
271 __brelse(bh); 244 __brelse(bh);
272 } 245 }
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count)
281 * be written out. 254 * be written out.
282 * 255 *
283 * Called with j_list_lock held and drops it if 1 is returned 256 * Called with j_list_lock held and drops it if 1 is returned
284 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
285 */ 257 */
286static int __process_buffer(journal_t *journal, struct journal_head *jh, 258static int __process_buffer(journal_t *journal, struct journal_head *jh,
287 int *batch_count, transaction_t *transaction) 259 int *batch_count, transaction_t *transaction)
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
292 if (buffer_locked(bh)) { 264 if (buffer_locked(bh)) {
293 get_bh(bh); 265 get_bh(bh);
294 spin_unlock(&journal->j_list_lock); 266 spin_unlock(&journal->j_list_lock);
295 jbd_unlock_bh_state(bh);
296 wait_on_buffer(bh); 267 wait_on_buffer(bh);
297 /* the journal_head may have gone by now */ 268 /* the journal_head may have gone by now */
298 BUFFER_TRACE(bh, "brelse"); 269 BUFFER_TRACE(bh, "brelse");
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
304 275
305 transaction->t_chp_stats.cs_forced_to_close++; 276 transaction->t_chp_stats.cs_forced_to_close++;
306 spin_unlock(&journal->j_list_lock); 277 spin_unlock(&journal->j_list_lock);
307 jbd_unlock_bh_state(bh);
308 if (unlikely(journal->j_flags & JBD2_UNMOUNT)) 278 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
309 /* 279 /*
310 * The journal thread is dead; so starting and 280 * The journal thread is dead; so starting and
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
323 if (unlikely(buffer_write_io_error(bh))) 293 if (unlikely(buffer_write_io_error(bh)))
324 ret = -EIO; 294 ret = -EIO;
325 get_bh(bh); 295 get_bh(bh);
326 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
327 BUFFER_TRACE(bh, "remove from checkpoint"); 296 BUFFER_TRACE(bh, "remove from checkpoint");
328 __jbd2_journal_remove_checkpoint(jh); 297 __jbd2_journal_remove_checkpoint(jh);
329 spin_unlock(&journal->j_list_lock); 298 spin_unlock(&journal->j_list_lock);
330 jbd_unlock_bh_state(bh);
331 __brelse(bh); 299 __brelse(bh);
332 } else { 300 } else {
333 /* 301 /*
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
340 BUFFER_TRACE(bh, "queue"); 308 BUFFER_TRACE(bh, "queue");
341 get_bh(bh); 309 get_bh(bh);
342 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 310 J_ASSERT_BH(bh, !buffer_jwrite(bh));
343 set_buffer_jwrite(bh);
344 journal->j_chkpt_bhs[*batch_count] = bh; 311 journal->j_chkpt_bhs[*batch_count] = bh;
345 __buffer_relink_io(jh); 312 __buffer_relink_io(jh);
346 jbd_unlock_bh_state(bh);
347 transaction->t_chp_stats.cs_written++; 313 transaction->t_chp_stats.cs_written++;
348 (*batch_count)++; 314 (*batch_count)++;
349 if (*batch_count == JBD2_NR_BATCH) { 315 if (*batch_count == JBD2_NR_BATCH) {
@@ -407,15 +373,7 @@ restart:
407 int retry = 0, err; 373 int retry = 0, err;
408 374
409 while (!retry && transaction->t_checkpoint_list) { 375 while (!retry && transaction->t_checkpoint_list) {
410 struct buffer_head *bh;
411
412 jh = transaction->t_checkpoint_list; 376 jh = transaction->t_checkpoint_list;
413 bh = jh2bh(jh);
414 if (!jbd_trylock_bh_state(bh)) {
415 jbd_sync_bh(journal, bh);
416 retry = 1;
417 break;
418 }
419 retry = __process_buffer(journal, jh, &batch_count, 377 retry = __process_buffer(journal, jh, &batch_count,
420 transaction); 378 transaction);
421 if (retry < 0 && !result) 379 if (retry < 0 && !result)
@@ -478,79 +436,28 @@ out:
478 436
479int jbd2_cleanup_journal_tail(journal_t *journal) 437int jbd2_cleanup_journal_tail(journal_t *journal)
480{ 438{
481 transaction_t * transaction;
482 tid_t first_tid; 439 tid_t first_tid;
483 unsigned long blocknr, freed; 440 unsigned long blocknr;
484 441
485 if (is_journal_aborted(journal)) 442 if (is_journal_aborted(journal))
486 return 1; 443 return 1;
487 444
488 /* OK, work out the oldest transaction remaining in the log, and 445 if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
489 * the log block it starts at.
490 *
491 * If the log is now empty, we need to work out which is the
492 * next transaction ID we will write, and where it will
493 * start. */
494
495 write_lock(&journal->j_state_lock);
496 spin_lock(&journal->j_list_lock);
497 transaction = journal->j_checkpoint_transactions;
498 if (transaction) {
499 first_tid = transaction->t_tid;
500 blocknr = transaction->t_log_start;
501 } else if ((transaction = journal->j_committing_transaction) != NULL) {
502 first_tid = transaction->t_tid;
503 blocknr = transaction->t_log_start;
504 } else if ((transaction = journal->j_running_transaction) != NULL) {
505 first_tid = transaction->t_tid;
506 blocknr = journal->j_head;
507 } else {
508 first_tid = journal->j_transaction_sequence;
509 blocknr = journal->j_head;
510 }
511 spin_unlock(&journal->j_list_lock);
512 J_ASSERT(blocknr != 0);
513
514 /* If the oldest pinned transaction is at the tail of the log
515 already then there's not much we can do right now. */
516 if (journal->j_tail_sequence == first_tid) {
517 write_unlock(&journal->j_state_lock);
518 return 1; 446 return 1;
519 } 447 J_ASSERT(blocknr != 0);
520
521 /* OK, update the superblock to recover the freed space.
522 * Physical blocks come first: have we wrapped beyond the end of
523 * the log? */
524 freed = blocknr - journal->j_tail;
525 if (blocknr < journal->j_tail)
526 freed = freed + journal->j_last - journal->j_first;
527
528 trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
529 jbd_debug(1,
530 "Cleaning journal tail from %d to %d (offset %lu), "
531 "freeing %lu\n",
532 journal->j_tail_sequence, first_tid, blocknr, freed);
533
534 journal->j_free += freed;
535 journal->j_tail_sequence = first_tid;
536 journal->j_tail = blocknr;
537 write_unlock(&journal->j_state_lock);
538 448
539 /* 449 /*
540 * If there is an external journal, we need to make sure that 450 * We need to make sure that any blocks that were recently written out
541 * any data blocks that were recently written out --- perhaps 451 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
542 * by jbd2_log_do_checkpoint() --- are flushed out before we 452 * we drop the transactions from the journal. It's unlikely this will
543 * drop the transactions from the external journal. It's 453 * be necessary, especially with an appropriately sized journal, but we
544 * unlikely this will be necessary, especially with a 454 * need this to guarantee correctness. Fortunately
545 * appropriately sized journal, but we need this to guarantee 455 * jbd2_cleanup_journal_tail() doesn't get called all that often.
546 * correctness. Fortunately jbd2_cleanup_journal_tail()
547 * doesn't get called all that often.
548 */ 456 */
549 if ((journal->j_fs_dev != journal->j_dev) && 457 if (journal->j_flags & JBD2_BARRIER)
550 (journal->j_flags & JBD2_BARRIER))
551 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 458 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
552 if (!(journal->j_flags & JBD2_ABORT)) 459
553 jbd2_journal_update_superblock(journal, 1); 460 __jbd2_update_log_tail(journal, first_tid, blocknr);
554 return 0; 461 return 0;
555} 462}
556 463
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
582 do { 489 do {
583 jh = next_jh; 490 jh = next_jh;
584 next_jh = jh->b_cpnext; 491 next_jh = jh->b_cpnext;
585 /* Use trylock because of the ranking */ 492 ret = __try_to_free_cp_buf(jh);
586 if (jbd_trylock_bh_state(jh2bh(jh))) { 493 if (ret) {
587 ret = __try_to_free_cp_buf(jh); 494 freed++;
588 if (ret) { 495 if (ret == 2) {
589 freed++; 496 *released = 1;
590 if (ret == 2) { 497 return freed;
591 *released = 1;
592 return freed;
593 }
594 } 498 }
595 } 499 }
596 /* 500 /*
@@ -673,9 +577,7 @@ out:
673 * The function can free jh and bh. 577 * The function can free jh and bh.
674 * 578 *
675 * This function is called with j_list_lock held. 579 * This function is called with j_list_lock held.
676 * This function is called with jbd_lock_bh_state(jh2bh(jh))
677 */ 580 */
678
679int __jbd2_journal_remove_checkpoint(struct journal_head *jh) 581int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
680{ 582{
681 struct transaction_chp_stats_s *stats; 583 struct transaction_chp_stats_s *stats;
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
722 transaction->t_tid, stats); 624 transaction->t_tid, stats);
723 625
724 __jbd2_journal_drop_transaction(journal, transaction); 626 __jbd2_journal_drop_transaction(journal, transaction);
725 kfree(transaction); 627 jbd2_journal_free_transaction(transaction);
726 628
727 /* Just in case anybody was waiting for more transactions to be 629 /* Just in case anybody was waiting for more transactions to be
728 checkpointed... */ 630 checkpointed... */
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
797 J_ASSERT(journal->j_committing_transaction != transaction); 699 J_ASSERT(journal->j_committing_transaction != transaction);
798 J_ASSERT(journal->j_running_transaction != transaction); 700 J_ASSERT(journal->j_running_transaction != transaction);
799 701
702 trace_jbd2_drop_transaction(journal, transaction);
703
800 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 704 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
801} 705}
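After this refactor, jbd2_cleanup_journal_tail() decomposes into three steps built from the helpers that jbd2/journal.c now provides. A condensed sketch of the control flow (aborted-journal handling omitted; as in the patched code, the caller is assumed to hold j_checkpoint_mutex):

    #include <linux/jbd2.h>
    #include <linux/blkdev.h>

    static int cleanup_tail_sketch(journal_t *journal)
    {
        tid_t first_tid;
        unsigned long blocknr;

        /* 1. Find the oldest transaction still in the log; a zero
         *    return means the tail cannot move, nothing to do. */
        if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
            return 1;

        /* 2. Flush the filesystem device so checkpointed data is
         *    durable before the journal forgets those transactions. */
        if (journal->j_flags & JBD2_BARRIER)
            blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);

        /* 3. Publish the new tail in memory and in the on-disk
         *    superblock (requires j_checkpoint_mutex). */
        __jbd2_update_log_tail(journal, first_tid, blocknr);
        return 0;
    }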
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5069b847515..840f70f5079 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -28,7 +28,6 @@
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/bitops.h> 29#include <linux/bitops.h>
30#include <trace/events/jbd2.h> 30#include <trace/events/jbd2.h>
31#include <asm/system.h>
32 31
33/* 32/*
34 * Default IO end handler for temporary BJ_IO buffer_heads. 33 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -286,10 +285,10 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
286 char *addr; 285 char *addr;
287 __u32 checksum; 286 __u32 checksum;
288 287
289 addr = kmap_atomic(page, KM_USER0); 288 addr = kmap_atomic(page);
290 checksum = crc32_be(crc32_sum, 289 checksum = crc32_be(crc32_sum,
291 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); 290 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
292 kunmap_atomic(addr, KM_USER0); 291 kunmap_atomic(addr);
293 292
294 return checksum; 293 return checksum;
295} 294}
@@ -331,6 +330,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
331 struct buffer_head *cbh = NULL; /* For transactional checksums */ 330 struct buffer_head *cbh = NULL; /* For transactional checksums */
332 __u32 crc32_sum = ~0; 331 __u32 crc32_sum = ~0;
333 struct blk_plug plug; 332 struct blk_plug plug;
333 /* Tail of the journal */
334 unsigned long first_block;
335 tid_t first_tid;
336 int update_tail;
334 337
335 /* 338 /*
336 * First job: lock down the current transaction and wait for 339 * First job: lock down the current transaction and wait for
@@ -340,7 +343,18 @@ void jbd2_journal_commit_transaction(journal_t *journal)
340 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 343 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
341 if (journal->j_flags & JBD2_FLUSHED) { 344 if (journal->j_flags & JBD2_FLUSHED) {
342 jbd_debug(3, "super block updated\n"); 345 jbd_debug(3, "super block updated\n");
343 jbd2_journal_update_superblock(journal, 1); 346 mutex_lock(&journal->j_checkpoint_mutex);
347 /*
348 * We hold j_checkpoint_mutex so tail cannot change under us.
349 * We don't need any special data guarantees for writing sb
350 * since journal is empty and it is ok for write to be
351 * flushed only with transaction commit.
352 */
353 jbd2_journal_update_sb_log_tail(journal,
354 journal->j_tail_sequence,
355 journal->j_tail,
356 WRITE_SYNC);
357 mutex_unlock(&journal->j_checkpoint_mutex);
344 } else { 358 } else {
345 jbd_debug(3, "superblock not updated\n"); 359 jbd_debug(3, "superblock not updated\n");
346 } 360 }
@@ -677,10 +691,30 @@ start_journal_io:
677 err = 0; 691 err = 0;
678 } 692 }
679 693
694 /*
695 * Get current oldest transaction in the log before we issue flush
696 * to the filesystem device. After the flush we can be sure that
697 * blocks of all older transactions are checkpointed to persistent
698 * storage and we will be safe to update journal start in the
699 * superblock with the numbers we get here.
700 */
701 update_tail =
702 jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
703
680 write_lock(&journal->j_state_lock); 704 write_lock(&journal->j_state_lock);
705 if (update_tail) {
706 long freed = first_block - journal->j_tail;
707
708 if (first_block < journal->j_tail)
709 freed += journal->j_last - journal->j_first;
710 /* Update tail only if we free significant amount of space */
711 if (freed < journal->j_maxlen / 4)
712 update_tail = 0;
713 }
681 J_ASSERT(commit_transaction->t_state == T_COMMIT); 714 J_ASSERT(commit_transaction->t_state == T_COMMIT);
682 commit_transaction->t_state = T_COMMIT_DFLUSH; 715 commit_transaction->t_state = T_COMMIT_DFLUSH;
683 write_unlock(&journal->j_state_lock); 716 write_unlock(&journal->j_state_lock);
717
684 /* 718 /*
685 * If the journal is not located on the file system device, 719 * If the journal is not located on the file system device,
686 * then we must flush the file system device before we issue 720 * then we must flush the file system device before we issue
@@ -689,7 +723,7 @@ start_journal_io:
689 if (commit_transaction->t_need_data_flush && 723 if (commit_transaction->t_need_data_flush &&
690 (journal->j_fs_dev != journal->j_dev) && 724 (journal->j_fs_dev != journal->j_dev) &&
691 (journal->j_flags & JBD2_BARRIER)) 725 (journal->j_flags & JBD2_BARRIER))
692 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 726 blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
693 727
694 /* Done it all: now write the commit record asynchronously. */ 728 /* Done it all: now write the commit record asynchronously. */
695 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 729 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -825,12 +859,20 @@ wait_for_iobuf:
825 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 859 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
826 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && 860 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
827 journal->j_flags & JBD2_BARRIER) { 861 journal->j_flags & JBD2_BARRIER) {
828 blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); 862 blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
829 } 863 }
830 864
831 if (err) 865 if (err)
832 jbd2_journal_abort(journal, err); 866 jbd2_journal_abort(journal, err);
833 867
868 /*
869 * Now disk caches for filesystem device are flushed so we are safe to
870 * erase checkpointed transactions from the log by updating journal
871 * superblock.
872 */
873 if (update_tail)
874 jbd2_update_log_tail(journal, first_tid, first_block);
875
834 /* End of a transaction! Finally, we can do checkpoint 876 /* End of a transaction! Finally, we can do checkpoint
835 processing: any buffers committed as a result of this 877 processing: any buffers committed as a result of this
836 transaction can be removed from any checkpoint list it was on 878 transaction can be removed from any checkpoint list it was on
@@ -1048,7 +1090,7 @@ restart_loop:
1048 jbd_debug(1, "JBD2: commit %d complete, head %d\n", 1090 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
1049 journal->j_commit_sequence, journal->j_tail_sequence); 1091 journal->j_commit_sequence, journal->j_tail_sequence);
1050 if (to_free) 1092 if (to_free)
1051 kfree(commit_transaction); 1093 jbd2_journal_free_transaction(commit_transaction);
1052 1094
1053 wake_up(&journal->j_wait_done_commit); 1095 wake_up(&journal->j_wait_done_commit);
1054} 1096}
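The commit-time tail update above is deliberately lazy: the superblock is only rewritten when at least a quarter of the journal would be reclaimed, and the freed computation has to handle the log wrapping past j_last. A worked example with made-up journal geometry:

    #include <stdio.h>

    int main(void)
    {
        unsigned long j_first = 1, j_last = 8193;   /* 8192-block log */
        unsigned long j_tail = 8000, j_maxlen = 8192;
        unsigned long first_block = 500;            /* new tail, wrapped */
        long freed = (long)first_block - (long)j_tail;

        if (first_block < j_tail)
            freed += j_last - j_first;              /* add the log size */

        /* 692 blocks freed < 8192/4, so the on-disk tail update is
         * skipped until a later commit reclaims more space. */
        printf("freed=%ld update_tail=%d\n",
               freed, freed >= (long)(j_maxlen / 4));
        return 0;
    }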
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c0a5f9f1b12..1afb701622b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,7 +50,6 @@
50 50
51#include <asm/uaccess.h> 51#include <asm/uaccess.h>
52#include <asm/page.h> 52#include <asm/page.h>
53#include <asm/system.h>
54 53
55EXPORT_SYMBOL(jbd2_journal_extend); 54EXPORT_SYMBOL(jbd2_journal_extend);
56EXPORT_SYMBOL(jbd2_journal_stop); 55EXPORT_SYMBOL(jbd2_journal_stop);
@@ -71,7 +70,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke);
71 70
72EXPORT_SYMBOL(jbd2_journal_init_dev); 71EXPORT_SYMBOL(jbd2_journal_init_dev);
73EXPORT_SYMBOL(jbd2_journal_init_inode); 72EXPORT_SYMBOL(jbd2_journal_init_inode);
74EXPORT_SYMBOL(jbd2_journal_update_format);
75EXPORT_SYMBOL(jbd2_journal_check_used_features); 73EXPORT_SYMBOL(jbd2_journal_check_used_features);
76EXPORT_SYMBOL(jbd2_journal_check_available_features); 74EXPORT_SYMBOL(jbd2_journal_check_available_features);
77EXPORT_SYMBOL(jbd2_journal_set_features); 75EXPORT_SYMBOL(jbd2_journal_set_features);
@@ -96,7 +94,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
96EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 94EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
97EXPORT_SYMBOL(jbd2_inode_cache); 95EXPORT_SYMBOL(jbd2_inode_cache);
98 96
99static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
100static void __journal_abort_soft (journal_t *journal, int errno); 97static void __journal_abort_soft (journal_t *journal, int errno);
101static int jbd2_journal_create_slab(size_t slab_size); 98static int jbd2_journal_create_slab(size_t slab_size);
102 99
@@ -139,6 +136,8 @@ static int kjournald2(void *arg)
139 setup_timer(&journal->j_commit_timer, commit_timeout, 136 setup_timer(&journal->j_commit_timer, commit_timeout,
140 (unsigned long)current); 137 (unsigned long)current);
141 138
139 set_freezable();
140
142 /* Record that the journal thread is running */ 141 /* Record that the journal thread is running */
143 journal->j_task = current; 142 journal->j_task = current;
144 wake_up(&journal->j_wait_done_commit); 143 wake_up(&journal->j_wait_done_commit);
@@ -345,7 +344,7 @@ repeat:
345 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 344 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
346 } 345 }
347 346
348 mapped_data = kmap_atomic(new_page, KM_USER0); 347 mapped_data = kmap_atomic(new_page);
349 /* 348 /*
350 * Fire data frozen trigger if data already wasn't frozen. Do this 349 * Fire data frozen trigger if data already wasn't frozen. Do this
351 * before checking for escaping, as the trigger may modify the magic 350 * before checking for escaping, as the trigger may modify the magic
@@ -364,7 +363,7 @@ repeat:
364 need_copy_out = 1; 363 need_copy_out = 1;
365 do_escape = 1; 364 do_escape = 1;
366 } 365 }
367 kunmap_atomic(mapped_data, KM_USER0); 366 kunmap_atomic(mapped_data);
368 367
369 /* 368 /*
370 * Do we need to do a data copy? 369 * Do we need to do a data copy?
@@ -385,9 +384,9 @@ repeat:
385 } 384 }
386 385
387 jh_in->b_frozen_data = tmp; 386 jh_in->b_frozen_data = tmp;
388 mapped_data = kmap_atomic(new_page, KM_USER0); 387 mapped_data = kmap_atomic(new_page);
389 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 388 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
390 kunmap_atomic(mapped_data, KM_USER0); 389 kunmap_atomic(mapped_data);
391 390
392 new_page = virt_to_page(tmp); 391 new_page = virt_to_page(tmp);
393 new_offset = offset_in_page(tmp); 392 new_offset = offset_in_page(tmp);
@@ -406,9 +405,9 @@ repeat:
406 * copying, we can finally do so. 405 * copying, we can finally do so.
407 */ 406 */
408 if (do_escape) { 407 if (do_escape) {
409 mapped_data = kmap_atomic(new_page, KM_USER0); 408 mapped_data = kmap_atomic(new_page);
410 *((unsigned int *)(mapped_data + new_offset)) = 0; 409 *((unsigned int *)(mapped_data + new_offset)) = 0;
411 kunmap_atomic(mapped_data, KM_USER0); 410 kunmap_atomic(mapped_data);
412 } 411 }
413 412
414 set_bh_page(new_bh, new_page, new_offset); 413 set_bh_page(new_bh, new_page, new_offset);
@@ -744,6 +743,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
744 return jbd2_journal_add_journal_head(bh); 743 return jbd2_journal_add_journal_head(bh);
745} 744}
746 745
746/*
747 * Return the tid of the oldest transaction in the journal and the block in
748 * the journal where that transaction starts.
749 *
750 * If the journal is now empty, return the next transaction ID we will write
751 * and where that transaction will start.
752 *
753 * The return value is 0 if the journal tail cannot be pushed any further,
754 * 1 if it can.
755 */
756int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
757 unsigned long *block)
758{
759 transaction_t *transaction;
760 int ret;
761
762 read_lock(&journal->j_state_lock);
763 spin_lock(&journal->j_list_lock);
764 transaction = journal->j_checkpoint_transactions;
765 if (transaction) {
766 *tid = transaction->t_tid;
767 *block = transaction->t_log_start;
768 } else if ((transaction = journal->j_committing_transaction) != NULL) {
769 *tid = transaction->t_tid;
770 *block = transaction->t_log_start;
771 } else if ((transaction = journal->j_running_transaction) != NULL) {
772 *tid = transaction->t_tid;
773 *block = journal->j_head;
774 } else {
775 *tid = journal->j_transaction_sequence;
776 *block = journal->j_head;
777 }
778 ret = tid_gt(*tid, journal->j_tail_sequence);
779 spin_unlock(&journal->j_list_lock);
780 read_unlock(&journal->j_state_lock);
781
782 return ret;
783}
784
785/*
786 * Update information about the log tail in the journal structure and in the
787 * on-disk journal superblock. This function does not check whether the new
788 * tail really pushes the log tail further. It is the caller's responsibility
789 * to make sure the provided log tail information is valid (e.g. by holding
790 * j_checkpoint_mutex the whole time between computing the log tail and
791 * calling this function, as is the case with jbd2_cleanup_journal_tail()).
792 *
793 * Requires j_checkpoint_mutex
794 */
795void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
796{
797 unsigned long freed;
798
799 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
800
801 /*
802 * We cannot afford for the write to remain in the drive's caches: as
803 * soon as we update j_tail, the next transaction can start reusing
804 * journal space, and if we lose the sb update during a power failure
805 * we would replay an old transaction with possibly newly overwritten data.
806 */
807 jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
808 write_lock(&journal->j_state_lock);
809 freed = block - journal->j_tail;
810 if (block < journal->j_tail)
811 freed += journal->j_last - journal->j_first;
812
813 trace_jbd2_update_log_tail(journal, tid, block, freed);
814 jbd_debug(1,
815 "Cleaning journal tail from %d to %d (offset %lu), "
816 "freeing %lu\n",
817 journal->j_tail_sequence, tid, block, freed);
818
819 journal->j_free += freed;
820 journal->j_tail_sequence = tid;
821 journal->j_tail = block;
822 write_unlock(&journal->j_state_lock);
823}
824
825/*
826 * This is a variation of __jbd2_update_log_tail which checks the validity
827 * of the provided log tail and locks j_checkpoint_mutex. So it is safe
828 * against races with other threads updating the log tail.
829 */
830void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
831{
832 mutex_lock(&journal->j_checkpoint_mutex);
833 if (tid_gt(tid, journal->j_tail_sequence))
834 __jbd2_update_log_tail(journal, tid, block);
835 mutex_unlock(&journal->j_checkpoint_mutex);
836}
837
747struct jbd2_stats_proc_session { 838struct jbd2_stats_proc_session {
748 journal_t *journal; 839 journal_t *journal;
749 struct transaction_stats_s *stats; 840 struct transaction_stats_s *stats;
@@ -1112,40 +1203,45 @@ static int journal_reset(journal_t *journal)
1112 1203
1113 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1204 journal->j_max_transaction_buffers = journal->j_maxlen / 4;
1114 1205
1115 /* Add the dynamic fields and write it to disk. */
1116 jbd2_journal_update_superblock(journal, 1);
1117 return jbd2_journal_start_thread(journal);
1118}
1119
1120/**
1121 * void jbd2_journal_update_superblock() - Update journal sb on disk.
1122 * @journal: The journal to update.
1123 * @wait: Set to '0' if you don't want to wait for IO completion.
1124 *
1125 * Update a journal's dynamic superblock fields and write it to disk,
1126 * optionally waiting for the IO to complete.
1127 */
1128void jbd2_journal_update_superblock(journal_t *journal, int wait)
1129{
1130 journal_superblock_t *sb = journal->j_superblock;
1131 struct buffer_head *bh = journal->j_sb_buffer;
1132
1133 /* 1206 /*
1134 * As a special case, if the on-disk copy is already marked as needing 1207 * As a special case, if the on-disk copy is already marked as needing
1135 * no recovery (s_start == 0) and there are no outstanding transactions 1208 * no recovery (s_start == 0), then we can safely defer the superblock
1136 * in the filesystem, then we can safely defer the superblock update 1209 * update until the next commit by setting JBD2_FLUSHED. This avoids
1137 * until the next commit by setting JBD2_FLUSHED. This avoids
1138 * attempting a write to a potential-readonly device. 1210 * attempting a write to a potential-readonly device.
1139 */ 1211 */
1140 if (sb->s_start == 0 && journal->j_tail_sequence == 1212 if (sb->s_start == 0) {
1141 journal->j_transaction_sequence) {
1142 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1213 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1143 "(start %ld, seq %d, errno %d)\n", 1214 "(start %ld, seq %d, errno %d)\n",
1144 journal->j_tail, journal->j_tail_sequence, 1215 journal->j_tail, journal->j_tail_sequence,
1145 journal->j_errno); 1216 journal->j_errno);
1146 goto out; 1217 journal->j_flags |= JBD2_FLUSHED;
1218 } else {
1219 /* Lock here to make assertions happy... */
1220 mutex_lock(&journal->j_checkpoint_mutex);
1221 /*
1222 * Update log tail information. We use WRITE_FUA since a new
1223 * transaction will start reusing journal space and so we
1224 * must make sure the information about the current log tail
1225 * is on disk before that.
1226 */
1227 jbd2_journal_update_sb_log_tail(journal,
1228 journal->j_tail_sequence,
1229 journal->j_tail,
1230 WRITE_FUA);
1231 mutex_unlock(&journal->j_checkpoint_mutex);
1147 } 1232 }
1233 return jbd2_journal_start_thread(journal);
1234}
1148 1235
1236static void jbd2_write_superblock(journal_t *journal, int write_op)
1237{
1238 struct buffer_head *bh = journal->j_sb_buffer;
1239 int ret;
1240
1241 trace_jbd2_write_superblock(journal, write_op);
1242 if (!(journal->j_flags & JBD2_BARRIER))
1243 write_op &= ~(REQ_FUA | REQ_FLUSH);
1244 lock_buffer(bh);
1149 if (buffer_write_io_error(bh)) { 1245 if (buffer_write_io_error(bh)) {
1150 /* 1246 /*
1151 * Oh, dear. A previous attempt to write the journal 1247 * Oh, dear. A previous attempt to write the journal
@@ -1161,48 +1257,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
 		clear_buffer_write_io_error(bh);
 		set_buffer_uptodate(bh);
 	}
+	get_bh(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	ret = submit_bh(write_op, bh);
+	wait_on_buffer(bh);
+	if (buffer_write_io_error(bh)) {
+		clear_buffer_write_io_error(bh);
+		set_buffer_uptodate(bh);
+		ret = -EIO;
+	}
+	if (ret) {
+		printk(KERN_ERR "JBD2: Error %d detected when updating "
+		       "journal superblock for %s.\n", ret,
+		       journal->j_devname);
+	}
+}
 
+/**
+ * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+ * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
+ *
+ * Update a journal's superblock information about log tail and write it to
+ * disk, waiting for the IO to complete.
+ */
+void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+				     unsigned long tail_block, int write_op)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+		  tail_block, tail_tid);
+
+	sb->s_sequence = cpu_to_be32(tail_tid);
+	sb->s_start    = cpu_to_be32(tail_block);
+
+	jbd2_write_superblock(journal, write_op);
+
+	/* Log is no longer empty */
+	write_lock(&journal->j_state_lock);
+	WARN_ON(!sb->s_sequence);
+	journal->j_flags &= ~JBD2_FLUSHED;
+	write_unlock(&journal->j_state_lock);
+}
+
+/**
+ * jbd2_mark_journal_empty() - Mark on disk journal as empty.
+ * @journal: The journal to update.
+ *
+ * Update a journal's dynamic superblock fields to show that journal is empty.
+ * Write updated superblock to disk waiting for IO to complete.
+ */
+static void jbd2_mark_journal_empty(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	read_lock(&journal->j_state_lock);
-	jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
-		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+	jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+		  journal->j_tail_sequence);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
-	sb->s_start    = cpu_to_be32(journal->j_tail);
-	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	sb->s_start    = cpu_to_be32(0);
 	read_unlock(&journal->j_state_lock);
 
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	if (wait) {
-		sync_dirty_buffer(bh);
-		if (buffer_write_io_error(bh)) {
-			printk(KERN_ERR "JBD2: I/O error detected "
-			       "when updating journal superblock for %s.\n",
-			       journal->j_devname);
-			clear_buffer_write_io_error(bh);
-			set_buffer_uptodate(bh);
-		}
-	} else
-		write_dirty_buffer(bh, WRITE);
-
-out:
-	/* If we have just flushed the log (by marking s_start==0), then
-	 * any future commit will have to be careful to update the
-	 * superblock again to re-record the true start of the log. */
+	jbd2_write_superblock(journal, WRITE_FUA);
 
+	/* Log is no longer empty */
 	write_lock(&journal->j_state_lock);
-	if (sb->s_start)
-		journal->j_flags &= ~JBD2_FLUSHED;
-	else
-		journal->j_flags |= JBD2_FLUSHED;
+	journal->j_flags |= JBD2_FLUSHED;
 	write_unlock(&journal->j_state_lock);
 }
 
+
+/**
+ * jbd2_journal_update_sb_errno() - Update error in the journal.
+ * @journal: The journal to update.
+ *
+ * Update a journal's errno. Write updated superblock to disk waiting for IO
+ * to complete.
+ */
+static void jbd2_journal_update_sb_errno(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	read_lock(&journal->j_state_lock);
+	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
+		  journal->j_errno);
+	sb->s_errno = cpu_to_be32(journal->j_errno);
+	read_unlock(&journal->j_state_lock);
+
+	jbd2_write_superblock(journal, WRITE_SYNC);
+}
+
 /*
  * Read the superblock for a given journal, performing initial
 * validation of the format.
 */
-
 static int journal_get_superblock(journal_t *journal)
 {
 	struct buffer_head *bh;
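
The hunk above splits the old do-everything superblock writer into three single-purpose entry points that all funnel through jbd2_write_superblock(), each choosing its own write_op. A compile-time model of that division of labour, in simplified userspace C (types and printf are stand-ins; the real functions are the ones in the patch):

    #include <stdint.h>
    #include <stdio.h>

    enum write_op { OP_SYNC, OP_FUA };
    struct sb { uint32_t s_sequence, s_start, s_errno; };

    static void write_sb(struct sb *sb, enum write_op op)  /* one choke point */
    {
        printf("superblock write (%s)\n", op == OP_FUA ? "FUA" : "sync");
    }

    /* tail moved: must be durable before journal space is reused */
    static void update_sb_log_tail(struct sb *sb, uint32_t tid, uint32_t blk)
    {
        sb->s_sequence = tid;
        sb->s_start = blk;
        write_sb(sb, OP_FUA);
    }

    /* s_start == 0 is the on-disk "no recovery needed" marker */
    static void mark_journal_empty(struct sb *sb, uint32_t tid)
    {
        sb->s_sequence = tid;
        sb->s_start = 0;
        write_sb(sb, OP_FUA);
    }

    /* recording an error is not an ordering point; plain sync suffices */
    static void update_sb_errno(struct sb *sb, int err)
    {
        sb->s_errno = (uint32_t)err;
        write_sb(sb, OP_SYNC);
    }
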
@@ -1396,14 +1550,11 @@ int jbd2_journal_destroy(journal_t *journal)
 
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
-			/* We can now mark the journal as empty. */
-			journal->j_tail = 0;
-			journal->j_tail_sequence =
-				++journal->j_transaction_sequence;
-			jbd2_journal_update_superblock(journal, 1);
-		} else {
+			mutex_lock(&journal->j_checkpoint_mutex);
+			jbd2_mark_journal_empty(journal);
+			mutex_unlock(&journal->j_checkpoint_mutex);
+		} else
 			err = -EIO;
-		}
 		brelse(journal->j_sb_buffer);
 	}
 
@@ -1550,61 +1701,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
 EXPORT_SYMBOL(jbd2_journal_clear_features);
 
 /**
- * int jbd2_journal_update_format () - Update on-disk journal structure.
- * @journal: Journal to act on.
- *
- * Given an initialised but unloaded journal struct, poke about in the
- * on-disk structure to update it to the most recent supported version.
- */
-int jbd2_journal_update_format (journal_t *journal)
-{
-	journal_superblock_t *sb;
-	int err;
-
-	err = journal_get_superblock(journal);
-	if (err)
-		return err;
-
-	sb = journal->j_superblock;
-
-	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
-	case JBD2_SUPERBLOCK_V2:
-		return 0;
-	case JBD2_SUPERBLOCK_V1:
-		return journal_convert_superblock_v1(journal, sb);
-	default:
-		break;
-	}
-	return -EINVAL;
-}
-
-static int journal_convert_superblock_v1(journal_t *journal,
-					 journal_superblock_t *sb)
-{
-	int offset, blocksize;
-	struct buffer_head *bh;
-
-	printk(KERN_WARNING
-		"JBD2: Converting superblock from version 1 to 2.\n");
-
-	/* Pre-initialise new fields to zero */
-	offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
-	blocksize = be32_to_cpu(sb->s_blocksize);
-	memset(&sb->s_feature_compat, 0, blocksize-offset);
-
-	sb->s_nr_users = cpu_to_be32(1);
-	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-	journal->j_format_version = 2;
-
-	bh = journal->j_sb_buffer;
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	sync_dirty_buffer(bh);
-	return 0;
-}
-
-
-/**
  * int jbd2_journal_flush () - Flush journal
 * @journal: Journal to act on.
 *
@@ -1617,7 +1713,6 @@ int jbd2_journal_flush(journal_t *journal)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
-	unsigned long old_tail;
 
 	write_lock(&journal->j_state_lock);
 
@@ -1652,6 +1747,7 @@ int jbd2_journal_flush(journal_t *journal)
 	if (is_journal_aborted(journal))
 		return -EIO;
 
+	mutex_lock(&journal->j_checkpoint_mutex);
 	jbd2_cleanup_journal_tail(journal);
 
 	/* Finally, mark the journal as really needing no recovery.
@@ -1659,14 +1755,9 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock. Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
+	jbd2_mark_journal_empty(journal);
+	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
-	old_tail = journal->j_tail;
-	journal->j_tail = 0;
-	write_unlock(&journal->j_state_lock);
-	jbd2_journal_update_superblock(journal, 1);
-	write_lock(&journal->j_state_lock);
-	journal->j_tail = old_tail;
-
 	J_ASSERT(!journal->j_running_transaction);
 	J_ASSERT(!journal->j_committing_transaction);
 	J_ASSERT(!journal->j_checkpoint_transactions);
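
Read together with the previous hunk, the flush path now keeps j_checkpoint_mutex held from tail cleanup through jbd2_mark_journal_empty(), closing the window in which a new commit could move the tail between the two steps. Condensed from the hunks above (not a drop-in excerpt; error paths and the quiescing in between are omitted):

    mutex_lock(&journal->j_checkpoint_mutex);
    jbd2_cleanup_journal_tail(journal);   /* checkpoint and push the tail */
    /* ... journal already quiesced by the caller ... */
    jbd2_mark_journal_empty(journal);     /* s_start = 0 under the same lock */
    mutex_unlock(&journal->j_checkpoint_mutex);
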
@@ -1706,8 +1797,12 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 		write ? "Clearing" : "Ignoring");
 
 	err = jbd2_journal_skip_recovery(journal);
-	if (write)
-		jbd2_journal_update_superblock(journal, 1);
+	if (write) {
+		/* Lock to make assertions happy... */
+		mutex_lock(&journal->j_checkpoint_mutex);
+		jbd2_mark_journal_empty(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
+	}
 
  no_recovery:
 	return err;
@@ -1757,7 +1852,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
 	__jbd2_journal_abort_hard(journal);
 
 	if (errno)
-		jbd2_journal_update_superblock(journal, 1);
+		jbd2_journal_update_sb_errno(journal);
 }
 
 /**
@@ -2015,7 +2110,7 @@ static struct kmem_cache *jbd2_journal_head_cache;
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
 
-static int journal_init_jbd2_journal_head_cache(void)
+static int jbd2_journal_init_journal_head_cache(void)
 {
 	int retval;
 
@@ -2033,7 +2128,7 @@ static int journal_init_jbd2_journal_head_cache(void)
 	return retval;
 }
 
-static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
+static void jbd2_journal_destroy_journal_head_cache(void)
 {
 	if (jbd2_journal_head_cache) {
 		kmem_cache_destroy(jbd2_journal_head_cache);
@@ -2321,7 +2416,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
 
 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
 
-static int __init journal_init_handle_cache(void)
+static int __init jbd2_journal_init_handle_cache(void)
 {
 	jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
 	if (jbd2_handle_cache == NULL) {
@@ -2356,17 +2451,20 @@ static int __init journal_init_caches(void)
 
 	ret = jbd2_journal_init_revoke_caches();
 	if (ret == 0)
-		ret = journal_init_jbd2_journal_head_cache();
+		ret = jbd2_journal_init_journal_head_cache();
+	if (ret == 0)
+		ret = jbd2_journal_init_handle_cache();
 	if (ret == 0)
-		ret = journal_init_handle_cache();
+		ret = jbd2_journal_init_transaction_cache();
 	return ret;
 }
 
 static void jbd2_journal_destroy_caches(void)
 {
 	jbd2_journal_destroy_revoke_caches();
-	jbd2_journal_destroy_jbd2_journal_head_cache();
+	jbd2_journal_destroy_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
+	jbd2_journal_destroy_transaction_cache();
 	jbd2_journal_destroy_slabs();
 }
 
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf139..c1a03354a22 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/crc32.h>
+#include <linux/blkdev.h>
 #endif
 
 /*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
 	err2 = sync_blockdev(journal->j_fs_dev);
 	if (!err)
 		err = err2;
-
+	/* Make sure all replayed data is on permanent storage */
+	if (journal->j_flags & JBD2_BARRIER)
+		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
 	return err;
 }
 
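
The new blkdev_issue_flush() call closes a real hole: replay writes recovered blocks through the page cache, and sync_blockdev() alone only guarantees they reached the device, not that they left its volatile write cache. The userspace analogue of the same guarantee is fsync() on the block device node, which asks the kernel to issue a device cache flush (a sketch, with a hypothetical device path):

    #include <fcntl.h>
    #include <unistd.h>

    /* After writing data into /dev/sdXN, make it power-fail safe. */
    static int flush_blockdev(const char *path)
    {
        int fd = open(path, O_WRONLY);
        if (fd < 0)
            return -1;
        int err = fsync(fd);    /* syncs dirty buffers and flushes the cache */
        close(fd);
        return err;
    }
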
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 30b2867d6cc..6973705d6a3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void)
 	J_ASSERT(!jbd2_revoke_record_cache);
 	J_ASSERT(!jbd2_revoke_table_cache);
 
-	jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
-					   sizeof(struct jbd2_revoke_record_s),
-					   0,
-					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
-					   NULL);
+	jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
+					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
 	if (!jbd2_revoke_record_cache)
 		goto record_cache_failure;
 
-	jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
-					   sizeof(struct jbd2_revoke_table_s),
-					   0, SLAB_TEMPORARY, NULL);
+	jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
+					     SLAB_TEMPORARY);
 	if (!jbd2_revoke_table_cache)
 		goto table_cache_failure;
 	return 0;
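
The KMEM_CACHE() conversion is not a behaviour change, just the standard helper for the common case: it derives the cache name, object size, and alignment from the struct itself. Roughly, as the macro has long read in include/linux/slab.h (check the exact tree in use):

    #define KMEM_CACHE(__struct, __flags)                             \
        kmem_cache_create(#__struct, sizeof(struct __struct),         \
                          __alignof__(struct __struct), (__flags), NULL)

so KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY) expands to essentially the kmem_cache_create() call being deleted, minus the hand-written name string and size.
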
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 35ae096bed5..ddcd3549c6c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -33,6 +33,35 @@
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
 
+static struct kmem_cache *transaction_cache;
+int __init jbd2_journal_init_transaction_cache(void)
+{
+	J_ASSERT(!transaction_cache);
+	transaction_cache = kmem_cache_create("jbd2_transaction_s",
+					sizeof(transaction_t),
+					0,
+					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+					NULL);
+	if (transaction_cache)
+		return 0;
+	return -ENOMEM;
+}
+
+void jbd2_journal_destroy_transaction_cache(void)
+{
+	if (transaction_cache) {
+		kmem_cache_destroy(transaction_cache);
+		transaction_cache = NULL;
+	}
+}
+
+void jbd2_journal_free_transaction(transaction_t *transaction)
+{
+	if (unlikely(ZERO_OR_NULL_PTR(transaction)))
+		return;
+	kmem_cache_free(transaction_cache, transaction);
+}
+
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
 *
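
The gain from a dedicated transaction_cache over kzalloc() is mainly accounting and alignment: transaction_t allocations show up in /proc/slabinfo under their own name and get cache-line-aligned objects. The lifecycle mirrors any slab user; a minimal kernel-style sketch of the pattern (struct and names hypothetical, module context assumed):

    struct my_obj { int x; };
    static struct kmem_cache *my_cache;

    static int __init my_init(void)
    {
        my_cache = kmem_cache_create("my_objs", sizeof(struct my_obj),
                                     0, SLAB_HWCACHE_ALIGN, NULL);
        return my_cache ? 0 : -ENOMEM;
    }

    static void __exit my_exit(void)
    {
        kmem_cache_destroy(my_cache);   /* all objects must be freed first */
    }
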
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
+		new_transaction = kmem_cache_alloc(transaction_cache,
+						   gfp_mask | __GFP_ZERO);
 		if (!new_transaction) {
 			/*
 			 * If __GFP_FS is not present, then we may be
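
Swapping kzalloc() for kmem_cache_alloc(..., gfp | __GFP_ZERO) preserves the zeroing semantics, since kzalloc(size, flags) is itself just kmalloc(size, flags | __GFP_ZERO). The equivalent zero-filled allocation from a cache looks like:

    transaction_t *t = kmem_cache_alloc(transaction_cache,
                                        GFP_NOFS | __GFP_ZERO);
    if (!t)
        return -ENOMEM;   /* caller's error path; illustration only */
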
@@ -162,7 +192,7 @@ repeat:
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 		read_unlock(&journal->j_state_lock);
-		kfree(new_transaction);
+		jbd2_journal_free_transaction(new_transaction);
 		return -EROFS;
 	}
 
@@ -284,7 +314,7 @@ repeat:
 	read_unlock(&journal->j_state_lock);
 
 	lock_map_acquire(&handle->h_lockdep_map);
-	kfree(new_transaction);
+	jbd2_journal_free_transaction(new_transaction);
 	return 0;
 }
 
@@ -783,12 +813,12 @@ done:
 			    "Possible IO failure.\n");
 		page = jh2bh(jh)->b_page;
 		offset = offset_in_page(jh2bh(jh)->b_data);
-		source = kmap_atomic(page, KM_USER0);
+		source = kmap_atomic(page);
 		/* Fire data frozen trigger just before we copy the data */
 		jbd2_buffer_frozen_trigger(jh, source + offset,
 					   jh->b_triggers);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-		kunmap_atomic(source, KM_USER0);
+		kunmap_atomic(source);
 
 		/*
 		 * Now that the frozen data is saved off, we need to store
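
This hunk is part of the tree-wide kmap_atomic() API change: the KM_USERx slot argument was removed, and the mapping slot is derived from the calling context instead. The new calling convention, exactly as used above:

    void *addr = kmap_atomic(page);       /* no KM_* slot argument */
    memcpy(dst, addr + offset, len);
    kunmap_atomic(addr);                  /* takes the address, not the page */
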
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
 * of these pointers, it could go bad. Generally the caller needs to re-read
 * the pointer from the transaction_t.
 *
- * Called under j_list_lock. The journal may not be locked.
+ * Called under j_list_lock.
 */
-void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
+static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 	spin_lock(&journal->j_list_lock);
 	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
 		/* written-back checkpointed metadata buffer */
-		if (jh->b_jlist == BJ_None) {
-			JBUFFER_TRACE(jh, "remove from checkpoint list");
-			__jbd2_journal_remove_checkpoint(jh);
-		}
+		JBUFFER_TRACE(jh, "remove from checkpoint list");
+		__jbd2_journal_remove_checkpoint(jh);
 	}
 	spin_unlock(&journal->j_list_lock);
 out:
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked:
 	clear_buffer_mapped(bh);
 	clear_buffer_req(bh);
 	clear_buffer_new(bh);
+	clear_buffer_delay(bh);
+	clear_buffer_unwritten(bh);
 	bh->b_bdev = NULL;
 	return may_free;
 }
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 926d02068a1..922f146e423 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -9,6 +9,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
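
This pr_fmt() define, repeated at the top of each converted file below, is what lets the later pr_warn()/pr_notice() calls drop their hand-written "JFFS2: " prefixes: the pr_* macros paste pr_fmt() around the format string at compile time, so every message from the file comes out tagged with KBUILD_MODNAME ("jffs2"). Simplified from printk.h:

    #define pr_warn(fmt, ...) \
        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)

    pr_warn("iget() failed for ino #%u\n", ino);
    /* -> printk(KERN_WARNING "jffs2" ": " "iget() failed for ino #%u\n", ino) */
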
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 404111b016c..2b60ce1996a 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/jffs2.h>
 #include <linux/mtd/mtd.h>
@@ -42,12 +44,13 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
 
 	tsk = kthread_run(jffs2_garbage_collect_thread, c, "jffs2_gcd_mtd%d", c->mtd->index);
 	if (IS_ERR(tsk)) {
-		printk(KERN_WARNING "fork failed for JFFS2 garbage collect thread: %ld\n", -PTR_ERR(tsk));
+		pr_warn("fork failed for JFFS2 garbage collect thread: %ld\n",
+			-PTR_ERR(tsk));
 		complete(&c->gc_thread_exit);
 		ret = PTR_ERR(tsk);
 	} else {
 		/* Wait for it... */
-		D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", tsk->pid));
+		jffs2_dbg(1, "Garbage collect thread is pid %d\n", tsk->pid);
 		wait_for_completion(&c->gc_thread_start);
 		ret = tsk->pid;
 	}
@@ -60,7 +63,7 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c)
 	int wait = 0;
 	spin_lock(&c->erase_completion_lock);
 	if (c->gc_task) {
-		D1(printk(KERN_DEBUG "jffs2: Killing GC task %d\n", c->gc_task->pid));
+		jffs2_dbg(1, "Killing GC task %d\n", c->gc_task->pid);
 		send_sig(SIGKILL, c->gc_task, 1);
 		wait = 1;
 	}
@@ -90,7 +93,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 		if (!jffs2_thread_should_wake(c)) {
 			set_current_state (TASK_INTERRUPTIBLE);
 			spin_unlock(&c->erase_completion_lock);
-			D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n"));
+			jffs2_dbg(1, "%s(): sleeping...\n", __func__);
 			schedule();
 		} else
 			spin_unlock(&c->erase_completion_lock);
@@ -109,7 +112,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 		schedule_timeout_interruptible(msecs_to_jiffies(50));
 
 		if (kthread_should_stop()) {
-			D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): kthread_stop() called.\n"));
+			jffs2_dbg(1, "%s(): kthread_stop() called\n", __func__);
 			goto die;
 		}
 
@@ -126,28 +129,32 @@ static int jffs2_garbage_collect_thread(void *_c)
 
 			switch(signr) {
 			case SIGSTOP:
-				D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): SIGSTOP received.\n"));
+				jffs2_dbg(1, "%s(): SIGSTOP received\n",
+					  __func__);
 				set_current_state(TASK_STOPPED);
 				schedule();
 				break;
 
 			case SIGKILL:
-				D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): SIGKILL received.\n"));
+				jffs2_dbg(1, "%s(): SIGKILL received\n",
+					  __func__);
 				goto die;
 
 			case SIGHUP:
-				D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): SIGHUP received.\n"));
+				jffs2_dbg(1, "%s(): SIGHUP received\n",
+					  __func__);
 				break;
 			default:
-				D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): signal %ld received\n", signr));
+				jffs2_dbg(1, "%s(): signal %ld received\n",
+					  __func__, signr);
 			}
 		}
 		/* We don't want SIGHUP to interrupt us. STOP and KILL are OK though. */
 		disallow_signal(SIGHUP);
 
-		D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): pass\n"));
+		jffs2_dbg(1, "%s(): pass\n", __func__);
 		if (jffs2_garbage_collect_pass(c) == -ENOSPC) {
-			printk(KERN_NOTICE "No space for garbage collection. Aborting GC thread\n");
+			pr_notice("No space for garbage collection. Aborting GC thread\n");
 			goto die;
 		}
 	}
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 3005ec4520a..a3750f902ad 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -307,8 +309,8 @@ static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
 	   trying to GC to make more space. It'll be a fruitless task */
 	c->nospc_dirty_size = c->sector_size + (c->flash_size / 100);
 
-	dbg_fsbuild("JFFS2 trigger levels (size %d KiB, block size %d KiB, %d blocks)\n",
-		    c->flash_size / 1024, c->sector_size / 1024, c->nr_blocks);
+	dbg_fsbuild("trigger levels (size %d KiB, block size %d KiB, %d blocks)\n",
+		    c->flash_size / 1024, c->sector_size / 1024, c->nr_blocks);
 	dbg_fsbuild("Blocks required to allow deletion: %d (%d KiB)\n",
 		    c->resv_blocks_deletion, c->resv_blocks_deletion*c->sector_size/1024);
 	dbg_fsbuild("Blocks required to allow writes: %d (%d KiB)\n",
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 5b6c9d1a2fb..4849a4c9a0e 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -12,6 +12,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include "compr.h"
 
 static DEFINE_SPINLOCK(jffs2_compressor_list_lock);
@@ -79,7 +81,7 @@ static int jffs2_selected_compress(u8 compr, unsigned char *data_in,
 
 	output_buf = kmalloc(*cdatalen, GFP_KERNEL);
 	if (!output_buf) {
-		printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n");
+		pr_warn("No memory for compressor allocation. Compression failed.\n");
 		return ret;
 	}
 	orig_slen = *datalen;
@@ -188,7 +190,8 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 				tmp_buf = kmalloc(orig_slen, GFP_KERNEL);
 				spin_lock(&jffs2_compressor_list_lock);
 				if (!tmp_buf) {
-					printk(KERN_WARNING "JFFS2: No memory for compressor allocation. (%d bytes)\n", orig_slen);
+					pr_warn("No memory for compressor allocation. (%d bytes)\n",
+						orig_slen);
 					continue;
 				}
 				else {
@@ -235,7 +238,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 				     cpage_out, datalen, cdatalen);
 		break;
 	default:
-		printk(KERN_ERR "JFFS2: unknown compression mode.\n");
+		pr_err("unknown compression mode\n");
 	}
 
 	if (ret == JFFS2_COMPR_NONE) {
@@ -277,7 +280,8 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
 			spin_lock(&jffs2_compressor_list_lock);
 			if (ret) {
-				printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
+				pr_warn("Decompressor \"%s\" returned %d\n",
+					this->name, ret);
 			}
 			else {
 				this->stat_decompr_blocks++;
@@ -287,7 +291,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			return ret;
 		}
 	}
-	printk(KERN_WARNING "JFFS2 compression type 0x%02x not available.\n", comprtype);
+	pr_warn("compression type 0x%02x not available\n", comprtype);
 	spin_unlock(&jffs2_compressor_list_lock);
 	return -EIO;
 }
@@ -299,7 +303,7 @@ int jffs2_register_compressor(struct jffs2_compressor *comp)
 	struct jffs2_compressor *this;
 
 	if (!comp->name) {
-		printk(KERN_WARNING "NULL compressor name at registering JFFS2 compressor. Failed.\n");
+		pr_warn("NULL compressor name at registering JFFS2 compressor. Failed.\n");
 		return -1;
 	}
 	comp->compr_buf_size=0;
@@ -309,7 +313,7 @@ int jffs2_register_compressor(struct jffs2_compressor *comp)
 	comp->stat_compr_new_size=0;
 	comp->stat_compr_blocks=0;
 	comp->stat_decompr_blocks=0;
-	D1(printk(KERN_DEBUG "Registering JFFS2 compressor \"%s\"\n", comp->name));
+	jffs2_dbg(1, "Registering JFFS2 compressor \"%s\"\n", comp->name);
 
 	spin_lock(&jffs2_compressor_list_lock);
 
@@ -332,15 +336,15 @@ out:
 
 int jffs2_unregister_compressor(struct jffs2_compressor *comp)
 {
-	D2(struct jffs2_compressor *this;)
+	D2(struct jffs2_compressor *this);
 
-	D1(printk(KERN_DEBUG "Unregistering JFFS2 compressor \"%s\"\n", comp->name));
+	jffs2_dbg(1, "Unregistering JFFS2 compressor \"%s\"\n", comp->name);
 
 	spin_lock(&jffs2_compressor_list_lock);
 
 	if (comp->usecount) {
 		spin_unlock(&jffs2_compressor_list_lock);
-		printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n");
+		pr_warn("Compressor module is in use. Unregister failed.\n");
 		return -1;
 	}
 	list_del(&comp->list);
@@ -377,17 +381,17 @@ int __init jffs2_compressors_init(void)
 /* Setting default compression mode */
 #ifdef CONFIG_JFFS2_CMODE_NONE
 	jffs2_compression_mode = JFFS2_COMPR_MODE_NONE;
-	D1(printk(KERN_INFO "JFFS2: default compression mode: none\n");)
+	jffs2_dbg(1, "default compression mode: none\n");
 #else
 #ifdef CONFIG_JFFS2_CMODE_SIZE
 	jffs2_compression_mode = JFFS2_COMPR_MODE_SIZE;
-	D1(printk(KERN_INFO "JFFS2: default compression mode: size\n");)
+	jffs2_dbg(1, "default compression mode: size\n");
#else
 #ifdef CONFIG_JFFS2_CMODE_FAVOURLZO
 	jffs2_compression_mode = JFFS2_COMPR_MODE_FAVOURLZO;
-	D1(printk(KERN_INFO "JFFS2: default compression mode: favourlzo\n");)
+	jffs2_dbg(1, "default compression mode: favourlzo\n");
 #else
-	D1(printk(KERN_INFO "JFFS2: default compression mode: priority\n");)
+	jffs2_dbg(1, "default compression mode: priority\n");
 #endif
 #endif
 #endif
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index af186ee674d..c553bd6506d 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -33,7 +33,6 @@ static int __init alloc_workspace(void)
 	lzo_compress_buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
 
 	if (!lzo_mem || !lzo_compress_buf) {
-		printk(KERN_WARNING "Failed to allocate lzo deflate workspace\n");
 		free_workspace();
 		return -ENOMEM;
 	}
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index 9e7cec808c4..92e0644bf86 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/jffs2.h>
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 5a001020c54..0b9a1e44e83 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -14,6 +14,8 @@
 #error "The userspace support got too messy and was removed. Update your mkfs.jffs2"
 #endif
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/zlib.h>
 #include <linux/zutil.h>
@@ -42,18 +44,18 @@ static int __init alloc_workspaces(void)
 {
 	def_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
 							MAX_MEM_LEVEL));
-	if (!def_strm.workspace) {
-		printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
+	if (!def_strm.workspace)
 		return -ENOMEM;
-	}
-	D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL)));
+
+	jffs2_dbg(1, "Allocated %d bytes for deflate workspace\n",
+		  zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
 	inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
 	if (!inf_strm.workspace) {
-		printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize());
 		vfree(def_strm.workspace);
 		return -ENOMEM;
 	}
-	D1(printk(KERN_DEBUG "Allocated %d bytes for inflate workspace\n", zlib_inflate_workspacesize()));
+	jffs2_dbg(1, "Allocated %d bytes for inflate workspace\n",
+		  zlib_inflate_workspacesize());
 	return 0;
 }
 
@@ -79,7 +81,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 	mutex_lock(&deflate_mutex);
 
 	if (Z_OK != zlib_deflateInit(&def_strm, 3)) {
-		printk(KERN_WARNING "deflateInit failed\n");
+		pr_warn("deflateInit failed\n");
 		mutex_unlock(&deflate_mutex);
 		return -1;
 	}
@@ -93,13 +95,14 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 	while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
 		def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
 		def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
-		D1(printk(KERN_DEBUG "calling deflate with avail_in %d, avail_out %d\n",
-			  def_strm.avail_in, def_strm.avail_out));
+		jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+			  def_strm.avail_in, def_strm.avail_out);
 		ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
-		D1(printk(KERN_DEBUG "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
-			  def_strm.avail_in, def_strm.avail_out, def_strm.total_in, def_strm.total_out));
+		jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+			  def_strm.avail_in, def_strm.avail_out,
+			  def_strm.total_in, def_strm.total_out);
 		if (ret != Z_OK) {
-			D1(printk(KERN_DEBUG "deflate in loop returned %d\n", ret));
+			jffs2_dbg(1, "deflate in loop returned %d\n", ret);
 			zlib_deflateEnd(&def_strm);
 			mutex_unlock(&deflate_mutex);
 			return -1;
@@ -111,20 +114,20 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 	zlib_deflateEnd(&def_strm);
 
 	if (ret != Z_STREAM_END) {
-		D1(printk(KERN_DEBUG "final deflate returned %d\n", ret));
+		jffs2_dbg(1, "final deflate returned %d\n", ret);
 		ret = -1;
 		goto out;
 	}
 
 	if (def_strm.total_out >= def_strm.total_in) {
-		D1(printk(KERN_DEBUG "zlib compressed %ld bytes into %ld; failing\n",
-			  def_strm.total_in, def_strm.total_out));
+		jffs2_dbg(1, "zlib compressed %ld bytes into %ld; failing\n",
+			  def_strm.total_in, def_strm.total_out);
 		ret = -1;
 		goto out;
 	}
 
-	D1(printk(KERN_DEBUG "zlib compressed %ld bytes into %ld\n",
-		  def_strm.total_in, def_strm.total_out));
+	jffs2_dbg(1, "zlib compressed %ld bytes into %ld\n",
+		  def_strm.total_in, def_strm.total_out);
 
 	*dstlen = def_strm.total_out;
 	*sourcelen = def_strm.total_in;
@@ -157,18 +160,18 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
 
-		D2(printk(KERN_DEBUG "inflate skipping adler32\n"));
+		jffs2_dbg(2, "inflate skipping adler32\n");
 		wbits = -((data_in[0] >> 4) + 8);
 		inf_strm.next_in += 2;
 		inf_strm.avail_in -= 2;
 	} else {
 		/* Let this remain D1 for now -- it should never happen */
-		D1(printk(KERN_DEBUG "inflate not skipping adler32\n"));
+		jffs2_dbg(1, "inflate not skipping adler32\n");
 	}
 
 
 	if (Z_OK != zlib_inflateInit2(&inf_strm, wbits)) {
-		printk(KERN_WARNING "inflateInit failed\n");
+		pr_warn("inflateInit failed\n");
 		mutex_unlock(&inflate_mutex);
 		return 1;
 	}
@@ -176,7 +179,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
 	while((ret = zlib_inflate(&inf_strm, Z_FINISH)) == Z_OK)
 		;
 	if (ret != Z_STREAM_END) {
-		printk(KERN_NOTICE "inflate returned %d\n", ret);
+		pr_notice("inflate returned %d\n", ret);
 	}
 	zlib_inflateEnd(&inf_strm);
 	mutex_unlock(&inflate_mutex);
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index e0b76c87a91..1090eb64b90 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/pagemap.h>
@@ -261,12 +263,15 @@ void __jffs2_dbg_superblock_counts(struct jffs2_sb_info *c)
 			bad += c->sector_size;
 	}
 
-#define check(sz) \
-	if (sz != c->sz##_size) { \
-		printk(KERN_WARNING #sz "_size mismatch counted 0x%x, c->" #sz "_size 0x%x\n", \
-		       sz, c->sz##_size); \
-		dump = 1; \
-	}
+#define check(sz)							\
+do {									\
+	if (sz != c->sz##_size) {					\
+		pr_warn("%s_size mismatch counted 0x%x, c->%s_size 0x%x\n", \
+			#sz, sz, #sz, c->sz##_size);			\
+		dump = 1;						\
+	}								\
+} while (0)
+
 	check(free);
 	check(dirty);
 	check(used);
@@ -274,11 +279,12 @@ void __jffs2_dbg_superblock_counts(struct jffs2_sb_info *c)
 	check(unchecked);
 	check(bad);
 	check(erasing);
+
 #undef check
 
 	if (nr_counted != c->nr_blocks) {
-		printk(KERN_WARNING "%s counted only 0x%x blocks of 0x%x. Where are the others?\n",
-		       __func__, nr_counted, c->nr_blocks);
+		pr_warn("%s counted only 0x%x blocks of 0x%x. Where are the others?\n",
+			__func__, nr_counted, c->nr_blocks);
 		dump = 1;
 	}
 
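
Wrapping check() in do { } while (0) is the standard fix for a statement-like macro that ends in an if block: without it, a bare if/else around the macro invocation parses wrongly. The classic failure the new form prevents:

    if (debug)
        check(free);   /* old macro: the trailing `;` ends the outer if...  */
    else               /* ...so this else would bind to the macro's own if */
        return;
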
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index c4f8eef5ca6..4fd9be4cbc9 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -51,6 +51,7 @@
  * superseded by nicer dbg_xxx() macros...
  */
 #if CONFIG_JFFS2_FS_DEBUG > 0
+#define DEBUG
 #define D1(x) x
 #else
 #define D1(x)
@@ -62,50 +63,33 @@
 #define D2(x)
 #endif
 
+#define jffs2_dbg(level, fmt, ...)		\
+do {						\
+	if (CONFIG_JFFS2_FS_DEBUG >= level)	\
+		pr_debug(fmt, ##__VA_ARGS__);	\
+} while (0)
+
 /* The prefixes of JFFS2 messages */
+#define JFFS2_DBG	KERN_DEBUG
 #define JFFS2_DBG_PREFIX	"[JFFS2 DBG]"
-#define JFFS2_ERR_PREFIX	"JFFS2 error:"
-#define JFFS2_WARN_PREFIX	"JFFS2 warning:"
-#define JFFS2_NOTICE_PREFIX	"JFFS2 notice:"
-
-#define JFFS2_ERR	KERN_ERR
-#define JFFS2_WARN	KERN_WARNING
-#define JFFS2_NOT	KERN_NOTICE
-#define JFFS2_DBG	KERN_DEBUG
-
 #define JFFS2_DBG_MSG_PREFIX	JFFS2_DBG JFFS2_DBG_PREFIX
-#define JFFS2_ERR_MSG_PREFIX	JFFS2_ERR JFFS2_ERR_PREFIX
-#define JFFS2_WARN_MSG_PREFIX	JFFS2_WARN JFFS2_WARN_PREFIX
-#define JFFS2_NOTICE_MSG_PREFIX	JFFS2_NOT JFFS2_NOTICE_PREFIX
 
 /* JFFS2 message macros */
 #define JFFS2_ERROR(fmt, ...) \
-	do { \
-		printk(JFFS2_ERR_MSG_PREFIX \
-			" (%d) %s: " fmt, task_pid_nr(current), \
-			__func__ , ##__VA_ARGS__); \
-	} while(0)
+	pr_err("error: (%d) %s: " fmt, \
+	       task_pid_nr(current), __func__, ##__VA_ARGS__)
 
 #define JFFS2_WARNING(fmt, ...) \
-	do { \
-		printk(JFFS2_WARN_MSG_PREFIX \
-			" (%d) %s: " fmt, task_pid_nr(current), \
-			__func__ , ##__VA_ARGS__); \
-	} while(0)
+	pr_warn("warning: (%d) %s: " fmt, \
+		task_pid_nr(current), __func__, ##__VA_ARGS__)
 
 #define JFFS2_NOTICE(fmt, ...) \
-	do { \
-		printk(JFFS2_NOTICE_MSG_PREFIX \
-			" (%d) %s: " fmt, task_pid_nr(current), \
-			__func__ , ##__VA_ARGS__); \
-	} while(0)
+	pr_notice("notice: (%d) %s: " fmt, \
		  task_pid_nr(current), __func__, ##__VA_ARGS__)
 
 #define JFFS2_DEBUG(fmt, ...) \
-	do { \
-		printk(JFFS2_DBG_MSG_PREFIX \
-			" (%d) %s: " fmt, task_pid_nr(current), \
-			__func__ , ##__VA_ARGS__); \
-	} while(0)
+	printk(KERN_DEBUG "[JFFS2 DBG] (%d) %s: " fmt, \
	       task_pid_nr(current), __func__, ##__VA_ARGS__)
 
 /*
 * We split our debugging messages on several parts, depending on the JFFS2
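
Note the interplay between the two hunks above: jffs2_dbg() bottoms out in pr_debug(), and pr_debug() only compiles to a real printk when DEBUG is defined (or dynamic debug is enabled), which appears to be exactly why the first hunk adds '#define DEBUG' under CONFIG_JFFS2_FS_DEBUG. Usage stays level-gated as before:

    jffs2_dbg(1, "%s(): sleeping...\n", __func__);
    /* compiled in only when CONFIG_JFFS2_FS_DEBUG >= 1 */
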
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 973ac5822bd..b56018896d5 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -79,7 +81,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 	uint32_t ino = 0;
 	struct inode *inode = NULL;
 
-	D1(printk(KERN_DEBUG "jffs2_lookup()\n"));
+	jffs2_dbg(1, "jffs2_lookup()\n");
 
 	if (target->d_name.len > JFFS2_MAX_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -103,7 +105,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 	if (ino) {
 		inode = jffs2_iget(dir_i->i_sb, ino);
 		if (IS_ERR(inode))
-			printk(KERN_WARNING "iget() failed for ino #%u\n", ino);
+			pr_warn("iget() failed for ino #%u\n", ino);
 	}
 
 	return d_splice_alias(inode, target);
@@ -119,21 +121,22 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct jffs2_full_dirent *fd;
 	unsigned long offset, curofs;
 
-	D1(printk(KERN_DEBUG "jffs2_readdir() for dir_i #%lu\n", filp->f_path.dentry->d_inode->i_ino));
+	jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n",
+		  filp->f_path.dentry->d_inode->i_ino);
 
 	f = JFFS2_INODE_INFO(inode);
 
 	offset = filp->f_pos;
 
 	if (offset == 0) {
-		D1(printk(KERN_DEBUG "Dirent 0: \".\", ino #%lu\n", inode->i_ino));
+		jffs2_dbg(1, "Dirent 0: \".\", ino #%lu\n", inode->i_ino);
 		if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
 			goto out;
 		offset++;
 	}
 	if (offset == 1) {
 		unsigned long pino = parent_ino(filp->f_path.dentry);
-		D1(printk(KERN_DEBUG "Dirent 1: \"..\", ino #%lu\n", pino));
+		jffs2_dbg(1, "Dirent 1: \"..\", ino #%lu\n", pino);
 		if (filldir(dirent, "..", 2, 1, pino, DT_DIR) < 0)
 			goto out;
 		offset++;
@@ -146,16 +149,18 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		curofs++;
 		/* First loop: curofs = 2; offset = 2 */
 		if (curofs < offset) {
-			D2(printk(KERN_DEBUG "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n",
-				  fd->name, fd->ino, fd->type, curofs, offset));
+			jffs2_dbg(2, "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n",
+				  fd->name, fd->ino, fd->type, curofs, offset);
 			continue;
 		}
 		if (!fd->ino) {
-			D2(printk(KERN_DEBUG "Skipping deletion dirent \"%s\"\n", fd->name));
+			jffs2_dbg(2, "Skipping deletion dirent \"%s\"\n",
+				  fd->name);
 			offset++;
 			continue;
 		}
-		D2(printk(KERN_DEBUG "Dirent %ld: \"%s\", ino #%u, type %d\n", offset, fd->name, fd->ino, fd->type));
+		jffs2_dbg(2, "Dirent %ld: \"%s\", ino #%u, type %d\n",
+			  offset, fd->name, fd->ino, fd->type);
 		if (filldir(dirent, fd->name, strlen(fd->name), offset, fd->ino, fd->type) < 0)
 			break;
 		offset++;
@@ -184,12 +189,12 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
 
 	c = JFFS2_SB_INFO(dir_i->i_sb);
 
-	D1(printk(KERN_DEBUG "jffs2_create()\n"));
+	jffs2_dbg(1, "%s()\n", __func__);
 
 	inode = jffs2_new_inode(dir_i, mode, ri);
 
 	if (IS_ERR(inode)) {
-		D1(printk(KERN_DEBUG "jffs2_new_inode() failed\n"));
+		jffs2_dbg(1, "jffs2_new_inode() failed\n");
 		jffs2_free_raw_inode(ri);
 		return PTR_ERR(inode);
 	}
@@ -217,9 +222,9 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
 
 	jffs2_free_raw_inode(ri);
 
-	D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
-		  inode->i_ino, inode->i_mode, inode->i_nlink,
-		  f->inocache->pino_nlink, inode->i_mapping->nrpages));
+	jffs2_dbg(1, "%s(): Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
+		  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
+		  f->inocache->pino_nlink, inode->i_mapping->nrpages);
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
@@ -362,14 +367,15 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	/* We use f->target field to store the target path. */
 	f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
 	if (!f->target) {
-		printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
+		pr_warn("Can't allocate %d bytes of memory\n", targetlen + 1);
 		mutex_unlock(&f->sem);
 		jffs2_complete_reservation(c);
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
+	jffs2_dbg(1, "%s(): symlink's target '%s' cached\n",
+		  __func__, (char *)f->target);
 
 	/* No data here. Only a metadata node, which will be
 	   obsoleted by the first data write
@@ -856,7 +862,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 		f->inocache->pino_nlink++;
 		mutex_unlock(&f->sem);
 
-		printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
+		pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
+			  __func__, ret);
 		/* Might as well let the VFS know */
 		d_instantiate(new_dentry, old_dentry->d_inode);
 		ihold(old_dentry->d_inode);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index eafb8d37a6f..4a6cf289be2 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
@@ -46,11 +48,12 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 #else /* Linux */
 	struct erase_info *instr;
 
-	D1(printk(KERN_DEBUG "jffs2_erase_block(): erase block %#08x (range %#08x-%#08x)\n",
-		  jeb->offset, jeb->offset, jeb->offset + c->sector_size));
+	jffs2_dbg(1, "%s(): erase block %#08x (range %#08x-%#08x)\n",
+		  __func__,
+		  jeb->offset, jeb->offset, jeb->offset + c->sector_size);
 	instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL);
 	if (!instr) {
-		printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
+		pr_warn("kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
 		mutex_lock(&c->erase_free_sem);
 		spin_lock(&c->erase_completion_lock);
 		list_move(&jeb->list, &c->erase_pending_list);
@@ -69,7 +72,6 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	instr->len = c->sector_size;
 	instr->callback = jffs2_erase_callback;
 	instr->priv = (unsigned long)(&instr[1]);
-	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	((struct erase_priv_struct *)instr->priv)->jeb = jeb;
 	((struct erase_priv_struct *)instr->priv)->c = c;
@@ -84,7 +86,8 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 
 	if (ret == -ENOMEM || ret == -EAGAIN) {
 		/* Erase failed immediately. Refile it on the list */
-		D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
+		jffs2_dbg(1, "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n",
+			  jeb->offset, ret);
 		mutex_lock(&c->erase_free_sem);
 		spin_lock(&c->erase_completion_lock);
 		list_move(&jeb->list, &c->erase_pending_list);
@@ -97,9 +100,11 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	}
 
 	if (ret == -EROFS)
-		printk(KERN_WARNING "Erase at 0x%08x failed immediately: -EROFS. Is the sector locked?\n", jeb->offset);
+		pr_warn("Erase at 0x%08x failed immediately: -EROFS. Is the sector locked?\n",
+			jeb->offset);
 	else
-		printk(KERN_WARNING "Erase at 0x%08x failed immediately: errno %d\n", jeb->offset, ret);
+		pr_warn("Erase at 0x%08x failed immediately: errno %d\n",
+			jeb->offset, ret);
 
 	jffs2_erase_failed(c, jeb, bad_offset);
 }
@@ -125,13 +130,14 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
 
 			work_done++;
 			if (!--count) {
-				D1(printk(KERN_DEBUG "Count reached. jffs2_erase_pending_blocks leaving\n"));
+				jffs2_dbg(1, "Count reached. jffs2_erase_pending_blocks leaving\n");
 				goto done;
 			}
 
 		} else if (!list_empty(&c->erase_pending_list)) {
 			jeb = list_entry(c->erase_pending_list.next, struct jffs2_eraseblock, list);
-			D1(printk(KERN_DEBUG "Starting erase of pending block 0x%08x\n", jeb->offset));
+			jffs2_dbg(1, "Starting erase of pending block 0x%08x\n",
+				  jeb->offset);
 			list_del(&jeb->list);
 			c->erasing_size += c->sector_size;
 			c->wasted_size -= jeb->wasted_size;
@@ -159,13 +165,13 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
 	spin_unlock(&c->erase_completion_lock);
 	mutex_unlock(&c->erase_free_sem);
  done:
-	D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n"));
+	jffs2_dbg(1, "jffs2_erase_pending_blocks completed\n");
 	return work_done;
 }
 
 static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
 {
-	D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
+	jffs2_dbg(1, "Erase completed successfully at 0x%08x\n", jeb->offset);
 	mutex_lock(&c->erase_free_sem);
 	spin_lock(&c->erase_completion_lock);
 	list_move_tail(&jeb->list, &c->erase_complete_list);
@@ -214,7 +220,7 @@ static void jffs2_erase_callback(struct erase_info *instr)
 	struct erase_priv_struct *priv = (void *)instr->priv;
 
 	if(instr->state != MTD_ERASE_DONE) {
-		printk(KERN_WARNING "Erase at 0x%08llx finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n",
-		       (unsigned long long)instr->addr, instr->state);
+		pr_warn("Erase at 0x%08llx finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n",
+			(unsigned long long)instr->addr, instr->state);
 		jffs2_erase_failed(priv->c, priv->jeb, instr->fail_addr);
 	} else {
@@ -269,8 +275,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 		return;
 	}
 
-	D1(printk(KERN_DEBUG "Removed nodes in range 0x%08x-0x%08x from ino #%u\n",
-		  jeb->offset, jeb->offset + c->sector_size, ic->ino));
+	jffs2_dbg(1, "Removed nodes in range 0x%08x-0x%08x from ino #%u\n",
+		  jeb->offset, jeb->offset + c->sector_size, ic->ino);
 
 	D2({
 		int i=0;
@@ -281,7 +287,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 
 		printk(KERN_DEBUG);
 		while(this) {
-			printk(KERN_CONT "0x%08x(%d)->",
+			pr_cont("0x%08x(%d)->",
 			       ref_offset(this), ref_flags(this));
 			if (++i == 5) {
 				printk(KERN_DEBUG);
@@ -289,7 +295,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 			}
 			this = this->next_in_ino;
 		}
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	});
 
 	switch (ic->class) {
@@ -310,7 +316,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
 {
 	struct jffs2_raw_node_ref *block, *ref;
313 D1(printk(KERN_DEBUG "Freeing all node refs for eraseblock offset 0x%08x\n", jeb->offset)); 319 jffs2_dbg(1, "Freeing all node refs for eraseblock offset 0x%08x\n",
320 jeb->offset);
314 321
315 block = ref = jeb->first_node; 322 block = ref = jeb->first_node;
316 323
@@ -342,12 +349,13 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
342 &ebuf, NULL); 349 &ebuf, NULL);
343 if (ret != -EOPNOTSUPP) { 350 if (ret != -EOPNOTSUPP) {
344 if (ret) { 351 if (ret) {
345 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 352 jffs2_dbg(1, "MTD point failed %d\n", ret);
346 goto do_flash_read; 353 goto do_flash_read;
347 } 354 }
348 if (retlen < c->sector_size) { 355 if (retlen < c->sector_size) {
349 /* Don't muck about if it won't let us point to the whole erase sector */ 356 /* Don't muck about if it won't let us point to the whole erase sector */
350 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 357 jffs2_dbg(1, "MTD point returned len too short: 0x%zx\n",
358 retlen);
351 mtd_unpoint(c->mtd, jeb->offset, retlen); 359 mtd_unpoint(c->mtd, jeb->offset, retlen);
352 goto do_flash_read; 360 goto do_flash_read;
353 } 361 }
@@ -359,8 +367,10 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
359 } while(--retlen); 367 } while(--retlen);
360 mtd_unpoint(c->mtd, jeb->offset, c->sector_size); 368 mtd_unpoint(c->mtd, jeb->offset, c->sector_size);
361 if (retlen) { 369 if (retlen) {
362 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 370 pr_warn("Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
363 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 371 *wordebuf,
372 jeb->offset +
373 c->sector_size-retlen * sizeof(*wordebuf));
364 return -EIO; 374 return -EIO;
365 } 375 }
366 return 0; 376 return 0;
@@ -368,11 +378,12 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
368 do_flash_read: 378 do_flash_read:
369 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL); 379 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
370 if (!ebuf) { 380 if (!ebuf) {
371 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset); 381 pr_warn("Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n",
382 jeb->offset);
372 return -EAGAIN; 383 return -EAGAIN;
373 } 384 }
374 385
375 D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset)); 386 jffs2_dbg(1, "Verifying erase at 0x%08x\n", jeb->offset);
376 387
377 for (ofs = jeb->offset; ofs < jeb->offset + c->sector_size; ) { 388 for (ofs = jeb->offset; ofs < jeb->offset + c->sector_size; ) {
378 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs); 389 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
@@ -382,12 +393,14 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
382 393
383 ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf); 394 ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf);
384 if (ret) { 395 if (ret) {
385 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); 396 pr_warn("Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n",
397 ofs, ret);
386 ret = -EIO; 398 ret = -EIO;
387 goto fail; 399 goto fail;
388 } 400 }
389 if (retlen != readlen) { 401 if (retlen != readlen) {
390 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen); 402 pr_warn("Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n",
403 ofs, readlen, retlen);
391 ret = -EIO; 404 ret = -EIO;
392 goto fail; 405 goto fail;
393 } 406 }
@@ -396,7 +409,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
396 unsigned long *datum = ebuf + i; 409 unsigned long *datum = ebuf + i;
397 if (*datum + 1) { 410 if (*datum + 1) {
398 *bad_offset += i; 411 *bad_offset += i;
399 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset); 412 pr_warn("Newly-erased block contained word 0x%lx at offset 0x%08x\n",
413 *datum, *bad_offset);
400 ret = -EIO; 414 ret = -EIO;
401 goto fail; 415 goto fail;
402 } 416 }
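
A note on the check being reformatted in the hunk above: "if (*datum + 1)" is the idiom for "*datum != ~0UL". Freshly erased flash reads back as all ones, so a word passes the test only if incrementing it wraps to zero; anything else is stale data. A more explicit equivalent (illustrative rewrite, not the jffs2 code itself):

    /* Erased flash reads back 0xFF bytes, so a clean word is all ones.
     * (*datum + 1) is nonzero exactly when *datum != ~0UL, i.e. when
     * the supposedly-erased word still holds stale data. */
    if (*datum != ~0UL) {
            *bad_offset += i;
            /* ... report the bad word and fail, as above ... */
    }
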
@@ -422,7 +436,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
422 } 436 }
423 437
424 /* Write the erase complete marker */ 438 /* Write the erase complete marker */
425 D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset)); 439 jffs2_dbg(1, "Writing erased marker to block at 0x%08x\n", jeb->offset);
426 bad_offset = jeb->offset; 440 bad_offset = jeb->offset;
427 441
428 /* Cleanmarker in oob area or no cleanmarker at all ? */ 442 /* Cleanmarker in oob area or no cleanmarker at all ? */
@@ -451,10 +465,10 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
451 465
452 if (ret || retlen != sizeof(marker)) { 466 if (ret || retlen != sizeof(marker)) {
453 if (ret) 467 if (ret)
454 printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n", 468 pr_warn("Write clean marker to block at 0x%08x failed: %d\n",
455 jeb->offset, ret); 469 jeb->offset, ret);
456 else 470 else
457 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n", 471 pr_warn("Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
458 jeb->offset, sizeof(marker), retlen); 472 jeb->offset, sizeof(marker), retlen);
459 473
460 goto filebad; 474 goto filebad;
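
The erase.c hunks above are one mechanical conversion applied throughout: bare printk(KERN_WARNING ...) calls become pr_warn(...), and D1(printk(KERN_DEBUG ...)) wrappers become jffs2_dbg(1, ...). The real jffs2_dbg() lives in fs/jffs2/debug.h and its exact definition may differ, but a level-gated macro of roughly this shape is what the converted call sites assume:

    /* Sketch only, assuming CONFIG_JFFS2_FS_DEBUG is the existing jffs2
     * verbosity symbol: compare the compile-time level, then defer to
     * pr_debug() so pr_fmt() prefixing and dynamic debug still apply. */
    #define jffs2_dbg(level, fmt, ...)                      \
    do {                                                    \
            if (CONFIG_JFFS2_FS_DEBUG >= level)             \
                    pr_debug(fmt, ##__VA_ARGS__);           \
    } while (0)

One side effect of the new form: D1() compiled its argument away entirely when debugging was off, whereas jffs2_dbg() always type-checks its format string and arguments, so conversions like this tend to flush out latent printf-format mismatches.
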
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 61e6723535b..db3889ba881 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -10,6 +10,8 @@
10 * 10 *
11 */ 11 */
12 12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
13#include <linux/kernel.h> 15#include <linux/kernel.h>
14#include <linux/fs.h> 16#include <linux/fs.h>
15#include <linux/time.h> 17#include <linux/time.h>
@@ -85,7 +87,8 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
85 unsigned char *pg_buf; 87 unsigned char *pg_buf;
86 int ret; 88 int ret;
87 89
88 D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT)); 90 jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n",
91 __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT);
89 92
90 BUG_ON(!PageLocked(pg)); 93 BUG_ON(!PageLocked(pg));
91 94
@@ -105,7 +108,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
105 flush_dcache_page(pg); 108 flush_dcache_page(pg);
106 kunmap(pg); 109 kunmap(pg);
107 110
108 D2(printk(KERN_DEBUG "readpage finished\n")); 111 jffs2_dbg(2, "readpage finished\n");
109 return ret; 112 return ret;
110} 113}
111 114
@@ -144,7 +147,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
144 return -ENOMEM; 147 return -ENOMEM;
145 *pagep = pg; 148 *pagep = pg;
146 149
147 D1(printk(KERN_DEBUG "jffs2_write_begin()\n")); 150 jffs2_dbg(1, "%s()\n", __func__);
148 151
149 if (pageofs > inode->i_size) { 152 if (pageofs > inode->i_size) {
150 /* Make new hole frag from old EOF to new page */ 153 /* Make new hole frag from old EOF to new page */
@@ -153,8 +156,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
153 struct jffs2_full_dnode *fn; 156 struct jffs2_full_dnode *fn;
154 uint32_t alloc_len; 157 uint32_t alloc_len;
155 158
156 D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", 159 jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
157 (unsigned int)inode->i_size, pageofs)); 160 (unsigned int)inode->i_size, pageofs);
158 161
159 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, 162 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
160 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 163 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
@@ -198,7 +201,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
198 f->metadata = NULL; 201 f->metadata = NULL;
199 } 202 }
200 if (ret) { 203 if (ret) {
201 D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret)); 204 jffs2_dbg(1, "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n",
205 ret);
202 jffs2_mark_node_obsolete(c, fn->raw); 206 jffs2_mark_node_obsolete(c, fn->raw);
203 jffs2_free_full_dnode(fn); 207 jffs2_free_full_dnode(fn);
204 jffs2_complete_reservation(c); 208 jffs2_complete_reservation(c);
@@ -222,7 +226,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
222 if (ret) 226 if (ret)
223 goto out_page; 227 goto out_page;
224 } 228 }
225 D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags)); 229 jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
226 return ret; 230 return ret;
227 231
228out_page: 232out_page:
@@ -248,8 +252,9 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
248 int ret = 0; 252 int ret = 0;
249 uint32_t writtenlen = 0; 253 uint32_t writtenlen = 0;
250 254
251 D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", 255 jffs2_dbg(1, "%s(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
252 inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); 256 __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT,
257 start, end, pg->flags);
253 258
254 /* We need to avoid deadlock with page_cache_read() in 259 /* We need to avoid deadlock with page_cache_read() in
255 jffs2_garbage_collect_pass(). So the page must be 260 jffs2_garbage_collect_pass(). So the page must be
@@ -268,7 +273,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
268 ri = jffs2_alloc_raw_inode(); 273 ri = jffs2_alloc_raw_inode();
269 274
270 if (!ri) { 275 if (!ri) {
271 D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n")); 276 jffs2_dbg(1, "%s(): Allocation of raw inode failed\n",
277 __func__);
272 unlock_page(pg); 278 unlock_page(pg);
273 page_cache_release(pg); 279 page_cache_release(pg);
274 return -ENOMEM; 280 return -ENOMEM;
@@ -315,13 +321,14 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
315 /* generic_file_write has written more to the page cache than we've 321 /* generic_file_write has written more to the page cache than we've
316 actually written to the medium. Mark the page !Uptodate so that 322 actually written to the medium. Mark the page !Uptodate so that
317 it gets reread */ 323 it gets reread */
318 D1(printk(KERN_DEBUG "jffs2_write_end(): Not all bytes written. Marking page !uptodate\n")); 324 jffs2_dbg(1, "%s(): Not all bytes written. Marking page !uptodate\n",
325 __func__);
319 SetPageError(pg); 326 SetPageError(pg);
320 ClearPageUptodate(pg); 327 ClearPageUptodate(pg);
321 } 328 }
322 329
323 D1(printk(KERN_DEBUG "jffs2_write_end() returning %d\n", 330 jffs2_dbg(1, "%s() returning %d\n",
324 writtenlen > 0 ? writtenlen : ret)); 331 __func__, writtenlen > 0 ? writtenlen : ret);
325 unlock_page(pg); 332 unlock_page(pg);
326 page_cache_release(pg); 333 page_cache_release(pg);
327 return writtenlen > 0 ? writtenlen : ret; 334 return writtenlen > 0 ? writtenlen : ret;
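
file.c, like the other converted files, now opens with #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt, and that definition is what keeps the shortened messages equivalent: every pr_*() call in the file picks up a "jffs2: " prefix automatically. Paraphrasing the include/linux/printk.h machinery (exact definitions vary by kernel version):

    /* Per file, defined before the #includes so printk.h sees it: */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    /* printk.h builds the pr_* helpers on top of it, roughly: */
    #define pr_warn(fmt, ...) \
            printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)

    /* So in fs/jffs2 (KBUILD_MODNAME is "jffs2"): */
    pr_warn("Erase at 0x%08x failed: %d\n", ofs, ret);
    /* logs as: jffs2: Erase at 0x... failed: ... */

This is also why hunks further down can drop a literal "jffs2: " from their message strings without changing the logged output.
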
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2e0123867cb..bb6f993ebca 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -10,6 +10,8 @@
10 * 10 *
11 */ 11 */
12 12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
13#include <linux/capability.h> 15#include <linux/capability.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
15#include <linux/sched.h> 17#include <linux/sched.h>
@@ -39,7 +41,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
39 int ret; 41 int ret;
40 int alloc_type = ALLOC_NORMAL; 42 int alloc_type = ALLOC_NORMAL;
41 43
42 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino)); 44 jffs2_dbg(1, "%s(): ino #%lu\n", __func__, inode->i_ino);
43 45
44 /* Special cases - we don't want more than one data node 46 /* Special cases - we don't want more than one data node
45 for these types on the medium at any time. So setattr 47 for these types on the medium at any time. So setattr
@@ -50,7 +52,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
50 /* For these, we don't actually need to read the old node */ 52 /* For these, we don't actually need to read the old node */
51 mdatalen = jffs2_encode_dev(&dev, inode->i_rdev); 53 mdatalen = jffs2_encode_dev(&dev, inode->i_rdev);
52 mdata = (char *)&dev; 54 mdata = (char *)&dev;
53 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen)); 55 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
56 __func__, mdatalen);
54 } else if (S_ISLNK(inode->i_mode)) { 57 } else if (S_ISLNK(inode->i_mode)) {
55 mutex_lock(&f->sem); 58 mutex_lock(&f->sem);
56 mdatalen = f->metadata->size; 59 mdatalen = f->metadata->size;
@@ -66,7 +69,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
66 return ret; 69 return ret;
67 } 70 }
68 mutex_unlock(&f->sem); 71 mutex_unlock(&f->sem);
69 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen)); 72 jffs2_dbg(1, "%s(): Writing %d bytes of symlink target\n",
73 __func__, mdatalen);
70 } 74 }
71 75
72 ri = jffs2_alloc_raw_inode(); 76 ri = jffs2_alloc_raw_inode();
@@ -233,7 +237,8 @@ void jffs2_evict_inode (struct inode *inode)
233 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 237 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
234 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 238 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
235 239
236 D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); 240 jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
241 __func__, inode->i_ino, inode->i_mode);
237 truncate_inode_pages(&inode->i_data, 0); 242 truncate_inode_pages(&inode->i_data, 0);
238 end_writeback(inode); 243 end_writeback(inode);
239 jffs2_do_clear_inode(c, f); 244 jffs2_do_clear_inode(c, f);
@@ -249,7 +254,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
249 dev_t rdev = 0; 254 dev_t rdev = 0;
250 int ret; 255 int ret;
251 256
252 D1(printk(KERN_DEBUG "jffs2_iget(): ino == %lu\n", ino)); 257 jffs2_dbg(1, "%s(): ino == %lu\n", __func__, ino);
253 258
254 inode = iget_locked(sb, ino); 259 inode = iget_locked(sb, ino);
255 if (!inode) 260 if (!inode)
@@ -317,14 +322,16 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
317 /* Read the device numbers from the media */ 322 /* Read the device numbers from the media */
318 if (f->metadata->size != sizeof(jdev.old_id) && 323 if (f->metadata->size != sizeof(jdev.old_id) &&
319 f->metadata->size != sizeof(jdev.new_id)) { 324 f->metadata->size != sizeof(jdev.new_id)) {
320 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); 325 pr_notice("Device node has strange size %d\n",
326 f->metadata->size);
321 goto error_io; 327 goto error_io;
322 } 328 }
323 D1(printk(KERN_DEBUG "Reading device numbers from flash\n")); 329 jffs2_dbg(1, "Reading device numbers from flash\n");
324 ret = jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size); 330 ret = jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size);
325 if (ret < 0) { 331 if (ret < 0) {
326 /* Eep */ 332 /* Eep */
327 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); 333 pr_notice("Read device numbers for inode %lu failed\n",
334 (unsigned long)inode->i_ino);
328 goto error; 335 goto error;
329 } 336 }
330 if (f->metadata->size == sizeof(jdev.old_id)) 337 if (f->metadata->size == sizeof(jdev.old_id))
@@ -339,12 +346,13 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
339 break; 346 break;
340 347
341 default: 348 default:
342 printk(KERN_WARNING "jffs2_read_inode(): Bogus imode %o for ino %lu\n", inode->i_mode, (unsigned long)inode->i_ino); 349 pr_warn("%s(): Bogus i_mode %o for ino %lu\n",
350 __func__, inode->i_mode, (unsigned long)inode->i_ino);
343 } 351 }
344 352
345 mutex_unlock(&f->sem); 353 mutex_unlock(&f->sem);
346 354
347 D1(printk(KERN_DEBUG "jffs2_read_inode() returning\n")); 355 jffs2_dbg(1, "jffs2_read_inode() returning\n");
348 unlock_new_inode(inode); 356 unlock_new_inode(inode);
349 return inode; 357 return inode;
350 358
@@ -362,11 +370,13 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
362 struct iattr iattr; 370 struct iattr iattr;
363 371
364 if (!(inode->i_state & I_DIRTY_DATASYNC)) { 372 if (!(inode->i_state & I_DIRTY_DATASYNC)) {
365 D2(printk(KERN_DEBUG "jffs2_dirty_inode() not calling setattr() for ino #%lu\n", inode->i_ino)); 373 jffs2_dbg(2, "%s(): not calling setattr() for ino #%lu\n",
374 __func__, inode->i_ino);
366 return; 375 return;
367 } 376 }
368 377
369 D1(printk(KERN_DEBUG "jffs2_dirty_inode() calling setattr() for ino #%lu\n", inode->i_ino)); 378 jffs2_dbg(1, "%s(): calling setattr() for ino #%lu\n",
379 __func__, inode->i_ino);
370 380
371 iattr.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_MTIME|ATTR_CTIME; 381 iattr.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_MTIME|ATTR_CTIME;
372 iattr.ia_mode = inode->i_mode; 382 iattr.ia_mode = inode->i_mode;
@@ -414,7 +424,8 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
414 struct jffs2_inode_info *f; 424 struct jffs2_inode_info *f;
415 int ret; 425 int ret;
416 426
417 D1(printk(KERN_DEBUG "jffs2_new_inode(): dir_i %ld, mode 0x%x\n", dir_i->i_ino, mode)); 427 jffs2_dbg(1, "%s(): dir_i %ld, mode 0x%x\n",
428 __func__, dir_i->i_ino, mode);
418 429
419 c = JFFS2_SB_INFO(sb); 430 c = JFFS2_SB_INFO(sb);
420 431
@@ -504,11 +515,11 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
504 515
505#ifndef CONFIG_JFFS2_FS_WRITEBUFFER 516#ifndef CONFIG_JFFS2_FS_WRITEBUFFER
506 if (c->mtd->type == MTD_NANDFLASH) { 517 if (c->mtd->type == MTD_NANDFLASH) {
507 printk(KERN_ERR "jffs2: Cannot operate on NAND flash unless jffs2 NAND support is compiled in.\n"); 518 pr_err("Cannot operate on NAND flash unless jffs2 NAND support is compiled in\n");
508 return -EINVAL; 519 return -EINVAL;
509 } 520 }
510 if (c->mtd->type == MTD_DATAFLASH) { 521 if (c->mtd->type == MTD_DATAFLASH) {
511 printk(KERN_ERR "jffs2: Cannot operate on DataFlash unless jffs2 DataFlash support is compiled in.\n"); 522 pr_err("Cannot operate on DataFlash unless jffs2 DataFlash support is compiled in\n");
512 return -EINVAL; 523 return -EINVAL;
513 } 524 }
514#endif 525#endif
@@ -522,12 +533,13 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
522 */ 533 */
523 if ((c->sector_size * blocks) != c->flash_size) { 534 if ((c->sector_size * blocks) != c->flash_size) {
524 c->flash_size = c->sector_size * blocks; 535 c->flash_size = c->sector_size * blocks;
525 printk(KERN_INFO "jffs2: Flash size not aligned to erasesize, reducing to %dKiB\n", 536 pr_info("Flash size not aligned to erasesize, reducing to %dKiB\n",
526 c->flash_size / 1024); 537 c->flash_size / 1024);
527 } 538 }
528 539
529 if (c->flash_size < 5*c->sector_size) { 540 if (c->flash_size < 5*c->sector_size) {
530 printk(KERN_ERR "jffs2: Too few erase blocks (%d)\n", c->flash_size / c->sector_size); 541 pr_err("Too few erase blocks (%d)\n",
542 c->flash_size / c->sector_size);
531 return -EINVAL; 543 return -EINVAL;
532 } 544 }
533 545
@@ -550,20 +562,20 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
550 if ((ret = jffs2_do_mount_fs(c))) 562 if ((ret = jffs2_do_mount_fs(c)))
551 goto out_inohash; 563 goto out_inohash;
552 564
553 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): Getting root inode\n")); 565 jffs2_dbg(1, "%s(): Getting root inode\n", __func__);
554 root_i = jffs2_iget(sb, 1); 566 root_i = jffs2_iget(sb, 1);
555 if (IS_ERR(root_i)) { 567 if (IS_ERR(root_i)) {
556 D1(printk(KERN_WARNING "get root inode failed\n")); 568 jffs2_dbg(1, "get root inode failed\n");
557 ret = PTR_ERR(root_i); 569 ret = PTR_ERR(root_i);
558 goto out_root; 570 goto out_root;
559 } 571 }
560 572
561 ret = -ENOMEM; 573 ret = -ENOMEM;
562 574
563 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); 575 jffs2_dbg(1, "%s(): d_make_root()\n", __func__);
564 sb->s_root = d_alloc_root(root_i); 576 sb->s_root = d_make_root(root_i);
565 if (!sb->s_root) 577 if (!sb->s_root)
566 goto out_root_i; 578 goto out_root;
567 579
568 sb->s_maxbytes = 0xFFFFFFFF; 580 sb->s_maxbytes = 0xFFFFFFFF;
569 sb->s_blocksize = PAGE_CACHE_SIZE; 581 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -573,8 +585,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
573 jffs2_start_garbage_collect_thread(c); 585 jffs2_start_garbage_collect_thread(c);
574 return 0; 586 return 0;
575 587
576 out_root_i:
577 iput(root_i);
578out_root: 588out_root:
579 jffs2_free_ino_caches(c); 589 jffs2_free_ino_caches(c);
580 jffs2_free_raw_node_refs(c); 590 jffs2_free_raw_node_refs(c);
@@ -620,20 +630,21 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
620 */ 630 */
621 inode = ilookup(OFNI_BS_2SFFJ(c), inum); 631 inode = ilookup(OFNI_BS_2SFFJ(c), inum);
622 if (!inode) { 632 if (!inode) {
623 D1(printk(KERN_DEBUG "ilookup() failed for ino #%u; inode is probably deleted.\n", 633 jffs2_dbg(1, "ilookup() failed for ino #%u; inode is probably deleted.\n",
624 inum)); 634 inum);
625 635
626 spin_lock(&c->inocache_lock); 636 spin_lock(&c->inocache_lock);
627 ic = jffs2_get_ino_cache(c, inum); 637 ic = jffs2_get_ino_cache(c, inum);
628 if (!ic) { 638 if (!ic) {
629 D1(printk(KERN_DEBUG "Inode cache for ino #%u is gone.\n", inum)); 639 jffs2_dbg(1, "Inode cache for ino #%u is gone\n",
640 inum);
630 spin_unlock(&c->inocache_lock); 641 spin_unlock(&c->inocache_lock);
631 return NULL; 642 return NULL;
632 } 643 }
633 if (ic->state != INO_STATE_CHECKEDABSENT) { 644 if (ic->state != INO_STATE_CHECKEDABSENT) {
634 /* Wait for progress. Don't just loop */ 645 /* Wait for progress. Don't just loop */
635 D1(printk(KERN_DEBUG "Waiting for ino #%u in state %d\n", 646 jffs2_dbg(1, "Waiting for ino #%u in state %d\n",
636 ic->ino, ic->state)); 647 ic->ino, ic->state);
637 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); 648 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
638 } else { 649 } else {
639 spin_unlock(&c->inocache_lock); 650 spin_unlock(&c->inocache_lock);
@@ -651,8 +662,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
651 return ERR_CAST(inode); 662 return ERR_CAST(inode);
652 } 663 }
653 if (is_bad_inode(inode)) { 664 if (is_bad_inode(inode)) {
654 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n", 665 pr_notice("Eep. read_inode() failed for ino #%u. unlinked %d\n",
655 inum, unlinked); 666 inum, unlinked);
656 /* NB. This will happen again. We need to do something appropriate here. */ 667 /* NB. This will happen again. We need to do something appropriate here. */
657 iput(inode); 668 iput(inode);
658 return ERR_PTR(-EIO); 669 return ERR_PTR(-EIO);
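
One hunk in fs.c above is more than a logging conversion: jffs2_do_fill_super() switches from d_alloc_root() to d_make_root() and deletes the out_root_i: iput(root_i) label. That follows from the d_make_root() contract: it consumes the inode reference it is given and drops it itself when dentry allocation fails, so keeping the caller's iput() would have become a double-put. The resulting pattern, paraphrased rather than quoted:

    /* d_make_root() takes ownership of root_i: on failure it has already
     * called iput(), so the caller unwinds everything EXCEPT the inode. */
    sb->s_root = d_make_root(root_i);
    if (!sb->s_root)
            goto out_root;  /* ret is already -ENOMEM; no iput() here */
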
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 31dce611337..5a2dec2b064 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -10,6 +10,8 @@
10 * 10 *
11 */ 11 */
12 12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
13#include <linux/kernel.h> 15#include <linux/kernel.h>
14#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
15#include <linux/slab.h> 17#include <linux/slab.h>
@@ -51,44 +53,44 @@ static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
51 number of free blocks is low. */ 53 number of free blocks is low. */
52again: 54again:
53 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) { 55 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n")); 56 jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
55 nextlist = &c->bad_used_list; 57 nextlist = &c->bad_used_list;
56 } else if (n < 50 && !list_empty(&c->erasable_list)) { 58 } else if (n < 50 && !list_empty(&c->erasable_list)) {
57 /* Note that most of them will have gone directly to be erased. 59 /* Note that most of them will have gone directly to be erased.
58 So don't favour the erasable_list _too_ much. */ 60 So don't favour the erasable_list _too_ much. */
59 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n")); 61 jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
60 nextlist = &c->erasable_list; 62 nextlist = &c->erasable_list;
61 } else if (n < 110 && !list_empty(&c->very_dirty_list)) { 63 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
62 /* Most of the time, pick one off the very_dirty list */ 64 /* Most of the time, pick one off the very_dirty list */
63 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n")); 65 jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
64 nextlist = &c->very_dirty_list; 66 nextlist = &c->very_dirty_list;
65 } else if (n < 126 && !list_empty(&c->dirty_list)) { 67 } else if (n < 126 && !list_empty(&c->dirty_list)) {
66 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n")); 68 jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
67 nextlist = &c->dirty_list; 69 nextlist = &c->dirty_list;
68 } else if (!list_empty(&c->clean_list)) { 70 } else if (!list_empty(&c->clean_list)) {
69 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n")); 71 jffs2_dbg(1, "Picking block from clean_list to GC next\n");
70 nextlist = &c->clean_list; 72 nextlist = &c->clean_list;
71 } else if (!list_empty(&c->dirty_list)) { 73 } else if (!list_empty(&c->dirty_list)) {
72 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n")); 74 jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
73 75
74 nextlist = &c->dirty_list; 76 nextlist = &c->dirty_list;
75 } else if (!list_empty(&c->very_dirty_list)) { 77 } else if (!list_empty(&c->very_dirty_list)) {
76 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n")); 78 jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
77 nextlist = &c->very_dirty_list; 79 nextlist = &c->very_dirty_list;
78 } else if (!list_empty(&c->erasable_list)) { 80 } else if (!list_empty(&c->erasable_list)) {
79 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n")); 81 jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
80 82
81 nextlist = &c->erasable_list; 83 nextlist = &c->erasable_list;
82 } else if (!list_empty(&c->erasable_pending_wbuf_list)) { 84 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
83 /* There are blocks waiting for the wbuf sync */ 85
84 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n")); 86 jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
85 spin_unlock(&c->erase_completion_lock); 87 spin_unlock(&c->erase_completion_lock);
86 jffs2_flush_wbuf_pad(c); 88 jffs2_flush_wbuf_pad(c);
87 spin_lock(&c->erase_completion_lock); 89 spin_lock(&c->erase_completion_lock);
88 goto again; 90 goto again;
89 } else { 91 } else {
90 /* Eep. All were empty */ 92 /* Eep. All were empty */
91 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n")); 93 jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
92 return NULL; 94 return NULL;
93 } 95 }
94 96
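
The cascade of n < 50 / n < 110 / n < 126 tests above is a cheap weighted lottery over the candidate lists. In this version n appears to be derived from jiffies % 128 earlier in jffs2_find_gc_block(), outside the lines shown, so treat that as an assumption; on that basis the weights work out as:

    /* Selection weights, assuming n is uniform over [0, 128) and every
     * list is non-empty (when a list is empty the choice falls through
     * to the next branch, and bad_used_list preempts all of this when
     * enough free blocks remain): */
    /*   n in [  0,  50) -> erasable_list   : 50/128 ~ 39% */
    /*   n in [ 50, 110) -> very_dirty_list : 60/128 ~ 47% */
    /*   n in [110, 126) -> dirty_list      : 16/128 ~ 12% */
    /*   n in [126, 128) -> clean_list      :  2/128 ~  2% */

This biases GC toward blocks that reclaim the most space while still occasionally rewriting clean blocks for wear-levelling.
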
@@ -97,13 +99,15 @@ again:
97 c->gcblock = ret; 99 c->gcblock = ret;
98 ret->gc_node = ret->first_node; 100 ret->gc_node = ret->first_node;
99 if (!ret->gc_node) { 101 if (!ret->gc_node) {
100 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset); 102 pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103 ret->offset);
101 BUG(); 104 BUG();
102 } 105 }
103 106
104 /* Have we accidentally picked a clean block with wasted space ? */ 107 /* Have we accidentally picked a clean block with wasted space ? */
105 if (ret->wasted_size) { 108 if (ret->wasted_size) {
106 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size)); 109 jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110 ret->wasted_size);
107 ret->dirty_size += ret->wasted_size; 111 ret->dirty_size += ret->wasted_size;
108 c->wasted_size -= ret->wasted_size; 112 c->wasted_size -= ret->wasted_size;
109 c->dirty_size += ret->wasted_size; 113 c->dirty_size += ret->wasted_size;
@@ -140,8 +144,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
140 144
141 /* checked_ino is protected by the alloc_sem */ 145 /* checked_ino is protected by the alloc_sem */
142 if (c->checked_ino > c->highest_ino && xattr) { 146 if (c->checked_ino > c->highest_ino && xattr) {
143 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n", 147 pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 c->unchecked_size); 148 c->unchecked_size);
145 jffs2_dbg_dump_block_lists_nolock(c); 149 jffs2_dbg_dump_block_lists_nolock(c);
146 spin_unlock(&c->erase_completion_lock); 150 spin_unlock(&c->erase_completion_lock);
147 mutex_unlock(&c->alloc_sem); 151 mutex_unlock(&c->alloc_sem);
@@ -163,8 +167,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
163 } 167 }
164 168
165 if (!ic->pino_nlink) { 169 if (!ic->pino_nlink) {
166 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n", 170 jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
167 ic->ino)); 171 ic->ino);
168 spin_unlock(&c->inocache_lock); 172 spin_unlock(&c->inocache_lock);
169 jffs2_xattr_delete_inode(c, ic); 173 jffs2_xattr_delete_inode(c, ic);
170 continue; 174 continue;
@@ -172,13 +176,15 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
172 switch(ic->state) { 176 switch(ic->state) {
173 case INO_STATE_CHECKEDABSENT: 177 case INO_STATE_CHECKEDABSENT:
174 case INO_STATE_PRESENT: 178 case INO_STATE_PRESENT:
175 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino)); 179 jffs2_dbg(1, "Skipping ino #%u already checked\n",
180 ic->ino);
176 spin_unlock(&c->inocache_lock); 181 spin_unlock(&c->inocache_lock);
177 continue; 182 continue;
178 183
179 case INO_STATE_GC: 184 case INO_STATE_GC:
180 case INO_STATE_CHECKING: 185 case INO_STATE_CHECKING:
181 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state); 186 pr_warn("Inode #%u is in state %d during CRC check phase!\n",
187 ic->ino, ic->state);
182 spin_unlock(&c->inocache_lock); 188 spin_unlock(&c->inocache_lock);
183 BUG(); 189 BUG();
184 190
@@ -186,7 +192,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
186 /* We need to wait for it to finish, lest we move on 192 /* We need to wait for it to finish, lest we move on
187 and trigger the BUG() above while we haven't yet 193 and trigger the BUG() above while we haven't yet
188 finished checking all its nodes */ 194 finished checking all its nodes */
189 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino)); 195 jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
196 ic->ino);
190 /* We need to come back again for the _same_ inode. We've 197 /* We need to come back again for the _same_ inode. We've
191 made no progress in this case, but that should be OK */ 198 made no progress in this case, but that should be OK */
192 c->checked_ino--; 199 c->checked_ino--;
@@ -204,11 +211,13 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
204 ic->state = INO_STATE_CHECKING; 211 ic->state = INO_STATE_CHECKING;
205 spin_unlock(&c->inocache_lock); 212 spin_unlock(&c->inocache_lock);
206 213
207 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino)); 214 jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
215 __func__, ic->ino);
208 216
209 ret = jffs2_do_crccheck_inode(c, ic); 217 ret = jffs2_do_crccheck_inode(c, ic);
210 if (ret) 218 if (ret)
211 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino); 219 pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
220 ic->ino);
212 221
213 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT); 222 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
214 mutex_unlock(&c->alloc_sem); 223 mutex_unlock(&c->alloc_sem);
@@ -220,13 +229,13 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
220 !list_empty(&c->erase_pending_list)) { 229 !list_empty(&c->erase_pending_list)) {
221 spin_unlock(&c->erase_completion_lock); 230 spin_unlock(&c->erase_completion_lock);
222 mutex_unlock(&c->alloc_sem); 231 mutex_unlock(&c->alloc_sem);
223 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n")); 232 jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
224 if (jffs2_erase_pending_blocks(c, 1)) 233 if (jffs2_erase_pending_blocks(c, 1))
225 return 0; 234 return 0;
226 235
227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n")); 236 jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
228 spin_lock(&c->erase_completion_lock);
229 mutex_lock(&c->alloc_sem); 237 mutex_lock(&c->alloc_sem);
238 spin_lock(&c->erase_completion_lock);
230 } 239 }
231 240
232 /* First, work out which block we're garbage-collecting */ 241 /* First, work out which block we're garbage-collecting */
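
Buried in the gc.c conversion is one genuine behavioural fix, visible in the hunk just above: the retry path used to reacquire erase_completion_lock (a spinlock) and then alloc_sem (a mutex); the new code takes the mutex first. Acquiring a mutex can sleep, and sleeping while holding a spinlock is forbidden, so the old order was a latent bug:

    /* Old order (removed): mutex_lock() may sleep while the spinlock
     * is held, which is illegal in atomic context. */
    spin_lock(&c->erase_completion_lock);
    mutex_lock(&c->alloc_sem);

    /* New order: sleeping lock first, spinlock second. */
    mutex_lock(&c->alloc_sem);
    spin_lock(&c->erase_completion_lock);
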
@@ -242,13 +251,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
242 mutex_unlock(&c->alloc_sem); 251 mutex_unlock(&c->alloc_sem);
243 return -EAGAIN; 252 return -EAGAIN;
244 } 253 }
245 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n")); 254 jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
246 spin_unlock(&c->erase_completion_lock); 255 spin_unlock(&c->erase_completion_lock);
247 mutex_unlock(&c->alloc_sem); 256 mutex_unlock(&c->alloc_sem);
248 return -EIO; 257 return -EIO;
249 } 258 }
250 259
251 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size)); 260 jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
261 jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
252 D1(if (c->nextblock) 262 D1(if (c->nextblock)
253 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size)); 263 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
254 264
@@ -261,12 +271,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
261 gcblock_dirty = jeb->dirty_size; 271 gcblock_dirty = jeb->dirty_size;
262 272
263 while(ref_obsolete(raw)) { 273 while(ref_obsolete(raw)) {
264 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw))); 274 jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
275 ref_offset(raw));
265 raw = ref_next(raw); 276 raw = ref_next(raw);
266 if (unlikely(!raw)) { 277 if (unlikely(!raw)) {
267 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n"); 278 pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
268 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 279 pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
269 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size); 280 jeb->offset, jeb->free_size,
281 jeb->dirty_size, jeb->used_size);
270 jeb->gc_node = raw; 282 jeb->gc_node = raw;
271 spin_unlock(&c->erase_completion_lock); 283 spin_unlock(&c->erase_completion_lock);
272 mutex_unlock(&c->alloc_sem); 284 mutex_unlock(&c->alloc_sem);
@@ -275,7 +287,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
275 } 287 }
276 jeb->gc_node = raw; 288 jeb->gc_node = raw;
277 289
278 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw))); 290 jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
291 ref_offset(raw));
279 292
280 if (!raw->next_in_ino) { 293 if (!raw->next_in_ino) {
281 /* Inode-less node. Clean marker, snapshot or something like that */ 294 /* Inode-less node. Clean marker, snapshot or something like that */
@@ -316,7 +329,9 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
316 329
317 spin_unlock(&c->erase_completion_lock); 330 spin_unlock(&c->erase_completion_lock);
318 331
319 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino)); 332 jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
333 __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
334 ic->ino);
320 335
321 /* Three possibilities: 336 /* Three possibilities:
322 1. Inode is already in-core. We must iget it and do proper 337 1. Inode is already in-core. We must iget it and do proper
@@ -336,8 +351,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
336 if (ref_flags(raw) == REF_PRISTINE) 351 if (ref_flags(raw) == REF_PRISTINE)
337 ic->state = INO_STATE_GC; 352 ic->state = INO_STATE_GC;
338 else { 353 else {
339 D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", 354 jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
340 ic->ino)); 355 ic->ino);
341 } 356 }
342 break; 357 break;
343 358
@@ -353,8 +368,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
353 we're holding the alloc_sem, no other garbage collection 368 we're holding the alloc_sem, no other garbage collection
354 can happen. 369 can happen.
355 */ 370 */
356 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n", 371 pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
357 ic->ino, ic->state); 372 ic->ino, ic->state);
358 mutex_unlock(&c->alloc_sem); 373 mutex_unlock(&c->alloc_sem);
359 spin_unlock(&c->inocache_lock); 374 spin_unlock(&c->inocache_lock);
360 BUG(); 375 BUG();
@@ -367,8 +382,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
367 drop the alloc_sem before sleeping. */ 382 drop the alloc_sem before sleeping. */
368 383
369 mutex_unlock(&c->alloc_sem); 384 mutex_unlock(&c->alloc_sem);
370 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n", 385 jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
371 ic->ino, ic->state)); 386 __func__, ic->ino, ic->state);
372 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); 387 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
373 /* And because we dropped the alloc_sem we must start again from the 388 /* And because we dropped the alloc_sem we must start again from the
374 beginning. Ponder chance of livelock here -- we're returning success 389 beginning. Ponder chance of livelock here -- we're returning success
@@ -433,7 +448,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
433 test_gcnode: 448 test_gcnode:
434 if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) { 449 if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
435 /* Eep. This really should never happen. GC is broken */ 450 /* Eep. This really should never happen. GC is broken */
436 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node)); 451 pr_err("Error garbage collecting node at %08x!\n",
452 ref_offset(jeb->gc_node));
437 ret = -ENOSPC; 453 ret = -ENOSPC;
438 } 454 }
439 release_sem: 455 release_sem:
@@ -445,7 +461,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
445 461
446 eraseit: 462 eraseit:
447 if (c->gcblock && !c->gcblock->used_size) { 463 if (c->gcblock && !c->gcblock->used_size) {
448 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset)); 464 jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
465 c->gcblock->offset);
449 /* We're GC'ing an empty block? */ 466 /* We're GC'ing an empty block? */
450 list_add_tail(&c->gcblock->list, &c->erase_pending_list); 467 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
451 c->gcblock = NULL; 468 c->gcblock = NULL;
@@ -475,12 +492,12 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era
475 492
476 if (c->gcblock != jeb) { 493 if (c->gcblock != jeb) {
477 spin_unlock(&c->erase_completion_lock); 494 spin_unlock(&c->erase_completion_lock);
478 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n")); 495 jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
479 goto upnout; 496 goto upnout;
480 } 497 }
481 if (ref_obsolete(raw)) { 498 if (ref_obsolete(raw)) {
482 spin_unlock(&c->erase_completion_lock); 499 spin_unlock(&c->erase_completion_lock);
483 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n")); 500 jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
484 /* They'll call again */ 501 /* They'll call again */
485 goto upnout; 502 goto upnout;
486 } 503 }
@@ -536,10 +553,10 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era
536 } else if (fd) { 553 } else if (fd) {
537 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd); 554 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
538 } else { 555 } else {
539 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n", 556 pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
540 ref_offset(raw), f->inocache->ino); 557 ref_offset(raw), f->inocache->ino);
541 if (ref_obsolete(raw)) { 558 if (ref_obsolete(raw)) {
542 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n"); 559 pr_warn("But it's obsolete so we don't mind too much\n");
543 } else { 560 } else {
544 jffs2_dbg_dump_node(c, ref_offset(raw)); 561 jffs2_dbg_dump_node(c, ref_offset(raw));
545 BUG(); 562 BUG();
@@ -562,7 +579,8 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
562 uint32_t crc, rawlen; 579 uint32_t crc, rawlen;
563 int retried = 0; 580 int retried = 0;
564 581
565 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw))); 582 jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
583 ref_offset(raw));
566 584
567 alloclen = rawlen = ref_totlen(c, c->gcblock, raw); 585 alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
568 586
@@ -595,8 +613,8 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
595 613
596 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4); 614 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
597 if (je32_to_cpu(node->u.hdr_crc) != crc) { 615 if (je32_to_cpu(node->u.hdr_crc) != crc) {
598 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 616 pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
599 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc); 617 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
600 goto bail; 618 goto bail;
601 } 619 }
602 620
@@ -604,16 +622,18 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
604 case JFFS2_NODETYPE_INODE: 622 case JFFS2_NODETYPE_INODE:
605 crc = crc32(0, node, sizeof(node->i)-8); 623 crc = crc32(0, node, sizeof(node->i)-8);
606 if (je32_to_cpu(node->i.node_crc) != crc) { 624 if (je32_to_cpu(node->i.node_crc) != crc) {
607 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 625 pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
608 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc); 626 ref_offset(raw), je32_to_cpu(node->i.node_crc),
627 crc);
609 goto bail; 628 goto bail;
610 } 629 }
611 630
612 if (je32_to_cpu(node->i.dsize)) { 631 if (je32_to_cpu(node->i.dsize)) {
613 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize)); 632 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
614 if (je32_to_cpu(node->i.data_crc) != crc) { 633 if (je32_to_cpu(node->i.data_crc) != crc) {
615 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 634 pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
616 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc); 635 ref_offset(raw),
636 je32_to_cpu(node->i.data_crc), crc);
617 goto bail; 637 goto bail;
618 } 638 }
619 } 639 }
@@ -622,21 +642,24 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
622 case JFFS2_NODETYPE_DIRENT: 642 case JFFS2_NODETYPE_DIRENT:
623 crc = crc32(0, node, sizeof(node->d)-8); 643 crc = crc32(0, node, sizeof(node->d)-8);
624 if (je32_to_cpu(node->d.node_crc) != crc) { 644 if (je32_to_cpu(node->d.node_crc) != crc) {
625 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 645 pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
626 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc); 646 ref_offset(raw),
647 je32_to_cpu(node->d.node_crc), crc);
627 goto bail; 648 goto bail;
628 } 649 }
629 650
630 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) { 651 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
631 printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw)); 652 pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
653 ref_offset(raw));
632 goto bail; 654 goto bail;
633 } 655 }
634 656
635 if (node->d.nsize) { 657 if (node->d.nsize) {
636 crc = crc32(0, node->d.name, node->d.nsize); 658 crc = crc32(0, node->d.name, node->d.nsize);
637 if (je32_to_cpu(node->d.name_crc) != crc) { 659 if (je32_to_cpu(node->d.name_crc) != crc) {
638 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 660 pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
639 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc); 661 ref_offset(raw),
662 je32_to_cpu(node->d.name_crc), crc);
640 goto bail; 663 goto bail;
641 } 664 }
642 } 665 }
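
The CRC arithmetic in these pristine-GC hunks leans on on-media field layout: crc32 is taken over each struct minus its trailing CRC fields. Assuming the usual jffs2 on-media layouts (worth double-checking against jffs2.h):

    /* struct jffs2_unknown_node ends in a 4-byte hdr_crc, so the header
     * CRC covers sizeof(*node) - 4 bytes (magic, nodetype, totlen). */
    crc = crc32(0, node, sizeof(struct jffs2_unknown_node) - 4);

    /* jffs2_raw_inode and jffs2_raw_dirent both end in two 4-byte CRCs
     * (node_crc plus data_crc or name_crc), hence the sizeof(...) - 8:
     * the node CRC covers every field before the CRCs themselves. */
    crc = crc32(0, node, sizeof(node->i) - 8);
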
@@ -644,8 +667,8 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
644 default: 667 default:
645 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */ 668 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
646 if (ic) { 669 if (ic) {
647 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", 670 pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
648 ref_offset(raw), je16_to_cpu(node->u.nodetype)); 671 ref_offset(raw), je16_to_cpu(node->u.nodetype));
649 goto bail; 672 goto bail;
650 } 673 }
651 } 674 }
@@ -657,12 +680,13 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
657 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node); 680 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
658 681
659 if (ret || (retlen != rawlen)) { 682 if (ret || (retlen != rawlen)) {
660 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n", 683 pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
661 rawlen, phys_ofs, ret, retlen); 684 rawlen, phys_ofs, ret, retlen);
662 if (retlen) { 685 if (retlen) {
663 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL); 686 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
664 } else { 687 } else {
665 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs); 688 pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
689 phys_ofs);
666 } 690 }
667 if (!retried) { 691 if (!retried) {
668 /* Try to reallocate space and retry */ 692 /* Try to reallocate space and retry */
@@ -671,7 +695,7 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
671 695
672 retried = 1; 696 retried = 1;
673 697
674 D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n")); 698 jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
675 699
676 jffs2_dbg_acct_sanity_check(c,jeb); 700 jffs2_dbg_acct_sanity_check(c,jeb);
677 jffs2_dbg_acct_paranoia_check(c, jeb); 701 jffs2_dbg_acct_paranoia_check(c, jeb);
@@ -681,14 +705,16 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
681 it is only an upper estimation */ 705 it is only an upper estimation */
682 706
683 if (!ret) { 707 if (!ret) {
684 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs)); 708 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
709 phys_ofs);
685 710
686 jffs2_dbg_acct_sanity_check(c,jeb); 711 jffs2_dbg_acct_sanity_check(c,jeb);
687 jffs2_dbg_acct_paranoia_check(c, jeb); 712 jffs2_dbg_acct_paranoia_check(c, jeb);
688 713
689 goto retry; 714 goto retry;
690 } 715 }
691 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 716 jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
717 ret);
692 } 718 }
693 719
694 if (!ret) 720 if (!ret)
@@ -698,7 +724,8 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
698 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic); 724 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
699 725
700 jffs2_mark_node_obsolete(c, raw); 726 jffs2_mark_node_obsolete(c, raw);
701 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw))); 727 jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
728 ref_offset(raw));
702 729
703 out_node: 730 out_node:
704 kfree(node); 731 kfree(node);
@@ -725,29 +752,32 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
725 /* For these, we don't actually need to read the old node */ 752 /* For these, we don't actually need to read the old node */
726 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f)); 753 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
727 mdata = (char *)&dev; 754 mdata = (char *)&dev;
728 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen)); 755 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
756 __func__, mdatalen);
729 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) { 757 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
730 mdatalen = fn->size; 758 mdatalen = fn->size;
731 mdata = kmalloc(fn->size, GFP_KERNEL); 759 mdata = kmalloc(fn->size, GFP_KERNEL);
732 if (!mdata) { 760 if (!mdata) {
733 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n"); 761 pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
734 return -ENOMEM; 762 return -ENOMEM;
735 } 763 }
736 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen); 764 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
737 if (ret) { 765 if (ret) {
738 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret); 766 pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
767 ret);
739 kfree(mdata); 768 kfree(mdata);
740 return ret; 769 return ret;
741 } 770 }
742 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of symlink target\n", mdatalen)); 771 jffs2_dbg(1, "%s(): Writing %d bytes of symlink target\n",
772 __func__, mdatalen);
743 773
744 } 774 }
745 775
746 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen, 776 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
747 JFFS2_SUMMARY_INODE_SIZE); 777 JFFS2_SUMMARY_INODE_SIZE);
748 if (ret) { 778 if (ret) {
749 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n", 779 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
750 sizeof(ri)+ mdatalen, ret); 780 sizeof(ri) + mdatalen, ret);
751 goto out; 781 goto out;
752 } 782 }
753 783
@@ -784,7 +814,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
784 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC); 814 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
785 815
786 if (IS_ERR(new_fn)) { 816 if (IS_ERR(new_fn)) {
787 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn)); 817 pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
788 ret = PTR_ERR(new_fn); 818 ret = PTR_ERR(new_fn);
789 goto out; 819 goto out;
790 } 820 }
@@ -827,14 +857,15 @@ static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_er
827 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen, 857 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
828 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize)); 858 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
829 if (ret) { 859 if (ret) {
830 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n", 860 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
831 sizeof(rd)+rd.nsize, ret); 861 sizeof(rd)+rd.nsize, ret);
832 return ret; 862 return ret;
833 } 863 }
834 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC); 864 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
835 865
836 if (IS_ERR(new_fd)) { 866 if (IS_ERR(new_fd)) {
837 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd)); 867 pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
868 PTR_ERR(new_fd));
838 return PTR_ERR(new_fd); 869 return PTR_ERR(new_fd);
839 } 870 }
840 jffs2_add_fd_to_list(c, new_fd, &f->dents); 871 jffs2_add_fd_to_list(c, new_fd, &f->dents);
@@ -887,19 +918,22 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
887 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset)) 918 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
888 continue; 919 continue;
889 920
890 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw))); 921 jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
922 ref_offset(raw));
891 923
892 /* This is an obsolete node belonging to the same directory, and it's of the right 924 /* This is an obsolete node belonging to the same directory, and it's of the right
893 length. We need to take a closer look...*/ 925 length. We need to take a closer look...*/
894 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd); 926 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
895 if (ret) { 927 if (ret) {
896 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw)); 928 pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
929 __func__, ret, ref_offset(raw));
897 /* If we can't read it, we don't need to continue to obsolete it. Continue */ 930 /* If we can't read it, we don't need to continue to obsolete it. Continue */
898 continue; 931 continue;
899 } 932 }
900 if (retlen != rawlen) { 933 if (retlen != rawlen) {
901 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n", 934 pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
902 retlen, rawlen, ref_offset(raw)); 935 __func__, retlen, rawlen,
936 ref_offset(raw));
903 continue; 937 continue;
904 } 938 }
905 939
@@ -923,8 +957,9 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
923 a new deletion dirent to replace it */ 957 a new deletion dirent to replace it */
924 mutex_unlock(&c->erase_free_sem); 958 mutex_unlock(&c->erase_free_sem);
925 959
926 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n", 960 jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
927 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino))); 961 ref_offset(fd->raw), fd->name,
962 ref_offset(raw), je32_to_cpu(rd->ino));
928 kfree(rd); 963 kfree(rd);
929 964
930 return jffs2_garbage_collect_dirent(c, jeb, f, fd); 965 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
@@ -947,7 +982,8 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
947 fdp = &(*fdp)->next; 982 fdp = &(*fdp)->next;
948 } 983 }
949 if (!found) { 984 if (!found) {
950 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino); 985 pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
986 fd->name, f->inocache->ino);
951 } 987 }
952 jffs2_mark_node_obsolete(c, fd->raw); 988 jffs2_mark_node_obsolete(c, fd->raw);
953 jffs2_free_full_dirent(fd); 989 jffs2_free_full_dirent(fd);
@@ -964,8 +1000,8 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
964 uint32_t alloclen, ilen; 1000 uint32_t alloclen, ilen;
965 int ret; 1001 int ret;
966 1002
967 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n", 1003 jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
968 f->inocache->ino, start, end)); 1004 f->inocache->ino, start, end);
969 1005
970 memset(&ri, 0, sizeof(ri)); 1006 memset(&ri, 0, sizeof(ri));
971 1007
@@ -976,35 +1012,37 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
976 write it out again with the _same_ version as before */ 1012 write it out again with the _same_ version as before */
977 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri); 1013 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
978 if (readlen != sizeof(ri) || ret) { 1014 if (readlen != sizeof(ri) || ret) {
979 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen); 1015 pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1016 ret, readlen);
980 goto fill; 1017 goto fill;
981 } 1018 }
982 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) { 1019 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
983 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n", 1020 pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
984 ref_offset(fn->raw), 1021 __func__, ref_offset(fn->raw),
985 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE); 1022 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
986 return -EIO; 1023 return -EIO;
987 } 1024 }
988 if (je32_to_cpu(ri.totlen) != sizeof(ri)) { 1025 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
989 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n", 1026 pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
990 ref_offset(fn->raw), 1027 __func__, ref_offset(fn->raw),
991 je32_to_cpu(ri.totlen), sizeof(ri)); 1028 je32_to_cpu(ri.totlen), sizeof(ri));
992 return -EIO; 1029 return -EIO;
993 } 1030 }
994 crc = crc32(0, &ri, sizeof(ri)-8); 1031 crc = crc32(0, &ri, sizeof(ri)-8);
995 if (crc != je32_to_cpu(ri.node_crc)) { 1032 if (crc != je32_to_cpu(ri.node_crc)) {
996 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n", 1033 pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
997 ref_offset(fn->raw), 1034 __func__, ref_offset(fn->raw),
998 je32_to_cpu(ri.node_crc), crc); 1035 je32_to_cpu(ri.node_crc), crc);
999 /* FIXME: We could possibly deal with this by writing new holes for each frag */ 1036 /* FIXME: We could possibly deal with this by writing new holes for each frag */
1000 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 1037 pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1001 start, end, f->inocache->ino); 1038 start, end, f->inocache->ino);
1002 goto fill; 1039 goto fill;
1003 } 1040 }
1004 if (ri.compr != JFFS2_COMPR_ZERO) { 1041 if (ri.compr != JFFS2_COMPR_ZERO) {
1005 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw)); 1042 pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1006 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 1043 __func__, ref_offset(fn->raw));
1007 start, end, f->inocache->ino); 1044 pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1045 start, end, f->inocache->ino);
1008 goto fill; 1046 goto fill;
1009 } 1047 }
1010 } else { 1048 } else {
@@ -1043,14 +1081,14 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
1043 ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen, 1081 ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1044 JFFS2_SUMMARY_INODE_SIZE); 1082 JFFS2_SUMMARY_INODE_SIZE);
1045 if (ret) { 1083 if (ret) {
1046 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n", 1084 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1047 sizeof(ri), ret); 1085 sizeof(ri), ret);
1048 return ret; 1086 return ret;
1049 } 1087 }
1050 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC); 1088 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1051 1089
1052 if (IS_ERR(new_fn)) { 1090 if (IS_ERR(new_fn)) {
1053 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn)); 1091 pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1054 return PTR_ERR(new_fn); 1092 return PTR_ERR(new_fn);
1055 } 1093 }
1056 if (je32_to_cpu(ri.version) == f->highest_version) { 1094 if (je32_to_cpu(ri.version) == f->highest_version) {
@@ -1070,9 +1108,9 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
1070 * above.) 1108 * above.)
1071 */ 1109 */
1072 D1(if(unlikely(fn->frags <= 1)) { 1110 D1(if(unlikely(fn->frags <= 1)) {
1073 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n", 1111 pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1074 fn->frags, je32_to_cpu(ri.version), f->highest_version, 1112 __func__, fn->frags, je32_to_cpu(ri.version),
1075 je32_to_cpu(ri.ino)); 1113 f->highest_version, je32_to_cpu(ri.ino));
1076 }); 1114 });
1077 1115
1078 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */ 1116 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
@@ -1089,11 +1127,11 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
1089 } 1127 }
1090 } 1128 }
1091 if (fn->frags) { 1129 if (fn->frags) {
1092 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n"); 1130 pr_warn("%s(): Old node still has frags!\n", __func__);
1093 BUG(); 1131 BUG();
1094 } 1132 }
1095 if (!new_fn->frags) { 1133 if (!new_fn->frags) {
1096 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n"); 1134 pr_warn("%s(): New node has no frags!\n", __func__);
1097 BUG(); 1135 BUG();
1098 } 1136 }
1099 1137
@@ -1117,8 +1155,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1117 1155
1118 memset(&ri, 0, sizeof(ri)); 1156 memset(&ri, 0, sizeof(ri));
1119 1157
1120 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n", 1158 jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1121 f->inocache->ino, start, end)); 1159 f->inocache->ino, start, end);
1122 1160
1123 orig_end = end; 1161 orig_end = end;
1124 orig_start = start; 1162 orig_start = start;
@@ -1149,15 +1187,15 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1149 /* If the previous frag doesn't even reach the beginning, there's 1187 /* If the previous frag doesn't even reach the beginning, there's
1150 excessive fragmentation. Just merge. */ 1188 excessive fragmentation. Just merge. */
1151 if (frag->ofs > min) { 1189 if (frag->ofs > min) {
1152 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n", 1190 jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1153 frag->ofs, frag->ofs+frag->size)); 1191 frag->ofs, frag->ofs+frag->size);
1154 start = frag->ofs; 1192 start = frag->ofs;
1155 continue; 1193 continue;
1156 } 1194 }
1157 /* OK. This frag holds the first byte of the page. */ 1195 /* OK. This frag holds the first byte of the page. */
1158 if (!frag->node || !frag->node->raw) { 1196 if (!frag->node || !frag->node->raw) {
1159 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n", 1197 jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1160 frag->ofs, frag->ofs+frag->size)); 1198 frag->ofs, frag->ofs+frag->size);
1161 break; 1199 break;
1162 } else { 1200 } else {
1163 1201
@@ -1171,19 +1209,25 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1171 jeb = &c->blocks[raw->flash_offset / c->sector_size]; 1209 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1172 1210
1173 if (jeb == c->gcblock) { 1211 if (jeb == c->gcblock) {
1174 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n", 1212 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1175 frag->ofs, frag->ofs+frag->size, ref_offset(raw))); 1213 frag->ofs,
1214 frag->ofs + frag->size,
1215 ref_offset(raw));
1176 start = frag->ofs; 1216 start = frag->ofs;
1177 break; 1217 break;
1178 } 1218 }
1179 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) { 1219 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1180 D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n", 1220 jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1181 frag->ofs, frag->ofs+frag->size, jeb->offset)); 1221 frag->ofs,
1222 frag->ofs + frag->size,
1223 jeb->offset);
1182 break; 1224 break;
1183 } 1225 }
1184 1226
1185 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n", 1227 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1186 frag->ofs, frag->ofs+frag->size, jeb->offset)); 1228 frag->ofs,
1229 frag->ofs + frag->size,
1230 jeb->offset);
1187 start = frag->ofs; 1231 start = frag->ofs;
1188 break; 1232 break;
1189 } 1233 }
@@ -1199,15 +1243,15 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1199 /* If the previous frag doesn't even reach the beginning, there's lots 1243 /* If the previous frag doesn't even reach the beginning, there's lots
1200 of fragmentation. Just merge. */ 1244 of fragmentation. Just merge. */
1201 if (frag->ofs+frag->size < max) { 1245 if (frag->ofs+frag->size < max) {
1202 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n", 1246 jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1203 frag->ofs, frag->ofs+frag->size)); 1247 frag->ofs, frag->ofs+frag->size);
1204 end = frag->ofs + frag->size; 1248 end = frag->ofs + frag->size;
1205 continue; 1249 continue;
1206 } 1250 }
1207 1251
1208 if (!frag->node || !frag->node->raw) { 1252 if (!frag->node || !frag->node->raw) {
1209 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n", 1253 jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1210 frag->ofs, frag->ofs+frag->size)); 1254 frag->ofs, frag->ofs+frag->size);
1211 break; 1255 break;
1212 } else { 1256 } else {
1213 1257
@@ -1221,25 +1265,31 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1221 jeb = &c->blocks[raw->flash_offset / c->sector_size]; 1265 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1222 1266
1223 if (jeb == c->gcblock) { 1267 if (jeb == c->gcblock) {
1224 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n", 1268 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1225 frag->ofs, frag->ofs+frag->size, ref_offset(raw))); 1269 frag->ofs,
1270 frag->ofs + frag->size,
1271 ref_offset(raw));
1226 end = frag->ofs + frag->size; 1272 end = frag->ofs + frag->size;
1227 break; 1273 break;
1228 } 1274 }
1229 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) { 1275 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1230 D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n", 1276 jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1231 frag->ofs, frag->ofs+frag->size, jeb->offset)); 1277 frag->ofs,
1278 frag->ofs + frag->size,
1279 jeb->offset);
1232 break; 1280 break;
1233 } 1281 }
1234 1282
1235 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n", 1283 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1236 frag->ofs, frag->ofs+frag->size, jeb->offset)); 1284 frag->ofs,
1285 frag->ofs + frag->size,
1286 jeb->offset);
1237 end = frag->ofs + frag->size; 1287 end = frag->ofs + frag->size;
1238 break; 1288 break;
1239 } 1289 }
1240 } 1290 }
1241 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n", 1291 jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1242 orig_start, orig_end, start, end)); 1292 orig_start, orig_end, start, end);
1243 1293
1244 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size)); 1294 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1245 BUG_ON(end < orig_end); 1295 BUG_ON(end < orig_end);
@@ -1256,7 +1306,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1256 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); 1306 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1257 1307
1258 if (IS_ERR(pg_ptr)) { 1308 if (IS_ERR(pg_ptr)) {
1259 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr)); 1309 pr_warn("read_cache_page() returned error: %ld\n",
1310 PTR_ERR(pg_ptr));
1260 return PTR_ERR(pg_ptr); 1311 return PTR_ERR(pg_ptr);
1261 } 1312 }
1262 1313
@@ -1270,8 +1321,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1270 &alloclen, JFFS2_SUMMARY_INODE_SIZE); 1321 &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1271 1322
1272 if (ret) { 1323 if (ret) {
1273 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n", 1324 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1274 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret); 1325 sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1275 break; 1326 break;
1276 } 1327 }
1277 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset); 1328 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
@@ -1308,7 +1359,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1308 jffs2_free_comprbuf(comprbuf, writebuf); 1359 jffs2_free_comprbuf(comprbuf, writebuf);
1309 1360
1310 if (IS_ERR(new_fn)) { 1361 if (IS_ERR(new_fn)) {
1311 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn)); 1362 pr_warn("Error writing new dnode: %ld\n",
1363 PTR_ERR(new_fn));
1312 ret = PTR_ERR(new_fn); 1364 ret = PTR_ERR(new_fn);
1313 break; 1365 break;
1314 } 1366 }
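
The gc.c hunks above all follow one mechanical pattern: a debug printk() wrapped in the compile-time D1()/D2() guards becomes a call to jffs2_dbg(1, ...) or jffs2_dbg(2, ...), and the KERN_WARNING/KERN_NOTICE/KERN_CRIT printks become pr_warn()/pr_notice()/pr_crit(). A minimal sketch of the level-gated macro this conversion assumes (the real definition lives in fs/jffs2/debug.h; the exact CONFIG symbol shown here is an assumption):

	/* Sketch only: verbosity-gated debug output in the style of
	 * jffs2_dbg(). Level 1 replaces D1(printk(KERN_DEBUG ...)),
	 * level 2 replaces D2(...). */
	#define jffs2_dbg(level, fmt, ...)				\
	do {								\
		if (CONFIG_JFFS2_FS_DEBUG >= level)			\
			pr_debug(fmt, ##__VA_ARGS__);			\
	} while (0)

Because the body is an ordinary pr_debug(), these messages also pick up the per-file pr_fmt() prefix added in this series, which the old D1(printk(...)) calls never did.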
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index c082868910f..4f47aa24b55 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/slab.h> 15#include <linux/slab.h>
14#include <linux/init.h> 16#include <linux/init.h>
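
Each converted file gains the same two-line preamble before its first include. With pr_fmt() defined, every pr_warn()/pr_notice()/pr_crit()/pr_debug() in the file expands its format string through the macro, so all output is uniformly prefixed with the module name. A sketch of the effect, assuming the stock <linux/printk.h> behaviour where pr_warn(fmt, ...) is essentially printk(KERN_WARNING pr_fmt(fmt), ...):

	/* Must precede the printk.h include chain to take effect. */
	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/printk.h>

	static void example(long err)
	{
		/* Logs "jffs2: Error writing new dnode: -5" --
		 * KBUILD_MODNAME is "jffs2" for objects under fs/jffs2/. */
		pr_warn("Error writing new dnode: %ld\n", err);
	}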
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 5e03233c236..975a1f562c1 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/sched.h> 15#include <linux/sched.h>
14#include <linux/fs.h> 16#include <linux/fs.h>
@@ -687,8 +689,8 @@ int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb
687 if (!size) 689 if (!size)
688 return 0; 690 return 0;
689 if (unlikely(size > jeb->free_size)) { 691 if (unlikely(size > jeb->free_size)) {
690 printk(KERN_CRIT "Dirty space 0x%x larger than free_size 0x%x (wasted 0x%x)\n", 692 pr_crit("Dirty space 0x%x larger than free_size 0x%x (wasted 0x%x)\n",
691 size, jeb->free_size, jeb->wasted_size); 693 size, jeb->free_size, jeb->wasted_size);
692 BUG(); 694 BUG();
693 } 695 }
694 /* REF_EMPTY_NODE is !obsolete, so that works OK */ 696 /* REF_EMPTY_NODE is !obsolete, so that works OK */
@@ -726,8 +728,10 @@ static inline uint32_t __ref_totlen(struct jffs2_sb_info *c,
726 728
727 /* Last node in block. Use free_space */ 729 /* Last node in block. Use free_space */
728 if (unlikely(ref != jeb->last_node)) { 730 if (unlikely(ref != jeb->last_node)) {
729 printk(KERN_CRIT "ref %p @0x%08x is not jeb->last_node (%p @0x%08x)\n", 731 pr_crit("ref %p @0x%08x is not jeb->last_node (%p @0x%08x)\n",
730 ref, ref_offset(ref), jeb->last_node, jeb->last_node?ref_offset(jeb->last_node):0); 732 ref, ref_offset(ref), jeb->last_node,
733 jeb->last_node ?
734 ref_offset(jeb->last_node) : 0);
731 BUG(); 735 BUG();
732 } 736 }
733 ref_end = jeb->offset + c->sector_size - jeb->free_size; 737 ref_end = jeb->offset + c->sector_size - jeb->free_size;
@@ -747,16 +751,20 @@ uint32_t __jffs2_ref_totlen(struct jffs2_sb_info *c, struct jffs2_eraseblock *je
747 if (!jeb) 751 if (!jeb)
748 jeb = &c->blocks[ref->flash_offset / c->sector_size]; 752 jeb = &c->blocks[ref->flash_offset / c->sector_size];
749 753
750 printk(KERN_CRIT "Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n", 754 pr_crit("Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n",
751 ref, ref_offset(ref), ref_offset(ref)+ref->__totlen, 755 ref, ref_offset(ref), ref_offset(ref) + ref->__totlen,
752 ret, ref->__totlen); 756 ret, ref->__totlen);
753 if (ref_next(ref)) { 757 if (ref_next(ref)) {
754 printk(KERN_CRIT "next %p (0x%08x-0x%08x)\n", ref_next(ref), ref_offset(ref_next(ref)), 758 pr_crit("next %p (0x%08x-0x%08x)\n",
755 ref_offset(ref_next(ref))+ref->__totlen); 759 ref_next(ref), ref_offset(ref_next(ref)),
760 ref_offset(ref_next(ref)) + ref->__totlen);
756 } else 761 } else
757 printk(KERN_CRIT "No next ref. jeb->last_node is %p\n", jeb->last_node); 762 pr_crit("No next ref. jeb->last_node is %p\n",
763 jeb->last_node);
758 764
759 printk(KERN_CRIT "jeb->wasted_size %x, dirty_size %x, used_size %x, free_size %x\n", jeb->wasted_size, jeb->dirty_size, jeb->used_size, jeb->free_size); 765 pr_crit("jeb->wasted_size %x, dirty_size %x, used_size %x, free_size %x\n",
766 jeb->wasted_size, jeb->dirty_size, jeb->used_size,
767 jeb->free_size);
760 768
761#if defined(JFFS2_DBG_DUMPS) || defined(JFFS2_DBG_PARANOIA_CHECKS) 769#if defined(JFFS2_DBG_DUMPS) || defined(JFFS2_DBG_PARANOIA_CHECKS)
762 __jffs2_dbg_dump_node_refs_nolock(c, jeb); 770 __jffs2_dbg_dump_node_refs_nolock(c, jeb);
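
Not every D1() disappears in this series. Where the guarded body is a whole statement block rather than a single printk() -- the paranoia checks in nodemgmt.c below, or the bounds check in read.c -- the D1() wrapper stays and only the logging call inside it is converted. The retained shape, taken from the nodemgmt.c hunk below:

	/* The compile-time guard is kept; only the printk() changes. */
	D1(if (unlikely(jeb->used_size < freed_len)) {
		pr_notice("raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n",
			  freed_len, blocknr,
			  ref->flash_offset, jeb->used_size);
		BUG();
	})

jffs2_dbg() only stands in for a D-level around a single debug message; reworking these multi-statement guards is presumably out of scope for a logging cleanup.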
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 694aa5b0350..6784d1e7a7e 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/mtd/mtd.h> 15#include <linux/mtd/mtd.h>
14#include <linux/compiler.h> 16#include <linux/compiler.h>
@@ -46,10 +48,10 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
46 /* align it */ 48 /* align it */
47 minsize = PAD(minsize); 49 minsize = PAD(minsize);
48 50
49 D1(printk(KERN_DEBUG "jffs2_reserve_space(): Requested 0x%x bytes\n", minsize)); 51 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
50 mutex_lock(&c->alloc_sem); 52 mutex_lock(&c->alloc_sem);
51 53
52 D1(printk(KERN_DEBUG "jffs2_reserve_space(): alloc sem got\n")); 54 jffs2_dbg(1, "%s(): alloc sem got\n", __func__);
53 55
54 spin_lock(&c->erase_completion_lock); 56 spin_lock(&c->erase_completion_lock);
55 57
@@ -73,11 +75,13 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
73 dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size + c->unchecked_size; 75 dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size + c->unchecked_size;
74 if (dirty < c->nospc_dirty_size) { 76 if (dirty < c->nospc_dirty_size) {
75 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) { 77 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
76 D1(printk(KERN_NOTICE "jffs2_reserve_space(): Low on dirty space to GC, but it's a deletion. Allowing...\n")); 78 jffs2_dbg(1, "%s(): Low on dirty space to GC, but it's a deletion. Allowing...\n",
79 __func__);
77 break; 80 break;
78 } 81 }
79 D1(printk(KERN_DEBUG "dirty size 0x%08x + unchecked_size 0x%08x < nospc_dirty_size 0x%08x, returning -ENOSPC\n", 82 jffs2_dbg(1, "dirty size 0x%08x + unchecked_size 0x%08x < nospc_dirty_size 0x%08x, returning -ENOSPC\n",
80 dirty, c->unchecked_size, c->sector_size)); 83 dirty, c->unchecked_size,
84 c->sector_size);
81 85
82 spin_unlock(&c->erase_completion_lock); 86 spin_unlock(&c->erase_completion_lock);
83 mutex_unlock(&c->alloc_sem); 87 mutex_unlock(&c->alloc_sem);
@@ -96,12 +100,13 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
96 avail = c->free_size + c->dirty_size + c->erasing_size + c->unchecked_size; 100 avail = c->free_size + c->dirty_size + c->erasing_size + c->unchecked_size;
97 if ( (avail / c->sector_size) <= blocksneeded) { 101 if ( (avail / c->sector_size) <= blocksneeded) {
98 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) { 102 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
99 D1(printk(KERN_NOTICE "jffs2_reserve_space(): Low on possibly available space, but it's a deletion. Allowing...\n")); 103 jffs2_dbg(1, "%s(): Low on possibly available space, but it's a deletion. Allowing...\n",
104 __func__);
100 break; 105 break;
101 } 106 }
102 107
103 D1(printk(KERN_DEBUG "max. available size 0x%08x < blocksneeded * sector_size 0x%08x, returning -ENOSPC\n", 108 jffs2_dbg(1, "max. available size 0x%08x < blocksneeded * sector_size 0x%08x, returning -ENOSPC\n",
104 avail, blocksneeded * c->sector_size)); 109 avail, blocksneeded * c->sector_size);
105 spin_unlock(&c->erase_completion_lock); 110 spin_unlock(&c->erase_completion_lock);
106 mutex_unlock(&c->alloc_sem); 111 mutex_unlock(&c->alloc_sem);
107 return -ENOSPC; 112 return -ENOSPC;
@@ -109,9 +114,14 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
109 114
110 mutex_unlock(&c->alloc_sem); 115 mutex_unlock(&c->alloc_sem);
111 116
112 D1(printk(KERN_DEBUG "Triggering GC pass. nr_free_blocks %d, nr_erasing_blocks %d, free_size 0x%08x, dirty_size 0x%08x, wasted_size 0x%08x, used_size 0x%08x, erasing_size 0x%08x, bad_size 0x%08x (total 0x%08x of 0x%08x)\n", 117 jffs2_dbg(1, "Triggering GC pass. nr_free_blocks %d, nr_erasing_blocks %d, free_size 0x%08x, dirty_size 0x%08x, wasted_size 0x%08x, used_size 0x%08x, erasing_size 0x%08x, bad_size 0x%08x (total 0x%08x of 0x%08x)\n",
113 c->nr_free_blocks, c->nr_erasing_blocks, c->free_size, c->dirty_size, c->wasted_size, c->used_size, c->erasing_size, c->bad_size, 118 c->nr_free_blocks, c->nr_erasing_blocks,
114 c->free_size + c->dirty_size + c->wasted_size + c->used_size + c->erasing_size + c->bad_size, c->flash_size)); 119 c->free_size, c->dirty_size, c->wasted_size,
120 c->used_size, c->erasing_size, c->bad_size,
121 c->free_size + c->dirty_size +
122 c->wasted_size + c->used_size +
123 c->erasing_size + c->bad_size,
124 c->flash_size);
115 spin_unlock(&c->erase_completion_lock); 125 spin_unlock(&c->erase_completion_lock);
116 126
117 ret = jffs2_garbage_collect_pass(c); 127 ret = jffs2_garbage_collect_pass(c);
@@ -124,7 +134,8 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
124 DECLARE_WAITQUEUE(wait, current); 134 DECLARE_WAITQUEUE(wait, current);
125 set_current_state(TASK_UNINTERRUPTIBLE); 135 set_current_state(TASK_UNINTERRUPTIBLE);
126 add_wait_queue(&c->erase_wait, &wait); 136 add_wait_queue(&c->erase_wait, &wait);
127 D1(printk(KERN_DEBUG "%s waiting for erase to complete\n", __func__)); 137 jffs2_dbg(1, "%s waiting for erase to complete\n",
138 __func__);
128 spin_unlock(&c->erase_completion_lock); 139 spin_unlock(&c->erase_completion_lock);
129 140
130 schedule(); 141 schedule();
@@ -144,7 +155,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
144 155
145 ret = jffs2_do_reserve_space(c, minsize, len, sumsize); 156 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
146 if (ret) { 157 if (ret) {
147 D1(printk(KERN_DEBUG "jffs2_reserve_space: ret is %d\n", ret)); 158 jffs2_dbg(1, "%s(): ret is %d\n", __func__, ret);
148 } 159 }
149 } 160 }
150 spin_unlock(&c->erase_completion_lock); 161 spin_unlock(&c->erase_completion_lock);
@@ -161,13 +172,14 @@ int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
161 int ret = -EAGAIN; 172 int ret = -EAGAIN;
162 minsize = PAD(minsize); 173 minsize = PAD(minsize);
163 174
164 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc(): Requested 0x%x bytes\n", minsize)); 175 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
165 176
166 spin_lock(&c->erase_completion_lock); 177 spin_lock(&c->erase_completion_lock);
167 while(ret == -EAGAIN) { 178 while(ret == -EAGAIN) {
168 ret = jffs2_do_reserve_space(c, minsize, len, sumsize); 179 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
169 if (ret) { 180 if (ret) {
170 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret)); 181 jffs2_dbg(1, "%s(): looping, ret is %d\n",
182 __func__, ret);
171 } 183 }
172 } 184 }
173 spin_unlock(&c->erase_completion_lock); 185 spin_unlock(&c->erase_completion_lock);
@@ -184,8 +196,8 @@ static void jffs2_close_nextblock(struct jffs2_sb_info *c, struct jffs2_eraseblo
184{ 196{
185 197
186 if (c->nextblock == NULL) { 198 if (c->nextblock == NULL) {
187 D1(printk(KERN_DEBUG "jffs2_close_nextblock: Erase block at 0x%08x has already been placed in a list\n", 199 jffs2_dbg(1, "%s(): Erase block at 0x%08x has already been placed in a list\n",
188 jeb->offset)); 200 __func__, jeb->offset);
189 return; 201 return;
190 } 202 }
191 /* Check, if we have a dirty block now, or if it was dirty already */ 203 /* Check, if we have a dirty block now, or if it was dirty already */
@@ -195,17 +207,20 @@ static void jffs2_close_nextblock(struct jffs2_sb_info *c, struct jffs2_eraseblo
195 jeb->dirty_size += jeb->wasted_size; 207 jeb->dirty_size += jeb->wasted_size;
196 jeb->wasted_size = 0; 208 jeb->wasted_size = 0;
197 if (VERYDIRTY(c, jeb->dirty_size)) { 209 if (VERYDIRTY(c, jeb->dirty_size)) {
198 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to very_dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n", 210 jffs2_dbg(1, "Adding full erase block at 0x%08x to very_dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
199 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 211 jeb->offset, jeb->free_size, jeb->dirty_size,
212 jeb->used_size);
200 list_add_tail(&jeb->list, &c->very_dirty_list); 213 list_add_tail(&jeb->list, &c->very_dirty_list);
201 } else { 214 } else {
202 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n", 215 jffs2_dbg(1, "Adding full erase block at 0x%08x to dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
203 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 216 jeb->offset, jeb->free_size, jeb->dirty_size,
217 jeb->used_size);
204 list_add_tail(&jeb->list, &c->dirty_list); 218 list_add_tail(&jeb->list, &c->dirty_list);
205 } 219 }
206 } else { 220 } else {
207 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n", 221 jffs2_dbg(1, "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
208 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 222 jeb->offset, jeb->free_size, jeb->dirty_size,
223 jeb->used_size);
209 list_add_tail(&jeb->list, &c->clean_list); 224 list_add_tail(&jeb->list, &c->clean_list);
210 } 225 }
211 c->nextblock = NULL; 226 c->nextblock = NULL;
@@ -230,13 +245,14 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
230 list_move_tail(&ejeb->list, &c->erase_pending_list); 245 list_move_tail(&ejeb->list, &c->erase_pending_list);
231 c->nr_erasing_blocks++; 246 c->nr_erasing_blocks++;
232 jffs2_garbage_collect_trigger(c); 247 jffs2_garbage_collect_trigger(c);
233 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 248 jffs2_dbg(1, "%s(): Triggering erase of erasable block at 0x%08x\n",
234 ejeb->offset)); 249 __func__, ejeb->offset);
235 } 250 }
236 251
237 if (!c->nr_erasing_blocks && 252 if (!c->nr_erasing_blocks &&
238 !list_empty(&c->erasable_pending_wbuf_list)) { 253 !list_empty(&c->erasable_pending_wbuf_list)) {
239 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Flushing write buffer\n")); 254 jffs2_dbg(1, "%s(): Flushing write buffer\n",
255 __func__);
240 /* c->nextblock is NULL, no update to c->nextblock allowed */ 256 /* c->nextblock is NULL, no update to c->nextblock allowed */
241 spin_unlock(&c->erase_completion_lock); 257 spin_unlock(&c->erase_completion_lock);
242 jffs2_flush_wbuf_pad(c); 258 jffs2_flush_wbuf_pad(c);
@@ -248,9 +264,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
248 if (!c->nr_erasing_blocks) { 264 if (!c->nr_erasing_blocks) {
249 /* Ouch. We're in GC, or we wouldn't have got here. 265 /* Ouch. We're in GC, or we wouldn't have got here.
250 And there's no space left. At all. */ 266 And there's no space left. At all. */
251 printk(KERN_CRIT "Argh. No free space left for GC. nr_erasing_blocks is %d. nr_free_blocks is %d. (erasableempty: %s, erasingempty: %s, erasependingempty: %s)\n", 267 pr_crit("Argh. No free space left for GC. nr_erasing_blocks is %d. nr_free_blocks is %d. (erasableempty: %s, erasingempty: %s, erasependingempty: %s)\n",
252 c->nr_erasing_blocks, c->nr_free_blocks, list_empty(&c->erasable_list)?"yes":"no", 268 c->nr_erasing_blocks, c->nr_free_blocks,
253 list_empty(&c->erasing_list)?"yes":"no", list_empty(&c->erase_pending_list)?"yes":"no"); 269 list_empty(&c->erasable_list) ? "yes" : "no",
270 list_empty(&c->erasing_list) ? "yes" : "no",
271 list_empty(&c->erase_pending_list) ? "yes" : "no");
254 return -ENOSPC; 272 return -ENOSPC;
255 } 273 }
256 274
@@ -278,7 +296,8 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
278 c->wbuf_ofs = 0xffffffff; 296 c->wbuf_ofs = 0xffffffff;
279#endif 297#endif
280 298
281 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); 299 jffs2_dbg(1, "%s(): new nextblock = 0x%08x\n",
300 __func__, c->nextblock->offset);
282 301
283 return 0; 302 return 0;
284} 303}
@@ -345,7 +364,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
345 364
346 if (jffs2_wbuf_dirty(c)) { 365 if (jffs2_wbuf_dirty(c)) {
347 spin_unlock(&c->erase_completion_lock); 366 spin_unlock(&c->erase_completion_lock);
348 D1(printk(KERN_DEBUG "jffs2_do_reserve_space: Flushing write buffer\n")); 367 jffs2_dbg(1, "%s(): Flushing write buffer\n",
368 __func__);
349 jffs2_flush_wbuf_pad(c); 369 jffs2_flush_wbuf_pad(c);
350 spin_lock(&c->erase_completion_lock); 370 spin_lock(&c->erase_completion_lock);
351 jeb = c->nextblock; 371 jeb = c->nextblock;
@@ -387,7 +407,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
387 jeb = c->nextblock; 407 jeb = c->nextblock;
388 408
389 if (jeb->free_size != c->sector_size - c->cleanmarker_size) { 409 if (jeb->free_size != c->sector_size - c->cleanmarker_size) {
390 printk(KERN_WARNING "Eep. Block 0x%08x taken from free_list had free_size of 0x%08x!!\n", jeb->offset, jeb->free_size); 410 pr_warn("Eep. Block 0x%08x taken from free_list had free_size of 0x%08x!!\n",
411 jeb->offset, jeb->free_size);
391 goto restart; 412 goto restart;
392 } 413 }
393 } 414 }
@@ -408,8 +429,9 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
408 spin_lock(&c->erase_completion_lock); 429 spin_lock(&c->erase_completion_lock);
409 } 430 }
410 431
411 D1(printk(KERN_DEBUG "jffs2_do_reserve_space(): Giving 0x%x bytes at 0x%x\n", 432 jffs2_dbg(1, "%s(): Giving 0x%x bytes at 0x%x\n",
412 *len, jeb->offset + (c->sector_size - jeb->free_size))); 433 __func__,
434 *len, jeb->offset + (c->sector_size - jeb->free_size));
413 return 0; 435 return 0;
414} 436}
415 437
@@ -434,20 +456,22 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
434 456
435 jeb = &c->blocks[ofs / c->sector_size]; 457 jeb = &c->blocks[ofs / c->sector_size];
436 458
437 D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", 459 jffs2_dbg(1, "%s(): Node at 0x%x(%d), size 0x%x\n",
438 ofs & ~3, ofs & 3, len)); 460 __func__, ofs & ~3, ofs & 3, len);
439#if 1 461#if 1
440 /* Allow non-obsolete nodes only to be added at the end of c->nextblock, 462 /* Allow non-obsolete nodes only to be added at the end of c->nextblock,
441 if c->nextblock is set. Note that wbuf.c will file obsolete nodes 463 if c->nextblock is set. Note that wbuf.c will file obsolete nodes
442 even after refiling c->nextblock */ 464 even after refiling c->nextblock */
443 if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE)) 465 if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE))
444 && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) { 466 && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) {
445 printk(KERN_WARNING "argh. node added in wrong place at 0x%08x(%d)\n", ofs & ~3, ofs & 3); 467 pr_warn("argh. node added in wrong place at 0x%08x(%d)\n",
468 ofs & ~3, ofs & 3);
446 if (c->nextblock) 469 if (c->nextblock)
447 printk(KERN_WARNING "nextblock 0x%08x", c->nextblock->offset); 470 pr_warn("nextblock 0x%08x", c->nextblock->offset);
448 else 471 else
449 printk(KERN_WARNING "No nextblock"); 472 pr_warn("No nextblock");
450 printk(", expected at %08x\n", jeb->offset + (c->sector_size - jeb->free_size)); 473 pr_cont(", expected at %08x\n",
474 jeb->offset + (c->sector_size - jeb->free_size));
451 return ERR_PTR(-EINVAL); 475 return ERR_PTR(-EINVAL);
452 } 476 }
453#endif 477#endif
@@ -457,8 +481,9 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
457 481
458 if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) { 482 if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) {
459 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */ 483 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */
460 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n", 484 jffs2_dbg(1, "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
461 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 485 jeb->offset, jeb->free_size, jeb->dirty_size,
486 jeb->used_size);
462 if (jffs2_wbuf_dirty(c)) { 487 if (jffs2_wbuf_dirty(c)) {
463 /* Flush the last write in the block if it's outstanding */ 488 /* Flush the last write in the block if it's outstanding */
464 spin_unlock(&c->erase_completion_lock); 489 spin_unlock(&c->erase_completion_lock);
@@ -480,7 +505,7 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
480 505
481void jffs2_complete_reservation(struct jffs2_sb_info *c) 506void jffs2_complete_reservation(struct jffs2_sb_info *c)
482{ 507{
483 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n")); 508 jffs2_dbg(1, "jffs2_complete_reservation()\n");
484 spin_lock(&c->erase_completion_lock); 509 spin_lock(&c->erase_completion_lock);
485 jffs2_garbage_collect_trigger(c); 510 jffs2_garbage_collect_trigger(c);
486 spin_unlock(&c->erase_completion_lock); 511 spin_unlock(&c->erase_completion_lock);
@@ -493,7 +518,7 @@ static inline int on_list(struct list_head *obj, struct list_head *head)
493 518
494 list_for_each(this, head) { 519 list_for_each(this, head) {
495 if (this == obj) { 520 if (this == obj) {
496 D1(printk("%p is on list at %p\n", obj, head)); 521 jffs2_dbg(1, "%p is on list at %p\n", obj, head);
497 return 1; 522 return 1;
498 523
499 } 524 }
@@ -511,16 +536,18 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
511 uint32_t freed_len; 536 uint32_t freed_len;
512 537
513 if(unlikely(!ref)) { 538 if(unlikely(!ref)) {
514 printk(KERN_NOTICE "EEEEEK. jffs2_mark_node_obsolete called with NULL node\n"); 539 pr_notice("EEEEEK. jffs2_mark_node_obsolete called with NULL node\n");
515 return; 540 return;
516 } 541 }
517 if (ref_obsolete(ref)) { 542 if (ref_obsolete(ref)) {
518 D1(printk(KERN_DEBUG "jffs2_mark_node_obsolete called with already obsolete node at 0x%08x\n", ref_offset(ref))); 543 jffs2_dbg(1, "%s(): called with already obsolete node at 0x%08x\n",
544 __func__, ref_offset(ref));
519 return; 545 return;
520 } 546 }
521 blocknr = ref->flash_offset / c->sector_size; 547 blocknr = ref->flash_offset / c->sector_size;
522 if (blocknr >= c->nr_blocks) { 548 if (blocknr >= c->nr_blocks) {
523 printk(KERN_NOTICE "raw node at 0x%08x is off the end of device!\n", ref->flash_offset); 549 pr_notice("raw node at 0x%08x is off the end of device!\n",
550 ref->flash_offset);
524 BUG(); 551 BUG();
525 } 552 }
526 jeb = &c->blocks[blocknr]; 553 jeb = &c->blocks[blocknr];
@@ -542,27 +569,31 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
542 569
543 if (ref_flags(ref) == REF_UNCHECKED) { 570 if (ref_flags(ref) == REF_UNCHECKED) {
544 D1(if (unlikely(jeb->unchecked_size < freed_len)) { 571 D1(if (unlikely(jeb->unchecked_size < freed_len)) {
545 printk(KERN_NOTICE "raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n", 572 pr_notice("raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n",
546 freed_len, blocknr, ref->flash_offset, jeb->used_size); 573 freed_len, blocknr,
574 ref->flash_offset, jeb->used_size);
547 BUG(); 575 BUG();
548 }) 576 })
549 D1(printk(KERN_DEBUG "Obsoleting previously unchecked node at 0x%08x of len %x: ", ref_offset(ref), freed_len)); 577 jffs2_dbg(1, "Obsoleting previously unchecked node at 0x%08x of len %x\n",
578 ref_offset(ref), freed_len);
550 jeb->unchecked_size -= freed_len; 579 jeb->unchecked_size -= freed_len;
551 c->unchecked_size -= freed_len; 580 c->unchecked_size -= freed_len;
552 } else { 581 } else {
553 D1(if (unlikely(jeb->used_size < freed_len)) { 582 D1(if (unlikely(jeb->used_size < freed_len)) {
554 printk(KERN_NOTICE "raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n", 583 pr_notice("raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n",
555 freed_len, blocknr, ref->flash_offset, jeb->used_size); 584 freed_len, blocknr,
585 ref->flash_offset, jeb->used_size);
556 BUG(); 586 BUG();
557 }) 587 })
558 D1(printk(KERN_DEBUG "Obsoleting node at 0x%08x of len %#x: ", ref_offset(ref), freed_len)); 588 jffs2_dbg(1, "Obsoleting node at 0x%08x of len %#x: ",
589 ref_offset(ref), freed_len);
559 jeb->used_size -= freed_len; 590 jeb->used_size -= freed_len;
560 c->used_size -= freed_len; 591 c->used_size -= freed_len;
561 } 592 }
562 593
563 // Take care that wasted size is taken into account 594
564 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + freed_len)) && jeb != c->nextblock) { 595 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + freed_len)) && jeb != c->nextblock) {
565 D1(printk("Dirtying\n")); 596 jffs2_dbg(1, "Dirtying\n");
566 addedsize = freed_len; 597 addedsize = freed_len;
567 jeb->dirty_size += freed_len; 598 jeb->dirty_size += freed_len;
568 c->dirty_size += freed_len; 599 c->dirty_size += freed_len;
@@ -570,12 +601,12 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
570 /* Convert wasted space to dirty, if not a bad block */ 601 /* Convert wasted space to dirty, if not a bad block */
571 if (jeb->wasted_size) { 602 if (jeb->wasted_size) {
572 if (on_list(&jeb->list, &c->bad_used_list)) { 603 if (on_list(&jeb->list, &c->bad_used_list)) {
573 D1(printk(KERN_DEBUG "Leaving block at %08x on the bad_used_list\n", 604 jffs2_dbg(1, "Leaving block at %08x on the bad_used_list\n",
574 jeb->offset)); 605 jeb->offset);
575 addedsize = 0; /* To fool the refiling code later */ 606 addedsize = 0; /* To fool the refiling code later */
576 } else { 607 } else {
577 D1(printk(KERN_DEBUG "Converting %d bytes of wasted space to dirty in block at %08x\n", 608 jffs2_dbg(1, "Converting %d bytes of wasted space to dirty in block at %08x\n",
578 jeb->wasted_size, jeb->offset)); 609 jeb->wasted_size, jeb->offset);
579 addedsize += jeb->wasted_size; 610 addedsize += jeb->wasted_size;
580 jeb->dirty_size += jeb->wasted_size; 611 jeb->dirty_size += jeb->wasted_size;
581 c->dirty_size += jeb->wasted_size; 612 c->dirty_size += jeb->wasted_size;
@@ -584,7 +615,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
584 } 615 }
585 } 616 }
586 } else { 617 } else {
587 D1(printk("Wasting\n")); 618 jffs2_dbg(1, "Wasting\n");
588 addedsize = 0; 619 addedsize = 0;
589 jeb->wasted_size += freed_len; 620 jeb->wasted_size += freed_len;
590 c->wasted_size += freed_len; 621 c->wasted_size += freed_len;
@@ -606,50 +637,57 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
606 } 637 }
607 638
608 if (jeb == c->nextblock) { 639 if (jeb == c->nextblock) {
609 D2(printk(KERN_DEBUG "Not moving nextblock 0x%08x to dirty/erase_pending list\n", jeb->offset)); 640 jffs2_dbg(2, "Not moving nextblock 0x%08x to dirty/erase_pending list\n",
641 jeb->offset);
610 } else if (!jeb->used_size && !jeb->unchecked_size) { 642 } else if (!jeb->used_size && !jeb->unchecked_size) {
611 if (jeb == c->gcblock) { 643 if (jeb == c->gcblock) {
612 D1(printk(KERN_DEBUG "gcblock at 0x%08x completely dirtied. Clearing gcblock...\n", jeb->offset)); 644 jffs2_dbg(1, "gcblock at 0x%08x completely dirtied. Clearing gcblock...\n",
645 jeb->offset);
613 c->gcblock = NULL; 646 c->gcblock = NULL;
614 } else { 647 } else {
615 D1(printk(KERN_DEBUG "Eraseblock at 0x%08x completely dirtied. Removing from (dirty?) list...\n", jeb->offset)); 648 jffs2_dbg(1, "Eraseblock at 0x%08x completely dirtied. Removing from (dirty?) list...\n",
649 jeb->offset);
616 list_del(&jeb->list); 650 list_del(&jeb->list);
617 } 651 }
618 if (jffs2_wbuf_dirty(c)) { 652 if (jffs2_wbuf_dirty(c)) {
619 D1(printk(KERN_DEBUG "...and adding to erasable_pending_wbuf_list\n")); 653 jffs2_dbg(1, "...and adding to erasable_pending_wbuf_list\n");
620 list_add_tail(&jeb->list, &c->erasable_pending_wbuf_list); 654 list_add_tail(&jeb->list, &c->erasable_pending_wbuf_list);
621 } else { 655 } else {
622 if (jiffies & 127) { 656 if (jiffies & 127) {
623 /* Most of the time, we just erase it immediately. Otherwise we 657 /* Most of the time, we just erase it immediately. Otherwise we
624 spend ages scanning it on mount, etc. */ 658 spend ages scanning it on mount, etc. */
625 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 659 jffs2_dbg(1, "...and adding to erase_pending_list\n");
626 list_add_tail(&jeb->list, &c->erase_pending_list); 660 list_add_tail(&jeb->list, &c->erase_pending_list);
627 c->nr_erasing_blocks++; 661 c->nr_erasing_blocks++;
628 jffs2_garbage_collect_trigger(c); 662 jffs2_garbage_collect_trigger(c);
629 } else { 663 } else {
630 /* Sometimes, however, we leave it elsewhere so it doesn't get 664 /* Sometimes, however, we leave it elsewhere so it doesn't get
631 immediately reused, and we spread the load a bit. */ 665 immediately reused, and we spread the load a bit. */
632 D1(printk(KERN_DEBUG "...and adding to erasable_list\n")); 666 jffs2_dbg(1, "...and adding to erasable_list\n");
633 list_add_tail(&jeb->list, &c->erasable_list); 667 list_add_tail(&jeb->list, &c->erasable_list);
634 } 668 }
635 } 669 }
636 D1(printk(KERN_DEBUG "Done OK\n")); 670 jffs2_dbg(1, "Done OK\n");
637 } else if (jeb == c->gcblock) { 671 } else if (jeb == c->gcblock) {
638 D2(printk(KERN_DEBUG "Not moving gcblock 0x%08x to dirty_list\n", jeb->offset)); 672 jffs2_dbg(2, "Not moving gcblock 0x%08x to dirty_list\n",
673 jeb->offset);
639 } else if (ISDIRTY(jeb->dirty_size) && !ISDIRTY(jeb->dirty_size - addedsize)) { 674 } else if (ISDIRTY(jeb->dirty_size) && !ISDIRTY(jeb->dirty_size - addedsize)) {
640 D1(printk(KERN_DEBUG "Eraseblock at 0x%08x is freshly dirtied. Removing from clean list...\n", jeb->offset)); 675 jffs2_dbg(1, "Eraseblock at 0x%08x is freshly dirtied. Removing from clean list...\n",
676 jeb->offset);
641 list_del(&jeb->list); 677 list_del(&jeb->list);
642 D1(printk(KERN_DEBUG "...and adding to dirty_list\n")); 678 jffs2_dbg(1, "...and adding to dirty_list\n");
643 list_add_tail(&jeb->list, &c->dirty_list); 679 list_add_tail(&jeb->list, &c->dirty_list);
644 } else if (VERYDIRTY(c, jeb->dirty_size) && 680 } else if (VERYDIRTY(c, jeb->dirty_size) &&
645 !VERYDIRTY(c, jeb->dirty_size - addedsize)) { 681 !VERYDIRTY(c, jeb->dirty_size - addedsize)) {
646 D1(printk(KERN_DEBUG "Eraseblock at 0x%08x is now very dirty. Removing from dirty list...\n", jeb->offset)); 682 jffs2_dbg(1, "Eraseblock at 0x%08x is now very dirty. Removing from dirty list...\n",
683 jeb->offset);
647 list_del(&jeb->list); 684 list_del(&jeb->list);
648 D1(printk(KERN_DEBUG "...and adding to very_dirty_list\n")); 685 jffs2_dbg(1, "...and adding to very_dirty_list\n");
649 list_add_tail(&jeb->list, &c->very_dirty_list); 686 list_add_tail(&jeb->list, &c->very_dirty_list);
650 } else { 687 } else {
651 D1(printk(KERN_DEBUG "Eraseblock at 0x%08x not moved anywhere. (free 0x%08x, dirty 0x%08x, used 0x%08x)\n", 688 jffs2_dbg(1, "Eraseblock at 0x%08x not moved anywhere. (free 0x%08x, dirty 0x%08x, used 0x%08x)\n",
652 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 689 jeb->offset, jeb->free_size, jeb->dirty_size,
690 jeb->used_size);
653 } 691 }
654 692
655 spin_unlock(&c->erase_completion_lock); 693 spin_unlock(&c->erase_completion_lock);
@@ -665,33 +703,40 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
665 the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet 703 the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet
666 by jffs2_free_jeb_node_refs() in erase.c. Which is nice. */ 704 by jffs2_free_jeb_node_refs() in erase.c. Which is nice. */
667 705
668 D1(printk(KERN_DEBUG "obliterating obsoleted node at 0x%08x\n", ref_offset(ref))); 706 jffs2_dbg(1, "obliterating obsoleted node at 0x%08x\n",
707 ref_offset(ref));
669 ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n); 708 ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n);
670 if (ret) { 709 if (ret) {
671 printk(KERN_WARNING "Read error reading from obsoleted node at 0x%08x: %d\n", ref_offset(ref), ret); 710 pr_warn("Read error reading from obsoleted node at 0x%08x: %d\n",
711 ref_offset(ref), ret);
672 goto out_erase_sem; 712 goto out_erase_sem;
673 } 713 }
674 if (retlen != sizeof(n)) { 714 if (retlen != sizeof(n)) {
675 printk(KERN_WARNING "Short read from obsoleted node at 0x%08x: %zd\n", ref_offset(ref), retlen); 715 pr_warn("Short read from obsoleted node at 0x%08x: %zd\n",
716 ref_offset(ref), retlen);
676 goto out_erase_sem; 717 goto out_erase_sem;
677 } 718 }
678 if (PAD(je32_to_cpu(n.totlen)) != PAD(freed_len)) { 719 if (PAD(je32_to_cpu(n.totlen)) != PAD(freed_len)) {
679 printk(KERN_WARNING "Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n", je32_to_cpu(n.totlen), freed_len); 720 pr_warn("Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n",
721 je32_to_cpu(n.totlen), freed_len);
680 goto out_erase_sem; 722 goto out_erase_sem;
681 } 723 }
682 if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) { 724 if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) {
683 D1(printk(KERN_DEBUG "Node at 0x%08x was already marked obsolete (nodetype 0x%04x)\n", ref_offset(ref), je16_to_cpu(n.nodetype))); 725 jffs2_dbg(1, "Node at 0x%08x was already marked obsolete (nodetype 0x%04x)\n",
726 ref_offset(ref), je16_to_cpu(n.nodetype));
684 goto out_erase_sem; 727 goto out_erase_sem;
685 } 728 }
686 /* XXX FIXME: This is ugly now */ 729 /* XXX FIXME: This is ugly now */
687 n.nodetype = cpu_to_je16(je16_to_cpu(n.nodetype) & ~JFFS2_NODE_ACCURATE); 730 n.nodetype = cpu_to_je16(je16_to_cpu(n.nodetype) & ~JFFS2_NODE_ACCURATE);
688 ret = jffs2_flash_write(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n); 731 ret = jffs2_flash_write(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n);
689 if (ret) { 732 if (ret) {
690 printk(KERN_WARNING "Write error in obliterating obsoleted node at 0x%08x: %d\n", ref_offset(ref), ret); 733 pr_warn("Write error in obliterating obsoleted node at 0x%08x: %d\n",
734 ref_offset(ref), ret);
691 goto out_erase_sem; 735 goto out_erase_sem;
692 } 736 }
693 if (retlen != sizeof(n)) { 737 if (retlen != sizeof(n)) {
694 printk(KERN_WARNING "Short write in obliterating obsoleted node at 0x%08x: %zd\n", ref_offset(ref), retlen); 738 pr_warn("Short write in obliterating obsoleted node at 0x%08x: %zd\n",
739 ref_offset(ref), retlen);
695 goto out_erase_sem; 740 goto out_erase_sem;
696 } 741 }
697 742
@@ -751,8 +796,8 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
751 return 1; 796 return 1;
752 797
753 if (c->unchecked_size) { 798 if (c->unchecked_size) {
754 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n", 799 jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
755 c->unchecked_size, c->checked_ino)); 800 c->unchecked_size, c->checked_ino);
756 return 1; 801 return 1;
757 } 802 }
758 803
@@ -780,8 +825,9 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
780 } 825 }
781 } 826 }
782 827
783 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): nr_free_blocks %d, nr_erasing_blocks %d, dirty_size 0x%x, vdirty_blocks %d: %s\n", 828 jffs2_dbg(1, "%s(): nr_free_blocks %d, nr_erasing_blocks %d, dirty_size 0x%x, vdirty_blocks %d: %s\n",
784 c->nr_free_blocks, c->nr_erasing_blocks, c->dirty_size, nr_very_dirty, ret?"yes":"no")); 829 __func__, c->nr_free_blocks, c->nr_erasing_blocks,
830 c->dirty_size, nr_very_dirty, ret ? "yes" : "no");
785 831
786 return ret; 832 return ret;
787} 833}
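
One nodemgmt.c hunk above also swaps a bare continuation printk() for pr_cont(). The distinction matters once pr_fmt() is in force: a second pr_warn() would open a new log record and inject another "jffs2: " prefix mid-sentence, whereas pr_cont() appends to the record the preceding pr_warn() started. The resulting shape:

	if (c->nextblock)
		pr_warn("nextblock 0x%08x", c->nextblock->offset); /* no \n yet */
	else
		pr_warn("No nextblock");
	/* Continue the same line: no new record, no repeated prefix. */
	pr_cont(", expected at %08x\n",
		jeb->offset + (c->sector_size - jeb->free_size));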
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index ab65ee3ec85..1cd3aec9d9a 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -76,7 +76,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
76#define jffs2_write_nand_cleanmarker(c,jeb) (-EIO) 76#define jffs2_write_nand_cleanmarker(c,jeb) (-EIO)
77 77
78#define jffs2_flash_write(c, ofs, len, retlen, buf) jffs2_flash_direct_write(c, ofs, len, retlen, buf) 78#define jffs2_flash_write(c, ofs, len, retlen, buf) jffs2_flash_direct_write(c, ofs, len, retlen, buf)
79#define jffs2_flash_read(c, ofs, len, retlen, buf) ((c)->mtd->read((c)->mtd, ofs, len, retlen, buf)) 79#define jffs2_flash_read(c, ofs, len, retlen, buf) (mtd_read((c)->mtd, ofs, len, retlen, buf))
80#define jffs2_flush_wbuf_pad(c) ({ do{} while(0); (void)(c), 0; }) 80#define jffs2_flush_wbuf_pad(c) ({ do{} while(0); (void)(c), 0; })
81#define jffs2_flush_wbuf_gc(c, i) ({ do{} while(0); (void)(c), (void) i, 0; }) 81#define jffs2_flush_wbuf_gc(c, i) ({ do{} while(0); (void)(c), (void) i, 0; })
82#define jffs2_write_nand_badblock(c,jeb,bad_offset) (1) 82#define jffs2_write_nand_badblock(c,jeb,bad_offset) (1)
@@ -108,8 +108,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
108 108
109#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH) 109#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH)
110 110
111#define jffs2_flash_write_oob(c, ofs, len, retlen, buf) ((c)->mtd->write_oob((c)->mtd, ofs, len, retlen, buf))
112#define jffs2_flash_read_oob(c, ofs, len, retlen, buf) ((c)->mtd->read_oob((c)->mtd, ofs, len, retlen, buf))
113#define jffs2_wbuf_dirty(c) (!!(c)->wbuf_len) 111#define jffs2_wbuf_dirty(c) (!!(c)->wbuf_len)
114 112
115/* wbuf.c */ 113/* wbuf.c */
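
The os-linux.h change moves the non-wbuf jffs2_flash_read() path from a direct (c)->mtd->read() method call to the mtd_read() accessor, and drops the raw OOB macros along the way. At this point in the MTD API's history mtd_read() was a thin static-inline wrapper (later kernels fold bounds checking and return-code normalisation into it); roughly:

	/* From <linux/mtd/mtd.h> of this era, reproduced as a sketch:
	 * a plain indirection through the driver's read method. */
	static inline int mtd_read(struct mtd_info *mtd, loff_t from,
				   size_t len, size_t *retlen, u_char *buf)
	{
		return mtd->read(mtd, from, len, retlen, buf);
	}

Going through the accessor lets the MTD core later centralise argument checking without touching every filesystem call site.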
diff --git a/fs/jffs2/read.c b/fs/jffs2/read.c
index 3f39be1b045..0b042b1fc82 100644
--- a/fs/jffs2/read.c
+++ b/fs/jffs2/read.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/slab.h> 15#include <linux/slab.h>
14#include <linux/crc32.h> 16#include <linux/crc32.h>
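
Each converted file gains this pr_fmt() definition ahead of its first include; that is what lets the shortened pr_warn()/pr_notice() strings below drop their hand-written "JFFS2" prefixes without losing attribution. The mechanism, give or take a level of indirection in printk.h:

        /* pr_warn() splices pr_fmt() into the format string, so with
         * pr_fmt(fmt) defined as KBUILD_MODNAME ": " fmt, the call
         * pr_warn("Short read\n") prints "jffs2: Short read". */
        #define pr_warn(fmt, ...) \
                printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
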
@@ -36,24 +38,25 @@ int jffs2_read_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
36 ret = jffs2_flash_read(c, ref_offset(fd->raw), sizeof(*ri), &readlen, (char *)ri); 38 ret = jffs2_flash_read(c, ref_offset(fd->raw), sizeof(*ri), &readlen, (char *)ri);
37 if (ret) { 39 if (ret) {
38 jffs2_free_raw_inode(ri); 40 jffs2_free_raw_inode(ri);
39 printk(KERN_WARNING "Error reading node from 0x%08x: %d\n", ref_offset(fd->raw), ret); 41 pr_warn("Error reading node from 0x%08x: %d\n",
42 ref_offset(fd->raw), ret);
40 return ret; 43 return ret;
41 } 44 }
42 if (readlen != sizeof(*ri)) { 45 if (readlen != sizeof(*ri)) {
43 jffs2_free_raw_inode(ri); 46 jffs2_free_raw_inode(ri);
44 printk(KERN_WARNING "Short read from 0x%08x: wanted 0x%zx bytes, got 0x%zx\n", 47 pr_warn("Short read from 0x%08x: wanted 0x%zx bytes, got 0x%zx\n",
45 ref_offset(fd->raw), sizeof(*ri), readlen); 48 ref_offset(fd->raw), sizeof(*ri), readlen);
46 return -EIO; 49 return -EIO;
47 } 50 }
48 crc = crc32(0, ri, sizeof(*ri)-8); 51 crc = crc32(0, ri, sizeof(*ri)-8);
49 52
50 D1(printk(KERN_DEBUG "Node read from %08x: node_crc %08x, calculated CRC %08x. dsize %x, csize %x, offset %x, buf %p\n", 53 jffs2_dbg(1, "Node read from %08x: node_crc %08x, calculated CRC %08x. dsize %x, csize %x, offset %x, buf %p\n",
51 ref_offset(fd->raw), je32_to_cpu(ri->node_crc), 54 ref_offset(fd->raw), je32_to_cpu(ri->node_crc),
52 crc, je32_to_cpu(ri->dsize), je32_to_cpu(ri->csize), 55 crc, je32_to_cpu(ri->dsize), je32_to_cpu(ri->csize),
53 je32_to_cpu(ri->offset), buf)); 56 je32_to_cpu(ri->offset), buf);
54 if (crc != je32_to_cpu(ri->node_crc)) { 57 if (crc != je32_to_cpu(ri->node_crc)) {
55 printk(KERN_WARNING "Node CRC %08x != calculated CRC %08x for node at %08x\n", 58 pr_warn("Node CRC %08x != calculated CRC %08x for node at %08x\n",
56 je32_to_cpu(ri->node_crc), crc, ref_offset(fd->raw)); 59 je32_to_cpu(ri->node_crc), crc, ref_offset(fd->raw));
57 ret = -EIO; 60 ret = -EIO;
58 goto out_ri; 61 goto out_ri;
59 } 62 }
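
The header CRC above covers sizeof(*ri) - 8 bytes on purpose: the last two 32-bit fields of an on-flash raw inode are data_crc and node_crc themselves, so they must be excluded from the checksum they validate. The check in isolation:

        /* Restatement of the check above: node_crc protects the whole
         * raw-inode header except the two trailing CRC fields. */
        uint32_t crc = crc32(0, ri, sizeof(*ri) - 8);
        if (crc != je32_to_cpu(ri->node_crc))
                return -EIO;    /* header corrupt; caller discards node */
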
@@ -66,8 +69,8 @@ int jffs2_read_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
66 } 69 }
67 70
68 D1(if(ofs + len > je32_to_cpu(ri->dsize)) { 71 D1(if(ofs + len > je32_to_cpu(ri->dsize)) {
69 printk(KERN_WARNING "jffs2_read_dnode() asked for %d bytes at %d from %d-byte node\n", 72 pr_warn("jffs2_read_dnode() asked for %d bytes at %d from %d-byte node\n",
70 len, ofs, je32_to_cpu(ri->dsize)); 73 len, ofs, je32_to_cpu(ri->dsize));
71 ret = -EINVAL; 74 ret = -EINVAL;
72 goto out_ri; 75 goto out_ri;
73 }); 76 });
@@ -107,8 +110,8 @@ int jffs2_read_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
107 decomprbuf = readbuf; 110 decomprbuf = readbuf;
108 } 111 }
109 112
110 D2(printk(KERN_DEBUG "Read %d bytes to %p\n", je32_to_cpu(ri->csize), 113 jffs2_dbg(2, "Read %d bytes to %p\n", je32_to_cpu(ri->csize),
111 readbuf)); 114 readbuf);
112 ret = jffs2_flash_read(c, (ref_offset(fd->raw)) + sizeof(*ri), 115 ret = jffs2_flash_read(c, (ref_offset(fd->raw)) + sizeof(*ri),
113 je32_to_cpu(ri->csize), &readlen, readbuf); 116 je32_to_cpu(ri->csize), &readlen, readbuf);
114 117
@@ -119,18 +122,19 @@ int jffs2_read_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
119 122
120 crc = crc32(0, readbuf, je32_to_cpu(ri->csize)); 123 crc = crc32(0, readbuf, je32_to_cpu(ri->csize));
121 if (crc != je32_to_cpu(ri->data_crc)) { 124 if (crc != je32_to_cpu(ri->data_crc)) {
122 printk(KERN_WARNING "Data CRC %08x != calculated CRC %08x for node at %08x\n", 125 pr_warn("Data CRC %08x != calculated CRC %08x for node at %08x\n",
123 je32_to_cpu(ri->data_crc), crc, ref_offset(fd->raw)); 126 je32_to_cpu(ri->data_crc), crc, ref_offset(fd->raw));
124 ret = -EIO; 127 ret = -EIO;
125 goto out_decomprbuf; 128 goto out_decomprbuf;
126 } 129 }
127 D2(printk(KERN_DEBUG "Data CRC matches calculated CRC %08x\n", crc)); 130 jffs2_dbg(2, "Data CRC matches calculated CRC %08x\n", crc);
128 if (ri->compr != JFFS2_COMPR_NONE) { 131 if (ri->compr != JFFS2_COMPR_NONE) {
129 D2(printk(KERN_DEBUG "Decompress %d bytes from %p to %d bytes at %p\n", 132 jffs2_dbg(2, "Decompress %d bytes from %p to %d bytes at %p\n",
130 je32_to_cpu(ri->csize), readbuf, je32_to_cpu(ri->dsize), decomprbuf)); 133 je32_to_cpu(ri->csize), readbuf,
134 je32_to_cpu(ri->dsize), decomprbuf);
131 ret = jffs2_decompress(c, f, ri->compr | (ri->usercompr << 8), readbuf, decomprbuf, je32_to_cpu(ri->csize), je32_to_cpu(ri->dsize)); 135 ret = jffs2_decompress(c, f, ri->compr | (ri->usercompr << 8), readbuf, decomprbuf, je32_to_cpu(ri->csize), je32_to_cpu(ri->dsize));
132 if (ret) { 136 if (ret) {
133 printk(KERN_WARNING "Error: jffs2_decompress returned %d\n", ret); 137 pr_warn("Error: jffs2_decompress returned %d\n", ret);
134 goto out_decomprbuf; 138 goto out_decomprbuf;
135 } 139 }
136 } 140 }
@@ -157,8 +161,8 @@ int jffs2_read_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
157 struct jffs2_node_frag *frag; 161 struct jffs2_node_frag *frag;
158 int ret; 162 int ret;
159 163
160 D1(printk(KERN_DEBUG "jffs2_read_inode_range: ino #%u, range 0x%08x-0x%08x\n", 164 jffs2_dbg(1, "%s(): ino #%u, range 0x%08x-0x%08x\n",
161 f->inocache->ino, offset, offset+len)); 165 __func__, f->inocache->ino, offset, offset + len);
162 166
163 frag = jffs2_lookup_node_frag(&f->fragtree, offset); 167 frag = jffs2_lookup_node_frag(&f->fragtree, offset);
164 168
@@ -168,22 +172,27 @@ int jffs2_read_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
168 * (or perhaps is before it, if we've been asked to read off the 172 * (or perhaps is before it, if we've been asked to read off the
169 * end of the file). */ 173 * end of the file). */
170 while(offset < end) { 174 while(offset < end) {
171 D2(printk(KERN_DEBUG "jffs2_read_inode_range: offset %d, end %d\n", offset, end)); 175 jffs2_dbg(2, "%s(): offset %d, end %d\n",
176 __func__, offset, end);
172 if (unlikely(!frag || frag->ofs > offset || 177 if (unlikely(!frag || frag->ofs > offset ||
173 frag->ofs + frag->size <= offset)) { 178 frag->ofs + frag->size <= offset)) {
174 uint32_t holesize = end - offset; 179 uint32_t holesize = end - offset;
175 if (frag && frag->ofs > offset) { 180 if (frag && frag->ofs > offset) {
176 D1(printk(KERN_NOTICE "Eep. Hole in ino #%u fraglist. frag->ofs = 0x%08x, offset = 0x%08x\n", f->inocache->ino, frag->ofs, offset)); 181 jffs2_dbg(1, "Eep. Hole in ino #%u fraglist. frag->ofs = 0x%08x, offset = 0x%08x\n",
182 f->inocache->ino, frag->ofs, offset);
177 holesize = min(holesize, frag->ofs - offset); 183 holesize = min(holesize, frag->ofs - offset);
178 } 184 }
179 D1(printk(KERN_DEBUG "Filling non-frag hole from %d-%d\n", offset, offset+holesize)); 185 jffs2_dbg(1, "Filling non-frag hole from %d-%d\n",
186 offset, offset + holesize);
180 memset(buf, 0, holesize); 187 memset(buf, 0, holesize);
181 buf += holesize; 188 buf += holesize;
182 offset += holesize; 189 offset += holesize;
183 continue; 190 continue;
184 } else if (unlikely(!frag->node)) { 191 } else if (unlikely(!frag->node)) {
185 uint32_t holeend = min(end, frag->ofs + frag->size); 192 uint32_t holeend = min(end, frag->ofs + frag->size);
186 D1(printk(KERN_DEBUG "Filling frag hole from %d-%d (frag 0x%x 0x%x)\n", offset, holeend, frag->ofs, frag->ofs + frag->size)); 193 jffs2_dbg(1, "Filling frag hole from %d-%d (frag 0x%x 0x%x)\n",
194 offset, holeend, frag->ofs,
195 frag->ofs + frag->size);
187 memset(buf, 0, holeend - offset); 196 memset(buf, 0, holeend - offset);
188 buf += holeend - offset; 197 buf += holeend - offset;
189 offset = holeend; 198 offset = holeend;
@@ -195,20 +204,23 @@ int jffs2_read_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
195 204
196 fragofs = offset - frag->ofs; 205 fragofs = offset - frag->ofs;
197 readlen = min(frag->size - fragofs, end - offset); 206 readlen = min(frag->size - fragofs, end - offset);
198 D1(printk(KERN_DEBUG "Reading %d-%d from node at 0x%08x (%d)\n", 207 jffs2_dbg(1, "Reading %d-%d from node at 0x%08x (%d)\n",
199 frag->ofs+fragofs, frag->ofs+fragofs+readlen, 208 frag->ofs+fragofs,
200 ref_offset(frag->node->raw), ref_flags(frag->node->raw))); 209 frag->ofs + fragofs+readlen,
210 ref_offset(frag->node->raw),
211 ref_flags(frag->node->raw));
201 ret = jffs2_read_dnode(c, f, frag->node, buf, fragofs + frag->ofs - frag->node->ofs, readlen); 212 ret = jffs2_read_dnode(c, f, frag->node, buf, fragofs + frag->ofs - frag->node->ofs, readlen);
202 D2(printk(KERN_DEBUG "node read done\n")); 213 jffs2_dbg(2, "node read done\n");
203 if (ret) { 214 if (ret) {
204 D1(printk(KERN_DEBUG"jffs2_read_inode_range error %d\n",ret)); 215 jffs2_dbg(1, "%s(): error %d\n",
216 __func__, ret);
205 memset(buf, 0, readlen); 217 memset(buf, 0, readlen);
206 return ret; 218 return ret;
207 } 219 }
208 buf += readlen; 220 buf += readlen;
209 offset += readlen; 221 offset += readlen;
210 frag = frag_next(frag); 222 frag = frag_next(frag);
211 D2(printk(KERN_DEBUG "node read was OK. Looping\n")); 223 jffs2_dbg(2, "node read was OK. Looping\n");
212 } 224 }
213 } 225 }
214 return 0; 226 return 0;
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 3093ac4fb24..dc0437e8476 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/sched.h> 15#include <linux/sched.h>
14#include <linux/slab.h> 16#include <linux/slab.h>
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index f99464833bb..7654e87b042 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/sched.h> 15#include <linux/sched.h>
14#include <linux/slab.h> 16#include <linux/slab.h>
@@ -22,15 +24,15 @@
22 24
23#define DEFAULT_EMPTY_SCAN_SIZE 256 25#define DEFAULT_EMPTY_SCAN_SIZE 256
24 26
25#define noisy_printk(noise, args...) do { \ 27#define noisy_printk(noise, fmt, ...) \
26 if (*(noise)) { \ 28do { \
27 printk(KERN_NOTICE args); \ 29 if (*(noise)) { \
28 (*(noise))--; \ 30 pr_notice(fmt, ##__VA_ARGS__); \
29 if (!(*(noise))) { \ 31 (*(noise))--; \
30 printk(KERN_NOTICE "Further such events for this erase block will not be printed\n"); \ 32 if (!(*(noise))) \
31 } \ 33 pr_notice("Further such events for this erase block will not be printed\n"); \
32 } \ 34 } \
33} while(0) 35} while (0)
34 36
35static uint32_t pseudo_random; 37static uint32_t pseudo_random;
36 38
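
The rewritten macro takes an explicit fmt parameter rather than a bare args... pack, which is what allows pr_notice() (and through it pr_fmt()) to be applied to the format string; ##__VA_ARGS__ keeps it callable with no variadic arguments at all. A hypothetical call, with the counter name and budget chosen only for illustration:

        int noise = 10; /* per-eraseblock complaint budget, illustrative */

        /* Emits at most ten notices for this block, then one final
         * "Further such events ... will not be printed" notice. */
        noisy_printk(&noise, "unexpected word 0x%08x at 0x%08x\n",
                     word, ofs);
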
@@ -96,18 +98,17 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
96#ifndef __ECOS 98#ifndef __ECOS
97 size_t pointlen, try_size; 99 size_t pointlen, try_size;
98 100
99 if (c->mtd->point) { 101 ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen,
100 ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen, 102 (void **)&flashbuf, NULL);
101 (void **)&flashbuf, NULL); 103 if (!ret && pointlen < c->mtd->size) {
102 if (!ret && pointlen < c->mtd->size) { 104 /* Don't muck about if it won't let us point to the whole flash */
103 /* Don't muck about if it won't let us point to the whole flash */ 105 jffs2_dbg(1, "MTD point returned len too short: 0x%zx\n",
104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 106 pointlen);
105 mtd_unpoint(c->mtd, 0, pointlen); 107 mtd_unpoint(c->mtd, 0, pointlen);
106 flashbuf = NULL; 108 flashbuf = NULL;
107 }
108 if (ret && ret != -EOPNOTSUPP)
109 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
110 } 109 }
110 if (ret && ret != -EOPNOTSUPP)
111 jffs2_dbg(1, "MTD point failed %d\n", ret);
111#endif 112#endif
112 if (!flashbuf) { 113 if (!flashbuf) {
113 /* For NAND it's quicker to read a whole eraseblock at a time, 114 /* For NAND it's quicker to read a whole eraseblock at a time,
@@ -117,15 +118,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
117 else 118 else
118 try_size = PAGE_SIZE; 119 try_size = PAGE_SIZE;
119 120
120 D1(printk(KERN_DEBUG "Trying to allocate readbuf of %zu " 121 jffs2_dbg(1, "Trying to allocate readbuf of %zu "
121 "bytes\n", try_size)); 122 "bytes\n", try_size);
122 123
123 flashbuf = mtd_kmalloc_up_to(c->mtd, &try_size); 124 flashbuf = mtd_kmalloc_up_to(c->mtd, &try_size);
124 if (!flashbuf) 125 if (!flashbuf)
125 return -ENOMEM; 126 return -ENOMEM;
126 127
127 D1(printk(KERN_DEBUG "Allocated readbuf of %zu bytes\n", 128 jffs2_dbg(1, "Allocated readbuf of %zu bytes\n",
128 try_size)); 129 try_size);
129 130
130 buf_size = (uint32_t)try_size; 131 buf_size = (uint32_t)try_size;
131 } 132 }
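
jffs2_scan_medium() now calls mtd_point() unconditionally instead of first testing c->mtd->point for NULL; the accessor is relied on to return -EOPNOTSUPP when the device has no direct mapping, which leaves flashbuf NULL and routes the scan through the readbuf path below. The assumed contract, in outline:

        /* Sketch of the calling convention relied on above. */
        ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen,
                        (void **)&flashbuf, NULL);
        if (ret == -EOPNOTSUPP)
                flashbuf = NULL;        /* no XIP window; buffered reads */
        else if (!ret && pointlen < c->mtd->size)
                mtd_unpoint(c->mtd, 0, pointlen); /* partial map: unusable */
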
@@ -178,7 +179,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
178 c->nr_free_blocks++; 179 c->nr_free_blocks++;
179 } else { 180 } else {
180 /* Dirt */ 181 /* Dirt */
181 D1(printk(KERN_DEBUG "Adding all-dirty block at 0x%08x to erase_pending_list\n", jeb->offset)); 182 jffs2_dbg(1, "Adding all-dirty block at 0x%08x to erase_pending_list\n",
183 jeb->offset);
182 list_add(&jeb->list, &c->erase_pending_list); 184 list_add(&jeb->list, &c->erase_pending_list);
183 c->nr_erasing_blocks++; 185 c->nr_erasing_blocks++;
184 } 186 }
@@ -205,7 +207,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
205 } 207 }
206 /* update collected summary information for the current nextblock */ 208 /* update collected summary information for the current nextblock */
207 jffs2_sum_move_collected(c, s); 209 jffs2_sum_move_collected(c, s);
208 D1(printk(KERN_DEBUG "jffs2_scan_medium(): new nextblock = 0x%08x\n", jeb->offset)); 210 jffs2_dbg(1, "%s(): new nextblock = 0x%08x\n",
211 __func__, jeb->offset);
209 c->nextblock = jeb; 212 c->nextblock = jeb;
210 } else { 213 } else {
211 ret = file_dirty(c, jeb); 214 ret = file_dirty(c, jeb);
@@ -217,20 +220,21 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
217 case BLK_STATE_ALLDIRTY: 220 case BLK_STATE_ALLDIRTY:
218 /* Nothing valid - not even a clean marker. Needs erasing. */ 221 /* Nothing valid - not even a clean marker. Needs erasing. */
219 /* For now we just put it on the erasing list. We'll start the erases later */ 222 /* For now we just put it on the erasing list. We'll start the erases later */
220 D1(printk(KERN_NOTICE "JFFS2: Erase block at 0x%08x is not formatted. It will be erased\n", jeb->offset)); 223 jffs2_dbg(1, "Erase block at 0x%08x is not formatted. It will be erased\n",
224 jeb->offset);
221 list_add(&jeb->list, &c->erase_pending_list); 225 list_add(&jeb->list, &c->erase_pending_list);
222 c->nr_erasing_blocks++; 226 c->nr_erasing_blocks++;
223 break; 227 break;
224 228
225 case BLK_STATE_BADBLOCK: 229 case BLK_STATE_BADBLOCK:
226 D1(printk(KERN_NOTICE "JFFS2: Block at 0x%08x is bad\n", jeb->offset)); 230 jffs2_dbg(1, "Block at 0x%08x is bad\n", jeb->offset);
227 list_add(&jeb->list, &c->bad_list); 231 list_add(&jeb->list, &c->bad_list);
228 c->bad_size += c->sector_size; 232 c->bad_size += c->sector_size;
229 c->free_size -= c->sector_size; 233 c->free_size -= c->sector_size;
230 bad_blocks++; 234 bad_blocks++;
231 break; 235 break;
232 default: 236 default:
233 printk(KERN_WARNING "jffs2_scan_medium(): unknown block state\n"); 237 pr_warn("%s(): unknown block state\n", __func__);
234 BUG(); 238 BUG();
235 } 239 }
236 } 240 }
@@ -250,16 +254,17 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
250 254
251 uint32_t skip = c->nextblock->free_size % c->wbuf_pagesize; 255 uint32_t skip = c->nextblock->free_size % c->wbuf_pagesize;
252 256
253 D1(printk(KERN_DEBUG "jffs2_scan_medium(): Skipping %d bytes in nextblock to ensure page alignment\n", 257 jffs2_dbg(1, "%s(): Skipping %d bytes in nextblock to ensure page alignment\n",
254 skip)); 258 __func__, skip);
255 jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); 259 jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
256 jffs2_scan_dirty_space(c, c->nextblock, skip); 260 jffs2_scan_dirty_space(c, c->nextblock, skip);
257 } 261 }
258#endif 262#endif
259 if (c->nr_erasing_blocks) { 263 if (c->nr_erasing_blocks) {
260 if ( !c->used_size && ((c->nr_free_blocks+empty_blocks+bad_blocks)!= c->nr_blocks || bad_blocks == c->nr_blocks) ) { 264 if ( !c->used_size && ((c->nr_free_blocks+empty_blocks+bad_blocks)!= c->nr_blocks || bad_blocks == c->nr_blocks) ) {
261 printk(KERN_NOTICE "Cowardly refusing to erase blocks on filesystem with no valid JFFS2 nodes\n"); 265 pr_notice("Cowardly refusing to erase blocks on filesystem with no valid JFFS2 nodes\n");
262 printk(KERN_NOTICE "empty_blocks %d, bad_blocks %d, c->nr_blocks %d\n",empty_blocks,bad_blocks,c->nr_blocks); 266 pr_notice("empty_blocks %d, bad_blocks %d, c->nr_blocks %d\n",
267 empty_blocks, bad_blocks, c->nr_blocks);
263 ret = -EIO; 268 ret = -EIO;
264 goto out; 269 goto out;
265 } 270 }
@@ -287,11 +292,13 @@ static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
287 292
288 ret = jffs2_flash_read(c, ofs, len, &retlen, buf); 293 ret = jffs2_flash_read(c, ofs, len, &retlen, buf);
289 if (ret) { 294 if (ret) {
290 D1(printk(KERN_WARNING "mtd->read(0x%x bytes from 0x%x) returned %d\n", len, ofs, ret)); 295 jffs2_dbg(1, "mtd->read(0x%x bytes from 0x%x) returned %d\n",
296 len, ofs, ret);
291 return ret; 297 return ret;
292 } 298 }
293 if (retlen < len) { 299 if (retlen < len) {
294 D1(printk(KERN_WARNING "Read at 0x%x gave only 0x%zx bytes\n", ofs, retlen)); 300 jffs2_dbg(1, "Read at 0x%x gave only 0x%zx bytes\n",
301 ofs, retlen);
295 return -EIO; 302 return -EIO;
296 } 303 }
297 return 0; 304 return 0;
@@ -368,7 +375,7 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
368 375
369 if (jffs2_sum_active()) 376 if (jffs2_sum_active())
370 jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset); 377 jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset);
371 dbg_xattr("scaning xdatum at %#08x (xid=%u, version=%u)\n", 378 dbg_xattr("scanning xdatum at %#08x (xid=%u, version=%u)\n",
372 ofs, xd->xid, xd->version); 379 ofs, xd->xid, xd->version);
373 return 0; 380 return 0;
374} 381}
@@ -449,7 +456,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
449 ofs = jeb->offset; 456 ofs = jeb->offset;
450 prevofs = jeb->offset - 1; 457 prevofs = jeb->offset - 1;
451 458
452 D1(printk(KERN_DEBUG "jffs2_scan_eraseblock(): Scanning block at 0x%x\n", ofs)); 459 jffs2_dbg(1, "%s(): Scanning block at 0x%x\n", __func__, ofs);
453 460
454#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 461#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
455 if (jffs2_cleanmarker_oob(c)) { 462 if (jffs2_cleanmarker_oob(c)) {
@@ -459,7 +466,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
459 return BLK_STATE_BADBLOCK; 466 return BLK_STATE_BADBLOCK;
460 467
461 ret = jffs2_check_nand_cleanmarker(c, jeb); 468 ret = jffs2_check_nand_cleanmarker(c, jeb);
 462 ret = jffs2_check_nand_cleanmarker(c, jeb); 468 ret = jffs2_check_nand_cleanmarker(c, jeb);
 462 D2(printk(KERN_NOTICE "jffs_check_nand_cleanmarker returned %d\n",ret)); 469 jffs2_dbg(2, "jffs2_check_nand_cleanmarker returned %d\n", ret);
463 470
464 /* Even if it's not found, we still scan to see 471 /* Even if it's not found, we still scan to see
465 if the block is empty. We use this information 472 if the block is empty. We use this information
@@ -561,7 +568,8 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
561 if (jffs2_cleanmarker_oob(c)) { 568 if (jffs2_cleanmarker_oob(c)) {
562 /* scan oob, take care of cleanmarker */ 569 /* scan oob, take care of cleanmarker */
563 int ret = jffs2_check_oob_empty(c, jeb, cleanmarkerfound); 570 int ret = jffs2_check_oob_empty(c, jeb, cleanmarkerfound);
564 D2(printk(KERN_NOTICE "jffs2_check_oob_empty returned %d\n",ret)); 571 jffs2_dbg(2, "jffs2_check_oob_empty returned %d\n",
572 ret);
565 switch (ret) { 573 switch (ret) {
566 case 0: return cleanmarkerfound ? BLK_STATE_CLEANMARKER : BLK_STATE_ALLFF; 574 case 0: return cleanmarkerfound ? BLK_STATE_CLEANMARKER : BLK_STATE_ALLFF;
567 case 1: return BLK_STATE_ALLDIRTY; 575 case 1: return BLK_STATE_ALLDIRTY;
@@ -569,15 +577,16 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
569 } 577 }
570 } 578 }
571#endif 579#endif
572 D1(printk(KERN_DEBUG "Block at 0x%08x is empty (erased)\n", jeb->offset)); 580 jffs2_dbg(1, "Block at 0x%08x is empty (erased)\n",
581 jeb->offset);
573 if (c->cleanmarker_size == 0) 582 if (c->cleanmarker_size == 0)
574 return BLK_STATE_CLEANMARKER; /* don't bother with re-erase */ 583 return BLK_STATE_CLEANMARKER; /* don't bother with re-erase */
575 else 584 else
576 return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */ 585 return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */
577 } 586 }
578 if (ofs) { 587 if (ofs) {
579 D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset, 588 jffs2_dbg(1, "Free space at %08x ends at %08x\n", jeb->offset,
580 jeb->offset + ofs)); 589 jeb->offset + ofs);
581 if ((err = jffs2_prealloc_raw_node_refs(c, jeb, 1))) 590 if ((err = jffs2_prealloc_raw_node_refs(c, jeb, 1)))
582 return err; 591 return err;
583 if ((err = jffs2_scan_dirty_space(c, jeb, ofs))) 592 if ((err = jffs2_scan_dirty_space(c, jeb, ofs)))
@@ -604,12 +613,13 @@ scan_more:
604 cond_resched(); 613 cond_resched();
605 614
606 if (ofs & 3) { 615 if (ofs & 3) {
607 printk(KERN_WARNING "Eep. ofs 0x%08x not word-aligned!\n", ofs); 616 pr_warn("Eep. ofs 0x%08x not word-aligned!\n", ofs);
608 ofs = PAD(ofs); 617 ofs = PAD(ofs);
609 continue; 618 continue;
610 } 619 }
611 if (ofs == prevofs) { 620 if (ofs == prevofs) {
612 printk(KERN_WARNING "ofs 0x%08x has already been seen. Skipping\n", ofs); 621 pr_warn("ofs 0x%08x has already been seen. Skipping\n",
622 ofs);
613 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 623 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
614 return err; 624 return err;
615 ofs += 4; 625 ofs += 4;
@@ -618,8 +628,10 @@ scan_more:
618 prevofs = ofs; 628 prevofs = ofs;
619 629
620 if (jeb->offset + c->sector_size < ofs + sizeof(*node)) { 630 if (jeb->offset + c->sector_size < ofs + sizeof(*node)) {
621 D1(printk(KERN_DEBUG "Fewer than %zd bytes left to end of block. (%x+%x<%x+%zx) Not reading\n", sizeof(struct jffs2_unknown_node), 631 jffs2_dbg(1, "Fewer than %zd bytes left to end of block. (%x+%x<%x+%zx) Not reading\n",
622 jeb->offset, c->sector_size, ofs, sizeof(*node))); 632 sizeof(struct jffs2_unknown_node),
633 jeb->offset, c->sector_size, ofs,
634 sizeof(*node));
623 if ((err = jffs2_scan_dirty_space(c, jeb, (jeb->offset + c->sector_size)-ofs))) 635 if ((err = jffs2_scan_dirty_space(c, jeb, (jeb->offset + c->sector_size)-ofs)))
624 return err; 636 return err;
625 break; 637 break;
@@ -627,8 +639,9 @@ scan_more:
627 639
628 if (buf_ofs + buf_len < ofs + sizeof(*node)) { 640 if (buf_ofs + buf_len < ofs + sizeof(*node)) {
629 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); 641 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
630 D1(printk(KERN_DEBUG "Fewer than %zd bytes (node header) left to end of buf. Reading 0x%x at 0x%08x\n", 642 jffs2_dbg(1, "Fewer than %zd bytes (node header) left to end of buf. Reading 0x%x at 0x%08x\n",
631 sizeof(struct jffs2_unknown_node), buf_len, ofs)); 643 sizeof(struct jffs2_unknown_node),
644 buf_len, ofs);
632 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 645 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
633 if (err) 646 if (err)
634 return err; 647 return err;
@@ -645,13 +658,13 @@ scan_more:
645 ofs += 4; 658 ofs += 4;
646 scan_end = min_t(uint32_t, EMPTY_SCAN_SIZE(c->sector_size)/8, buf_len); 659 scan_end = min_t(uint32_t, EMPTY_SCAN_SIZE(c->sector_size)/8, buf_len);
647 660
648 D1(printk(KERN_DEBUG "Found empty flash at 0x%08x\n", ofs)); 661 jffs2_dbg(1, "Found empty flash at 0x%08x\n", ofs);
649 more_empty: 662 more_empty:
650 inbuf_ofs = ofs - buf_ofs; 663 inbuf_ofs = ofs - buf_ofs;
651 while (inbuf_ofs < scan_end) { 664 while (inbuf_ofs < scan_end) {
652 if (unlikely(*(uint32_t *)(&buf[inbuf_ofs]) != 0xffffffff)) { 665 if (unlikely(*(uint32_t *)(&buf[inbuf_ofs]) != 0xffffffff)) {
653 printk(KERN_WARNING "Empty flash at 0x%08x ends at 0x%08x\n", 666 pr_warn("Empty flash at 0x%08x ends at 0x%08x\n",
654 empty_start, ofs); 667 empty_start, ofs);
655 if ((err = jffs2_scan_dirty_space(c, jeb, ofs-empty_start))) 668 if ((err = jffs2_scan_dirty_space(c, jeb, ofs-empty_start)))
656 return err; 669 return err;
657 goto scan_more; 670 goto scan_more;
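
The loop above walks a word at a time because erased flash reads back as all ones; the first word that is not 0xffffffff ends the empty run, and everything before it is accounted as dirty space. The same test as a stand-alone helper (illustrative only, not part of the patch):

        /* True if 'nwords' 32-bit words are all in the erased state. */
        static bool words_erased(const uint32_t *p, size_t nwords)
        {
                while (nwords--)
                        if (*p++ != 0xffffffff)
                                return false;
                return true;
        }
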
@@ -661,13 +674,15 @@ scan_more:
661 ofs += 4; 674 ofs += 4;
662 } 675 }
663 /* Ran off end. */ 676 /* Ran off end. */
664 D1(printk(KERN_DEBUG "Empty flash to end of buffer at 0x%08x\n", ofs)); 677 jffs2_dbg(1, "Empty flash to end of buffer at 0x%08x\n",
678 ofs);
665 679
666 /* If we're only checking the beginning of a block with a cleanmarker, 680 /* If we're only checking the beginning of a block with a cleanmarker,
667 bail now */ 681 bail now */
668 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) && 682 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) &&
669 c->cleanmarker_size && !jeb->dirty_size && !ref_next(jeb->first_node)) { 683 c->cleanmarker_size && !jeb->dirty_size && !ref_next(jeb->first_node)) {
670 D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE(c->sector_size))); 684 jffs2_dbg(1, "%d bytes at start of block seems clean... assuming all clean\n",
685 EMPTY_SCAN_SIZE(c->sector_size));
671 return BLK_STATE_CLEANMARKER; 686 return BLK_STATE_CLEANMARKER;
672 } 687 }
673 if (!buf_size && (scan_end != buf_len)) {/* XIP/point case */ 688 if (!buf_size && (scan_end != buf_len)) {/* XIP/point case */
@@ -680,13 +695,14 @@ scan_more:
680 if (!buf_len) { 695 if (!buf_len) {
681 /* No more to read. Break out of main loop without marking 696 /* No more to read. Break out of main loop without marking
682 this range of empty space as dirty (because it's not) */ 697 this range of empty space as dirty (because it's not) */
683 D1(printk(KERN_DEBUG "Empty flash at %08x runs to end of block. Treating as free_space\n", 698 jffs2_dbg(1, "Empty flash at %08x runs to end of block. Treating as free_space\n",
684 empty_start)); 699 empty_start);
685 break; 700 break;
686 } 701 }
687 /* point never reaches here */ 702 /* point never reaches here */
688 scan_end = buf_len; 703 scan_end = buf_len;
689 D1(printk(KERN_DEBUG "Reading another 0x%x at 0x%08x\n", buf_len, ofs)); 704 jffs2_dbg(1, "Reading another 0x%x at 0x%08x\n",
705 buf_len, ofs);
690 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 706 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
691 if (err) 707 if (err)
692 return err; 708 return err;
@@ -695,22 +711,23 @@ scan_more:
695 } 711 }
696 712
697 if (ofs == jeb->offset && je16_to_cpu(node->magic) == KSAMTIB_CIGAM_2SFFJ) { 713 if (ofs == jeb->offset && je16_to_cpu(node->magic) == KSAMTIB_CIGAM_2SFFJ) {
698 printk(KERN_WARNING "Magic bitmask is backwards at offset 0x%08x. Wrong endian filesystem?\n", ofs); 714 pr_warn("Magic bitmask is backwards at offset 0x%08x. Wrong endian filesystem?\n",
715 ofs);
699 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 716 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
700 return err; 717 return err;
701 ofs += 4; 718 ofs += 4;
702 continue; 719 continue;
703 } 720 }
704 if (je16_to_cpu(node->magic) == JFFS2_DIRTY_BITMASK) { 721 if (je16_to_cpu(node->magic) == JFFS2_DIRTY_BITMASK) {
705 D1(printk(KERN_DEBUG "Dirty bitmask at 0x%08x\n", ofs)); 722 jffs2_dbg(1, "Dirty bitmask at 0x%08x\n", ofs);
706 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 723 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
707 return err; 724 return err;
708 ofs += 4; 725 ofs += 4;
709 continue; 726 continue;
710 } 727 }
711 if (je16_to_cpu(node->magic) == JFFS2_OLD_MAGIC_BITMASK) { 728 if (je16_to_cpu(node->magic) == JFFS2_OLD_MAGIC_BITMASK) {
712 printk(KERN_WARNING "Old JFFS2 bitmask found at 0x%08x\n", ofs); 729 pr_warn("Old JFFS2 bitmask found at 0x%08x\n", ofs);
713 printk(KERN_WARNING "You cannot use older JFFS2 filesystems with newer kernels\n"); 730 pr_warn("You cannot use older JFFS2 filesystems with newer kernels\n");
714 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 731 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
715 return err; 732 return err;
716 ofs += 4; 733 ofs += 4;
@@ -718,7 +735,8 @@ scan_more:
718 } 735 }
719 if (je16_to_cpu(node->magic) != JFFS2_MAGIC_BITMASK) { 736 if (je16_to_cpu(node->magic) != JFFS2_MAGIC_BITMASK) {
720 /* OK. We're out of possibilities. Whinge and move on */ 737 /* OK. We're out of possibilities. Whinge and move on */
721 noisy_printk(&noise, "jffs2_scan_eraseblock(): Magic bitmask 0x%04x not found at 0x%08x: 0x%04x instead\n", 738 noisy_printk(&noise, "%s(): Magic bitmask 0x%04x not found at 0x%08x: 0x%04x instead\n",
739 __func__,
722 JFFS2_MAGIC_BITMASK, ofs, 740 JFFS2_MAGIC_BITMASK, ofs,
723 je16_to_cpu(node->magic)); 741 je16_to_cpu(node->magic));
724 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 742 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
@@ -733,7 +751,8 @@ scan_more:
733 hdr_crc = crc32(0, &crcnode, sizeof(crcnode)-4); 751 hdr_crc = crc32(0, &crcnode, sizeof(crcnode)-4);
734 752
735 if (hdr_crc != je32_to_cpu(node->hdr_crc)) { 753 if (hdr_crc != je32_to_cpu(node->hdr_crc)) {
 736 noisy_printk(&noise, "jffs2_scan_eraseblock(): Node at 0x%08x {0x%04x, 0x%04x, 0x%08x) has invalid CRC 0x%08x (calculated 0x%08x)\n", 754 noisy_printk(&noise, "%s(): Node at 0x%08x {0x%04x, 0x%04x, 0x%08x} has invalid CRC 0x%08x (calculated 0x%08x)\n",
755 __func__,
737 ofs, je16_to_cpu(node->magic), 756 ofs, je16_to_cpu(node->magic),
738 je16_to_cpu(node->nodetype), 757 je16_to_cpu(node->nodetype),
739 je32_to_cpu(node->totlen), 758 je32_to_cpu(node->totlen),
@@ -747,9 +766,9 @@ scan_more:
747 766
748 if (ofs + je32_to_cpu(node->totlen) > jeb->offset + c->sector_size) { 767 if (ofs + je32_to_cpu(node->totlen) > jeb->offset + c->sector_size) {
749 /* Eep. Node goes over the end of the erase block. */ 768 /* Eep. Node goes over the end of the erase block. */
750 printk(KERN_WARNING "Node at 0x%08x with length 0x%08x would run over the end of the erase block\n", 769 pr_warn("Node at 0x%08x with length 0x%08x would run over the end of the erase block\n",
751 ofs, je32_to_cpu(node->totlen)); 770 ofs, je32_to_cpu(node->totlen));
752 printk(KERN_WARNING "Perhaps the file system was created with the wrong erase size?\n"); 771 pr_warn("Perhaps the file system was created with the wrong erase size?\n");
753 if ((err = jffs2_scan_dirty_space(c, jeb, 4))) 772 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
754 return err; 773 return err;
755 ofs += 4; 774 ofs += 4;
@@ -758,7 +777,8 @@ scan_more:
758 777
759 if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) { 778 if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) {
760 /* Wheee. This is an obsoleted node */ 779 /* Wheee. This is an obsoleted node */
761 D2(printk(KERN_DEBUG "Node at 0x%08x is obsolete. Skipping\n", ofs)); 780 jffs2_dbg(2, "Node at 0x%08x is obsolete. Skipping\n",
781 ofs);
762 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) 782 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
763 return err; 783 return err;
764 ofs += PAD(je32_to_cpu(node->totlen)); 784 ofs += PAD(je32_to_cpu(node->totlen));
@@ -769,8 +789,9 @@ scan_more:
769 case JFFS2_NODETYPE_INODE: 789 case JFFS2_NODETYPE_INODE:
770 if (buf_ofs + buf_len < ofs + sizeof(struct jffs2_raw_inode)) { 790 if (buf_ofs + buf_len < ofs + sizeof(struct jffs2_raw_inode)) {
771 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); 791 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
772 D1(printk(KERN_DEBUG "Fewer than %zd bytes (inode node) left to end of buf. Reading 0x%x at 0x%08x\n", 792 jffs2_dbg(1, "Fewer than %zd bytes (inode node) left to end of buf. Reading 0x%x at 0x%08x\n",
773 sizeof(struct jffs2_raw_inode), buf_len, ofs)); 793 sizeof(struct jffs2_raw_inode),
794 buf_len, ofs);
774 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 795 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
775 if (err) 796 if (err)
776 return err; 797 return err;
@@ -785,8 +806,9 @@ scan_more:
785 case JFFS2_NODETYPE_DIRENT: 806 case JFFS2_NODETYPE_DIRENT:
786 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) { 807 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) {
787 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); 808 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
788 D1(printk(KERN_DEBUG "Fewer than %d bytes (dirent node) left to end of buf. Reading 0x%x at 0x%08x\n", 809 jffs2_dbg(1, "Fewer than %d bytes (dirent node) left to end of buf. Reading 0x%x at 0x%08x\n",
789 je32_to_cpu(node->totlen), buf_len, ofs)); 810 je32_to_cpu(node->totlen), buf_len,
811 ofs);
790 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 812 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
791 if (err) 813 if (err)
792 return err; 814 return err;
@@ -802,9 +824,9 @@ scan_more:
802 case JFFS2_NODETYPE_XATTR: 824 case JFFS2_NODETYPE_XATTR:
803 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) { 825 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) {
804 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); 826 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
805 D1(printk(KERN_DEBUG "Fewer than %d bytes (xattr node)" 827 jffs2_dbg(1, "Fewer than %d bytes (xattr node) left to end of buf. Reading 0x%x at 0x%08x\n",
806 " left to end of buf. Reading 0x%x at 0x%08x\n", 828 je32_to_cpu(node->totlen), buf_len,
807 je32_to_cpu(node->totlen), buf_len, ofs)); 829 ofs);
808 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 830 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
809 if (err) 831 if (err)
810 return err; 832 return err;
@@ -819,9 +841,9 @@ scan_more:
819 case JFFS2_NODETYPE_XREF: 841 case JFFS2_NODETYPE_XREF:
820 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) { 842 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) {
821 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); 843 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
822 D1(printk(KERN_DEBUG "Fewer than %d bytes (xref node)" 844 jffs2_dbg(1, "Fewer than %d bytes (xref node) left to end of buf. Reading 0x%x at 0x%08x\n",
823 " left to end of buf. Reading 0x%x at 0x%08x\n", 845 je32_to_cpu(node->totlen), buf_len,
824 je32_to_cpu(node->totlen), buf_len, ofs)); 846 ofs);
825 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); 847 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
826 if (err) 848 if (err)
827 return err; 849 return err;
@@ -836,15 +858,17 @@ scan_more:
836#endif /* CONFIG_JFFS2_FS_XATTR */ 858#endif /* CONFIG_JFFS2_FS_XATTR */
837 859
838 case JFFS2_NODETYPE_CLEANMARKER: 860 case JFFS2_NODETYPE_CLEANMARKER:
839 D1(printk(KERN_DEBUG "CLEANMARKER node found at 0x%08x\n", ofs)); 861 jffs2_dbg(1, "CLEANMARKER node found at 0x%08x\n", ofs);
840 if (je32_to_cpu(node->totlen) != c->cleanmarker_size) { 862 if (je32_to_cpu(node->totlen) != c->cleanmarker_size) {
841 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x has totlen 0x%x != normal 0x%x\n", 863 pr_notice("CLEANMARKER node found at 0x%08x has totlen 0x%x != normal 0x%x\n",
842 ofs, je32_to_cpu(node->totlen), c->cleanmarker_size); 864 ofs, je32_to_cpu(node->totlen),
865 c->cleanmarker_size);
843 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node))))) 866 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node)))))
844 return err; 867 return err;
845 ofs += PAD(sizeof(struct jffs2_unknown_node)); 868 ofs += PAD(sizeof(struct jffs2_unknown_node));
846 } else if (jeb->first_node) { 869 } else if (jeb->first_node) {
847 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x, not first node in block (0x%08x)\n", ofs, jeb->offset); 870 pr_notice("CLEANMARKER node found at 0x%08x, not first node in block (0x%08x)\n",
871 ofs, jeb->offset);
848 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node))))) 872 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node)))))
849 return err; 873 return err;
850 ofs += PAD(sizeof(struct jffs2_unknown_node)); 874 ofs += PAD(sizeof(struct jffs2_unknown_node));
@@ -866,7 +890,8 @@ scan_more:
866 default: 890 default:
867 switch (je16_to_cpu(node->nodetype) & JFFS2_COMPAT_MASK) { 891 switch (je16_to_cpu(node->nodetype) & JFFS2_COMPAT_MASK) {
868 case JFFS2_FEATURE_ROCOMPAT: 892 case JFFS2_FEATURE_ROCOMPAT:
869 printk(KERN_NOTICE "Read-only compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs); 893 pr_notice("Read-only compatible feature node (0x%04x) found at offset 0x%08x\n",
894 je16_to_cpu(node->nodetype), ofs);
870 c->flags |= JFFS2_SB_FLAG_RO; 895 c->flags |= JFFS2_SB_FLAG_RO;
871 if (!(jffs2_is_readonly(c))) 896 if (!(jffs2_is_readonly(c)))
872 return -EROFS; 897 return -EROFS;
@@ -876,18 +901,21 @@ scan_more:
876 break; 901 break;
877 902
878 case JFFS2_FEATURE_INCOMPAT: 903 case JFFS2_FEATURE_INCOMPAT:
879 printk(KERN_NOTICE "Incompatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs); 904 pr_notice("Incompatible feature node (0x%04x) found at offset 0x%08x\n",
905 je16_to_cpu(node->nodetype), ofs);
880 return -EINVAL; 906 return -EINVAL;
881 907
882 case JFFS2_FEATURE_RWCOMPAT_DELETE: 908 case JFFS2_FEATURE_RWCOMPAT_DELETE:
883 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); 909 jffs2_dbg(1, "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n",
910 je16_to_cpu(node->nodetype), ofs);
884 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) 911 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
885 return err; 912 return err;
886 ofs += PAD(je32_to_cpu(node->totlen)); 913 ofs += PAD(je32_to_cpu(node->totlen));
887 break; 914 break;
888 915
889 case JFFS2_FEATURE_RWCOMPAT_COPY: { 916 case JFFS2_FEATURE_RWCOMPAT_COPY: {
890 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); 917 jffs2_dbg(1, "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n",
918 je16_to_cpu(node->nodetype), ofs);
891 919
892 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(node->totlen)), NULL); 920 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(node->totlen)), NULL);
893 921
@@ -908,8 +936,9 @@ scan_more:
908 } 936 }
909 } 937 }
910 938
911 D1(printk(KERN_DEBUG "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x, wasted 0x%08x\n", 939 jffs2_dbg(1, "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x, wasted 0x%08x\n",
912 jeb->offset,jeb->free_size, jeb->dirty_size, jeb->unchecked_size, jeb->used_size, jeb->wasted_size)); 940 jeb->offset, jeb->free_size, jeb->dirty_size,
941 jeb->unchecked_size, jeb->used_size, jeb->wasted_size);
913 942
914 /* mark_node_obsolete can add to wasted !! */ 943 /* mark_node_obsolete can add to wasted !! */
915 if (jeb->wasted_size) { 944 if (jeb->wasted_size) {
@@ -935,7 +964,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
935 964
936 ic = jffs2_alloc_inode_cache(); 965 ic = jffs2_alloc_inode_cache();
937 if (!ic) { 966 if (!ic) {
938 printk(KERN_NOTICE "jffs2_scan_make_inode_cache(): allocation of inode cache failed\n"); 967 pr_notice("%s(): allocation of inode cache failed\n", __func__);
939 return NULL; 968 return NULL;
940 } 969 }
941 memset(ic, 0, sizeof(*ic)); 970 memset(ic, 0, sizeof(*ic));
@@ -954,7 +983,7 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
954 struct jffs2_inode_cache *ic; 983 struct jffs2_inode_cache *ic;
955 uint32_t crc, ino = je32_to_cpu(ri->ino); 984 uint32_t crc, ino = je32_to_cpu(ri->ino);
956 985
957 D1(printk(KERN_DEBUG "jffs2_scan_inode_node(): Node at 0x%08x\n", ofs)); 986 jffs2_dbg(1, "%s(): Node at 0x%08x\n", __func__, ofs);
958 987
959 /* We do very little here now. Just check the ino# to which we should attribute 988 /* We do very little here now. Just check the ino# to which we should attribute
960 this node; we can do all the CRC checking etc. later. There's a tradeoff here -- 989 this node; we can do all the CRC checking etc. later. There's a tradeoff here --
@@ -968,9 +997,8 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
968 /* Check the node CRC in any case. */ 997 /* Check the node CRC in any case. */
969 crc = crc32(0, ri, sizeof(*ri)-8); 998 crc = crc32(0, ri, sizeof(*ri)-8);
970 if (crc != je32_to_cpu(ri->node_crc)) { 999 if (crc != je32_to_cpu(ri->node_crc)) {
971 printk(KERN_NOTICE "jffs2_scan_inode_node(): CRC failed on " 1000 pr_notice("%s(): CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
972 "node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 1001 __func__, ofs, je32_to_cpu(ri->node_crc), crc);
973 ofs, je32_to_cpu(ri->node_crc), crc);
974 /* 1002 /*
975 * We believe totlen because the CRC on the node 1003 * We believe totlen because the CRC on the node
976 * _header_ was OK, just the node itself failed. 1004 * _header_ was OK, just the node itself failed.
@@ -989,10 +1017,10 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
989 /* Wheee. It worked */ 1017 /* Wheee. It worked */
990 jffs2_link_node_ref(c, jeb, ofs | REF_UNCHECKED, PAD(je32_to_cpu(ri->totlen)), ic); 1018 jffs2_link_node_ref(c, jeb, ofs | REF_UNCHECKED, PAD(je32_to_cpu(ri->totlen)), ic);
991 1019
992 D1(printk(KERN_DEBUG "Node is ino #%u, version %d. Range 0x%x-0x%x\n", 1020 jffs2_dbg(1, "Node is ino #%u, version %d. Range 0x%x-0x%x\n",
993 je32_to_cpu(ri->ino), je32_to_cpu(ri->version), 1021 je32_to_cpu(ri->ino), je32_to_cpu(ri->version),
994 je32_to_cpu(ri->offset), 1022 je32_to_cpu(ri->offset),
995 je32_to_cpu(ri->offset)+je32_to_cpu(ri->dsize))); 1023 je32_to_cpu(ri->offset)+je32_to_cpu(ri->dsize));
996 1024
997 pseudo_random += je32_to_cpu(ri->version); 1025 pseudo_random += je32_to_cpu(ri->version);
998 1026
@@ -1012,15 +1040,15 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1012 uint32_t crc; 1040 uint32_t crc;
1013 int err; 1041 int err;
1014 1042
1015 D1(printk(KERN_DEBUG "jffs2_scan_dirent_node(): Node at 0x%08x\n", ofs)); 1043 jffs2_dbg(1, "%s(): Node at 0x%08x\n", __func__, ofs);
1016 1044
1017 /* We don't get here unless the node is still valid, so we don't have to 1045 /* We don't get here unless the node is still valid, so we don't have to
1018 mask in the ACCURATE bit any more. */ 1046 mask in the ACCURATE bit any more. */
1019 crc = crc32(0, rd, sizeof(*rd)-8); 1047 crc = crc32(0, rd, sizeof(*rd)-8);
1020 1048
1021 if (crc != je32_to_cpu(rd->node_crc)) { 1049 if (crc != je32_to_cpu(rd->node_crc)) {
1022 printk(KERN_NOTICE "jffs2_scan_dirent_node(): Node CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 1050 pr_notice("%s(): Node CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
1023 ofs, je32_to_cpu(rd->node_crc), crc); 1051 __func__, ofs, je32_to_cpu(rd->node_crc), crc);
1024 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */ 1052 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */
1025 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen))))) 1053 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen)))))
1026 return err; 1054 return err;
@@ -1032,7 +1060,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1032 /* Should never happen. Did. (OLPC trac #4184)*/ 1060 /* Should never happen. Did. (OLPC trac #4184)*/
1033 checkedlen = strnlen(rd->name, rd->nsize); 1061 checkedlen = strnlen(rd->name, rd->nsize);
1034 if (checkedlen < rd->nsize) { 1062 if (checkedlen < rd->nsize) {
1035 printk(KERN_ERR "Dirent at %08x has zeroes in name. Truncating to %d chars\n", 1063 pr_err("Dirent at %08x has zeroes in name. Truncating to %d chars\n",
1036 ofs, checkedlen); 1064 ofs, checkedlen);
1037 } 1065 }
1038 fd = jffs2_alloc_full_dirent(checkedlen+1); 1066 fd = jffs2_alloc_full_dirent(checkedlen+1);
@@ -1044,9 +1072,10 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1044 1072
1045 crc = crc32(0, fd->name, rd->nsize); 1073 crc = crc32(0, fd->name, rd->nsize);
1046 if (crc != je32_to_cpu(rd->name_crc)) { 1074 if (crc != je32_to_cpu(rd->name_crc)) {
1047 printk(KERN_NOTICE "jffs2_scan_dirent_node(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 1075 pr_notice("%s(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
1048 ofs, je32_to_cpu(rd->name_crc), crc); 1076 __func__, ofs, je32_to_cpu(rd->name_crc), crc);
1049 D1(printk(KERN_NOTICE "Name for which CRC failed is (now) '%s', ino #%d\n", fd->name, je32_to_cpu(rd->ino))); 1077 jffs2_dbg(1, "Name for which CRC failed is (now) '%s', ino #%d\n",
1078 fd->name, je32_to_cpu(rd->ino));
1050 jffs2_free_full_dirent(fd); 1079 jffs2_free_full_dirent(fd);
1051 /* FIXME: Why do we believe totlen? */ 1080 /* FIXME: Why do we believe totlen? */
1052 /* We believe totlen because the CRC on the node _header_ was OK, just the name failed. */ 1081 /* We believe totlen because the CRC on the node _header_ was OK, just the name failed. */
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 0f20208df60..aca97f35b29 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,8 +23,8 @@
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* ---- Initial Security Label(s) Attachment callback --- */ 25/* ---- Initial Security Label(s) Attachment callback --- */
26int jffs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 26static int jffs2_initxattrs(struct inode *inode,
27 void *fs_info) 27 const struct xattr *xattr_array, void *fs_info)
28{ 28{
29 const struct xattr *xattr; 29 const struct xattr *xattr;
30 int err = 0; 30 int err = 0;
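
jffs2_initxattrs can become static because nothing refers to it by name outside this file; it is only ever handed to security_inode_init_security() as the callback that stores each initial security xattr. The call shape it is assumed to serve (the actual call sits later in the same file):

        /* Assumed call shape: the LSM core invokes jffs2_initxattrs once
         * for the initial security xattrs it wants persisted. */
        err = security_inode_init_security(inode, dir, qstr,
                                           &jffs2_initxattrs, NULL);
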
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index e537fb0e018..c522d098bb4 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -11,6 +11,8 @@
11 * 11 *
12 */ 12 */
13 13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
14#include <linux/kernel.h> 16#include <linux/kernel.h>
15#include <linux/slab.h> 17#include <linux/slab.h>
16#include <linux/mtd/mtd.h> 18#include <linux/mtd/mtd.h>
@@ -442,13 +444,16 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
442 /* This should never happen, but https://dev.laptop.org/ticket/4184 */ 444 /* This should never happen, but https://dev.laptop.org/ticket/4184 */
443 checkedlen = strnlen(spd->name, spd->nsize); 445 checkedlen = strnlen(spd->name, spd->nsize);
444 if (!checkedlen) { 446 if (!checkedlen) {
445 printk(KERN_ERR "Dirent at %08x has zero at start of name. Aborting mount.\n", 447 pr_err("Dirent at %08x has zero at start of name. Aborting mount.\n",
446 jeb->offset + je32_to_cpu(spd->offset)); 448 jeb->offset +
449 je32_to_cpu(spd->offset));
447 return -EIO; 450 return -EIO;
448 } 451 }
449 if (checkedlen < spd->nsize) { 452 if (checkedlen < spd->nsize) {
450 printk(KERN_ERR "Dirent at %08x has zeroes in name. Truncating to %d chars\n", 453 pr_err("Dirent at %08x has zeroes in name. Truncating to %d chars\n",
451 jeb->offset + je32_to_cpu(spd->offset), checkedlen); 454 jeb->offset +
455 je32_to_cpu(spd->offset),
456 checkedlen);
452 } 457 }
453 458
454 459
@@ -808,8 +813,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
808 813
809 sum_ofs = jeb->offset + c->sector_size - jeb->free_size; 814 sum_ofs = jeb->offset + c->sector_size - jeb->free_size;
810 815
811 dbg_summary("JFFS2: writing out data to flash to pos : 0x%08x\n", 816 dbg_summary("writing out data to flash to pos : 0x%08x\n", sum_ofs);
812 sum_ofs);
813 817
814 ret = jffs2_flash_writev(c, vecs, 2, sum_ofs, &retlen, 0); 818 ret = jffs2_flash_writev(c, vecs, 2, sum_ofs, &retlen, 0);
815 819
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f2d96b5e64f..f9916f312bd 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/slab.h> 16#include <linux/slab.h>
@@ -69,7 +71,7 @@ static void jffs2_write_super(struct super_block *sb)
69 sb->s_dirt = 0; 71 sb->s_dirt = 0;
70 72
71 if (!(sb->s_flags & MS_RDONLY)) { 73 if (!(sb->s_flags & MS_RDONLY)) {
72 D1(printk(KERN_DEBUG "jffs2_write_super()\n")); 74 jffs2_dbg(1, "%s()\n", __func__);
73 jffs2_flush_wbuf_gc(c, 0); 75 jffs2_flush_wbuf_gc(c, 0);
74 } 76 }
75 77
@@ -214,8 +216,8 @@ static int jffs2_parse_options(struct jffs2_sb_info *c, char *data)
214 JFFS2_COMPR_MODE_FORCEZLIB; 216 JFFS2_COMPR_MODE_FORCEZLIB;
215#endif 217#endif
216 else { 218 else {
217 printk(KERN_ERR "JFFS2 Error: unknown compressor \"%s\"", 219 pr_err("Error: unknown compressor \"%s\"\n",
218 name); 220 name);
219 kfree(name); 221 kfree(name);
220 return -EINVAL; 222 return -EINVAL;
221 } 223 }
@@ -223,8 +225,8 @@ static int jffs2_parse_options(struct jffs2_sb_info *c, char *data)
223 c->mount_opts.override_compr = true; 225 c->mount_opts.override_compr = true;
224 break; 226 break;
225 default: 227 default:
226 printk(KERN_ERR "JFFS2 Error: unrecognized mount option '%s' or missing value\n", 228 pr_err("Error: unrecognized mount option '%s' or missing value\n",
227 p); 229 p);
228 return -EINVAL; 230 return -EINVAL;
229 } 231 }
230 } 232 }
@@ -266,9 +268,9 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
266 struct jffs2_sb_info *c; 268 struct jffs2_sb_info *c;
267 int ret; 269 int ret;
268 270
269 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" 271 jffs2_dbg(1, "jffs2_get_sb_mtd():"
270 " New superblock for device %d (\"%s\")\n", 272 " New superblock for device %d (\"%s\")\n",
271 sb->s_mtd->index, sb->s_mtd->name)); 273 sb->s_mtd->index, sb->s_mtd->name);
272 274
273 c = kzalloc(sizeof(*c), GFP_KERNEL); 275 c = kzalloc(sizeof(*c), GFP_KERNEL);
274 if (!c) 276 if (!c)
@@ -315,7 +317,7 @@ static void jffs2_put_super (struct super_block *sb)
315{ 317{
316 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 318 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
317 319
318 D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); 320 jffs2_dbg(2, "%s()\n", __func__);
319 321
320 if (sb->s_dirt) 322 if (sb->s_dirt)
321 jffs2_write_super(sb); 323 jffs2_write_super(sb);
@@ -336,7 +338,7 @@ static void jffs2_put_super (struct super_block *sb)
336 kfree(c->inocache_list); 338 kfree(c->inocache_list);
337 jffs2_clear_xattr_subsystem(c); 339 jffs2_clear_xattr_subsystem(c);
338 mtd_sync(c->mtd); 340 mtd_sync(c->mtd);
339 D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); 341 jffs2_dbg(1, "%s(): returning\n", __func__);
340} 342}
341 343
342static void jffs2_kill_sb(struct super_block *sb) 344static void jffs2_kill_sb(struct super_block *sb)
@@ -371,7 +373,7 @@ static int __init init_jffs2_fs(void)
371 BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68); 373 BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68);
372 BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32); 374 BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32);
373 375
374 printk(KERN_INFO "JFFS2 version 2.2." 376 pr_info("version 2.2."
375#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 377#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
376 " (NAND)" 378 " (NAND)"
377#endif 379#endif
@@ -386,22 +388,22 @@ static int __init init_jffs2_fs(void)
386 SLAB_MEM_SPREAD), 388 SLAB_MEM_SPREAD),
387 jffs2_i_init_once); 389 jffs2_i_init_once);
388 if (!jffs2_inode_cachep) { 390 if (!jffs2_inode_cachep) {
389 printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n"); 391 pr_err("error: Failed to initialise inode cache\n");
390 return -ENOMEM; 392 return -ENOMEM;
391 } 393 }
392 ret = jffs2_compressors_init(); 394 ret = jffs2_compressors_init();
393 if (ret) { 395 if (ret) {
394 printk(KERN_ERR "JFFS2 error: Failed to initialise compressors\n"); 396 pr_err("error: Failed to initialise compressors\n");
395 goto out; 397 goto out;
396 } 398 }
397 ret = jffs2_create_slab_caches(); 399 ret = jffs2_create_slab_caches();
398 if (ret) { 400 if (ret) {
399 printk(KERN_ERR "JFFS2 error: Failed to initialise slab caches\n"); 401 pr_err("error: Failed to initialise slab caches\n");
400 goto out_compressors; 402 goto out_compressors;
401 } 403 }
402 ret = register_filesystem(&jffs2_fs_type); 404 ret = register_filesystem(&jffs2_fs_type);
403 if (ret) { 405 if (ret) {
404 printk(KERN_ERR "JFFS2 error: Failed to register filesystem\n"); 406 pr_err("error: Failed to register filesystem\n");
405 goto out_slab; 407 goto out_slab;
406 } 408 }
407 return 0; 409 return 0;
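
init_jffs2_fs() above follows the standard goto-unwind ladder: each failing step jumps to a label that releases exactly what the earlier steps acquired, in reverse order, so there is a single success path and no duplicated cleanup. The skeleton of the pattern, with illustrative names:

        static int __init example_init(void)
        {
                int ret;

                ret = setup_a();        /* first resource, e.g. a cache */
                if (ret)
                        return ret;

                ret = setup_b();        /* e.g. register_filesystem() */
                if (ret)
                        goto out_a;     /* undo only what succeeded */
                return 0;

        out_a:
                teardown_a();
                return ret;
        }
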
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index e3035afb181..6e563332bb2 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/fs.h> 15#include <linux/fs.h>
14#include <linux/namei.h> 16#include <linux/namei.h>
@@ -47,10 +49,11 @@ static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
47 */ 49 */
48 50
49 if (!p) { 51 if (!p) {
50 printk(KERN_ERR "jffs2_follow_link(): can't find symlink target\n"); 52 pr_err("%s(): can't find symlink target\n", __func__);
51 p = ERR_PTR(-EIO); 53 p = ERR_PTR(-EIO);
52 } 54 }
53 D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->target)); 55 jffs2_dbg(1, "%s(): target path is '%s'\n",
56 __func__, (char *)f->target);
54 57
55 nd_set_link(nd, p); 58 nd_set_link(nd, p);
56 59
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 30e8f47e8a2..74d9be19df3 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -11,6 +11,8 @@
11 * 11 *
12 */ 12 */
13 13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
14#include <linux/kernel.h> 16#include <linux/kernel.h>
15#include <linux/slab.h> 17#include <linux/slab.h>
16#include <linux/mtd/mtd.h> 18#include <linux/mtd/mtd.h>
@@ -91,7 +93,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
91 93
92 new = kmalloc(sizeof(*new), GFP_KERNEL); 94 new = kmalloc(sizeof(*new), GFP_KERNEL);
93 if (!new) { 95 if (!new) {
94 D1(printk(KERN_DEBUG "No memory to allocate inodirty. Fallback to all considered dirty\n")); 96 jffs2_dbg(1, "No memory to allocate inodirty. Fallback to all considered dirty\n");
95 jffs2_clear_wbuf_ino_list(c); 97 jffs2_clear_wbuf_ino_list(c);
96 c->wbuf_inodes = &inodirty_nomem; 98 c->wbuf_inodes = &inodirty_nomem;
97 return; 99 return;
@@ -113,19 +115,20 @@ static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
113 list_for_each_safe(this, next, &c->erasable_pending_wbuf_list) { 115 list_for_each_safe(this, next, &c->erasable_pending_wbuf_list) {
114 struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list); 116 struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list);
115 117
116 D1(printk(KERN_DEBUG "Removing eraseblock at 0x%08x from erasable_pending_wbuf_list...\n", jeb->offset)); 118 jffs2_dbg(1, "Removing eraseblock at 0x%08x from erasable_pending_wbuf_list...\n",
119 jeb->offset);
117 list_del(this); 120 list_del(this);
118 if ((jiffies + (n++)) & 127) { 121 if ((jiffies + (n++)) & 127) {
119 /* Most of the time, we just erase it immediately. Otherwise we 122 /* Most of the time, we just erase it immediately. Otherwise we
120 spend ages scanning it on mount, etc. */ 123 spend ages scanning it on mount, etc. */
121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 124 jffs2_dbg(1, "...and adding to erase_pending_list\n");
122 list_add_tail(&jeb->list, &c->erase_pending_list); 125 list_add_tail(&jeb->list, &c->erase_pending_list);
123 c->nr_erasing_blocks++; 126 c->nr_erasing_blocks++;
124 jffs2_garbage_collect_trigger(c); 127 jffs2_garbage_collect_trigger(c);
125 } else { 128 } else {
126 /* Sometimes, however, we leave it elsewhere so it doesn't get 129 /* Sometimes, however, we leave it elsewhere so it doesn't get
127 immediately reused, and we spread the load a bit. */ 130 immediately reused, and we spread the load a bit. */
128 D1(printk(KERN_DEBUG "...and adding to erasable_list\n")); 131 jffs2_dbg(1, "...and adding to erasable_list\n");
129 list_add_tail(&jeb->list, &c->erasable_list); 132 list_add_tail(&jeb->list, &c->erasable_list);
130 } 133 }
131 } 134 }
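The `(jiffies + (n++)) & 127` test above is a cheap pseudo-random gate: the low seven bits are nonzero for 127 of every 128 consecutive values, so most blocks go straight to erase_pending_list and roughly one in 128 is parked on erasable_list to spread the erase load. A stand-alone illustration with jiffies replaced by an arbitrary constant:

#include <stdio.h>

int main(void)
{
        unsigned long jiffies = 123456;  /* stand-in for the kernel tick */
        int n, erased = 0, parked = 0;

        for (n = 0; n < 1280; n++) {
                if ((jiffies + n) & 127)
                        erased++;        /* -> erase_pending_list */
                else
                        parked++;        /* -> erasable_list */
        }
        /* With this jiffies value: "erased 1270, parked 10" */
        printf("erased %d, parked %d\n", erased, parked);
        return 0;
}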
@@ -136,7 +139,7 @@ static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
136 139
137static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int allow_empty) 140static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int allow_empty)
138{ 141{
139 D1(printk("About to refile bad block at %08x\n", jeb->offset)); 142 jffs2_dbg(1, "About to refile bad block at %08x\n", jeb->offset);
140 143
141 /* File the existing block on the bad_used_list.... */ 144 /* File the existing block on the bad_used_list.... */
142 if (c->nextblock == jeb) 145 if (c->nextblock == jeb)
@@ -144,12 +147,14 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock
144 else /* Not sure this should ever happen... need more coffee */ 147 else /* Not sure this should ever happen... need more coffee */
145 list_del(&jeb->list); 148 list_del(&jeb->list);
146 if (jeb->first_node) { 149 if (jeb->first_node) {
147 D1(printk("Refiling block at %08x to bad_used_list\n", jeb->offset)); 150 jffs2_dbg(1, "Refiling block at %08x to bad_used_list\n",
151 jeb->offset);
148 list_add(&jeb->list, &c->bad_used_list); 152 list_add(&jeb->list, &c->bad_used_list);
149 } else { 153 } else {
150 BUG_ON(allow_empty == REFILE_NOTEMPTY); 154 BUG_ON(allow_empty == REFILE_NOTEMPTY);
151 /* It has to have had some nodes or we couldn't be here */ 155 /* It has to have had some nodes or we couldn't be here */
152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset)); 156 jffs2_dbg(1, "Refiling block at %08x to erase_pending_list\n",
157 jeb->offset);
153 list_add(&jeb->list, &c->erase_pending_list); 158 list_add(&jeb->list, &c->erase_pending_list);
154 c->nr_erasing_blocks++; 159 c->nr_erasing_blocks++;
155 jffs2_garbage_collect_trigger(c); 160 jffs2_garbage_collect_trigger(c);
@@ -230,10 +235,12 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
230 235
231 ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); 236 ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
232 if (ret && ret != -EUCLEAN && ret != -EBADMSG) { 237 if (ret && ret != -EUCLEAN && ret != -EBADMSG) {
233 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret); 238 pr_warn("%s(): Read back of page at %08x failed: %d\n",
239 __func__, c->wbuf_ofs, ret);
234 return ret; 240 return ret;
235 } else if (retlen != c->wbuf_pagesize) { 241 } else if (retlen != c->wbuf_pagesize) {
236 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x gave short read: %zd not %d.\n", ofs, retlen, c->wbuf_pagesize); 242 pr_warn("%s(): Read back of page at %08x gave short read: %zd not %d\n",
243 __func__, ofs, retlen, c->wbuf_pagesize);
237 return -EIO; 244 return -EIO;
238 } 245 }
239 if (!memcmp(buf, c->wbuf_verify, c->wbuf_pagesize)) 246 if (!memcmp(buf, c->wbuf_verify, c->wbuf_pagesize))
@@ -246,12 +253,12 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
246 else 253 else
247 eccstr = "OK or unused"; 254 eccstr = "OK or unused";
248 255
249 printk(KERN_WARNING "Write verify error (ECC %s) at %08x. Wrote:\n", 256 pr_warn("Write verify error (ECC %s) at %08x. Wrote:\n",
250 eccstr, c->wbuf_ofs); 257 eccstr, c->wbuf_ofs);
251 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, 258 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1,
252 c->wbuf, c->wbuf_pagesize, 0); 259 c->wbuf, c->wbuf_pagesize, 0);
253 260
254 printk(KERN_WARNING "Read back:\n"); 261 pr_warn("Read back:\n");
255 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, 262 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1,
256 c->wbuf_verify, c->wbuf_pagesize, 0); 263 c->wbuf_verify, c->wbuf_pagesize, 0);
257 264
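print_hex_dump() as called above emits the write buffer and the read-back copy in canonical hex: one 16-byte row per line at the given log level, each row prefixed with its offset into the buffer. The call shape, wrapped in a hypothetical helper:

#include <linux/printk.h>

/* Hypothetical wrapper around the dump calls above: 16 bytes per row,
 * 1-byte groups, rows prefixed with their buffer offset, and no
 * trailing ASCII column (the final `false`). */
static void dump_page(const void *buf, size_t len)
{
        print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
                       16, 1, buf, len, false);
}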
@@ -308,7 +315,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
308 315
309 if (!first_raw) { 316 if (!first_raw) {
310 /* All nodes were obsolete. Nothing to recover. */ 317 /* All nodes were obsolete. Nothing to recover. */
311 D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n")); 318 jffs2_dbg(1, "No non-obsolete nodes to be recovered. Just filing block bad\n");
312 c->wbuf_len = 0; 319 c->wbuf_len = 0;
313 return; 320 return;
314 } 321 }
@@ -331,7 +338,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
331 338
332 buf = kmalloc(end - start, GFP_KERNEL); 339 buf = kmalloc(end - start, GFP_KERNEL);
333 if (!buf) { 340 if (!buf) {
334 printk(KERN_CRIT "Malloc failure in wbuf recovery. Data loss ensues.\n"); 341 pr_crit("Malloc failure in wbuf recovery. Data loss ensues.\n");
335 342
336 goto read_failed; 343 goto read_failed;
337 } 344 }
@@ -346,7 +353,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
346 ret = 0; 353 ret = 0;
347 354
348 if (ret || retlen != c->wbuf_ofs - start) { 355 if (ret || retlen != c->wbuf_ofs - start) {
349 printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n"); 356 pr_crit("Old data are already lost in wbuf recovery. Data loss ensues.\n");
350 357
351 kfree(buf); 358 kfree(buf);
352 buf = NULL; 359 buf = NULL;
@@ -380,7 +387,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
380 /* ... and get an allocation of space from a shiny new block instead */ 387 /* ... and get an allocation of space from a shiny new block instead */
381 ret = jffs2_reserve_space_gc(c, end-start, &len, JFFS2_SUMMARY_NOSUM_SIZE); 388 ret = jffs2_reserve_space_gc(c, end-start, &len, JFFS2_SUMMARY_NOSUM_SIZE);
382 if (ret) { 389 if (ret) {
383 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n"); 390 pr_warn("Failed to allocate space for wbuf recovery. Data loss ensues.\n");
384 kfree(buf); 391 kfree(buf);
385 return; 392 return;
386 } 393 }
@@ -390,7 +397,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
390 397
391 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, nr_refile); 398 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, nr_refile);
392 if (ret) { 399 if (ret) {
393 printk(KERN_WARNING "Failed to allocate node refs for wbuf recovery. Data loss ensues.\n"); 400 pr_warn("Failed to allocate node refs for wbuf recovery. Data loss ensues.\n");
394 kfree(buf); 401 kfree(buf);
395 return; 402 return;
396 } 403 }
@@ -406,13 +413,13 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
406 unsigned char *rewrite_buf = buf?:c->wbuf; 413 unsigned char *rewrite_buf = buf?:c->wbuf;
407 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize); 414 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize);
408 415
409 D1(printk(KERN_DEBUG "Write 0x%x bytes at 0x%08x in wbuf recover\n", 416 jffs2_dbg(1, "Write 0x%x bytes at 0x%08x in wbuf recover\n",
410 towrite, ofs)); 417 towrite, ofs);
411 418
412#ifdef BREAKMEHEADER 419#ifdef BREAKMEHEADER
413 static int breakme; 420 static int breakme;
414 if (breakme++ == 20) { 421 if (breakme++ == 20) {
415 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); 422 pr_notice("Faking write error at 0x%08x\n", ofs);
416 breakme = 0; 423 breakme = 0;
417 mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf); 424 mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf);
418 ret = -EIO; 425 ret = -EIO;
@@ -423,7 +430,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
423 430
424 if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) { 431 if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) {
425 /* Argh. We tried. Really we did. */ 432 /* Argh. We tried. Really we did. */
426 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n"); 433 pr_crit("Recovery of wbuf failed due to a second write error\n");
427 kfree(buf); 434 kfree(buf);
428 435
429 if (retlen) 436 if (retlen)
@@ -431,7 +438,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
431 438
432 return; 439 return;
433 } 440 }
434 printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs); 441 pr_notice("Recovery of wbuf succeeded to %08x\n", ofs);
435 442
436 c->wbuf_len = (end - start) - towrite; 443 c->wbuf_len = (end - start) - towrite;
437 c->wbuf_ofs = ofs + towrite; 444 c->wbuf_ofs = ofs + towrite;
@@ -459,8 +466,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
459 struct jffs2_raw_node_ref **adjust_ref = NULL; 466 struct jffs2_raw_node_ref **adjust_ref = NULL;
460 struct jffs2_inode_info *f = NULL; 467 struct jffs2_inode_info *f = NULL;
461 468
462 D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n", 469 jffs2_dbg(1, "Refiling block of %08x at %08x(%d) to %08x\n",
463 rawlen, ref_offset(raw), ref_flags(raw), ofs)); 470 rawlen, ref_offset(raw), ref_flags(raw), ofs);
464 471
465 ic = jffs2_raw_ref_to_ic(raw); 472 ic = jffs2_raw_ref_to_ic(raw);
466 473
@@ -540,7 +547,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
540 547
541 /* Fix up the original jeb now it's on the bad_list */ 548 /* Fix up the original jeb now it's on the bad_list */
542 if (first_raw == jeb->first_node) { 549 if (first_raw == jeb->first_node) {
543 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 550 jffs2_dbg(1, "Failing block at %08x is now empty. Moving to erase_pending_list\n",
551 jeb->offset);
544 list_move(&jeb->list, &c->erase_pending_list); 552 list_move(&jeb->list, &c->erase_pending_list);
545 c->nr_erasing_blocks++; 553 c->nr_erasing_blocks++;
546 jffs2_garbage_collect_trigger(c); 554 jffs2_garbage_collect_trigger(c);
@@ -554,7 +562,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
554 562
555 spin_unlock(&c->erase_completion_lock); 563 spin_unlock(&c->erase_completion_lock);
556 564
557 D1(printk(KERN_DEBUG "wbuf recovery completed OK. wbuf_ofs 0x%08x, len 0x%x\n", c->wbuf_ofs, c->wbuf_len)); 565 jffs2_dbg(1, "wbuf recovery completed OK. wbuf_ofs 0x%08x, len 0x%x\n",
566 c->wbuf_ofs, c->wbuf_len);
558 567
559} 568}
560 569
@@ -579,7 +588,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
579 return 0; 588 return 0;
580 589
581 if (!mutex_is_locked(&c->alloc_sem)) { 590 if (!mutex_is_locked(&c->alloc_sem)) {
582 printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n"); 591 pr_crit("jffs2_flush_wbuf() called with alloc_sem not locked!\n");
583 BUG(); 592 BUG();
584 } 593 }
585 594
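The pr_crit()-plus-BUG() above is an open-coded "caller must hold the lock" assertion; mutex_is_locked() only reports that *someone* holds the mutex, which is as strong a check as this API offers. A minimal sketch of the same guard (lockdep_assert_held() would be the stricter alternative where lockdep is enabled, but that is an aside, not part of this patch):

#include <linux/mutex.h>
#include <linux/printk.h>
#include <linux/bug.h>

static void assert_alloc_sem_held(struct mutex *alloc_sem)
{
        if (!mutex_is_locked(alloc_sem)) {
                pr_crit("called with alloc_sem not locked!\n");
                BUG();
        }
}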
@@ -617,7 +626,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
617#ifdef BREAKME 626#ifdef BREAKME
618 static int breakme; 627 static int breakme;
619 if (breakme++ == 20) { 628 if (breakme++ == 20) {
620 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); 629 pr_notice("Faking write error at 0x%08x\n", c->wbuf_ofs);
621 breakme = 0; 630 breakme = 0;
622 mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, 631 mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
623 brokenbuf); 632 brokenbuf);
@@ -629,11 +638,11 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
629 &retlen, c->wbuf); 638 &retlen, c->wbuf);
630 639
631 if (ret) { 640 if (ret) {
632 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret); 641 pr_warn("jffs2_flush_wbuf(): Write failed with %d\n", ret);
633 goto wfail; 642 goto wfail;
634 } else if (retlen != c->wbuf_pagesize) { 643 } else if (retlen != c->wbuf_pagesize) {
635 printk(KERN_WARNING "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n", 644 pr_warn("jffs2_flush_wbuf(): Write was short: %zd instead of %d\n",
636 retlen, c->wbuf_pagesize); 645 retlen, c->wbuf_pagesize);
637 ret = -EIO; 646 ret = -EIO;
638 goto wfail; 647 goto wfail;
639 } else if ((ret = jffs2_verify_write(c, c->wbuf, c->wbuf_ofs))) { 648 } else if ((ret = jffs2_verify_write(c, c->wbuf, c->wbuf_ofs))) {
@@ -647,17 +656,18 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
647 if (pad) { 656 if (pad) {
648 uint32_t waste = c->wbuf_pagesize - c->wbuf_len; 657 uint32_t waste = c->wbuf_pagesize - c->wbuf_len;
649 658
650 D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n", 659 jffs2_dbg(1, "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n",
651 (wbuf_jeb==c->nextblock)?"next":"", wbuf_jeb->offset)); 660 (wbuf_jeb == c->nextblock) ? "next" : "",
661 wbuf_jeb->offset);
652 662
653 /* wbuf_pagesize - wbuf_len is the amount of space that's to be 663 /* wbuf_pagesize - wbuf_len is the amount of space that's to be
654 padded. If there is less free space in the block than that, 664 padded. If there is less free space in the block than that,
655 something screwed up */ 665 something screwed up */
656 if (wbuf_jeb->free_size < waste) { 666 if (wbuf_jeb->free_size < waste) {
657 printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n", 667 pr_crit("jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n",
658 c->wbuf_ofs, c->wbuf_len, waste); 668 c->wbuf_ofs, c->wbuf_len, waste);
659 printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n", 669 pr_crit("jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n",
660 wbuf_jeb->offset, wbuf_jeb->free_size); 670 wbuf_jeb->offset, wbuf_jeb->free_size);
661 BUG(); 671 BUG();
662 } 672 }
663 673
@@ -694,14 +704,14 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
694 uint32_t old_wbuf_len; 704 uint32_t old_wbuf_len;
695 int ret = 0; 705 int ret = 0;
696 706
697 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino)); 707 jffs2_dbg(1, "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino);
698 708
699 if (!c->wbuf) 709 if (!c->wbuf)
700 return 0; 710 return 0;
701 711
702 mutex_lock(&c->alloc_sem); 712 mutex_lock(&c->alloc_sem);
703 if (!jffs2_wbuf_pending_for_ino(c, ino)) { 713 if (!jffs2_wbuf_pending_for_ino(c, ino)) {
704 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino)); 714 jffs2_dbg(1, "Ino #%d not pending in wbuf. Returning\n", ino);
705 mutex_unlock(&c->alloc_sem); 715 mutex_unlock(&c->alloc_sem);
706 return 0; 716 return 0;
707 } 717 }
@@ -711,7 +721,8 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
711 721
712 if (c->unchecked_size) { 722 if (c->unchecked_size) {
713 /* GC won't make any progress for a while */ 723 /* GC won't make any progress for a while */
714 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() padding. Not finished checking\n")); 724 jffs2_dbg(1, "%s(): padding. Not finished checking\n",
725 __func__);
715 down_write(&c->wbuf_sem); 726 down_write(&c->wbuf_sem);
716 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING); 727 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
717 /* retry flushing wbuf in case jffs2_wbuf_recover 728 /* retry flushing wbuf in case jffs2_wbuf_recover
@@ -724,7 +735,7 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
724 735
725 mutex_unlock(&c->alloc_sem); 736 mutex_unlock(&c->alloc_sem);
726 737
727 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() calls gc pass\n")); 738 jffs2_dbg(1, "%s(): calls gc pass\n", __func__);
728 739
729 ret = jffs2_garbage_collect_pass(c); 740 ret = jffs2_garbage_collect_pass(c);
730 if (ret) { 741 if (ret) {
@@ -742,7 +753,7 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
742 mutex_lock(&c->alloc_sem); 753 mutex_lock(&c->alloc_sem);
743 } 754 }
744 755
745 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() ends...\n")); 756 jffs2_dbg(1, "%s(): ends...\n", __func__);
746 757
747 mutex_unlock(&c->alloc_sem); 758 mutex_unlock(&c->alloc_sem);
748 return ret; 759 return ret;
@@ -811,9 +822,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
811 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) { 822 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) {
812 /* It's a write to a new block */ 823 /* It's a write to a new block */
813 if (c->wbuf_len) { 824 if (c->wbuf_len) {
814 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx " 825 jffs2_dbg(1, "%s(): to 0x%lx causes flush of wbuf at 0x%08x\n",
815 "causes flush of wbuf at 0x%08x\n", 826 __func__, (unsigned long)to, c->wbuf_ofs);
816 (unsigned long)to, c->wbuf_ofs));
817 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT); 827 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
818 if (ret) 828 if (ret)
819 goto outerr; 829 goto outerr;
@@ -825,11 +835,11 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
825 835
826 if (to != PAD(c->wbuf_ofs + c->wbuf_len)) { 836 if (to != PAD(c->wbuf_ofs + c->wbuf_len)) {
827 /* We're not writing immediately after the writebuffer. Bad. */ 837 /* We're not writing immediately after the writebuffer. Bad. */
828 printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write " 838 pr_crit("%s(): Non-contiguous write to %08lx\n",
829 "to %08lx\n", (unsigned long)to); 839 __func__, (unsigned long)to);
830 if (c->wbuf_len) 840 if (c->wbuf_len)
831 printk(KERN_CRIT "wbuf was previously %08x-%08x\n", 841 pr_crit("wbuf was previously %08x-%08x\n",
832 c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len); 842 c->wbuf_ofs, c->wbuf_ofs + c->wbuf_len);
833 BUG(); 843 BUG();
834 } 844 }
835 845
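The BUG() above fires when a caller violates the write-buffer contract: a write must either open a new eraseblock (different SECTOR_ADDR()) or continue exactly at the padded end of the pending buffer. A stand-alone paraphrase of that rule; PAD() matches fs/jffs2/nodelist.h, while sector_addr() paraphrases the SECTOR_ADDR() macro, which in the kernel reads c->sector_size from the superblock:

#include <stdint.h>

#define PAD(x) (((x) + 3) & ~3)        /* round up to 4-byte alignment */

static uint32_t sector_addr(uint32_t ofs, uint32_t sector_size)
{
        return (ofs / sector_size) * sector_size;
}

/* A write to `to` is legal when it opens a new eraseblock, or when it
 * lands exactly at the padded end of the current write buffer. */
static int write_is_contiguous(uint32_t to, uint32_t wbuf_ofs,
                               uint32_t wbuf_len, uint32_t sector_size)
{
        if (sector_addr(to, sector_size) != sector_addr(wbuf_ofs, sector_size))
                return 1;       /* new block: flushed and restarted */
        return to == PAD(wbuf_ofs + wbuf_len);
}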
@@ -957,8 +967,8 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
957 967
958 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { 968 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
959 if (ret == -EBADMSG) 969 if (ret == -EBADMSG)
960 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx)" 970 pr_warn("mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
961 " returned ECC error\n", len, ofs); 971 len, ofs);
962 /* 972 /*
963 * We have the raw data without ECC correction in the buffer, 973 * We have the raw data without ECC correction in the buffer,
964 * maybe we are lucky and all data or parts are correct. We 974 * maybe we are lucky and all data or parts are correct. We
@@ -1034,9 +1044,8 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
1034 1044
1035 ret = mtd_read_oob(c->mtd, jeb->offset, &ops); 1045 ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
1036 if (ret || ops.oobretlen != ops.ooblen) { 1046 if (ret || ops.oobretlen != ops.ooblen) {
1037 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" 1047 pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n",
1038 " bytes, read %zd bytes, error %d\n", 1048 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1039 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1040 if (!ret) 1049 if (!ret)
1041 ret = -EIO; 1050 ret = -EIO;
1042 return ret; 1051 return ret;
@@ -1048,8 +1057,8 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
1048 continue; 1057 continue;
1049 1058
1050 if (ops.oobbuf[i] != 0xFF) { 1059 if (ops.oobbuf[i] != 0xFF) {
1051 D2(printk(KERN_DEBUG "Found %02x at %x in OOB for " 1060 jffs2_dbg(2, "Found %02x at %x in OOB for "
1052 "%08x\n", ops.oobbuf[i], i, jeb->offset)); 1061 "%08x\n", ops.oobbuf[i], i, jeb->offset);
1053 return 1; 1062 return 1;
1054 } 1063 }
1055 } 1064 }
@@ -1077,9 +1086,8 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
1077 1086
1078 ret = mtd_read_oob(c->mtd, jeb->offset, &ops); 1087 ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
1079 if (ret || ops.oobretlen != ops.ooblen) { 1088 if (ret || ops.oobretlen != ops.ooblen) {
1080 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" 1089 pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n",
1081 " bytes, read %zd bytes, error %d\n", 1090 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1082 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1083 if (!ret) 1091 if (!ret)
1084 ret = -EIO; 1092 ret = -EIO;
1085 return ret; 1093 return ret;
@@ -1103,9 +1111,8 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
1103 1111
1104 ret = mtd_write_oob(c->mtd, jeb->offset, &ops); 1112 ret = mtd_write_oob(c->mtd, jeb->offset, &ops);
1105 if (ret || ops.oobretlen != ops.ooblen) { 1113 if (ret || ops.oobretlen != ops.ooblen) {
1106 printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd" 1114 pr_err("cannot write OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n",
1107 " bytes, read %zd bytes, error %d\n", 1115 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1108 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1109 if (!ret) 1116 if (!ret)
1110 ret = -EIO; 1117 ret = -EIO;
1111 return ret; 1118 return ret;
@@ -1130,11 +1137,12 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
1130 if( ++jeb->bad_count < MAX_ERASE_FAILURES) 1137 if( ++jeb->bad_count < MAX_ERASE_FAILURES)
1131 return 0; 1138 return 0;
1132 1139
1133 printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); 1140 pr_warn("marking eraseblock at %08x as bad\n", bad_offset);
1134 ret = mtd_block_markbad(c->mtd, bad_offset); 1141 ret = mtd_block_markbad(c->mtd, bad_offset);
1135 1142
1136 if (ret) { 1143 if (ret) {
1137 D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); 1144 jffs2_dbg(1, "%s(): Write failed for block at %08x: error %d\n",
1145 __func__, jeb->offset, ret);
1138 return ret; 1146 return ret;
1139 } 1147 }
1140 return 1; 1148 return 1;
@@ -1151,11 +1159,11 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1151 c->cleanmarker_size = 0; 1159 c->cleanmarker_size = 0;
1152 1160
1153 if (!oinfo || oinfo->oobavail == 0) { 1161 if (!oinfo || oinfo->oobavail == 0) {
1154 printk(KERN_ERR "inconsistent device description\n"); 1162 pr_err("inconsistent device description\n");
1155 return -EINVAL; 1163 return -EINVAL;
1156 } 1164 }
1157 1165
1158 D1(printk(KERN_DEBUG "JFFS2 using OOB on NAND\n")); 1166 jffs2_dbg(1, "using OOB on NAND\n");
1159 1167
1160 c->oobavail = oinfo->oobavail; 1168 c->oobavail = oinfo->oobavail;
1161 1169
@@ -1222,7 +1230,7 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1222 1230
1223 if ((c->flash_size % c->sector_size) != 0) { 1231 if ((c->flash_size % c->sector_size) != 0) {
1224 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size; 1232 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size;
1225 printk(KERN_WARNING "JFFS2 flash size adjusted to %dKiB\n", c->flash_size); 1233 pr_warn("flash size adjusted to %dKiB\n", c->flash_size);
1226 }; 1234 };
1227 1235
1228 c->wbuf_ofs = 0xFFFFFFFF; 1236 c->wbuf_ofs = 0xFFFFFFFF;
@@ -1239,7 +1247,8 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1239 } 1247 }
1240#endif 1248#endif
1241 1249
1242 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size); 1250 pr_info("write-buffering enabled buffer (%d) erasesize (%d)\n",
1251 c->wbuf_pagesize, c->sector_size);
1243 1252
1244 return 0; 1253 return 0;
1245} 1254}
@@ -1297,7 +1306,8 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
1297 if (!c->wbuf) 1306 if (!c->wbuf)
1298 return -ENOMEM; 1307 return -ENOMEM;
1299 1308
1300 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size); 1309 pr_info("write-buffering enabled buffer (%d) erasesize (%d)\n",
1310 c->wbuf_pagesize, c->sector_size);
1301 1311
1302 return 0; 1312 return 0;
1303} 1313}
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 30d175b6d29..b634de4c810 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/fs.h> 15#include <linux/fs.h>
14#include <linux/crc32.h> 16#include <linux/crc32.h>
@@ -36,7 +38,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
36 f->inocache->state = INO_STATE_PRESENT; 38 f->inocache->state = INO_STATE_PRESENT;
37 39
38 jffs2_add_ino_cache(c, f->inocache); 40 jffs2_add_ino_cache(c, f->inocache);
39 D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino)); 41 jffs2_dbg(1, "%s(): Assigned ino# %d\n", __func__, f->inocache->ino);
40 ri->ino = cpu_to_je32(f->inocache->ino); 42 ri->ino = cpu_to_je32(f->inocache->ino);
41 43
42 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 44 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -68,7 +70,7 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
68 unsigned long cnt = 2; 70 unsigned long cnt = 2;
69 71
70 D1(if(je32_to_cpu(ri->hdr_crc) != crc32(0, ri, sizeof(struct jffs2_unknown_node)-4)) { 72 D1(if(je32_to_cpu(ri->hdr_crc) != crc32(0, ri, sizeof(struct jffs2_unknown_node)-4)) {
71 printk(KERN_CRIT "Eep. CRC not correct in jffs2_write_dnode()\n"); 73 pr_crit("Eep. CRC not correct in jffs2_write_dnode()\n");
72 BUG(); 74 BUG();
73 } 75 }
74 ); 76 );
@@ -78,7 +80,9 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
78 vecs[1].iov_len = datalen; 80 vecs[1].iov_len = datalen;
79 81
80 if (je32_to_cpu(ri->totlen) != sizeof(*ri) + datalen) { 82 if (je32_to_cpu(ri->totlen) != sizeof(*ri) + datalen) {
81 printk(KERN_WARNING "jffs2_write_dnode: ri->totlen (0x%08x) != sizeof(*ri) (0x%08zx) + datalen (0x%08x)\n", je32_to_cpu(ri->totlen), sizeof(*ri), datalen); 83 pr_warn("%s(): ri->totlen (0x%08x) != sizeof(*ri) (0x%08zx) + datalen (0x%08x)\n",
84 __func__, je32_to_cpu(ri->totlen),
85 sizeof(*ri), datalen);
82 } 86 }
83 87
84 fn = jffs2_alloc_full_dnode(); 88 fn = jffs2_alloc_full_dnode();
@@ -95,9 +99,9 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
95 99
96 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) { 100 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) {
97 BUG_ON(!retried); 101 BUG_ON(!retried);
98 D1(printk(KERN_DEBUG "jffs2_write_dnode : dnode_version %d, " 102 jffs2_dbg(1, "%s(): dnode_version %d, highest version %d -> updating dnode\n",
99 "highest version %d -> updating dnode\n", 103 __func__,
100 je32_to_cpu(ri->version), f->highest_version)); 104 je32_to_cpu(ri->version), f->highest_version);
101 ri->version = cpu_to_je32(++f->highest_version); 105 ri->version = cpu_to_je32(++f->highest_version);
102 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 106 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
103 } 107 }
@@ -106,8 +110,8 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
106 (alloc_mode==ALLOC_GC)?0:f->inocache->ino); 110 (alloc_mode==ALLOC_GC)?0:f->inocache->ino);
107 111
108 if (ret || (retlen != sizeof(*ri) + datalen)) { 112 if (ret || (retlen != sizeof(*ri) + datalen)) {
109 printk(KERN_NOTICE "Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n", 113 pr_notice("Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n",
110 sizeof(*ri)+datalen, flash_ofs, ret, retlen); 114 sizeof(*ri) + datalen, flash_ofs, ret, retlen);
111 115
112 /* Mark the space as dirtied */ 116 /* Mark the space as dirtied */
113 if (retlen) { 117 if (retlen) {
@@ -118,7 +122,8 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
118 this node */ 122 this node */
119 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*ri)+datalen), NULL); 123 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*ri)+datalen), NULL);
120 } else { 124 } else {
121 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs); 125 pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
126 flash_ofs);
122 } 127 }
123 if (!retried && alloc_mode != ALLOC_NORETRY) { 128 if (!retried && alloc_mode != ALLOC_NORETRY) {
124 /* Try to reallocate space and retry */ 129 /* Try to reallocate space and retry */
@@ -127,7 +132,7 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
127 132
128 retried = 1; 133 retried = 1;
129 134
130 D1(printk(KERN_DEBUG "Retrying failed write.\n")); 135 jffs2_dbg(1, "Retrying failed write.\n");
131 136
132 jffs2_dbg_acct_sanity_check(c,jeb); 137 jffs2_dbg_acct_sanity_check(c,jeb);
133 jffs2_dbg_acct_paranoia_check(c, jeb); 138 jffs2_dbg_acct_paranoia_check(c, jeb);
@@ -147,14 +152,16 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
147 152
148 if (!ret) { 153 if (!ret) {
149 flash_ofs = write_ofs(c); 154 flash_ofs = write_ofs(c);
150 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); 155 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
156 flash_ofs);
151 157
152 jffs2_dbg_acct_sanity_check(c,jeb); 158 jffs2_dbg_acct_sanity_check(c,jeb);
153 jffs2_dbg_acct_paranoia_check(c, jeb); 159 jffs2_dbg_acct_paranoia_check(c, jeb);
154 160
155 goto retry; 161 goto retry;
156 } 162 }
157 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 163 jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
164 ret);
158 } 165 }
159 /* Release the full_dnode which is now useless, and return */ 166 /* Release the full_dnode which is now useless, and return */
160 jffs2_free_full_dnode(fn); 167 jffs2_free_full_dnode(fn);
@@ -183,10 +190,10 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
183 fn->size = je32_to_cpu(ri->dsize); 190 fn->size = je32_to_cpu(ri->dsize);
184 fn->frags = 0; 191 fn->frags = 0;
185 192
186 D1(printk(KERN_DEBUG "jffs2_write_dnode wrote node at 0x%08x(%d) with dsize 0x%x, csize 0x%x, node_crc 0x%08x, data_crc 0x%08x, totlen 0x%08x\n", 193 jffs2_dbg(1, "jffs2_write_dnode wrote node at 0x%08x(%d) with dsize 0x%x, csize 0x%x, node_crc 0x%08x, data_crc 0x%08x, totlen 0x%08x\n",
187 flash_ofs & ~3, flash_ofs & 3, je32_to_cpu(ri->dsize), 194 flash_ofs & ~3, flash_ofs & 3, je32_to_cpu(ri->dsize),
188 je32_to_cpu(ri->csize), je32_to_cpu(ri->node_crc), 195 je32_to_cpu(ri->csize), je32_to_cpu(ri->node_crc),
189 je32_to_cpu(ri->data_crc), je32_to_cpu(ri->totlen))); 196 je32_to_cpu(ri->data_crc), je32_to_cpu(ri->totlen));
190 197
191 if (retried) { 198 if (retried) {
192 jffs2_dbg_acct_sanity_check(c,NULL); 199 jffs2_dbg_acct_sanity_check(c,NULL);
@@ -206,22 +213,23 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
206 int retried = 0; 213 int retried = 0;
207 int ret; 214 int ret;
208 215
209 D1(printk(KERN_DEBUG "jffs2_write_dirent(ino #%u, name at *0x%p \"%s\"->ino #%u, name_crc 0x%08x)\n", 216 jffs2_dbg(1, "%s(ino #%u, name at *0x%p \"%s\"->ino #%u, name_crc 0x%08x)\n",
217 __func__,
210 je32_to_cpu(rd->pino), name, name, je32_to_cpu(rd->ino), 218 je32_to_cpu(rd->pino), name, name, je32_to_cpu(rd->ino),
211 je32_to_cpu(rd->name_crc))); 219 je32_to_cpu(rd->name_crc));
212 220
213 D1(if(je32_to_cpu(rd->hdr_crc) != crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)) { 221 D1(if(je32_to_cpu(rd->hdr_crc) != crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)) {
214 printk(KERN_CRIT "Eep. CRC not correct in jffs2_write_dirent()\n"); 222 pr_crit("Eep. CRC not correct in jffs2_write_dirent()\n");
215 BUG(); 223 BUG();
216 }); 224 });
217 225
218 if (strnlen(name, namelen) != namelen) { 226 if (strnlen(name, namelen) != namelen) {
219 /* This should never happen, but seems to have done on at least one 227 /* This should never happen, but seems to have done on at least one
220 occasion: https://dev.laptop.org/ticket/4184 */ 228 occasion: https://dev.laptop.org/ticket/4184 */
221 printk(KERN_CRIT "Error in jffs2_write_dirent() -- name contains zero bytes!\n"); 229 pr_crit("Error in jffs2_write_dirent() -- name contains zero bytes!\n");
222 printk(KERN_CRIT "Directory inode #%u, name at *0x%p \"%s\"->ino #%u, name_crc 0x%08x\n", 230 pr_crit("Directory inode #%u, name at *0x%p \"%s\"->ino #%u, name_crc 0x%08x\n",
223 je32_to_cpu(rd->pino), name, name, je32_to_cpu(rd->ino), 231 je32_to_cpu(rd->pino), name, name, je32_to_cpu(rd->ino),
224 je32_to_cpu(rd->name_crc)); 232 je32_to_cpu(rd->name_crc));
225 WARN_ON(1); 233 WARN_ON(1);
226 return ERR_PTR(-EIO); 234 return ERR_PTR(-EIO);
227 } 235 }
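The strnlen() test above catches names with embedded NUL bytes: if the first NUL appears before namelen, strnlen() returns less than namelen and the dirent is rejected rather than written with a silently truncated name. A user-space illustration:

#include <string.h>
#include <stdio.h>

static int name_has_embedded_nul(const char *name, size_t namelen)
{
        /* strnlen() stops at the first NUL, so any disagreement with
         * the stored length means the name contains a zero byte. */
        return strnlen(name, namelen) != namelen;
}

int main(void)
{
        printf("%d\n", name_has_embedded_nul("abc", 3));   /* 0: clean */
        printf("%d\n", name_has_embedded_nul("a\0c", 3));  /* 1: reject */
        return 0;
}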
@@ -249,9 +257,9 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
249 257
250 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) { 258 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) {
251 BUG_ON(!retried); 259 BUG_ON(!retried);
252 D1(printk(KERN_DEBUG "jffs2_write_dirent : dirent_version %d, " 260 jffs2_dbg(1, "%s(): dirent_version %d, highest version %d -> updating dirent\n",
253 "highest version %d -> updating dirent\n", 261 __func__,
254 je32_to_cpu(rd->version), f->highest_version)); 262 je32_to_cpu(rd->version), f->highest_version);
255 rd->version = cpu_to_je32(++f->highest_version); 263 rd->version = cpu_to_je32(++f->highest_version);
256 fd->version = je32_to_cpu(rd->version); 264 fd->version = je32_to_cpu(rd->version);
257 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 265 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
@@ -260,13 +268,14 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
260 ret = jffs2_flash_writev(c, vecs, 2, flash_ofs, &retlen, 268 ret = jffs2_flash_writev(c, vecs, 2, flash_ofs, &retlen,
261 (alloc_mode==ALLOC_GC)?0:je32_to_cpu(rd->pino)); 269 (alloc_mode==ALLOC_GC)?0:je32_to_cpu(rd->pino));
262 if (ret || (retlen != sizeof(*rd) + namelen)) { 270 if (ret || (retlen != sizeof(*rd) + namelen)) {
263 printk(KERN_NOTICE "Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n", 271 pr_notice("Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n",
264 sizeof(*rd)+namelen, flash_ofs, ret, retlen); 272 sizeof(*rd) + namelen, flash_ofs, ret, retlen);
265 /* Mark the space as dirtied */ 273 /* Mark the space as dirtied */
266 if (retlen) { 274 if (retlen) {
267 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*rd)+namelen), NULL); 275 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*rd)+namelen), NULL);
268 } else { 276 } else {
269 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs); 277 pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
278 flash_ofs);
270 } 279 }
271 if (!retried) { 280 if (!retried) {
272 /* Try to reallocate space and retry */ 281 /* Try to reallocate space and retry */
@@ -275,7 +284,7 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
275 284
276 retried = 1; 285 retried = 1;
277 286
278 D1(printk(KERN_DEBUG "Retrying failed write.\n")); 287 jffs2_dbg(1, "Retrying failed write.\n");
279 288
280 jffs2_dbg_acct_sanity_check(c,jeb); 289 jffs2_dbg_acct_sanity_check(c,jeb);
281 jffs2_dbg_acct_paranoia_check(c, jeb); 290 jffs2_dbg_acct_paranoia_check(c, jeb);
@@ -295,12 +304,14 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
295 304
296 if (!ret) { 305 if (!ret) {
297 flash_ofs = write_ofs(c); 306 flash_ofs = write_ofs(c);
298 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); 307 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write\n",
308 flash_ofs);
299 jffs2_dbg_acct_sanity_check(c,jeb); 309 jffs2_dbg_acct_sanity_check(c,jeb);
300 jffs2_dbg_acct_paranoia_check(c, jeb); 310 jffs2_dbg_acct_paranoia_check(c, jeb);
301 goto retry; 311 goto retry;
302 } 312 }
303 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 313 jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
314 ret);
304 } 315 }
305 /* Release the full_dnode which is now useless, and return */ 316 /* Release the full_dnode which is now useless, and return */
306 jffs2_free_full_dirent(fd); 317 jffs2_free_full_dirent(fd);
@@ -333,8 +344,8 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
333 int ret = 0; 344 int ret = 0;
334 uint32_t writtenlen = 0; 345 uint32_t writtenlen = 0;
335 346
336 D1(printk(KERN_DEBUG "jffs2_write_inode_range(): Ino #%u, ofs 0x%x, len 0x%x\n", 347 jffs2_dbg(1, "%s(): Ino #%u, ofs 0x%x, len 0x%x\n",
337 f->inocache->ino, offset, writelen)); 348 __func__, f->inocache->ino, offset, writelen);
338 349
339 while(writelen) { 350 while(writelen) {
340 struct jffs2_full_dnode *fn; 351 struct jffs2_full_dnode *fn;
@@ -345,12 +356,13 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
345 int retried = 0; 356 int retried = 0;
346 357
347 retry: 358 retry:
348 D2(printk(KERN_DEBUG "jffs2_commit_write() loop: 0x%x to write to 0x%x\n", writelen, offset)); 359 jffs2_dbg(2, "jffs2_commit_write() loop: 0x%x to write to 0x%x\n",
360 writelen, offset);
349 361
350 ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, 362 ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN,
351 &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 363 &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
352 if (ret) { 364 if (ret) {
353 D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret)); 365 jffs2_dbg(1, "jffs2_reserve_space returned %d\n", ret);
354 break; 366 break;
355 } 367 }
356 mutex_lock(&f->sem); 368 mutex_lock(&f->sem);
@@ -386,7 +398,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
386 if (!retried) { 398 if (!retried) {
387 /* Write error to be retried */ 399 /* Write error to be retried */
388 retried = 1; 400 retried = 1;
389 D1(printk(KERN_DEBUG "Retrying node write in jffs2_write_inode_range()\n")); 401 jffs2_dbg(1, "Retrying node write in jffs2_write_inode_range()\n");
390 goto retry; 402 goto retry;
391 } 403 }
392 break; 404 break;
@@ -399,7 +411,8 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
399 } 411 }
400 if (ret) { 412 if (ret) {
401 /* Eep */ 413 /* Eep */
402 D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in commit_write, returned %d\n", ret)); 414 jffs2_dbg(1, "Eep. add_full_dnode_to_inode() failed in commit_write, returned %d\n",
415 ret);
403 jffs2_mark_node_obsolete(c, fn->raw); 416 jffs2_mark_node_obsolete(c, fn->raw);
404 jffs2_free_full_dnode(fn); 417 jffs2_free_full_dnode(fn);
405 418
@@ -410,11 +423,11 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
410 mutex_unlock(&f->sem); 423 mutex_unlock(&f->sem);
411 jffs2_complete_reservation(c); 424 jffs2_complete_reservation(c);
412 if (!datalen) { 425 if (!datalen) {
413 printk(KERN_WARNING "Eep. We didn't actually write any data in jffs2_write_inode_range()\n"); 426 pr_warn("Eep. We didn't actually write any data in jffs2_write_inode_range()\n");
414 ret = -EIO; 427 ret = -EIO;
415 break; 428 break;
416 } 429 }
417 D1(printk(KERN_DEBUG "increasing writtenlen by %d\n", datalen)); 430 jffs2_dbg(1, "increasing writtenlen by %d\n", datalen);
418 writtenlen += datalen; 431 writtenlen += datalen;
419 offset += datalen; 432 offset += datalen;
420 writelen -= datalen; 433 writelen -= datalen;
@@ -439,7 +452,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
439 */ 452 */
440 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, 453 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
441 JFFS2_SUMMARY_INODE_SIZE); 454 JFFS2_SUMMARY_INODE_SIZE);
442 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 455 jffs2_dbg(1, "%s(): reserved 0x%x bytes\n", __func__, alloclen);
443 if (ret) 456 if (ret)
444 return ret; 457 return ret;
445 458
@@ -450,11 +463,11 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
450 463
451 fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL); 464 fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL);
452 465
453 D1(printk(KERN_DEBUG "jffs2_do_create created file with mode 0x%x\n", 466 jffs2_dbg(1, "jffs2_do_create created file with mode 0x%x\n",
454 jemode_to_cpu(ri->mode))); 467 jemode_to_cpu(ri->mode));
455 468
456 if (IS_ERR(fn)) { 469 if (IS_ERR(fn)) {
457 D1(printk(KERN_DEBUG "jffs2_write_dnode() failed\n")); 470 jffs2_dbg(1, "jffs2_write_dnode() failed\n");
458 /* Eeek. Wave bye bye */ 471 /* Eeek. Wave bye bye */
459 mutex_unlock(&f->sem); 472 mutex_unlock(&f->sem);
460 jffs2_complete_reservation(c); 473 jffs2_complete_reservation(c);
@@ -480,7 +493,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
480 493
481 if (ret) { 494 if (ret) {
482 /* Eep. */ 495 /* Eep. */
483 D1(printk(KERN_DEBUG "jffs2_reserve_space() for dirent failed\n")); 496 jffs2_dbg(1, "jffs2_reserve_space() for dirent failed\n");
484 return ret; 497 return ret;
485 } 498 }
486 499
@@ -597,8 +610,8 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
597 !memcmp(fd->name, name, namelen) && 610 !memcmp(fd->name, name, namelen) &&
598 !fd->name[namelen]) { 611 !fd->name[namelen]) {
599 612
600 D1(printk(KERN_DEBUG "Marking old dirent node (ino #%u) @%08x obsolete\n", 613 jffs2_dbg(1, "Marking old dirent node (ino #%u) @%08x obsolete\n",
601 fd->ino, ref_offset(fd->raw))); 614 fd->ino, ref_offset(fd->raw));
602 jffs2_mark_node_obsolete(c, fd->raw); 615 jffs2_mark_node_obsolete(c, fd->raw);
603 /* We don't want to remove it from the list immediately, 616 /* We don't want to remove it from the list immediately,
604 because that screws up getdents()/seek() semantics even 617 because that screws up getdents()/seek() semantics even
@@ -627,11 +640,13 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
627 dead_f->dents = fd->next; 640 dead_f->dents = fd->next;
628 641
629 if (fd->ino) { 642 if (fd->ino) {
630 printk(KERN_WARNING "Deleting inode #%u with active dentry \"%s\"->ino #%u\n", 643 pr_warn("Deleting inode #%u with active dentry \"%s\"->ino #%u\n",
631 dead_f->inocache->ino, fd->name, fd->ino); 644 dead_f->inocache->ino,
645 fd->name, fd->ino);
632 } else { 646 } else {
633 D1(printk(KERN_DEBUG "Removing deletion dirent for \"%s\" from dir ino #%u\n", 647 jffs2_dbg(1, "Removing deletion dirent for \"%s\" from dir ino #%u\n",
634 fd->name, dead_f->inocache->ino)); 648 fd->name,
649 dead_f->inocache->ino);
635 } 650 }
636 if (fd->raw) 651 if (fd->raw)
637 jffs2_mark_node_obsolete(c, fd->raw); 652 jffs2_mark_node_obsolete(c, fd->raw);
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 3e93cdd1900..b55b803eddc 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/kernel.h> 14#include <linux/kernel.h>
13#include <linux/slab.h> 15#include <linux/slab.h>
14#include <linux/fs.h> 16#include <linux/fs.h>
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5f7c160ea64..07c91ca6017 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -220,12 +220,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
220 220
221 dquot_initialize(dip); 221 dquot_initialize(dip);
222 222
223 /* link count overflow on parent directory ? */
224 if (dip->i_nlink == JFS_LINK_MAX) {
225 rc = -EMLINK;
226 goto out1;
227 }
228
229 /* 223 /*
230 * search parent directory for entry/freespace 224 * search parent directory for entry/freespace
231 * (dtSearch() returns parent directory page pinned) 225 * (dtSearch() returns parent directory page pinned)
@@ -806,9 +800,6 @@ static int jfs_link(struct dentry *old_dentry,
806 jfs_info("jfs_link: %s %s", old_dentry->d_name.name, 800 jfs_info("jfs_link: %s %s", old_dentry->d_name.name,
807 dentry->d_name.name); 801 dentry->d_name.name);
808 802
809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK;
811
812 dquot_initialize(dir); 803 dquot_initialize(dir);
813 804
814 tid = txBegin(ip->i_sb, 0); 805 tid = txBegin(ip->i_sb, 0);
@@ -1138,10 +1129,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1138 rc = -ENOTEMPTY; 1129 rc = -ENOTEMPTY;
1139 goto out3; 1130 goto out3;
1140 } 1131 }
1141 } else if ((new_dir != old_dir) &&
1142 (new_dir->i_nlink == JFS_LINK_MAX)) {
1143 rc = -EMLINK;
1144 goto out3;
1145 } 1132 }
1146 } else if (new_ip) { 1133 } else if (new_ip) {
1147 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); 1134 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 682bca642f3..4a82950f412 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -441,6 +441,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 sb->s_fs_info = sbi; 443 sb->s_fs_info = sbi;
444 sb->s_max_links = JFS_LINK_MAX;
444 sbi->sb = sb; 445 sbi->sb = sb;
445 sbi->uid = sbi->gid = sbi->umask = -1; 446 sbi->uid = sbi->gid = sbi->umask = -1;
446 447
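The three EMLINK checks deleted from fs/jfs/namei.c earlier in this patch are replaced by the single sb->s_max_links assignment above: once the limit is published on the superblock, the VFS can reject over-limit link(), mkdir() and cross-directory rename() calls generically, before ever calling into JFS. A hedged paraphrase of that generic test (the helper name is invented; the real checks sit in fs/namei.c):

#include <linux/fs.h>

/* 0 in s_max_links means "no limit", so filesystems that never set
 * the field keep their old behaviour. */
static int check_max_links(struct inode *dir)
{
        unsigned int max = dir->i_sb->s_max_links;

        if (max && dir->i_nlink >= max)
                return -EMLINK;
        return 0;
}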
@@ -521,7 +522,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
521 ret = PTR_ERR(inode); 522 ret = PTR_ERR(inode);
522 goto out_no_rw; 523 goto out_no_rw;
523 } 524 }
524 sb->s_root = d_alloc_root(inode); 525 sb->s_root = d_make_root(inode);
525 if (!sb->s_root) 526 if (!sb->s_root)
526 goto out_no_root; 527 goto out_no_root;
527 528
@@ -539,7 +540,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
539 540
540out_no_root: 541out_no_root:
541 jfs_err("jfs_read_super: get root dentry failed"); 542 jfs_err("jfs_read_super: get root dentry failed");
542 iput(inode);
543 543
544out_no_rw: 544out_no_rw:
545 rc = jfs_umount(sb); 545 rc = jfs_umount(sb);
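Both jfs hunks above are really one change: unlike d_alloc_root(), d_make_root() consumes its inode argument, dropping it with iput() when dentry allocation fails. That is why the explicit iput(inode) in the out_no_root path must also be removed, or a failed mount would put the root inode twice. Roughly, paraphrasing fs/dcache.c (where __d_alloc() is visible without any extra declaration):

struct dentry *d_make_root(struct inode *root_inode)
{
        struct dentry *res = NULL;

        if (root_inode) {
                static const struct qstr name = { .name = "/", .len = 1 };

                res = __d_alloc(root_inode->i_sb, &name);
                if (res)
                        d_instantiate(res, root_inode);
                else
                        iput(root_inode);  /* failure path owns the inode */
        }
        return res;
}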
@@ -860,8 +860,14 @@ static int __init init_jfs_fs(void)
860 jfs_proc_init(); 860 jfs_proc_init();
861#endif 861#endif
862 862
863 return register_filesystem(&jfs_fs_type); 863 rc = register_filesystem(&jfs_fs_type);
864 if (!rc)
865 return 0;
864 866
867#ifdef PROC_FS_JFS
868 jfs_proc_clean();
869#endif
870 kthread_stop(jfsSyncThread);
865kill_committask: 871kill_committask:
866 for (i = 0; i < commit_threads; i++) 872 for (i = 0; i < commit_threads; i++)
867 kthread_stop(jfsCommitThread[i]); 873 kthread_stop(jfsCommitThread[i]);
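The jfs fix above restores strict reverse-order unwinding: previously a failing register_filesystem() returned its error directly, leaking the sync thread and the /proc entries set up just before it. The general shape of the idiom, with entirely hypothetical names:

#include <linux/fs.h>
#include <linux/init.h>

static struct file_system_type example_fs_type = { .name = "example" };

static int  setup_a(void)    { return 0; }
static int  setup_b(void)    { return 0; }
static void teardown_a(void) { }
static void teardown_b(void) { }

static int __init example_init(void)
{
        int rc;

        rc = setup_a();
        if (rc)
                return rc;
        rc = setup_b();
        if (rc)
                goto undo_a;
        rc = register_filesystem(&example_fs_type);
        if (rc)
                goto undo_b;            /* undo in reverse order */
        return 0;

undo_b:
        teardown_b();
undo_a:
        teardown_a();
        return rc;
}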
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b2dbb3ba4f..18d08f5db53 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -3,7 +3,7 @@
3 * Library for filesystems writers. 3 * Library for filesystems writers.
4 */ 4 */
5 5
6#include <linux/module.h> 6#include <linux/export.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
@@ -264,6 +264,13 @@ Enomem:
264 return ERR_PTR(-ENOMEM); 264 return ERR_PTR(-ENOMEM);
265} 265}
266 266
267int simple_open(struct inode *inode, struct file *file)
268{
269 if (inode->i_private)
270 file->private_data = inode->i_private;
271 return 0;
272}
273
267int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 274int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
268{ 275{
269 struct inode *inode = old_dentry->d_inode; 276 struct inode *inode = old_dentry->d_inode;
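simple_open() exists because dozens of drivers, debugfs users especially, carried an identical one-line open handler that copies inode->i_private into file->private_data. With the export below they can point .open at the library version instead. A hypothetical consumer:

#include <linux/fs.h>
#include <linux/module.h>

static ssize_t my_read(struct file *file, char __user *buf,
                       size_t count, loff_t *ppos);

static const struct file_operations my_fops = {
        .owner = THIS_MODULE,
        .open  = simple_open,   /* i_private -> file->private_data */
        .read  = my_read,       /* reads back via file->private_data */
};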
@@ -491,11 +498,9 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
491 inode->i_op = &simple_dir_inode_operations; 498 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 499 inode->i_fop = &simple_dir_operations;
493 set_nlink(inode, 2); 500 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 501 root = d_make_root(inode);
495 if (!root) { 502 if (!root)
496 iput(inode);
497 return -ENOMEM; 503 return -ENOMEM;
498 }
499 for (i = 0; !files->name || files->name[0]; i++, files++) { 504 for (i = 0; !files->name || files->name[0]; i++, files++) {
500 if (!files->name) 505 if (!files->name)
501 continue; 506 continue;
@@ -524,6 +529,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
524 return 0; 529 return 0;
525out: 530out:
526 d_genocide(root); 531 d_genocide(root);
532 shrink_dcache_parent(root);
527 dput(root); 533 dput(root);
528 return -ENOMEM; 534 return -ENOMEM;
529} 535}
@@ -536,7 +542,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
536 spin_lock(&pin_fs_lock); 542 spin_lock(&pin_fs_lock);
537 if (unlikely(!*mount)) { 543 if (unlikely(!*mount)) {
538 spin_unlock(&pin_fs_lock); 544 spin_unlock(&pin_fs_lock);
539 mnt = vfs_kern_mount(type, 0, type->name, NULL); 545 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
540 if (IS_ERR(mnt)) 546 if (IS_ERR(mnt))
541 return PTR_ERR(mnt); 547 return PTR_ERR(mnt);
542 spin_lock(&pin_fs_lock); 548 spin_lock(&pin_fs_lock);
@@ -986,6 +992,7 @@ EXPORT_SYMBOL(simple_dir_operations);
986EXPORT_SYMBOL(simple_empty); 992EXPORT_SYMBOL(simple_empty);
987EXPORT_SYMBOL(simple_fill_super); 993EXPORT_SYMBOL(simple_fill_super);
988EXPORT_SYMBOL(simple_getattr); 994EXPORT_SYMBOL(simple_getattr);
995EXPORT_SYMBOL(simple_open);
989EXPORT_SYMBOL(simple_link); 996EXPORT_SYMBOL(simple_link);
990EXPORT_SYMBOL(simple_lookup); 997EXPORT_SYMBOL(simple_lookup);
991EXPORT_SYMBOL(simple_pin_fs); 998EXPORT_SYMBOL(simple_pin_fs);
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f848b52c67b..13ad1539fbf 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -241,7 +241,7 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
241 p = xdr_inline_decode(xdr, 4); 241 p = xdr_inline_decode(xdr, 4);
242 if (unlikely(p == NULL)) 242 if (unlikely(p == NULL))
243 goto out_overflow; 243 goto out_overflow;
244 if (unlikely(*p > nlm4_failed)) 244 if (unlikely(ntohl(*p) > ntohl(nlm4_failed)))
245 goto out_bad_xdr; 245 goto out_bad_xdr;
246 *stat = *p; 246 *stat = *p;
247 return 0; 247 return 0;
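The one-line fix above is a byte-order bug: nlm4_failed and the other NLM status constants are __be32 wire-format values, so `*p > nlm4_failed` compares raw byte patterns, which on little-endian machines does not follow the protocol's numeric order. Converting both sides with ntohl() restores a meaningful comparison. A user-space illustration with made-up status values:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* ntohl/htonl; the kernel uses be32_to_cpu() */

int main(void)
{
        uint32_t limit = htonl(4);    /* pretend nlm4_failed == 4 */
        uint32_t wire  = htonl(256);  /* an out-of-range status value */

        /* On little-endian: raw prints 0 (the bug), ntohl prints 1.
         * On big-endian both happen to agree, which is why the bug
         * survived testing. */
        printf("raw:   %d\n", wire > limit);
        printf("ntohl: %d\n", ntohl(wire) > ntohl(limit));
        return 0;
}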
@@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = {
598 PROC(GRANTED_RES, res, norep), 598 PROC(GRANTED_RES, res, norep),
599}; 599};
600 600
601struct rpc_version nlm_version4 = { 601const struct rpc_version nlm_version4 = {
602 .number = 4, 602 .number = 4,
603 .nrprocs = ARRAY_SIZE(nlm4_procedures), 603 .nrprocs = ARRAY_SIZE(nlm4_procedures),
604 .procs = nlm4_procedures, 604 .procs = nlm4_procedures,
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 8d4ea8351e3..ba1dc2eebd1 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
62 62
63 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, 63 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
64 nlm_init->protocol, nlm_version, 64 nlm_init->protocol, nlm_version,
65 nlm_init->hostname, nlm_init->noresvport); 65 nlm_init->hostname, nlm_init->noresvport,
66 nlm_init->net);
66 if (host == NULL) { 67 if (host == NULL) {
67 lockd_down(); 68 lockd_down();
68 return ERR_PTR(-ENOLCK); 69 return ERR_PTR(-ENOLCK);
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 180ac34feb9..d269ada7670 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -236,7 +236,7 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
236 p = xdr_inline_decode(xdr, 4); 236 p = xdr_inline_decode(xdr, 4);
237 if (unlikely(p == NULL)) 237 if (unlikely(p == NULL))
238 goto out_overflow; 238 goto out_overflow;
239 if (unlikely(*p > nlm_lck_denied_grace_period)) 239 if (unlikely(ntohl(*p) > ntohl(nlm_lck_denied_grace_period)))
240 goto out_enum; 240 goto out_enum;
241 *stat = *p; 241 *stat = *p;
242 return 0; 242 return 0;
@@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = {
596 PROC(GRANTED_RES, res, norep), 596 PROC(GRANTED_RES, res, norep),
597}; 597};
598 598
599static struct rpc_version nlm_version1 = { 599static const struct rpc_version nlm_version1 = {
600 .number = 1, 600 .number = 1,
601 .nrprocs = ARRAY_SIZE(nlm_procedures), 601 .nrprocs = ARRAY_SIZE(nlm_procedures),
602 .procs = nlm_procedures, 602 .procs = nlm_procedures,
603}; 603};
604 604
605static struct rpc_version nlm_version3 = { 605static const struct rpc_version nlm_version3 = {
606 .number = 3, 606 .number = 3,
607 .nrprocs = ARRAY_SIZE(nlm_procedures), 607 .nrprocs = ARRAY_SIZE(nlm_procedures),
608 .procs = nlm_procedures, 608 .procs = nlm_procedures,
609}; 609};
610 610
611static struct rpc_version *nlm_versions[] = { 611static const struct rpc_version *nlm_versions[] = {
612 [1] = &nlm_version1, 612 [1] = &nlm_version1,
613 [3] = &nlm_version3, 613 [3] = &nlm_version3,
614#ifdef CONFIG_LOCKD_V4 614#ifdef CONFIG_LOCKD_V4
@@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = {
618 618
619static struct rpc_stat nlm_rpc_stats; 619static struct rpc_stat nlm_rpc_stats;
620 620
621struct rpc_program nlm_program = { 621const struct rpc_program nlm_program = {
622 .name = "lockd", 622 .name = "lockd",
623 .number = NLM_PROGRAM, 623 .number = NLM_PROGRAM,
624 .nrvers = ARRAY_SIZE(nlm_versions), 624 .nrvers = ARRAY_SIZE(nlm_versions),
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 6f29836ec0c..eb75ca7c2d6 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -17,6 +17,8 @@
17#include <linux/lockd/lockd.h> 17#include <linux/lockd/lockd.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19 19
20#include <linux/sunrpc/svc_xprt.h>
21
20#include <net/ipv6.h> 22#include <net/ipv6.h>
21 23
22#define NLMDBG_FACILITY NLMDBG_HOSTCACHE 24#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
@@ -54,6 +56,7 @@ struct nlm_lookup_host_info {
54 const char *hostname; /* remote's hostname */ 56 const char *hostname; /* remote's hostname */
55 const size_t hostname_len; /* it's length */ 57 const size_t hostname_len; /* it's length */
56 const int noresvport; /* use non-priv port */ 58 const int noresvport; /* use non-priv port */
59 struct net *net; /* network namespace to bind */
57}; 60};
58 61
59/* 62/*
@@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
155 INIT_LIST_HEAD(&host->h_reclaim); 158 INIT_LIST_HEAD(&host->h_reclaim);
156 host->h_nsmhandle = nsm; 159 host->h_nsmhandle = nsm;
157 host->h_addrbuf = nsm->sm_addrbuf; 160 host->h_addrbuf = nsm->sm_addrbuf;
161 host->net = ni->net;
158 162
159out: 163out:
160 return host; 164 return host;
@@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
206 const unsigned short protocol, 210 const unsigned short protocol,
207 const u32 version, 211 const u32 version,
208 const char *hostname, 212 const char *hostname,
209 int noresvport) 213 int noresvport,
214 struct net *net)
210{ 215{
211 struct nlm_lookup_host_info ni = { 216 struct nlm_lookup_host_info ni = {
212 .server = 0, 217 .server = 0,
@@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
217 .hostname = hostname, 222 .hostname = hostname,
218 .hostname_len = strlen(hostname), 223 .hostname_len = strlen(hostname),
219 .noresvport = noresvport, 224 .noresvport = noresvport,
225 .net = net,
220 }; 226 };
221 struct hlist_head *chain; 227 struct hlist_head *chain;
222 struct hlist_node *pos; 228 struct hlist_node *pos;
@@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
231 237
232 chain = &nlm_client_hosts[nlm_hash_address(sap)]; 238 chain = &nlm_client_hosts[nlm_hash_address(sap)];
233 hlist_for_each_entry(host, pos, chain, h_hash) { 239 hlist_for_each_entry(host, pos, chain, h_hash) {
240 if (host->net != net)
241 continue;
234 if (!rpc_cmp_addr(nlm_addr(host), sap)) 242 if (!rpc_cmp_addr(nlm_addr(host), sap))
235 continue; 243 continue;
236 244
@@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
318 struct nsm_handle *nsm = NULL; 326 struct nsm_handle *nsm = NULL;
319 struct sockaddr *src_sap = svc_daddr(rqstp); 327 struct sockaddr *src_sap = svc_daddr(rqstp);
320 size_t src_len = rqstp->rq_daddrlen; 328 size_t src_len = rqstp->rq_daddrlen;
329 struct net *net = rqstp->rq_xprt->xpt_net;
321 struct nlm_lookup_host_info ni = { 330 struct nlm_lookup_host_info ni = {
322 .server = 1, 331 .server = 1,
323 .sap = svc_addr(rqstp), 332 .sap = svc_addr(rqstp),
@@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
326 .version = rqstp->rq_vers, 335 .version = rqstp->rq_vers,
327 .hostname = hostname, 336 .hostname = hostname,
328 .hostname_len = hostname_len, 337 .hostname_len = hostname_len,
338 .net = net,
329 }; 339 };
330 340
331 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, 341 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
@@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
339 349
340 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; 350 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
341 hlist_for_each_entry(host, pos, chain, h_hash) { 351 hlist_for_each_entry(host, pos, chain, h_hash) {
352 if (host->net != net)
353 continue;
342 if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) 354 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
343 continue; 355 continue;
344 356
@@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host)
431 .to_retries = 5U, 443 .to_retries = 5U,
432 }; 444 };
433 struct rpc_create_args args = { 445 struct rpc_create_args args = {
434 .net = &init_net, 446 .net = host->net,
435 .protocol = host->h_proto, 447 .protocol = host->h_proto,
436 .address = nlm_addr(host), 448 .address = nlm_addr(host),
437 .addrsize = host->h_addrlen, 449 .addrsize = host->h_addrlen,
@@ -553,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
553 nsm_release(nsm); 565 nsm_release(nsm);
554} 566}
555 567
556/*
557 * Shut down the hosts module.
558 * Note that this routine is called only at server shutdown time.
559 */
560void 568void
561nlm_shutdown_hosts(void) 569nlm_shutdown_hosts_net(struct net *net)
562{ 570{
563 struct hlist_head *chain; 571 struct hlist_head *chain;
564 struct hlist_node *pos; 572 struct hlist_node *pos;
@@ -570,6 +578,8 @@ nlm_shutdown_hosts(void)
570 /* First, make all hosts eligible for gc */ 578 /* First, make all hosts eligible for gc */
571 dprintk("lockd: nuking all hosts...\n"); 579 dprintk("lockd: nuking all hosts...\n");
572 for_each_host(host, pos, chain, nlm_server_hosts) { 580 for_each_host(host, pos, chain, nlm_server_hosts) {
581 if (net && host->net != net)
582 continue;
573 host->h_expires = jiffies - 1; 583 host->h_expires = jiffies - 1;
574 if (host->h_rpcclnt) { 584 if (host->h_rpcclnt) {
575 rpc_shutdown_client(host->h_rpcclnt); 585 rpc_shutdown_client(host->h_rpcclnt);
@@ -580,15 +590,29 @@ nlm_shutdown_hosts(void)
580 /* Then, perform a garbage collection pass */ 590 /* Then, perform a garbage collection pass */
581 nlm_gc_hosts(); 591 nlm_gc_hosts();
582 mutex_unlock(&nlm_host_mutex); 592 mutex_unlock(&nlm_host_mutex);
593}
594
595/*
596 * Shut down the hosts module.
597 * Note that this routine is called only at server shutdown time.
598 */
599void
600nlm_shutdown_hosts(void)
601{
602 struct hlist_head *chain;
603 struct hlist_node *pos;
604 struct nlm_host *host;
605
606 nlm_shutdown_hosts_net(NULL);
583 607
584 /* complain if any hosts are left */ 608 /* complain if any hosts are left */
585 if (nrhosts != 0) { 609 if (nrhosts != 0) {
586 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 610 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
587 dprintk("lockd: %lu hosts left:\n", nrhosts); 611 dprintk("lockd: %lu hosts left:\n", nrhosts);
588 for_each_host(host, pos, chain, nlm_server_hosts) { 612 for_each_host(host, pos, chain, nlm_server_hosts) {
589 dprintk(" %s (cnt %d use %d exp %ld)\n", 613 dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
590 host->h_name, atomic_read(&host->h_count), 614 host->h_name, atomic_read(&host->h_count),
591 host->h_inuse, host->h_expires); 615 host->h_inuse, host->h_expires, host->net);
592 } 616 }
593 } 617 }
594} 618}
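
The filtering idiom added above to both lookup paths and to nlm_shutdown_hosts_net() is worth pulling out: every host in the hash chains is now tagged with its struct net, entries from other namespaces are skipped, and a NULL net acts as a wildcard so whole-module shutdown becomes nlm_shutdown_hosts_net(NULL). A minimal userspace sketch of the same pattern (the struct and function names here are illustrative, not the kernel's):

#include <stdio.h>

/* illustrative stand-ins for struct net and struct nlm_host */
struct netns { int id; };

struct host {
        struct netns *net;
        const char *name;
        struct host *next;
};

/* net == NULL means "all namespaces", as in nlm_shutdown_hosts(NULL) */
static void expire_hosts(struct host *chain, const struct netns *net)
{
        struct host *h;

        for (h = chain; h != NULL; h = h->next) {
                if (net && h->net != net)
                        continue;       /* belongs to another namespace */
                printf("expiring %s\n", h->name);
        }
}

int main(void)
{
        struct netns a = { 1 }, b = { 2 };
        struct host hb = { &b, "hostB", NULL };
        struct host ha = { &a, "hostA", &hb };

        expire_hosts(&ha, &a);          /* hostA only */
        expire_hosts(&ha, NULL);        /* both */
        return 0;
}
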
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 65ba36b80a9..7ef14b3c5be 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -47,7 +47,7 @@ struct nsm_res {
47 u32 state; 47 u32 state;
48}; 48};
49 49
50static struct rpc_program nsm_program; 50static const struct rpc_program nsm_program;
51static LIST_HEAD(nsm_handles); 51static LIST_HEAD(nsm_handles);
52static DEFINE_SPINLOCK(nsm_lock); 52static DEFINE_SPINLOCK(nsm_lock);
53 53
@@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
62 return (struct sockaddr *)&nsm->sm_addr; 62 return (struct sockaddr *)&nsm->sm_addr;
63} 63}
64 64
65static struct rpc_clnt *nsm_create(void) 65static struct rpc_clnt *nsm_create(struct net *net)
66{ 66{
67 struct sockaddr_in sin = { 67 struct sockaddr_in sin = {
68 .sin_family = AF_INET, 68 .sin_family = AF_INET,
69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
70 }; 70 };
71 struct rpc_create_args args = { 71 struct rpc_create_args args = {
72 .net = &init_net, 72 .net = net,
73 .protocol = XPRT_TRANSPORT_UDP, 73 .protocol = XPRT_TRANSPORT_UDP,
74 .address = (struct sockaddr *)&sin, 74 .address = (struct sockaddr *)&sin,
75 .addrsize = sizeof(sin), 75 .addrsize = sizeof(sin),
@@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void)
83 return rpc_create(&args); 83 return rpc_create(&args);
84} 84}
85 85
86static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) 86static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
87 struct net *net)
87{ 88{
88 struct rpc_clnt *clnt; 89 struct rpc_clnt *clnt;
89 int status; 90 int status;
@@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
99 .rpc_resp = res, 100 .rpc_resp = res,
100 }; 101 };
101 102
102 clnt = nsm_create(); 103 clnt = nsm_create(net);
103 if (IS_ERR(clnt)) { 104 if (IS_ERR(clnt)) {
104 status = PTR_ERR(clnt); 105 status = PTR_ERR(clnt);
105 dprintk("lockd: failed to create NSM upcall transport, " 106 dprintk("lockd: failed to create NSM upcall transport, "
@@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host)
149 */ 150 */
150 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; 151 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
151 152
152 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); 153 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net);
153 if (unlikely(res.status != 0)) 154 if (unlikely(res.status != 0))
154 status = -EIO; 155 status = -EIO;
155 if (unlikely(status < 0)) { 156 if (unlikely(status < 0)) {
@@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host)
183 && nsm->sm_monitored && !nsm->sm_sticky) { 184 && nsm->sm_monitored && !nsm->sm_sticky) {
184 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); 185 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
185 186
186 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); 187 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net);
187 if (res.status != 0) 188 if (res.status != 0)
188 status = -EIO; 189 status = -EIO;
189 if (status < 0) 190 if (status < 0)
@@ -534,19 +535,19 @@ static struct rpc_procinfo nsm_procedures[] = {
534 }, 535 },
535}; 536};
536 537
537static struct rpc_version nsm_version1 = { 538static const struct rpc_version nsm_version1 = {
538 .number = 1, 539 .number = 1,
539 .nrprocs = ARRAY_SIZE(nsm_procedures), 540 .nrprocs = ARRAY_SIZE(nsm_procedures),
540 .procs = nsm_procedures 541 .procs = nsm_procedures
541}; 542};
542 543
543static struct rpc_version * nsm_version[] = { 544static const struct rpc_version *nsm_version[] = {
544 [1] = &nsm_version1, 545 [1] = &nsm_version1,
545}; 546};
546 547
547static struct rpc_stat nsm_stats; 548static struct rpc_stat nsm_stats;
548 549
549static struct rpc_program nsm_program = { 550static const struct rpc_program nsm_program = {
550 .name = "statd", 551 .name = "statd",
551 .number = NSM_PROGRAM, 552 .number = NSM_PROGRAM,
552 .nrvers = ARRAY_SIZE(nsm_version), 553 .nrvers = ARRAY_SIZE(nsm_version),
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
new file mode 100644
index 00000000000..ce227e0fbc5
--- /dev/null
+++ b/fs/lockd/netns.h
@@ -0,0 +1,12 @@
1#ifndef __LOCKD_NETNS_H__
2#define __LOCKD_NETNS_H__
3
4#include <net/netns/generic.h>
5
6struct lockd_net {
7 unsigned int nlmsvc_users;
8};
9
10extern int lockd_net_id;
11
12#endif
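
lockd_net is the per-namespace counterpart of the global nlmsvc_users counter. The pernet framework does the bookkeeping: registering a pernet_operations with .size and .id makes the core allocate that many zeroed bytes for every struct net, and net_generic(net, id) returns the slot. A sketch of the registration/lookup pair, using the same names this series introduces in svc.c below (a sketch of the pattern, not buildable outside the kernel):

#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int lockd_net_id;

struct lockd_net {
        unsigned int nlmsvc_users;
};

static struct pernet_operations lockd_net_ops = {
        /* .init/.exit can be trivial; .size is what makes the core
         * allocate a zeroed struct lockd_net for every struct net,
         * filed under the key written back through .id */
        .id   = &lockd_net_id,
        .size = sizeof(struct lockd_net),
};

/* at module init:      register_pernet_subsys(&lockd_net_ops);        */
/* anywhere afterwards: struct lockd_net *ln =
 *                              net_generic(net, lockd_net_id);        */
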
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index c061b9aa7dd..f49b9afc443 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -35,6 +35,8 @@
35#include <linux/lockd/lockd.h> 35#include <linux/lockd/lockd.h>
36#include <linux/nfs.h> 36#include <linux/nfs.h>
37 37
38#include "netns.h"
39
38#define NLMDBG_FACILITY NLMDBG_SVC 40#define NLMDBG_FACILITY NLMDBG_SVC
39#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) 41#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
40#define ALLOWED_SIGS (sigmask(SIGKILL)) 42#define ALLOWED_SIGS (sigmask(SIGKILL))
@@ -50,6 +52,8 @@ static struct task_struct *nlmsvc_task;
50static struct svc_rqst *nlmsvc_rqst; 52static struct svc_rqst *nlmsvc_rqst;
51unsigned long nlmsvc_timeout; 53unsigned long nlmsvc_timeout;
52 54
55int lockd_net_id;
56
53/* 57/*
54 * These can be set at insmod time (useful for NFS as root filesystem), 58 * These can be set at insmod time (useful for NFS as root filesystem),
55 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 59 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
@@ -189,27 +193,29 @@ lockd(void *vrqstp)
189} 193}
190 194
191static int create_lockd_listener(struct svc_serv *serv, const char *name, 195static int create_lockd_listener(struct svc_serv *serv, const char *name,
192 const int family, const unsigned short port) 196 struct net *net, const int family,
197 const unsigned short port)
193{ 198{
194 struct svc_xprt *xprt; 199 struct svc_xprt *xprt;
195 200
196 xprt = svc_find_xprt(serv, name, family, 0); 201 xprt = svc_find_xprt(serv, name, net, family, 0);
197 if (xprt == NULL) 202 if (xprt == NULL)
198 return svc_create_xprt(serv, name, &init_net, family, port, 203 return svc_create_xprt(serv, name, net, family, port,
199 SVC_SOCK_DEFAULTS); 204 SVC_SOCK_DEFAULTS);
200 svc_xprt_put(xprt); 205 svc_xprt_put(xprt);
201 return 0; 206 return 0;
202} 207}
203 208
204static int create_lockd_family(struct svc_serv *serv, const int family) 209static int create_lockd_family(struct svc_serv *serv, struct net *net,
210 const int family)
205{ 211{
206 int err; 212 int err;
207 213
208 err = create_lockd_listener(serv, "udp", family, nlm_udpport); 214 err = create_lockd_listener(serv, "udp", net, family, nlm_udpport);
209 if (err < 0) 215 if (err < 0)
210 return err; 216 return err;
211 217
212 return create_lockd_listener(serv, "tcp", family, nlm_tcpport); 218 return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport);
213} 219}
214 220
215/* 221/*
@@ -222,16 +228,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family)
222 * Returns zero if all listeners are available; otherwise a 228 * Returns zero if all listeners are available; otherwise a
223 * negative errno value is returned. 229 * negative errno value is returned.
224 */ 230 */
225static int make_socks(struct svc_serv *serv) 231static int make_socks(struct svc_serv *serv, struct net *net)
226{ 232{
227 static int warned; 233 static int warned;
228 int err; 234 int err;
229 235
230 err = create_lockd_family(serv, PF_INET); 236 err = create_lockd_family(serv, net, PF_INET);
231 if (err < 0) 237 if (err < 0)
232 goto out_err; 238 goto out_err;
233 239
234 err = create_lockd_family(serv, PF_INET6); 240 err = create_lockd_family(serv, net, PF_INET6);
235 if (err < 0 && err != -EAFNOSUPPORT) 241 if (err < 0 && err != -EAFNOSUPPORT)
236 goto out_err; 242 goto out_err;
237 243
@@ -245,6 +251,47 @@ out_err:
245 return err; 251 return err;
246} 252}
247 253
254static int lockd_up_net(struct net *net)
255{
256 struct lockd_net *ln = net_generic(net, lockd_net_id);
257 struct svc_serv *serv = nlmsvc_rqst->rq_server;
258 int error;
259
260 if (ln->nlmsvc_users)
261 return 0;
262
263 error = svc_rpcb_setup(serv, net);
264 if (error)
265 goto err_rpcb;
266
267 error = make_socks(serv, net);
268 if (error < 0)
269 goto err_socks;
270 return 0;
271
272err_socks:
273 svc_rpcb_cleanup(serv, net);
274err_rpcb:
275 return error;
276}
277
278static void lockd_down_net(struct net *net)
279{
280 struct lockd_net *ln = net_generic(net, lockd_net_id);
281 struct svc_serv *serv = nlmsvc_rqst->rq_server;
282
283 if (ln->nlmsvc_users) {
284 if (--ln->nlmsvc_users == 0) {
285 nlm_shutdown_hosts_net(net);
286 svc_shutdown_net(serv, net);
287 }
288 } else {
289 printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n",
290 nlmsvc_task, net);
291 BUG();
292 }
293}
294
248/* 295/*
249 * Bring up the lockd process if it's not already up. 296 * Bring up the lockd process if it's not already up.
250 */ 297 */
@@ -252,13 +299,16 @@ int lockd_up(void)
252{ 299{
253 struct svc_serv *serv; 300 struct svc_serv *serv;
254 int error = 0; 301 int error = 0;
302 struct net *net = current->nsproxy->net_ns;
255 303
256 mutex_lock(&nlmsvc_mutex); 304 mutex_lock(&nlmsvc_mutex);
257 /* 305 /*
258 * Check whether we're already up and running. 306 * Check whether we're already up and running.
259 */ 307 */
260 if (nlmsvc_rqst) 308 if (nlmsvc_rqst) {
309 error = lockd_up_net(net);
261 goto out; 310 goto out;
311 }
262 312
263 /* 313 /*
264 * Sanity check: if there's no pid, 314 * Sanity check: if there's no pid,
@@ -275,7 +325,7 @@ int lockd_up(void)
275 goto out; 325 goto out;
276 } 326 }
277 327
278 error = make_socks(serv); 328 error = make_socks(serv, net);
279 if (error < 0) 329 if (error < 0)
280 goto destroy_and_out; 330 goto destroy_and_out;
281 331
@@ -313,8 +363,12 @@ int lockd_up(void)
313destroy_and_out: 363destroy_and_out:
314 svc_destroy(serv); 364 svc_destroy(serv);
315out: 365out:
316 if (!error) 366 if (!error) {
367 struct lockd_net *ln = net_generic(net, lockd_net_id);
368
369 ln->nlmsvc_users++;
317 nlmsvc_users++; 370 nlmsvc_users++;
371 }
318 mutex_unlock(&nlmsvc_mutex); 372 mutex_unlock(&nlmsvc_mutex);
319 return error; 373 return error;
320} 374}
@@ -328,8 +382,10 @@ lockd_down(void)
328{ 382{
329 mutex_lock(&nlmsvc_mutex); 383 mutex_lock(&nlmsvc_mutex);
330 if (nlmsvc_users) { 384 if (nlmsvc_users) {
331 if (--nlmsvc_users) 385 if (--nlmsvc_users) {
386 lockd_down_net(current->nsproxy->net_ns);
332 goto out; 387 goto out;
388 }
333 } else { 389 } else {
334 printk(KERN_ERR "lockd_down: no users! task=%p\n", 390 printk(KERN_ERR "lockd_down: no users! task=%p\n",
335 nlmsvc_task); 391 nlmsvc_task);
@@ -440,7 +496,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \
440 __typeof__(type) num = which_strtol(val, &endp, 0); \ 496 __typeof__(type) num = which_strtol(val, &endp, 0); \
441 if (endp == val || *endp || num < (min) || num > (max)) \ 497 if (endp == val || *endp || num < (min) || num > (max)) \
442 return -EINVAL; \ 498 return -EINVAL; \
443 *((int *) kp->arg) = num; \ 499 *((type *) kp->arg) = num; \
444 return 0; \ 500 return 0; \
445} 501}
446 502
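
The one-character `int` -> `type` change above fixes a real width bug rather than being churn: the macro is expanded for more than one parameter type (nlm_timeout, for instance, is an unsigned long), and storing through an int pointer writes only sizeof(int) bytes of the target. A standalone demonstration of the failure mode (illustrative values; little-endian 64-bit assumed):

#include <stdio.h>

int main(void)
{
        /* pretend kp->arg points at an unsigned long module parameter */
        unsigned long value = 0xdeadbeefcafebabeUL;

        *((int *)&value) = 42;          /* old cast: writes 4 of 8 bytes */
        printf("%#lx\n", value);        /* 0xdeadbeef0000002a            */

        *((unsigned long *)&value) = 42; /* fixed cast: whole object     */
        printf("%#lx\n", value);         /* 0x2a                         */
        return 0;
}
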
@@ -497,24 +553,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int,
497module_param(nsm_use_hostnames, bool, 0644); 553module_param(nsm_use_hostnames, bool, 0644);
498module_param(nlm_max_connections, uint, 0644); 554module_param(nlm_max_connections, uint, 0644);
499 555
556static int lockd_init_net(struct net *net)
557{
558 return 0;
559}
560
561static void lockd_exit_net(struct net *net)
562{
563}
564
565static struct pernet_operations lockd_net_ops = {
566 .init = lockd_init_net,
567 .exit = lockd_exit_net,
568 .id = &lockd_net_id,
569 .size = sizeof(struct lockd_net),
570};
571
572
500/* 573/*
501 * Initialising and terminating the module. 574 * Initialising and terminating the module.
502 */ 575 */
503 576
504static int __init init_nlm(void) 577static int __init init_nlm(void)
505{ 578{
579 int err;
580
506#ifdef CONFIG_SYSCTL 581#ifdef CONFIG_SYSCTL
582 err = -ENOMEM;
507 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); 583 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
508 return nlm_sysctl_table ? 0 : -ENOMEM; 584 if (nlm_sysctl_table == NULL)
509#else 585 goto err_sysctl;
586#endif
587 err = register_pernet_subsys(&lockd_net_ops);
588 if (err)
589 goto err_pernet;
510 return 0; 590 return 0;
591
592err_pernet:
593#ifdef CONFIG_SYSCTL
594 unregister_sysctl_table(nlm_sysctl_table);
511#endif 595#endif
596err_sysctl:
597 return err;
512} 598}
513 599
514static void __exit exit_nlm(void) 600static void __exit exit_nlm(void)
515{ 601{
516 /* FIXME: delete all NLM clients */ 602 /* FIXME: delete all NLM clients */
517 nlm_shutdown_hosts(); 603 nlm_shutdown_hosts();
604 unregister_pernet_subsys(&lockd_net_ops);
518#ifdef CONFIG_SYSCTL 605#ifdef CONFIG_SYSCTL
519 unregister_sysctl_table(nlm_sysctl_table); 606 unregister_sysctl_table(nlm_sysctl_table);
520#endif 607#endif
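
init_nlm() used to return straight out of the #ifdef CONFIG_SYSCTL branch, which left nowhere to unregister the sysctl table once a second step (the pernet subsystem) could also fail. The rewrite above switches to the usual reverse-order goto unwind. Its skeleton, as a runnable toy (the step functions are stand-ins for the real registrations):

#include <stdio.h>

static int do_step_a(void)    { puts("A registered");   return 0;  }
static void undo_step_a(void) { puts("A unregistered"); }
static int do_step_b(void)    { puts("B failed");       return -1; }

static int init_skeleton(void)
{
        int err;

        err = do_step_a();              /* e.g. register_sysctl_table() */
        if (err)
                goto err_a;
        err = do_step_b();              /* e.g. register_pernet_subsys() */
        if (err)
                goto err_b;
        return 0;

err_b:
        undo_step_a();                  /* unwind in reverse order */
err_a:
        return err;
}

int main(void)
{
        return init_skeleton() ? 1 : 0;
}
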
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index f0179c3745d..e46353f41a4 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,7 +46,6 @@ static void nlmsvc_remove_block(struct nlm_block *block);
46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); 46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
47static void nlmsvc_freegrantargs(struct nlm_rqst *call); 47static void nlmsvc_freegrantargs(struct nlm_rqst *call);
48static const struct rpc_call_ops nlmsvc_grant_ops; 48static const struct rpc_call_ops nlmsvc_grant_ops;
49static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
50 49
51/* 50/*
52 * The list of blocked locks to retry 51 * The list of blocked locks to retry
@@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
54static LIST_HEAD(nlm_blocked); 53static LIST_HEAD(nlm_blocked);
55static DEFINE_SPINLOCK(nlm_blocked_lock); 54static DEFINE_SPINLOCK(nlm_blocked_lock);
56 55
56#ifdef LOCKD_DEBUG
57static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
58{
59 /*
60 * We can get away with a static buffer because we're only
61 * called with BKL held.
62 */
63 static char buf[2*NLM_MAXCOOKIELEN+1];
64 unsigned int i, len = sizeof(buf);
65 char *p = buf;
66
67 len--; /* allow for trailing \0 */
68 if (len < 3)
69 return "???";
70 for (i = 0 ; i < cookie->len ; i++) {
71 if (len < 2) {
72 strcpy(p-3, "...");
73 break;
74 }
75 sprintf(p, "%02x", cookie->data[i]);
76 p += 2;
77 len -= 2;
78 }
79 *p = '\0';
80
81 return buf;
82}
83#endif
84
57/* 85/*
58 * Insert a blocked lock into the global list 86 * Insert a blocked lock into the global list
59 */ 87 */
@@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void)
935 963
936 return timeout; 964 return timeout;
937} 965}
938
939#ifdef RPC_DEBUG
940static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
941{
942 /*
943 * We can get away with a static buffer because we're only
944 * called with BKL held.
945 */
946 static char buf[2*NLM_MAXCOOKIELEN+1];
947 unsigned int i, len = sizeof(buf);
948 char *p = buf;
949
950 len--; /* allow for trailing \0 */
951 if (len < 3)
952 return "???";
953 for (i = 0 ; i < cookie->len ; i++) {
954 if (len < 2) {
955 strcpy(p-3, "...");
956 break;
957 }
958 sprintf(p, "%02x", cookie->data[i]);
959 p += 2;
960 len -= 2;
961 }
962 *p = '\0';
963
964 return buf;
965}
966#endif
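
Moving nlmdbg_cookie2a() above its first caller (and keying it on LOCKD_DEBUG rather than RPC_DEBUG) drops the forward declaration. The formatting logic itself — hex-encode the cookie, backing up to overwrite the tail with an ellipsis when the static buffer runs out — is easy to check in isolation. A simplified re-creation, with the buffer shrunk so the truncation path actually triggers:

#include <stdio.h>
#include <string.h>

static const char *cookie2a(const unsigned char *data, unsigned int n)
{
        static char buf[2 * 4 + 1];     /* room for 4 cookie bytes */
        unsigned int i, len = sizeof(buf);
        char *p = buf;

        len--;                          /* allow for trailing \0 */
        if (len < 3)
                return "???";
        for (i = 0; i < n; i++) {
                if (len < 2) {
                        /* replace the last three hex digits with "..." */
                        strcpy(p - 3, "...");
                        break;
                }
                sprintf(p, "%02x", data[i]);
                p += 2;
                len -= 2;
        }
        *p = '\0';
        return buf;
}

int main(void)
{
        const unsigned char cookie[] = { 0xde, 0xad, 0xbe, 0xef, 0x01, 0x02 };

        printf("%s\n", cookie2a(cookie, 2));    /* "dead"            */
        printf("%s\n", cookie2a(cookie, 6));    /* "deadb..." cut off */
        return 0;
}
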
diff --git a/fs/locks.c b/fs/locks.c
index 637694bf3a0..0d68f1f8179 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -510,12 +510,13 @@ static void __locks_delete_block(struct file_lock *waiter)
510 510
511/* 511/*
512 */ 512 */
513static void locks_delete_block(struct file_lock *waiter) 513void locks_delete_block(struct file_lock *waiter)
514{ 514{
515 lock_flocks(); 515 lock_flocks();
516 __locks_delete_block(waiter); 516 __locks_delete_block(waiter);
517 unlock_flocks(); 517 unlock_flocks();
518} 518}
519EXPORT_SYMBOL(locks_delete_block);
519 520
520/* Insert waiter into blocker's block list. 521/* Insert waiter into blocker's block list.
521 * We use a circular list so that processes can be easily woken up in 522 * We use a circular list so that processes can be easily woken up in
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 3de7a32cadb..bea5d1b9954 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -177,17 +177,17 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
177 (filler_t *)logfs_readpage, NULL); 177 (filler_t *)logfs_readpage, NULL);
178 if (IS_ERR(page)) 178 if (IS_ERR(page))
179 return page; 179 return page;
180 dd = kmap_atomic(page, KM_USER0); 180 dd = kmap_atomic(page);
181 BUG_ON(dd->namelen == 0); 181 BUG_ON(dd->namelen == 0);
182 182
183 if (name->len != be16_to_cpu(dd->namelen) || 183 if (name->len != be16_to_cpu(dd->namelen) ||
184 memcmp(name->name, dd->name, name->len)) { 184 memcmp(name->name, dd->name, name->len)) {
185 kunmap_atomic(dd, KM_USER0); 185 kunmap_atomic(dd);
186 page_cache_release(page); 186 page_cache_release(page);
187 continue; 187 continue;
188 } 188 }
189 189
190 kunmap_atomic(dd, KM_USER0); 190 kunmap_atomic(dd);
191 return page; 191 return page;
192 } 192 }
193 return NULL; 193 return NULL;
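
This hunk is the first of dozens of identical conversions in this section: the 3.4-era atomic-kmap interface dropped the explicit slot argument (KM_USER0 and friends) in favor of an implicit per-task stack of mappings. Shown once here, since the change repeats mechanically through logfs, minix and namei below (kernel API sketch, not compilable standalone):

/* before: a fixed slot is named at map time and again at unmap */
char *vaddr = kmap_atomic(page, KM_USER0);
memcpy(vaddr, buf, len);
kunmap_atomic(vaddr, KM_USER0);

/* after: mappings nest implicitly (strict LIFO), no slot bookkeeping */
char *vaddr = kmap_atomic(page);
memcpy(vaddr, buf, len);
kunmap_atomic(vaddr);
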
@@ -365,9 +365,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
365 return NULL; 365 return NULL;
366 } 366 }
367 index = page->index; 367 index = page->index;
368 dd = kmap_atomic(page, KM_USER0); 368 dd = kmap_atomic(page);
369 ino = be64_to_cpu(dd->ino); 369 ino = be64_to_cpu(dd->ino);
370 kunmap_atomic(dd, KM_USER0); 370 kunmap_atomic(dd);
371 page_cache_release(page); 371 page_cache_release(page);
372 372
373 inode = logfs_iget(dir->i_sb, ino); 373 inode = logfs_iget(dir->i_sb, ino);
@@ -402,12 +402,12 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
402 if (!page) 402 if (!page)
403 return -ENOMEM; 403 return -ENOMEM;
404 404
405 dd = kmap_atomic(page, KM_USER0); 405 dd = kmap_atomic(page);
406 memset(dd, 0, sizeof(*dd)); 406 memset(dd, 0, sizeof(*dd));
407 dd->ino = cpu_to_be64(inode->i_ino); 407 dd->ino = cpu_to_be64(inode->i_ino);
408 dd->type = logfs_type(inode); 408 dd->type = logfs_type(inode);
409 logfs_set_name(dd, &dentry->d_name); 409 logfs_set_name(dd, &dentry->d_name);
410 kunmap_atomic(dd, KM_USER0); 410 kunmap_atomic(dd);
411 411
412 err = logfs_write_buf(dir, page, WF_LOCK); 412 err = logfs_write_buf(dir, page, WF_LOCK);
413 unlock_page(page); 413 unlock_page(page);
@@ -558,9 +558,6 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
558{ 558{
559 struct inode *inode = old_dentry->d_inode; 559 struct inode *inode = old_dentry->d_inode;
560 560
561 if (inode->i_nlink >= LOGFS_LINK_MAX)
562 return -EMLINK;
563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 561 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 562 ihold(inode);
566 inc_nlink(inode); 563 inc_nlink(inode);
@@ -579,9 +576,9 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
579 if (IS_ERR(page)) 576 if (IS_ERR(page))
580 return PTR_ERR(page); 577 return PTR_ERR(page);
581 *pos = page->index; 578 *pos = page->index;
582 map = kmap_atomic(page, KM_USER0); 579 map = kmap_atomic(page);
583 memcpy(dd, map, sizeof(*dd)); 580 memcpy(dd, map, sizeof(*dd));
584 kunmap_atomic(map, KM_USER0); 581 kunmap_atomic(map);
585 page_cache_release(page); 582 page_cache_release(page);
586 return 0; 583 return 0;
587} 584}
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 4153e65b014..e3ab5e5a904 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -517,9 +517,9 @@ static int indirect_write_alias(struct super_block *sb,
517 517
518 ino = page->mapping->host->i_ino; 518 ino = page->mapping->host->i_ino;
519 logfs_unpack_index(page->index, &bix, &level); 519 logfs_unpack_index(page->index, &bix, &level);
520 child = kmap_atomic(page, KM_USER0); 520 child = kmap_atomic(page);
521 val = child[pos]; 521 val = child[pos];
522 kunmap_atomic(child, KM_USER0); 522 kunmap_atomic(child);
523 err = write_one_alias(sb, ino, bix, level, pos, val); 523 err = write_one_alias(sb, ino, bix, level, pos, val);
524 if (err) 524 if (err)
525 return err; 525 return err;
@@ -673,9 +673,9 @@ static void alloc_indirect_block(struct inode *inode, struct page *page,
673 alloc_data_block(inode, page); 673 alloc_data_block(inode, page);
674 674
675 block = logfs_block(page); 675 block = logfs_block(page);
676 array = kmap_atomic(page, KM_USER0); 676 array = kmap_atomic(page);
677 initialize_block_counters(page, block, array, page_is_empty); 677 initialize_block_counters(page, block, array, page_is_empty);
678 kunmap_atomic(array, KM_USER0); 678 kunmap_atomic(array);
679} 679}
680 680
681static void block_set_pointer(struct page *page, int index, u64 ptr) 681static void block_set_pointer(struct page *page, int index, u64 ptr)
@@ -685,10 +685,10 @@ static void block_set_pointer(struct page *page, int index, u64 ptr)
685 u64 oldptr; 685 u64 oldptr;
686 686
687 BUG_ON(!block); 687 BUG_ON(!block);
688 array = kmap_atomic(page, KM_USER0); 688 array = kmap_atomic(page);
689 oldptr = be64_to_cpu(array[index]); 689 oldptr = be64_to_cpu(array[index]);
690 array[index] = cpu_to_be64(ptr); 690 array[index] = cpu_to_be64(ptr);
691 kunmap_atomic(array, KM_USER0); 691 kunmap_atomic(array);
692 SetPageUptodate(page); 692 SetPageUptodate(page);
693 693
694 block->full += !!(ptr & LOGFS_FULLY_POPULATED) 694 block->full += !!(ptr & LOGFS_FULLY_POPULATED)
@@ -701,9 +701,9 @@ static u64 block_get_pointer(struct page *page, int index)
701 __be64 *block; 701 __be64 *block;
702 u64 ptr; 702 u64 ptr;
703 703
704 block = kmap_atomic(page, KM_USER0); 704 block = kmap_atomic(page);
705 ptr = be64_to_cpu(block[index]); 705 ptr = be64_to_cpu(block[index]);
706 kunmap_atomic(block, KM_USER0); 706 kunmap_atomic(block);
707 return ptr; 707 return ptr;
708} 708}
709 709
@@ -850,7 +850,7 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
850 } 850 }
851 851
852 slot = get_bits(bix, SUBLEVEL(level)); 852 slot = get_bits(bix, SUBLEVEL(level));
853 rblock = kmap_atomic(page, KM_USER0); 853 rblock = kmap_atomic(page);
854 while (slot < LOGFS_BLOCK_FACTOR) { 854 while (slot < LOGFS_BLOCK_FACTOR) {
855 if (data && (rblock[slot] != 0)) 855 if (data && (rblock[slot] != 0))
856 break; 856 break;
@@ -861,12 +861,12 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
861 bix &= ~(increment - 1); 861 bix &= ~(increment - 1);
862 } 862 }
863 if (slot >= LOGFS_BLOCK_FACTOR) { 863 if (slot >= LOGFS_BLOCK_FACTOR) {
864 kunmap_atomic(rblock, KM_USER0); 864 kunmap_atomic(rblock);
865 logfs_put_read_page(page); 865 logfs_put_read_page(page);
866 return bix; 866 return bix;
867 } 867 }
868 bofs = be64_to_cpu(rblock[slot]); 868 bofs = be64_to_cpu(rblock[slot]);
869 kunmap_atomic(rblock, KM_USER0); 869 kunmap_atomic(rblock);
870 logfs_put_read_page(page); 870 logfs_put_read_page(page);
871 if (!bofs) { 871 if (!bofs) {
872 BUG_ON(data); 872 BUG_ON(data);
@@ -1961,9 +1961,9 @@ int logfs_read_inode(struct inode *inode)
1961 if (IS_ERR(page)) 1961 if (IS_ERR(page))
1962 return PTR_ERR(page); 1962 return PTR_ERR(page);
1963 1963
1964 di = kmap_atomic(page, KM_USER0); 1964 di = kmap_atomic(page);
1965 logfs_disk_to_inode(di, inode); 1965 logfs_disk_to_inode(di, inode);
1966 kunmap_atomic(di, KM_USER0); 1966 kunmap_atomic(di);
1967 move_page_to_inode(inode, page); 1967 move_page_to_inode(inode, page);
1968 page_cache_release(page); 1968 page_cache_release(page);
1969 return 0; 1969 return 0;
@@ -1982,9 +1982,9 @@ static struct page *inode_to_page(struct inode *inode)
1982 if (!page) 1982 if (!page)
1983 return NULL; 1983 return NULL;
1984 1984
1985 di = kmap_atomic(page, KM_USER0); 1985 di = kmap_atomic(page);
1986 logfs_inode_to_disk(inode, di); 1986 logfs_inode_to_disk(inode, di);
1987 kunmap_atomic(di, KM_USER0); 1987 kunmap_atomic(di);
1988 move_inode_to_page(page, inode); 1988 move_inode_to_page(page, inode);
1989 return page; 1989 return page;
1990} 1990}
@@ -2041,13 +2041,13 @@ static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
2041 2041
2042 if (write) 2042 if (write)
2043 alloc_indirect_block(inode, page, 0); 2043 alloc_indirect_block(inode, page, 0);
2044 se = kmap_atomic(page, KM_USER0); 2044 se = kmap_atomic(page);
2045 change_se(se + child_no, arg); 2045 change_se(se + child_no, arg);
2046 if (write) { 2046 if (write) {
2047 logfs_set_alias(sb, logfs_block(page), child_no); 2047 logfs_set_alias(sb, logfs_block(page), child_no);
2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize); 2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
2049 } 2049 }
2050 kunmap_atomic(se, KM_USER0); 2050 kunmap_atomic(se);
2051 2051
2052 logfs_put_write_page(page); 2052 logfs_put_write_page(page);
2053} 2053}
@@ -2245,10 +2245,10 @@ int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
2245 if (!page) 2245 if (!page)
2246 return -ENOMEM; 2246 return -ENOMEM;
2247 2247
2248 pagebuf = kmap_atomic(page, KM_USER0); 2248 pagebuf = kmap_atomic(page);
2249 memcpy(pagebuf, buf, count); 2249 memcpy(pagebuf, buf, count);
2250 flush_dcache_page(page); 2250 flush_dcache_page(page);
2251 kunmap_atomic(pagebuf, KM_USER0); 2251 kunmap_atomic(pagebuf);
2252 2252
2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE) 2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE); 2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index ab798ed1cc8..e28d090c98d 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -543,9 +543,9 @@ void move_page_to_btree(struct page *page)
543 BUG_ON(!item); /* mempool empty */ 543 BUG_ON(!item); /* mempool empty */
544 memset(item, 0, sizeof(*item)); 544 memset(item, 0, sizeof(*item));
545 545
546 child = kmap_atomic(page, KM_USER0); 546 child = kmap_atomic(page);
547 item->val = child[pos]; 547 item->val = child[pos];
548 kunmap_atomic(child, KM_USER0); 548 kunmap_atomic(child);
549 item->child_no = pos; 549 item->child_no = pos;
550 list_add(&item->list, &block->item_list); 550 list_add(&item->list, &block->item_list);
551 } 551 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c9ee7f5d1ca..97bca623d89 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -315,11 +315,9 @@ static int logfs_get_sb_final(struct super_block *sb)
315 if (IS_ERR(rootdir)) 315 if (IS_ERR(rootdir))
316 goto fail; 316 goto fail;
317 317
318 sb->s_root = d_alloc_root(rootdir); 318 sb->s_root = d_make_root(rootdir);
319 if (!sb->s_root) { 319 if (!sb->s_root)
320 iput(rootdir);
321 goto fail; 320 goto fail;
322 }
323 321
324 /* at that point we know that ->put_super() will be called */ 322 /* at that point we know that ->put_super() will be called */
325 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 323 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
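
The two deleted lines reflect a semantic change, not a rename: d_alloc_root() left the caller owning the inode reference on failure, whereas d_make_root() consumes root_inode on both paths — it does the iput() itself and returns NULL when dentry allocation fails. Every fill_super conversion in this series therefore drops its error-path iput(). The two shapes side by side (fill_super fragment, not compilable standalone):

/* old: failure leaves the reference with the caller */
sb->s_root = d_alloc_root(root_inode);
if (!sb->s_root) {
        iput(root_inode);       /* caller must drop it */
        goto fail;
}

/* new: d_make_root() always consumes root_inode */
sb->s_root = d_make_root(root_inode);
if (!sb->s_root)
        goto fail;              /* inode already released */
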
@@ -542,6 +540,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
542 * the filesystem incompatible with 32bit systems. 540 * the filesystem incompatible with 32bit systems.
543 */ 541 */
544 sb->s_maxbytes = (1ull << 43) - 1; 542 sb->s_maxbytes = (1ull << 43) - 1;
543 sb->s_max_links = LOGFS_LINK_MAX;
545 sb->s_op = &logfs_super_operations; 544 sb->s_op = &logfs_super_operations;
546 sb->s_flags = flags | MS_NOATIME; 545 sb->s_flags = flags | MS_NOATIME;
547 546
@@ -627,7 +626,10 @@ static int __init logfs_init(void)
627 if (ret) 626 if (ret)
628 goto out2; 627 goto out2;
629 628
630 return register_filesystem(&logfs_fs_type); 629 ret = register_filesystem(&logfs_fs_type);
630 if (!ret)
631 return 0;
632 logfs_destroy_inode_cache();
631out2: 633out2:
632 logfs_compr_exit(); 634 logfs_compr_exit();
633out1: 635out1:
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 085a9262c69..685b2d981b8 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -335,7 +335,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
335 goto fail; 335 goto fail;
336 } 336 }
337 337
338 kaddr = kmap_atomic(page, KM_USER0); 338 kaddr = kmap_atomic(page);
339 memset(kaddr, 0, PAGE_CACHE_SIZE); 339 memset(kaddr, 0, PAGE_CACHE_SIZE);
340 340
341 if (sbi->s_version == MINIX_V3) { 341 if (sbi->s_version == MINIX_V3) {
@@ -355,7 +355,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
355 de->inode = dir->i_ino; 355 de->inode = dir->i_ino;
356 strcpy(de->name, ".."); 356 strcpy(de->name, "..");
357 } 357 }
358 kunmap_atomic(kaddr, KM_USER0); 358 kunmap_atomic(kaddr);
359 359
360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); 360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
361fail: 361fail:
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fa8b612b8ce..fcb05d2c6b5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -190,24 +190,24 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
190 sbi->s_version = MINIX_V1; 190 sbi->s_version = MINIX_V1;
191 sbi->s_dirsize = 16; 191 sbi->s_dirsize = 16;
192 sbi->s_namelen = 14; 192 sbi->s_namelen = 14;
193 sbi->s_link_max = MINIX_LINK_MAX; 193 s->s_max_links = MINIX_LINK_MAX;
194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) { 194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) {
195 sbi->s_version = MINIX_V1; 195 sbi->s_version = MINIX_V1;
196 sbi->s_dirsize = 32; 196 sbi->s_dirsize = 32;
197 sbi->s_namelen = 30; 197 sbi->s_namelen = 30;
198 sbi->s_link_max = MINIX_LINK_MAX; 198 s->s_max_links = MINIX_LINK_MAX;
199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) { 199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) {
200 sbi->s_version = MINIX_V2; 200 sbi->s_version = MINIX_V2;
201 sbi->s_nzones = ms->s_zones; 201 sbi->s_nzones = ms->s_zones;
202 sbi->s_dirsize = 16; 202 sbi->s_dirsize = 16;
203 sbi->s_namelen = 14; 203 sbi->s_namelen = 14;
204 sbi->s_link_max = MINIX2_LINK_MAX; 204 s->s_max_links = MINIX2_LINK_MAX;
205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) { 205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) {
206 sbi->s_version = MINIX_V2; 206 sbi->s_version = MINIX_V2;
207 sbi->s_nzones = ms->s_zones; 207 sbi->s_nzones = ms->s_zones;
208 sbi->s_dirsize = 32; 208 sbi->s_dirsize = 32;
209 sbi->s_namelen = 30; 209 sbi->s_namelen = 30;
210 sbi->s_link_max = MINIX2_LINK_MAX; 210 s->s_max_links = MINIX2_LINK_MAX;
211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) { 211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
212 m3s = (struct minix3_super_block *) bh->b_data; 212 m3s = (struct minix3_super_block *) bh->b_data;
213 s->s_magic = m3s->s_magic; 213 s->s_magic = m3s->s_magic;
@@ -221,9 +221,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
221 sbi->s_dirsize = 64; 221 sbi->s_dirsize = 64;
222 sbi->s_namelen = 60; 222 sbi->s_namelen = 60;
223 sbi->s_version = MINIX_V3; 223 sbi->s_version = MINIX_V3;
224 sbi->s_link_max = MINIX2_LINK_MAX;
225 sbi->s_mount_state = MINIX_VALID_FS; 224 sbi->s_mount_state = MINIX_VALID_FS;
226 sb_set_blocksize(s, m3s->s_blocksize); 225 sb_set_blocksize(s, m3s->s_blocksize);
226 s->s_max_links = MINIX2_LINK_MAX;
227 } else 227 } else
228 goto out_no_fs; 228 goto out_no_fs;
229 229
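
Setting s->s_max_links here is the filesystem half of the VFS change further down in namei.c: instead of each filesystem open-coding its -EMLINK checks (see the deletions in minix/namei.c and logfs/dir.c), it advertises the limit once on the superblock, with 0 meaning unlimited. The contract, condensed from hunks that appear verbatim in this series:

/* filesystem side, at mount time */
s->s_max_links = MINIX_LINK_MAX;        /* 0 would mean "no limit" */

/* VFS side, e.g. vfs_mkdir() */
unsigned max_links = dir->i_sb->s_max_links;

if (max_links && dir->i_nlink >= max_links)
        return -EMLINK;
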
@@ -254,14 +254,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
254 minix_set_bit(0,sbi->s_imap[0]->b_data); 254 minix_set_bit(0,sbi->s_imap[0]->b_data);
255 minix_set_bit(0,sbi->s_zmap[0]->b_data); 255 minix_set_bit(0,sbi->s_zmap[0]->b_data);
256 256
257 /* set up enough so that it can read an inode */
258 s->s_op = &minix_sops;
259 root_inode = minix_iget(s, MINIX_ROOT_INO);
260 if (IS_ERR(root_inode)) {
261 ret = PTR_ERR(root_inode);
262 goto out_no_root;
263 }
264
265 /* Apparently minix can create filesystems that allocate more blocks for 257 /* Apparently minix can create filesystems that allocate more blocks for
266 * the bitmaps than needed. We simply ignore that, but verify it didn't 258 * the bitmaps than needed. We simply ignore that, but verify it didn't
267 * create one with not enough blocks and bail out if so. 259 * create one with not enough blocks and bail out if so.
@@ -270,7 +262,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
270 if (sbi->s_imap_blocks < block) { 262 if (sbi->s_imap_blocks < block) {
271 printk("MINIX-fs: file system does not have enough " 263 printk("MINIX-fs: file system does not have enough "
272 "imap blocks allocated. Refusing to mount\n"); 264 "imap blocks allocated. Refusing to mount\n");
273 goto out_iput; 265 goto out_no_bitmap;
274 } 266 }
275 267
276 block = minix_blocks_needed( 268 block = minix_blocks_needed(
@@ -279,13 +271,21 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
279 if (sbi->s_zmap_blocks < block) { 271 if (sbi->s_zmap_blocks < block) {
280 printk("MINIX-fs: file system does not have enough " 272 printk("MINIX-fs: file system does not have enough "
281 "zmap blocks allocated. Refusing to mount.\n"); 273 "zmap blocks allocated. Refusing to mount.\n");
282 goto out_iput; 274 goto out_no_bitmap;
275 }
276
277 /* set up enough so that it can read an inode */
278 s->s_op = &minix_sops;
279 root_inode = minix_iget(s, MINIX_ROOT_INO);
280 if (IS_ERR(root_inode)) {
281 ret = PTR_ERR(root_inode);
282 goto out_no_root;
283 } 283 }
284 284
285 ret = -ENOMEM; 285 ret = -ENOMEM;
286 s->s_root = d_alloc_root(root_inode); 286 s->s_root = d_make_root(root_inode);
287 if (!s->s_root) 287 if (!s->s_root)
288 goto out_iput; 288 goto out_no_root;
289 289
290 if (!(s->s_flags & MS_RDONLY)) { 290 if (!(s->s_flags & MS_RDONLY)) {
291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
@@ -301,10 +301,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
301 301
302 return 0; 302 return 0;
303 303
304out_iput:
305 iput(root_inode);
306 goto out_freemap;
307
308out_no_root: 304out_no_root:
309 if (!silent) 305 if (!silent)
310 printk("MINIX-fs: get root inode failed\n"); 306 printk("MINIX-fs: get root inode failed\n");
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c889ef0aa57..1ebd1185462 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -34,7 +34,6 @@ struct minix_sb_info {
34 unsigned long s_max_size; 34 unsigned long s_max_size;
35 int s_dirsize; 35 int s_dirsize;
36 int s_namelen; 36 int s_namelen;
37 int s_link_max;
38 struct buffer_head ** s_imap; 37 struct buffer_head ** s_imap;
39 struct buffer_head ** s_zmap; 38 struct buffer_head ** s_zmap;
40 struct buffer_head * s_sbh; 39 struct buffer_head * s_sbh;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2f76e38c206..2d0ee178630 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -94,9 +94,6 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
94{ 94{
95 struct inode *inode = old_dentry->d_inode; 95 struct inode *inode = old_dentry->d_inode;
96 96
97 if (inode->i_nlink >= minix_sb(inode->i_sb)->s_link_max)
98 return -EMLINK;
99
100 inode->i_ctime = CURRENT_TIME_SEC; 97 inode->i_ctime = CURRENT_TIME_SEC;
101 inode_inc_link_count(inode); 98 inode_inc_link_count(inode);
102 ihold(inode); 99 ihold(inode);
@@ -106,10 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
106static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 103static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
107{ 104{
108 struct inode * inode; 105 struct inode * inode;
109 int err = -EMLINK; 106 int err;
110
111 if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max)
112 goto out;
113 107
114 inode_inc_link_count(dir); 108 inode_inc_link_count(dir);
115 109
@@ -181,7 +175,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
181static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, 175static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
182 struct inode * new_dir, struct dentry *new_dentry) 176 struct inode * new_dir, struct dentry *new_dentry)
183{ 177{
184 struct minix_sb_info * info = minix_sb(old_dir->i_sb);
185 struct inode * old_inode = old_dentry->d_inode; 178 struct inode * old_inode = old_dentry->d_inode;
186 struct inode * new_inode = new_dentry->d_inode; 179 struct inode * new_inode = new_dentry->d_inode;
187 struct page * dir_page = NULL; 180 struct page * dir_page = NULL;
@@ -219,11 +212,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
219 drop_nlink(new_inode); 212 drop_nlink(new_inode);
220 inode_dec_link_count(new_inode); 213 inode_dec_link_count(new_inode);
221 } else { 214 } else {
222 if (dir_de) {
223 err = -EMLINK;
224 if (new_dir->i_nlink >= info->s_link_max)
225 goto out_dir;
226 }
227 err = minix_add_link(new_dentry, old_inode); 215 err = minix_add_link(new_dentry, old_inode);
228 if (err) 216 if (err)
229 goto out_dir; 217 goto out_dir;
diff --git a/fs/mpage.c b/fs/mpage.c
index 643e9f55ef2..0face1c4d4c 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -13,7 +13,7 @@
13 */ 13 */
14 14
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/module.h> 16#include <linux/export.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/kdev_t.h> 18#include <linux/kdev_t.h>
19#include <linux/gfp.h> 19#include <linux/gfp.h>
diff --git a/fs/namei.c b/fs/namei.c
index 46ea9cc1664..c42791914f8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -15,7 +15,7 @@
15 */ 15 */
16 16
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/export.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
@@ -161,7 +161,7 @@ static char *getname_flags(const char __user *filename, int flags, int *empty)
161 161
162char *getname(const char __user * filename) 162char *getname(const char __user * filename)
163{ 163{
164 return getname_flags(filename, 0, 0); 164 return getname_flags(filename, 0, NULL);
165} 165}
166 166
167#ifdef CONFIG_AUDITSYSCALL 167#ifdef CONFIG_AUDITSYSCALL
@@ -642,7 +642,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
642 cond_resched(); 642 cond_resched();
643 current->total_link_count++; 643 current->total_link_count++;
644 644
645 touch_atime(link->mnt, dentry); 645 touch_atime(link);
646 nd_set_link(nd, NULL); 646 nd_set_link(nd, NULL);
647 647
648 error = security_inode_follow_link(link->dentry, nd); 648 error = security_inode_follow_link(link->dentry, nd);
@@ -1054,53 +1054,65 @@ static void follow_dotdot(struct nameidata *nd)
1054} 1054}
1055 1055
1056/* 1056/*
1057 * Allocate a dentry with name and parent, and perform a parent 1057 * This looks up the name in dcache, possibly revalidates the old dentry and
1058 * directory ->lookup on it. Returns the new dentry, or ERR_PTR 1058 * allocates a new one if not found or not valid. The need_lookup argument
1059 * on error. parent->d_inode->i_mutex must be held. d_lookup must 1059 * reports whether i_op->lookup is still necessary.
1060 * have verified that no child exists while under i_mutex. 1060 *
1061 * dir->d_inode->i_mutex must be held
1061 */ 1062 */
1062static struct dentry *d_alloc_and_lookup(struct dentry *parent, 1063static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1063 struct qstr *name, struct nameidata *nd) 1064 struct nameidata *nd, bool *need_lookup)
1064{ 1065{
1065 struct inode *inode = parent->d_inode;
1066 struct dentry *dentry; 1066 struct dentry *dentry;
1067 struct dentry *old; 1067 int error;
1068 1068
1069 /* Don't create child dentry for a dead directory. */ 1069 *need_lookup = false;
1070 if (unlikely(IS_DEADDIR(inode))) 1070 dentry = d_lookup(dir, name);
1071 return ERR_PTR(-ENOENT); 1071 if (dentry) {
1072 if (d_need_lookup(dentry)) {
1073 *need_lookup = true;
1074 } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
1075 error = d_revalidate(dentry, nd);
1076 if (unlikely(error <= 0)) {
1077 if (error < 0) {
1078 dput(dentry);
1079 return ERR_PTR(error);
1080 } else if (!d_invalidate(dentry)) {
1081 dput(dentry);
1082 dentry = NULL;
1083 }
1084 }
1085 }
1086 }
1072 1087
1073 dentry = d_alloc(parent, name); 1088 if (!dentry) {
1074 if (unlikely(!dentry)) 1089 dentry = d_alloc(dir, name);
1075 return ERR_PTR(-ENOMEM); 1090 if (unlikely(!dentry))
1091 return ERR_PTR(-ENOMEM);
1076 1092
1077 old = inode->i_op->lookup(inode, dentry, nd); 1093 *need_lookup = true;
1078 if (unlikely(old)) {
1079 dput(dentry);
1080 dentry = old;
1081 } 1094 }
1082 return dentry; 1095 return dentry;
1083} 1096}
1084 1097
1085/* 1098/*
1086 * We already have a dentry, but require a lookup to be performed on the parent 1099 * Call i_op->lookup on the dentry. The dentry must be negative but may be
1087 * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. 1100 * hashed if it was populated with DCACHE_NEED_LOOKUP.
1088 * parent->d_inode->i_mutex must be held. d_lookup must have verified that no 1101 *
1089 * child exists while under i_mutex. 1102 * dir->d_inode->i_mutex must be held
1090 */ 1103 */
1091static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, 1104static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1092 struct nameidata *nd) 1105 struct nameidata *nd)
1093{ 1106{
1094 struct inode *inode = parent->d_inode;
1095 struct dentry *old; 1107 struct dentry *old;
1096 1108
1097 /* Don't create child dentry for a dead directory. */ 1109 /* Don't create child dentry for a dead directory. */
1098 if (unlikely(IS_DEADDIR(inode))) { 1110 if (unlikely(IS_DEADDIR(dir))) {
1099 dput(dentry); 1111 dput(dentry);
1100 return ERR_PTR(-ENOENT); 1112 return ERR_PTR(-ENOENT);
1101 } 1113 }
1102 1114
1103 old = inode->i_op->lookup(inode, dentry, nd); 1115 old = dir->i_op->lookup(dir, dentry, nd);
1104 if (unlikely(old)) { 1116 if (unlikely(old)) {
1105 dput(dentry); 1117 dput(dentry);
1106 dentry = old; 1118 dentry = old;
@@ -1108,6 +1120,19 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
1108 return dentry; 1120 return dentry;
1109} 1121}
1110 1122
1123static struct dentry *__lookup_hash(struct qstr *name,
1124 struct dentry *base, struct nameidata *nd)
1125{
1126 bool need_lookup;
1127 struct dentry *dentry;
1128
1129 dentry = lookup_dcache(name, base, nd, &need_lookup);
1130 if (!need_lookup)
1131 return dentry;
1132
1133 return lookup_real(base->d_inode, dentry, nd);
1134}
1135
1111/* 1136/*
1112 * It's more convoluted than I'd like it to be, but... it's still fairly 1137 * It's more convoluted than I'd like it to be, but... it's still fairly
1113 * small and for now I'd prefer to have fast path as straight as possible. 1138 * small and for now I'd prefer to have fast path as straight as possible.
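
The net effect of this refactor: d_alloc_and_lookup() and d_inode_lookup() are replaced by a two-phase contract. lookup_dcache() does only dcache work — find, revalidate, or allocate a negative dentry — and reports through need_lookup whether the filesystem must still be asked; lookup_real() is that ask, a thin guard around dir->i_op->lookup(). Every slow path now funnels through their composition, which condenses to (fragment of the hunk above, with an error-handling note):

dentry = lookup_dcache(name, base, nd, &need_lookup);
if (!need_lookup)
        return dentry;  /* cache hit, revalidated -- or already an ERR_PTR */
return lookup_real(base->d_inode, dentry, nd);
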
@@ -1139,6 +1164,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1139 return -ECHILD; 1164 return -ECHILD;
1140 nd->seq = seq; 1165 nd->seq = seq;
1141 1166
1167 if (unlikely(d_need_lookup(dentry)))
1168 goto unlazy;
1142 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1169 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1143 status = d_revalidate(dentry, nd); 1170 status = d_revalidate(dentry, nd);
1144 if (unlikely(status <= 0)) { 1171 if (unlikely(status <= 0)) {
@@ -1147,8 +1174,6 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1147 goto unlazy; 1174 goto unlazy;
1148 } 1175 }
1149 } 1176 }
1150 if (unlikely(d_need_lookup(dentry)))
1151 goto unlazy;
1152 path->mnt = mnt; 1177 path->mnt = mnt;
1153 path->dentry = dentry; 1178 path->dentry = dentry;
1154 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1179 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1163,38 +1188,14 @@ unlazy:
1163 dentry = __d_lookup(parent, name); 1188 dentry = __d_lookup(parent, name);
1164 } 1189 }
1165 1190
1166 if (dentry && unlikely(d_need_lookup(dentry))) { 1191 if (unlikely(!dentry))
1192 goto need_lookup;
1193
1194 if (unlikely(d_need_lookup(dentry))) {
1167 dput(dentry); 1195 dput(dentry);
1168 dentry = NULL; 1196 goto need_lookup;
1169 }
1170retry:
1171 if (unlikely(!dentry)) {
1172 struct inode *dir = parent->d_inode;
1173 BUG_ON(nd->inode != dir);
1174
1175 mutex_lock(&dir->i_mutex);
1176 dentry = d_lookup(parent, name);
1177 if (likely(!dentry)) {
1178 dentry = d_alloc_and_lookup(parent, name, nd);
1179 if (IS_ERR(dentry)) {
1180 mutex_unlock(&dir->i_mutex);
1181 return PTR_ERR(dentry);
1182 }
1183 /* known good */
1184 need_reval = 0;
1185 status = 1;
1186 } else if (unlikely(d_need_lookup(dentry))) {
1187 dentry = d_inode_lookup(parent, dentry, nd);
1188 if (IS_ERR(dentry)) {
1189 mutex_unlock(&dir->i_mutex);
1190 return PTR_ERR(dentry);
1191 }
1192 /* known good */
1193 need_reval = 0;
1194 status = 1;
1195 }
1196 mutex_unlock(&dir->i_mutex);
1197 } 1197 }
1198
1198 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) 1199 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1199 status = d_revalidate(dentry, nd); 1200 status = d_revalidate(dentry, nd);
1200 if (unlikely(status <= 0)) { 1201 if (unlikely(status <= 0)) {
@@ -1204,12 +1205,10 @@ retry:
1204 } 1205 }
1205 if (!d_invalidate(dentry)) { 1206 if (!d_invalidate(dentry)) {
1206 dput(dentry); 1207 dput(dentry);
1207 dentry = NULL; 1208 goto need_lookup;
1208 need_reval = 1;
1209 goto retry;
1210 } 1209 }
1211 } 1210 }
1212 1211done:
1213 path->mnt = mnt; 1212 path->mnt = mnt;
1214 path->dentry = dentry; 1213 path->dentry = dentry;
1215 err = follow_managed(path, nd->flags); 1214 err = follow_managed(path, nd->flags);
@@ -1221,6 +1220,16 @@ retry:
1221 nd->flags |= LOOKUP_JUMPED; 1220 nd->flags |= LOOKUP_JUMPED;
1222 *inode = path->dentry->d_inode; 1221 *inode = path->dentry->d_inode;
1223 return 0; 1222 return 0;
1223
1224need_lookup:
1225 BUG_ON(nd->inode != parent->d_inode);
1226
1227 mutex_lock(&parent->d_inode->i_mutex);
1228 dentry = __lookup_hash(name, parent, nd);
1229 mutex_unlock(&parent->d_inode->i_mutex);
1230 if (IS_ERR(dentry))
1231 return PTR_ERR(dentry);
1232 goto done;
1224} 1233}
1225 1234
1226static inline int may_lookup(struct nameidata *nd) 1235static inline int may_lookup(struct nameidata *nd)
@@ -1374,6 +1383,98 @@ static inline int can_lookup(struct inode *inode)
1374 return 1; 1383 return 1;
1375} 1384}
1376 1385
1386/*
1387 * We can do the critical dentry name comparison and hashing
1388 * operations one word at a time, but we are limited to:
1389 *
1390 * - Architectures with fast unaligned word accesses. We could
1391 * do a "get_unaligned()" if this helps and is sufficiently
1392 * fast.
1393 *
1394 * - Little-endian machines (so that we can generate the mask
1395 * of low bytes efficiently). Again, we *could* do a byte
1396 * swapping load on big-endian architectures if that is not
1397 * expensive enough to make the optimization worthless.
1398 *
1399 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1400 * do not trap on the (extremely unlikely) case of a page
1401 * crossing operation).
1402 *
1403 * - Furthermore, we need an efficient 64-bit compile for the
1404 * 64-bit case in order to generate the "number of bytes in
1405 * the final mask". Again, that could be replaced with an
1406 * efficient population count instruction or similar.
1407 */
1408#ifdef CONFIG_DCACHE_WORD_ACCESS
1409
1410#include <asm/word-at-a-time.h>
1411
1412#ifdef CONFIG_64BIT
1413
1414static inline unsigned int fold_hash(unsigned long hash)
1415{
1416 hash += hash >> (8*sizeof(int));
1417 return hash;
1418}
1419
1420#else /* 32-bit case */
1421
1422#define fold_hash(x) (x)
1423
1424#endif
1425
1426unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1427{
1428 unsigned long a, mask;
1429 unsigned long hash = 0;
1430
1431 for (;;) {
1432 a = load_unaligned_zeropad(name);
1433 if (len < sizeof(unsigned long))
1434 break;
1435 hash += a;
1436 hash *= 9;
1437 name += sizeof(unsigned long);
1438 len -= sizeof(unsigned long);
1439 if (!len)
1440 goto done;
1441 }
1442 mask = ~(~0ul << len*8);
1443 hash += mask & a;
1444done:
1445 return fold_hash(hash);
1446}
1447EXPORT_SYMBOL(full_name_hash);
1448
1449/*
1450 * Calculate the length and hash of the path component, and
1451 * return the length of the component;
1452 */
1453static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1454{
1455 unsigned long a, mask, hash, len;
1456
1457 hash = a = 0;
1458 len = -sizeof(unsigned long);
1459 do {
1460 hash = (hash + a) * 9;
1461 len += sizeof(unsigned long);
1462 a = load_unaligned_zeropad(name+len);
1463 /* Do we have any NUL or '/' bytes in this word? */
1464 mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
1465 } while (!mask);
1466
1467 /* The mask *below* the first high bit set */
1468 mask = (mask - 1) & ~mask;
1469 mask >>= 7;
1470 hash += a & mask;
1471 *hashp = fold_hash(hash);
1472
1473 return len + count_masked_bytes(mask);
1474}
1475
1476#else
1477
1377unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1478unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1378{ 1479{
1379 unsigned long hash = init_name_hash(); 1480 unsigned long hash = init_name_hash();
@@ -1402,6 +1503,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1402 return len; 1503 return len;
1403} 1504}
1404 1505
1506#endif
1507
1405/* 1508/*
1406 * Name resolution. 1509 * Name resolution.
1407 * This is the basic name resolution function, turning a pathname into 1510 * This is the basic name resolution function, turning a pathname into
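
The new CONFIG_DCACHE_WORD_ACCESS path above is dense; the four caveats in its comment block (unaligned loads, little-endian, no DEBUG_PAGEALLOC, cheap byte count) all serve one trick: load eight name bytes at once, then locate the first NUL or '/' with pure bit arithmetic. A standalone demo, assuming little-endian 64-bit — has_zero() and the closing multiply are re-creations of the asm/word-at-a-time.h helpers of this era, and memcpy stands in for load_unaligned_zeropad(), which really does read past the end of the buffer and relies on fault fixup:

#include <stdio.h>
#include <string.h>

#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))    /* 0x0101...01 * x */

/* high bit set in every byte of a that is zero (classic bit trick) */
static unsigned long has_zero(unsigned long a)
{
        return (a - REPEAT_BYTE(0x01)) & ~a & REPEAT_BYTE(0x80);
}

int main(void)
{
        const char name[] = "usr/lib";  /* first component: "usr" */
        unsigned long a, mask, len;

        /* stand-in for load_unaligned_zeropad() */
        memcpy(&a, name, sizeof(a));

        /* any NUL or '/' byte in this word? */
        mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));

        mask = (mask - 1) & ~mask;      /* bits below the first terminator */
        mask >>= 7;                     /* -> 0xff per component byte */

        /* 64-bit count_masked_bytes() of the period */
        len = mask * 0x0001020304050608ul >> 56;

        printf("component = \"%.*s\" (len %lu)\n", (int)len, name, len);
        return 0;
}

Running it prints component = "usr" (len 3): the mask ends at the '/' in byte 3 even though the trailing NUL would also have matched, because the below-first-bit arithmetic keeps only bytes before the earliest terminator.
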
@@ -1723,59 +1826,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1723 return err; 1826 return err;
1724} 1827}
1725 1828
1726static struct dentry *__lookup_hash(struct qstr *name,
1727 struct dentry *base, struct nameidata *nd)
1728{
1729 struct inode *inode = base->d_inode;
1730 struct dentry *dentry;
1731 int err;
1732
1733 err = inode_permission(inode, MAY_EXEC);
1734 if (err)
1735 return ERR_PTR(err);
1736
1737 /*
1738 * Don't bother with __d_lookup: callers are for creat as
1739 * well as unlink, so a lot of the time it would cost
1740 * a double lookup.
1741 */
1742 dentry = d_lookup(base, name);
1743
1744 if (dentry && d_need_lookup(dentry)) {
1745 /*
1746 * __lookup_hash is called with the parent dir's i_mutex already
1747 * held, so we are good to go here.
1748 */
1749 dentry = d_inode_lookup(base, dentry, nd);
1750 if (IS_ERR(dentry))
1751 return dentry;
1752 }
1753
1754 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1755 int status = d_revalidate(dentry, nd);
1756 if (unlikely(status <= 0)) {
1757 /*
1758 * The dentry failed validation.
1759 * If d_revalidate returned 0 attempt to invalidate
1760 * the dentry otherwise d_revalidate is asking us
1761 * to return a fail status.
1762 */
1763 if (status < 0) {
1764 dput(dentry);
1765 return ERR_PTR(status);
1766 } else if (!d_invalidate(dentry)) {
1767 dput(dentry);
1768 dentry = NULL;
1769 }
1770 }
1771 }
1772
1773 if (!dentry)
1774 dentry = d_alloc_and_lookup(base, name, nd);
1775
1776 return dentry;
1777}
1778
1779/* 1829/*
1780 * Restricted form of lookup. Doesn't follow links, single-component only, 1830 * Restricted form of lookup. Doesn't follow links, single-component only,
1781 * needs parent already locked. Doesn't follow mounts. 1831 * needs parent already locked. Doesn't follow mounts.
@@ -1801,6 +1851,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1801{ 1851{
1802 struct qstr this; 1852 struct qstr this;
1803 unsigned int c; 1853 unsigned int c;
1854 int err;
1804 1855
1805 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1856 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1806 1857
@@ -1825,6 +1876,10 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1825 return ERR_PTR(err); 1876 return ERR_PTR(err);
1826 } 1877 }
1827 1878
1879 err = inode_permission(base->d_inode, MAY_EXEC);
1880 if (err)
1881 return ERR_PTR(err);
1882
1828 return __lookup_hash(&this, base, NULL); 1883 return __lookup_hash(&this, base, NULL);
1829} 1884}
1830 1885
@@ -1849,7 +1904,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1849int user_path_at(int dfd, const char __user *name, unsigned flags, 1904int user_path_at(int dfd, const char __user *name, unsigned flags,
1850 struct path *path) 1905 struct path *path)
1851{ 1906{
1852 return user_path_at_empty(dfd, name, flags, path, 0); 1907 return user_path_at_empty(dfd, name, flags, path, NULL);
1853} 1908}
1854 1909
1855static int user_path_parent(int dfd, const char __user *path, 1910static int user_path_parent(int dfd, const char __user *path,
@@ -2569,6 +2624,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
2569int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 2624int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2570{ 2625{
2571 int error = may_create(dir, dentry); 2626 int error = may_create(dir, dentry);
2627 unsigned max_links = dir->i_sb->s_max_links;
2572 2628
2573 if (error) 2629 if (error)
2574 return error; 2630 return error;
@@ -2581,6 +2637,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2581 if (error) 2637 if (error)
2582 return error; 2638 return error;
2583 2639
2640 if (max_links && dir->i_nlink >= max_links)
2641 return -EMLINK;
2642
2584 error = dir->i_op->mkdir(dir, dentry, mode); 2643 error = dir->i_op->mkdir(dir, dentry, mode);
2585 if (!error) 2644 if (!error)
2586 fsnotify_mkdir(dir, dentry); 2645 fsnotify_mkdir(dir, dentry);
@@ -2622,7 +2681,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
2622 2681
2623/* 2682/*
2624 * The dentry_unhash() helper will try to drop the dentry early: we 2683 * The dentry_unhash() helper will try to drop the dentry early: we
2625 * should have a usage count of 2 if we're the only user of this 2684 * should have a usage count of 1 if we're the only user of this
2626 * dentry, and if that is true (possibly after pruning the dcache), 2685 * dentry, and if that is true (possibly after pruning the dcache),
2627 * then we drop the dentry now. 2686 * then we drop the dentry now.
2628 * 2687 *
@@ -2911,6 +2970,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
2911int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2970int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2912{ 2971{
2913 struct inode *inode = old_dentry->d_inode; 2972 struct inode *inode = old_dentry->d_inode;
2973 unsigned max_links = dir->i_sb->s_max_links;
2914 int error; 2974 int error;
2915 2975
2916 if (!inode) 2976 if (!inode)
@@ -2941,6 +3001,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2941 /* Make sure we don't allow creating hardlink to an unlinked file */ 3001 /* Make sure we don't allow creating hardlink to an unlinked file */
2942 if (inode->i_nlink == 0) 3002 if (inode->i_nlink == 0)
2943 error = -ENOENT; 3003 error = -ENOENT;
3004 else if (max_links && inode->i_nlink >= max_links)
3005 error = -EMLINK;
2944 else 3006 else
2945 error = dir->i_op->link(old_dentry, dir, new_dentry); 3007 error = dir->i_op->link(old_dentry, dir, new_dentry);
2946 mutex_unlock(&inode->i_mutex); 3008 mutex_unlock(&inode->i_mutex);
@@ -3050,6 +3112,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3050{ 3112{
3051 int error = 0; 3113 int error = 0;
3052 struct inode *target = new_dentry->d_inode; 3114 struct inode *target = new_dentry->d_inode;
3115 unsigned max_links = new_dir->i_sb->s_max_links;
3053 3116
3054 /* 3117 /*
3055 * If we are going to change the parent - check write permissions, 3118 * If we are going to change the parent - check write permissions,
@@ -3073,6 +3136,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3073 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) 3136 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
3074 goto out; 3137 goto out;
3075 3138
3139 error = -EMLINK;
3140 if (max_links && !target && new_dir != old_dir &&
3141 new_dir->i_nlink >= max_links)
3142 goto out;
3143
3076 if (target) 3144 if (target)
3077 shrink_dcache_parent(new_dentry); 3145 shrink_dcache_parent(new_dentry);
3078 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3146 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3371,9 +3439,9 @@ retry:
3371 if (err) 3439 if (err)
3372 goto fail; 3440 goto fail;
3373 3441
3374 kaddr = kmap_atomic(page, KM_USER0); 3442 kaddr = kmap_atomic(page);
3375 memcpy(kaddr, symname, len-1); 3443 memcpy(kaddr, symname, len-1);
3376 kunmap_atomic(kaddr, KM_USER0); 3444 kunmap_atomic(kaddr);
3377 3445
3378 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 3446 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
3379 page, fsdata); 3447 page, fsdata);
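
This hunk is part of the tree-wide kmap_atomic() conversion: the KM_USER0/KM_USER1 slot argument is gone and slots are managed implicitly, though mappings must still be released in LIFO order. The new idiom, as a fragment:

        void *kaddr;

        kaddr = kmap_atomic(page);      /* no KM_* slot argument any more */
        memcpy(kaddr, buf, len);
        kunmap_atomic(kaddr);           /* takes the address, not a slot */
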
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 64a326418aa..3ff5fcc1528 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <asm/system.h>
11 10
12#include <linux/time.h> 11#include <linux/time.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 3d1e34f8a68..87484fb8d17 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13 13
14#include <asm/system.h>
15#include <asm/uaccess.h> 14#include <asm/uaccess.h>
16#include <asm/byteorder.h> 15#include <asm/byteorder.h>
17 16
@@ -716,13 +715,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
716 if (!root_inode) 715 if (!root_inode)
717 goto out_disconnect; 716 goto out_disconnect;
718 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); 717 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
719 sb->s_root = d_alloc_root(root_inode); 718 sb->s_root = d_make_root(root_inode);
720 if (!sb->s_root) 719 if (!sb->s_root)
721 goto out_no_root; 720 goto out_disconnect;
722 return 0; 721 return 0;
723 722
724out_no_root:
725 iput(root_inode);
726out_disconnect: 723out_disconnect:
727 ncp_lock_server(server); 724 ncp_lock_server(server);
728 ncp_disconnect(server); 725 ncp_disconnect(server);
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index e5d71b27a5b..be20a7e171a 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -19,7 +19,6 @@
19#include <linux/memcontrol.h> 19#include <linux/memcontrol.h>
20 20
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <asm/system.h>
23 22
24#include "ncp_fs.h" 23#include "ncp_fs.h"
25 24
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index dbcd82126ae..2a0e6c59914 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -64,6 +64,7 @@ config NFS_V4
64 bool "NFS client support for NFS version 4" 64 bool "NFS client support for NFS version 4"
65 depends on NFS_FS 65 depends on NFS_FS
66 select SUNRPC_GSS 66 select SUNRPC_GSS
67 select KEYS
67 help 68 help
68 This option enables support for version 4 of the NFS protocol 69 This option enables support for version 4 of the NFS protocol
69 (RFC 3530) in the kernel's NFS client. 70 (RFC 3530) in the kernel's NFS client.
@@ -98,6 +99,18 @@ config PNFS_OBJLAYOUT
98 depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD 99 depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
99 default m 100 default m
100 101
102config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
103 string "NFSv4.1 Implementation ID Domain"
104 depends on NFS_V4_1
105 default "kernel.org"
106 help
107 This option defines the domain portion of the implementation ID that
108 may be sent in the NFS exchange_id operation. The value must be in
109 the format of a DNS domain name and should be set to the DNS domain
110 name of the distribution.
111 If the NFS client is unchanged from the upstream kernel, this
112 option should be set to the default "kernel.org".
113
101config ROOT_NFS 114config ROOT_NFS
102 bool "Root file system on NFS" 115 bool "Root file system on NFS"
103 depends on NFS_FS=y && IP_PNP 116 depends on NFS_FS=y && IP_PNP
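
Kconfig string symbols surface in C as quoted-string macros in the generated autoconf.h, so the new domain option is available at compile time. A hypothetical consumer (the identifier below is illustrative, not from this patch):

        static const char impl_domain[] =
                CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN;       /* "kernel.org" by default */
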
@@ -130,16 +143,10 @@ config NFS_USE_KERNEL_DNS
130 bool 143 bool
131 depends on NFS_V4 && !NFS_USE_LEGACY_DNS 144 depends on NFS_V4 && !NFS_USE_LEGACY_DNS
132 select DNS_RESOLVER 145 select DNS_RESOLVER
133 select KEYS
134 default y 146 default y
135 147
136config NFS_USE_NEW_IDMAPPER 148config NFS_DEBUG
137 bool "Use the new idmapper upcall routine" 149 bool
138 depends on NFS_V4 && KEYS 150 depends on NFS_FS && SUNRPC_DEBUG
139 help 151 select CRC32
140 Say Y here if you want NFS to use the new idmapper upcall functions. 152 default y
141 You will need /sbin/request-key (usually provided by the keyutils
142 package). For details, read
143 <file:Documentation/filesystems/nfs/idmapper.txt>.
144
145 If you are unsure, say N.
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 48cfac31f64..7f6a23f0244 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -38,6 +38,8 @@
38#include <linux/buffer_head.h> /* various write calls */ 38#include <linux/buffer_head.h> /* various write calls */
39#include <linux/prefetch.h> 39#include <linux/prefetch.h>
40 40
41#include "../pnfs.h"
42#include "../internal.h"
41#include "blocklayout.h" 43#include "blocklayout.h"
42 44
43#define NFSDBG_FACILITY NFSDBG_PNFS_LD 45#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -46,9 +48,6 @@ MODULE_LICENSE("GPL");
46MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); 48MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
47MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); 49MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
48 50
49struct dentry *bl_device_pipe;
50wait_queue_head_t bl_wq;
51
52static void print_page(struct page *page) 51static void print_page(struct page *page)
53{ 52{
54 dprintk("PRINTPAGE page %p\n", page); 53 dprintk("PRINTPAGE page %p\n", page);
@@ -236,12 +235,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
236 sector_t isect, extent_length = 0; 235 sector_t isect, extent_length = 0;
237 struct parallel_io *par; 236 struct parallel_io *par;
238 loff_t f_offset = rdata->args.offset; 237 loff_t f_offset = rdata->args.offset;
239 size_t count = rdata->args.count;
240 struct page **pages = rdata->args.pages; 238 struct page **pages = rdata->args.pages;
241 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 239 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
242 240
243 dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, 241 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
244 rdata->npages, f_offset, count); 242 rdata->npages, f_offset, (unsigned int)rdata->args.count);
245 243
246 par = alloc_parallel(rdata); 244 par = alloc_parallel(rdata);
247 if (!par) 245 if (!par)
@@ -872,7 +870,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
872 * GETDEVICEINFO's maxcount 870 * GETDEVICEINFO's maxcount
873 */ 871 */
874 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 872 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
875 max_pages = max_resp_sz >> PAGE_SHIFT; 873 max_pages = nfs_page_array_len(0, max_resp_sz);
876 dprintk("%s max_resp_sz %u max_pages %d\n", 874 dprintk("%s max_resp_sz %u max_pages %d\n",
877 __func__, max_resp_sz, max_pages); 875 __func__, max_resp_sz, max_pages);
878 876
@@ -1025,10 +1023,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
1025 .destroy_msg = bl_pipe_destroy_msg, 1023 .destroy_msg = bl_pipe_destroy_msg,
1026}; 1024};
1027 1025
1026static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
1027 struct rpc_pipe *pipe)
1028{
1029 struct dentry *dir, *dentry;
1030
1031 dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
1032 if (dir == NULL)
1033 return ERR_PTR(-ENOENT);
1034 dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
1035 dput(dir);
1036 return dentry;
1037}
1038
1039static void nfs4blocklayout_unregister_sb(struct super_block *sb,
1040 struct rpc_pipe *pipe)
1041{
1042 if (pipe->dentry)
1043 rpc_unlink(pipe->dentry);
1044}
1045
1046static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
1047 void *ptr)
1048{
1049 struct super_block *sb = ptr;
1050 struct net *net = sb->s_fs_info;
1051 struct nfs_net *nn = net_generic(net, nfs_net_id);
1052 struct dentry *dentry;
1053 int ret = 0;
1054
1055 if (!try_module_get(THIS_MODULE))
1056 return 0;
1057
1058 if (nn->bl_device_pipe == NULL) {
1059 module_put(THIS_MODULE);
1060 return 0;
1061 }
1062
1063 switch (event) {
1064 case RPC_PIPEFS_MOUNT:
1065 dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
1066 if (IS_ERR(dentry)) {
1067 ret = PTR_ERR(dentry);
1068 break;
1069 }
1070 nn->bl_device_pipe->dentry = dentry;
1071 break;
1072 case RPC_PIPEFS_UMOUNT:
1073 if (nn->bl_device_pipe->dentry)
1074 nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
1075 break;
1076 default:
1077 ret = -ENOTSUPP;
1078 break;
1079 }
1080 module_put(THIS_MODULE);
1081 return ret;
1082}
1083
1084static struct notifier_block nfs4blocklayout_block = {
1085 .notifier_call = rpc_pipefs_event,
1086};
1087
1088static struct dentry *nfs4blocklayout_register_net(struct net *net,
1089 struct rpc_pipe *pipe)
1090{
1091 struct super_block *pipefs_sb;
1092 struct dentry *dentry;
1093
1094 pipefs_sb = rpc_get_sb_net(net);
1095 if (!pipefs_sb)
1096 return NULL;
1097 dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
1098 rpc_put_sb_net(net);
1099 return dentry;
1100}
1101
1102static void nfs4blocklayout_unregister_net(struct net *net,
1103 struct rpc_pipe *pipe)
1104{
1105 struct super_block *pipefs_sb;
1106
1107 pipefs_sb = rpc_get_sb_net(net);
1108 if (pipefs_sb) {
1109 nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
1110 rpc_put_sb_net(net);
1111 }
1112}
1113
1114static int nfs4blocklayout_net_init(struct net *net)
1115{
1116 struct nfs_net *nn = net_generic(net, nfs_net_id);
1117 struct dentry *dentry;
1118
1119 init_waitqueue_head(&nn->bl_wq);
1120 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
1121 if (IS_ERR(nn->bl_device_pipe))
1122 return PTR_ERR(nn->bl_device_pipe);
1123 dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
1124 if (IS_ERR(dentry)) {
1125 rpc_destroy_pipe_data(nn->bl_device_pipe);
1126 return PTR_ERR(dentry);
1127 }
1128 nn->bl_device_pipe->dentry = dentry;
1129 return 0;
1130}
1131
1132static void nfs4blocklayout_net_exit(struct net *net)
1133{
1134 struct nfs_net *nn = net_generic(net, nfs_net_id);
1135
1136 nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
1137 rpc_destroy_pipe_data(nn->bl_device_pipe);
1138 nn->bl_device_pipe = NULL;
1139}
1140
1141static struct pernet_operations nfs4blocklayout_net_ops = {
1142 .init = nfs4blocklayout_net_init,
1143 .exit = nfs4blocklayout_net_exit,
1144};
1145
1028static int __init nfs4blocklayout_init(void) 1146static int __init nfs4blocklayout_init(void)
1029{ 1147{
1030 struct vfsmount *mnt;
1031 struct path path;
1032 int ret; 1148 int ret;
1033 1149
1034 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 1150 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
@@ -1037,32 +1153,17 @@ static int __init nfs4blocklayout_init(void)
1037 if (ret) 1153 if (ret)
1038 goto out; 1154 goto out;
1039 1155
1040 init_waitqueue_head(&bl_wq); 1156 ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
1041 1157 if (ret)
1042 mnt = rpc_get_mount();
1043 if (IS_ERR(mnt)) {
1044 ret = PTR_ERR(mnt);
1045 goto out_remove; 1158 goto out_remove;
1046 } 1159 ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
1047
1048 ret = vfs_path_lookup(mnt->mnt_root,
1049 mnt,
1050 NFS_PIPE_DIRNAME, 0, &path);
1051 if (ret) 1160 if (ret)
1052 goto out_putrpc; 1161 goto out_notifier;
1053
1054 bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
1055 &bl_upcall_ops, 0);
1056 path_put(&path);
1057 if (IS_ERR(bl_device_pipe)) {
1058 ret = PTR_ERR(bl_device_pipe);
1059 goto out_putrpc;
1060 }
1061out: 1162out:
1062 return ret; 1163 return ret;
1063 1164
1064out_putrpc: 1165out_notifier:
1065 rpc_put_mount(); 1166 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
1066out_remove: 1167out_remove:
1067 pnfs_unregister_layoutdriver(&blocklayout_type); 1168 pnfs_unregister_layoutdriver(&blocklayout_type);
1068 return ret; 1169 return ret;
@@ -1073,9 +1174,9 @@ static void __exit nfs4blocklayout_exit(void)
1073 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 1174 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
1074 __func__); 1175 __func__);
1075 1176
1177 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
1178 unregister_pernet_subsys(&nfs4blocklayout_net_ops);
1076 pnfs_unregister_layoutdriver(&blocklayout_type); 1179 pnfs_unregister_layoutdriver(&blocklayout_type);
1077 rpc_unlink(bl_device_pipe);
1078 rpc_put_mount();
1079} 1180}
1080 1181
1081MODULE_ALIAS("nfs-layouttype4-3"); 1182MODULE_ALIAS("nfs-layouttype4-3");
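
The rewrite above replaces the single global blocklayout pipe with per-network-namespace state: a pernet_operations pair creates and tears down the pipe data for each netns, and an rpc_pipefs notifier wires the pipe dentry in whenever a pipefs instance is mounted in that namespace. The blocklayout code stores its state in the shared nfs_net area (nfs_net_id from fs/nfs/netns.h); a hedged sketch of the same pattern for a subsystem that owns its own per-net area:

#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int example_net_id __read_mostly;

struct example_net {
        wait_queue_head_t wq;           /* per-netns, not global */
};

static __net_init int example_net_init(struct net *net)
{
        struct example_net *en = net_generic(net, example_net_id);

        init_waitqueue_head(&en->wq);
        return 0;
}

static __net_exit void example_net_exit(struct net *net)
{
        /* the per-net area itself is freed by the core */
}

static struct pernet_operations example_net_ops = {
        .init = example_net_init,
        .exit = example_net_exit,
        .id   = &example_net_id,                /* core allocates the area ... */
        .size = sizeof(struct example_net),     /* ... of this size per netns */
};

/* module init: register_pernet_subsys(&example_net_ops);
 * module exit: unregister_pernet_subsys(&example_net_ops); */
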
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index e31a2df28e7..03350690118 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -37,6 +37,7 @@
37#include <linux/sunrpc/rpc_pipe_fs.h> 37#include <linux/sunrpc/rpc_pipe_fs.h>
38 38
39#include "../pnfs.h" 39#include "../pnfs.h"
40#include "../netns.h"
40 41
41#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) 42#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
42#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) 43#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
@@ -50,6 +51,7 @@ struct pnfs_block_dev {
50 struct list_head bm_node; 51 struct list_head bm_node;
51 struct nfs4_deviceid bm_mdevid; /* associated devid */ 52 struct nfs4_deviceid bm_mdevid; /* associated devid */
52 struct block_device *bm_mdev; /* meta device itself */ 53 struct block_device *bm_mdev; /* meta device itself */
54 struct net *net;
53}; 55};
54 56
55enum exstate4 { 57enum exstate4 {
@@ -151,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
151 return BLK_LO2EXT(lseg->pls_layout); 153 return BLK_LO2EXT(lseg->pls_layout);
152} 154}
153 155
154struct bl_dev_msg { 156struct bl_pipe_msg {
155 int32_t status; 157 struct rpc_pipe_msg msg;
156 uint32_t major, minor; 158 wait_queue_head_t *bl_wq;
157}; 159};
158 160
159struct bl_msg_hdr { 161struct bl_msg_hdr {
@@ -161,9 +163,6 @@ struct bl_msg_hdr {
161 u16 totallen; /* length of entire message, including hdr itself */ 163 u16 totallen; /* length of entire message, including hdr itself */
162}; 164};
163 165
164extern struct dentry *bl_device_pipe;
165extern wait_queue_head_t bl_wq;
166
167#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ 166#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
168#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ 167#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/
169#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ 168#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index d08ba9107fd..a5c88a554d9 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
46 46
47 *rp = xdr_decode_hyper(*rp, &s); 47 *rp = xdr_decode_hyper(*rp, &s);
48 if (s & 0x1ff) { 48 if (s & 0x1ff) {
49 printk(KERN_WARNING "%s: sector not aligned\n", __func__); 49 printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
50 return -1; 50 return -1;
51 } 51 }
52 *sp = s >> SECTOR_SHIFT; 52 *sp = s >> SECTOR_SHIFT;
@@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev)
79 return blkdev_put(bdev, FMODE_READ); 79 return blkdev_put(bdev, FMODE_READ);
80} 80}
81 81
82static struct bl_dev_msg bl_mount_reply;
83
84ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, 82ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
85 size_t mlen) 83 size_t mlen)
86{ 84{
85 struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
86 nfs_net_id);
87
87 if (mlen != sizeof (struct bl_dev_msg)) 88 if (mlen != sizeof (struct bl_dev_msg))
88 return -EINVAL; 89 return -EINVAL;
89 90
90 if (copy_from_user(&bl_mount_reply, src, mlen) != 0) 91 if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
91 return -EFAULT; 92 return -EFAULT;
92 93
93 wake_up(&bl_wq); 94 wake_up(&nn->bl_wq);
94 95
95 return mlen; 96 return mlen;
96} 97}
97 98
98void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) 99void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
99{ 100{
101 struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
102
100 if (msg->errno >= 0) 103 if (msg->errno >= 0)
101 return; 104 return;
102 wake_up(&bl_wq); 105 wake_up(bl_pipe_msg->bl_wq);
103} 106}
104 107
105/* 108/*
@@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server,
111{ 114{
112 struct pnfs_block_dev *rv; 115 struct pnfs_block_dev *rv;
113 struct block_device *bd = NULL; 116 struct block_device *bd = NULL;
114 struct rpc_pipe_msg msg; 117 struct bl_pipe_msg bl_pipe_msg;
118 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
115 struct bl_msg_hdr bl_msg = { 119 struct bl_msg_hdr bl_msg = {
116 .type = BL_DEVICE_MOUNT, 120 .type = BL_DEVICE_MOUNT,
117 .totallen = dev->mincount, 121 .totallen = dev->mincount,
118 }; 122 };
119 uint8_t *dataptr; 123 uint8_t *dataptr;
120 DECLARE_WAITQUEUE(wq, current); 124 DECLARE_WAITQUEUE(wq, current);
121 struct bl_dev_msg *reply = &bl_mount_reply;
122 int offset, len, i, rc; 125 int offset, len, i, rc;
126 struct net *net = server->nfs_client->net;
127 struct nfs_net *nn = net_generic(net, nfs_net_id);
128 struct bl_dev_msg *reply = &nn->bl_mount_reply;
123 129
124 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 130 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
125 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, 131 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
126 dev->mincount); 132 dev->mincount);
127 133
128 memset(&msg, 0, sizeof(msg)); 134 bl_pipe_msg.bl_wq = &nn->bl_wq;
129 msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); 135 memset(msg, 0, sizeof(*msg));
130 if (!msg.data) { 136 msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
137 if (!msg->data) {
131 rv = ERR_PTR(-ENOMEM); 138 rv = ERR_PTR(-ENOMEM);
132 goto out; 139 goto out;
133 } 140 }
134 141
135 memcpy(msg.data, &bl_msg, sizeof(bl_msg)); 142 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
136 dataptr = (uint8_t *) msg.data; 143 dataptr = (uint8_t *) msg->data;
137 len = dev->mincount; 144 len = dev->mincount;
138 offset = sizeof(bl_msg); 145 offset = sizeof(bl_msg);
139 for (i = 0; len > 0; i++) { 146 for (i = 0; len > 0; i++) {
@@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server,
142 len -= PAGE_CACHE_SIZE; 149 len -= PAGE_CACHE_SIZE;
143 offset += PAGE_CACHE_SIZE; 150 offset += PAGE_CACHE_SIZE;
144 } 151 }
145 msg.len = sizeof(bl_msg) + dev->mincount; 152 msg->len = sizeof(bl_msg) + dev->mincount;
146 153
147 dprintk("%s CALLING USERSPACE DAEMON\n", __func__); 154 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
148 add_wait_queue(&bl_wq, &wq); 155 add_wait_queue(&nn->bl_wq, &wq);
149 rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); 156 rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
150 if (rc < 0) { 157 if (rc < 0) {
151 remove_wait_queue(&bl_wq, &wq); 158 remove_wait_queue(&nn->bl_wq, &wq);
152 rv = ERR_PTR(rc); 159 rv = ERR_PTR(rc);
153 goto out; 160 goto out;
154 } 161 }
@@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
156 set_current_state(TASK_UNINTERRUPTIBLE); 163 set_current_state(TASK_UNINTERRUPTIBLE);
157 schedule(); 164 schedule();
158 __set_current_state(TASK_RUNNING); 165 __set_current_state(TASK_RUNNING);
159 remove_wait_queue(&bl_wq, &wq); 166 remove_wait_queue(&nn->bl_wq, &wq);
160 167
161 if (reply->status != BL_DEVICE_REQUEST_PROC) { 168 if (reply->status != BL_DEVICE_REQUEST_PROC) {
162 dprintk("%s failed to open device: %d\n", 169 dprintk("%s failed to open device: %d\n",
@@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server,
181 188
182 rv->bm_mdev = bd; 189 rv->bm_mdev = bd;
183 memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); 190 memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
191 rv->net = net;
184 dprintk("%s Created device %s with bd_block_size %u\n", 192 dprintk("%s Created device %s with bd_block_size %u\n",
185 __func__, 193 __func__,
186 bd->bd_disk->disk_name, 194 bd->bd_disk->disk_name,
187 bd->bd_block_size); 195 bd->bd_block_size);
188 196
189out: 197out:
190 kfree(msg.data); 198 kfree(msg->data);
191 return rv; 199 return rv;
192} 200}
193 201
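
The wrapper struct introduced in these hunks exists so that the pipe's destroy_msg callback, which only receives the rpc_pipe_msg, can find its way back to the right per-net wait queue via container_of(). A condensed sketch of that recovery, mirroring bl_pipe_destroy_msg() above:

struct bl_pipe_msg {
        struct rpc_pipe_msg msg;        /* must be embedded, not pointed to */
        wait_queue_head_t *bl_wq;
};

static void example_destroy_msg(struct rpc_pipe_msg *msg)
{
        struct bl_pipe_msg *bl_pipe_msg =
                container_of(msg, struct bl_pipe_msg, msg);

        if (msg->errno < 0)                     /* upcall aborted or failed */
                wake_up(bl_pipe_msg->bl_wq);    /* unblock the sleeping caller */
}
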
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index d055c755807..737d839bc17 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -38,9 +38,10 @@
38 38
39#define NFSDBG_FACILITY NFSDBG_PNFS_LD 39#define NFSDBG_FACILITY NFSDBG_PNFS_LD
40 40
41static void dev_remove(dev_t dev) 41static void dev_remove(struct net *net, dev_t dev)
42{ 42{
43 struct rpc_pipe_msg msg; 43 struct bl_pipe_msg bl_pipe_msg;
44 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
44 struct bl_dev_msg bl_umount_request; 45 struct bl_dev_msg bl_umount_request;
45 struct bl_msg_hdr bl_msg = { 46 struct bl_msg_hdr bl_msg = {
46 .type = BL_DEVICE_UMOUNT, 47 .type = BL_DEVICE_UMOUNT,
@@ -48,36 +49,38 @@ static void dev_remove(dev_t dev)
48 }; 49 };
49 uint8_t *dataptr; 50 uint8_t *dataptr;
50 DECLARE_WAITQUEUE(wq, current); 51 DECLARE_WAITQUEUE(wq, current);
52 struct nfs_net *nn = net_generic(net, nfs_net_id);
51 53
52 dprintk("Entering %s\n", __func__); 54 dprintk("Entering %s\n", __func__);
53 55
54 memset(&msg, 0, sizeof(msg)); 56 bl_pipe_msg.bl_wq = &nn->bl_wq;
55 msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); 57 memset(msg, 0, sizeof(*msg));
56 if (!msg.data) 58 msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
59 if (!msg->data)
57 goto out; 60 goto out;
58 61
59 memset(&bl_umount_request, 0, sizeof(bl_umount_request)); 62 memset(&bl_umount_request, 0, sizeof(bl_umount_request));
60 bl_umount_request.major = MAJOR(dev); 63 bl_umount_request.major = MAJOR(dev);
61 bl_umount_request.minor = MINOR(dev); 64 bl_umount_request.minor = MINOR(dev);
62 65
63 memcpy(msg.data, &bl_msg, sizeof(bl_msg)); 66 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
64 dataptr = (uint8_t *) msg.data; 67 dataptr = (uint8_t *) msg->data;
65 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); 68 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
66 msg.len = sizeof(bl_msg) + bl_msg.totallen; 69 msg->len = sizeof(bl_msg) + bl_msg.totallen;
67 70
68 add_wait_queue(&bl_wq, &wq); 71 add_wait_queue(&nn->bl_wq, &wq);
69 if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { 72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
70 remove_wait_queue(&bl_wq, &wq); 73 remove_wait_queue(&nn->bl_wq, &wq);
71 goto out; 74 goto out;
72 } 75 }
73 76
74 set_current_state(TASK_UNINTERRUPTIBLE); 77 set_current_state(TASK_UNINTERRUPTIBLE);
75 schedule(); 78 schedule();
76 __set_current_state(TASK_RUNNING); 79 __set_current_state(TASK_RUNNING);
77 remove_wait_queue(&bl_wq, &wq); 80 remove_wait_queue(&nn->bl_wq, &wq);
78 81
79out: 82out:
80 kfree(msg.data); 83 kfree(msg->data);
81} 84}
82 85
83/* 86/*
@@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
90 dprintk("%s Releasing\n", __func__); 93 dprintk("%s Releasing\n", __func__);
91 rv = nfs4_blkdev_put(bdev->bm_mdev); 94 rv = nfs4_blkdev_put(bdev->bm_mdev);
92 if (rv) 95 if (rv)
93 printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", 96 printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
94 __func__, rv); 97 __func__, rv);
95 98
96 dev_remove(bdev->bm_mdev->bd_dev); 99 dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
97} 100}
98 101
99void bl_free_block_dev(struct pnfs_block_dev *bdev) 102void bl_free_block_dev(struct pnfs_block_dev *bdev)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 1abac09f7cd..1f9a6032796 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks,
147 count = (int)(end - start) / (int)tree->mtt_step_size; 147 count = (int)(end - start) / (int)tree->mtt_step_size;
148 148
149 /* Pre-malloc what memory we might need */ 149 /* Pre-malloc what memory we might need */
150 storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); 150 storage = kcalloc(count, sizeof(*storage), GFP_NOFS);
151 if (!storage) 151 if (!storage)
152 return -ENOMEM; 152 return -ENOMEM;
153 for (i = 0; i < count; i++) { 153 for (i = 0; i < count; i++) {
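
kcalloc() is preferred over an open-coded kmalloc(sizeof(*p) * n, ...) because it checks the multiplication for overflow, returning NULL instead of a short buffer, and zeroes the allocation. The general idiom (struct item and nr are illustrative):

        struct item *arr;

        arr = kcalloc(nr, sizeof(*arr), GFP_KERNEL);    /* overflow-checked, zeroed */
        if (!arr)
                return -ENOMEM;
        /* ... */
        kfree(arr);
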
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index c98b439332f..dded2636811 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/sunrpc/cache.h> 14#include <linux/sunrpc/cache.h>
15#include <linux/sunrpc/rpc_pipe_fs.h> 15#include <linux/sunrpc/rpc_pipe_fs.h>
16#include <net/net_namespace.h>
16 17
17#include "cache_lib.h" 18#include "cache_lib.h"
18 19
@@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
111 return 0; 112 return 0;
112} 113}
113 114
114int nfs_cache_register(struct cache_detail *cd) 115int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd)
115{ 116{
116 struct vfsmount *mnt;
117 struct path path;
118 int ret; 117 int ret;
118 struct dentry *dir;
119 119
120 mnt = rpc_get_mount(); 120 dir = rpc_d_lookup_sb(sb, "cache");
121 if (IS_ERR(mnt)) 121 BUG_ON(dir == NULL);
122 return PTR_ERR(mnt); 122 ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); 123 dput(dir);
124 if (ret)
125 goto err;
126 ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
127 path_put(&path);
128 if (!ret)
129 return ret;
130err:
131 rpc_put_mount();
132 return ret; 124 return ret;
133} 125}
134 126
135void nfs_cache_unregister(struct cache_detail *cd) 127int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
136{ 128{
137 sunrpc_cache_unregister_pipefs(cd); 129 struct super_block *pipefs_sb;
138 rpc_put_mount(); 130 int ret = 0;
131
132 pipefs_sb = rpc_get_sb_net(net);
133 if (pipefs_sb) {
134 ret = nfs_cache_register_sb(pipefs_sb, cd);
135 rpc_put_sb_net(net);
136 }
137 return ret;
138}
139
140void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
141{
142 if (cd->u.pipefs.dir)
143 sunrpc_cache_unregister_pipefs(cd);
144}
145
146void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
147{
148 struct super_block *pipefs_sb;
149
150 pipefs_sb = rpc_get_sb_net(net);
151 if (pipefs_sb) {
152 nfs_cache_unregister_sb(pipefs_sb, cd);
153 rpc_put_sb_net(net);
154 }
155}
156
157void nfs_cache_init(struct cache_detail *cd)
158{
159 sunrpc_init_cache_detail(cd);
139} 160}
140 161
162void nfs_cache_destroy(struct cache_detail *cd)
163{
164 sunrpc_destroy_cache_detail(cd);
165}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 7cf6cafcc00..317db95e37f 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); 23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); 24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
25 25
26extern int nfs_cache_register(struct cache_detail *cd); 26extern void nfs_cache_init(struct cache_detail *cd);
27extern void nfs_cache_unregister(struct cache_detail *cd); 27extern void nfs_cache_destroy(struct cache_detail *cd);
28extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
29extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
30extern int nfs_cache_register_sb(struct super_block *sb,
31 struct cache_detail *cd);
32extern void nfs_cache_unregister_sb(struct super_block *sb,
33 struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 516f3375e06..eb95f5091c1 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp)
85 } 85 }
86 if (err < 0) { 86 if (err < 0) {
87 if (err != preverr) { 87 if (err != preverr) {
88 printk(KERN_WARNING "%s: unexpected error " 88 printk(KERN_WARNING "NFS: %s: unexpected error "
89 "from svc_recv (%d)\n", __func__, err); 89 "from svc_recv (%d)\n", __func__, err);
90 preverr = err; 90 preverr = err;
91 } 91 }
@@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp)
101/* 101/*
102 * Prepare to bring up the NFSv4 callback service 102 * Prepare to bring up the NFSv4 callback service
103 */ 103 */
104struct svc_rqst * 104static struct svc_rqst *
105nfs4_callback_up(struct svc_serv *serv) 105nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
106{ 106{
107 int ret; 107 int ret;
108 108
109 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, 109 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET,
110 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 110 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
111 if (ret <= 0) 111 if (ret <= 0)
112 goto out_err; 112 goto out_err;
@@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv)
114 dprintk("NFS: Callback listener port = %u (af %u)\n", 114 dprintk("NFS: Callback listener port = %u (af %u)\n",
115 nfs_callback_tcpport, PF_INET); 115 nfs_callback_tcpport, PF_INET);
116 116
117 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, 117 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6,
118 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 118 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
119 if (ret > 0) { 119 if (ret > 0) {
120 nfs_callback_tcpport6 = ret; 120 nfs_callback_tcpport6 = ret;
@@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp)
172/* 172/*
173 * Bring up the NFSv4.1 callback service 173 * Bring up the NFSv4.1 callback service
174 */ 174 */
175struct svc_rqst * 175static struct svc_rqst *
176nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) 176nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
177{ 177{
178 struct svc_rqst *rqstp; 178 struct svc_rqst *rqstp;
@@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
183 * fore channel connection. 183 * fore channel connection.
184 * Returns the input port (0) and sets the svc_serv bc_xprt on success 184 * Returns the input port (0) and sets the svc_serv bc_xprt on success
185 */ 185 */
186 ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, 186 ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0,
187 SVC_SOCK_ANONYMOUS); 187 SVC_SOCK_ANONYMOUS);
188 if (ret < 0) { 188 if (ret < 0) {
189 rqstp = ERR_PTR(ret); 189 rqstp = ERR_PTR(ret);
@@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
269 serv, xprt, &rqstp, &callback_svc); 269 serv, xprt, &rqstp, &callback_svc);
270 if (!minorversion_setup) { 270 if (!minorversion_setup) {
271 /* v4.0 callback setup */ 271 /* v4.0 callback setup */
272 rqstp = nfs4_callback_up(serv); 272 rqstp = nfs4_callback_up(serv, xprt);
273 callback_svc = nfs4_callback_svc; 273 callback_svc = nfs4_callback_svc;
274 } 274 }
275 275
@@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion)
332int 332int
333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) 333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
334{ 334{
335 struct rpc_clnt *r = clp->cl_rpcclient;
336 char *p = svc_gss_principal(rqstp); 335 char *p = svc_gss_principal(rqstp);
337 336
338 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) 337 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
@@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
353 if (memcmp(p, "nfs@", 4) != 0) 352 if (memcmp(p, "nfs@", 4) != 0)
354 return 0; 353 return 0;
355 p += 4; 354 p += 4;
356 if (strcmp(p, r->cl_server) != 0) 355 if (strcmp(p, clp->cl_hostname) != 0)
357 return 0; 356 return 0;
358 return 1; 357 return 1;
359} 358}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c89d3b9e483..a5527c90a5a 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,7 +38,8 @@ enum nfs4_callback_opnum {
38struct cb_process_state { 38struct cb_process_state {
39 __be32 drc_status; 39 __be32 drc_status;
40 struct nfs_client *clp; 40 struct nfs_client *clp;
41 int slotid; 41 u32 slotid;
42 struct net *net;
42}; 43};
43 44
44struct cb_compound_hdr_arg { 45struct cb_compound_hdr_arg {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 54cea8ad5a7..1b5d809a105 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -8,6 +8,7 @@
8#include <linux/nfs4.h> 8#include <linux/nfs4.h>
9#include <linux/nfs_fs.h> 9#include <linux/nfs_fs.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/rcupdate.h>
11#include "nfs4_fs.h" 12#include "nfs4_fs.h"
12#include "callback.h" 13#include "callback.h"
13#include "delegation.h" 14#include "delegation.h"
@@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
33 res->bitmap[0] = res->bitmap[1] = 0; 34 res->bitmap[0] = res->bitmap[1] = 0;
34 res->status = htonl(NFS4ERR_BADHANDLE); 35 res->status = htonl(NFS4ERR_BADHANDLE);
35 36
36 dprintk("NFS: GETATTR callback request from %s\n", 37 dprintk_rcu("NFS: GETATTR callback request from %s\n",
37 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 38 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
38 39
39 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 40 inode = nfs_delegation_find_inode(cps->clp, &args->fh);
@@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
73 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ 74 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
74 goto out; 75 goto out;
75 76
76 dprintk("NFS: RECALL callback request from %s\n", 77 dprintk_rcu("NFS: RECALL callback request from %s\n",
77 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 78 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
78 79
79 res = htonl(NFS4ERR_BADHANDLE); 80 res = htonl(NFS4ERR_BADHANDLE);
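
rpc_peeraddr2str() hands back a string from RPC-client data that this series puts under RCU protection, hence the switch from plain dprintk() to dprintk_rcu() here and below. The helper is defined elsewhere in the series; presumably it just brackets the print with an RCU read-side critical section, along these lines (an assumption, not quoted from the patch):

#define dprintk_rcu(fmt, ...)                   \
        do {                                    \
                rcu_read_lock();                \
                dprintk(fmt, ##__VA_ARGS__);    \
                rcu_read_unlock();              \
        } while (0)
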
@@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
86 res = 0; 87 res = 0;
87 break; 88 break;
88 case -ENOENT: 89 case -ENOENT:
89 if (res != 0) 90 res = htonl(NFS4ERR_BAD_STATEID);
90 res = htonl(NFS4ERR_BAD_STATEID);
91 break; 91 break;
92 default: 92 default:
93 res = htonl(NFS4ERR_RESOURCE); 93 res = htonl(NFS4ERR_RESOURCE);
@@ -98,52 +98,64 @@ out:
98 return res; 98 return res;
99} 99}
100 100
101int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
102{
103 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
104 sizeof(delegation->stateid.data)) != 0)
105 return 0;
106 return 1;
107}
108
109#if defined(CONFIG_NFS_V4_1) 101#if defined(CONFIG_NFS_V4_1)
110 102
111static u32 initiate_file_draining(struct nfs_client *clp, 103/*
112 struct cb_layoutrecallargs *args) 104 * Lookup a layout by filehandle.
105 *
106 * Note: gets a refcount on the layout hdr and on its respective inode.
107 * Caller must put the layout hdr and the inode.
108 *
109 * TODO: keep track of all layouts (and delegations) in a hash table
110 * hashed by filehandle.
111 */
112static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
113{ 113{
114 struct nfs_server *server; 114 struct nfs_server *server;
115 struct pnfs_layout_hdr *lo;
116 struct inode *ino; 115 struct inode *ino;
117 bool found = false; 116 struct pnfs_layout_hdr *lo;
118 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
119 LIST_HEAD(free_me_list);
120 117
121 spin_lock(&clp->cl_lock);
122 rcu_read_lock();
123 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 118 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
124 list_for_each_entry(lo, &server->layouts, plh_layouts) { 119 list_for_each_entry(lo, &server->layouts, plh_layouts) {
125 if (nfs_compare_fh(&args->cbl_fh, 120 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
126 &NFS_I(lo->plh_inode)->fh))
127 continue; 121 continue;
128 ino = igrab(lo->plh_inode); 122 ino = igrab(lo->plh_inode);
129 if (!ino) 123 if (!ino)
130 continue; 124 continue;
131 found = true;
132 /* Without this, layout can be freed as soon
133 * as we release cl_lock.
134 */
135 get_layout_hdr(lo); 125 get_layout_hdr(lo);
136 break; 126 return lo;
137 } 127 }
138 if (found)
139 break;
140 } 128 }
129
130 return NULL;
131}
132
133static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
134{
135 struct pnfs_layout_hdr *lo;
136
137 spin_lock(&clp->cl_lock);
138 rcu_read_lock();
139 lo = get_layout_by_fh_locked(clp, fh);
141 rcu_read_unlock(); 140 rcu_read_unlock();
142 spin_unlock(&clp->cl_lock); 141 spin_unlock(&clp->cl_lock);
143 142
144 if (!found) 143 return lo;
144}
145
146static u32 initiate_file_draining(struct nfs_client *clp,
147 struct cb_layoutrecallargs *args)
148{
149 struct inode *ino;
150 struct pnfs_layout_hdr *lo;
151 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
152 LIST_HEAD(free_me_list);
153
154 lo = get_layout_by_fh(clp, &args->cbl_fh);
155 if (!lo)
145 return NFS4ERR_NOMATCHING_LAYOUT; 156 return NFS4ERR_NOMATCHING_LAYOUT;
146 157
158 ino = lo->plh_inode;
147 spin_lock(&ino->i_lock); 159 spin_lock(&ino->i_lock);
148 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 160 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
149 mark_matching_lsegs_invalid(lo, &free_me_list, 161 mark_matching_lsegs_invalid(lo, &free_me_list,
@@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
213static u32 do_callback_layoutrecall(struct nfs_client *clp, 225static u32 do_callback_layoutrecall(struct nfs_client *clp,
214 struct cb_layoutrecallargs *args) 226 struct cb_layoutrecallargs *args)
215{ 227{
216 u32 res = NFS4ERR_DELAY; 228 u32 res;
217 229
218 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); 230 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
219 if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
220 goto out;
221 if (args->cbl_recall_type == RETURN_FILE) 231 if (args->cbl_recall_type == RETURN_FILE)
222 res = initiate_file_draining(clp, args); 232 res = initiate_file_draining(clp, args);
223 else 233 else
224 res = initiate_bulk_draining(clp, args); 234 res = initiate_bulk_draining(clp, args);
225 clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
226out:
227 dprintk("%s returning %i\n", __func__, res); 235 dprintk("%s returning %i\n", __func__, res);
228 return res; 236 return res;
229 237
@@ -303,21 +311,6 @@ out:
303 return res; 311 return res;
304} 312}
305 313
306int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
307{
308 if (delegation == NULL)
309 return 0;
310
311 if (stateid->stateid.seqid != 0)
312 return 0;
313 if (memcmp(&delegation->stateid.stateid.other,
314 &stateid->stateid.other,
315 NFS4_STATEID_OTHER_SIZE))
316 return 0;
317
318 return 1;
319}
320
321/* 314/*
322 * Validate the sequenceID sent by the server. 315 * Validate the sequenceID sent by the server.
323 * Return success if the sequenceID is one more than what we last saw on 316 * Return success if the sequenceID is one more than what we last saw on
@@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
441 int i; 434 int i;
442 __be32 status = htonl(NFS4ERR_BADSESSION); 435 __be32 status = htonl(NFS4ERR_BADSESSION);
443 436
444 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); 437 clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid);
445 if (clp == NULL) 438 if (clp == NULL)
446 goto out; 439 goto out;
447 440
@@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
517 if (!cps->clp) /* set in cb_sequence */ 510 if (!cps->clp) /* set in cb_sequence */
518 goto out; 511 goto out;
519 512
520 dprintk("NFS: RECALL_ANY callback request from %s\n", 513 dprintk_rcu("NFS: RECALL_ANY callback request from %s\n",
521 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 514 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
522 515
523 status = cpu_to_be32(NFS4ERR_INVAL); 516 status = cpu_to_be32(NFS4ERR_INVAL);
@@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
552 if (!cps->clp) /* set in cb_sequence */ 545 if (!cps->clp) /* set in cb_sequence */
553 goto out; 546 goto out;
554 547
555 dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", 548 dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
556 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), 549 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
557 args->crsa_target_max_slots); 550 args->crsa_target_max_slots);
558 551
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d50b2742f23..95bfc243992 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -9,6 +9,8 @@
9#include <linux/sunrpc/svc.h> 9#include <linux/sunrpc/svc.h>
10#include <linux/nfs4.h> 10#include <linux/nfs4.h>
11#include <linux/nfs_fs.h> 11#include <linux/nfs_fs.h>
12#include <linux/ratelimit.h>
13#include <linux/printk.h>
12#include <linux/slab.h> 14#include <linux/slab.h>
13#include <linux/sunrpc/bc_xprt.h> 15#include <linux/sunrpc/bc_xprt.h>
14#include "nfs4_fs.h" 16#include "nfs4_fs.h"
@@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
73 75
74 p = xdr_inline_decode(xdr, nbytes); 76 p = xdr_inline_decode(xdr, nbytes);
75 if (unlikely(p == NULL)) 77 if (unlikely(p == NULL))
76 printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); 78 printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
77 return p; 79 return p;
78} 80}
79 81
@@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
138{ 140{
139 __be32 *p; 141 __be32 *p;
140 142
141 p = read_buf(xdr, 16); 143 p = read_buf(xdr, NFS4_STATEID_SIZE);
142 if (unlikely(p == NULL)) 144 if (unlikely(p == NULL))
143 return htonl(NFS4ERR_RESOURCE); 145 return htonl(NFS4ERR_RESOURCE);
144 memcpy(stateid->data, p, 16); 146 memcpy(stateid, p, NFS4_STATEID_SIZE);
145 return 0; 147 return 0;
146} 148}
147 149
@@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
155 return status; 157 return status;
156 /* We do not like overly long tags! */ 158 /* We do not like overly long tags! */
157 if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { 159 if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
158 printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", 160 printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
159 __func__, hdr->taglen); 161 __func__, hdr->taglen);
160 return htonl(NFS4ERR_RESOURCE); 162 return htonl(NFS4ERR_RESOURCE);
161 } 163 }
@@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
167 if (hdr->minorversion <= 1) { 169 if (hdr->minorversion <= 1) {
168 hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ 170 hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
169 } else { 171 } else {
170 printk(KERN_WARNING "%s: NFSv4 server callback with " 172 pr_warn_ratelimited("NFS: %s: NFSv4 server callback with "
171 "illegal minor version %u!\n", 173 "illegal minor version %u!\n",
172 __func__, hdr->minorversion); 174 __func__, hdr->minorversion);
173 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 175 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
@@ -759,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
759 * Let the state manager know callback processing done. 761 * Let the state manager know callback processing done.
760 * A single slot, so highest used slotid is either 0 or -1 762 * A single slot, so highest used slotid is either 0 or -1
761 */ 763 */
762 tbl->highest_used_slotid = -1; 764 tbl->highest_used_slotid = NFS4_NO_SLOT;
763 nfs4_check_drain_bc_complete(session); 765 nfs4_check_drain_bc_complete(session);
764 spin_unlock(&tbl->slot_tbl_lock); 766 spin_unlock(&tbl->slot_tbl_lock);
765} 767}
766 768
767static void nfs4_cb_free_slot(struct cb_process_state *cps) 769static void nfs4_cb_free_slot(struct cb_process_state *cps)
768{ 770{
769 if (cps->slotid != -1) 771 if (cps->slotid != NFS4_NO_SLOT)
770 nfs4_callback_free_slot(cps->clp->cl_session); 772 nfs4_callback_free_slot(cps->clp->cl_session);
771} 773}
772 774
@@ -860,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
860 struct cb_process_state cps = { 862 struct cb_process_state cps = {
861 .drc_status = 0, 863 .drc_status = 0,
862 .clp = NULL, 864 .clp = NULL,
863 .slotid = -1, 865 .slotid = NFS4_NO_SLOT,
866 .net = rqstp->rq_xprt->xpt_net,
864 }; 867 };
865 unsigned int nops = 0; 868 unsigned int nops = 0;
866 869
@@ -876,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
876 return rpc_garbage_args; 879 return rpc_garbage_args;
877 880
878 if (hdr_arg.minorversion == 0) { 881 if (hdr_arg.minorversion == 0) {
879 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); 882 cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident);
880 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) 883 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
881 return rpc_drop_reply; 884 return rpc_drop_reply;
882 } 885 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 31778f74357..60f7e4ec842 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -36,11 +36,13 @@
36#include <linux/inet.h> 36#include <linux/inet.h>
37#include <linux/in6.h> 37#include <linux/in6.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/idr.h>
39#include <net/ipv6.h> 40#include <net/ipv6.h>
40#include <linux/nfs_xdr.h> 41#include <linux/nfs_xdr.h>
41#include <linux/sunrpc/bc_xprt.h> 42#include <linux/sunrpc/bc_xprt.h>
43#include <linux/nsproxy.h>
44#include <linux/pid_namespace.h>
42 45
43#include <asm/system.h>
44 46
45#include "nfs4_fs.h" 47#include "nfs4_fs.h"
46#include "callback.h" 48#include "callback.h"
@@ -49,15 +51,12 @@
49#include "internal.h" 51#include "internal.h"
50#include "fscache.h" 52#include "fscache.h"
51#include "pnfs.h" 53#include "pnfs.h"
54#include "netns.h"
52 55
53#define NFSDBG_FACILITY NFSDBG_CLIENT 56#define NFSDBG_FACILITY NFSDBG_CLIENT
54 57
55static DEFINE_SPINLOCK(nfs_client_lock);
56static LIST_HEAD(nfs_client_list);
57static LIST_HEAD(nfs_volume_list);
58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); 58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
59#ifdef CONFIG_NFS_V4 59#ifdef CONFIG_NFS_V4
60static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
61 60
62/* 61/*
63 * Get a unique NFSv4.0 callback identifier which will be used 62 * Get a unique NFSv4.0 callback identifier which will be used
@@ -66,15 +65,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
66static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) 65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
67{ 66{
68 int ret = 0; 67 int ret = 0;
68 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
69 69
70 if (clp->rpc_ops->version != 4 || minorversion != 0) 70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret; 71 return ret;
72retry: 72retry:
73 if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) 73 if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
74 return -ENOMEM; 74 return -ENOMEM;
75 spin_lock(&nfs_client_lock); 75 spin_lock(&nn->nfs_client_lock);
76 ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); 76 ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
77 spin_unlock(&nfs_client_lock); 77 spin_unlock(&nn->nfs_client_lock);
78 if (ret == -EAGAIN) 78 if (ret == -EAGAIN)
79 goto retry; 79 goto retry;
80 return ret; 80 return ret;
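
This is the stock two-phase idr allocation of this era, now aimed at the per-net idr: idr_pre_get() preloads memory outside the lock, idr_get_new() does the insertion under it, and -EAGAIN means another CPU consumed the preload, so the caller loops. Restated as a sketch:

        int id, ret;

        do {
                if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
                        return -ENOMEM;         /* preload, may sleep */
                spin_lock(&nn->nfs_client_lock);
                ret = idr_get_new(&nn->cb_ident_idr, clp, &id);
                spin_unlock(&nn->nfs_client_lock);
        } while (ret == -EAGAIN);               /* preload raced away: retry */
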
@@ -89,7 +89,7 @@ static bool nfs4_disable_idmapping = true;
89/* 89/*
90 * RPC cruft for NFS 90 * RPC cruft for NFS
91 */ 91 */
92static struct rpc_version *nfs_version[5] = { 92static const struct rpc_version *nfs_version[5] = {
93 [2] = &nfs_version2, 93 [2] = &nfs_version2,
94#ifdef CONFIG_NFS_V3 94#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 95 [3] = &nfs_version3,
@@ -99,7 +99,7 @@ static struct rpc_version *nfs_version[5] = {
99#endif 99#endif
100}; 100};
101 101
102struct rpc_program nfs_program = { 102const struct rpc_program nfs_program = {
103 .name = "nfs", 103 .name = "nfs",
104 .number = NFS_PROGRAM, 104 .number = NFS_PROGRAM,
105 .nrvers = ARRAY_SIZE(nfs_version), 105 .nrvers = ARRAY_SIZE(nfs_version),
@@ -115,11 +115,11 @@ struct rpc_stat nfs_rpcstat = {
115 115
116#ifdef CONFIG_NFS_V3_ACL 116#ifdef CONFIG_NFS_V3_ACL
117static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; 117static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
118static struct rpc_version * nfsacl_version[] = { 118static const struct rpc_version *nfsacl_version[] = {
119 [3] = &nfsacl_version3, 119 [3] = &nfsacl_version3,
120}; 120};
121 121
122struct rpc_program nfsacl_program = { 122const struct rpc_program nfsacl_program = {
123 .name = "nfsacl", 123 .name = "nfsacl",
124 .number = NFS_ACL_PROGRAM, 124 .number = NFS_ACL_PROGRAM,
125 .nrvers = ARRAY_SIZE(nfsacl_version), 125 .nrvers = ARRAY_SIZE(nfsacl_version),
@@ -135,6 +135,7 @@ struct nfs_client_initdata {
135 const struct nfs_rpc_ops *rpc_ops; 135 const struct nfs_rpc_ops *rpc_ops;
136 int proto; 136 int proto;
137 u32 minorversion; 137 u32 minorversion;
138 struct net *net;
138}; 139};
139 140
140/* 141/*
@@ -171,6 +172,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
171 clp->cl_rpcclient = ERR_PTR(-EINVAL); 172 clp->cl_rpcclient = ERR_PTR(-EINVAL);
172 173
173 clp->cl_proto = cl_init->proto; 174 clp->cl_proto = cl_init->proto;
175 clp->net = get_net(cl_init->net);
174 176
175#ifdef CONFIG_NFS_V4 177#ifdef CONFIG_NFS_V4
176 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); 178 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -202,8 +204,11 @@ error_0:
202#ifdef CONFIG_NFS_V4_1 204#ifdef CONFIG_NFS_V4_1
203static void nfs4_shutdown_session(struct nfs_client *clp) 205static void nfs4_shutdown_session(struct nfs_client *clp)
204{ 206{
205 if (nfs4_has_session(clp)) 207 if (nfs4_has_session(clp)) {
208 nfs4_deviceid_purge_client(clp);
206 nfs4_destroy_session(clp->cl_session); 209 nfs4_destroy_session(clp->cl_session);
210 }
211
207} 212}
208#else /* CONFIG_NFS_V4_1 */ 213#else /* CONFIG_NFS_V4_1 */
209static void nfs4_shutdown_session(struct nfs_client *clp) 214static void nfs4_shutdown_session(struct nfs_client *clp)
@@ -233,16 +238,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
233} 238}
234 239
235/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ 240/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
236void nfs_cleanup_cb_ident_idr(void) 241void nfs_cleanup_cb_ident_idr(struct net *net)
237{ 242{
238 idr_destroy(&cb_ident_idr); 243 struct nfs_net *nn = net_generic(net, nfs_net_id);
244
245 idr_destroy(&nn->cb_ident_idr);
239} 246}
240 247
241/* nfs_client_lock held */ 248/* nfs_client_lock held */
242static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 249static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
243{ 250{
251 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
252
244 if (clp->cl_cb_ident) 253 if (clp->cl_cb_ident)
245 idr_remove(&cb_ident_idr, clp->cl_cb_ident); 254 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
246} 255}
247 256
248static void pnfs_init_server(struct nfs_server *server) 257static void pnfs_init_server(struct nfs_server *server)
@@ -260,7 +269,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
260{ 269{
261} 270}
262 271
263void nfs_cleanup_cb_ident_idr(void) 272void nfs_cleanup_cb_ident_idr(struct net *net)
264{ 273{
265} 274}
266 275
@@ -292,10 +301,10 @@ static void nfs_free_client(struct nfs_client *clp)
292 if (clp->cl_machine_cred != NULL) 301 if (clp->cl_machine_cred != NULL)
293 put_rpccred(clp->cl_machine_cred); 302 put_rpccred(clp->cl_machine_cred);
294 303
295 nfs4_deviceid_purge_client(clp); 304 put_net(clp->net);
296
297 kfree(clp->cl_hostname); 305 kfree(clp->cl_hostname);
298 kfree(clp->server_scope); 306 kfree(clp->server_scope);
307 kfree(clp->impl_id);
299 kfree(clp); 308 kfree(clp);
300 309
301 dprintk("<-- nfs_free_client()\n"); 310 dprintk("<-- nfs_free_client()\n");
@@ -306,15 +315,18 @@ static void nfs_free_client(struct nfs_client *clp)
306 */ 315 */
307void nfs_put_client(struct nfs_client *clp) 316void nfs_put_client(struct nfs_client *clp)
308{ 317{
318 struct nfs_net *nn;
319
309 if (!clp) 320 if (!clp)
310 return; 321 return;
311 322
312 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); 323 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
324 nn = net_generic(clp->net, nfs_net_id);
313 325
314 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { 326 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
315 list_del(&clp->cl_share_link); 327 list_del(&clp->cl_share_link);
316 nfs_cb_idr_remove_locked(clp); 328 nfs_cb_idr_remove_locked(clp);
317 spin_unlock(&nfs_client_lock); 329 spin_unlock(&nn->nfs_client_lock);
318 330
319 BUG_ON(!list_empty(&clp->cl_superblocks)); 331 BUG_ON(!list_empty(&clp->cl_superblocks));
320 332
@@ -392,6 +404,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
392 (sin1->sin_port == sin2->sin_port); 404 (sin1->sin_port == sin2->sin_port);
393} 405}
394 406
407#if defined(CONFIG_NFS_V4_1)
395/* 408/*
396 * Test if two socket addresses represent the same actual socket, 409 * Test if two socket addresses represent the same actual socket,
397 * by comparing (only) relevant fields, excluding the port number. 410 * by comparing (only) relevant fields, excluding the port number.
@@ -410,6 +423,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
410 } 423 }
411 return 0; 424 return 0;
412} 425}
426#endif /* CONFIG_NFS_V4_1 */
413 427
414/* 428/*
415 * Test if two socket addresses represent the same actual socket, 429 * Test if two socket addresses represent the same actual socket,
@@ -430,10 +444,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
430 return 0; 444 return 0;
431} 445}
432 446
447#if defined(CONFIG_NFS_V4_1)
433/* Common match routine for v4.0 and v4.1 callback services */ 448/* Common match routine for v4.0 and v4.1 callback services */
434bool 449static bool nfs4_cb_match_client(const struct sockaddr *addr,
435nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, 450 struct nfs_client *clp, u32 minorversion)
436 u32 minorversion)
437{ 451{
438 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 452 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
439 453
@@ -453,6 +467,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
453 467
454 return true; 468 return true;
455} 469}
470#endif /* CONFIG_NFS_V4_1 */
456 471
457/* 472/*
458 * Find an nfs_client on the list that matches the initialisation data 473 * Find an nfs_client on the list that matches the initialisation data
@@ -462,8 +477,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
462{ 477{
463 struct nfs_client *clp; 478 struct nfs_client *clp;
464 const struct sockaddr *sap = data->addr; 479 const struct sockaddr *sap = data->addr;
480 struct nfs_net *nn = net_generic(data->net, nfs_net_id);
465 481
466 list_for_each_entry(clp, &nfs_client_list, cl_share_link) { 482 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
467 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 483 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
468 /* Don't match clients that failed to initialise properly */ 484 /* Don't match clients that failed to initialise properly */
469 if (clp->cl_cons_state < 0) 485 if (clp->cl_cons_state < 0)
@@ -501,13 +517,14 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
501{ 517{
502 struct nfs_client *clp, *new = NULL; 518 struct nfs_client *clp, *new = NULL;
503 int error; 519 int error;
520 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
504 521
505 dprintk("--> nfs_get_client(%s,v%u)\n", 522 dprintk("--> nfs_get_client(%s,v%u)\n",
506 cl_init->hostname ?: "", cl_init->rpc_ops->version); 523 cl_init->hostname ?: "", cl_init->rpc_ops->version);
507 524
508 /* see if the client already exists */ 525 /* see if the client already exists */
509 do { 526 do {
510 spin_lock(&nfs_client_lock); 527 spin_lock(&nn->nfs_client_lock);
511 528
512 clp = nfs_match_client(cl_init); 529 clp = nfs_match_client(cl_init);
513 if (clp) 530 if (clp)
@@ -515,7 +532,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
515 if (new) 532 if (new)
516 goto install_client; 533 goto install_client;
517 534
518 spin_unlock(&nfs_client_lock); 535 spin_unlock(&nn->nfs_client_lock);
519 536
520 new = nfs_alloc_client(cl_init); 537 new = nfs_alloc_client(cl_init);
521 } while (!IS_ERR(new)); 538 } while (!IS_ERR(new));
@@ -526,8 +543,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
526 /* install a new client and return with it unready */ 543 /* install a new client and return with it unready */
527install_client: 544install_client:
528 clp = new; 545 clp = new;
529 list_add(&clp->cl_share_link, &nfs_client_list); 546 list_add(&clp->cl_share_link, &nn->nfs_client_list);
530 spin_unlock(&nfs_client_lock); 547 spin_unlock(&nn->nfs_client_lock);
531 548
532 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, 549 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
533 authflavour, noresvport); 550 authflavour, noresvport);
@@ -542,7 +559,7 @@ install_client:
542 * - make sure it's ready before returning 559 * - make sure it's ready before returning
543 */ 560 */
544found_client: 561found_client:
545 spin_unlock(&nfs_client_lock); 562 spin_unlock(&nn->nfs_client_lock);
546 563
547 if (new) 564 if (new)
548 nfs_free_client(new); 565 nfs_free_client(new);
@@ -642,7 +659,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
642{ 659{
643 struct rpc_clnt *clnt = NULL; 660 struct rpc_clnt *clnt = NULL;
644 struct rpc_create_args args = { 661 struct rpc_create_args args = {
645 .net = &init_net, 662 .net = clp->net,
646 .protocol = clp->cl_proto, 663 .protocol = clp->cl_proto,
647 .address = (struct sockaddr *)&clp->cl_addr, 664 .address = (struct sockaddr *)&clp->cl_addr,
648 .addrsize = clp->cl_addrlen, 665 .addrsize = clp->cl_addrlen,
@@ -696,6 +713,7 @@ static int nfs_start_lockd(struct nfs_server *server)
696 .nfs_version = clp->rpc_ops->version, 713 .nfs_version = clp->rpc_ops->version,
697 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 714 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
698 1 : 0, 715 1 : 0,
716 .net = clp->net,
699 }; 717 };
700 718
701 if (nlm_init.nfs_version > 3) 719 if (nlm_init.nfs_version > 3)
@@ -831,6 +849,7 @@ static int nfs_init_server(struct nfs_server *server,
831 .addrlen = data->nfs_server.addrlen, 849 .addrlen = data->nfs_server.addrlen,
832 .rpc_ops = &nfs_v2_clientops, 850 .rpc_ops = &nfs_v2_clientops,
833 .proto = data->nfs_server.protocol, 851 .proto = data->nfs_server.protocol,
852 .net = data->net,
834 }; 853 };
835 struct rpc_timeout timeparms; 854 struct rpc_timeout timeparms;
836 struct nfs_client *clp; 855 struct nfs_client *clp;
@@ -1029,25 +1048,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
1029static void nfs_server_insert_lists(struct nfs_server *server) 1048static void nfs_server_insert_lists(struct nfs_server *server)
1030{ 1049{
1031 struct nfs_client *clp = server->nfs_client; 1050 struct nfs_client *clp = server->nfs_client;
1051 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
1032 1052
1033 spin_lock(&nfs_client_lock); 1053 spin_lock(&nn->nfs_client_lock);
1034 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1054 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1035 list_add_tail(&server->master_link, &nfs_volume_list); 1055 list_add_tail(&server->master_link, &nn->nfs_volume_list);
1036 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); 1056 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1037 spin_unlock(&nfs_client_lock); 1057 spin_unlock(&nn->nfs_client_lock);
1038 1058
1039} 1059}
1040 1060
1041static void nfs_server_remove_lists(struct nfs_server *server) 1061static void nfs_server_remove_lists(struct nfs_server *server)
1042{ 1062{
1043 struct nfs_client *clp = server->nfs_client; 1063 struct nfs_client *clp = server->nfs_client;
1064 struct nfs_net *nn;
1044 1065
1045 spin_lock(&nfs_client_lock); 1066 if (clp == NULL)
1067 return;
1068 nn = net_generic(clp->net, nfs_net_id);
1069 spin_lock(&nn->nfs_client_lock);
1046 list_del_rcu(&server->client_link); 1070 list_del_rcu(&server->client_link);
1047 if (clp && list_empty(&clp->cl_superblocks)) 1071 if (list_empty(&clp->cl_superblocks))
1048 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); 1072 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1049 list_del(&server->master_link); 1073 list_del(&server->master_link);
1050 spin_unlock(&nfs_client_lock); 1074 spin_unlock(&nn->nfs_client_lock);
1051 1075
1052 synchronize_rcu(); 1076 synchronize_rcu();
1053} 1077}
@@ -1086,6 +1110,8 @@ static struct nfs_server *nfs_alloc_server(void)
1086 return NULL; 1110 return NULL;
1087 } 1111 }
1088 1112
1113 ida_init(&server->openowner_id);
1114 ida_init(&server->lockowner_id);
1089 pnfs_init_server(server); 1115 pnfs_init_server(server);
1090 1116
1091 return server; 1117 return server;
@@ -1111,6 +1137,8 @@ void nfs_free_server(struct nfs_server *server)
1111 1137
1112 nfs_put_client(server->nfs_client); 1138 nfs_put_client(server->nfs_client);
1113 1139
1140 ida_destroy(&server->lockowner_id);
1141 ida_destroy(&server->openowner_id);
1114 nfs_free_iostats(server->io_stats); 1142 nfs_free_iostats(server->io_stats);
1115 bdi_destroy(&server->backing_dev_info); 1143 bdi_destroy(&server->backing_dev_info);
1116 kfree(server); 1144 kfree(server);
@@ -1189,45 +1217,19 @@ error:
1189/* 1217/*
1190 * NFSv4.0 callback thread helper 1218 * NFSv4.0 callback thread helper
1191 * 1219 *
1192 * Find a client by IP address, protocol version, and minorversion
1193 *
1194 * Called from the pg_authenticate method. The callback identifier
1195 * is not used as it has not been decoded.
1196 *
1197 * Returns NULL if no such client
1198 */
1199struct nfs_client *
1200nfs4_find_client_no_ident(const struct sockaddr *addr)
1201{
1202 struct nfs_client *clp;
1203
1204 spin_lock(&nfs_client_lock);
1205 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1206 if (nfs4_cb_match_client(addr, clp, 0) == false)
1207 continue;
1208 atomic_inc(&clp->cl_count);
1209 spin_unlock(&nfs_client_lock);
1210 return clp;
1211 }
1212 spin_unlock(&nfs_client_lock);
1213 return NULL;
1214}
1215
1216/*
1217 * NFSv4.0 callback thread helper
1218 *
1219 * Find a client by callback identifier 1220 * Find a client by callback identifier
1220 */ 1221 */
1221struct nfs_client * 1222struct nfs_client *
1222nfs4_find_client_ident(int cb_ident) 1223nfs4_find_client_ident(struct net *net, int cb_ident)
1223{ 1224{
1224 struct nfs_client *clp; 1225 struct nfs_client *clp;
1226 struct nfs_net *nn = net_generic(net, nfs_net_id);
1225 1227
1226 spin_lock(&nfs_client_lock); 1228 spin_lock(&nn->nfs_client_lock);
1227 clp = idr_find(&cb_ident_idr, cb_ident); 1229 clp = idr_find(&nn->cb_ident_idr, cb_ident);
1228 if (clp) 1230 if (clp)
1229 atomic_inc(&clp->cl_count); 1231 atomic_inc(&clp->cl_count);
1230 spin_unlock(&nfs_client_lock); 1232 spin_unlock(&nn->nfs_client_lock);
1231 return clp; 1233 return clp;
1232} 1234}
1233 1235
@@ -1240,13 +1242,14 @@ nfs4_find_client_ident(int cb_ident)
1240 * Returns NULL if no such client 1242 * Returns NULL if no such client
1241 */ 1243 */
1242struct nfs_client * 1244struct nfs_client *
1243nfs4_find_client_sessionid(const struct sockaddr *addr, 1245nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
1244 struct nfs4_sessionid *sid) 1246 struct nfs4_sessionid *sid)
1245{ 1247{
1246 struct nfs_client *clp; 1248 struct nfs_client *clp;
1249 struct nfs_net *nn = net_generic(net, nfs_net_id);
1247 1250
1248 spin_lock(&nfs_client_lock); 1251 spin_lock(&nn->nfs_client_lock);
1249 list_for_each_entry(clp, &nfs_client_list, cl_share_link) { 1252 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
1250 if (nfs4_cb_match_client(addr, clp, 1) == false) 1253 if (nfs4_cb_match_client(addr, clp, 1) == false)
1251 continue; 1254 continue;
1252 1255
@@ -1259,17 +1262,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1259 continue; 1262 continue;
1260 1263
1261 atomic_inc(&clp->cl_count); 1264 atomic_inc(&clp->cl_count);
1262 spin_unlock(&nfs_client_lock); 1265 spin_unlock(&nn->nfs_client_lock);
1263 return clp; 1266 return clp;
1264 } 1267 }
1265 spin_unlock(&nfs_client_lock); 1268 spin_unlock(&nn->nfs_client_lock);
1266 return NULL; 1269 return NULL;
1267} 1270}
1268 1271
1269#else /* CONFIG_NFS_V4_1 */ 1272#else /* CONFIG_NFS_V4_1 */
1270 1273
1271struct nfs_client * 1274struct nfs_client *
1272nfs4_find_client_sessionid(const struct sockaddr *addr, 1275nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
1273 struct nfs4_sessionid *sid) 1276 struct nfs4_sessionid *sid)
1274{ 1277{
1275 return NULL; 1278 return NULL;
@@ -1284,16 +1287,18 @@ static int nfs4_init_callback(struct nfs_client *clp)
1284 int error; 1287 int error;
1285 1288
1286 if (clp->rpc_ops->version == 4) { 1289 if (clp->rpc_ops->version == 4) {
1290 struct rpc_xprt *xprt;
1291
1292 xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
1293
1287 if (nfs4_has_session(clp)) { 1294 if (nfs4_has_session(clp)) {
1288 error = xprt_setup_backchannel( 1295 error = xprt_setup_backchannel(xprt,
1289 clp->cl_rpcclient->cl_xprt,
1290 NFS41_BC_MIN_CALLBACKS); 1296 NFS41_BC_MIN_CALLBACKS);
1291 if (error < 0) 1297 if (error < 0)
1292 return error; 1298 return error;
1293 } 1299 }
1294 1300
1295 error = nfs_callback_up(clp->cl_mvops->minor_version, 1301 error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
1296 clp->cl_rpcclient->cl_xprt);
1297 if (error < 0) { 1302 if (error < 0) {
1298 dprintk("%s: failed to start callback. Error = %d\n", 1303 dprintk("%s: failed to start callback. Error = %d\n",
1299 __func__, error); 1304 __func__, error);
@@ -1344,6 +1349,7 @@ int nfs4_init_client(struct nfs_client *clp,
1344 rpc_authflavor_t authflavour, 1349 rpc_authflavor_t authflavour,
1345 int noresvport) 1350 int noresvport)
1346{ 1351{
1352 char buf[INET6_ADDRSTRLEN + 1];
1347 int error; 1353 int error;
1348 1354
1349 if (clp->cl_cons_state == NFS_CS_READY) { 1355 if (clp->cl_cons_state == NFS_CS_READY) {
@@ -1359,6 +1365,20 @@ int nfs4_init_client(struct nfs_client *clp,
1359 1, noresvport); 1365 1, noresvport);
1360 if (error < 0) 1366 if (error < 0)
1361 goto error; 1367 goto error;
1368
1369 /* If no clientaddr= option was specified, find a usable cb address */
1370 if (ip_addr == NULL) {
1371 struct sockaddr_storage cb_addr;
1372 struct sockaddr *sap = (struct sockaddr *)&cb_addr;
1373
1374 error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
1375 if (error < 0)
1376 goto error;
1377 error = rpc_ntop(sap, buf, sizeof(buf));
1378 if (error < 0)
1379 goto error;
1380 ip_addr = (const char *)buf;
1381 }
1362 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1382 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
1363 1383
1364 error = nfs_idmap_new(clp); 1384 error = nfs_idmap_new(clp);
@@ -1393,7 +1413,7 @@ static int nfs4_set_client(struct nfs_server *server,
1393 const char *ip_addr, 1413 const char *ip_addr,
1394 rpc_authflavor_t authflavour, 1414 rpc_authflavor_t authflavour,
1395 int proto, const struct rpc_timeout *timeparms, 1415 int proto, const struct rpc_timeout *timeparms,
1396 u32 minorversion) 1416 u32 minorversion, struct net *net)
1397{ 1417{
1398 struct nfs_client_initdata cl_init = { 1418 struct nfs_client_initdata cl_init = {
1399 .hostname = hostname, 1419 .hostname = hostname,
@@ -1402,6 +1422,7 @@ static int nfs4_set_client(struct nfs_server *server,
1402 .rpc_ops = &nfs_v4_clientops, 1422 .rpc_ops = &nfs_v4_clientops,
1403 .proto = proto, 1423 .proto = proto,
1404 .minorversion = minorversion, 1424 .minorversion = minorversion,
1425 .net = net,
1405 }; 1426 };
1406 struct nfs_client *clp; 1427 struct nfs_client *clp;
1407 int error; 1428 int error;
@@ -1453,6 +1474,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1453 .rpc_ops = &nfs_v4_clientops, 1474 .rpc_ops = &nfs_v4_clientops,
1454 .proto = ds_proto, 1475 .proto = ds_proto,
1455 .minorversion = mds_clp->cl_minorversion, 1476 .minorversion = mds_clp->cl_minorversion,
1477 .net = mds_clp->net,
1456 }; 1478 };
1457 struct rpc_timeout ds_timeout = { 1479 struct rpc_timeout ds_timeout = {
1458 .to_initval = 15 * HZ, 1480 .to_initval = 15 * HZ,
@@ -1580,7 +1602,8 @@ static int nfs4_init_server(struct nfs_server *server,
1580 data->auth_flavors[0], 1602 data->auth_flavors[0],
1581 data->nfs_server.protocol, 1603 data->nfs_server.protocol,
1582 &timeparms, 1604 &timeparms,
1583 data->minorversion); 1605 data->minorversion,
1606 data->net);
1584 if (error < 0) 1607 if (error < 0)
1585 goto error; 1608 goto error;
1586 1609
@@ -1675,9 +1698,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1675 data->addrlen, 1698 data->addrlen,
1676 parent_client->cl_ipaddr, 1699 parent_client->cl_ipaddr,
1677 data->authflavor, 1700 data->authflavor,
1678 parent_server->client->cl_xprt->prot, 1701 rpc_protocol(parent_server->client),
1679 parent_server->client->cl_timeout, 1702 parent_server->client->cl_timeout,
1680 parent_client->cl_mvops->minor_version); 1703 parent_client->cl_mvops->minor_version,
1704 parent_client->net);
1681 if (error < 0) 1705 if (error < 0)
1682 goto error; 1706 goto error;
1683 1707
@@ -1705,7 +1729,8 @@ error:
1705 */ 1729 */
1706struct nfs_server *nfs_clone_server(struct nfs_server *source, 1730struct nfs_server *nfs_clone_server(struct nfs_server *source,
1707 struct nfs_fh *fh, 1731 struct nfs_fh *fh,
1708 struct nfs_fattr *fattr) 1732 struct nfs_fattr *fattr,
1733 rpc_authflavor_t flavor)
1709{ 1734{
1710 struct nfs_server *server; 1735 struct nfs_server *server;
1711 struct nfs_fattr *fattr_fsinfo; 1736 struct nfs_fattr *fattr_fsinfo;
@@ -1734,7 +1759,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1734 1759
1735 error = nfs_init_server_rpcclient(server, 1760 error = nfs_init_server_rpcclient(server,
1736 source->client->cl_timeout, 1761 source->client->cl_timeout,
1737 source->client->cl_auth->au_flavor); 1762 flavor);
1738 if (error < 0) 1763 if (error < 0)
1739 goto out_free_server; 1764 goto out_free_server;
1740 if (!IS_ERR(source->client_acl)) 1765 if (!IS_ERR(source->client_acl))
@@ -1770,6 +1795,18 @@ out_free_server:
1770 return ERR_PTR(error); 1795 return ERR_PTR(error);
1771} 1796}
1772 1797
1798void nfs_clients_init(struct net *net)
1799{
1800 struct nfs_net *nn = net_generic(net, nfs_net_id);
1801
1802 INIT_LIST_HEAD(&nn->nfs_client_list);
1803 INIT_LIST_HEAD(&nn->nfs_volume_list);
1804#ifdef CONFIG_NFS_V4
1805 idr_init(&nn->cb_ident_idr);
1806#endif
1807 spin_lock_init(&nn->nfs_client_lock);
1808}
1809
1773#ifdef CONFIG_PROC_FS 1810#ifdef CONFIG_PROC_FS
1774static struct proc_dir_entry *proc_fs_nfs; 1811static struct proc_dir_entry *proc_fs_nfs;
1775 1812
@@ -1823,13 +1860,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1823{ 1860{
1824 struct seq_file *m; 1861 struct seq_file *m;
1825 int ret; 1862 int ret;
1863 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1864 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1826 1865
1827 ret = seq_open(file, &nfs_server_list_ops); 1866 ret = seq_open(file, &nfs_server_list_ops);
1828 if (ret < 0) 1867 if (ret < 0)
1829 return ret; 1868 return ret;
1830 1869
1831 m = file->private_data; 1870 m = file->private_data;
1832 m->private = PDE(inode)->data; 1871 m->private = net;
1833 1872
1834 return 0; 1873 return 0;
1835} 1874}
@@ -1839,9 +1878,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1839 */ 1878 */
1840static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1879static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1841{ 1880{
1881 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1882
1842 /* lock the list against modification */ 1883 /* lock the list against modification */
1843 spin_lock(&nfs_client_lock); 1884 spin_lock(&nn->nfs_client_lock);
1844 return seq_list_start_head(&nfs_client_list, *_pos); 1885 return seq_list_start_head(&nn->nfs_client_list, *_pos);
1845} 1886}
1846 1887
1847/* 1888/*
@@ -1849,7 +1890,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1849 */ 1890 */
1850static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1891static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1851{ 1892{
1852 return seq_list_next(v, &nfs_client_list, pos); 1893 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1894
1895 return seq_list_next(v, &nn->nfs_client_list, pos);
1853} 1896}
1854 1897
1855/* 1898/*
@@ -1857,7 +1900,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1857 */ 1900 */
1858static void nfs_server_list_stop(struct seq_file *p, void *v) 1901static void nfs_server_list_stop(struct seq_file *p, void *v)
1859{ 1902{
1860 spin_unlock(&nfs_client_lock); 1903 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1904
1905 spin_unlock(&nn->nfs_client_lock);
1861} 1906}
1862 1907
1863/* 1908/*
@@ -1866,9 +1911,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
1866static int nfs_server_list_show(struct seq_file *m, void *v) 1911static int nfs_server_list_show(struct seq_file *m, void *v)
1867{ 1912{
1868 struct nfs_client *clp; 1913 struct nfs_client *clp;
1914 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1869 1915
1870 /* display header on line 1 */ 1916 /* display header on line 1 */
1871 if (v == &nfs_client_list) { 1917 if (v == &nn->nfs_client_list) {
1872 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); 1918 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
1873 return 0; 1919 return 0;
1874 } 1920 }
@@ -1880,12 +1926,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1880 if (clp->cl_cons_state != NFS_CS_READY) 1926 if (clp->cl_cons_state != NFS_CS_READY)
1881 return 0; 1927 return 0;
1882 1928
1929 rcu_read_lock();
1883 seq_printf(m, "v%u %s %s %3d %s\n", 1930 seq_printf(m, "v%u %s %s %3d %s\n",
1884 clp->rpc_ops->version, 1931 clp->rpc_ops->version,
1885 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 1932 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
1886 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), 1933 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
1887 atomic_read(&clp->cl_count), 1934 atomic_read(&clp->cl_count),
1888 clp->cl_hostname); 1935 clp->cl_hostname);
1936 rcu_read_unlock();
1889 1937
1890 return 0; 1938 return 0;
1891} 1939}
@@ -1897,13 +1945,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1897{ 1945{
1898 struct seq_file *m; 1946 struct seq_file *m;
1899 int ret; 1947 int ret;
1948 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1949 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1900 1950
1901 ret = seq_open(file, &nfs_volume_list_ops); 1951 ret = seq_open(file, &nfs_volume_list_ops);
1902 if (ret < 0) 1952 if (ret < 0)
1903 return ret; 1953 return ret;
1904 1954
1905 m = file->private_data; 1955 m = file->private_data;
1906 m->private = PDE(inode)->data; 1956 m->private = net;
1907 1957
1908 return 0; 1958 return 0;
1909} 1959}
@@ -1913,9 +1963,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1913 */ 1963 */
1914static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1964static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1915{ 1965{
1966 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1967
1916 /* lock the list against modification */ 1968 /* lock the list against modification */
1917 spin_lock(&nfs_client_lock); 1969 spin_lock(&nn->nfs_client_lock);
1918 return seq_list_start_head(&nfs_volume_list, *_pos); 1970 return seq_list_start_head(&nn->nfs_volume_list, *_pos);
1919} 1971}
1920 1972
1921/* 1973/*
@@ -1923,7 +1975,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1923 */ 1975 */
1924static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1976static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1925{ 1977{
1926 return seq_list_next(v, &nfs_volume_list, pos); 1978 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1979
1980 return seq_list_next(v, &nn->nfs_volume_list, pos);
1927} 1981}
1928 1982
1929/* 1983/*
@@ -1931,7 +1985,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1931 */ 1985 */
1932static void nfs_volume_list_stop(struct seq_file *p, void *v) 1986static void nfs_volume_list_stop(struct seq_file *p, void *v)
1933{ 1987{
1934 spin_unlock(&nfs_client_lock); 1988 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1989
1990 spin_unlock(&nn->nfs_client_lock);
1935} 1991}
1936 1992
1937/* 1993/*
@@ -1942,9 +1998,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1942 struct nfs_server *server; 1998 struct nfs_server *server;
1943 struct nfs_client *clp; 1999 struct nfs_client *clp;
1944 char dev[8], fsid[17]; 2000 char dev[8], fsid[17];
2001 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1945 2002
1946 /* display header on line 1 */ 2003 /* display header on line 1 */
1947 if (v == &nfs_volume_list) { 2004 if (v == &nn->nfs_volume_list) {
1948 seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); 2005 seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
1949 return 0; 2006 return 0;
1950 } 2007 }
@@ -1959,6 +2016,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1959 (unsigned long long) server->fsid.major, 2016 (unsigned long long) server->fsid.major,
1960 (unsigned long long) server->fsid.minor); 2017 (unsigned long long) server->fsid.minor);
1961 2018
2019 rcu_read_lock();
1962 seq_printf(m, "v%u %s %s %-7s %-17s %s\n", 2020 seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
1963 clp->rpc_ops->version, 2021 clp->rpc_ops->version,
1964 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 2022 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
@@ -1966,6 +2024,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1966 dev, 2024 dev,
1967 fsid, 2025 fsid,
1968 nfs_server_fscache_state(server)); 2026 nfs_server_fscache_state(server));
2027 rcu_read_unlock();
1969 2028
1970 return 0; 2029 return 0;
1971} 2030}
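Every client.c hunk above is one refactoring applied repeatedly: state that used to be global to the module (nfs_client_list, nfs_volume_list, nfs_client_lock, the NFSv4 cb_ident_idr, and the DNS cache further down) moves into a per-network-namespace container reached through net_generic(). A minimal sketch of the shape this implies; the field names mirror the hunks, but the container is actually defined in fs/nfs/netns.h and the exact layout here is an assumption:

    /* Sketch (assumed layout) of the per-net container that replaces
     * the old file-scope globals in fs/nfs/client.c. */
    struct nfs_net {
            struct cache_detail *nfs_dns_resolve;
    #ifdef CONFIG_NFS_V4
            struct idr cb_ident_idr;        /* NFSv4.0 callback idents */
    #endif
            struct list_head nfs_client_list;
            struct list_head nfs_volume_list;
            spinlock_t nfs_client_lock;     /* guards both lists */
    };

    extern int nfs_net_id;  /* allocated by register_pernet_subsys() */

    static inline struct nfs_net *nfs_netns(const struct nfs_client *clp)
    {
            /* every former global access becomes this two-step lookup */
            return net_generic(clp->net, nfs_net_id);
    }

With that in place, the /proc seq_file handlers can no longer walk a global list either, which is why the open routines above derive the namespace from the mount's pid namespace and stash it in m->private.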
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f265406980..89af1d26927 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -105,7 +105,7 @@ again:
105 continue; 105 continue;
106 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 106 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
107 continue; 107 continue;
108 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) 108 if (!nfs4_stateid_match(&state->stateid, stateid))
109 continue; 109 continue;
110 get_nfs_open_context(ctx); 110 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 111 spin_unlock(&inode->i_lock);
@@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
139 if (delegation != NULL) { 139 if (delegation != NULL) {
140 spin_lock(&delegation->lock); 140 spin_lock(&delegation->lock);
141 if (delegation->inode != NULL) { 141 if (delegation->inode != NULL) {
142 memcpy(delegation->stateid.data, res->delegation.data, 142 nfs4_stateid_copy(&delegation->stateid, &res->delegation);
143 sizeof(delegation->stateid.data));
144 delegation->type = res->delegation_type; 143 delegation->type = res->delegation_type;
145 delegation->maxsize = res->maxsize; 144 delegation->maxsize = res->maxsize;
146 oldcred = delegation->cred; 145 oldcred = delegation->cred;
@@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
236 delegation = kmalloc(sizeof(*delegation), GFP_NOFS); 235 delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
237 if (delegation == NULL) 236 if (delegation == NULL)
238 return -ENOMEM; 237 return -ENOMEM;
239 memcpy(delegation->stateid.data, res->delegation.data, 238 nfs4_stateid_copy(&delegation->stateid, &res->delegation);
240 sizeof(delegation->stateid.data));
241 delegation->type = res->delegation_type; 239 delegation->type = res->delegation_type;
242 delegation->maxsize = res->maxsize; 240 delegation->maxsize = res->maxsize;
243 delegation->change_attr = inode->i_version; 241 delegation->change_attr = inode->i_version;
@@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
250 old_delegation = rcu_dereference_protected(nfsi->delegation, 248 old_delegation = rcu_dereference_protected(nfsi->delegation,
251 lockdep_is_held(&clp->cl_lock)); 249 lockdep_is_held(&clp->cl_lock));
252 if (old_delegation != NULL) { 250 if (old_delegation != NULL) {
253 if (memcmp(&delegation->stateid, &old_delegation->stateid, 251 if (nfs4_stateid_match(&delegation->stateid,
254 sizeof(old_delegation->stateid)) == 0 && 252 &old_delegation->stateid) &&
255 delegation->type == old_delegation->type) { 253 delegation->type == old_delegation->type) {
256 goto out; 254 goto out;
257 } 255 }
258 /* 256 /*
259 * Deal with broken servers that hand out two 257 * Deal with broken servers that hand out two
260 * delegations for the same file. 258 * delegations for the same file.
259 * Allow for upgrades to a WRITE delegation, but
260 * nothing else.
261 */ 261 */
262 dfprintk(FILE, "%s: server %s handed out " 262 dfprintk(FILE, "%s: server %s handed out "
263 "a duplicate delegation!\n", 263 "a duplicate delegation!\n",
264 __func__, clp->cl_hostname); 264 __func__, clp->cl_hostname);
265 if (delegation->type <= old_delegation->type) { 265 if (delegation->type == old_delegation->type ||
266 !(delegation->type & FMODE_WRITE)) {
266 freeme = delegation; 267 freeme = delegation;
267 delegation = NULL; 268 delegation = NULL;
268 goto out; 269 goto out;
@@ -455,17 +456,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
455 rcu_read_unlock(); 456 rcu_read_unlock();
456} 457}
457 458
458static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
459{
460 nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
461}
462
463static void nfs_delegation_run_state_manager(struct nfs_client *clp) 459static void nfs_delegation_run_state_manager(struct nfs_client *clp)
464{ 460{
465 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) 461 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
466 nfs4_schedule_state_manager(clp); 462 nfs4_schedule_state_manager(clp);
467} 463}
468 464
465void nfs_remove_bad_delegation(struct inode *inode)
466{
467 struct nfs_delegation *delegation;
468
469 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
470 if (delegation) {
471 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
472 nfs_free_delegation(delegation);
473 }
474}
475EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
476
469/** 477/**
470 * nfs_expire_all_delegation_types 478 * nfs_expire_all_delegation_types
471 * @clp: client to process 479 * @clp: client to process
@@ -488,18 +496,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
488 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); 496 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
489} 497}
490 498
491/**
492 * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
493 * @clp: client to process
494 *
495 */
496void nfs_handle_cb_pathdown(struct nfs_client *clp)
497{
498 if (clp == NULL)
499 return;
500 nfs_client_mark_return_all_delegations(clp);
501}
502
503static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) 499static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
504{ 500{
505 struct nfs_delegation *delegation; 501 struct nfs_delegation *delegation;
@@ -531,7 +527,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
531/** 527/**
532 * nfs_async_inode_return_delegation - asynchronously return a delegation 528 * nfs_async_inode_return_delegation - asynchronously return a delegation
533 * @inode: inode to process 529 * @inode: inode to process
534 * @stateid: state ID information from CB_RECALL arguments 530 * @stateid: state ID information
535 * 531 *
536 * Returns zero on success, or a negative errno value. 532 * Returns zero on success, or a negative errno value.
537 */ 533 */
@@ -545,7 +541,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
545 rcu_read_lock(); 541 rcu_read_lock();
546 delegation = rcu_dereference(NFS_I(inode)->delegation); 542 delegation = rcu_dereference(NFS_I(inode)->delegation);
547 543
548 if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { 544 if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
549 rcu_read_unlock(); 545 rcu_read_unlock();
550 return -ENOENT; 546 return -ENOENT;
551 } 547 }
@@ -684,21 +680,25 @@ int nfs_delegations_present(struct nfs_client *clp)
684 * nfs4_copy_delegation_stateid - Copy inode's state ID information 680 * nfs4_copy_delegation_stateid - Copy inode's state ID information
685 * @dst: stateid data structure to fill in 681 * @dst: stateid data structure to fill in
686 * @inode: inode to check 682 * @inode: inode to check
683 * @flags: delegation type requirement
687 * 684 *
688 * Returns one and fills in "dst->data" if inode had a delegation, 685 * Returns "true" and fills in "dst->data" if inode had a delegation,
689 * otherwise zero is returned. 686 * otherwise "false" is returned.
690 */ 687 */
691int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 688bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
689 fmode_t flags)
692{ 690{
693 struct nfs_inode *nfsi = NFS_I(inode); 691 struct nfs_inode *nfsi = NFS_I(inode);
694 struct nfs_delegation *delegation; 692 struct nfs_delegation *delegation;
695 int ret = 0; 693 bool ret;
696 694
695 flags &= FMODE_READ|FMODE_WRITE;
697 rcu_read_lock(); 696 rcu_read_lock();
698 delegation = rcu_dereference(nfsi->delegation); 697 delegation = rcu_dereference(nfsi->delegation);
699 if (delegation != NULL) { 698 ret = (delegation != NULL && (delegation->type & flags) == flags);
700 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); 699 if (ret) {
701 ret = 1; 700 nfs4_stateid_copy(dst, &delegation->stateid);
701 nfs_mark_delegation_referenced(delegation);
702 } 702 }
703 rcu_read_unlock(); 703 rcu_read_unlock();
704 return ret; 704 return ret;
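The stateid hunks above replace open-coded memcmp()/memcpy() over stateid->data with named helpers. Reconstructed from the call sites, the helpers are presumably one-liners along these lines (an assumption; they live in fs/nfs/nfs4_fs.h, and comparing the whole structure in one place is what lets the sizeof arguments disappear from every caller):

    /* Assumed definitions, inferred from the converted call sites: */
    static inline void nfs4_stateid_copy(nfs4_stateid *dst,
                                         const nfs4_stateid *src)
    {
            memcpy(dst, src, sizeof(*dst));
    }

    static inline bool nfs4_stateid_match(const nfs4_stateid *dst,
                                          const nfs4_stateid *src)
    {
            return memcmp(dst, src, sizeof(*dst)) == 0;
    }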
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d9322e490c5..cd6a7a8dada 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb);
42void nfs_expire_all_delegations(struct nfs_client *clp); 42void nfs_expire_all_delegations(struct nfs_client *clp);
43void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); 43void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
44void nfs_expire_unreferenced_delegations(struct nfs_client *clp); 44void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
45void nfs_handle_cb_pathdown(struct nfs_client *clp);
46int nfs_client_return_marked_delegations(struct nfs_client *clp); 45int nfs_client_return_marked_delegations(struct nfs_client *clp);
47int nfs_delegations_present(struct nfs_client *clp); 46int nfs_delegations_present(struct nfs_client *clp);
47void nfs_remove_bad_delegation(struct inode *inode);
48 48
49void nfs_delegation_mark_reclaim(struct nfs_client *clp); 49void nfs_delegation_mark_reclaim(struct nfs_client *clp);
50void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 50void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
53int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); 53int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
54int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); 54int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
55int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 55int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
56int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 56bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
57 57
58void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); 58void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
59int nfs_have_delegation(struct inode *inode, fmode_t flags); 59int nfs_have_delegation(struct inode *inode, fmode_t flags);
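Besides dropping nfs_handle_cb_pathdown() and exporting nfs_remove_bad_delegation(), the header records the new contract of nfs4_copy_delegation_stateid(): a bool result plus the fmode_t the caller needs satisfied. A hypothetical caller under that contract:

    /* Hypothetical caller: the copied stateid is only usable when a
     * delegation exists AND grants at least the requested open mode. */
    static bool delegation_covers_write(struct inode *inode,
                                        nfs4_stateid *stateid)
    {
            return nfs4_copy_delegation_stateid(stateid, inode,
                                                FMODE_WRITE);
    }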
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fd9a872fada..8789210c690 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -207,7 +207,7 @@ struct nfs_cache_array_entry {
207}; 207};
208 208
209struct nfs_cache_array { 209struct nfs_cache_array {
210 unsigned int size; 210 int size;
211 int eof_index; 211 int eof_index;
212 u64 last_cookie; 212 u64 last_cookie;
213 struct nfs_cache_array_entry array[0]; 213 struct nfs_cache_array_entry array[0];
@@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page)
260 struct nfs_cache_array *array; 260 struct nfs_cache_array *array;
261 int i; 261 int i;
262 262
263 array = kmap_atomic(page, KM_USER0); 263 array = kmap_atomic(page);
264 for (i = 0; i < array->size; i++) 264 for (i = 0; i < array->size; i++)
265 kfree(array->array[i].string.name); 265 kfree(array->array[i].string.name);
266 kunmap_atomic(array, KM_USER0); 266 kunmap_atomic(array);
267} 267}
268 268
269/* 269/*
@@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1429 } 1429 }
1430 1430
1431 open_flags = nd->intent.open.flags; 1431 open_flags = nd->intent.open.flags;
1432 attr.ia_valid = ATTR_OPEN;
1432 1433
1433 ctx = create_nfs_open_context(dentry, open_flags); 1434 ctx = create_nfs_open_context(dentry, open_flags);
1434 res = ERR_CAST(ctx); 1435 res = ERR_CAST(ctx);
@@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1437 1438
1438 if (nd->flags & LOOKUP_CREATE) { 1439 if (nd->flags & LOOKUP_CREATE) {
1439 attr.ia_mode = nd->intent.open.create_mode; 1440 attr.ia_mode = nd->intent.open.create_mode;
1440 attr.ia_valid = ATTR_MODE; 1441 attr.ia_valid |= ATTR_MODE;
1441 attr.ia_mode &= ~current_umask(); 1442 attr.ia_mode &= ~current_umask();
1442 } else { 1443 } else
1443 open_flags &= ~(O_EXCL | O_CREAT); 1444 open_flags &= ~(O_EXCL | O_CREAT);
1444 attr.ia_valid = 0; 1445
1446 if (open_flags & O_TRUNC) {
1447 attr.ia_valid |= ATTR_SIZE;
1448 attr.ia_size = 0;
1445 } 1449 }
1446 1450
1447 /* Open the file on the server */ 1451 /* Open the file on the server */
@@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1495 struct inode *inode; 1499 struct inode *inode;
1496 struct inode *dir; 1500 struct inode *dir;
1497 struct nfs_open_context *ctx; 1501 struct nfs_open_context *ctx;
1502 struct iattr attr;
1498 int openflags, ret = 0; 1503 int openflags, ret = 0;
1499 1504
1500 if (nd->flags & LOOKUP_RCU) 1505 if (nd->flags & LOOKUP_RCU)
@@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1523 /* We cannot do exclusive creation on a positive dentry */ 1528 /* We cannot do exclusive creation on a positive dentry */
1524 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1529 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1525 goto no_open_dput; 1530 goto no_open_dput;
1526 /* We can't create new files, or truncate existing ones here */ 1531 /* We can't create new files here */
1527 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1532 openflags &= ~(O_CREAT|O_EXCL);
1528 1533
1529 ctx = create_nfs_open_context(dentry, openflags); 1534 ctx = create_nfs_open_context(dentry, openflags);
1530 ret = PTR_ERR(ctx); 1535 ret = PTR_ERR(ctx);
1531 if (IS_ERR(ctx)) 1536 if (IS_ERR(ctx))
1532 goto out; 1537 goto out;
1538
1539 attr.ia_valid = ATTR_OPEN;
1540 if (openflags & O_TRUNC) {
1541 attr.ia_valid |= ATTR_SIZE;
1542 attr.ia_size = 0;
1543 nfs_wb_all(inode);
1544 }
1545
1533 /* 1546 /*
1534 * Note: we're not holding inode->i_mutex and so may be racing with 1547 * Note: we're not holding inode->i_mutex and so may be racing with
1535 * operations that change the directory. We therefore save the 1548 * operations that change the directory. We therefore save the
1536 * change attribute *before* we do the RPC call. 1549 * change attribute *before* we do the RPC call.
1537 */ 1550 */
1538 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); 1551 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
1539 if (IS_ERR(inode)) { 1552 if (IS_ERR(inode)) {
1540 ret = PTR_ERR(inode); 1553 ret = PTR_ERR(inode);
1541 switch (ret) { 1554 switch (ret) {
@@ -1870,11 +1883,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1870 if (!page) 1883 if (!page)
1871 return -ENOMEM; 1884 return -ENOMEM;
1872 1885
1873 kaddr = kmap_atomic(page, KM_USER0); 1886 kaddr = kmap_atomic(page);
1874 memcpy(kaddr, symname, pathlen); 1887 memcpy(kaddr, symname, pathlen);
1875 if (pathlen < PAGE_SIZE) 1888 if (pathlen < PAGE_SIZE)
1876 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); 1889 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1877 kunmap_atomic(kaddr, KM_USER0); 1890 kunmap_atomic(kaddr);
1878 1891
1879 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); 1892 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1880 if (error != 0) { 1893 if (error != 0) {
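Two independent cleanups meet in dir.c. O_TRUNC at open time is now carried in the iattr handed to ->open_context() (ATTR_OPEN, plus ATTR_SIZE with ia_size = 0), so the truncation rides along with the OPEN rather than needing a separate setattr round trip. And kmap_atomic()/kunmap_atomic() lose the KM_USER0 slot argument, following the kernel-wide move to stack-based atomic kmaps; the resulting idiom, lifted from the symlink hunk above:

    /* Post-KM_USER0 idiom: no slot argument, unmap takes the address,
     * and mappings nest stack-wise. */
    static void copy_symlink_into_page(struct page *page,
                                       const char *symname, size_t pathlen)
    {
            char *kaddr = kmap_atomic(page);

            memcpy(kaddr, symname, pathlen);
            if (pathlen < PAGE_SIZE)
                    memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
            kunmap_atomic(kaddr);
    }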
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1940f1a56a5..481be7f7bdd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -51,7 +51,6 @@
51#include <linux/nfs_page.h> 51#include <linux/nfs_page.h>
52#include <linux/sunrpc/clnt.h> 52#include <linux/sunrpc/clnt.h>
53 53
54#include <asm/system.h>
55#include <asm/uaccess.h> 54#include <asm/uaccess.h>
56#include <linux/atomic.h> 55#include <linux/atomic.h>
57 56
@@ -265,9 +264,7 @@ static void nfs_direct_read_release(void *calldata)
265} 264}
266 265
267static const struct rpc_call_ops nfs_read_direct_ops = { 266static const struct rpc_call_ops nfs_read_direct_ops = {
268#if defined(CONFIG_NFS_V4_1)
269 .rpc_call_prepare = nfs_read_prepare, 267 .rpc_call_prepare = nfs_read_prepare,
270#endif /* CONFIG_NFS_V4_1 */
271 .rpc_call_done = nfs_direct_read_result, 268 .rpc_call_done = nfs_direct_read_result,
272 .rpc_release = nfs_direct_read_release, 269 .rpc_release = nfs_direct_read_release,
273}; 270};
@@ -554,9 +551,7 @@ static void nfs_direct_commit_release(void *calldata)
554} 551}
555 552
556static const struct rpc_call_ops nfs_commit_direct_ops = { 553static const struct rpc_call_ops nfs_commit_direct_ops = {
557#if defined(CONFIG_NFS_V4_1)
558 .rpc_call_prepare = nfs_write_prepare, 554 .rpc_call_prepare = nfs_write_prepare,
559#endif /* CONFIG_NFS_V4_1 */
560 .rpc_call_done = nfs_direct_commit_result, 555 .rpc_call_done = nfs_direct_commit_result,
561 .rpc_release = nfs_direct_commit_release, 556 .rpc_release = nfs_direct_commit_release,
562}; 557};
@@ -696,9 +691,7 @@ out_unlock:
696} 691}
697 692
698static const struct rpc_call_ops nfs_write_direct_ops = { 693static const struct rpc_call_ops nfs_write_direct_ops = {
699#if defined(CONFIG_NFS_V4_1)
700 .rpc_call_prepare = nfs_write_prepare, 694 .rpc_call_prepare = nfs_write_prepare,
701#endif /* CONFIG_NFS_V4_1 */
702 .rpc_call_done = nfs_direct_write_result, 695 .rpc_call_done = nfs_direct_write_result,
703 .rpc_release = nfs_direct_write_release, 696 .rpc_release = nfs_direct_write_release,
704}; 697};
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index a6e711ad130..b3924b8a600 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -10,8 +10,9 @@
10 10
11#include <linux/sunrpc/clnt.h> 11#include <linux/sunrpc/clnt.h>
12#include <linux/dns_resolver.h> 12#include <linux/dns_resolver.h>
13#include "dns_resolve.h"
13 14
14ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 15ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
15 struct sockaddr *sa, size_t salen) 16 struct sockaddr *sa, size_t salen)
16{ 17{
17 ssize_t ret; 18 ssize_t ret;
@@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
20 21
21 ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); 22 ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
22 if (ip_len > 0) 23 if (ip_len > 0)
23 ret = rpc_pton(ip_addr, ip_len, sa, salen); 24 ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
24 else 25 else
25 ret = -ESRCH; 26 ret = -ESRCH;
26 kfree(ip_addr); 27 kfree(ip_addr);
@@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
40#include <linux/sunrpc/clnt.h> 41#include <linux/sunrpc/clnt.h>
41#include <linux/sunrpc/cache.h> 42#include <linux/sunrpc/cache.h>
42#include <linux/sunrpc/svcauth.h> 43#include <linux/sunrpc/svcauth.h>
44#include <linux/sunrpc/rpc_pipe_fs.h>
43 45
44#include "dns_resolve.h" 46#include "dns_resolve.h"
45#include "cache_lib.h" 47#include "cache_lib.h"
48#include "netns.h"
46 49
47#define NFS_DNS_HASHBITS 4 50#define NFS_DNS_HASHBITS 4
48#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) 51#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
49 52
50static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
51
52struct nfs_dns_ent { 53struct nfs_dns_ent {
53 struct cache_head h; 54 struct cache_head h;
54 55
@@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
224 len = qword_get(&buf, buf1, sizeof(buf1)); 225 len = qword_get(&buf, buf1, sizeof(buf1));
225 if (len <= 0) 226 if (len <= 0)
226 goto out; 227 goto out;
227 key.addrlen = rpc_pton(buf1, len, 228 key.addrlen = rpc_pton(cd->net, buf1, len,
228 (struct sockaddr *)&key.addr, 229 (struct sockaddr *)&key.addr,
229 sizeof(key.addr)); 230 sizeof(key.addr));
230 231
@@ -259,21 +260,6 @@ out:
259 return ret; 260 return ret;
260} 261}
261 262
262static struct cache_detail nfs_dns_resolve = {
263 .owner = THIS_MODULE,
264 .hash_size = NFS_DNS_HASHTBL_SIZE,
265 .hash_table = nfs_dns_table,
266 .name = "dns_resolve",
267 .cache_put = nfs_dns_ent_put,
268 .cache_upcall = nfs_dns_upcall,
269 .cache_parse = nfs_dns_parse,
270 .cache_show = nfs_dns_show,
271 .match = nfs_dns_match,
272 .init = nfs_dns_ent_init,
273 .update = nfs_dns_ent_update,
274 .alloc = nfs_dns_ent_alloc,
275};
276
277static int do_cache_lookup(struct cache_detail *cd, 263static int do_cache_lookup(struct cache_detail *cd,
278 struct nfs_dns_ent *key, 264 struct nfs_dns_ent *key,
279 struct nfs_dns_ent **item, 265 struct nfs_dns_ent **item,
@@ -336,8 +322,8 @@ out:
336 return ret; 322 return ret;
337} 323}
338 324
339ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 325ssize_t nfs_dns_resolve_name(struct net *net, char *name,
340 struct sockaddr *sa, size_t salen) 326 size_t namelen, struct sockaddr *sa, size_t salen)
341{ 327{
342 struct nfs_dns_ent key = { 328 struct nfs_dns_ent key = {
343 .hostname = name, 329 .hostname = name,
@@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
345 }; 331 };
346 struct nfs_dns_ent *item = NULL; 332 struct nfs_dns_ent *item = NULL;
347 ssize_t ret; 333 ssize_t ret;
334 struct nfs_net *nn = net_generic(net, nfs_net_id);
348 335
349 ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); 336 ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
350 if (ret == 0) { 337 if (ret == 0) {
351 if (salen >= item->addrlen) { 338 if (salen >= item->addrlen) {
352 memcpy(sa, &item->addr, item->addrlen); 339 memcpy(sa, &item->addr, item->addrlen);
353 ret = item->addrlen; 340 ret = item->addrlen;
354 } else 341 } else
355 ret = -EOVERFLOW; 342 ret = -EOVERFLOW;
356 cache_put(&item->h, &nfs_dns_resolve); 343 cache_put(&item->h, nn->nfs_dns_resolve);
357 } else if (ret == -ENOENT) 344 } else if (ret == -ENOENT)
358 ret = -ESRCH; 345 ret = -ESRCH;
359 return ret; 346 return ret;
360} 347}
361 348
349int nfs_dns_resolver_cache_init(struct net *net)
350{
351 int err = -ENOMEM;
352 struct nfs_net *nn = net_generic(net, nfs_net_id);
353 struct cache_detail *cd;
354 struct cache_head **tbl;
355
356 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
357 if (cd == NULL)
358 goto err_cd;
359
360 tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
361 GFP_KERNEL);
362 if (tbl == NULL)
363 goto err_tbl;
364
365	cd->owner = THIS_MODULE;
366	cd->hash_size = NFS_DNS_HASHTBL_SIZE;
367	cd->hash_table = tbl;
368	cd->name = "dns_resolve";
369	cd->cache_put = nfs_dns_ent_put;
370	cd->cache_upcall = nfs_dns_upcall;
371	cd->cache_parse = nfs_dns_parse;
372	cd->cache_show = nfs_dns_show;
373	cd->match = nfs_dns_match;
374	cd->init = nfs_dns_ent_init;
375	cd->update = nfs_dns_ent_update;
376	cd->alloc = nfs_dns_ent_alloc;
377
378 nfs_cache_init(cd);
379 err = nfs_cache_register_net(net, cd);
380 if (err)
381 goto err_reg;
382 nn->nfs_dns_resolve = cd;
383 return 0;
384
385err_reg:
386 nfs_cache_destroy(cd);
387 kfree(cd->hash_table);
388err_tbl:
389 kfree(cd);
390err_cd:
391 return err;
392}
393
394void nfs_dns_resolver_cache_destroy(struct net *net)
395{
396 struct nfs_net *nn = net_generic(net, nfs_net_id);
397 struct cache_detail *cd = nn->nfs_dns_resolve;
398
399 nfs_cache_unregister_net(net, cd);
400 nfs_cache_destroy(cd);
401 kfree(cd->hash_table);
402 kfree(cd);
403}
404
405static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
406 void *ptr)
407{
408 struct super_block *sb = ptr;
409 struct net *net = sb->s_fs_info;
410 struct nfs_net *nn = net_generic(net, nfs_net_id);
411 struct cache_detail *cd = nn->nfs_dns_resolve;
412 int ret = 0;
413
414 if (cd == NULL)
415 return 0;
416
417 if (!try_module_get(THIS_MODULE))
418 return 0;
419
420 switch (event) {
421 case RPC_PIPEFS_MOUNT:
422 ret = nfs_cache_register_sb(sb, cd);
423 break;
424 case RPC_PIPEFS_UMOUNT:
425 nfs_cache_unregister_sb(sb, cd);
426 break;
427 default:
428 ret = -ENOTSUPP;
429 break;
430 }
431 module_put(THIS_MODULE);
432 return ret;
433}
434
435static struct notifier_block nfs_dns_resolver_block = {
436 .notifier_call = rpc_pipefs_event,
437};
438
362int nfs_dns_resolver_init(void) 439int nfs_dns_resolver_init(void)
363{ 440{
364 return nfs_cache_register(&nfs_dns_resolve); 441 return rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
365} 442}
366 443
367void nfs_dns_resolver_destroy(void) 444void nfs_dns_resolver_destroy(void)
368{ 445{
369 nfs_cache_unregister(&nfs_dns_resolve); 446 rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
370} 447}
371
372#endif 448#endif
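dns_resolve.c applies the same containerisation: the single static cache_detail and its static hash table become per-namespace allocations stored in nfs_net, and module init/exit shrink to registering a rpc_pipefs notifier that attaches or detaches the cache as pipefs superblocks come and go. One plausible way the per-net hooks get wired up (a sketch only; the pernet_operations table is registered elsewhere in this series, and nfs_net_ops/nfs_net_init/nfs_net_exit are assumed names):

    /* Sketch: per-net init/exit wiring (names assumed). */
    static __net_init int nfs_net_init(struct net *net)
    {
            nfs_clients_init(net);
            return nfs_dns_resolver_cache_init(net);
    }

    static __net_exit void nfs_net_exit(struct net *net)
    {
            nfs_dns_resolver_cache_destroy(net);
            nfs_cleanup_cb_ident_idr(net);
    }

    static struct pernet_operations nfs_net_ops = {
            .init = nfs_net_init,
            .exit = nfs_net_exit,
            .id   = &nfs_net_id,
            .size = sizeof(struct nfs_net),
    };

Registering such a table via register_pernet_subsys() would guarantee every namespace gets its own resolver cache before any mount in that namespace can use it.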
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 199bb5543a9..2e4f596d292 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void)
15 15
16static inline void nfs_dns_resolver_destroy(void) 16static inline void nfs_dns_resolver_destroy(void)
17{} 17{}
18
19static inline int nfs_dns_resolver_cache_init(struct net *net)
20{
21 return 0;
22}
23
24static inline void nfs_dns_resolver_cache_destroy(struct net *net)
25{}
18#else 26#else
19extern int nfs_dns_resolver_init(void); 27extern int nfs_dns_resolver_init(void);
20extern void nfs_dns_resolver_destroy(void); 28extern void nfs_dns_resolver_destroy(void);
29extern int nfs_dns_resolver_cache_init(struct net *net);
30extern void nfs_dns_resolver_cache_destroy(struct net *net);
21#endif 31#endif
22 32
23extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 33extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
24 struct sockaddr *sa, size_t salen); 34 size_t namelen, struct sockaddr *sa, size_t salen);
25 35
26#endif 36#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c43a452f7da..aa9b709fd32 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -30,7 +30,6 @@
30#include <linux/swap.h> 30#include <linux/swap.h>
31 31
32#include <asm/uaccess.h> 32#include <asm/uaccess.h>
33#include <asm/system.h>
34 33
35#include "delegation.h" 34#include "delegation.h"
36#include "internal.h" 35#include "internal.h"
@@ -530,6 +529,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
530 if (mapping != dentry->d_inode->i_mapping) 529 if (mapping != dentry->d_inode->i_mapping)
531 goto out_unlock; 530 goto out_unlock;
532 531
532 wait_on_page_writeback(page);
533
533 pagelen = nfs_page_length(page); 534 pagelen = nfs_page_length(page);
534 if (pagelen == 0) 535 if (pagelen == 0)
535 goto out_unlock; 536 goto out_unlock;
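Aside from dropping the obsolete <asm/system.h> include, file.c gains one functional line: nfs_vm_page_mkwrite() now waits out any in-flight writeback of the faulting page before allowing it to be re-dirtied, so the page contents stay stable while an earlier WRITE is still on the wire. The essential pattern, reduced to a sketch rather than the full handler:

    /* Sketch: the stable-page step inside a ->page_mkwrite handler.
     * The page is already locked; the mapping check guards against a
     * racing truncate before we block on writeback completion. */
    static void wait_for_stable_page_sketch(struct page *page,
                                            struct address_space *mapping)
    {
            if (page->mapping == mapping)
                    wait_on_page_writeback(page);
    }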
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 419119c371b..ae65c16b367 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
327{ 327{
328 struct nfs_inode *nfsi = NFS_I(inode); 328 struct nfs_inode *nfsi = NFS_I(inode);
329 struct nfs_server *nfss = NFS_SERVER(inode); 329 struct nfs_server *nfss = NFS_SERVER(inode);
330 struct fscache_cookie *old = nfsi->fscache; 330 NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
331 331
332 nfs_fscache_inode_lock(inode); 332 nfs_fscache_inode_lock(inode);
333 if (nfsi->fscache) { 333 if (nfsi->fscache) {
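The fscache.c change only silences a set-but-unused warning: 'old' is consumed solely by a dfprintk() that compiles away in non-debug builds, so its declaration is wrapped in NFS_IFDEBUG. The macro presumably reduces to this (an assumption about its definition in the NFS headers):

    /* Assumed shape of NFS_IFDEBUG: keep the argument only when NFS
     * debugging is compiled in. */
    #ifdef NFS_DEBUG
    # define NFS_IFDEBUG(x)         x
    #else
    # define NFS_IFDEBUG(x)
    #endif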
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index dcb61548887..4ca6f5c8038 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -32,7 +32,6 @@
32#include <linux/namei.h> 32#include <linux/namei.h>
33#include <linux/security.h> 33#include <linux/security.h>
34 34
35#include <asm/system.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
37 36
38#include "nfs4_fs.h" 37#include "nfs4_fs.h"
@@ -49,11 +48,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
49{ 48{
50 /* The mntroot acts as the dummy root dentry for this superblock */ 49 /* The mntroot acts as the dummy root dentry for this superblock */
51 if (sb->s_root == NULL) { 50 if (sb->s_root == NULL) {
52 sb->s_root = d_alloc_root(inode); 51 sb->s_root = d_make_root(inode);
53 if (sb->s_root == NULL) { 52 if (sb->s_root == NULL)
54 iput(inode);
55 return -ENOMEM; 53 return -ENOMEM;
56 }
57 ihold(inode); 54 ihold(inode);
58 /* 55 /*
59 * Ensure that this dentry is invisible to d_find_alias(). 56 * Ensure that this dentry is invisible to d_find_alias().
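getroot.c adopts d_make_root(), whose contract differs from d_alloc_root() exactly where the deleted lines show: it consumes the inode reference even when dentry allocation fails, so the caller's iput() on the error path disappears. Its semantics, roughly (a sketch, not the dcache source; alloc_root_dentry() is a hypothetical stand-in):

    /* Sketch of d_make_root() semantics: the inode reference is
     * consumed on both the success and the failure path. */
    struct dentry *d_make_root(struct inode *root_inode)
    {
            struct dentry *res = NULL;

            if (root_inode) {
                    res = alloc_root_dentry(root_inode); /* hypothetical */
                    if (!res)
                            iput(root_inode);   /* consumed on failure */
            }
            return res;
    }

The ihold() that remains in the new code then re-takes a reference for NFS's continued use of the inode.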
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 2c05f1991e1..ba3019f5934 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,11 +34,29 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/string.h> 37#include <linux/parser.h>
38#include <linux/kernel.h> 38#include <linux/fs.h>
39#include <linux/slab.h>
40#include <linux/nfs_idmap.h> 39#include <linux/nfs_idmap.h>
40#include <net/net_namespace.h>
41#include <linux/sunrpc/rpc_pipe_fs.h>
41#include <linux/nfs_fs.h> 42#include <linux/nfs_fs.h>
43#include <linux/nfs_fs_sb.h>
44#include <linux/key.h>
45#include <linux/keyctl.h>
46#include <linux/key-type.h>
47#include <keys/user-type.h>
48#include <linux/module.h>
49
50#include "internal.h"
51#include "netns.h"
52
53#define NFS_UINT_MAXLEN 11
54
55/* Default cache timeout is 10 minutes */
56unsigned int nfs_idmap_cache_timeout = 600;
57static const struct cred *id_resolver_cache;
58static struct key_type key_type_id_resolver_legacy;
59
42 60
43/** 61/**
44 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 62 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
@@ -142,24 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
142 return snprintf(buf, buflen, "%u", id); 160 return snprintf(buf, buflen, "%u", id);
143} 161}
144 162
145#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 163static struct key_type key_type_id_resolver = {
146
147#include <linux/cred.h>
148#include <linux/sunrpc/sched.h>
149#include <linux/nfs4.h>
150#include <linux/nfs_fs_sb.h>
151#include <linux/keyctl.h>
152#include <linux/key-type.h>
153#include <linux/rcupdate.h>
154#include <linux/err.h>
155
156#include <keys/user-type.h>
157
158#define NFS_UINT_MAXLEN 11
159
160const struct cred *id_resolver_cache;
161
162struct key_type key_type_id_resolver = {
163 .name = "id_resolver", 164 .name = "id_resolver",
164 .instantiate = user_instantiate, 165 .instantiate = user_instantiate,
165 .match = user_match, 166 .match = user_match,
@@ -169,13 +170,14 @@ struct key_type key_type_id_resolver = {
169 .read = user_read, 170 .read = user_read,
170}; 171};
171 172
172int nfs_idmap_init(void) 173static int nfs_idmap_init_keyring(void)
173{ 174{
174 struct cred *cred; 175 struct cred *cred;
175 struct key *keyring; 176 struct key *keyring;
176 int ret = 0; 177 int ret = 0;
177 178
178 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); 179 printk(KERN_NOTICE "NFS: Registering the %s key type\n",
180 key_type_id_resolver.name);
179 181
180 cred = prepare_kernel_cred(NULL); 182 cred = prepare_kernel_cred(NULL);
181 if (!cred) 183 if (!cred)
@@ -198,6 +200,7 @@ int nfs_idmap_init(void)
198 if (ret < 0) 200 if (ret < 0)
199 goto failed_put_key; 201 goto failed_put_key;
200 202
203 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
201 cred->thread_keyring = keyring; 204 cred->thread_keyring = keyring;
202 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 205 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
203 id_resolver_cache = cred; 206 id_resolver_cache = cred;
@@ -210,7 +213,7 @@ failed_put_cred:
210 return ret; 213 return ret;
211} 214}
212 215
213void nfs_idmap_quit(void) 216static void nfs_idmap_quit_keyring(void)
214{ 217{
215 key_revoke(id_resolver_cache->thread_keyring); 218 key_revoke(id_resolver_cache->thread_keyring);
216 unregister_key_type(&key_type_id_resolver); 219 unregister_key_type(&key_type_id_resolver);
@@ -245,8 +248,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
245 return desclen; 248 return desclen;
246} 249}
247 250
248static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, 251static ssize_t nfs_idmap_request_key(struct key_type *key_type,
249 const char *type, void *data, size_t data_size) 252 const char *name, size_t namelen,
253 const char *type, void *data,
254 size_t data_size, struct idmap *idmap)
250{ 255{
251 const struct cred *saved_cred; 256 const struct cred *saved_cred;
252 struct key *rkey; 257 struct key *rkey;
@@ -259,8 +264,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
259 goto out; 264 goto out;
260 265
261 saved_cred = override_creds(id_resolver_cache); 266 saved_cred = override_creds(id_resolver_cache);
262 rkey = request_key(&key_type_id_resolver, desc, ""); 267 if (idmap)
268 rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap);
269 else
270 rkey = request_key(&key_type_id_resolver, desc, "");
263 revert_creds(saved_cred); 271 revert_creds(saved_cred);
272
264 kfree(desc); 273 kfree(desc);
265 if (IS_ERR(rkey)) { 274 if (IS_ERR(rkey)) {
266 ret = PTR_ERR(rkey); 275 ret = PTR_ERR(rkey);
@@ -293,31 +302,46 @@ out:
293 return ret; 302 return ret;
294} 303}
295 304
305static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
306 const char *type, void *data,
307 size_t data_size, struct idmap *idmap)
308{
309 ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver,
310 name, namelen, type, data,
311 data_size, NULL);
312 if (ret < 0) {
313 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
314 name, namelen, type, data,
315 data_size, idmap);
316 }
317 return ret;
318}
296 319
297/* ID -> Name */ 320/* ID -> Name */
298static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) 321static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
322 size_t buflen, struct idmap *idmap)
299{ 323{
300 char id_str[NFS_UINT_MAXLEN]; 324 char id_str[NFS_UINT_MAXLEN];
301 int id_len; 325 int id_len;
302 ssize_t ret; 326 ssize_t ret;
303 327
304 id_len = snprintf(id_str, sizeof(id_str), "%u", id); 328 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
305 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); 329 ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
306 if (ret < 0) 330 if (ret < 0)
307 return -EINVAL; 331 return -EINVAL;
308 return ret; 332 return ret;
309} 333}
310 334
311/* Name -> ID */ 335/* Name -> ID */
312static int nfs_idmap_lookup_id(const char *name, size_t namelen, 336static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
313 const char *type, __u32 *id) 337 __u32 *id, struct idmap *idmap)
314{ 338{
315 char id_str[NFS_UINT_MAXLEN]; 339 char id_str[NFS_UINT_MAXLEN];
316 long id_long; 340 long id_long;
317 ssize_t data_size; 341 ssize_t data_size;
318 int ret = 0; 342 int ret = 0;
319 343
320 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); 344 data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
321 if (data_size <= 0) { 345 if (data_size <= 0) {
322 ret = -EINVAL; 346 ret = -EINVAL;
323 } else { 347 } else {
@@ -327,114 +351,103 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
327 return ret; 351 return ret;
328} 352}
329 353
330int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 354/* idmap classic begins here */
331{ 355module_param(nfs_idmap_cache_timeout, int, 0644);
332 if (nfs_map_string_to_numeric(name, namelen, uid))
333 return 0;
334 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
335}
336
337int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
338{
339 if (nfs_map_string_to_numeric(name, namelen, gid))
340 return 0;
341 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
342}
343
344int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
345{
346 int ret = -EINVAL;
347
348 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
349 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
350 if (ret < 0)
351 ret = nfs_map_numeric_to_string(uid, buf, buflen);
352 return ret;
353}
354int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
355{
356 int ret = -EINVAL;
357 356
358 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 357struct idmap {
359 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); 358 struct rpc_pipe *idmap_pipe;
360 if (ret < 0) 359 struct key_construction *idmap_key_cons;
361 ret = nfs_map_numeric_to_string(gid, buf, buflen);
362 return ret;
363}
364
365#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
366
367#include <linux/module.h>
368#include <linux/mutex.h>
369#include <linux/init.h>
370#include <linux/socket.h>
371#include <linux/in.h>
372#include <linux/sched.h>
373#include <linux/sunrpc/clnt.h>
374#include <linux/workqueue.h>
375#include <linux/sunrpc/rpc_pipe_fs.h>
376
377#include <linux/nfs_fs.h>
378
379#include "nfs4_fs.h"
380
381#define IDMAP_HASH_SZ 128
382
383/* Default cache timeout is 10 minutes */
384unsigned int nfs_idmap_cache_timeout = 600 * HZ;
385
386static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
387{
388 char *endp;
389 int num = simple_strtol(val, &endp, 0);
390 int jif = num * HZ;
391 if (endp == val || *endp || num < 0 || jif < num)
392 return -EINVAL;
393 *((int *)kp->arg) = jif;
394 return 0;
395}
396
397module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
398 &nfs_idmap_cache_timeout, 0644);
399
400struct idmap_hashent {
401 unsigned long ih_expires;
402 __u32 ih_id;
403 size_t ih_namelen;
404 char ih_name[IDMAP_NAMESZ];
405}; 360};
406 361
407struct idmap_hashtable { 362enum {
408 __u8 h_type; 363 Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
409 struct idmap_hashent h_entries[IDMAP_HASH_SZ];
410}; 364};
411 365
412struct idmap { 366static const match_table_t nfs_idmap_tokens = {
413 struct dentry *idmap_dentry; 367 { Opt_find_uid, "uid:%s" },
414 wait_queue_head_t idmap_wq; 368 { Opt_find_gid, "gid:%s" },
415 struct idmap_msg idmap_im; 369 { Opt_find_user, "user:%s" },
416 struct mutex idmap_lock; /* Serializes upcalls */ 370 { Opt_find_group, "group:%s" },
417 struct mutex idmap_im_lock; /* Protects the hashtable */ 371 { Opt_find_err, NULL }
418 struct idmap_hashtable idmap_user_hash;
419 struct idmap_hashtable idmap_group_hash;
420}; 372};
421 373
374static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
422static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 375static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
423 size_t); 376 size_t);
424static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); 377static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
425 378
426static unsigned int fnvhash32(const void *, size_t);
427
428static const struct rpc_pipe_ops idmap_upcall_ops = { 379static const struct rpc_pipe_ops idmap_upcall_ops = {
429 .upcall = rpc_pipe_generic_upcall, 380 .upcall = rpc_pipe_generic_upcall,
430 .downcall = idmap_pipe_downcall, 381 .downcall = idmap_pipe_downcall,
431 .destroy_msg = idmap_pipe_destroy_msg, 382 .destroy_msg = idmap_pipe_destroy_msg,
432}; 383};
433 384
385static struct key_type key_type_id_resolver_legacy = {
386 .name = "id_resolver",
387 .instantiate = user_instantiate,
388 .match = user_match,
389 .revoke = user_revoke,
390 .destroy = user_destroy,
391 .describe = user_describe,
392 .read = user_read,
393 .request_key = nfs_idmap_legacy_upcall,
394};
395
396static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
397{
398 if (pipe->dentry)
399 rpc_unlink(pipe->dentry);
400}
401
402static int __nfs_idmap_register(struct dentry *dir,
403 struct idmap *idmap,
404 struct rpc_pipe *pipe)
405{
406 struct dentry *dentry;
407
408 dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
409 if (IS_ERR(dentry))
410 return PTR_ERR(dentry);
411 pipe->dentry = dentry;
412 return 0;
413}
414
415static void nfs_idmap_unregister(struct nfs_client *clp,
416 struct rpc_pipe *pipe)
417{
418 struct net *net = clp->net;
419 struct super_block *pipefs_sb;
420
421 pipefs_sb = rpc_get_sb_net(net);
422 if (pipefs_sb) {
423 __nfs_idmap_unregister(pipe);
424 rpc_put_sb_net(net);
425 }
426}
427
428static int nfs_idmap_register(struct nfs_client *clp,
429 struct idmap *idmap,
430 struct rpc_pipe *pipe)
431{
432 struct net *net = clp->net;
433 struct super_block *pipefs_sb;
434 int err = 0;
435
436 pipefs_sb = rpc_get_sb_net(net);
437 if (pipefs_sb) {
438 if (clp->cl_rpcclient->cl_dentry)
439 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
440 idmap, pipe);
441 rpc_put_sb_net(net);
442 }
443 return err;
444}
445
434int 446int
435nfs_idmap_new(struct nfs_client *clp) 447nfs_idmap_new(struct nfs_client *clp)
436{ 448{
437 struct idmap *idmap; 449 struct idmap *idmap;
450 struct rpc_pipe *pipe;
438 int error; 451 int error;
439 452
440 BUG_ON(clp->cl_idmap != NULL); 453 BUG_ON(clp->cl_idmap != NULL);
@@ -443,19 +456,19 @@ nfs_idmap_new(struct nfs_client *clp)
443 if (idmap == NULL) 456 if (idmap == NULL)
444 return -ENOMEM; 457 return -ENOMEM;
445 458
446 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, 459 pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
447 "idmap", idmap, &idmap_upcall_ops, 0); 460 if (IS_ERR(pipe)) {
448 if (IS_ERR(idmap->idmap_dentry)) { 461 error = PTR_ERR(pipe);
449 error = PTR_ERR(idmap->idmap_dentry);
450 kfree(idmap); 462 kfree(idmap);
451 return error; 463 return error;
452 } 464 }
453 465 error = nfs_idmap_register(clp, idmap, pipe);
454 mutex_init(&idmap->idmap_lock); 466 if (error) {
455 mutex_init(&idmap->idmap_im_lock); 467 rpc_destroy_pipe_data(pipe);
456 init_waitqueue_head(&idmap->idmap_wq); 468 kfree(idmap);
457 idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; 469 return error;
458 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; 470 }
471 idmap->idmap_pipe = pipe;
459 472
460 clp->cl_idmap = idmap; 473 clp->cl_idmap = idmap;
461 return 0; 474 return 0;
@@ -468,211 +481,224 @@ nfs_idmap_delete(struct nfs_client *clp)
468 481
469 if (!idmap) 482 if (!idmap)
470 return; 483 return;
471 rpc_unlink(idmap->idmap_dentry); 484 nfs_idmap_unregister(clp, idmap->idmap_pipe);
485 rpc_destroy_pipe_data(idmap->idmap_pipe);
472 clp->cl_idmap = NULL; 486 clp->cl_idmap = NULL;
473 kfree(idmap); 487 kfree(idmap);
474} 488}
475 489
476/* 490static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
477 * Helper routines for manipulating the hashtable 491 struct super_block *sb)
478 */
479static inline struct idmap_hashent *
480idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
481{ 492{
482 return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; 493 int err = 0;
494
495 switch (event) {
496 case RPC_PIPEFS_MOUNT:
497 BUG_ON(clp->cl_rpcclient->cl_dentry == NULL);
498 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
499 clp->cl_idmap,
500 clp->cl_idmap->idmap_pipe);
501 break;
502 case RPC_PIPEFS_UMOUNT:
503 if (clp->cl_idmap->idmap_pipe) {
504 struct dentry *parent;
505
506 parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
507 __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
508 /*
509 * Note: This is a dirty hack. SUNRPC hook has been
510 * called already but simple_rmdir() call for the
511 * directory returned with error because of idmap pipe
512 * inside. Thus now we have to remove this directory
513 * here.
514 */
515 if (rpc_rmdir(parent))
516 printk(KERN_ERR "NFS: %s: failed to remove "
517 "clnt dir!\n", __func__);
518 }
519 break;
520 default:
521 printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
522 event);
523 return -ENOTSUPP;
524 }
525 return err;
526}
527
528static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
529{
530 struct nfs_net *nn = net_generic(net, nfs_net_id);
531 struct dentry *cl_dentry;
532 struct nfs_client *clp;
533
534 spin_lock(&nn->nfs_client_lock);
535 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
536 if (clp->rpc_ops != &nfs_v4_clientops)
537 continue;
538 cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
539 if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
540 ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
541 continue;
542 atomic_inc(&clp->cl_count);
543 spin_unlock(&nn->nfs_client_lock);
544 return clp;
545 }
546 spin_unlock(&nn->nfs_client_lock);
547 return NULL;
483} 548}
484 549
485static struct idmap_hashent * 550static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
486idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) 551 void *ptr)
487{ 552{
488 struct idmap_hashent *he = idmap_name_hash(h, name, len); 553 struct super_block *sb = ptr;
554 struct nfs_client *clp;
555 int error = 0;
489 556
490 if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) 557 if (!try_module_get(THIS_MODULE))
491 return NULL; 558 return 0;
492 if (time_after(jiffies, he->ih_expires))
493 return NULL;
494 return he;
495}
496 559
497static inline struct idmap_hashent * 560 while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
498idmap_id_hash(struct idmap_hashtable* h, __u32 id) 561 error = __rpc_pipefs_event(clp, event, sb);
499{ 562 nfs_put_client(clp);
500 return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; 563 if (error)
564 break;
565 }
566 module_put(THIS_MODULE);
567 return error;
501} 568}
502 569
503static struct idmap_hashent * 570#define PIPEFS_NFS_PRIO 1
504idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
505{
506 struct idmap_hashent *he = idmap_id_hash(h, id);
507 if (he->ih_id != id || he->ih_namelen == 0)
508 return NULL;
509 if (time_after(jiffies, he->ih_expires))
510 return NULL;
511 return he;
512}
513 571
514/* 572static struct notifier_block nfs_idmap_block = {
515 * Routines for allocating new entries in the hashtable. 573 .notifier_call = rpc_pipefs_event,
516 * For now, we just have 1 entry per bucket, so it's all 574 .priority = SUNRPC_PIPEFS_NFS_PRIO,
517 * pretty trivial. 575};
518 */
519static inline struct idmap_hashent *
520idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
521{
522 return idmap_name_hash(h, name, len);
523}
524 576
525static inline struct idmap_hashent * 577int nfs_idmap_init(void)
526idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
527{ 578{
528 return idmap_id_hash(h, id); 579 int ret;
580 ret = nfs_idmap_init_keyring();
581 if (ret != 0)
582 goto out;
583 ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
584 if (ret != 0)
585 nfs_idmap_quit_keyring();
586out:
587 return ret;
529} 588}
530 589
531static void 590void nfs_idmap_quit(void)
532idmap_update_entry(struct idmap_hashent *he, const char *name,
533 size_t namelen, __u32 id)
534{ 591{
535 he->ih_id = id; 592 rpc_pipefs_notifier_unregister(&nfs_idmap_block);
536 memcpy(he->ih_name, name, namelen); 593 nfs_idmap_quit_keyring();
537 he->ih_name[namelen] = '\0';
538 he->ih_namelen = namelen;
539 he->ih_expires = jiffies + nfs_idmap_cache_timeout;
540} 594}
541 595
542/* 596static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
543 * Name -> ID 597 struct rpc_pipe_msg *msg)
544 */
545static int
546nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
547 const char *name, size_t namelen, __u32 *id)
548{ 598{
549 struct rpc_pipe_msg msg; 599 substring_t substr;
550 struct idmap_msg *im; 600 int token, ret;
551 struct idmap_hashent *he;
552 DECLARE_WAITQUEUE(wq, current);
553 int ret = -EIO;
554
555 im = &idmap->idmap_im;
556
557 /*
558 * String sanity checks
559 * Note that the userland daemon expects NUL terminated strings
560 */
561 for (;;) {
562 if (namelen == 0)
563 return -EINVAL;
564 if (name[namelen-1] != '\0')
565 break;
566 namelen--;
567 }
568 if (namelen >= IDMAP_NAMESZ)
569 return -EINVAL;
570 601
571 mutex_lock(&idmap->idmap_lock); 602 memset(im, 0, sizeof(*im));
572 mutex_lock(&idmap->idmap_im_lock); 603 memset(msg, 0, sizeof(*msg));
573
574 he = idmap_lookup_name(h, name, namelen);
575 if (he != NULL) {
576 *id = he->ih_id;
577 ret = 0;
578 goto out;
579 }
580 604
581 memset(im, 0, sizeof(*im)); 605 im->im_type = IDMAP_TYPE_GROUP;
582 memcpy(im->im_name, name, namelen); 606 token = match_token(desc, nfs_idmap_tokens, &substr);
583 607
584 im->im_type = h->h_type; 608 switch (token) {
585 im->im_conv = IDMAP_CONV_NAMETOID; 609 case Opt_find_uid:
610 im->im_type = IDMAP_TYPE_USER;
611 case Opt_find_gid:
612 im->im_conv = IDMAP_CONV_NAMETOID;
613 ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
614 break;
586 615
587 memset(&msg, 0, sizeof(msg)); 616 case Opt_find_user:
588 msg.data = im; 617 im->im_type = IDMAP_TYPE_USER;
589 msg.len = sizeof(*im); 618 case Opt_find_group:
619 im->im_conv = IDMAP_CONV_IDTONAME;
620 ret = match_int(&substr, &im->im_id);
621 break;
590 622
591 add_wait_queue(&idmap->idmap_wq, &wq); 623 default:
592 if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { 624 ret = -EINVAL;
593 remove_wait_queue(&idmap->idmap_wq, &wq);
594 goto out; 625 goto out;
595 } 626 }
596 627
597 set_current_state(TASK_UNINTERRUPTIBLE); 628 msg->data = im;
598 mutex_unlock(&idmap->idmap_im_lock); 629 msg->len = sizeof(struct idmap_msg);
599 schedule();
600 __set_current_state(TASK_RUNNING);
601 remove_wait_queue(&idmap->idmap_wq, &wq);
602 mutex_lock(&idmap->idmap_im_lock);
603 630
604 if (im->im_status & IDMAP_STATUS_SUCCESS) { 631out:
605 *id = im->im_id;
606 ret = 0;
607 }
608
609 out:
610 memset(im, 0, sizeof(*im));
611 mutex_unlock(&idmap->idmap_im_lock);
612 mutex_unlock(&idmap->idmap_lock);
613 return ret; 632 return ret;
614} 633}
615 634
616/* 635static int nfs_idmap_legacy_upcall(struct key_construction *cons,
617 * ID -> Name 636 const char *op,
618 */ 637 void *aux)
619static int
620nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
621 __u32 id, char *name)
622{ 638{
623 struct rpc_pipe_msg msg; 639 struct rpc_pipe_msg *msg;
624 struct idmap_msg *im; 640 struct idmap_msg *im;
625 struct idmap_hashent *he; 641 struct idmap *idmap = (struct idmap *)aux;
626 DECLARE_WAITQUEUE(wq, current); 642 struct key *key = cons->key;
627 int ret = -EIO; 643 int ret;
628 unsigned int len;
629
630 im = &idmap->idmap_im;
631 644
632 mutex_lock(&idmap->idmap_lock); 645 /* msg and im are freed in idmap_pipe_destroy_msg */
633 mutex_lock(&idmap->idmap_im_lock); 646 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
647 if (IS_ERR(msg)) {
648 ret = PTR_ERR(msg);
649 goto out0;
650 }
634 651
635 he = idmap_lookup_id(h, id); 652 im = kmalloc(sizeof(*im), GFP_KERNEL);
636 if (he) { 653 if (IS_ERR(im)) {
637 memcpy(name, he->ih_name, he->ih_namelen); 654 ret = PTR_ERR(im);
638 ret = he->ih_namelen; 655 goto out1;
639 goto out;
640 } 656 }
641 657
642 memset(im, 0, sizeof(*im)); 658 ret = nfs_idmap_prepare_message(key->description, im, msg);
643 im->im_type = h->h_type; 659 if (ret < 0)
644 im->im_conv = IDMAP_CONV_IDTONAME; 660 goto out2;
645 im->im_id = id;
646 661
647 memset(&msg, 0, sizeof(msg)); 662 idmap->idmap_key_cons = cons;
648 msg.data = im;
649 msg.len = sizeof(*im);
650 663
651 add_wait_queue(&idmap->idmap_wq, &wq); 664 ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
665 if (ret < 0)
666 goto out2;
652 667
653 if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { 668 return ret;
654 remove_wait_queue(&idmap->idmap_wq, &wq); 669
655 goto out; 670out2:
656 } 671 kfree(im);
672out1:
673 kfree(msg);
674out0:
675 key_revoke(cons->key);
676 key_revoke(cons->authkey);
677 return ret;
678}
679
680static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
681{
682 return key_instantiate_and_link(key, data, strlen(data) + 1,
683 id_resolver_cache->thread_keyring,
684 authkey);
685}
657 686
658 set_current_state(TASK_UNINTERRUPTIBLE); 687static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey)
659 mutex_unlock(&idmap->idmap_im_lock); 688{
660 schedule(); 689 char id_str[NFS_UINT_MAXLEN];
661 __set_current_state(TASK_RUNNING); 690 int ret = -EINVAL;
662 remove_wait_queue(&idmap->idmap_wq, &wq); 691
663 mutex_lock(&idmap->idmap_im_lock); 692 switch (im->im_conv) {
664 693 case IDMAP_CONV_NAMETOID:
665 if (im->im_status & IDMAP_STATUS_SUCCESS) { 694 sprintf(id_str, "%d", im->im_id);
666 if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) 695 ret = nfs_idmap_instantiate(key, authkey, id_str);
667 goto out; 696 break;
668 memcpy(name, im->im_name, len); 697 case IDMAP_CONV_IDTONAME:
669 ret = len; 698 ret = nfs_idmap_instantiate(key, authkey, im->im_name);
699 break;
670 } 700 }
671 701
672 out:
673 memset(im, 0, sizeof(*im));
674 mutex_unlock(&idmap->idmap_im_lock);
675 mutex_unlock(&idmap->idmap_lock);
676 return ret; 702 return ret;
677} 703}
678 704
@@ -681,115 +707,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
681{ 707{
682 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 708 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
683 struct idmap *idmap = (struct idmap *)rpci->private; 709 struct idmap *idmap = (struct idmap *)rpci->private;
684 struct idmap_msg im_in, *im = &idmap->idmap_im; 710 struct key_construction *cons = idmap->idmap_key_cons;
685 struct idmap_hashtable *h; 711 struct idmap_msg im;
686 struct idmap_hashent *he = NULL;
687 size_t namelen_in; 712 size_t namelen_in;
688 int ret; 713 int ret;
689 714
690 if (mlen != sizeof(im_in)) 715 if (mlen != sizeof(im)) {
691 return -ENOSPC; 716 ret = -ENOSPC;
692
693 if (copy_from_user(&im_in, src, mlen) != 0)
694 return -EFAULT;
695
696 mutex_lock(&idmap->idmap_im_lock);
697
698 ret = mlen;
699 im->im_status = im_in.im_status;
700 /* If we got an error, terminate now, and wake up pending upcalls */
701 if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
702 wake_up(&idmap->idmap_wq);
703 goto out; 717 goto out;
704 } 718 }
705 719
706 /* Sanity checking of strings */ 720 if (copy_from_user(&im, src, mlen) != 0) {
707 ret = -EINVAL; 721 ret = -EFAULT;
708 namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
709 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
710 goto out; 722 goto out;
723 }
711 724
712 switch (im_in.im_type) { 725 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
713 case IDMAP_TYPE_USER: 726 ret = mlen;
714 h = &idmap->idmap_user_hash; 727 complete_request_key(idmap->idmap_key_cons, -ENOKEY);
715 break; 728 goto out_incomplete;
716 case IDMAP_TYPE_GROUP:
717 h = &idmap->idmap_group_hash;
718 break;
719 default:
720 goto out;
721 } 729 }
722 730
723 switch (im_in.im_conv) { 731 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
724 case IDMAP_CONV_IDTONAME: 732 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
725 /* Did we match the current upcall? */ 733 ret = -EINVAL;
726 if (im->im_conv == IDMAP_CONV_IDTONAME
727 && im->im_type == im_in.im_type
728 && im->im_id == im_in.im_id) {
729 /* Yes: copy string, including the terminating '\0' */
730 memcpy(im->im_name, im_in.im_name, namelen_in);
731 im->im_name[namelen_in] = '\0';
732 wake_up(&idmap->idmap_wq);
733 }
734 he = idmap_alloc_id(h, im_in.im_id);
735 break;
736 case IDMAP_CONV_NAMETOID:
737 /* Did we match the current upcall? */
738 if (im->im_conv == IDMAP_CONV_NAMETOID
739 && im->im_type == im_in.im_type
740 && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
741 && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
742 im->im_id = im_in.im_id;
743 wake_up(&idmap->idmap_wq);
744 }
745 he = idmap_alloc_name(h, im_in.im_name, namelen_in);
746 break;
747 default:
748 goto out; 734 goto out;
749 } 735 }
750 736
751 /* If the entry is valid, also copy it to the cache */ 737 ret = nfs_idmap_read_message(&im, cons->key, cons->authkey);
752 if (he != NULL) 738 if (ret >= 0) {
753 idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); 739 key_set_timeout(cons->key, nfs_idmap_cache_timeout);
754 ret = mlen; 740 ret = mlen;
741 }
742
755out: 743out:
756 mutex_unlock(&idmap->idmap_im_lock); 744 complete_request_key(idmap->idmap_key_cons, ret);
745out_incomplete:
757 return ret; 746 return ret;
758} 747}
759 748
760static void 749static void
761idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) 750idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
762{ 751{
763 struct idmap_msg *im = msg->data; 752 /* Free memory allocated in nfs_idmap_legacy_upcall() */
764 struct idmap *idmap = container_of(im, struct idmap, idmap_im); 753 kfree(msg->data);
765 754 kfree(msg);
766 if (msg->errno >= 0)
767 return;
768 mutex_lock(&idmap->idmap_im_lock);
769 im->im_status = IDMAP_STATUS_LOOKUPFAIL;
770 wake_up(&idmap->idmap_wq);
771 mutex_unlock(&idmap->idmap_im_lock);
772}
773
774/*
775 * Fowler/Noll/Vo hash
776 * http://www.isthe.com/chongo/tech/comp/fnv/
777 */
778
779#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
780#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
781
782static unsigned int fnvhash32(const void *buf, size_t buflen)
783{
784 const unsigned char *p, *end = (const unsigned char *)buf + buflen;
785 unsigned int hash = FNV_1_32;
786
787 for (p = buf; p < end; p++) {
788 hash *= FNV_P_32;
789 hash ^= (unsigned int)*p;
790 }
791
792 return hash;
793} 755}
794 756
795int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 757int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
@@ -798,16 +760,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
798 760
799 if (nfs_map_string_to_numeric(name, namelen, uid)) 761 if (nfs_map_string_to_numeric(name, namelen, uid))
800 return 0; 762 return 0;
801 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 763 return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
802} 764}
803 765
804int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 766int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
805{ 767{
806 struct idmap *idmap = server->nfs_client->cl_idmap; 768 struct idmap *idmap = server->nfs_client->cl_idmap;
807 769
808 if (nfs_map_string_to_numeric(name, namelen, uid)) 770 if (nfs_map_string_to_numeric(name, namelen, gid))
809 return 0; 771 return 0;
810 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 772 return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
811} 773}
812 774
813int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 775int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
@@ -816,21 +778,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s
816 int ret = -EINVAL; 778 int ret = -EINVAL;
817 779
818 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 780 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
819 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 781 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
820 if (ret < 0) 782 if (ret < 0)
821 ret = nfs_map_numeric_to_string(uid, buf, buflen); 783 ret = nfs_map_numeric_to_string(uid, buf, buflen);
822 return ret; 784 return ret;
823} 785}
824int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 786int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
825{ 787{
826 struct idmap *idmap = server->nfs_client->cl_idmap; 788 struct idmap *idmap = server->nfs_client->cl_idmap;
827 int ret = -EINVAL; 789 int ret = -EINVAL;
828 790
829 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 791 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
830 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 792 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
831 if (ret < 0) 793 if (ret < 0)
832 ret = nfs_map_numeric_to_string(uid, buf, buflen); 794 ret = nfs_map_numeric_to_string(gid, buf, buflen);
833 return ret; 795 return ret;
834} 796}
835
836#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
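
Annotation: the idmap.c change above folds the two CONFIG_NFS_USE_NEW_IDMAPPER variants into a single scheme. nfs_idmap_get_key() first consults the keyring-backed "id_resolver" key type and only on failure falls back to key_type_id_resolver_legacy, whose ->request_key hook (nfs_idmap_legacy_upcall) feeds the request through the per-mount rpc_pipefs "idmap" pipe that the classic userland daemon services. One caveat in the new code: kmalloc() returns NULL on failure rather than an ERR_PTR value, so the IS_ERR() checks in nfs_idmap_legacy_upcall() can never trigger; the usual idiom is a NULL check that returns -ENOMEM.

A minimal userspace sketch of the keyring side, assuming libkeyutils is installed and an id_resolver handler (for example nfs-utils' nfsidmap) is wired up in /etc/request-key.conf; the key description format ("uid:%s", "gid:%s", "user:%s", "group:%s") follows the nfs_idmap_tokens table above, and the user name "bob" is purely illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <keyutils.h>           /* link with -lkeyutils */

int main(void)
{
        key_serial_t key;
        void *payload = NULL;
        long plen;

        /* "uid:<name>" asks for name -> id, i.e. Opt_find_uid above */
        key = request_key("id_resolver", "uid:bob", NULL,
                          KEY_SPEC_SESSION_KEYRING);
        if (key < 0) {
                perror("request_key");
                return 1;
        }

        /* Read back the payload the handler instantiated the key with */
        plen = keyctl_read_alloc(key, &payload);
        if (plen < 0) {
                perror("keyctl_read_alloc");
                return 1;
        }
        printf("uid:bob -> %.*s\n", (int)plen, (char *)payload);
        free(payload);
        return 0;
}

The in-kernel path differs only in that it runs under the id_resolver_cache credentials and, for the legacy type, hands the description to the rpc pipe instead of /sbin/request-key.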
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f649fba8c38..e8bbfa5b350 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -39,8 +39,8 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h> 40#include <linux/compat.h>
41#include <linux/freezer.h> 41#include <linux/freezer.h>
42#include <linux/crc32.h>
42 43
43#include <asm/system.h>
44#include <asm/uaccess.h> 44#include <asm/uaccess.h>
45 45
46#include "nfs4_fs.h" 46#include "nfs4_fs.h"
@@ -51,6 +51,7 @@
51#include "fscache.h" 51#include "fscache.h"
52#include "dns_resolve.h" 52#include "dns_resolve.h"
53#include "pnfs.h" 53#include "pnfs.h"
54#include "netns.h"
54 55
55#define NFSDBG_FACILITY NFSDBG_VFS 56#define NFSDBG_FACILITY NFSDBG_VFS
56 57
@@ -388,9 +389,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
388 unlock_new_inode(inode); 389 unlock_new_inode(inode);
389 } else 390 } else
390 nfs_refresh_inode(inode, fattr); 391 nfs_refresh_inode(inode, fattr);
391 dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", 392 dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
392 inode->i_sb->s_id, 393 inode->i_sb->s_id,
393 (long long)NFS_FILEID(inode), 394 (long long)NFS_FILEID(inode),
395 nfs_display_fhandle_hash(fh),
394 atomic_read(&inode->i_count)); 396 atomic_read(&inode->i_count));
395 397
396out: 398out:
@@ -401,7 +403,7 @@ out_no_inode:
401 goto out; 403 goto out;
402} 404}
403 405
404#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) 406#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
405 407
406int 408int
407nfs_setattr(struct dentry *dentry, struct iattr *attr) 409nfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -423,7 +425,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
423 425
424 /* Optimization: if the end result is no change, don't RPC */ 426 /* Optimization: if the end result is no change, don't RPC */
425 attr->ia_valid &= NFS_VALID_ATTRS; 427 attr->ia_valid &= NFS_VALID_ATTRS;
426 if ((attr->ia_valid & ~ATTR_FILE) == 0) 428 if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
427 return 0; 429 return 0;
428 430
429 /* Write all dirty data */ 431 /* Write all dirty data */
@@ -1044,6 +1046,67 @@ struct nfs_fh *nfs_alloc_fhandle(void)
1044 return fh; 1046 return fh;
1045} 1047}
1046 1048
1049#ifdef NFS_DEBUG
1050/*
1051 * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle
1052 * in the same way that wireshark does
1053 *
1054 * @fh: file handle
1055 *
1056 * For debugging only.
1057 */
1058u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
1059{
1060 /* wireshark uses 32-bit AUTODIN crc and does a bitwise
1061 * not on the result */
1062 return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size);
1063}
1064
1065/*
1066 * _nfs_display_fhandle - display an NFS file handle on the console
1067 *
1068 * @fh: file handle to display
1069 * @caption: display caption
1070 *
1071 * For debugging only.
1072 */
1073void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
1074{
1075 unsigned short i;
1076
1077 if (fh == NULL || fh->size == 0) {
1078 printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh);
1079 return;
1080 }
1081
1082 printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n",
1083 caption, fh, fh->size, _nfs_display_fhandle_hash(fh));
1084 for (i = 0; i < fh->size; i += 16) {
1085 __be32 *pos = (__be32 *)&fh->data[i];
1086
1087 switch ((fh->size - i - 1) >> 2) {
1088 case 0:
1089 printk(KERN_DEFAULT " %08x\n",
1090 be32_to_cpup(pos));
1091 break;
1092 case 1:
1093 printk(KERN_DEFAULT " %08x %08x\n",
1094 be32_to_cpup(pos), be32_to_cpup(pos + 1));
1095 break;
1096 case 2:
1097 printk(KERN_DEFAULT " %08x %08x %08x\n",
1098 be32_to_cpup(pos), be32_to_cpup(pos + 1),
1099 be32_to_cpup(pos + 2));
1100 break;
1101 default:
1102 printk(KERN_DEFAULT " %08x %08x %08x %08x\n",
1103 be32_to_cpup(pos), be32_to_cpup(pos + 1),
1104 be32_to_cpup(pos + 2), be32_to_cpup(pos + 3));
1105 }
1106 }
1107}
1108#endif
1109
1047/** 1110/**
1048 * nfs_inode_attrs_need_update - check if the inode attributes need updating 1111 * nfs_inode_attrs_need_update - check if the inode attributes need updating
1049 * @inode - pointer to inode 1112 * @inode - pointer to inode
@@ -1211,8 +1274,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1211 unsigned long now = jiffies; 1274 unsigned long now = jiffies;
1212 unsigned long save_cache_validity; 1275 unsigned long save_cache_validity;
1213 1276
1214 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 1277 dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
1215 __func__, inode->i_sb->s_id, inode->i_ino, 1278 __func__, inode->i_sb->s_id, inode->i_ino,
1279 nfs_display_fhandle_hash(NFS_FH(inode)),
1216 atomic_read(&inode->i_count), fattr->valid); 1280 atomic_read(&inode->i_count), fattr->valid);
1217 1281
1218 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
@@ -1406,7 +1470,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1406 /* 1470 /*
1407 * Big trouble! The inode has become a different object. 1471 * Big trouble! The inode has become a different object.
1408 */ 1472 */
1409 printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", 1473 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1410 __func__, inode->i_ino, inode->i_mode, fattr->mode); 1474 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1411 out_err: 1475 out_err:
1412 /* 1476 /*
@@ -1495,7 +1559,7 @@ static void init_once(void *foo)
1495 INIT_LIST_HEAD(&nfsi->open_files); 1559 INIT_LIST_HEAD(&nfsi->open_files);
1496 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1560 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1497 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1561 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1498 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1562 INIT_LIST_HEAD(&nfsi->commit_list);
1499 nfsi->npages = 0; 1563 nfsi->npages = 0;
1500 nfsi->ncommit = 0; 1564 nfsi->ncommit = 0;
1501 atomic_set(&nfsi->silly_count, 1); 1565 atomic_set(&nfsi->silly_count, 1);
@@ -1552,6 +1616,28 @@ static void nfsiod_stop(void)
1552 destroy_workqueue(wq); 1616 destroy_workqueue(wq);
1553} 1617}
1554 1618
1619int nfs_net_id;
1620EXPORT_SYMBOL_GPL(nfs_net_id);
1621
1622static int nfs_net_init(struct net *net)
1623{
1624 nfs_clients_init(net);
1625 return nfs_dns_resolver_cache_init(net);
1626}
1627
1628static void nfs_net_exit(struct net *net)
1629{
1630 nfs_dns_resolver_cache_destroy(net);
1631 nfs_cleanup_cb_ident_idr(net);
1632}
1633
1634static struct pernet_operations nfs_net_ops = {
1635 .init = nfs_net_init,
1636 .exit = nfs_net_exit,
1637 .id = &nfs_net_id,
1638 .size = sizeof(struct nfs_net),
1639};
1640
1555/* 1641/*
1556 * Initialize NFS 1642 * Initialize NFS
1557 */ 1643 */
@@ -1561,10 +1647,14 @@ static int __init init_nfs_fs(void)
1561 1647
1562 err = nfs_idmap_init(); 1648 err = nfs_idmap_init();
1563 if (err < 0) 1649 if (err < 0)
1564 goto out9; 1650 goto out10;
1565 1651
1566 err = nfs_dns_resolver_init(); 1652 err = nfs_dns_resolver_init();
1567 if (err < 0) 1653 if (err < 0)
1654 goto out9;
1655
1656 err = register_pernet_subsys(&nfs_net_ops);
1657 if (err < 0)
1568 goto out8; 1658 goto out8;
1569 1659
1570 err = nfs_fscache_register(); 1660 err = nfs_fscache_register();
@@ -1600,14 +1690,14 @@ static int __init init_nfs_fs(void)
1600 goto out0; 1690 goto out0;
1601 1691
1602#ifdef CONFIG_PROC_FS 1692#ifdef CONFIG_PROC_FS
1603 rpc_proc_register(&nfs_rpcstat); 1693 rpc_proc_register(&init_net, &nfs_rpcstat);
1604#endif 1694#endif
1605 if ((err = register_nfs_fs()) != 0) 1695 if ((err = register_nfs_fs()) != 0)
1606 goto out; 1696 goto out;
1607 return 0; 1697 return 0;
1608out: 1698out:
1609#ifdef CONFIG_PROC_FS 1699#ifdef CONFIG_PROC_FS
1610 rpc_proc_unregister("nfs"); 1700 rpc_proc_unregister(&init_net, "nfs");
1611#endif 1701#endif
1612 nfs_destroy_directcache(); 1702 nfs_destroy_directcache();
1613out0: 1703out0:
@@ -1625,10 +1715,12 @@ out5:
1625out6: 1715out6:
1626 nfs_fscache_unregister(); 1716 nfs_fscache_unregister();
1627out7: 1717out7:
1628 nfs_dns_resolver_destroy(); 1718 unregister_pernet_subsys(&nfs_net_ops);
1629out8: 1719out8:
1630 nfs_idmap_quit(); 1720 nfs_dns_resolver_destroy();
1631out9: 1721out9:
1722 nfs_idmap_quit();
1723out10:
1632 return err; 1724 return err;
1633} 1725}
1634 1726
@@ -1640,12 +1732,12 @@ static void __exit exit_nfs_fs(void)
1640 nfs_destroy_inodecache(); 1732 nfs_destroy_inodecache();
1641 nfs_destroy_nfspagecache(); 1733 nfs_destroy_nfspagecache();
1642 nfs_fscache_unregister(); 1734 nfs_fscache_unregister();
1735 unregister_pernet_subsys(&nfs_net_ops);
1643 nfs_dns_resolver_destroy(); 1736 nfs_dns_resolver_destroy();
1644 nfs_idmap_quit(); 1737 nfs_idmap_quit();
1645#ifdef CONFIG_PROC_FS 1738#ifdef CONFIG_PROC_FS
1646 rpc_proc_unregister("nfs"); 1739 rpc_proc_unregister(&init_net, "nfs");
1647#endif 1740#endif
1648 nfs_cleanup_cb_ident_idr();
1649 unregister_nfs_fs(); 1741 unregister_nfs_fs();
1650 nfs_fs_proc_exit(); 1742 nfs_fs_proc_exit();
1651 nfsiod_stop(); 1743 nfsiod_stop();
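
Annotation: _nfs_display_fhandle_hash() above computes ~crc32(0xFFFFFFFF, fh->data, fh->size) so that the printed fh_crc matches what Wireshark shows for an NFS filehandle. A small userspace sketch, under the assumption that the kernel's crc32() seeded with all-ones followed by a bitwise NOT equals the standard (zlib-convention) CRC-32, which performs the same init and final XOR internally:

#include <stdio.h>
#include <zlib.h>               /* link with -lz */

/* Assumption: ~crc32(0xFFFFFFFF, buf, len) in the kernel is the same
 * value as zlib's crc32(0, buf, len), since zlib applies the all-ones
 * preset and final inversion that the kernel code writes out by hand. */
static unsigned int fh_hash(const unsigned char *fh, unsigned int size)
{
        return (unsigned int)crc32(0L, fh, size);
}

int main(void)
{
        /* made-up filehandle bytes, for illustration only */
        const unsigned char fh[] = { 0x01, 0x00, 0x00, 0x02,
                                     0xde, 0xad, 0xbe, 0xef };
        printf("fh_crc=0x%08x\n", fh_hash(fh, sizeof(fh)));
        return 0;
}

This makes it possible to correlate the fh_crc values from the new dprintk() in nfs_fhget() and nfs_update_inode() with a packet capture.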
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8102db9b926..b777bdaba4c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -123,6 +123,7 @@ struct nfs_parsed_mount_data {
123 } nfs_server; 123 } nfs_server;
124 124
125 struct security_mnt_opts lsm_opts; 125 struct security_mnt_opts lsm_opts;
126 struct net *net;
126}; 127};
127 128
128/* mount_clnt.c */ 129/* mount_clnt.c */
@@ -137,20 +138,22 @@ struct nfs_mount_request {
137 int noresvport; 138 int noresvport;
138 unsigned int *auth_flav_len; 139 unsigned int *auth_flav_len;
139 rpc_authflavor_t *auth_flavs; 140 rpc_authflavor_t *auth_flavs;
141 struct net *net;
140}; 142};
141 143
142extern int nfs_mount(struct nfs_mount_request *info); 144extern int nfs_mount(struct nfs_mount_request *info);
143extern void nfs_umount(const struct nfs_mount_request *info); 145extern void nfs_umount(const struct nfs_mount_request *info);
144 146
145/* client.c */ 147/* client.c */
146extern struct rpc_program nfs_program; 148extern const struct rpc_program nfs_program;
149extern void nfs_clients_init(struct net *net);
147 150
148extern void nfs_cleanup_cb_ident_idr(void); 151extern void nfs_cleanup_cb_ident_idr(struct net *);
149extern void nfs_put_client(struct nfs_client *); 152extern void nfs_put_client(struct nfs_client *);
150extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); 153extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
151extern struct nfs_client *nfs4_find_client_ident(int);
152extern struct nfs_client * 154extern struct nfs_client *
153nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); 155nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
156 struct nfs4_sessionid *);
154extern struct nfs_server *nfs_create_server( 157extern struct nfs_server *nfs_create_server(
155 const struct nfs_parsed_mount_data *, 158 const struct nfs_parsed_mount_data *,
156 struct nfs_fh *); 159 struct nfs_fh *);
@@ -162,7 +165,8 @@ extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
162extern void nfs_free_server(struct nfs_server *server); 165extern void nfs_free_server(struct nfs_server *server);
163extern struct nfs_server *nfs_clone_server(struct nfs_server *, 166extern struct nfs_server *nfs_clone_server(struct nfs_server *,
164 struct nfs_fh *, 167 struct nfs_fh *,
165 struct nfs_fattr *); 168 struct nfs_fattr *,
169 rpc_authflavor_t);
166extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 170extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
167extern int nfs4_check_client_ready(struct nfs_client *clp); 171extern int nfs4_check_client_ready(struct nfs_client *clp);
168extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 172extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
@@ -183,10 +187,10 @@ static inline void nfs_fs_proc_exit(void)
183 187
184/* nfs4namespace.c */ 188/* nfs4namespace.c */
185#ifdef CONFIG_NFS_V4 189#ifdef CONFIG_NFS_V4
186extern struct vfsmount *nfs_do_refmount(struct dentry *dentry); 190extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
187#else 191#else
188static inline 192static inline
189struct vfsmount *nfs_do_refmount(struct dentry *dentry) 193struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
190{ 194{
191 return ERR_PTR(-ENOENT); 195 return ERR_PTR(-ENOENT);
192} 196}
@@ -231,7 +235,6 @@ extern const u32 nfs41_maxwrite_overhead;
231/* nfs4proc.c */ 235/* nfs4proc.c */
232#ifdef CONFIG_NFS_V4 236#ifdef CONFIG_NFS_V4
233extern struct rpc_procinfo nfs4_procedures[]; 237extern struct rpc_procinfo nfs4_procedures[];
234void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
235#endif 238#endif
236 239
237extern int nfs4_init_ds_session(struct nfs_client *clp); 240extern int nfs4_init_ds_session(struct nfs_client *clp);
@@ -329,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list,
329void nfs_commit_clear_lock(struct nfs_inode *nfsi); 332void nfs_commit_clear_lock(struct nfs_inode *nfsi);
330void nfs_commitdata_release(void *data); 333void nfs_commitdata_release(void *data);
331void nfs_commit_release_pages(struct nfs_write_data *data); 334void nfs_commit_release_pages(struct nfs_write_data *data);
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
336void nfs_request_remove_commit_list(struct nfs_page *req);
332 337
333#ifdef CONFIG_MIGRATION 338#ifdef CONFIG_MIGRATION
334extern int nfs_migrate_page(struct address_space *, 339extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d4c2d6b7507..8e65c7f1f87 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -16,7 +16,7 @@
16#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
17#include "internal.h" 17#include "internal.h"
18 18
19#ifdef RPC_DEBUG 19#ifdef NFS_DEBUG
20# define NFSDBG_FACILITY NFSDBG_MOUNT 20# define NFSDBG_FACILITY NFSDBG_MOUNT
21#endif 21#endif
22 22
@@ -67,7 +67,7 @@ enum {
67 MOUNTPROC3_EXPORT = 5, 67 MOUNTPROC3_EXPORT = 5,
68}; 68};
69 69
70static struct rpc_program mnt_program; 70static const struct rpc_program mnt_program;
71 71
72/* 72/*
73 * Defined by OpenGroup XNFS Version 3W, chapter 8 73 * Defined by OpenGroup XNFS Version 3W, chapter 8
@@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net, 156 .net = info->net,
157 .protocol = info->protocol, 157 .protocol = info->protocol,
158 .address = info->sap, 158 .address = info->sap,
159 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
225 .to_retries = 2, 225 .to_retries = 2,
226 }; 226 };
227 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net, 228 .net = info->net,
229 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
230 .address = info->sap, 230 .address = info->sap,
231 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = {
488}; 488};
489 489
490 490
491static struct rpc_version mnt_version1 = { 491static const struct rpc_version mnt_version1 = {
492 .number = 1, 492 .number = 1,
493 .nrprocs = ARRAY_SIZE(mnt_procedures), 493 .nrprocs = ARRAY_SIZE(mnt_procedures),
494 .procs = mnt_procedures, 494 .procs = mnt_procedures,
495}; 495};
496 496
497static struct rpc_version mnt_version3 = { 497static const struct rpc_version mnt_version3 = {
498 .number = 3, 498 .number = 3,
499 .nrprocs = ARRAY_SIZE(mnt3_procedures), 499 .nrprocs = ARRAY_SIZE(mnt3_procedures),
500 .procs = mnt3_procedures, 500 .procs = mnt3_procedures,
501}; 501};
502 502
503static struct rpc_version *mnt_version[] = { 503static const struct rpc_version *mnt_version[] = {
504 NULL, 504 NULL,
505 &mnt_version1, 505 &mnt_version1,
506 NULL, 506 NULL,
@@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = {
509 509
510static struct rpc_stat mnt_stats; 510static struct rpc_stat mnt_stats;
511 511
512static struct rpc_program mnt_program = { 512static const struct rpc_program mnt_program = {
513 .name = "mount", 513 .name = "mount",
514 .number = NFS_MNT_PROGRAM, 514 .number = NFS_MNT_PROGRAM,
515 .nrvers = ARRAY_SIZE(mnt_version), 515 .nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 8102391bb37..d51868e5683 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -148,66 +148,31 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
148 return pseudoflavor; 148 return pseudoflavor;
149} 149}
150 150
151static int nfs_negotiate_security(const struct dentry *parent, 151static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
152 const struct dentry *dentry, 152 struct qstr *name,
153 rpc_authflavor_t *flavor) 153 struct nfs_fh *fh,
154 struct nfs_fattr *fattr)
154{ 155{
155 struct page *page;
156 struct nfs4_secinfo_flavors *flavors;
157 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
158 int ret = -EPERM;
159
160 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
161 if (secinfo != NULL) {
162 page = alloc_page(GFP_KERNEL);
163 if (!page) {
164 ret = -ENOMEM;
165 goto out;
166 }
167 flavors = page_address(page);
168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
169 *flavor = nfs_find_best_sec(flavors);
170 put_page(page);
171 }
172
173out:
174 return ret;
175}
176
177static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
178 struct dentry *dentry, struct path *path,
179 struct nfs_fh *fh, struct nfs_fattr *fattr,
180 rpc_authflavor_t *flavor)
181{
182 struct rpc_clnt *clone;
183 struct rpc_auth *auth;
184 int err; 156 int err;
185 157
186 err = nfs_negotiate_security(parent, path->dentry, flavor); 158 if (NFS_PROTO(dir)->version == 4)
187 if (err < 0) 159 return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
188 goto out; 160
189 clone = rpc_clone_client(server->client); 161 err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
190 auth = rpcauth_create(*flavor, clone); 162 if (err)
191 if (!auth) { 163 return ERR_PTR(err);
192 err = -EIO; 164 return rpc_clone_client(NFS_SERVER(dir)->client);
193 goto out_shutdown;
194 }
195 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
196 &path->dentry->d_name,
197 fh, fattr);
198out_shutdown:
199 rpc_shutdown_client(clone);
200out:
201 return err;
202} 165}
203#else /* CONFIG_NFS_V4 */ 166#else /* CONFIG_NFS_V4 */
204static inline int nfs_lookup_with_sec(struct nfs_server *server, 167static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
205 struct dentry *parent, struct dentry *dentry, 168 struct qstr *name,
206 struct path *path, struct nfs_fh *fh, 169 struct nfs_fh *fh,
207 struct nfs_fattr *fattr, 170 struct nfs_fattr *fattr)
208 rpc_authflavor_t *flavor)
209{ 171{
210 return -EPERM; 172 int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
173 if (err)
174 return ERR_PTR(err);
175 return rpc_clone_client(NFS_SERVER(dir)->client);
211} 176}
212#endif /* CONFIG_NFS_V4 */ 177#endif /* CONFIG_NFS_V4 */
213 178
@@ -226,12 +191,10 @@ static inline int nfs_lookup_with_sec(struct nfs_server *server,
226struct vfsmount *nfs_d_automount(struct path *path) 191struct vfsmount *nfs_d_automount(struct path *path)
227{ 192{
228 struct vfsmount *mnt; 193 struct vfsmount *mnt;
229 struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
230 struct dentry *parent; 194 struct dentry *parent;
231 struct nfs_fh *fh = NULL; 195 struct nfs_fh *fh = NULL;
232 struct nfs_fattr *fattr = NULL; 196 struct nfs_fattr *fattr = NULL;
233 int err; 197 struct rpc_clnt *client;
234 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
235 198
236 dprintk("--> nfs_d_automount()\n"); 199 dprintk("--> nfs_d_automount()\n");
237 200
@@ -249,21 +212,19 @@ struct vfsmount *nfs_d_automount(struct path *path)
249 212
250 /* Look it up again to get its attributes */ 213 /* Look it up again to get its attributes */
251 parent = dget_parent(path->dentry); 214 parent = dget_parent(path->dentry);
252 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, 215 client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
253 &path->dentry->d_name,
254 fh, fattr);
255 if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
256 err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
257 dput(parent); 216 dput(parent);
258 if (err != 0) { 217 if (IS_ERR(client)) {
259 mnt = ERR_PTR(err); 218 mnt = ERR_CAST(client);
260 goto out; 219 goto out;
261 } 220 }
262 221
263 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 222 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
264 mnt = nfs_do_refmount(path->dentry); 223 mnt = nfs_do_refmount(client, path->dentry);
265 else 224 else
266 mnt = nfs_do_submount(path->dentry, fh, fattr, flavor); 225 mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
226 rpc_shutdown_client(client);
227
267 if (IS_ERR(mnt)) 228 if (IS_ERR(mnt))
268 goto out; 229 goto out;
269 230
@@ -276,7 +237,10 @@ out:
276 nfs_free_fattr(fattr); 237 nfs_free_fattr(fattr);
277 nfs_free_fhandle(fh); 238 nfs_free_fhandle(fh);
278out_nofree: 239out_nofree:
279 dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); 240 if (IS_ERR(mnt))
241 dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
242 else
243 dprintk("<-- %s() = %p\n", __func__, mnt);
280 return mnt; 244 return mnt;
281} 245}
282 246
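
Annotation: nfs_d_automount() now receives either a valid rpc_clnt pointer or an errno encoded in the pointer itself, via ERR_PTR/ERR_CAST/PTR_ERR/IS_ERR. For readers unfamiliar with the idiom, a standalone rendering; the real macros live in include/linux/err.h, where MAX_ERRNO is 4095:

#include <stdio.h>

#define MAX_ERRNO       4095
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)   { return IS_ERR_VALUE((unsigned long)ptr); }

/* A lookup that fails returns ERR_PTR(-ENOENT) instead of NULL, so the
 * caller gets the precise errno back without a second out-parameter.
 * This is why nfs_lookup_mountpoint() can drop the old int err plumbing. */
int main(void)
{
        void *clnt = ERR_PTR(-2 /* -ENOENT */);

        if (IS_ERR(clnt))
                printf("lookup failed: %ld\n", PTR_ERR(clnt));
        return 0;
}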
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
new file mode 100644
index 00000000000..aa14ec303e9
--- /dev/null
+++ b/fs/nfs/netns.h
@@ -0,0 +1,27 @@
1#ifndef __NFS_NETNS_H__
2#define __NFS_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct bl_dev_msg {
8 int32_t status;
9 uint32_t major, minor;
10};
11
12struct nfs_net {
13 struct cache_detail *nfs_dns_resolve;
14 struct rpc_pipe *bl_device_pipe;
15 struct bl_dev_msg bl_mount_reply;
16 wait_queue_head_t bl_wq;
17 struct list_head nfs_client_list;
18 struct list_head nfs_volume_list;
19#ifdef CONFIG_NFS_V4
20 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
21#endif
22 spinlock_t nfs_client_lock;
23};
24
25extern int nfs_net_id;
26
27#endif
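
Annotation: netns.h introduces struct nfs_net, the per-network-namespace state that register_pernet_subsys() (see the inode.c hunk above) allocates for every struct net using nfs_net_ops.size and indexes by nfs_net_id. A sketch of the accessor pattern; nfs_net_of() is a hypothetical helper name, since the callers in this series open-code the net_generic() call:

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "netns.h"

/* net_generic() returns the nfs_net_id slot of this namespace's
 * generic-pointer array; the pernet core sized the allocation from
 * nfs_net_ops.size when the namespace was created. */
static inline struct nfs_net *nfs_net_of(struct net *net)
{
        return net_generic(net, nfs_net_id);
}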
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 792cb13a430..1f56000fabb 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = {
1150 PROC(STATFS, fhandle, statfsres, 0), 1150 PROC(STATFS, fhandle, statfsres, 0),
1151}; 1151};
1152 1152
1153struct rpc_version nfs_version2 = { 1153const struct rpc_version nfs_version2 = {
1154 .number = 2, 1154 .number = 2,
1155 .nrprocs = ARRAY_SIZE(nfs_procedures), 1155 .nrprocs = ARRAY_SIZE(nfs_procedures),
1156 .procs = nfs_procedures 1156 .procs = nfs_procedures
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7ef23979896..e4498dc351a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
192 .pages = pages, 192 .pages = pages,
193 }; 193 };
194 struct nfs3_getaclres res = { 194 struct nfs3_getaclres res = {
195 0 195 NULL,
196 }; 196 };
197 struct rpc_message msg = { 197 struct rpc_message msg = {
198 .rpc_argp = &args, 198 .rpc_argp = &args,
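
Annotation: the one-line nfs3acl.c change swaps a plain 0 for NULL in the nfs3_getaclres initializer. Assuming the struct's first member is a pointer (which the NULL initializer implies), this is most likely a sparse "Using plain integer as NULL pointer" cleanup with no behavioral change, illustrated here on a stand-in type:

#include <stddef.h>

struct fattr_like;                  /* opaque stand-in for nfs_fattr */

struct getaclres_like {             /* stand-in for nfs3_getaclres */
        struct fattr_like *fattr;   /* first member is a pointer */
        int mask;
};

static struct getaclres_like a = { 0 };     /* valid C, but sparse warns */
static struct getaclres_like b = { NULL, }; /* the form the patch uses */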
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 91943953a37..5242eae6711 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
428 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; 428 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
429} 429}
430 430
431static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
432{
433 rpc_call_start(task);
434}
435
431static int 436static int
432nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) 437nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
433{ 438{
@@ -445,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
445 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; 450 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
446} 451}
447 452
453static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
454{
455 rpc_call_start(task);
456}
457
448static int 458static int
449nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 459nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
450 struct inode *new_dir) 460 struct inode *new_dir)
@@ -814,6 +824,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message
814 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 824 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
815} 825}
816 826
827static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
828{
829 rpc_call_start(task);
830}
831
817static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 832static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
818{ 833{
819 if (nfs3_async_handle_jukebox(task, data->inode)) 834 if (nfs3_async_handle_jukebox(task, data->inode))
@@ -828,6 +843,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
828 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 843 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
829} 844}
830 845
846static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
847{
848 rpc_call_start(task);
849}
850
831static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 851static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
832{ 852{
833 if (nfs3_async_handle_jukebox(task, data->inode)) 853 if (nfs3_async_handle_jukebox(task, data->inode))
@@ -864,9 +884,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
864 .create = nfs3_proc_create, 884 .create = nfs3_proc_create,
865 .remove = nfs3_proc_remove, 885 .remove = nfs3_proc_remove,
866 .unlink_setup = nfs3_proc_unlink_setup, 886 .unlink_setup = nfs3_proc_unlink_setup,
887 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
867 .unlink_done = nfs3_proc_unlink_done, 888 .unlink_done = nfs3_proc_unlink_done,
868 .rename = nfs3_proc_rename, 889 .rename = nfs3_proc_rename,
869 .rename_setup = nfs3_proc_rename_setup, 890 .rename_setup = nfs3_proc_rename_setup,
891 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
870 .rename_done = nfs3_proc_rename_done, 892 .rename_done = nfs3_proc_rename_done,
871 .link = nfs3_proc_link, 893 .link = nfs3_proc_link,
872 .symlink = nfs3_proc_symlink, 894 .symlink = nfs3_proc_symlink,
@@ -879,8 +901,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
879 .pathconf = nfs3_proc_pathconf, 901 .pathconf = nfs3_proc_pathconf,
880 .decode_dirent = nfs3_decode_dirent, 902 .decode_dirent = nfs3_decode_dirent,
881 .read_setup = nfs3_proc_read_setup, 903 .read_setup = nfs3_proc_read_setup,
904 .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
882 .read_done = nfs3_read_done, 905 .read_done = nfs3_read_done,
883 .write_setup = nfs3_proc_write_setup, 906 .write_setup = nfs3_proc_write_setup,
907 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
884 .write_done = nfs3_write_done, 908 .write_done = nfs3_write_done,
885 .commit_setup = nfs3_proc_commit_setup, 909 .commit_setup = nfs3_proc_commit_setup,
886 .commit_done = nfs3_commit_done, 910 .commit_done = nfs3_commit_done,
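
Annotation: nfs3proc.c grows four *_rpc_prepare callbacks whose v3 bodies are just rpc_call_start(). The point is the op table, not the bodies: the generic unlink/rename/read/write paths now delegate "start the RPC" to a per-version hook, which gives the NFSv4 side a place to perform sequence/session setup before the call runs (an inference about the companion v4 hunks, not something this hunk shows). A toy userspace rendering of the hook shape, with stand-in types rather than the kernel's:

#include <stdio.h>

struct task { const char *op; };

struct version_ops {
        /* v3: trivially starts the call; other versions may defer
         * until their per-call state is ready. */
        void (*rpc_prepare)(struct task *t, void *calldata);
};

static void v3_prepare(struct task *t, void *calldata)
{
        (void)calldata;                 /* v3 needs nothing from it */
        printf("%s: no setup needed, start the call\n", t->op);
}

static void generic_async_call(const struct version_ops *ops,
                               struct task *t, void *calldata)
{
        if (ops->rpc_prepare)
                ops->rpc_prepare(t, calldata);
}

int main(void)
{
        struct task unlink_task = { "UNLINK" };
        struct version_ops v3 = { .rpc_prepare = v3_prepare };

        generic_async_call(&v3, &unlink_task, NULL);
        return 0;
}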
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 183c6b123d0..a77cc9a3ce5 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = {
2461 PROC(COMMIT, commit, commit, 5), 2461 PROC(COMMIT, commit, commit, 5),
2462}; 2462};
2463 2463
2464struct rpc_version nfs_version3 = { 2464const struct rpc_version nfs_version3 = {
2465 .number = 3, 2465 .number = 3,
2466 .nrprocs = ARRAY_SIZE(nfs3_procedures), 2466 .nrprocs = ARRAY_SIZE(nfs3_procedures),
2467 .procs = nfs3_procedures 2467 .procs = nfs3_procedures
@@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
 	},
 };
 
-struct rpc_version		nfsacl_version3 = {
+const struct rpc_version	nfsacl_version3 = {
 	.number			= 3,
 	.nrprocs		= sizeof(nfs3_acl_procedures)/
 				  sizeof(nfs3_acl_procedures[0]),
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4d7d0aedc10..8d75021020b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -20,7 +20,6 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
-	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 	NFS4CLNT_LEASE_CONFIRM,
@@ -44,7 +43,7 @@ struct nfs4_minor_version_ops {
 			struct nfs4_sequence_args *args,
 			struct nfs4_sequence_res *res,
 			int cache_reply);
-	int	(*validate_stateid)(struct nfs_delegation *,
+	bool	(*match_stateid)(const nfs4_stateid *,
 			const nfs4_stateid *);
 	int	(*find_root_sec)(struct nfs_server *, struct nfs_fh *,
 			struct nfs_fsinfo *);
@@ -53,26 +52,26 @@ struct nfs4_minor_version_ops {
 	const struct nfs4_state_maintenance_ops *state_renewal_ops;
 };
 
-/*
- * struct rpc_sequence ensures that RPC calls are sent in the exact
- * order that they appear on the list.
- */
-struct rpc_sequence {
-	struct rpc_wait_queue	wait;	/* RPC call delay queue */
-	spinlock_t lock;		/* Protects the list */
-	struct list_head list;		/* Defines sequence of RPC calls */
-};
+struct nfs_unique_id {
+	struct rb_node rb_node;
+	__u64 id;
+};
 
 #define NFS_SEQID_CONFIRMED 1
 struct nfs_seqid_counter {
-	struct rpc_sequence *sequence;
+	ktime_t create_time;
+	int owner_id;
 	int flags;
 	u32 counter;
+	spinlock_t lock;		/* Protects the list */
+	struct list_head list;		/* Defines sequence of RPC calls */
+	struct rpc_wait_queue	wait;	/* RPC call delay queue */
 };
 
 struct nfs_seqid {
 	struct nfs_seqid_counter *sequence;
 	struct list_head list;
+	struct rpc_task *task;
 };
 
 static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
@@ -81,18 +80,12 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 	seqid->flags |= NFS_SEQID_CONFIRMED;
 }
 
-struct nfs_unique_id {
-	struct rb_node rb_node;
-	__u64 id;
-};
-
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
-	struct nfs_unique_id so_owner_id;
 	struct nfs_server    *so_server;
 	struct list_head     so_lru;
 	unsigned long        so_expires;
@@ -105,7 +98,6 @@ struct nfs4_state_owner {
 	unsigned long	     so_flags;
 	struct list_head     so_states;
 	struct nfs_seqid_counter so_seqid;
-	struct rpc_sequence  so_sequence;
 };
 
 enum {
@@ -146,8 +138,6 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
 	struct nfs_seqid_counter	ls_seqid;
-	struct rpc_sequence	ls_sequence;
-	struct nfs_unique_id	ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
 	struct nfs4_lock_owner	ls_owner;
@@ -193,6 +183,7 @@ struct nfs4_exception {
 	long timeout;
 	int retry;
 	struct nfs4_state *state;
+	struct inode *inode;
 };
 
 struct nfs4_state_recovery_ops {
@@ -214,6 +205,9 @@ struct nfs4_state_maintenance_ops {
 extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 
+/* nfs4namespace.c */
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -222,9 +216,12 @@ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
-extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
-		struct nfs4_fs_locations *fs_locations, struct page *page);
-extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
+		struct nfs4_fs_locations *, struct page *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
+		struct nfs_fh *, struct nfs_fattr *);
+extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
+extern int nfs4_release_lockowner(struct nfs4_lock_state *);
 extern const struct xattr_handler *nfs4_xattr_handlers[];
 
 #if defined(CONFIG_NFS_V4_1)
@@ -233,12 +230,13 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
 	return server->nfs_client->cl_session;
 }
 
+extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
 extern int nfs4_setup_sequence(const struct nfs_server *server,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-		int cache_reply, struct rpc_task *task);
+		struct rpc_task *task);
 extern int nfs41_setup_sequence(struct nfs4_session *session,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-		int cache_reply, struct rpc_task *task);
+		struct rpc_task *task);
 extern void nfs4_destroy_session(struct nfs4_session *session);
 extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
 extern int nfs4_proc_create_session(struct nfs_client *);
@@ -269,7 +267,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
 
 static inline int nfs4_setup_sequence(const struct nfs_server *server,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-		int cache_reply, struct rpc_task *task)
+		struct rpc_task *task)
 {
 	return 0;
 }
@@ -319,7 +317,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_purge_state_owners(struct nfs_server *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
@@ -327,6 +325,8 @@ extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, fmode_t);
 extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
+extern void nfs_inode_find_state_and_recover(struct inode *inode,
+		const nfs4_stateid *stateid);
 extern void nfs4_schedule_lease_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
@@ -337,7 +337,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *,
 			struct server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
+		fmode_t, fl_owner_t, pid_t);
 
 extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
 extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -346,6 +347,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_release_seqid(struct nfs_seqid *seqid);
 extern void nfs_free_seqid(struct nfs_seqid *seqid);
 
+extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
+
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
@@ -357,6 +360,16 @@ struct nfs4_mount_data;
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
 
+static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+	return memcmp(dst, src, sizeof(*dst)) == 0;
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)
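nfs4_stateid_copy() and nfs4_stateid_match() are thin wrappers over memcpy()/memcmp(), but they give later patches one place to change the comparison policy. An illustrative caller (hypothetical, not a hunk from this series):

static bool demo_update_stateid(nfs4_stateid *cur, const nfs4_stateid *recv)
{
	/* same as memcmp() == 0 over the whole stateid */
	if (nfs4_stateid_match(cur, recv))
		return false;		/* unchanged */
	/* same as memcpy() of the full structure */
	nfs4_stateid_copy(cur, recv);
	return true;
}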
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 71ec08617e2..5acfd9ea8a3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,7 +33,10 @@
 #include <linux/nfs_page.h>
 #include <linux/module.h>
 
+#include <linux/sunrpc/metrics.h>
+
 #include "internal.h"
+#include "delegation.h"
 #include "nfs4filelayout.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
@@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 					   struct nfs_client *clp,
 					   int *reset)
 {
+	struct nfs_server *mds_server = NFS_SERVER(state->inode);
+	struct nfs_client *mds_client = mds_server->nfs_client;
+
 	if (task->tk_status >= 0)
 		return 0;
-
 	*reset = 0;
 
 	switch (task->tk_status) {
+	/* MDS state errors */
+	case -NFS4ERR_DELEG_REVOKED:
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_BAD_STATEID:
+		nfs_remove_bad_delegation(state->inode);
+	case -NFS4ERR_OPENMODE:
+		nfs4_schedule_stateid_recovery(mds_server, state);
+		goto wait_on_recovery;
+	case -NFS4ERR_EXPIRED:
+		nfs4_schedule_stateid_recovery(mds_server, state);
+		nfs4_schedule_lease_recovery(mds_client);
+		goto wait_on_recovery;
+	/* DS session errors */
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
 	case -NFS4ERR_BAD_HIGH_SLOT:
@@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 		*reset = 1;
 		break;
 	}
+out:
 	task->tk_status = 0;
 	return -EAGAIN;
+wait_on_recovery:
+	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
+	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
+		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
+	goto out;
 }
 
 /* NFS_PROTO call done callback routines */
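The wait_on_recovery label parks the task on the MDS client's cl_rpcwaitq until the state manager finishes; the test_bit() after rpc_sleep_on() closes the race where the manager completed before the task managed to queue itself. The pattern in isolation (demo_* is a hypothetical name; the queue and the NFS4CLNT_MANAGER_RUNNING bit are the real nfs_client fields):

static void demo_wait_for_state_manager(struct nfs_client *clp,
					struct rpc_task *task)
{
	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
	/* the manager may already have finished: wake ourselves back up */
	if (!test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
		rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
}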
@@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args, &rdata->res.seq_res,
-			0, task))
+			task))
 		return;
 
 	rpc_call_start(task);
@@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 	rdata->mds_ops->rpc_call_done(task, data);
 }
 
+static void filelayout_read_count_stats(struct rpc_task *task, void *data)
+{
+	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+	rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+}
+
 static void filelayout_read_release(void *data)
 {
 	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
 
+	put_lseg(rdata->lseg);
 	rdata->mds_ops->rpc_release(data);
 }
 
@@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
 			&wdata->args.seq_args, &wdata->res.seq_res,
-			0, task))
+			task))
 		return;
 
 	rpc_call_start(task);
@@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 	wdata->mds_ops->rpc_call_done(task, data);
 }
 
+static void filelayout_write_count_stats(struct rpc_task *task, void *data)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+	rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+}
+
 static void filelayout_write_release(void *data)
 {
 	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
 
+	put_lseg(wdata->lseg);
 	wdata->mds_ops->rpc_release(data);
 }
 
@@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data)
 	nfs_commit_release_pages(wdata);
 	if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
 		nfs_commit_clear_lock(NFS_I(wdata->inode));
+	put_lseg(wdata->lseg);
 	nfs_commitdata_release(wdata);
 }
 
-struct rpc_call_ops filelayout_read_call_ops = {
+static const struct rpc_call_ops filelayout_read_call_ops = {
 	.rpc_call_prepare = filelayout_read_prepare,
 	.rpc_call_done = filelayout_read_call_done,
+	.rpc_count_stats = filelayout_read_count_stats,
 	.rpc_release = filelayout_read_release,
 };
 
-struct rpc_call_ops filelayout_write_call_ops = {
+static const struct rpc_call_ops filelayout_write_call_ops = {
 	.rpc_call_prepare = filelayout_write_prepare,
 	.rpc_call_done = filelayout_write_call_done,
+	.rpc_count_stats = filelayout_write_count_stats,
 	.rpc_release = filelayout_write_release,
 };
 
-struct rpc_call_ops filelayout_commit_call_ops = {
+static const struct rpc_call_ops filelayout_commit_call_ops = {
 	.rpc_call_prepare = filelayout_write_prepare,
 	.rpc_call_done = filelayout_write_call_done,
+	.rpc_count_stats = filelayout_write_count_stats,
 	.rpc_release = filelayout_commit_release,
 };
 
@@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 	idx = nfs4_fl_calc_ds_index(lseg, j);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds) {
-		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+		printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+			__func__);
 		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
 		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 		return PNFS_NOT_ATTEMPTED;
@@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 			goto out_err_free;
 		fl->fh_array[i]->size = be32_to_cpup(p++);
 		if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
-			printk(KERN_ERR "Too big fh %d received %d\n",
+			printk(KERN_ERR "NFS: Too big fh %d received %d\n",
 				i, fl->fh_array[i]->size);
 			goto out_err_free;
 		}
@@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 		int size = (fl->stripe_type == STRIPE_SPARSE) ?
 			fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
 
-		fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
+		fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
 		if (!fl->commit_buckets) {
 			filelayout_free_lseg(&fl->generic_hdr);
 			return NULL;
 		}
 		fl->number_of_buckets = size;
-		for (i = 0; i < size; i++)
-			INIT_LIST_HEAD(&fl->commit_buckets[i]);
+		for (i = 0; i < size; i++) {
+			INIT_LIST_HEAD(&fl->commit_buckets[i].written);
+			INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
+		}
 	}
 	return &fl->generic_hdr;
 }
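Each bucket now carries two lists so that pages written while a COMMIT is outstanding are not confused with the batch already being committed. A request's path through one bucket, roughly (simplified sketch: the real code runs under inode->i_lock and holds an lseg reference per non-empty bucket; demo_req is hypothetical):

struct demo_req {
	struct list_head wb_list;
};

static void demo_bucket_flow(struct nfs4_fl_commit_bucket *bucket,
			     struct demo_req *req)
{
	/* 1: stable write done, parked until someone commits */
	list_add_tail(&req->wb_list, &bucket->written);

	/* 2: a commit scan moves it into the batch being committed */
	list_move_tail(&req->wb_list, &bucket->committing);

	/* 3: COMMIT completion drops it from the bucket */
	list_del_init(&req->wb_list);
}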
@@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	return (p_stripe == r_stripe);
 }
 
-void
+static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
@@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	nfs_pageio_reset_read_mds(pgio);
 }
 
-void
+static void
 filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
@@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
 	.pg_doio = pnfs_generic_pg_writepages,
 };
 
-static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
-{
-	return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
-}
-
 static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
 {
 	if (fl->stripe_type == STRIPE_SPARSE)
@@ -738,13 +780,48 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
 		return j;
 }
 
-struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
+/* The generic layer is about to remove the req from the commit list.
+ * If this will make the bucket empty, it will need to put the lseg reference.
+ */
+static void
+filelayout_clear_request_commit(struct nfs_page *req)
+{
+	struct pnfs_layout_segment *freeme = NULL;
+	struct inode *inode = req->wb_context->dentry->d_inode;
+
+	spin_lock(&inode->i_lock);
+	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
+		goto out;
+	if (list_is_singular(&req->wb_list)) {
+		struct pnfs_layout_segment *lseg;
+
+		/* From here we can find the bucket, but for the moment,
+		 * since there is only one relevant lseg...
+		 */
+		list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+			if (lseg->pls_range.iomode == IOMODE_RW) {
+				freeme = lseg;
+				break;
+			}
+		}
+	}
+out:
+	nfs_request_remove_commit_list(req);
+	spin_unlock(&inode->i_lock);
+	put_lseg(freeme);
+}
+
+static struct list_head *
+filelayout_choose_commit_list(struct nfs_page *req,
+			      struct pnfs_layout_segment *lseg)
 {
-	struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	u32 i, j;
 	struct list_head *list;
 
+	if (fl->commit_through_mds)
+		return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+
 	/* Note that we are calling nfs4_fl_calc_j_index on each page
 	 * that ends up being committed to a data server. An attractive
 	 * alternative is to add a field to nfs_write_data and nfs_page
@@ -754,14 +831,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
 	j = nfs4_fl_calc_j_index(lseg,
 				 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
 	i = select_bucket_index(fl, j);
-	list = &fl->commit_buckets[i];
+	list = &fl->commit_buckets[i].written;
 	if (list_empty(list)) {
-		/* Non-empty buckets hold a reference on the lseg */
+		/* Non-empty buckets hold a reference on the lseg.  That ref
+		 * is normally transferred to the COMMIT call and released
+		 * there.  It could also be released if the last req is pulled
+		 * off due to a rewrite, in which case it will be done in
+		 * filelayout_remove_commit_req
+		 */
 		get_lseg(lseg);
 	}
+	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 	return list;
 }
 
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+			       struct pnfs_layout_segment *lseg)
+{
+	struct list_head *list;
+
+	list = filelayout_choose_commit_list(req, lseg);
+	nfs_request_add_commit_list(req, list);
+}
+
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
 {
 	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -797,11 +890,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds) {
-		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+		printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+			__func__);
 		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
 		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 		prepare_to_resend_writes(data);
-		data->mds_ops->rpc_release(data);
+		filelayout_commit_release(data);
 		return -EAGAIN;
 	}
 	dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
@@ -817,24 +911,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 /*
  * This is only useful while we are using whole file layouts.
  */
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+static struct pnfs_layout_segment *
+find_only_write_lseg_locked(struct inode *inode)
 {
-	struct pnfs_layout_segment *lseg, *rv = NULL;
+	struct pnfs_layout_segment *lseg;
 
-	spin_lock(&inode->i_lock);
 	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
 		if (lseg->pls_range.iomode == IOMODE_RW)
-			rv = get_lseg(lseg);
+			return lseg;
+	return NULL;
+}
+
+static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+{
+	struct pnfs_layout_segment *rv;
+
+	spin_lock(&inode->i_lock);
+	rv = find_only_write_lseg_locked(inode);
+	if (rv)
+		get_lseg(rv);
 	spin_unlock(&inode->i_lock);
 	return rv;
 }
 
-static int alloc_ds_commits(struct inode *inode, struct list_head *list)
+static int
+filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
+		spinlock_t *lock)
+{
+	struct list_head *src = &bucket->written;
+	struct list_head *dst = &bucket->committing;
+	struct nfs_page *req, *tmp;
+	int ret = 0;
+
+	list_for_each_entry_safe(req, tmp, src, wb_list) {
+		if (!nfs_lock_request(req))
+			continue;
+		if (cond_resched_lock(lock))
+			list_safe_reset_next(req, tmp, wb_list);
+		nfs_request_remove_commit_list(req);
+		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+		nfs_list_add_request(req, dst);
+		ret++;
+		if (ret == max)
+			break;
+	}
+	return ret;
+}
+
+/* Move reqs from written to committing lists, returning count of number moved.
+ * Note called with i_lock held.
+ */
+static int filelayout_scan_commit_lists(struct inode *inode, int max,
+		spinlock_t *lock)
+{
+	struct pnfs_layout_segment *lseg;
+	struct nfs4_filelayout_segment *fl;
+	int i, rv = 0, cnt;
+
+	lseg = find_only_write_lseg_locked(inode);
+	if (!lseg)
+		goto out_done;
+	fl = FILELAYOUT_LSEG(lseg);
+	if (fl->commit_through_mds)
+		goto out_done;
+	for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
+		cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
+				max, lock);
+		max -= cnt;
+		rv += cnt;
+	}
+out_done:
+	return rv;
+}
+
+static unsigned int
+alloc_ds_commits(struct inode *inode, struct list_head *list)
 {
 	struct pnfs_layout_segment *lseg;
 	struct nfs4_filelayout_segment *fl;
 	struct nfs_write_data *data;
 	int i, j;
+	unsigned int nreq = 0;
 
 	/* Won't need this when non-whole file layout segments are supported
 	 * instead we will use a pnfs_layout_hdr structure */
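filelayout_scan_ds_commit_list() may drop the spinlock mid-walk: cond_resched_lock() returns nonzero when it released and re-took the lock, and the lookahead pointer cached by list_for_each_entry_safe() is then stale, which is what list_safe_reset_next() repairs. The idiom in generic form (demo_drain is hypothetical):

static int demo_drain(struct list_head *src, struct list_head *dst,
		      spinlock_t *lock, int max)
{
	struct nfs_page *req, *tmp;
	int moved = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (cond_resched_lock(lock))
			/* lock was dropped: refresh the cached next */
			list_safe_reset_next(req, tmp, wb_list);
		list_move_tail(&req->wb_list, dst);
		if (++moved == max)
			break;
	}
	return moved;
}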
@@ -843,28 +1000,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
 		return 0;
 	fl = FILELAYOUT_LSEG(lseg);
 	for (i = 0; i < fl->number_of_buckets; i++) {
-		if (list_empty(&fl->commit_buckets[i]))
+		if (list_empty(&fl->commit_buckets[i].committing))
 			continue;
 		data = nfs_commitdata_alloc();
 		if (!data)
-			goto out_bad;
+			break;
 		data->ds_commit_index = i;
 		data->lseg = lseg;
 		list_add(&data->pages, list);
+		nreq++;
 	}
-	put_lseg(lseg);
-	return 0;
 
-out_bad:
+	/* Clean up on error */
 	for (j = i; j < fl->number_of_buckets; j++) {
-		if (list_empty(&fl->commit_buckets[i]))
+		if (list_empty(&fl->commit_buckets[i].committing))
 			continue;
-		nfs_retry_commit(&fl->commit_buckets[i], lseg);
+		nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
 		put_lseg(lseg); /* associated with emptying bucket */
 	}
 	put_lseg(lseg);
 	/* Caller will clean up entries put on list */
-	return -ENOMEM;
+	return nreq;
 }
 
 /* This follows nfs_commit_list pretty closely */
@@ -874,40 +1030,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 {
 	struct nfs_write_data *data, *tmp;
 	LIST_HEAD(list);
+	unsigned int nreq = 0;
 
 	if (!list_empty(mds_pages)) {
 		data = nfs_commitdata_alloc();
-		if (!data)
-			goto out_bad;
-		data->lseg = NULL;
-		list_add(&data->pages, &list);
+		if (data != NULL) {
+			data->lseg = NULL;
+			list_add(&data->pages, &list);
+			nreq++;
+		} else
+			nfs_retry_commit(mds_pages, NULL);
 	}
 
-	if (alloc_ds_commits(inode, &list))
-		goto out_bad;
+	nreq += alloc_ds_commits(inode, &list);
+
+	if (nreq == 0) {
+		nfs_commit_clear_lock(NFS_I(inode));
+		goto out;
+	}
+
+	atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
 
 	list_for_each_entry_safe(data, tmp, &list, pages) {
 		list_del_init(&data->pages);
-		atomic_inc(&NFS_I(inode)->commits_outstanding);
 		if (!data->lseg) {
 			nfs_init_commit(data, mds_pages, NULL);
 			nfs_initiate_commit(data, NFS_CLIENT(inode),
 					    data->mds_ops, how);
 		} else {
-			nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
+			nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
 			filelayout_initiate_commit(data, how);
 		}
 	}
-	return 0;
- out_bad:
-	list_for_each_entry_safe(data, tmp, &list, pages) {
-		nfs_retry_commit(&data->pages, data->lseg);
-		list_del_init(&data->pages);
-		nfs_commit_free(data);
-	}
-	nfs_retry_commit(mds_pages, NULL);
-	nfs_commit_clear_lock(NFS_I(inode));
-	return -ENOMEM;
+out:
+	return PNFS_ATTEMPTED;
 }
@@ -924,8 +1080,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.free_lseg		= filelayout_free_lseg,
 	.pg_read_ops		= &filelayout_pg_read_ops,
 	.pg_write_ops		= &filelayout_pg_write_ops,
-	.mark_pnfs_commit	= filelayout_mark_pnfs_commit,
-	.choose_commit_list	= filelayout_choose_commit_list,
+	.mark_request_commit	= filelayout_mark_request_commit,
+	.clear_request_commit	= filelayout_clear_request_commit,
+	.scan_commit_lists	= filelayout_scan_commit_lists,
 	.commit_pagelist	= filelayout_commit_pagelist,
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284253f..21190bb1f5e 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr {
 	struct nfs4_pnfs_ds	*ds_list[1];
 };
 
+struct nfs4_fl_commit_bucket {
+	struct list_head written;
+	struct list_head committing;
+};
+
 struct nfs4_filelayout_segment {
 	struct pnfs_layout_segment generic_hdr;
 	u32 stripe_type;
@@ -84,7 +89,7 @@ struct nfs4_filelayout_segment {
 	struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
 	unsigned int num_fh;
 	struct nfs_fh **fh_array;
-	struct list_head *commit_buckets; /* Sort commits to ds */
+	struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
 	int number_of_buckets;
 };
 
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 8ae91908f5a..c9cff9adb2d 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -45,7 +45,7 @@
  * - incremented when a device id maps a data server already in the cache.
  * - decremented when deviceid is removed from the cache.
  */
-DEFINE_SPINLOCK(nfs4_ds_cache_lock);
+static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
 static LIST_HEAD(nfs4_data_server_cache);
 
 /* Debug routines */
@@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
 	return false;
 }
 
-/*
- * Lookup DS by addresses. The first matching address returns true.
- * nfs4_ds_cache_lock is held
- */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(struct list_head *dsaddrs)
+static bool
+_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
+			       const struct list_head *dsaddrs2)
 {
-	struct nfs4_pnfs_ds *ds;
 	struct nfs4_pnfs_ds_addr *da1, *da2;
 
-	list_for_each_entry(da1, dsaddrs, da_node) {
-		list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
-			list_for_each_entry(da2, &ds->ds_addrs, da_node) {
-				if (same_sockaddr(
-					(struct sockaddr *)&da1->da_addr,
-					(struct sockaddr *)&da2->da_addr))
-					return ds;
-			}
-		}
-	}
-	return NULL;
+	/* step through both lists, comparing as we go */
+	for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
+	     da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
+	     da1 != NULL && da2 != NULL;
+	     da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
+	     da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
+		if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
+				   (struct sockaddr *)&da2->da_addr))
+			return false;
+	}
+	if (da1 == NULL && da2 == NULL)
+		return true;
+
+	return false;
 }
 
 /*
- * Compare two lists of addresses.
+ * Lookup DS by addresses. nfs4_ds_cache_lock is held
  */
-static bool
-_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
-				    struct list_head *dsaddrs2)
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(const struct list_head *dsaddrs)
 {
-	struct nfs4_pnfs_ds_addr *da1, *da2;
-	size_t count1 = 0,
-	       count2 = 0;
-
-	list_for_each_entry(da1, dsaddrs1, da_node)
-		count1++;
-
-	list_for_each_entry(da2, dsaddrs2, da_node) {
-		bool found = false;
-		count2++;
-		list_for_each_entry(da1, dsaddrs1, da_node) {
-			if (same_sockaddr((struct sockaddr *)&da1->da_addr,
-					  (struct sockaddr *)&da2->da_addr)) {
-				found = true;
-				break;
-			}
-		}
-		if (!found)
-			return false;
-	}
+	struct nfs4_pnfs_ds *ds;
 
-	return (count1 == count2);
+	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
+			return ds;
+	return NULL;
 }
 
 /*
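The rewritten comparison walks both address lists in lockstep, so it implicitly assumes the two lists were assembled in the same order; the old helper treated them as unordered sets. A more conventional statement of the same walk, using the list sentinels for termination (the demo_* types are illustrative only):

struct demo_node {
	struct list_head node;
	int val;
};

static bool demo_lists_equal(const struct list_head *a,
			     const struct list_head *b)
{
	const struct list_head *pa, *pb;

	for (pa = a->next, pb = b->next;
	     pa != a && pb != b;
	     pa = pa->next, pb = pb->next)
		if (list_entry(pa, struct demo_node, node)->val !=
		    list_entry(pb, struct demo_node, node)->val)
			return false;
	/* equal only if both lists ran out together */
	return pa == a && pb == b;
}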
@@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 		dprintk("%s add new data server %s\n", __func__,
 			ds->ds_remotestr);
 	} else {
-		if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
-							 dsaddrs)) {
-			dprintk("%s: multipath address mismatch: %s != %s",
-				__func__, tmp_ds->ds_remotestr, remotestr);
-		}
 		kfree(remotestr);
 		kfree(ds);
 		atomic_inc(&tmp_ds->ds_count);
@@ -378,7 +355,7 @@ out:
  * Currently only supports ipv4, ipv6 and one multi-path address.
  */
 static struct nfs4_pnfs_ds_addr *
-decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
+decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
 {
 	struct nfs4_pnfs_ds_addr *da = NULL;
 	char *buf, *portstr;
@@ -457,7 +434,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 
 	INIT_LIST_HEAD(&da->da_node);
 
-	if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 		      sizeof(da->da_addr))) {
 		dprintk("%s: error parsing address %s\n", __func__, buf);
 		goto out_free_da;
@@ -554,7 +531,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 	cnt = be32_to_cpup(p);
 	dprintk("%s stripe count  %d\n", __func__, cnt);
 	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
-		printk(KERN_WARNING "%s: stripe count %d greater than "
+		printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
 		       "supported maximum %d\n", __func__,
 			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
 		goto out_err_free_scratch;
@@ -585,7 +562,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 	num = be32_to_cpup(p);
 	dprintk("%s ds_num %u\n", __func__, num);
 	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
-		printk(KERN_WARNING "%s: multipath count %d greater than "
+		printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
 		       "supported maximum %d\n", __func__,
 			num, NFS4_PNFS_MAX_MULTI_CNT);
 		goto out_err_free_stripe_indices;
@@ -593,7 +570,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
 	/* validate stripe indices are all < num */
 	if (max_stripe_index >= num) {
-		printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
+		printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
 			__func__, max_stripe_index, num);
 		goto out_err_free_stripe_indices;
 	}
@@ -625,7 +602,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
 		mp_count = be32_to_cpup(p); /* multipath count */
 		for (j = 0; j < mp_count; j++) {
-			da = decode_ds_addr(&stream, gfp_flags);
+			da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
+					    &stream, gfp_flags);
 			if (da)
 				list_add_tail(&da->da_node, &dsaddrs);
 		}
@@ -686,7 +664,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
 
 	new = decode_device(inode, dev, gfp_flags);
 	if (!new) {
-		printk(KERN_WARNING "%s: Could not decode or add device\n",
+		printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
 			__func__);
 		return NULL;
 	}
@@ -721,7 +699,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla
 	 * GETDEVICEINFO's maxcount
 	 */
 	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
-	max_pages = max_resp_sz >> PAGE_SHIFT;
+	max_pages = nfs_page_array_len(0, max_resp_sz);
 	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
 		__func__, inode, max_resp_sz, max_pages);
 
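The old max_resp_sz >> PAGE_SHIFT rounds down and can under-allocate the page array when the reply size is not page-aligned; nfs_page_array_len() rounds up and also accounts for a nonzero offset into the first page. Assuming the helper keeps its usual definition, the arithmetic is:

/* pages spanned by len bytes that start base bytes into the first page */
static unsigned int demo_page_array_len(unsigned int base, size_t len)
{
	return (base + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

/* e.g. base 0, len 4097 with 4K pages: the old shift gives 1, this gives 2 */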
@@ -835,7 +813,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
 
 	if (ds == NULL) {
-		printk(KERN_ERR "%s: No data server for offset index %d\n",
+		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
 			__func__, ds_idx);
 		return NULL;
 	}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index bb80c49b653..a7f3dedc4ec 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -52,6 +52,30 @@ Elong:
 }
 
 /*
+ * return the path component of "<server>:<path>"
+ *  nfspath - the "<server>:<path>" string
+ *  end - one past the last char that could contain "<server>:"
+ * returns NULL on failure
+ */
+static char *nfs_path_component(const char *nfspath, const char *end)
+{
+	char *p;
+
+	if (*nfspath == '[') {
+		/* parse [] escaped IPv6 addrs */
+		p = strchr(nfspath, ']');
+		if (p != NULL && ++p < end && *p == ':')
+			return p + 1;
+	} else {
+		/* otherwise split on first colon */
+		p = strchr(nfspath, ':');
+		if (p != NULL && p < end)
+			return p + 1;
+	}
+	return NULL;
+}
+
+/*
  * Determine the mount path as a string
  */
 static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
@@ -59,9 +83,9 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
 	char *limit;
 	char *path = nfs_path(&limit, dentry, buffer, buflen);
 	if (!IS_ERR(path)) {
-		char *colon = strchr(path, ':');
-		if (colon && colon < limit)
-			path = colon + 1;
+		char *path_component = nfs_path_component(path, limit);
+		if (path_component)
+			return path_component;
 	}
 	return path;
 }
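nfs_path_component() exists because a raw strchr(path, ':') splits "[fe80::1]:/export" inside the address itself. Expected behaviour on sample inputs (illustrative only, with end pointing one past the possible "<server>:" prefix):

/*
 *	nfs_path_component("server:/export/home", end)	-> "/export/home"
 *	nfs_path_component("[fe80::1]:/export", end)	-> "/export"
 *	nfs_path_component("no-separator", end)		-> NULL
 *
 * The '[' branch keeps the colons inside a bracketed IPv6 literal from
 * being mistaken for the server/path separator.
 */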
@@ -94,19 +118,72 @@ static int nfs4_validate_fspath(struct dentry *dentry,
 }
 
 static size_t nfs_parse_server_name(char *string, size_t len,
-		struct sockaddr *sa, size_t salen)
+		struct sockaddr *sa, size_t salen, struct nfs_server *server)
 {
+	struct net *net = rpc_net_ns(server->client);
 	ssize_t ret;
 
-	ret = rpc_pton(string, len, sa, salen);
+	ret = rpc_pton(net, string, len, sa, salen);
 	if (ret == 0) {
-		ret = nfs_dns_resolve_name(string, len, sa, salen);
+		ret = nfs_dns_resolve_name(net, string, len, sa, salen);
 		if (ret < 0)
 			ret = 0;
 	}
 	return ret;
 }
 
+static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+{
+	struct page *page;
+	struct nfs4_secinfo_flavors *flavors;
+	rpc_authflavor_t flavor;
+	int err;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+	flavors = page_address(page);
+
+	err = nfs4_proc_secinfo(inode, name, flavors);
+	if (err < 0) {
+		flavor = err;
+		goto out;
+	}
+
+	flavor = nfs_find_best_sec(flavors);
+
+out:
+	put_page(page);
+	return flavor;
+}
+
+/*
+ * Please call rpc_shutdown_client() when you are done with this client.
+ */
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
+					struct qstr *name)
+{
+	struct rpc_clnt *clone;
+	struct rpc_auth *auth;
+	rpc_authflavor_t flavor;
+
+	flavor = nfs4_negotiate_security(inode, name);
+	if (flavor < 0)
+		return ERR_PTR(flavor);
+
+	clone = rpc_clone_client(clnt);
+	if (IS_ERR(clone))
+		return clone;
+
+	auth = rpcauth_create(flavor, clone);
+	if (!auth) {
+		rpc_shutdown_client(clone);
+		clone = ERR_PTR(-EIO);
+	}
+
+	return clone;
+}
+
 static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
 				     char *page, char *page2,
 				     const struct nfs4_fs_location *location)
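nfs4_create_sec_client() bundles the SECINFO query, the rpc client clone and the auth attach into one helper, and the caller owns the result. A hypothetical caller shape (demo_* names are illustrative):

static int demo_lookup_with_negotiated_sec(struct rpc_clnt *clnt,
					   struct inode *dir,
					   struct qstr *name)
{
	struct rpc_clnt *sec_client;

	sec_client = nfs4_create_sec_client(clnt, dir, name);
	if (IS_ERR(sec_client))
		return PTR_ERR(sec_client);

	/* ... issue RPCs via sec_client ... */

	rpc_shutdown_client(sec_client);	/* as the comment above asks */
	return 0;
}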
@@ -137,7 +214,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
 			continue;
 
 		mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
-				mountdata->addr, addr_bufsize);
+				mountdata->addr, addr_bufsize,
+				NFS_SB(mountdata->sb));
 		if (mountdata->addrlen == 0)
 			continue;
 
@@ -222,7 +300,7 @@ out:
  * @dentry - dentry of referral
  *
  */
-struct vfsmount *nfs_do_refmount(struct dentry *dentry)
+struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
@@ -248,7 +326,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry)
 	dprintk("%s: getting locations for %s/%s\n",
 		__func__, parent->d_name.name, dentry->d_name.name);
 
-	err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page);
+	err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
 	dput(parent);
 	if (err != 0 ||
 	    fs_locations->nlocations <= 0 ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ec9f6ef6c5d..99650aaf893 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -72,18 +72,21 @@
 
 #define NFS4_MAX_LOOP_ON_RECOVER (10)
 
+static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
+
 struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 			    struct nfs_fattr *fattr, struct iattr *sattr,
 			    struct nfs4_state *state);
 #ifdef CONFIG_NFS_V4_1
-static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
-static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *);
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *);
 #endif
 /* Prevent leaks of NFSv4 errors into userland */
 static int nfs4_map_errors(int err)
@@ -193,7 +196,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	 * when talking to the server, we always send cookie 0
 	 * instead of 1 or 2.
 	 */
-	start = p = kmap_atomic(*readdir->pages, KM_USER0);
+	start = p = kmap_atomic(*readdir->pages);
 
 	if (cookie == 0) {
 		*p++ = xdr_one;                                  /* next */
@@ -221,7 +224,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 
 	readdir->pgbase = (char *)p - (char *)start;
 	readdir->count -= readdir->pgbase;
-	kunmap_atomic(start, KM_USER0);
+	kunmap_atomic(start);
 }
 
 static int nfs4_wait_clnt_recover(struct nfs_client *clp)
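These two hunks track the tree-wide kmap_atomic() API change: the KM_USER0 slot argument is gone, the slot is implied, and the only remaining rule is that map/unmap pairs nest. Minimal sketch of the new form:

static void demo_zero_first_byte(struct page *page)
{
	char *p = kmap_atomic(page);	/* was: kmap_atomic(page, KM_USER0) */

	p[0] = 0;
	kunmap_atomic(p);		/* was: kunmap_atomic(p, KM_USER0) */
}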
@@ -259,17 +262,29 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state *state = exception->state;
+	struct inode *inode = exception->inode;
 	int ret = errorcode;
 
 	exception->retry = 0;
 	switch(errorcode) {
 		case 0:
 			return 0;
+		case -NFS4ERR_OPENMODE:
+			if (inode && nfs_have_delegation(inode, FMODE_READ)) {
+				nfs_inode_return_delegation(inode);
+				exception->retry = 1;
+				return 0;
+			}
+			if (state == NULL)
+				break;
+			nfs4_schedule_stateid_recovery(server, state);
+			goto wait_on_recovery;
+		case -NFS4ERR_DELEG_REVOKED:
 		case -NFS4ERR_ADMIN_REVOKED:
 		case -NFS4ERR_BAD_STATEID:
-		case -NFS4ERR_OPENMODE:
 			if (state == NULL)
 				break;
+			nfs_remove_bad_delegation(state->inode);
 			nfs4_schedule_stateid_recovery(server, state);
 			goto wait_on_recovery;
 		case -NFS4ERR_EXPIRED:
@@ -360,16 +375,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
  * When updating highest_used_slotid there may be "holes" in the bitmap
  * so we need to scan down from highest_used_slotid to 0 looking for the now
  * highest slotid in use.
- * If none found, highest_used_slotid is set to -1.
+ * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
  *
  * Must be called while holding tbl->slot_tbl_lock
  */
 static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
+nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid)
 {
-	int slotid = free_slotid;
-
-	BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
+	BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE);
 	/* clear used bit in bitmap */
 	__clear_bit(slotid, tbl->used_slots);
 
@@ -379,10 +392,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
379 if (slotid < tbl->max_slots) 392 if (slotid < tbl->max_slots)
380 tbl->highest_used_slotid = slotid; 393 tbl->highest_used_slotid = slotid;
381 else 394 else
382 tbl->highest_used_slotid = -1; 395 tbl->highest_used_slotid = NFS4_NO_SLOT;
383 } 396 }
384 dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, 397 dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
385 free_slotid, tbl->highest_used_slotid); 398 slotid, tbl->highest_used_slotid);
399}
400
401bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
402{
403 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
404 return true;
386} 405}
387 406
388/* 407/*
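nfs4_free_slot() now takes a u32 slotid and uses the NFS4_NO_SLOT sentinel in place of -1, which the old u8 parameter could never represent. A runnable sketch of the clear-and-rescan logic over a single 64-bit bitmap word (the struct and constants here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define NO_SLOT UINT32_MAX   /* sentinel replacing the old -1 */

struct slot_table {
	uint64_t used;          /* bit n set => slot n in use */
	uint32_t highest_used;  /* NO_SLOT when the table is empty */
};

/* Clear the slot's bit; if it was the highest in use, scan down for
 * the new highest, falling back to NO_SLOT when none remain. */
static void free_slot(struct slot_table *tbl, uint32_t slotid)
{
	tbl->used &= ~(UINT64_C(1) << slotid);
	if (slotid == tbl->highest_used) {
		while (slotid > 0 && !(tbl->used & (UINT64_C(1) << slotid)))
			slotid--;
		tbl->highest_used = (tbl->used & (UINT64_C(1) << slotid)) ?
					slotid : NO_SLOT;
	}
}

int main(void)
{
	struct slot_table tbl = { .used = 0x5, .highest_used = 2 };

	free_slot(&tbl, 2);   /* highest in use drops to slot 0 */
	free_slot(&tbl, 0);   /* table now empty: NO_SLOT */
	printf("highest_used=%u\n", (unsigned)tbl.highest_used);
	return 0;
}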
@@ -390,16 +409,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
390 */ 409 */
391static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) 410static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
392{ 411{
393 struct rpc_task *task;
394
395 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { 412 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
396 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); 413 rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
397 if (task) 414 nfs4_set_task_privileged, NULL);
398 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
399 return; 415 return;
400 } 416 }
401 417
402 if (ses->fc_slot_table.highest_used_slotid != -1) 418 if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
403 return; 419 return;
404 420
405 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); 421 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
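The drain path stops waking an arbitrary next task and then promoting it; rpc_wake_up_first() applies nfs4_set_task_privileged() to the candidate before it runs, closing the window in which an unprivileged task could be woken. A small analogue of "wake the first waiter the callback accepts", with illustrative types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct task { const char *name; int priority; bool queued; };

/* Callback applied to the candidate before it is woken; returning
 * true accepts the task (mirrors nfs4_set_task_privileged). */
typedef bool (*wake_fn)(struct task *t, void *data);

static bool set_privileged(struct task *t, void *data)
{
	(void)data;
	t->priority = 99;   /* illustrative "privileged" level */
	return true;
}

static struct task *wake_up_first(struct task *q, size_t n,
				  wake_fn fn, void *data)
{
	for (size_t i = 0; i < n; i++) {
		if (q[i].queued && fn(&q[i], data)) {
			q[i].queued = false;   /* "woken" */
			return &q[i];
		}
	}
	return NULL;
}

int main(void)
{
	struct task q[] = { { "a", 0, true }, { "b", 0, true } };
	struct task *t = wake_up_first(q, 2, set_privileged, NULL);

	printf("woke %s at priority %d\n", t->name, t->priority);
	return 0;
}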
@@ -412,7 +428,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
412void nfs4_check_drain_bc_complete(struct nfs4_session *ses) 428void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
413{ 429{
414 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || 430 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
415 ses->bc_slot_table.highest_used_slotid != -1) 431 ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
416 return; 432 return;
417 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); 433 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
418 complete(&ses->bc_slot_table.complete); 434 complete(&ses->bc_slot_table.complete);
@@ -507,25 +523,25 @@ static int nfs4_sequence_done(struct rpc_task *task,
507 * nfs4_find_slot looks for an unset bit in the used_slots bitmap. 523 * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
508 * If found, we mark the slot as used, update the highest_used_slotid, 524 * If found, we mark the slot as used, update the highest_used_slotid,
509 * and respectively set up the sequence operation args. 525 * and respectively set up the sequence operation args.
510 * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. 526 * The slot number is returned if found, or NFS4_NO_SLOT otherwise.
511 * 527 *
512 * Note: must be called while holding the slot_tbl_lock. 528 * Note: must be called while holding the slot_tbl_lock.
513 */ 529 */
514static u8 530static u32
515nfs4_find_slot(struct nfs4_slot_table *tbl) 531nfs4_find_slot(struct nfs4_slot_table *tbl)
516{ 532{
517 int slotid; 533 u32 slotid;
518 u8 ret_id = NFS4_MAX_SLOT_TABLE; 534 u32 ret_id = NFS4_NO_SLOT;
519 BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
520 535
521 dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", 536 dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
522 __func__, tbl->used_slots[0], tbl->highest_used_slotid, 537 __func__, tbl->used_slots[0], tbl->highest_used_slotid,
523 tbl->max_slots); 538 tbl->max_slots);
524 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); 539 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
525 if (slotid >= tbl->max_slots) 540 if (slotid >= tbl->max_slots)
526 goto out; 541 goto out;
527 __set_bit(slotid, tbl->used_slots); 542 __set_bit(slotid, tbl->used_slots);
528 if (slotid > tbl->highest_used_slotid) 543 if (slotid > tbl->highest_used_slotid ||
544 tbl->highest_used_slotid == NFS4_NO_SLOT)
529 tbl->highest_used_slotid = slotid; 545 tbl->highest_used_slotid = slotid;
530 ret_id = slotid; 546 ret_id = slotid;
531out: 547out:
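nfs4_find_slot() likewise moves from u8 to u32, returning NFS4_NO_SLOT on failure and dropping the BUILD_BUG_ON that kept the old narrow return type honest. The added highest_used_slotid == NFS4_NO_SLOT test matters because the sentinel is an unsigned maximum, so "slotid > highest_used_slotid" is false for every valid slot of an empty table. The allocation side of the sketch above:

#include <stdint.h>
#include <stdio.h>

#define MAX_SLOTS 64
#define NO_SLOT   UINT32_MAX

struct slot_table { uint64_t used; uint32_t highest_used; };

/* First-zero-bit allocation; see the comment above for why the
 * explicit NO_SLOT comparison is required on an empty table. */
static uint32_t find_slot(struct slot_table *tbl)
{
	for (uint32_t slotid = 0; slotid < MAX_SLOTS; slotid++) {
		if (tbl->used & (UINT64_C(1) << slotid))
			continue;
		tbl->used |= UINT64_C(1) << slotid;
		if (slotid > tbl->highest_used ||
		    tbl->highest_used == NO_SLOT)
			tbl->highest_used = slotid;
		return slotid;
	}
	return NO_SLOT;
}

int main(void)
{
	struct slot_table tbl = { .used = 0, .highest_used = NO_SLOT };
	uint32_t slotid = find_slot(&tbl);

	printf("slot %u, highest %u\n",
	       (unsigned)slotid, (unsigned)tbl.highest_used);
	return 0;
}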
@@ -534,15 +550,25 @@ out:
534 return ret_id; 550 return ret_id;
535} 551}
536 552
553static void nfs41_init_sequence(struct nfs4_sequence_args *args,
554 struct nfs4_sequence_res *res, int cache_reply)
555{
556 args->sa_session = NULL;
557 args->sa_cache_this = 0;
558 if (cache_reply)
559 args->sa_cache_this = 1;
560 res->sr_session = NULL;
561 res->sr_slot = NULL;
562}
563
537int nfs41_setup_sequence(struct nfs4_session *session, 564int nfs41_setup_sequence(struct nfs4_session *session,
538 struct nfs4_sequence_args *args, 565 struct nfs4_sequence_args *args,
539 struct nfs4_sequence_res *res, 566 struct nfs4_sequence_res *res,
540 int cache_reply,
541 struct rpc_task *task) 567 struct rpc_task *task)
542{ 568{
543 struct nfs4_slot *slot; 569 struct nfs4_slot *slot;
544 struct nfs4_slot_table *tbl; 570 struct nfs4_slot_table *tbl;
545 u8 slotid; 571 u32 slotid;
546 572
547 dprintk("--> %s\n", __func__); 573 dprintk("--> %s\n", __func__);
548 /* slot already allocated? */ 574 /* slot already allocated? */
@@ -570,7 +596,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
570 } 596 }
571 597
572 slotid = nfs4_find_slot(tbl); 598 slotid = nfs4_find_slot(tbl);
573 if (slotid == NFS4_MAX_SLOT_TABLE) { 599 if (slotid == NFS4_NO_SLOT) {
574 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 600 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
575 spin_unlock(&tbl->slot_tbl_lock); 601 spin_unlock(&tbl->slot_tbl_lock);
576 dprintk("<-- %s: no free slots\n", __func__); 602 dprintk("<-- %s: no free slots\n", __func__);
@@ -582,7 +608,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
582 slot = tbl->slots + slotid; 608 slot = tbl->slots + slotid;
583 args->sa_session = session; 609 args->sa_session = session;
584 args->sa_slotid = slotid; 610 args->sa_slotid = slotid;
585 args->sa_cache_this = cache_reply;
586 611
587 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 612 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
588 613
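Initialization of the sequence arguments is hoisted into nfs41_init_sequence(), which callers run once when building the request; nfs41_setup_sequence() consequently loses its cache_reply parameter, and the res->sr_slot = NULL resets scattered through callers disappear. A compact model of that split (field names are illustrative):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct seq_args { void *session; bool cache_this; int slotid; };
struct seq_res  { void *session; void *slot; };

/* One-time initialisation, done where the request is built. */
static void init_sequence(struct seq_args *a, struct seq_res *r,
			  bool cache_reply)
{
	a->session = NULL;
	a->cache_this = cache_reply;
	r->session = NULL;
	r->slot = NULL;
}

/* Per-transmission setup no longer needs to know about caching. */
static void setup_sequence(struct seq_args *a, struct seq_res *r,
			   void *session, int slotid)
{
	a->session = session;
	a->slotid = slotid;
	r->session = session;
}

int main(void)
{
	struct seq_args a;
	struct seq_res r;
	int session;   /* dummy stand-in for a session object */

	init_sequence(&a, &r, true);   /* e.g. OPEN caches its reply */
	setup_sequence(&a, &r, &session, 0);
	printf("cache_this=%d slotid=%d\n", a.cache_this, a.slotid);
	return 0;
}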
@@ -602,24 +627,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
602int nfs4_setup_sequence(const struct nfs_server *server, 627int nfs4_setup_sequence(const struct nfs_server *server,
603 struct nfs4_sequence_args *args, 628 struct nfs4_sequence_args *args,
604 struct nfs4_sequence_res *res, 629 struct nfs4_sequence_res *res,
605 int cache_reply,
606 struct rpc_task *task) 630 struct rpc_task *task)
607{ 631{
608 struct nfs4_session *session = nfs4_get_session(server); 632 struct nfs4_session *session = nfs4_get_session(server);
609 int ret = 0; 633 int ret = 0;
610 634
611 if (session == NULL) { 635 if (session == NULL)
612 args->sa_session = NULL;
613 res->sr_session = NULL;
614 goto out; 636 goto out;
615 }
616 637
617 dprintk("--> %s clp %p session %p sr_slot %td\n", 638 dprintk("--> %s clp %p session %p sr_slot %td\n",
618 __func__, session->clp, session, res->sr_slot ? 639 __func__, session->clp, session, res->sr_slot ?
619 res->sr_slot - session->fc_slot_table.slots : -1); 640 res->sr_slot - session->fc_slot_table.slots : -1);
620 641
621 ret = nfs41_setup_sequence(session, args, res, cache_reply, 642 ret = nfs41_setup_sequence(session, args, res, task);
622 task);
623out: 643out:
624 dprintk("<-- %s status=%d\n", __func__, ret); 644 dprintk("<-- %s status=%d\n", __func__, ret);
625 return ret; 645 return ret;
@@ -629,7 +649,6 @@ struct nfs41_call_sync_data {
629 const struct nfs_server *seq_server; 649 const struct nfs_server *seq_server;
630 struct nfs4_sequence_args *seq_args; 650 struct nfs4_sequence_args *seq_args;
631 struct nfs4_sequence_res *seq_res; 651 struct nfs4_sequence_res *seq_res;
632 int cache_reply;
633}; 652};
634 653
635static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) 654static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
@@ -639,7 +658,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
639 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); 658 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
640 659
641 if (nfs4_setup_sequence(data->seq_server, data->seq_args, 660 if (nfs4_setup_sequence(data->seq_server, data->seq_args,
642 data->seq_res, data->cache_reply, task)) 661 data->seq_res, task))
643 return; 662 return;
644 rpc_call_start(task); 663 rpc_call_start(task);
645} 664}
@@ -657,12 +676,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
657 nfs41_sequence_done(task, data->seq_res); 676 nfs41_sequence_done(task, data->seq_res);
658} 677}
659 678
660struct rpc_call_ops nfs41_call_sync_ops = { 679static const struct rpc_call_ops nfs41_call_sync_ops = {
661 .rpc_call_prepare = nfs41_call_sync_prepare, 680 .rpc_call_prepare = nfs41_call_sync_prepare,
662 .rpc_call_done = nfs41_call_sync_done, 681 .rpc_call_done = nfs41_call_sync_done,
663}; 682};
664 683
665struct rpc_call_ops nfs41_call_priv_sync_ops = { 684static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
666 .rpc_call_prepare = nfs41_call_priv_sync_prepare, 685 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
667 .rpc_call_done = nfs41_call_sync_done, 686 .rpc_call_done = nfs41_call_sync_done,
668}; 687};
@@ -672,7 +691,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
672 struct rpc_message *msg, 691 struct rpc_message *msg,
673 struct nfs4_sequence_args *args, 692 struct nfs4_sequence_args *args,
674 struct nfs4_sequence_res *res, 693 struct nfs4_sequence_res *res,
675 int cache_reply,
676 int privileged) 694 int privileged)
677{ 695{
678 int ret; 696 int ret;
@@ -681,7 +699,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
681 .seq_server = server, 699 .seq_server = server,
682 .seq_args = args, 700 .seq_args = args,
683 .seq_res = res, 701 .seq_res = res,
684 .cache_reply = cache_reply,
685 }; 702 };
686 struct rpc_task_setup task_setup = { 703 struct rpc_task_setup task_setup = {
687 .rpc_client = clnt, 704 .rpc_client = clnt,
@@ -690,7 +707,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
690 .callback_data = &data 707 .callback_data = &data
691 }; 708 };
692 709
693 res->sr_slot = NULL;
694 if (privileged) 710 if (privileged)
695 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 711 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
696 task = rpc_run_task(&task_setup); 712 task = rpc_run_task(&task_setup);
@@ -710,10 +726,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
710 struct nfs4_sequence_res *res, 726 struct nfs4_sequence_res *res,
711 int cache_reply) 727 int cache_reply)
712{ 728{
713 return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); 729 nfs41_init_sequence(args, res, cache_reply);
730 return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
714} 731}
715 732
716#else 733#else
734static inline
735void nfs41_init_sequence(struct nfs4_sequence_args *args,
736 struct nfs4_sequence_res *res, int cache_reply)
737{
738}
739
717static int nfs4_sequence_done(struct rpc_task *task, 740static int nfs4_sequence_done(struct rpc_task *task,
718 struct nfs4_sequence_res *res) 741 struct nfs4_sequence_res *res)
719{ 742{
@@ -728,7 +751,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt,
728 struct nfs4_sequence_res *res, 751 struct nfs4_sequence_res *res,
729 int cache_reply) 752 int cache_reply)
730{ 753{
731 args->sa_session = res->sr_session = NULL; 754 nfs41_init_sequence(args, res, cache_reply);
732 return rpc_call_sync(clnt, msg, 0); 755 return rpc_call_sync(clnt, msg, 0);
733} 756}
734 757
@@ -815,20 +838,23 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
815 p->o_arg.open_flags = flags; 838 p->o_arg.open_flags = flags;
816 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); 839 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
817 p->o_arg.clientid = server->nfs_client->cl_clientid; 840 p->o_arg.clientid = server->nfs_client->cl_clientid;
818 p->o_arg.id = sp->so_owner_id.id; 841 p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
842 p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
819 p->o_arg.name = &dentry->d_name; 843 p->o_arg.name = &dentry->d_name;
820 p->o_arg.server = server; 844 p->o_arg.server = server;
821 p->o_arg.bitmask = server->attr_bitmask; 845 p->o_arg.bitmask = server->attr_bitmask;
822 p->o_arg.dir_bitmask = server->cache_consistency_bitmask; 846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
823 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
824 if (flags & O_CREAT) { 848 if (attrs != NULL && attrs->ia_valid != 0) {
825 u32 *s; 849 __be32 verf[2];
826 850
827 p->o_arg.u.attrs = &p->attrs; 851 p->o_arg.u.attrs = &p->attrs;
828 memcpy(&p->attrs, attrs, sizeof(p->attrs)); 852 memcpy(&p->attrs, attrs, sizeof(p->attrs));
829 s = (u32 *) p->o_arg.u.verifier.data; 853
830 s[0] = jiffies; 854 verf[0] = jiffies;
831 s[1] = current->pid; 855 verf[1] = current->pid;
856 memcpy(p->o_arg.u.verifier.data, verf,
857 sizeof(p->o_arg.u.verifier.data));
832 } 858 }
833 p->c_arg.fh = &p->o_res.fh; 859 p->c_arg.fh = &p->o_res.fh;
834 p->c_arg.stateid = &p->o_res.stateid; 860 p->c_arg.stateid = &p->o_res.stateid;
@@ -878,7 +904,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode
878{ 904{
879 int ret = 0; 905 int ret = 0;
880 906
881 if (open_mode & O_EXCL) 907 if (open_mode & (O_EXCL|O_TRUNC))
882 goto out; 908 goto out;
883 switch (mode & (FMODE_READ|FMODE_WRITE)) { 909 switch (mode & (FMODE_READ|FMODE_WRITE)) {
884 case FMODE_READ: 910 case FMODE_READ:
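Adding O_TRUNC to the mask means an open that truncates can never be satisfied from cached open state, since truncation carries an implicit SETATTR that must reach the server. In miniature:

#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>

/* Cached open state is only reusable when the open has no
 * server-visible side effect: O_EXCL and O_TRUNC both force
 * a round trip. */
static bool can_open_cached(int open_flags)
{
	return !(open_flags & (O_EXCL | O_TRUNC));
}

int main(void)
{
	printf("O_RDWR: %d, O_RDWR|O_TRUNC: %d\n",
	       can_open_cached(O_RDWR),
	       can_open_cached(O_RDWR | O_TRUNC));
	return 0;
}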
@@ -927,8 +953,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
927static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 953static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
928{ 954{
929 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 955 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
930 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); 956 nfs4_stateid_copy(&state->stateid, stateid);
931 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); 957 nfs4_stateid_copy(&state->open_stateid, stateid);
932 switch (fmode) { 958 switch (fmode) {
933 case FMODE_READ: 959 case FMODE_READ:
934 set_bit(NFS_O_RDONLY_STATE, &state->flags); 960 set_bit(NFS_O_RDONLY_STATE, &state->flags);
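From here on, the open-coded memcpy()/memcmp() calls on stateid->data give way to nfs4_stateid_copy() and nfs4_stateid_match(). Presumably these are thin wrappers over the same byte operations; a sketch of their likely shape, assuming the 16-byte opaque stateid of NFS4_STATEID_SIZE:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define STATEID_SIZE 16   /* NFS4_STATEID_SIZE */

typedef struct { unsigned char data[STATEID_SIZE]; } stateid_t;

/* Wrapping the raw byte ops names the intent and keeps the size
 * argument from drifting out of sync with the type. */
static void stateid_copy(stateid_t *dst, const stateid_t *src)
{
	memcpy(dst->data, src->data, sizeof(dst->data));
}

static bool stateid_match(const stateid_t *a, const stateid_t *b)
{
	return memcmp(a->data, b->data, sizeof(a->data)) == 0;
}

int main(void)
{
	stateid_t a = { { 1, 2, 3 } }, b;

	stateid_copy(&b, &a);
	printf("match=%d\n", stateid_match(&a, &b));
	return 0;
}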
@@ -956,7 +982,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
956 */ 982 */
957 write_seqlock(&state->seqlock); 983 write_seqlock(&state->seqlock);
958 if (deleg_stateid != NULL) { 984 if (deleg_stateid != NULL) {
959 memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); 985 nfs4_stateid_copy(&state->stateid, deleg_stateid);
960 set_bit(NFS_DELEGATED_STATE, &state->flags); 986 set_bit(NFS_DELEGATED_STATE, &state->flags);
961 } 987 }
962 if (open_stateid != NULL) 988 if (open_stateid != NULL)
@@ -987,7 +1013,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
987 1013
988 if (delegation == NULL) 1014 if (delegation == NULL)
989 delegation = &deleg_cur->stateid; 1015 delegation = &deleg_cur->stateid;
990 else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) 1016 else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
991 goto no_delegation_unlock; 1017 goto no_delegation_unlock;
992 1018
993 nfs_mark_delegation_referenced(deleg_cur); 1019 nfs_mark_delegation_referenced(deleg_cur);
@@ -1026,7 +1052,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1026 struct nfs4_state *state = opendata->state; 1052 struct nfs4_state *state = opendata->state;
1027 struct nfs_inode *nfsi = NFS_I(state->inode); 1053 struct nfs_inode *nfsi = NFS_I(state->inode);
1028 struct nfs_delegation *delegation; 1054 struct nfs_delegation *delegation;
1029 int open_mode = opendata->o_arg.open_flags & O_EXCL; 1055 int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
1030 fmode_t fmode = opendata->o_arg.fmode; 1056 fmode_t fmode = opendata->o_arg.fmode;
1031 nfs4_stateid stateid; 1057 nfs4_stateid stateid;
1032 int ret = -EAGAIN; 1058 int ret = -EAGAIN;
@@ -1048,7 +1074,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1048 break; 1074 break;
1049 } 1075 }
1050 /* Save the delegation */ 1076 /* Save the delegation */
1051 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); 1077 nfs4_stateid_copy(&stateid, &delegation->stateid);
1052 rcu_read_unlock(); 1078 rcu_read_unlock();
1053 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); 1079 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
1054 if (ret != 0) 1080 if (ret != 0)
@@ -1090,6 +1116,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1090 if (state == NULL) 1116 if (state == NULL)
1091 goto err_put_inode; 1117 goto err_put_inode;
1092 if (data->o_res.delegation_type != 0) { 1118 if (data->o_res.delegation_type != 0) {
1119 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1093 int delegation_flags = 0; 1120 int delegation_flags = 0;
1094 1121
1095 rcu_read_lock(); 1122 rcu_read_lock();
@@ -1101,7 +1128,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1101 pr_err_ratelimited("NFS: Broken NFSv4 server %s is " 1128 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1102 "returning a delegation for " 1129 "returning a delegation for "
1103 "OPEN(CLAIM_DELEGATE_CUR)\n", 1130 "OPEN(CLAIM_DELEGATE_CUR)\n",
1104 NFS_CLIENT(inode)->cl_server); 1131 clp->cl_hostname);
1105 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1132 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1106 nfs_inode_set_delegation(state->inode, 1133 nfs_inode_set_delegation(state->inode,
1107 data->owner->so_cred, 1134 data->owner->so_cred,
@@ -1210,10 +1237,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1210 * Check if we need to update the current stateid. 1237 * Check if we need to update the current stateid.
1211 */ 1238 */
1212 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && 1239 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
1213 memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { 1240 !nfs4_stateid_match(&state->stateid, &state->open_stateid)) {
1214 write_seqlock(&state->seqlock); 1241 write_seqlock(&state->seqlock);
1215 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 1242 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1216 memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); 1243 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1217 write_sequnlock(&state->seqlock); 1244 write_sequnlock(&state->seqlock);
1218 } 1245 }
1219 return 0; 1246 return 0;
@@ -1282,8 +1309,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
1282 if (IS_ERR(opendata)) 1309 if (IS_ERR(opendata))
1283 return PTR_ERR(opendata); 1310 return PTR_ERR(opendata);
1284 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; 1311 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
1285 memcpy(opendata->o_arg.u.delegation.data, stateid->data, 1312 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
1286 sizeof(opendata->o_arg.u.delegation.data));
1287 ret = nfs4_open_recover(opendata, state); 1313 ret = nfs4_open_recover(opendata, state);
1288 nfs4_opendata_put(opendata); 1314 nfs4_opendata_put(opendata);
1289 return ret; 1315 return ret;
@@ -1319,8 +1345,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1319 * The show must go on: exit, but mark the 1345 * The show must go on: exit, but mark the
1320 * stateid as needing recovery. 1346 * stateid as needing recovery.
1321 */ 1347 */
1348 case -NFS4ERR_DELEG_REVOKED:
1322 case -NFS4ERR_ADMIN_REVOKED: 1349 case -NFS4ERR_ADMIN_REVOKED:
1323 case -NFS4ERR_BAD_STATEID: 1350 case -NFS4ERR_BAD_STATEID:
1351 nfs_inode_find_state_and_recover(state->inode,
1352 stateid);
1324 nfs4_schedule_stateid_recovery(server, state); 1353 nfs4_schedule_stateid_recovery(server, state);
1325 case -EKEYEXPIRED: 1354 case -EKEYEXPIRED:
1326 /* 1355 /*
@@ -1345,8 +1374,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
1345 1374
1346 data->rpc_status = task->tk_status; 1375 data->rpc_status = task->tk_status;
1347 if (data->rpc_status == 0) { 1376 if (data->rpc_status == 0) {
1348 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 1377 nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
1349 sizeof(data->o_res.stateid.data));
1350 nfs_confirm_seqid(&data->owner->so_seqid, 0); 1378 nfs_confirm_seqid(&data->owner->so_seqid, 0);
1351 renew_lease(data->o_res.server, data->timestamp); 1379 renew_lease(data->o_res.server, data->timestamp);
1352 data->rpc_done = 1; 1380 data->rpc_done = 1;
@@ -1439,8 +1467,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1439 goto unlock_no_action; 1467 goto unlock_no_action;
1440 rcu_read_unlock(); 1468 rcu_read_unlock();
1441 } 1469 }
1442 /* Update sequence id. */ 1470 /* Update client id. */
1443 data->o_arg.id = sp->so_owner_id.id;
1444 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; 1471 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
1445 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { 1472 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
1446 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 1473 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
@@ -1449,7 +1476,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1449 data->timestamp = jiffies; 1476 data->timestamp = jiffies;
1450 if (nfs4_setup_sequence(data->o_arg.server, 1477 if (nfs4_setup_sequence(data->o_arg.server,
1451 &data->o_arg.seq_args, 1478 &data->o_arg.seq_args,
1452 &data->o_res.seq_res, 1, task)) 1479 &data->o_res.seq_res, task))
1453 return; 1480 return;
1454 rpc_call_start(task); 1481 rpc_call_start(task);
1455 return; 1482 return;
@@ -1551,6 +1578,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1551 }; 1578 };
1552 int status; 1579 int status;
1553 1580
1581 nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
1554 kref_get(&data->kref); 1582 kref_get(&data->kref);
1555 data->rpc_done = 0; 1583 data->rpc_done = 0;
1556 data->rpc_status = 0; 1584 data->rpc_status = 0;
@@ -1712,15 +1740,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
1712} 1740}
1713 1741
1714#if defined(CONFIG_NFS_V4_1) 1742#if defined(CONFIG_NFS_V4_1)
1715static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) 1743static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags)
1716{ 1744{
1717 int status; 1745 int status = NFS_OK;
1718 struct nfs_server *server = NFS_SERVER(state->inode); 1746 struct nfs_server *server = NFS_SERVER(state->inode);
1719 1747
1720 status = nfs41_test_stateid(server, state); 1748 if (state->flags & flags) {
1721 if (status == NFS_OK) 1749 status = nfs41_test_stateid(server, stateid);
1722 return 0; 1750 if (status != NFS_OK) {
1723 nfs41_free_stateid(server, state); 1751 nfs41_free_stateid(server, stateid);
1752 state->flags &= ~flags;
1753 }
1754 }
1755 return status;
1756}
1757
1758static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
1759{
1760 int deleg_status, open_status;
1761 int deleg_flags = 1 << NFS_DELEGATED_STATE;
1762 int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE);
1763
1764 deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags);
1765 open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags);
1766
1767 if ((deleg_status == NFS_OK) && (open_status == NFS_OK))
1768 return NFS_OK;
1724 return nfs4_open_expired(sp, state); 1769 return nfs4_open_expired(sp, state);
1725} 1770}
1726#endif 1771#endif
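nfs41_open_expired() is refactored so the delegation stateid and the open stateid are each probed with TEST_STATEID only when the matching state flags are set; a stateid the server no longer recognizes is released with FREE_STATEID and its flags cleared, and full open recovery runs only if either probe failed. The flag-guarded check, with the server round trips stubbed:

#include <stdbool.h>
#include <stdio.h>

#define F_DELEGATED 0x1
#define F_OPEN      0x2   /* stands in for the RD/WR/RDWR trio */

struct state { unsigned flags; int deleg_id, open_id; };

/* Stub for the TEST_STATEID round trip: pretend even ids are
 * still valid on the server, odd ones have been revoked. */
static bool server_knows(int id) { return (id & 1) == 0; }
static void server_free(int id)  { printf("FREE_STATEID %d\n", id); }

static int check_expired(struct state *s, int id, unsigned flags)
{
	if (!(s->flags & flags))
		return 0;          /* nothing to test */
	if (server_knows(id))
		return 0;          /* still good */
	server_free(id);           /* release and force recovery */
	s->flags &= ~flags;
	return -1;
}

int main(void)
{
	struct state s = { F_DELEGATED | F_OPEN, 2, 3 };
	int d = check_expired(&s, s.deleg_id, F_DELEGATED);
	int o = check_expired(&s, s.open_id, F_OPEN);

	printf("recover=%d\n", d != 0 || o != 0);
	return 0;
}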
@@ -1754,7 +1799,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1754 1799
1755 /* Protect against reboot recovery conflicts */ 1800 /* Protect against reboot recovery conflicts */
1756 status = -ENOMEM; 1801 status = -ENOMEM;
1757 if (!(sp = nfs4_get_state_owner(server, cred))) { 1802 sp = nfs4_get_state_owner(server, cred, GFP_KERNEL);
1803 if (sp == NULL) {
1758 dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); 1804 dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
1759 goto out_err; 1805 goto out_err;
1760 } 1806 }
@@ -1829,7 +1875,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1829 * the user though... 1875 * the user though...
1830 */ 1876 */
1831 if (status == -NFS4ERR_BAD_SEQID) { 1877 if (status == -NFS4ERR_BAD_SEQID) {
1832 printk(KERN_WARNING "NFS: v4 server %s " 1878 pr_warn_ratelimited("NFS: v4 server %s "
1833 " returned a bad sequence-id error!\n", 1879 " returned a bad sequence-id error!\n",
1834 NFS_SERVER(dir)->nfs_client->cl_hostname); 1880 NFS_SERVER(dir)->nfs_client->cl_hostname);
1835 exception.retry = 1; 1881 exception.retry = 1;
@@ -1882,12 +1928,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1882 1928
1883 nfs_fattr_init(fattr); 1929 nfs_fattr_init(fattr);
1884 1930
1885 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { 1931 if (state != NULL) {
1932 nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
1933 current->files, current->tgid);
1934 } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
1935 FMODE_WRITE)) {
1886 /* Use that stateid */ 1936 /* Use that stateid */
1887 } else if (state != NULL) {
1888 nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
1889 } else 1937 } else
1890 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1938 nfs4_stateid_copy(&arg.stateid, &zero_stateid);
1891 1939
1892 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 1940 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1893 if (status == 0 && state != NULL) 1941 if (status == 0 && state != NULL)
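The stateid preference for SETATTR inverts: with an open state present, nfs4_select_rw_stateid() picks the most specific write stateid; the delegation stateid is used directly only when there is no open state, and the anonymous zero stateid stays the last resort. The three-way choice in isolation:

#include <stdio.h>
#include <string.h>

typedef struct { unsigned char data[16]; } stateid_t;

static const stateid_t zero_stateid;   /* all-zero: anonymous I/O */

/* Order matters: an open/lock stateid proves the client's access
 * mode; the delegation stateid only proves a cached delegation. */
static void select_setattr_stateid(stateid_t *dst,
				   const stateid_t *open_state,
				   const stateid_t *delegation)
{
	if (open_state)
		memcpy(dst, open_state, sizeof(*dst));
	else if (delegation)
		memcpy(dst, delegation, sizeof(*dst));
	else
		memcpy(dst, &zero_stateid, sizeof(*dst));
}

int main(void)
{
	stateid_t out, open_sid = { { 7 } };

	select_setattr_stateid(&out, &open_sid, NULL);
	printf("first byte %d\n", out.data[0]);
	return 0;
}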
@@ -1900,13 +1948,25 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1900 struct nfs4_state *state) 1948 struct nfs4_state *state)
1901{ 1949{
1902 struct nfs_server *server = NFS_SERVER(inode); 1950 struct nfs_server *server = NFS_SERVER(inode);
1903 struct nfs4_exception exception = { }; 1951 struct nfs4_exception exception = {
1952 .state = state,
1953 .inode = inode,
1954 };
1904 int err; 1955 int err;
1905 do { 1956 do {
1906 err = nfs4_handle_exception(server, 1957 err = _nfs4_do_setattr(inode, cred, fattr, sattr, state);
1907 _nfs4_do_setattr(inode, cred, fattr, sattr, state), 1958 switch (err) {
1908 &exception); 1959 case -NFS4ERR_OPENMODE:
1960 if (state && !(state->state & FMODE_WRITE)) {
1961 err = -EBADF;
1962 if (sattr->ia_valid & ATTR_OPEN)
1963 err = -EACCES;
1964 goto out;
1965 }
1966 }
1967 err = nfs4_handle_exception(server, err, &exception);
1909 } while (exception.retry); 1968 } while (exception.retry);
1969out:
1910 return err; 1970 return err;
1911} 1971}
1912 1972
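The retry loop now intercepts -NFS4ERR_OPENMODE before the generic handler: if the file was never opened for write, no recovery can help, so the error is translated for the caller, -EACCES when the SETATTR came from open(O_TRUNC) (ATTR_OPEN set) and -EBADF for a plain ftruncate(). A sketch of the mapping (the NFS4ERR value used here is illustrative):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define E_OPENMODE (-10038)   /* illustrative NFS4ERR_OPENMODE value */
#define ATTR_OPEN  0x1000     /* "setattr issued by open()" */

/* Without FMODE_WRITE no amount of state recovery can make the
 * server accept the SETATTR, so fail fast with a sensible errno. */
static int map_setattr_error(int err, bool open_for_write,
			     unsigned ia_valid)
{
	if (err == E_OPENMODE && !open_for_write)
		return (ia_valid & ATTR_OPEN) ? -EACCES : -EBADF;
	return err;   /* otherwise: normal exception handling */
}

int main(void)
{
	printf("%d %d\n",
	       map_setattr_error(E_OPENMODE, false, ATTR_OPEN),
	       map_setattr_error(E_OPENMODE, false, 0));
	return 0;
}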
@@ -1954,6 +2014,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1954 struct nfs4_state *state = calldata->state; 2014 struct nfs4_state *state = calldata->state;
1955 struct nfs_server *server = NFS_SERVER(calldata->inode); 2015 struct nfs_server *server = NFS_SERVER(calldata->inode);
1956 2016
2017 dprintk("%s: begin!\n", __func__);
1957 if (!nfs4_sequence_done(task, &calldata->res.seq_res)) 2018 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
1958 return; 2019 return;
1959 /* hmm. we are done with the inode, and in the process of freeing 2020 /* hmm. we are done with the inode, and in the process of freeing
@@ -1981,6 +2042,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1981 } 2042 }
1982 nfs_release_seqid(calldata->arg.seqid); 2043 nfs_release_seqid(calldata->arg.seqid);
1983 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2044 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
2045 dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
1984} 2046}
1985 2047
1986static void nfs4_close_prepare(struct rpc_task *task, void *data) 2048static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -1989,6 +2051,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1989 struct nfs4_state *state = calldata->state; 2051 struct nfs4_state *state = calldata->state;
1990 int call_close = 0; 2052 int call_close = 0;
1991 2053
2054 dprintk("%s: begin!\n", __func__);
1992 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2055 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1993 return; 2056 return;
1994 2057
@@ -2013,7 +2076,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2013 if (!call_close) { 2076 if (!call_close) {
2014 /* Note: exit _without_ calling nfs4_close_done */ 2077 /* Note: exit _without_ calling nfs4_close_done */
2015 task->tk_action = NULL; 2078 task->tk_action = NULL;
2016 return; 2079 goto out;
2017 } 2080 }
2018 2081
2019 if (calldata->arg.fmode == 0) { 2082 if (calldata->arg.fmode == 0) {
@@ -2022,17 +2085,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2022 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { 2085 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
2023 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, 2086 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
2024 task, NULL); 2087 task, NULL);
2025 return; 2088 goto out;
2026 } 2089 }
2027 } 2090 }
2028 2091
2029 nfs_fattr_init(calldata->res.fattr); 2092 nfs_fattr_init(calldata->res.fattr);
2030 calldata->timestamp = jiffies; 2093 calldata->timestamp = jiffies;
2031 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), 2094 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
2032 &calldata->arg.seq_args, &calldata->res.seq_res, 2095 &calldata->arg.seq_args,
2033 1, task)) 2096 &calldata->res.seq_res,
2034 return; 2097 task))
2098 goto out;
2035 rpc_call_start(task); 2099 rpc_call_start(task);
2100out:
2101 dprintk("%s: done!\n", __func__);
2036} 2102}
2037 2103
2038static const struct rpc_call_ops nfs4_close_ops = { 2104static const struct rpc_call_ops nfs4_close_ops = {
@@ -2074,6 +2140,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2074 calldata = kzalloc(sizeof(*calldata), gfp_mask); 2140 calldata = kzalloc(sizeof(*calldata), gfp_mask);
2075 if (calldata == NULL) 2141 if (calldata == NULL)
2076 goto out; 2142 goto out;
2143 nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
2077 calldata->inode = state->inode; 2144 calldata->inode = state->inode;
2078 calldata->state = state; 2145 calldata->state = state;
2079 calldata->arg.fh = NFS_FH(state->inode); 2146 calldata->arg.fh = NFS_FH(state->inode);
@@ -2182,6 +2249,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2182 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 2249 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
2183 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2250 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
2184 server->acl_bitmask = res.acl_bitmask; 2251 server->acl_bitmask = res.acl_bitmask;
2252 server->fh_expire_type = res.fh_expire_type;
2185 } 2253 }
2186 2254
2187 return status; 2255 return status;
@@ -2230,11 +2298,12 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2230 switch (err) { 2298 switch (err) {
2231 case 0: 2299 case 0:
2232 case -NFS4ERR_WRONGSEC: 2300 case -NFS4ERR_WRONGSEC:
2233 break; 2301 goto out;
2234 default: 2302 default:
2235 err = nfs4_handle_exception(server, err, &exception); 2303 err = nfs4_handle_exception(server, err, &exception);
2236 } 2304 }
2237 } while (exception.retry); 2305 } while (exception.retry);
2306out:
2238 return err; 2307 return err;
2239} 2308}
2240 2309
@@ -2303,14 +2372,14 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2303 return nfs4_map_errors(status); 2372 return nfs4_map_errors(status);
2304} 2373}
2305 2374
2306static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
2307/* 2375/*
2308 * Get locations and (maybe) other attributes of a referral. 2376 * Get locations and (maybe) other attributes of a referral.
2309 * Note that we'll actually follow the referral later when 2377 * Note that we'll actually follow the referral later when
2310 * we detect fsid mismatch in inode revalidation 2378 * we detect fsid mismatch in inode revalidation
2311 */ 2379 */
2312static int nfs4_get_referral(struct inode *dir, const struct qstr *name, 2380static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
2313 struct nfs_fattr *fattr, struct nfs_fh *fhandle) 2381 const struct qstr *name, struct nfs_fattr *fattr,
2382 struct nfs_fh *fhandle)
2314{ 2383{
2315 int status = -ENOMEM; 2384 int status = -ENOMEM;
2316 struct page *page = NULL; 2385 struct page *page = NULL;
@@ -2323,7 +2392,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
2323 if (locations == NULL) 2392 if (locations == NULL)
2324 goto out; 2393 goto out;
2325 2394
2326 status = nfs4_proc_fs_locations(dir, name, locations, page); 2395 status = nfs4_proc_fs_locations(client, dir, name, locations, page);
2327 if (status != 0) 2396 if (status != 0)
2328 goto out; 2397 goto out;
2329 /* Make sure server returned a different fsid for the referral */ 2398 /* Make sure server returned a different fsid for the referral */
@@ -2420,6 +2489,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2420 } 2489 }
2421 } 2490 }
2422 2491
2492 /* Deal with open(O_TRUNC) */
2493 if (sattr->ia_valid & ATTR_OPEN)
2494 sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
2495
2423 status = nfs4_do_setattr(inode, cred, fattr, sattr, state); 2496 status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
2424 if (status == 0) 2497 if (status == 0)
2425 nfs_setattr_update_inode(inode, sattr); 2498 nfs_setattr_update_inode(inode, sattr);
@@ -2456,45 +2529,90 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2456 return status; 2529 return status;
2457} 2530}
2458 2531
2459void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh) 2532static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
2460{ 2533{
2461 memset(fh, 0, sizeof(struct nfs_fh));
2462 fattr->fsid.major = 1;
2463 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | 2534 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
2464 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT; 2535 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_MOUNTPOINT;
2465 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; 2536 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
2466 fattr->nlink = 2; 2537 fattr->nlink = 2;
2467} 2538}
2468 2539
2469static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 2540static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
2470 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2541 struct qstr *name, struct nfs_fh *fhandle,
2542 struct nfs_fattr *fattr)
2471{ 2543{
2472 struct nfs4_exception exception = { }; 2544 struct nfs4_exception exception = { };
2545 struct rpc_clnt *client = *clnt;
2473 int err; 2546 int err;
2474 do { 2547 do {
2475 int status; 2548 err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr);
2476 2549 switch (err) {
2477 status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
2478 switch (status) {
2479 case -NFS4ERR_BADNAME: 2550 case -NFS4ERR_BADNAME:
2480 return -ENOENT; 2551 err = -ENOENT;
2552 goto out;
2481 case -NFS4ERR_MOVED: 2553 case -NFS4ERR_MOVED:
2482 return nfs4_get_referral(dir, name, fattr, fhandle); 2554 err = nfs4_get_referral(client, dir, name, fattr, fhandle);
2555 goto out;
2483 case -NFS4ERR_WRONGSEC: 2556 case -NFS4ERR_WRONGSEC:
2484 nfs_fixup_secinfo_attributes(fattr, fhandle); 2557 err = -EPERM;
2558 if (client != *clnt)
2559 goto out;
2560
2561 client = nfs4_create_sec_client(client, dir, name);
2562 if (IS_ERR(client))
2563 return PTR_ERR(client);
2564
2565 exception.retry = 1;
2566 break;
2567 default:
2568 err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
2485 } 2569 }
2486 err = nfs4_handle_exception(NFS_SERVER(dir),
2487 status, &exception);
2488 } while (exception.retry); 2570 } while (exception.retry);
2571
2572out:
2573 if (err == 0)
2574 *clnt = client;
2575 else if (client != *clnt)
2576 rpc_shutdown_client(client);
2577
2489 return err; 2578 return err;
2490} 2579}
2491 2580
2581static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
2582 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2583{
2584 int status;
2585 struct rpc_clnt *client = NFS_CLIENT(dir);
2586
2587 status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
2588 if (client != NFS_CLIENT(dir)) {
2589 rpc_shutdown_client(client);
2590 nfs_fixup_secinfo_attributes(fattr);
2591 }
2592 return status;
2593}
2594
2595struct rpc_clnt *
2596nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
2597 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2598{
2599 int status;
2600 struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
2601
2602 status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
2603 if (status < 0) {
2604 rpc_shutdown_client(client);
2605 return ERR_PTR(status);
2606 }
2607 return client;
2608}
2609
2492static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 2610static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
2493{ 2611{
2494 struct nfs_server *server = NFS_SERVER(inode); 2612 struct nfs_server *server = NFS_SERVER(inode);
2495 struct nfs4_accessargs args = { 2613 struct nfs4_accessargs args = {
2496 .fh = NFS_FH(inode), 2614 .fh = NFS_FH(inode),
2497 .bitmask = server->attr_bitmask, 2615 .bitmask = server->cache_consistency_bitmask,
2498 }; 2616 };
2499 struct nfs4_accessres res = { 2617 struct nfs4_accessres res = {
2500 .server = server, 2618 .server = server,
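Lookup grows a security-negotiation retry above: on -NFS4ERR_WRONGSEC the common helper builds a fresh rpc_clnt via nfs4_create_sec_client() and retries once with it; on success the negotiated client is handed back through *clnt, on failure the clone is shut down so nothing leaks. The ownership pattern in miniature, with malloc/free standing in for clone and shutdown:

#include <stdio.h>
#include <stdlib.h>

struct client { int flavor; };

static struct client *make_client(int flavor)
{
	struct client *c = malloc(sizeof(*c));

	if (c)
		c->flavor = flavor;
	return c;
}

/* Stub lookup: flavor 0 is "wrong security", anything else works. */
static int do_lookup(struct client *c) { return c->flavor ? 0 : -1; }

/* Try the default client; on the security error, retry once with a
 * renegotiated clone.  The clone is owned here until success. */
static int lookup(struct client **clntp)
{
	struct client *client = *clntp;
	int err = do_lookup(client);

	if (err && client == *clntp) {
		client = make_client(1);   /* renegotiated flavor */
		if (!client)
			return -1;
		err = do_lookup(client);
	}
	if (err == 0)
		*clntp = client;           /* hand ownership back */
	else if (client != *clntp)
		free(client);              /* drop the failed clone */
	return err;
}

int main(void)
{
	struct client *c = make_client(0), *orig = c;
	int err = lookup(&c);

	printf("err=%d renegotiated=%d\n", err, c != orig);
	if (c != orig)
		free(orig);
	free(c);
	return 0;
}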
@@ -2712,8 +2830,18 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2712 2830
2713 args->bitmask = server->cache_consistency_bitmask; 2831 args->bitmask = server->cache_consistency_bitmask;
2714 res->server = server; 2832 res->server = server;
2715 res->seq_res.sr_slot = NULL;
2716 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2833 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2834 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
2835}
2836
2837static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
2838{
2839 if (nfs4_setup_sequence(NFS_SERVER(data->dir),
2840 &data->args.seq_args,
2841 &data->res.seq_res,
2842 task))
2843 return;
2844 rpc_call_start(task);
2717} 2845}
2718 2846
2719static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) 2847static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
@@ -2738,6 +2866,17 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2738 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2866 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2739 arg->bitmask = server->attr_bitmask; 2867 arg->bitmask = server->attr_bitmask;
2740 res->server = server; 2868 res->server = server;
2869 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2870}
2871
2872static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
2873{
2874 if (nfs4_setup_sequence(NFS_SERVER(data->old_dir),
2875 &data->args.seq_args,
2876 &data->res.seq_res,
2877 task))
2878 return;
2879 rpc_call_start(task);
2741} 2880}
2742 2881
2743static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 2882static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
@@ -3232,6 +3371,17 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3232 data->timestamp = jiffies; 3371 data->timestamp = jiffies;
3233 data->read_done_cb = nfs4_read_done_cb; 3372 data->read_done_cb = nfs4_read_done_cb;
3234 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3373 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3374 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
3375}
3376
3377static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3378{
3379 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3380 &data->args.seq_args,
3381 &data->res.seq_res,
3382 task))
3383 return;
3384 rpc_call_start(task);
3235} 3385}
3236 3386
3237/* Reset the nfs_read_data to send the read to the MDS. */ 3387
@@ -3305,6 +3455,17 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3305 data->timestamp = jiffies; 3455 data->timestamp = jiffies;
3306 3456
3307 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3457 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3458 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3459}
3460
3461static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3462{
3463 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3464 &data->args.seq_args,
3465 &data->res.seq_res,
3466 task))
3467 return;
3468 rpc_call_start(task);
3308} 3469}
3309 3470
3310static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3471static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3339,6 +3500,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
3339 data->write_done_cb = nfs4_commit_done_cb; 3500 data->write_done_cb = nfs4_commit_done_cb;
3340 data->res.server = server; 3501 data->res.server = server;
3341 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3502 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3503 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3342} 3504}
3343 3505
3344struct nfs4_renewdata { 3506struct nfs4_renewdata {
@@ -3512,16 +3674,16 @@ out:
3512 return ret; 3674 return ret;
3513} 3675}
3514 3676
3515static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) 3677static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
3516{ 3678{
3517 struct nfs4_cached_acl *acl; 3679 struct nfs4_cached_acl *acl;
3518 3680
3519 if (buf && acl_len <= PAGE_SIZE) { 3681 if (pages && acl_len <= PAGE_SIZE) {
3520 acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); 3682 acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
3521 if (acl == NULL) 3683 if (acl == NULL)
3522 goto out; 3684 goto out;
3523 acl->cached = 1; 3685 acl->cached = 1;
3524 memcpy(acl->data, buf, acl_len); 3686 _copy_from_pages(acl->data, pages, pgbase, acl_len);
3525 } else { 3687 } else {
3526 acl = kmalloc(sizeof(*acl), GFP_KERNEL); 3688 acl = kmalloc(sizeof(*acl), GFP_KERNEL);
3527 if (acl == NULL) 3689 if (acl == NULL)
@@ -3554,7 +3716,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3554 struct nfs_getaclres res = { 3716 struct nfs_getaclres res = {
3555 .acl_len = buflen, 3717 .acl_len = buflen,
3556 }; 3718 };
3557 void *resp_buf;
3558 struct rpc_message msg = { 3719 struct rpc_message msg = {
3559 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], 3720 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
3560 .rpc_argp = &args, 3721 .rpc_argp = &args,
@@ -3568,24 +3729,27 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3568 if (npages == 0) 3729 if (npages == 0)
3569 npages = 1; 3730 npages = 1;
3570 3731
3732 /* Add an extra page to handle the bitmap returned */
3733 npages++;
3734
3571 for (i = 0; i < npages; i++) { 3735 for (i = 0; i < npages; i++) {
3572 pages[i] = alloc_page(GFP_KERNEL); 3736 pages[i] = alloc_page(GFP_KERNEL);
3573 if (!pages[i]) 3737 if (!pages[i])
3574 goto out_free; 3738 goto out_free;
3575 } 3739 }
3576 if (npages > 1) { 3740
3577 /* for decoding across pages */ 3741 /* for decoding across pages */
3578 res.acl_scratch = alloc_page(GFP_KERNEL); 3742 res.acl_scratch = alloc_page(GFP_KERNEL);
3579 if (!res.acl_scratch) 3743 if (!res.acl_scratch)
3580 goto out_free; 3744 goto out_free;
3581 } 3745
3582 args.acl_len = npages * PAGE_SIZE; 3746 args.acl_len = npages * PAGE_SIZE;
3583 args.acl_pgbase = 0; 3747 args.acl_pgbase = 0;
3748
3584 /* Let decode_getfacl know not to fail if the ACL data is larger than 3749 /* Let decode_getfacl know not to fail if the ACL data is larger than
3585 * the page we send as a guess */ 3750 * the page we send as a guess */
3586 if (buf == NULL) 3751 if (buf == NULL)
3587 res.acl_flags |= NFS4_ACL_LEN_REQUEST; 3752 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3588 resp_buf = page_address(pages[0]);
3589 3753
3590 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", 3754 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
3591 __func__, buf, buflen, npages, args.acl_len); 3755 __func__, buf, buflen, npages, args.acl_len);
@@ -3596,16 +3760,16 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3596 3760
3597 acl_len = res.acl_len - res.acl_data_offset; 3761 acl_len = res.acl_len - res.acl_data_offset;
3598 if (acl_len > args.acl_len) 3762 if (acl_len > args.acl_len)
3599 nfs4_write_cached_acl(inode, NULL, acl_len); 3763 nfs4_write_cached_acl(inode, NULL, 0, acl_len);
3600 else 3764 else
3601 nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, 3765 nfs4_write_cached_acl(inode, pages, res.acl_data_offset,
3602 acl_len); 3766 acl_len);
3603 if (buf) { 3767 if (buf) {
3604 ret = -ERANGE; 3768 ret = -ERANGE;
3605 if (acl_len > buflen) 3769 if (acl_len > buflen)
3606 goto out_free; 3770 goto out_free;
3607 _copy_from_pages(buf, pages, res.acl_data_offset, 3771 _copy_from_pages(buf, pages, res.acl_data_offset,
3608 res.acl_len); 3772 acl_len);
3609 } 3773 }
3610 ret = acl_len; 3774 ret = acl_len;
3611out_free: 3775out_free:
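GETACL stops peeking at page_address(pages[0]): an extra page absorbs the attribute bitmap that precedes the ACL data, and both the cached copy and the user copy are pulled straight from the page array at res.acl_data_offset, so replies spanning page boundaries decode correctly. A userspace model of the walk _copy_from_pages() performs, with tiny pages to make the boundary case visible:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 8   /* tiny pages for demonstration only */

/* Copy len bytes starting at offset pgbase from a page array,
 * one page-sized chunk at a time. */
static void copy_from_pages(char *dst, char *const *pages,
			    size_t pgbase, size_t len)
{
	while (len) {
		const char *p = pages[pgbase / PAGE_SIZE] +
				pgbase % PAGE_SIZE;
		size_t n = PAGE_SIZE - pgbase % PAGE_SIZE;

		if (n > len)
			n = len;
		memcpy(dst, p, n);
		dst += n;
		pgbase += n;
		len -= n;
	}
}

int main(void)
{
	char p0[PAGE_SIZE] = ".......A", p1[PAGE_SIZE] = "CLDATA";
	char *pages[] = { p0, p1 };
	char out[16] = { 0 };

	copy_from_pages(out, pages, 7, 7);   /* crosses the boundary */
	printf("%s\n", out);                 /* prints "ACLDATA" */
	return 0;
}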
@@ -3714,8 +3878,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3714 if (task->tk_status >= 0) 3878 if (task->tk_status >= 0)
3715 return 0; 3879 return 0;
3716 switch(task->tk_status) { 3880 switch(task->tk_status) {
3881 case -NFS4ERR_DELEG_REVOKED:
3717 case -NFS4ERR_ADMIN_REVOKED: 3882 case -NFS4ERR_ADMIN_REVOKED:
3718 case -NFS4ERR_BAD_STATEID: 3883 case -NFS4ERR_BAD_STATEID:
3884 if (state == NULL)
3885 break;
3886 nfs_remove_bad_delegation(state->inode);
3719 case -NFS4ERR_OPENMODE: 3887 case -NFS4ERR_OPENMODE:
3720 if (state == NULL) 3888 if (state == NULL)
3721 break; 3889 break;
@@ -3764,6 +3932,16 @@ wait_on_recovery:
3764 return -EAGAIN; 3932 return -EAGAIN;
3765} 3933}
3766 3934
3935static void nfs4_construct_boot_verifier(struct nfs_client *clp,
3936 nfs4_verifier *bootverf)
3937{
3938 __be32 verf[2];
3939
3940 verf[0] = htonl((u32)clp->cl_boot_time.tv_sec);
3941 verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec);
3942 memcpy(bootverf->data, verf, sizeof(bootverf->data));
3943}
3944
3767int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, 3945int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3768 unsigned short port, struct rpc_cred *cred, 3946 unsigned short port, struct rpc_cred *cred,
3769 struct nfs4_setclientid_res *res) 3947 struct nfs4_setclientid_res *res)
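Building the SETCLIENTID verifier is factored into nfs4_construct_boot_verifier(), which packs the client's boot time into two network-order words, ready to be shared with other callers. A standalone version of the packing, with the current time standing in for cl_boot_time:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

struct verifier { unsigned char data[8]; };

/* Boot (here: current) time, seconds then nanoseconds, as two
 * network-order 32-bit words -- stable for the life of the client,
 * different across restarts, which is all a boot verifier needs. */
static void construct_boot_verifier(struct verifier *v)
{
	struct timespec ts;
	uint32_t words[2];

	clock_gettime(CLOCK_REALTIME, &ts);
	words[0] = htonl((uint32_t)ts.tv_sec);
	words[1] = htonl((uint32_t)ts.tv_nsec);
	memcpy(v->data, words, sizeof(v->data));
}

int main(void)
{
	struct verifier v;

	construct_boot_verifier(&v);
	for (size_t i = 0; i < sizeof(v.data); i++)
		printf("%02x", v.data[i]);
	putchar('\n');
	return 0;
}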
@@ -3780,15 +3958,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3780 .rpc_resp = res, 3958 .rpc_resp = res,
3781 .rpc_cred = cred, 3959 .rpc_cred = cred,
3782 }; 3960 };
3783 __be32 *p;
3784 int loop = 0; 3961 int loop = 0;
3785 int status; 3962 int status;
3786 3963
3787 p = (__be32*)sc_verifier.data; 3964 nfs4_construct_boot_verifier(clp, &sc_verifier);
3788 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
3789 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
3790 3965
3791 for(;;) { 3966 for(;;) {
3967 rcu_read_lock();
3792 setclientid.sc_name_len = scnprintf(setclientid.sc_name, 3968 setclientid.sc_name_len = scnprintf(setclientid.sc_name,
3793 sizeof(setclientid.sc_name), "%s/%s %s %s %u", 3969 sizeof(setclientid.sc_name), "%s/%s %s %s %u",
3794 clp->cl_ipaddr, 3970 clp->cl_ipaddr,
@@ -3805,6 +3981,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3805 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, 3981 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
3806 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 3982 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
3807 clp->cl_ipaddr, port >> 8, port & 255); 3983 clp->cl_ipaddr, port >> 8, port & 255);
3984 rcu_read_unlock();
3808 3985
3809 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 3986 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3810 if (status != -NFS4ERR_CLID_INUSE) 3987 if (status != -NFS4ERR_CLID_INUSE)
@@ -3891,7 +4068,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
3891 4068
3892 if (nfs4_setup_sequence(d_data->res.server, 4069 if (nfs4_setup_sequence(d_data->res.server,
3893 &d_data->args.seq_args, 4070 &d_data->args.seq_args,
3894 &d_data->res.seq_res, 1, task)) 4071 &d_data->res.seq_res, task))
3895 return; 4072 return;
3896 rpc_call_start(task); 4073 rpc_call_start(task);
3897} 4074}
@@ -3925,11 +4102,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3925 data = kzalloc(sizeof(*data), GFP_NOFS); 4102 data = kzalloc(sizeof(*data), GFP_NOFS);
3926 if (data == NULL) 4103 if (data == NULL)
3927 return -ENOMEM; 4104 return -ENOMEM;
4105 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3928 data->args.fhandle = &data->fh; 4106 data->args.fhandle = &data->fh;
3929 data->args.stateid = &data->stateid; 4107 data->args.stateid = &data->stateid;
3930 data->args.bitmask = server->attr_bitmask; 4108 data->args.bitmask = server->attr_bitmask;
3931 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4109 nfs_copy_fh(&data->fh, NFS_FH(inode));
3932 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 4110 nfs4_stateid_copy(&data->stateid, stateid);
3933 data->res.fattr = &data->fattr; 4111 data->res.fattr = &data->fattr;
3934 data->res.server = server; 4112 data->res.server = server;
3935 nfs_fattr_init(data->res.fattr); 4113 nfs_fattr_init(data->res.fattr);
@@ -4016,7 +4194,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
4016 if (status != 0) 4194 if (status != 0)
4017 goto out; 4195 goto out;
4018 lsp = request->fl_u.nfs4_fl.owner; 4196 lsp = request->fl_u.nfs4_fl.owner;
4019 arg.lock_owner.id = lsp->ls_id.id; 4197 arg.lock_owner.id = lsp->ls_seqid.owner_id;
4020 arg.lock_owner.s_dev = server->s_dev; 4198 arg.lock_owner.s_dev = server->s_dev;
4021 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 4199 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
4022 switch (status) { 4200 switch (status) {
@@ -4112,9 +4290,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
4112 return; 4290 return;
4113 switch (task->tk_status) { 4291 switch (task->tk_status) {
4114 case 0: 4292 case 0:
4115 memcpy(calldata->lsp->ls_stateid.data, 4293 nfs4_stateid_copy(&calldata->lsp->ls_stateid,
4116 calldata->res.stateid.data, 4294 &calldata->res.stateid);
4117 sizeof(calldata->lsp->ls_stateid.data));
4118 renew_lease(calldata->server, calldata->timestamp); 4295 renew_lease(calldata->server, calldata->timestamp);
4119 break; 4296 break;
4120 case -NFS4ERR_BAD_STATEID: 4297 case -NFS4ERR_BAD_STATEID:
@@ -4142,7 +4319,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4142 calldata->timestamp = jiffies; 4319 calldata->timestamp = jiffies;
4143 if (nfs4_setup_sequence(calldata->server, 4320 if (nfs4_setup_sequence(calldata->server,
4144 &calldata->arg.seq_args, 4321 &calldata->arg.seq_args,
4145 &calldata->res.seq_res, 1, task)) 4322 &calldata->res.seq_res, task))
4146 return; 4323 return;
4147 rpc_call_start(task); 4324 rpc_call_start(task);
4148} 4325}
@@ -4182,6 +4359,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4182 return ERR_PTR(-ENOMEM); 4359 return ERR_PTR(-ENOMEM);
4183 } 4360 }
4184 4361
4362 nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
4185 msg.rpc_argp = &data->arg; 4363 msg.rpc_argp = &data->arg;
4186 msg.rpc_resp = &data->res; 4364 msg.rpc_resp = &data->res;
4187 task_setup_data.callback_data = data; 4365 task_setup_data.callback_data = data;
@@ -4261,7 +4439,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4261 goto out_free_seqid; 4439 goto out_free_seqid;
4262 p->arg.lock_stateid = &lsp->ls_stateid; 4440 p->arg.lock_stateid = &lsp->ls_stateid;
4263 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 4441 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4264 p->arg.lock_owner.id = lsp->ls_id.id; 4442 p->arg.lock_owner.id = lsp->ls_seqid.owner_id;
4265 p->arg.lock_owner.s_dev = server->s_dev; 4443 p->arg.lock_owner.s_dev = server->s_dev;
4266 p->res.lock_seqid = p->arg.lock_seqid; 4444 p->res.lock_seqid = p->arg.lock_seqid;
4267 p->lsp = lsp; 4445 p->lsp = lsp;
@@ -4297,7 +4475,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4297 data->timestamp = jiffies; 4475 data->timestamp = jiffies;
4298 if (nfs4_setup_sequence(data->server, 4476 if (nfs4_setup_sequence(data->server,
4299 &data->arg.seq_args, 4477 &data->arg.seq_args,
4300 &data->res.seq_res, 1, task)) 4478 &data->res.seq_res, task))
4301 return; 4479 return;
4302 rpc_call_start(task); 4480 rpc_call_start(task);
4303 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4481 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
@@ -4326,8 +4504,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4326 goto out; 4504 goto out;
4327 } 4505 }
4328 if (data->rpc_status == 0) { 4506 if (data->rpc_status == 0) {
4329 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 4507 nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
4330 sizeof(data->lsp->ls_stateid.data));
4331 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 4508 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
4332 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 4509 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
4333 } 4510 }
@@ -4415,6 +4592,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4415 data->arg.reclaim = NFS_LOCK_RECLAIM; 4592 data->arg.reclaim = NFS_LOCK_RECLAIM;
4416 task_setup_data.callback_ops = &nfs4_recover_lock_ops; 4593 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4417 } 4594 }
4595 nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
4418 msg.rpc_argp = &data->arg; 4596 msg.rpc_argp = &data->arg;
4419 msg.rpc_resp = &data->res; 4597 msg.rpc_resp = &data->res;
4420 task_setup_data.callback_data = data; 4598 task_setup_data.callback_data = data;
@@ -4437,7 +4615,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4437static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) 4615static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
4438{ 4616{
4439 struct nfs_server *server = NFS_SERVER(state->inode); 4617 struct nfs_server *server = NFS_SERVER(state->inode);
4440 struct nfs4_exception exception = { }; 4618 struct nfs4_exception exception = {
4619 .inode = state->inode,
4620 };
4441 int err; 4621 int err;
4442 4622
4443 do { 4623 do {
@@ -4455,7 +4635,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4455static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) 4635static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
4456{ 4636{
4457 struct nfs_server *server = NFS_SERVER(state->inode); 4637 struct nfs_server *server = NFS_SERVER(state->inode);
4458 struct nfs4_exception exception = { }; 4638 struct nfs4_exception exception = {
4639 .inode = state->inode,
4640 };
4459 int err; 4641 int err;
4460 4642
4461 err = nfs4_set_lock_state(state, request); 4643 err = nfs4_set_lock_state(state, request);
@@ -4479,15 +4661,34 @@ out:
4479} 4661}
4480 4662
4481#if defined(CONFIG_NFS_V4_1) 4663#if defined(CONFIG_NFS_V4_1)
4482static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) 4664static int nfs41_check_expired_locks(struct nfs4_state *state)
4483{ 4665{
4484 int status; 4666 int status, ret = NFS_OK;
4667 struct nfs4_lock_state *lsp;
4485 struct nfs_server *server = NFS_SERVER(state->inode); 4668 struct nfs_server *server = NFS_SERVER(state->inode);
4486 4669
4487 status = nfs41_test_stateid(server, state); 4670 list_for_each_entry(lsp, &state->lock_states, ls_locks) {
4671 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
4672 status = nfs41_test_stateid(server, &lsp->ls_stateid);
4673 if (status != NFS_OK) {
4674 nfs41_free_stateid(server, &lsp->ls_stateid);
4675 lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
4676 ret = status;
4677 }
4678 }
 4679 }
4680
4681 return ret;
4682}
4683
4684static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
4685{
4686 int status = NFS_OK;
4687
4688 if (test_bit(LK_STATE_IN_USE, &state->flags))
4689 status = nfs41_check_expired_locks(state);
4488 if (status == NFS_OK) 4690 if (status == NFS_OK)
4489 return 0; 4691 return status;
4490 nfs41_free_stateid(server, state);
4491 return nfs4_lock_expired(state, request); 4692 return nfs4_lock_expired(state, request);
4492} 4693}
4493#endif 4694#endif
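The new nfs41_lock_expired() path above only probes the server when the open state actually carries locks (LK_STATE_IN_USE), and then validates each initialized lock stateid with TEST_STATEID, issuing FREE_STATEID for any the server no longer recognizes. A minimal sketch of that probe-then-free pattern; test_stateid() and free_stateid() are hypothetical stand-ins for the real nfs41_test_stateid()/nfs41_free_stateid() RPCs:

/*
 * Sketch only: test_stateid()/free_stateid() stand in for the NFSv4.1
 * compound calls, which return NFS_OK (0) when the server still
 * recognizes the stateid.
 */
static int check_expired_locks(struct nfs4_state *state,
			       struct nfs_server *server)
{
	struct nfs4_lock_state *lsp;
	int status, ret = NFS_OK;

	list_for_each_entry(lsp, &state->lock_states, ls_locks) {
		if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED))
			continue;	/* lock never confirmed by server */
		status = test_stateid(server, &lsp->ls_stateid);
		if (status != NFS_OK) {
			/* Server forgot this lock: free its slot and
			 * clear the flag so recovery re-establishes it. */
			free_stateid(server, &lsp->ls_stateid);
			lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
			ret = status;
		}
	}
	return ret;
}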
@@ -4523,7 +4724,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4523 /* Note: we always want to sleep here! */ 4724 /* Note: we always want to sleep here! */
4524 request->fl_flags = fl_flags | FL_SLEEP; 4725 request->fl_flags = fl_flags | FL_SLEEP;
4525 if (do_vfs_lock(request->fl_file, request) < 0) 4726 if (do_vfs_lock(request->fl_file, request) < 0)
4526 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); 4727 printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock "
4728 "manager!\n", __func__);
4527out_unlock: 4729out_unlock:
4528 up_read(&nfsi->rwsem); 4730 up_read(&nfsi->rwsem);
4529out: 4731out:
@@ -4533,7 +4735,10 @@ out:
4533 4735
4534static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4736static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4535{ 4737{
4536 struct nfs4_exception exception = { }; 4738 struct nfs4_exception exception = {
4739 .state = state,
4740 .inode = state->inode,
4741 };
4537 int err; 4742 int err;
4538 4743
4539 do { 4744 do {
@@ -4578,6 +4783,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
4578 4783
4579 if (state == NULL) 4784 if (state == NULL)
4580 return -ENOLCK; 4785 return -ENOLCK;
4786 /*
4787 * Don't rely on the VFS having checked the file open mode,
4788 * since it won't do this for flock() locks.
4789 */
4790 switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
4791 case F_RDLCK:
4792 if (!(filp->f_mode & FMODE_READ))
4793 return -EBADF;
4794 break;
4795 case F_WRLCK:
4796 if (!(filp->f_mode & FMODE_WRITE))
4797 return -EBADF;
4798 }
4799
4581 do { 4800 do {
4582 status = nfs4_proc_setlk(state, cmd, request); 4801 status = nfs4_proc_setlk(state, cmd, request);
4583 if ((status != -EAGAIN) || IS_SETLK(cmd)) 4802 if ((status != -EAGAIN) || IS_SETLK(cmd))
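The open-mode check added above exists because the VFS validates lock type against the file's open mode for POSIX locks but not for flock() locks, so the client must reject mismatches itself with EBADF before contacting the server. A small userspace-flavoured sketch of the same rule:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>

/* Return 0 if the lock type is compatible with the descriptor's access
 * mode, -EBADF otherwise (mirrors the FMODE_READ/FMODE_WRITE test in
 * the diff). */
static int check_lock_mode(int fl_type, int accmode)
{
	switch (fl_type) {
	case F_RDLCK:
		return accmode == O_WRONLY ? -EBADF : 0;
	case F_WRLCK:
		return accmode == O_RDONLY ? -EBADF : 0;
	default:		/* F_UNLCK is always permitted */
		return 0;
	}
}

int main(void)
{
	printf("%d\n", check_lock_mode(F_RDLCK, O_WRONLY));	/* -EBADF */
	return 0;
}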
@@ -4603,8 +4822,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4603 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); 4822 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4604 switch (err) { 4823 switch (err) {
4605 default: 4824 default:
4606 printk(KERN_ERR "%s: unhandled error %d.\n", 4825 printk(KERN_ERR "NFS: %s: unhandled error "
4607 __func__, err); 4826 "%d.\n", __func__, err);
4608 case 0: 4827 case 0:
4609 case -ESTALE: 4828 case -ESTALE:
4610 goto out; 4829 goto out;
@@ -4626,6 +4845,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4626 * The show must go on: exit, but mark the 4845 * The show must go on: exit, but mark the
4627 * stateid as needing recovery. 4846 * stateid as needing recovery.
4628 */ 4847 */
4848 case -NFS4ERR_DELEG_REVOKED:
4629 case -NFS4ERR_ADMIN_REVOKED: 4849 case -NFS4ERR_ADMIN_REVOKED:
4630 case -NFS4ERR_BAD_STATEID: 4850 case -NFS4ERR_BAD_STATEID:
4631 case -NFS4ERR_OPENMODE: 4851 case -NFS4ERR_OPENMODE:
@@ -4655,33 +4875,44 @@ out:
4655 return err; 4875 return err;
4656} 4876}
4657 4877
4878struct nfs_release_lockowner_data {
4879 struct nfs4_lock_state *lsp;
4880 struct nfs_server *server;
4881 struct nfs_release_lockowner_args args;
4882};
4883
4658static void nfs4_release_lockowner_release(void *calldata) 4884static void nfs4_release_lockowner_release(void *calldata)
4659{ 4885{
4886 struct nfs_release_lockowner_data *data = calldata;
4887 nfs4_free_lock_state(data->server, data->lsp);
4660 kfree(calldata); 4888 kfree(calldata);
4661} 4889}
4662 4890
4663const struct rpc_call_ops nfs4_release_lockowner_ops = { 4891static const struct rpc_call_ops nfs4_release_lockowner_ops = {
4664 .rpc_release = nfs4_release_lockowner_release, 4892 .rpc_release = nfs4_release_lockowner_release,
4665}; 4893};
4666 4894
4667void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) 4895int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
4668{ 4896{
4669 struct nfs_server *server = lsp->ls_state->owner->so_server; 4897 struct nfs_server *server = lsp->ls_state->owner->so_server;
4670 struct nfs_release_lockowner_args *args; 4898 struct nfs_release_lockowner_data *data;
4671 struct rpc_message msg = { 4899 struct rpc_message msg = {
4672 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], 4900 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
4673 }; 4901 };
4674 4902
4675 if (server->nfs_client->cl_mvops->minor_version != 0) 4903 if (server->nfs_client->cl_mvops->minor_version != 0)
4676 return; 4904 return -EINVAL;
4677 args = kmalloc(sizeof(*args), GFP_NOFS); 4905 data = kmalloc(sizeof(*data), GFP_NOFS);
4678 if (!args) 4906 if (!data)
4679 return; 4907 return -ENOMEM;
4680 args->lock_owner.clientid = server->nfs_client->cl_clientid; 4908 data->lsp = lsp;
4681 args->lock_owner.id = lsp->ls_id.id; 4909 data->server = server;
4682 args->lock_owner.s_dev = server->s_dev; 4910 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
4683 msg.rpc_argp = args; 4911 data->args.lock_owner.id = lsp->ls_seqid.owner_id;
4684 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); 4912 data->args.lock_owner.s_dev = server->s_dev;
4913 msg.rpc_argp = &data->args;
4914 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
4915 return 0;
4685} 4916}
4686 4917
4687#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4918#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
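The RELEASE_LOCKOWNER rework above changes the call from fire-and-forget-and-leak to fire-and-forget-and-free: the callback data now carries both the lock state and the server, so the RPC's ->rpc_release hook can free the lock state only after the server has processed the request, and the new int return value lets nfs4_put_lock_state() skip its own synchronous free (see the matching nfs4state.c hunk later in this diff). A sketch of the ownership handoff, with names mirroring the diff:

/* Sketch: the calldata owns everything rpc_release needs, so the
 * caller may return as soon as rpc_call_async() has queued the task. */
struct lockowner_calldata {
	struct nfs4_lock_state *lsp;
	struct nfs_server *server;
	struct nfs_release_lockowner_args args;
};

static void lockowner_release(void *calldata)
{
	struct lockowner_calldata *data = calldata;

	nfs4_free_lock_state(data->server, data->lsp);	/* deferred free */
	kfree(data);
}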
@@ -4727,17 +4958,19 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
4727 if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || 4958 if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
4728 (fattr->valid & NFS_ATTR_FATTR_FILEID)) && 4959 (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
4729 (fattr->valid & NFS_ATTR_FATTR_FSID) && 4960 (fattr->valid & NFS_ATTR_FATTR_FSID) &&
4730 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) 4961 (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)))
4731 return; 4962 return;
4732 4963
4733 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | 4964 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
4734 NFS_ATTR_FATTR_NLINK; 4965 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL;
4735 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; 4966 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
4736 fattr->nlink = 2; 4967 fattr->nlink = 2;
4737} 4968}
4738 4969
4739int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 4970static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
4740 struct nfs4_fs_locations *fs_locations, struct page *page) 4971 const struct qstr *name,
4972 struct nfs4_fs_locations *fs_locations,
4973 struct page *page)
4741{ 4974{
4742 struct nfs_server *server = NFS_SERVER(dir); 4975 struct nfs_server *server = NFS_SERVER(dir);
4743 u32 bitmask[2] = { 4976 u32 bitmask[2] = {
@@ -4771,11 +5004,26 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4771 nfs_fattr_init(&fs_locations->fattr); 5004 nfs_fattr_init(&fs_locations->fattr);
4772 fs_locations->server = server; 5005 fs_locations->server = server;
4773 fs_locations->nlocations = 0; 5006 fs_locations->nlocations = 0;
4774 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 5007 status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0);
4775 dprintk("%s: returned status = %d\n", __func__, status); 5008 dprintk("%s: returned status = %d\n", __func__, status);
4776 return status; 5009 return status;
4777} 5010}
4778 5011
5012int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
5013 const struct qstr *name,
5014 struct nfs4_fs_locations *fs_locations,
5015 struct page *page)
5016{
5017 struct nfs4_exception exception = { };
5018 int err;
5019 do {
5020 err = nfs4_handle_exception(NFS_SERVER(dir),
5021 _nfs4_proc_fs_locations(client, dir, name, fs_locations, page),
5022 &exception);
5023 } while (exception.retry);
5024 return err;
5025}
5026
4779static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) 5027static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4780{ 5028{
4781 int status; 5029 int status;
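The new nfs4_proc_fs_locations() wrapper above follows this file's standard _nfs4_proc/nfs4_proc split: the inner function issues exactly one compound, and the outer one re-drives it through nfs4_handle_exception() for retryable errors such as NFS4ERR_DELAY or NFS4ERR_GRACE. A minimal sketch of the idiom, where _proc_foo() is a hypothetical single-shot RPC:

static int proc_foo(struct nfs_server *server, void *args)
{
	struct nfs4_exception exception = { };
	int err;

	do {
		/* one RPC per iteration; the exception handler decides
		 * whether the returned error merits another attempt */
		err = nfs4_handle_exception(server,
					    _proc_foo(server, args),
					    &exception);
	} while (exception.retry);
	return err;
}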
@@ -4798,7 +5046,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
4798 return status; 5046 return status;
4799} 5047}
4800 5048
4801int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) 5049int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
5050 struct nfs4_secinfo_flavors *flavors)
4802{ 5051{
4803 struct nfs4_exception exception = { }; 5052 struct nfs4_exception exception = { };
4804 int err; 5053 int err;
@@ -4852,6 +5101,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4852{ 5101{
4853 nfs4_verifier verifier; 5102 nfs4_verifier verifier;
4854 struct nfs41_exchange_id_args args = { 5103 struct nfs41_exchange_id_args args = {
5104 .verifier = &verifier,
4855 .client = clp, 5105 .client = clp,
4856 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, 5106 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
4857 }; 5107 };
@@ -4865,21 +5115,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4865 .rpc_resp = &res, 5115 .rpc_resp = &res,
4866 .rpc_cred = cred, 5116 .rpc_cred = cred,
4867 }; 5117 };
4868 __be32 *p;
4869 5118
4870 dprintk("--> %s\n", __func__); 5119 dprintk("--> %s\n", __func__);
4871 BUG_ON(clp == NULL); 5120 BUG_ON(clp == NULL);
4872 5121
4873 p = (u32 *)verifier.data; 5122 nfs4_construct_boot_verifier(clp, &verifier);
4874 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
4875 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
4876 args.verifier = &verifier;
4877 5123
4878 args.id_len = scnprintf(args.id, sizeof(args.id), 5124 args.id_len = scnprintf(args.id, sizeof(args.id),
4879 "%s/%s.%s/%u", 5125 "%s/%s/%u",
4880 clp->cl_ipaddr, 5126 clp->cl_ipaddr,
4881 init_utsname()->nodename, 5127 clp->cl_rpcclient->cl_nodename,
4882 init_utsname()->domainname,
4883 clp->cl_rpcclient->cl_auth->au_flavor); 5128 clp->cl_rpcclient->cl_auth->au_flavor);
4884 5129
4885 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 5130 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
@@ -4888,11 +5133,24 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4888 goto out; 5133 goto out;
4889 } 5134 }
4890 5135
5136 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
5137 if (unlikely(!res.impl_id)) {
5138 status = -ENOMEM;
5139 goto out_server_scope;
5140 }
5141
4891 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5142 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4892 if (!status) 5143 if (!status)
4893 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 5144 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4894 5145
4895 if (!status) { 5146 if (!status) {
5147 /* use the most recent implementation id */
5148 kfree(clp->impl_id);
5149 clp->impl_id = res.impl_id;
5150 } else
5151 kfree(res.impl_id);
5152
5153 if (!status) {
4896 if (clp->server_scope && 5154 if (clp->server_scope &&
4897 !nfs41_same_server_scope(clp->server_scope, 5155 !nfs41_same_server_scope(clp->server_scope,
4898 res.server_scope)) { 5156 res.server_scope)) {
@@ -4908,8 +5166,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4908 goto out; 5166 goto out;
4909 } 5167 }
4910 } 5168 }
5169
5170out_server_scope:
4911 kfree(res.server_scope); 5171 kfree(res.server_scope);
4912out: 5172out:
5173 if (clp->impl_id)
5174 dprintk("%s: Server Implementation ID: "
5175 "domain: %s, name: %s, date: %llu,%u\n",
5176 __func__, clp->impl_id->domain, clp->impl_id->name,
5177 clp->impl_id->date.seconds,
5178 clp->impl_id->date.nseconds);
4913 dprintk("<-- %s status= %d\n", __func__, status); 5179 dprintk("<-- %s status= %d\n", __func__, status);
4914 return status; 5180 return status;
4915} 5181}
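Two things change in EXCHANGE_ID above: the boot verifier is built by a shared nfs4_construct_boot_verifier() helper instead of being open-coded, and the client identifier string drops the NIS domain in favour of the RPC client's nodename. The removed open-coded packing is reproduced here as a reference for what the helper is expected to produce, an 8-byte verifier holding the client's boot time:

static void construct_boot_verifier(const struct nfs_client *clp,
				    nfs4_verifier *verifier)
{
	__be32 *p = (__be32 *)verifier->data;

	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
	*p   = htonl((u32)clp->cl_boot_time.tv_nsec);
}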
@@ -4933,7 +5199,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4933 since we're invoked within one */ 5199 since we're invoked within one */
4934 ret = nfs41_setup_sequence(data->clp->cl_session, 5200 ret = nfs41_setup_sequence(data->clp->cl_session,
4935 &data->args->la_seq_args, 5201 &data->args->la_seq_args,
4936 &data->res->lr_seq_res, 0, task); 5202 &data->res->lr_seq_res, task);
4937 5203
4938 BUG_ON(ret == -EAGAIN); 5204 BUG_ON(ret == -EAGAIN);
4939 rpc_call_start(task); 5205 rpc_call_start(task);
@@ -4966,7 +5232,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4966 dprintk("<-- %s\n", __func__); 5232 dprintk("<-- %s\n", __func__);
4967} 5233}
4968 5234
4969struct rpc_call_ops nfs4_get_lease_time_ops = { 5235static const struct rpc_call_ops nfs4_get_lease_time_ops = {
4970 .rpc_call_prepare = nfs4_get_lease_time_prepare, 5236 .rpc_call_prepare = nfs4_get_lease_time_prepare,
4971 .rpc_call_done = nfs4_get_lease_time_done, 5237 .rpc_call_done = nfs4_get_lease_time_done,
4972}; 5238};
@@ -4997,6 +5263,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4997 }; 5263 };
4998 int status; 5264 int status;
4999 5265
5266 nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
5000 dprintk("--> %s\n", __func__); 5267 dprintk("--> %s\n", __func__);
5001 task = rpc_run_task(&task_setup); 5268 task = rpc_run_task(&task_setup);
5002 5269
@@ -5113,13 +5380,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5113 return NULL; 5380 return NULL;
5114 5381
5115 tbl = &session->fc_slot_table; 5382 tbl = &session->fc_slot_table;
5116 tbl->highest_used_slotid = -1; 5383 tbl->highest_used_slotid = NFS4_NO_SLOT;
5117 spin_lock_init(&tbl->slot_tbl_lock); 5384 spin_lock_init(&tbl->slot_tbl_lock);
5118 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 5385 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
5119 init_completion(&tbl->complete); 5386 init_completion(&tbl->complete);
5120 5387
5121 tbl = &session->bc_slot_table; 5388 tbl = &session->bc_slot_table;
5122 tbl->highest_used_slotid = -1; 5389 tbl->highest_used_slotid = NFS4_NO_SLOT;
5123 spin_lock_init(&tbl->slot_tbl_lock); 5390 spin_lock_init(&tbl->slot_tbl_lock);
5124 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 5391 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
5125 init_completion(&tbl->complete); 5392 init_completion(&tbl->complete);
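Replacing -1 with NFS4_NO_SLOT above is more than cosmetic: highest_used_slotid is being treated as an unsigned slot number, and assigning -1 to an unsigned field quietly produces the all-ones value. Naming that sentinel documents the intent. A self-contained illustration:

#include <stdio.h>

#define NO_SLOT ((unsigned int)-1)	/* all-ones: never a valid slot id */

int main(void)
{
	unsigned int highest_used = NO_SLOT;

	if (highest_used == NO_SLOT)
		puts("slot table is empty");
	return 0;
}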
@@ -5132,11 +5399,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5132 5399
5133void nfs4_destroy_session(struct nfs4_session *session) 5400void nfs4_destroy_session(struct nfs4_session *session)
5134{ 5401{
5402 struct rpc_xprt *xprt;
5403
5135 nfs4_proc_destroy_session(session); 5404 nfs4_proc_destroy_session(session);
5405
5406 rcu_read_lock();
5407 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
5408 rcu_read_unlock();
5136 dprintk("%s Destroy backchannel for xprt %p\n", 5409 dprintk("%s Destroy backchannel for xprt %p\n",
5137 __func__, session->clp->cl_rpcclient->cl_xprt); 5410 __func__, xprt);
5138 xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, 5411 xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
5139 NFS41_BC_MIN_CALLBACKS);
5140 nfs4_destroy_slot_tables(session); 5412 nfs4_destroy_slot_tables(session);
5141 kfree(session); 5413 kfree(session);
5142} 5414}
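The destroy-session hunk above reflects cl_xprt becoming an RCU-managed pointer: it must be read via rcu_dereference() inside a read-side critical section rather than dereferenced directly. Note that the snapshot is used after rcu_read_unlock(), which is only safe if something else pins the transport; the diff relies on the session still holding the backchannel at this point. A sketch of the pattern:

/* Sketch: snapshot an __rcu pointer.  Using the result after
 * rcu_read_unlock() assumes the caller otherwise keeps the object
 * alive, exactly as the diff does here. */
static struct rpc_xprt *snapshot_xprt(struct nfs_client *clp)
{
	struct rpc_xprt *xprt;

	rcu_read_lock();
	xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
	rcu_read_unlock();
	return xprt;
}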
@@ -5164,7 +5436,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
5164 args->fc_attrs.max_rqst_sz = mxrqst_sz; 5436 args->fc_attrs.max_rqst_sz = mxrqst_sz;
5165 args->fc_attrs.max_resp_sz = mxresp_sz; 5437 args->fc_attrs.max_resp_sz = mxresp_sz;
5166 args->fc_attrs.max_ops = NFS4_MAX_OPS; 5438 args->fc_attrs.max_ops = NFS4_MAX_OPS;
5167 args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; 5439 args->fc_attrs.max_reqs = max_session_slots;
5168 5440
5169 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " 5441 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
5170 "max_ops=%u max_reqs=%u\n", 5442 "max_ops=%u max_reqs=%u\n",
@@ -5204,6 +5476,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
5204 return -EINVAL; 5476 return -EINVAL;
5205 if (rcvd->max_reqs == 0) 5477 if (rcvd->max_reqs == 0)
5206 return -EINVAL; 5478 return -EINVAL;
5479 if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE)
5480 rcvd->max_reqs = NFS4_MAX_SLOT_TABLE;
5207 return 0; 5481 return 0;
5208} 5482}
5209 5483
@@ -5219,9 +5493,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
5219 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) 5493 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
5220 return -EINVAL; 5494 return -EINVAL;
5221 /* These would render the backchannel useless: */ 5495 /* These would render the backchannel useless: */
5222 if (rcvd->max_ops == 0) 5496 if (rcvd->max_ops != sent->max_ops)
5223 return -EINVAL; 5497 return -EINVAL;
5224 if (rcvd->max_reqs == 0) 5498 if (rcvd->max_reqs != sent->max_reqs)
5225 return -EINVAL; 5499 return -EINVAL;
5226 return 0; 5500 return 0;
5227} 5501}
@@ -5324,7 +5598,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5324 5598
5325 if (status) 5599 if (status)
5326 printk(KERN_WARNING 5600 printk(KERN_WARNING
5327 "Got error %d from the server on DESTROY_SESSION. " 5601 "NFS: Got error %d from the server on DESTROY_SESSION. "
5328 "Session has been destroyed regardless...\n", status); 5602 "Session has been destroyed regardless...\n", status);
5329 5603
5330 dprintk("<-- nfs4_proc_destroy_session\n"); 5604 dprintk("<-- nfs4_proc_destroy_session\n");
@@ -5447,7 +5721,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
5447 args = task->tk_msg.rpc_argp; 5721 args = task->tk_msg.rpc_argp;
5448 res = task->tk_msg.rpc_resp; 5722 res = task->tk_msg.rpc_resp;
5449 5723
5450 if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) 5724 if (nfs41_setup_sequence(clp->cl_session, args, res, task))
5451 return; 5725 return;
5452 rpc_call_start(task); 5726 rpc_call_start(task);
5453} 5727}
@@ -5479,6 +5753,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5479 nfs_put_client(clp); 5753 nfs_put_client(clp);
5480 return ERR_PTR(-ENOMEM); 5754 return ERR_PTR(-ENOMEM);
5481 } 5755 }
5756 nfs41_init_sequence(&calldata->args, &calldata->res, 0);
5482 msg.rpc_argp = &calldata->args; 5757 msg.rpc_argp = &calldata->args;
5483 msg.rpc_resp = &calldata->res; 5758 msg.rpc_resp = &calldata->res;
5484 calldata->clp = clp; 5759 calldata->clp = clp;
@@ -5540,7 +5815,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
5540 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 5815 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
5541 if (nfs41_setup_sequence(calldata->clp->cl_session, 5816 if (nfs41_setup_sequence(calldata->clp->cl_session,
5542 &calldata->arg.seq_args, 5817 &calldata->arg.seq_args,
5543 &calldata->res.seq_res, 0, task)) 5818 &calldata->res.seq_res, task))
5544 return; 5819 return;
5545 5820
5546 rpc_call_start(task); 5821 rpc_call_start(task);
@@ -5619,6 +5894,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5619 calldata->clp = clp; 5894 calldata->clp = clp;
5620 calldata->arg.one_fs = 0; 5895 calldata->arg.one_fs = 0;
5621 5896
5897 nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
5622 msg.rpc_argp = &calldata->arg; 5898 msg.rpc_argp = &calldata->arg;
5623 msg.rpc_resp = &calldata->res; 5899 msg.rpc_resp = &calldata->res;
5624 task_setup_data.callback_data = calldata; 5900 task_setup_data.callback_data = calldata;
@@ -5650,7 +5926,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5650 * to be no way to prevent it completely. 5926 * to be no way to prevent it completely.
5651 */ 5927 */
5652 if (nfs4_setup_sequence(server, &lgp->args.seq_args, 5928 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5653 &lgp->res.seq_res, 0, task)) 5929 &lgp->res.seq_res, task))
5654 return; 5930 return;
5655 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 5931 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
5656 NFS_I(lgp->args.inode)->layout, 5932 NFS_I(lgp->args.inode)->layout,
@@ -5725,6 +6001,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5725 6001
5726 lgp->res.layoutp = &lgp->args.layout; 6002 lgp->res.layoutp = &lgp->args.layout;
5727 lgp->res.seq_res.sr_slot = NULL; 6003 lgp->res.seq_res.sr_slot = NULL;
6004 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
5728 task = rpc_run_task(&task_setup_data); 6005 task = rpc_run_task(&task_setup_data);
5729 if (IS_ERR(task)) 6006 if (IS_ERR(task))
5730 return PTR_ERR(task); 6007 return PTR_ERR(task);
@@ -5745,7 +6022,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
5745 6022
5746 dprintk("--> %s\n", __func__); 6023 dprintk("--> %s\n", __func__);
5747 if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, 6024 if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
5748 &lrp->res.seq_res, 0, task)) 6025 &lrp->res.seq_res, task))
5749 return; 6026 return;
5750 rpc_call_start(task); 6027 rpc_call_start(task);
5751} 6028}
@@ -5811,6 +6088,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
5811 int status; 6088 int status;
5812 6089
5813 dprintk("--> %s\n", __func__); 6090 dprintk("--> %s\n", __func__);
6091 nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
5814 task = rpc_run_task(&task_setup_data); 6092 task = rpc_run_task(&task_setup_data);
5815 if (IS_ERR(task)) 6093 if (IS_ERR(task))
5816 return PTR_ERR(task); 6094 return PTR_ERR(task);
@@ -5911,7 +6189,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5911 struct nfs_server *server = NFS_SERVER(data->args.inode); 6189 struct nfs_server *server = NFS_SERVER(data->args.inode);
5912 6190
5913 if (nfs4_setup_sequence(server, &data->args.seq_args, 6191 if (nfs4_setup_sequence(server, &data->args.seq_args,
5914 &data->res.seq_res, 1, task)) 6192 &data->res.seq_res, task))
5915 return; 6193 return;
5916 rpc_call_start(task); 6194 rpc_call_start(task);
5917} 6195}
@@ -5926,21 +6204,22 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5926 return; 6204 return;
5927 6205
5928 switch (task->tk_status) { /* Just ignore these failures */ 6206 switch (task->tk_status) { /* Just ignore these failures */
5929 case NFS4ERR_DELEG_REVOKED: /* layout was recalled */ 6207 case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5930 case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ 6208 case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5931 case NFS4ERR_BADLAYOUT: /* no layout */ 6209 case -NFS4ERR_BADLAYOUT: /* no layout */
5932 case NFS4ERR_GRACE: /* loca_reclaim always false */ 6210 case -NFS4ERR_GRACE: /* loca_reclaim always false */
5933 task->tk_status = 0; 6211 task->tk_status = 0;
5934 } 6212 break;
5935 6213 case 0:
5936 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5937 rpc_restart_call_prepare(task);
5938 return;
5939 }
5940
5941 if (task->tk_status == 0)
5942 nfs_post_op_update_inode_force_wcc(data->args.inode, 6214 nfs_post_op_update_inode_force_wcc(data->args.inode,
5943 data->res.fattr); 6215 data->res.fattr);
6216 break;
6217 default:
6218 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
6219 rpc_restart_call_prepare(task);
6220 return;
6221 }
6222 }
5944} 6223}
5945 6224
5946static void nfs4_layoutcommit_release(void *calldata) 6225static void nfs4_layoutcommit_release(void *calldata)
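The layoutcommit completion hunk above fixes two problems at once. First, client-side NFSv4 error codes are stored negated, so the old positive case labels (NFS4ERR_DELEG_REVOKED and friends) could never match and the "just ignore" cases were dead code. Second, the generic error handler now runs only in the default branch, so ignored errors no longer fall through into it. A runnable illustration of the negation pitfall, using a hypothetical error code:

#include <stdio.h>

enum { ERR_BADLAYOUT = 1001 };	/* hypothetical; real codes live in nfs4.h */

static void layoutcommit_done(int tk_status)
{
	switch (tk_status) {
	case -ERR_BADLAYOUT:	/* must be negative to ever match */
		puts("ignored");
		break;
	case 0:
		puts("success: update cached attributes");
		break;
	default:
		puts("run generic error handling");
	}
}

int main(void)
{
	layoutcommit_done(-ERR_BADLAYOUT);
	layoutcommit_done(0);
	return 0;
}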
@@ -5998,6 +6277,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5998 data->args.lastbytewritten, 6277 data->args.lastbytewritten,
5999 data->args.inode->i_ino); 6278 data->args.inode->i_ino);
6000 6279
6280 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
6001 task = rpc_run_task(&task_setup_data); 6281 task = rpc_run_task(&task_setup_data);
6002 if (IS_ERR(task)) 6282 if (IS_ERR(task))
6003 return PTR_ERR(task); 6283 return PTR_ERR(task);
@@ -6043,11 +6323,12 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
6043 case 0: 6323 case 0:
6044 case -NFS4ERR_WRONGSEC: 6324 case -NFS4ERR_WRONGSEC:
6045 case -NFS4ERR_NOTSUPP: 6325 case -NFS4ERR_NOTSUPP:
6046 break; 6326 goto out;
6047 default: 6327 default:
6048 err = nfs4_handle_exception(server, err, &exception); 6328 err = nfs4_handle_exception(server, err, &exception);
6049 } 6329 }
6050 } while (exception.retry); 6330 } while (exception.retry);
6331out:
6051 return err; 6332 return err;
6052} 6333}
6053 6334
@@ -6091,11 +6372,12 @@ out_freepage:
6091out: 6372out:
6092 return err; 6373 return err;
6093} 6374}
6094static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 6375
6376static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6095{ 6377{
6096 int status; 6378 int status;
6097 struct nfs41_test_stateid_args args = { 6379 struct nfs41_test_stateid_args args = {
6098 .stateid = &state->stateid, 6380 .stateid = stateid,
6099 }; 6381 };
6100 struct nfs41_test_stateid_res res; 6382 struct nfs41_test_stateid_res res;
6101 struct rpc_message msg = { 6383 struct rpc_message msg = {
@@ -6103,28 +6385,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta
6103 .rpc_argp = &args, 6385 .rpc_argp = &args,
6104 .rpc_resp = &res, 6386 .rpc_resp = &res,
6105 }; 6387 };
6106 args.seq_args.sa_session = res.seq_res.sr_session = NULL; 6388
6107 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 6389 nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
6390 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
6391
6392 if (status == NFS_OK)
6393 return res.status;
6108 return status; 6394 return status;
6109} 6395}
6110 6396
6111static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 6397static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6112{ 6398{
6113 struct nfs4_exception exception = { }; 6399 struct nfs4_exception exception = { };
6114 int err; 6400 int err;
6115 do { 6401 do {
6116 err = nfs4_handle_exception(server, 6402 err = nfs4_handle_exception(server,
6117 _nfs41_test_stateid(server, state), 6403 _nfs41_test_stateid(server, stateid),
6118 &exception); 6404 &exception);
6119 } while (exception.retry); 6405 } while (exception.retry);
6120 return err; 6406 return err;
6121} 6407}
6122 6408
6123static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6409static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6124{ 6410{
6125 int status;
6126 struct nfs41_free_stateid_args args = { 6411 struct nfs41_free_stateid_args args = {
6127 .stateid = &state->stateid, 6412 .stateid = stateid,
6128 }; 6413 };
6129 struct nfs41_free_stateid_res res; 6414 struct nfs41_free_stateid_res res;
6130 struct rpc_message msg = { 6415 struct rpc_message msg = {
@@ -6133,25 +6418,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat
6133 .rpc_resp = &res, 6418 .rpc_resp = &res,
6134 }; 6419 };
6135 6420
6136 args.seq_args.sa_session = res.seq_res.sr_session = NULL; 6421 nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
6137 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 6422 return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
6138 return status;
6139} 6423}
6140 6424
6141static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6425static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6142{ 6426{
6143 struct nfs4_exception exception = { }; 6427 struct nfs4_exception exception = { };
6144 int err; 6428 int err;
6145 do { 6429 do {
6146 err = nfs4_handle_exception(server, 6430 err = nfs4_handle_exception(server,
6147 _nfs4_free_stateid(server, state), 6431 _nfs4_free_stateid(server, stateid),
6148 &exception); 6432 &exception);
6149 } while (exception.retry); 6433 } while (exception.retry);
6150 return err; 6434 return err;
6151} 6435}
6436
6437static bool nfs41_match_stateid(const nfs4_stateid *s1,
6438 const nfs4_stateid *s2)
6439{
6440 if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
6441 return false;
6442
6443 if (s1->seqid == s2->seqid)
6444 return true;
6445 if (s1->seqid == 0 || s2->seqid == 0)
6446 return true;
6447
6448 return false;
6449}
6450
6152#endif /* CONFIG_NFS_V4_1 */ 6451#endif /* CONFIG_NFS_V4_1 */
6153 6452
6154struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 6453static bool nfs4_match_stateid(const nfs4_stateid *s1,
6454 const nfs4_stateid *s2)
6455{
6456 return nfs4_stateid_match(s1, s2);
6457}
6458
6459
6460static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
6155 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, 6461 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
6156 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 6462 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
6157 .recover_open = nfs4_open_reclaim, 6463 .recover_open = nfs4_open_reclaim,
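nfs41_match_stateid() above encodes the NFSv4.1 rule that a seqid of zero acts as a wildcard: two stateids match when their 12-byte "other" fields agree and the seqids are either equal or one of them is zero, while minor version 0 keeps requiring an exact match via nfs4_stateid_match(). A runnable sketch of the v4.1 rule with a simplified stateid type:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct stateid {
	uint32_t seqid;
	char other[12];		/* opaque identifier, per RFC 5661 */
};

static bool match_v41(const struct stateid *s1, const struct stateid *s2)
{
	if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
		return false;
	return s1->seqid == s2->seqid || s1->seqid == 0 || s2->seqid == 0;
}

int main(void)
{
	struct stateid a = { .seqid = 3, .other = "abcdefghijk" };
	struct stateid b = a;

	b.seqid = 0;			/* wildcard matches any seqid */
	assert(match_v41(&a, &b));
	b.seqid = 4;			/* non-zero mismatch fails */
	assert(!match_v41(&a, &b));
	return 0;
}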
@@ -6161,7 +6467,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
6161}; 6467};
6162 6468
6163#if defined(CONFIG_NFS_V4_1) 6469#if defined(CONFIG_NFS_V4_1)
6164struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { 6470static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
6165 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, 6471 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
6166 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 6472 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
6167 .recover_open = nfs4_open_reclaim, 6473 .recover_open = nfs4_open_reclaim,
@@ -6172,7 +6478,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
6172}; 6478};
6173#endif /* CONFIG_NFS_V4_1 */ 6479#endif /* CONFIG_NFS_V4_1 */
6174 6480
6175struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { 6481static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
6176 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6482 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
6177 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6483 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
6178 .recover_open = nfs4_open_expired, 6484 .recover_open = nfs4_open_expired,
@@ -6182,7 +6488,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
6182}; 6488};
6183 6489
6184#if defined(CONFIG_NFS_V4_1) 6490#if defined(CONFIG_NFS_V4_1)
6185struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { 6491static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
6186 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6492 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
6187 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6493 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
6188 .recover_open = nfs41_open_expired, 6494 .recover_open = nfs41_open_expired,
@@ -6192,14 +6498,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
6192}; 6498};
6193#endif /* CONFIG_NFS_V4_1 */ 6499#endif /* CONFIG_NFS_V4_1 */
6194 6500
6195struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { 6501static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
6196 .sched_state_renewal = nfs4_proc_async_renew, 6502 .sched_state_renewal = nfs4_proc_async_renew,
6197 .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, 6503 .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
6198 .renew_lease = nfs4_proc_renew, 6504 .renew_lease = nfs4_proc_renew,
6199}; 6505};
6200 6506
6201#if defined(CONFIG_NFS_V4_1) 6507#if defined(CONFIG_NFS_V4_1)
6202struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { 6508static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
6203 .sched_state_renewal = nfs41_proc_async_sequence, 6509 .sched_state_renewal = nfs41_proc_async_sequence,
6204 .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, 6510 .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
6205 .renew_lease = nfs4_proc_sequence, 6511 .renew_lease = nfs4_proc_sequence,
@@ -6209,7 +6515,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
6209static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { 6515static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6210 .minor_version = 0, 6516 .minor_version = 0,
6211 .call_sync = _nfs4_call_sync, 6517 .call_sync = _nfs4_call_sync,
6212 .validate_stateid = nfs4_validate_delegation_stateid, 6518 .match_stateid = nfs4_match_stateid,
6213 .find_root_sec = nfs4_find_root_sec, 6519 .find_root_sec = nfs4_find_root_sec,
6214 .reboot_recovery_ops = &nfs40_reboot_recovery_ops, 6520 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
6215 .nograce_recovery_ops = &nfs40_nograce_recovery_ops, 6521 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
@@ -6220,7 +6526,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6220static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { 6526static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
6221 .minor_version = 1, 6527 .minor_version = 1,
6222 .call_sync = _nfs4_call_sync_session, 6528 .call_sync = _nfs4_call_sync_session,
6223 .validate_stateid = nfs41_validate_delegation_stateid, 6529 .match_stateid = nfs41_match_stateid,
6224 .find_root_sec = nfs41_find_root_sec, 6530 .find_root_sec = nfs41_find_root_sec,
6225 .reboot_recovery_ops = &nfs41_reboot_recovery_ops, 6531 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
6226 .nograce_recovery_ops = &nfs41_nograce_recovery_ops, 6532 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
@@ -6260,9 +6566,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6260 .create = nfs4_proc_create, 6566 .create = nfs4_proc_create,
6261 .remove = nfs4_proc_remove, 6567 .remove = nfs4_proc_remove,
6262 .unlink_setup = nfs4_proc_unlink_setup, 6568 .unlink_setup = nfs4_proc_unlink_setup,
6569 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
6263 .unlink_done = nfs4_proc_unlink_done, 6570 .unlink_done = nfs4_proc_unlink_done,
6264 .rename = nfs4_proc_rename, 6571 .rename = nfs4_proc_rename,
6265 .rename_setup = nfs4_proc_rename_setup, 6572 .rename_setup = nfs4_proc_rename_setup,
6573 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
6266 .rename_done = nfs4_proc_rename_done, 6574 .rename_done = nfs4_proc_rename_done,
6267 .link = nfs4_proc_link, 6575 .link = nfs4_proc_link,
6268 .symlink = nfs4_proc_symlink, 6576 .symlink = nfs4_proc_symlink,
@@ -6276,8 +6584,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6276 .set_capabilities = nfs4_server_capabilities, 6584 .set_capabilities = nfs4_server_capabilities,
6277 .decode_dirent = nfs4_decode_dirent, 6585 .decode_dirent = nfs4_decode_dirent,
6278 .read_setup = nfs4_proc_read_setup, 6586 .read_setup = nfs4_proc_read_setup,
6587 .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
6279 .read_done = nfs4_read_done, 6588 .read_done = nfs4_read_done,
6280 .write_setup = nfs4_proc_write_setup, 6589 .write_setup = nfs4_proc_write_setup,
6590 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6281 .write_done = nfs4_write_done, 6591 .write_done = nfs4_write_done,
6282 .commit_setup = nfs4_proc_commit_setup, 6592 .commit_setup = nfs4_proc_commit_setup,
6283 .commit_done = nfs4_commit_done, 6593 .commit_done = nfs4_commit_done,
@@ -6301,6 +6611,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = {
6301 NULL 6611 NULL
6302}; 6612};
6303 6613
6614module_param(max_session_slots, ushort, 0644);
6615MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
6616 "requests the client will negotiate");
6617
6304/* 6618/*
6305 * Local variables: 6619 * Local variables:
6306 * c-basic-offset: 8 6620 * c-basic-offset: 8
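The new max_session_slots parameter above becomes the fore-channel max_reqs the client advertises at CREATE_SESSION time (see the nfs4_init_channel_attrs hunk earlier), replacing the transport-derived value. Since the parameter is registered with 0644 permissions, it should also be tunable at runtime via /sys/module/nfs/parameters/max_session_slots, assuming the NFSv4 code is still built into nfs.ko as in this tree; only sessions negotiated after the change would pick up a new value.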
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 45392032e7b..7f0fcfc1fe9 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
146 struct rpc_cred *cred = NULL; 146 struct rpc_cred *cred = NULL;
147 struct nfs_server *server; 147 struct nfs_server *server;
148 148
149 /* Use machine credentials if available */
150 cred = nfs4_get_machine_cred_locked(clp);
151 if (cred != NULL)
152 goto out;
153
149 rcu_read_lock(); 154 rcu_read_lock();
150 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 155 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
151 cred = nfs4_get_renew_cred_server_locked(server); 156 cred = nfs4_get_renew_cred_server_locked(server);
@@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
153 break; 158 break;
154 } 159 }
155 rcu_read_unlock(); 160 rcu_read_unlock();
161
162out:
156 return cred; 163 return cred;
157} 164}
158 165
@@ -190,30 +197,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
190static void nfs4_end_drain_session(struct nfs_client *clp) 197static void nfs4_end_drain_session(struct nfs_client *clp)
191{ 198{
192 struct nfs4_session *ses = clp->cl_session; 199 struct nfs4_session *ses = clp->cl_session;
200 struct nfs4_slot_table *tbl;
193 int max_slots; 201 int max_slots;
194 202
195 if (ses == NULL) 203 if (ses == NULL)
196 return; 204 return;
205 tbl = &ses->fc_slot_table;
197 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { 206 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
198 spin_lock(&ses->fc_slot_table.slot_tbl_lock); 207 spin_lock(&tbl->slot_tbl_lock);
199 max_slots = ses->fc_slot_table.max_slots; 208 max_slots = tbl->max_slots;
200 while (max_slots--) { 209 while (max_slots--) {
201 struct rpc_task *task; 210 if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
202 211 nfs4_set_task_privileged,
203 task = rpc_wake_up_next(&ses->fc_slot_table. 212 NULL) == NULL)
204 slot_tbl_waitq);
205 if (!task)
206 break; 213 break;
207 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
208 } 214 }
209 spin_unlock(&ses->fc_slot_table.slot_tbl_lock); 215 spin_unlock(&tbl->slot_tbl_lock);
210 } 216 }
211} 217}
212 218
213static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) 219static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
214{ 220{
215 spin_lock(&tbl->slot_tbl_lock); 221 spin_lock(&tbl->slot_tbl_lock);
216 if (tbl->highest_used_slotid != -1) { 222 if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
217 INIT_COMPLETION(tbl->complete); 223 INIT_COMPLETION(tbl->complete);
218 spin_unlock(&tbl->slot_tbl_lock); 224 spin_unlock(&tbl->slot_tbl_lock);
219 return wait_for_completion_interruptible(&tbl->complete); 225 return wait_for_completion_interruptible(&tbl->complete);
@@ -317,62 +323,6 @@ out:
317 return cred; 323 return cred;
318} 324}
319 325
320static void nfs_alloc_unique_id_locked(struct rb_root *root,
321 struct nfs_unique_id *new,
322 __u64 minval, int maxbits)
323{
324 struct rb_node **p, *parent;
325 struct nfs_unique_id *pos;
326 __u64 mask = ~0ULL;
327
328 if (maxbits < 64)
329 mask = (1ULL << maxbits) - 1ULL;
330
331 /* Ensure distribution is more or less flat */
332 get_random_bytes(&new->id, sizeof(new->id));
333 new->id &= mask;
334 if (new->id < minval)
335 new->id += minval;
336retry:
337 p = &root->rb_node;
338 parent = NULL;
339
340 while (*p != NULL) {
341 parent = *p;
342 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
343
344 if (new->id < pos->id)
345 p = &(*p)->rb_left;
346 else if (new->id > pos->id)
347 p = &(*p)->rb_right;
348 else
349 goto id_exists;
350 }
351 rb_link_node(&new->rb_node, parent, p);
352 rb_insert_color(&new->rb_node, root);
353 return;
354id_exists:
355 for (;;) {
356 new->id++;
357 if (new->id < minval || (new->id & mask) != new->id) {
358 new->id = minval;
359 break;
360 }
361 parent = rb_next(parent);
362 if (parent == NULL)
363 break;
364 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
365 if (new->id < pos->id)
366 break;
367 }
368 goto retry;
369}
370
371static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
372{
373 rb_erase(&id->rb_node, root);
374}
375
376static struct nfs4_state_owner * 326static struct nfs4_state_owner *
377nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) 327nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
378{ 328{
@@ -405,6 +355,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
405 struct rb_node **p = &server->state_owners.rb_node, 355 struct rb_node **p = &server->state_owners.rb_node,
406 *parent = NULL; 356 *parent = NULL;
407 struct nfs4_state_owner *sp; 357 struct nfs4_state_owner *sp;
358 int err;
408 359
409 while (*p != NULL) { 360 while (*p != NULL) {
410 parent = *p; 361 parent = *p;
@@ -421,8 +372,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
421 return sp; 372 return sp;
422 } 373 }
423 } 374 }
424 nfs_alloc_unique_id_locked(&server->openowner_id, 375 err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
425 &new->so_owner_id, 1, 64); 376 if (err)
377 return ERR_PTR(err);
426 rb_link_node(&new->so_server_node, parent, p); 378 rb_link_node(&new->so_server_node, parent, p);
427 rb_insert_color(&new->so_server_node, &server->state_owners); 379 rb_insert_color(&new->so_server_node, &server->state_owners);
428 return new; 380 return new;
@@ -435,7 +387,24 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
435 387
436 if (!RB_EMPTY_NODE(&sp->so_server_node)) 388 if (!RB_EMPTY_NODE(&sp->so_server_node))
437 rb_erase(&sp->so_server_node, &server->state_owners); 389 rb_erase(&sp->so_server_node, &server->state_owners);
438 nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); 390 ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
391}
392
393static void
394nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
395{
396 sc->create_time = ktime_get();
397 sc->flags = 0;
398 sc->counter = 0;
399 spin_lock_init(&sc->lock);
400 INIT_LIST_HEAD(&sc->list);
401 rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
402}
403
404static void
405nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
406{
407 rpc_destroy_wait_queue(&sc->wait);
439} 408}
440 409
441/* 410/*
@@ -444,19 +413,20 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
444 * 413 *
445 */ 414 */
446static struct nfs4_state_owner * 415static struct nfs4_state_owner *
447nfs4_alloc_state_owner(void) 416nfs4_alloc_state_owner(struct nfs_server *server,
417 struct rpc_cred *cred,
418 gfp_t gfp_flags)
448{ 419{
449 struct nfs4_state_owner *sp; 420 struct nfs4_state_owner *sp;
450 421
451 sp = kzalloc(sizeof(*sp),GFP_NOFS); 422 sp = kzalloc(sizeof(*sp), gfp_flags);
452 if (!sp) 423 if (!sp)
453 return NULL; 424 return NULL;
425 sp->so_server = server;
426 sp->so_cred = get_rpccred(cred);
454 spin_lock_init(&sp->so_lock); 427 spin_lock_init(&sp->so_lock);
455 INIT_LIST_HEAD(&sp->so_states); 428 INIT_LIST_HEAD(&sp->so_states);
456 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); 429 nfs4_init_seqid_counter(&sp->so_seqid);
457 sp->so_seqid.sequence = &sp->so_sequence;
458 spin_lock_init(&sp->so_sequence.lock);
459 INIT_LIST_HEAD(&sp->so_sequence.list);
460 atomic_set(&sp->so_count, 1); 430 atomic_set(&sp->so_count, 1);
461 INIT_LIST_HEAD(&sp->so_lru); 431 INIT_LIST_HEAD(&sp->so_lru);
462 return sp; 432 return sp;
@@ -465,20 +435,24 @@ nfs4_alloc_state_owner(void)
465static void 435static void
466nfs4_drop_state_owner(struct nfs4_state_owner *sp) 436nfs4_drop_state_owner(struct nfs4_state_owner *sp)
467{ 437{
468 if (!RB_EMPTY_NODE(&sp->so_server_node)) { 438 struct rb_node *rb_node = &sp->so_server_node;
439
440 if (!RB_EMPTY_NODE(rb_node)) {
469 struct nfs_server *server = sp->so_server; 441 struct nfs_server *server = sp->so_server;
470 struct nfs_client *clp = server->nfs_client; 442 struct nfs_client *clp = server->nfs_client;
471 443
472 spin_lock(&clp->cl_lock); 444 spin_lock(&clp->cl_lock);
473 rb_erase(&sp->so_server_node, &server->state_owners); 445 if (!RB_EMPTY_NODE(rb_node)) {
474 RB_CLEAR_NODE(&sp->so_server_node); 446 rb_erase(rb_node, &server->state_owners);
447 RB_CLEAR_NODE(rb_node);
448 }
475 spin_unlock(&clp->cl_lock); 449 spin_unlock(&clp->cl_lock);
476 } 450 }
477} 451}
478 452
479static void nfs4_free_state_owner(struct nfs4_state_owner *sp) 453static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
480{ 454{
481 rpc_destroy_wait_queue(&sp->so_sequence.wait); 455 nfs4_destroy_seqid_counter(&sp->so_seqid);
482 put_rpccred(sp->so_cred); 456 put_rpccred(sp->so_cred);
483 kfree(sp); 457 kfree(sp);
484} 458}
@@ -516,7 +490,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
516 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. 490 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
517 */ 491 */
518struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, 492struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
519 struct rpc_cred *cred) 493 struct rpc_cred *cred,
494 gfp_t gfp_flags)
520{ 495{
521 struct nfs_client *clp = server->nfs_client; 496 struct nfs_client *clp = server->nfs_client;
522 struct nfs4_state_owner *sp, *new; 497 struct nfs4_state_owner *sp, *new;
@@ -526,20 +501,18 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
526 spin_unlock(&clp->cl_lock); 501 spin_unlock(&clp->cl_lock);
527 if (sp != NULL) 502 if (sp != NULL)
528 goto out; 503 goto out;
529 new = nfs4_alloc_state_owner(); 504 new = nfs4_alloc_state_owner(server, cred, gfp_flags);
530 if (new == NULL) 505 if (new == NULL)
531 goto out; 506 goto out;
532 new->so_server = server; 507 do {
533 new->so_cred = cred; 508 if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
534 spin_lock(&clp->cl_lock); 509 break;
535 sp = nfs4_insert_state_owner_locked(new); 510 spin_lock(&clp->cl_lock);
536 spin_unlock(&clp->cl_lock); 511 sp = nfs4_insert_state_owner_locked(new);
537 if (sp == new) 512 spin_unlock(&clp->cl_lock);
538 get_rpccred(cred); 513 } while (sp == ERR_PTR(-EAGAIN));
539 else { 514 if (sp != new)
540 rpc_destroy_wait_queue(&new->so_sequence.wait); 515 nfs4_free_state_owner(new);
541 kfree(new);
542 }
543out: 516out:
544 nfs4_gc_state_owners(server); 517 nfs4_gc_state_owners(server);
545 return sp; 518 return sp;
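The hunks above replace the hand-rolled rb-tree/random-id allocator with the generic ida. ida_get_new() can fail with -EAGAIN when its internal node cache is empty, which is why nfs4_get_state_owner() now pre-loads with ida_pre_get() outside the spinlock and loops until the insert stops reporting EAGAIN. A sketch of that classic (pre-ida_simple) idiom:

/* Sketch of the ida_pre_get()/ida_get_new() loop of this era:
 * pre-allocate nodes outside the lock, then retry the locked
 * allocation while it reports -EAGAIN. */
static int alloc_owner_id(struct ida *ida, spinlock_t *lock, int *id)
{
	int err;

	do {
		if (!ida_pre_get(ida, GFP_NOFS))
			return -ENOMEM;		/* genuinely out of memory */
		spin_lock(lock);
		err = ida_get_new(ida, id);
		spin_unlock(lock);
	} while (err == -EAGAIN);
	return err;
}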
@@ -548,6 +521,14 @@ out:
548/** 521/**
549 * nfs4_put_state_owner - Release a nfs4_state_owner 522 * nfs4_put_state_owner - Release a nfs4_state_owner
550 * @sp: state owner data to release 523 * @sp: state owner data to release
524 *
525 * Note that we keep released state owners on an LRU
526 * list.
527 * This caches valid state owners so that they can be
528 * reused, to avoid the OPEN_CONFIRM on minor version 0.
529 * It also pins the uniquifier of dropped state owners for
530 * a while, to ensure that those state owner names are
531 * never reused.
551 */ 532 */
552void nfs4_put_state_owner(struct nfs4_state_owner *sp) 533void nfs4_put_state_owner(struct nfs4_state_owner *sp)
553{ 534{
@@ -557,15 +538,9 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
557 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 538 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
558 return; 539 return;
559 540
560 if (!RB_EMPTY_NODE(&sp->so_server_node)) { 541 sp->so_expires = jiffies;
561 sp->so_expires = jiffies; 542 list_add_tail(&sp->so_lru, &server->state_owners_lru);
562 list_add_tail(&sp->so_lru, &server->state_owners_lru); 543 spin_unlock(&clp->cl_lock);
563 spin_unlock(&clp->cl_lock);
564 } else {
565 nfs4_remove_state_owner_locked(sp);
566 spin_unlock(&clp->cl_lock);
567 nfs4_free_state_owner(sp);
568 }
569} 544}
570 545
571/** 546/**
@@ -795,15 +770,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
795{ 770{
796 struct nfs4_lock_state *lsp; 771 struct nfs4_lock_state *lsp;
797 struct nfs_server *server = state->owner->so_server; 772 struct nfs_server *server = state->owner->so_server;
798 struct nfs_client *clp = server->nfs_client;
799 773
800 lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 774 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
801 if (lsp == NULL) 775 if (lsp == NULL)
802 return NULL; 776 return NULL;
803 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); 777 nfs4_init_seqid_counter(&lsp->ls_seqid);
804 spin_lock_init(&lsp->ls_sequence.lock);
805 INIT_LIST_HEAD(&lsp->ls_sequence.list);
806 lsp->ls_seqid.sequence = &lsp->ls_sequence;
807 atomic_set(&lsp->ls_count, 1); 778 atomic_set(&lsp->ls_count, 1);
808 lsp->ls_state = state; 779 lsp->ls_state = state;
809 lsp->ls_owner.lo_type = type; 780 lsp->ls_owner.lo_type = type;
@@ -815,25 +786,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
815 lsp->ls_owner.lo_u.posix_owner = fl_owner; 786 lsp->ls_owner.lo_u.posix_owner = fl_owner;
816 break; 787 break;
817 default: 788 default:
818 kfree(lsp); 789 goto out_free;
819 return NULL;
820 } 790 }
821 spin_lock(&clp->cl_lock); 791 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
822 nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); 792 if (lsp->ls_seqid.owner_id < 0)
823 spin_unlock(&clp->cl_lock); 793 goto out_free;
824 INIT_LIST_HEAD(&lsp->ls_locks); 794 INIT_LIST_HEAD(&lsp->ls_locks);
825 return lsp; 795 return lsp;
796out_free:
797 kfree(lsp);
798 return NULL;
826} 799}
827 800
828static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) 801void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
829{ 802{
830 struct nfs_server *server = lsp->ls_state->owner->so_server; 803 ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
831 struct nfs_client *clp = server->nfs_client; 804 nfs4_destroy_seqid_counter(&lsp->ls_seqid);
832
833 spin_lock(&clp->cl_lock);
834 nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
835 spin_unlock(&clp->cl_lock);
836 rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
837 kfree(lsp); 805 kfree(lsp);
838} 806}
839 807
@@ -865,7 +833,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
865 } 833 }
866 spin_unlock(&state->state_lock); 834 spin_unlock(&state->state_lock);
867 if (new != NULL) 835 if (new != NULL)
868 nfs4_free_lock_state(new); 836 nfs4_free_lock_state(state->owner->so_server, new);
869 return lsp; 837 return lsp;
870} 838}
871 839
@@ -886,9 +854,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
886 if (list_empty(&state->lock_states)) 854 if (list_empty(&state->lock_states))
887 clear_bit(LK_STATE_IN_USE, &state->flags); 855 clear_bit(LK_STATE_IN_USE, &state->flags);
888 spin_unlock(&state->state_lock); 856 spin_unlock(&state->state_lock);
889 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) 857 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
890 nfs4_release_lockowner(lsp); 858 if (nfs4_release_lockowner(lsp) == 0)
891 nfs4_free_lock_state(lsp); 859 return;
860 }
861 nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp);
892} 862}
893 863
894static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 864static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -918,7 +888,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
918 if (fl->fl_flags & FL_POSIX) 888 if (fl->fl_flags & FL_POSIX)
919 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); 889 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
920 else if (fl->fl_flags & FL_FLOCK) 890 else if (fl->fl_flags & FL_FLOCK)
921 lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); 891 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
892 NFS4_FLOCK_LOCK_TYPE);
922 else 893 else
923 return -EINVAL; 894 return -EINVAL;
924 if (lsp == NULL) 895 if (lsp == NULL)
@@ -928,28 +899,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
928 return 0; 899 return 0;
929} 900}
930 901
931/* 902static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
932 * Byte-range lock aware utility to initialize the stateid of read/write 903 fl_owner_t fl_owner, pid_t fl_pid)
933 * requests.
934 */
935void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
936{ 904{
937 struct nfs4_lock_state *lsp; 905 struct nfs4_lock_state *lsp;
938 int seq; 906 bool ret = false;
939 907
940 do {
941 seq = read_seqbegin(&state->seqlock);
942 memcpy(dst, &state->stateid, sizeof(*dst));
943 } while (read_seqretry(&state->seqlock, seq));
944 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 908 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
945 return; 909 goto out;
946 910
947 spin_lock(&state->state_lock); 911 spin_lock(&state->state_lock);
948 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 912 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
949 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 913 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
950 memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); 914 nfs4_stateid_copy(dst, &lsp->ls_stateid);
915 ret = true;
916 }
951 spin_unlock(&state->state_lock); 917 spin_unlock(&state->state_lock);
952 nfs4_put_lock_state(lsp); 918 nfs4_put_lock_state(lsp);
919out:
920 return ret;
921}
922
923static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
924{
925 int seq;
926
927 do {
928 seq = read_seqbegin(&state->seqlock);
929 nfs4_stateid_copy(dst, &state->stateid);
930 } while (read_seqretry(&state->seqlock, seq));
931}
932
933/*
934 * Byte-range lock aware utility to initialize the stateid of read/write
935 * requests.
936 */
937void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
938 fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid)
939{
940 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
941 return;
942 if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid))
943 return;
944 nfs4_copy_open_stateid(dst, state);
953} 945}
954 946
955struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) 947struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
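nfs4_select_rw_stateid() above replaces nfs4_copy_stateid() with an explicit three-level fallback: prefer a delegation stateid, then a byte-range lock stateid, and only then the open stateid, with each copy helper reporting via bool whether it filled in the destination. A sketch of the chain; copy_delegation(), copy_lock() and copy_open() are stand-ins for the real helpers:

static void select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
			      fmode_t fmode, fl_owner_t owner, pid_t pid)
{
	if (copy_delegation(dst, state->inode, fmode))
		return;			/* strongest claim: delegation */
	if (copy_lock(dst, state, owner, pid))
		return;			/* then: lock stateid, if initialized */
	copy_open(dst, state);		/* fallback: open stateid (seqlock-safe) */
}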
@@ -960,20 +952,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m
960 if (new != NULL) { 952 if (new != NULL) {
961 new->sequence = counter; 953 new->sequence = counter;
962 INIT_LIST_HEAD(&new->list); 954 INIT_LIST_HEAD(&new->list);
955 new->task = NULL;
963 } 956 }
964 return new; 957 return new;
965} 958}
966 959
967void nfs_release_seqid(struct nfs_seqid *seqid) 960void nfs_release_seqid(struct nfs_seqid *seqid)
968{ 961{
969 if (!list_empty(&seqid->list)) { 962 struct nfs_seqid_counter *sequence;
970 struct rpc_sequence *sequence = seqid->sequence->sequence;
971 963
972 spin_lock(&sequence->lock); 964 if (list_empty(&seqid->list))
973 list_del_init(&seqid->list); 965 return;
974 spin_unlock(&sequence->lock); 966 sequence = seqid->sequence;
975 rpc_wake_up(&sequence->wait); 967 spin_lock(&sequence->lock);
968 list_del_init(&seqid->list);
969 if (!list_empty(&sequence->list)) {
970 struct nfs_seqid *next;
971
972 next = list_first_entry(&sequence->list,
973 struct nfs_seqid, list);
974 rpc_wake_up_queued_task(&sequence->wait, next->task);
976 } 975 }
976 spin_unlock(&sequence->lock);
977} 977}
978 978
979void nfs_free_seqid(struct nfs_seqid *seqid) 979void nfs_free_seqid(struct nfs_seqid *seqid)
@@ -989,14 +989,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
989 */ 989 */
990static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) 990static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
991{ 991{
992 BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); 992 BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid);
993 switch (status) { 993 switch (status) {
994 case 0: 994 case 0:
995 break; 995 break;
996 case -NFS4ERR_BAD_SEQID: 996 case -NFS4ERR_BAD_SEQID:
997 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) 997 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
998 return; 998 return;
999 printk(KERN_WARNING "NFS: v4 server returned a bad" 999 pr_warn_ratelimited("NFS: v4 server returned a bad"
1000 " sequence-id error on an" 1000 " sequence-id error on an"
1001 " unconfirmed sequence %p!\n", 1001 " unconfirmed sequence %p!\n",
1002 seqid->sequence); 1002 seqid->sequence);
@@ -1040,10 +1040,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
1040 1040
1041int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) 1041int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
1042{ 1042{
1043 struct rpc_sequence *sequence = seqid->sequence->sequence; 1043 struct nfs_seqid_counter *sequence = seqid->sequence;
1044 int status = 0; 1044 int status = 0;
1045 1045
1046 spin_lock(&sequence->lock); 1046 spin_lock(&sequence->lock);
1047 seqid->task = task;
1047 if (list_empty(&seqid->list)) 1048 if (list_empty(&seqid->list))
1048 list_add_tail(&seqid->list, &sequence->list); 1049 list_add_tail(&seqid->list, &sequence->list);
1049 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) 1050 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
@@ -1072,19 +1073,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1072void nfs4_schedule_state_manager(struct nfs_client *clp) 1073void nfs4_schedule_state_manager(struct nfs_client *clp)
1073{ 1074{
1074 struct task_struct *task; 1075 struct task_struct *task;
1076 char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1075 1077
1076 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1078 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1077 return; 1079 return;
1078 __module_get(THIS_MODULE); 1080 __module_get(THIS_MODULE);
1079 atomic_inc(&clp->cl_count); 1081 atomic_inc(&clp->cl_count);
1080 task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", 1082
1081 rpc_peeraddr2str(clp->cl_rpcclient, 1083 /* The rcu_read_lock() is not strictly necessary, as the state
1082 RPC_DISPLAY_ADDR)); 1084 * manager is the only thread that ever changes the rpc_xprt
1083 if (!IS_ERR(task)) 1085 * after it's initialized. At this point, we're single threaded. */
1084 return; 1086 rcu_read_lock();
1085 nfs4_clear_state_manager_bit(clp); 1087 snprintf(buf, sizeof(buf), "%s-manager",
1086 nfs_put_client(clp); 1088 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1087 module_put(THIS_MODULE); 1089 rcu_read_unlock();
1090 task = kthread_run(nfs4_run_state_manager, clp, buf);
1091 if (IS_ERR(task)) {
1092 printk(KERN_ERR "%s: kthread_run: %ld\n",
1093 __func__, PTR_ERR(task));
1094 nfs4_clear_state_manager_bit(clp);
1095 nfs_put_client(clp);
1096 module_put(THIS_MODULE);
1097 }
1088} 1098}
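
[Reviewer note] Two things change besides the added error reporting: the thread name is preformatted into a stack buffer sized for the worst-case IPv6 presentation address plus the "-manager" suffix, and the rpc_peeraddr2str() call is bracketed by rcu_read_lock() (defensively, per the comment). Note that kthread_run() takes a printf-style format, so passing a prebuilt buffer as the format itself is only safe because a presentation address cannot contain '%'. A sketch of the more defensive "%s" form, with a hypothetical thread function:

    #include <linux/kthread.h>
    #include <linux/inet.h>         /* INET6_ADDRSTRLEN */
    #include <linux/err.h>

    static int my_manager(void *data)
    {
            /* ... loop until kthread_should_stop() ... */
            return 0;
    }

    static int start_manager(void *data, const char *peer)
    {
            char name[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
            struct task_struct *task;

            snprintf(name, sizeof(name), "%s-manager", peer);
            task = kthread_run(my_manager, data, "%s", name);
            if (IS_ERR(task))
                    return PTR_ERR(task);   /* thread never started */
            return 0;
    }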
1089 1099
1090/* 1100/*
@@ -1098,10 +1108,25 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1098 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1108 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1099 nfs4_schedule_state_manager(clp); 1109 nfs4_schedule_state_manager(clp);
1100} 1110}
1111EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
1112
1113/*
1114 * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
1115 * @clp: client to process
1116 *
1117 * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
1118 * resend of the SETCLIENTID and hence re-establish the
1119 * callback channel. Then return all existing delegations.
1120 */
1121static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1122{
1123 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1124 nfs_expire_all_delegations(clp);
1125}
1101 1126
1102void nfs4_schedule_path_down_recovery(struct nfs_client *clp) 1127void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
1103{ 1128{
1104 nfs_handle_cb_pathdown(clp); 1129 nfs40_handle_cb_pathdown(clp);
1105 nfs4_schedule_state_manager(clp); 1130 nfs4_schedule_state_manager(clp);
1106} 1131}
1107 1132
@@ -1132,11 +1157,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1132{ 1157{
1133 struct nfs_client *clp = server->nfs_client; 1158 struct nfs_client *clp = server->nfs_client;
1134 1159
1135 if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
1136 nfs_async_inode_return_delegation(state->inode, &state->stateid);
1137 nfs4_state_mark_reclaim_nograce(clp, state); 1160 nfs4_state_mark_reclaim_nograce(clp, state);
1138 nfs4_schedule_state_manager(clp); 1161 nfs4_schedule_state_manager(clp);
1139} 1162}
1163EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1164
1165void nfs_inode_find_state_and_recover(struct inode *inode,
1166 const nfs4_stateid *stateid)
1167{
1168 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1169 struct nfs_inode *nfsi = NFS_I(inode);
1170 struct nfs_open_context *ctx;
1171 struct nfs4_state *state;
1172 bool found = false;
1173
1174 spin_lock(&inode->i_lock);
1175 list_for_each_entry(ctx, &nfsi->open_files, list) {
1176 state = ctx->state;
1177 if (state == NULL)
1178 continue;
1179 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
1180 continue;
1181 if (!nfs4_stateid_match(&state->stateid, stateid))
1182 continue;
1183 nfs4_state_mark_reclaim_nograce(clp, state);
1184 found = true;
1185 }
1186 spin_unlock(&inode->i_lock);
1187 if (found)
1188 nfs4_schedule_state_manager(clp);
1189}
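
[Reviewer note] nfs_inode_find_state_and_recover() follows a common locking shape: do the cheap marking (nfs4_state_mark_reclaim_nograce()) while walking the open-context list under inode->i_lock, remember whether anything matched, and defer the heavyweight nfs4_schedule_state_manager() call until the spinlock is dropped. Reduced to its skeleton, with hypothetical helpers:

    /* Sketch: scan under a spinlock, act only after releasing it.
     * ctx_list()/wants_recovery()/mark_for_recovery()/kick_state_manager()
     * are stand-ins, not real NFS symbols. */
    static void scan_and_kick(struct inode *inode)
    {
            struct ctx *c;
            bool found = false;

            spin_lock(&inode->i_lock);
            list_for_each_entry(c, ctx_list(inode), list) {
                    if (!wants_recovery(c))
                            continue;
                    mark_for_recovery(c);   /* cheap, safe under the lock */
                    found = true;
            }
            spin_unlock(&inode->i_lock);
            if (found)
                    kick_state_manager(inode);      /* may sleep */
    }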
1190
1140 1191
1141static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1192static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1142{ 1193{
@@ -1175,8 +1226,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
1175 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1226 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1176 goto out; 1227 goto out;
1177 default: 1228 default:
1178 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 1229 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1179 __func__, status); 1230 "Zeroing state\n", __func__, status);
1180 case -ENOMEM: 1231 case -ENOMEM:
1181 case -NFS4ERR_DENIED: 1232 case -NFS4ERR_DENIED:
1182 case -NFS4ERR_RECLAIM_BAD: 1233 case -NFS4ERR_RECLAIM_BAD:
@@ -1222,8 +1273,9 @@ restart:
1222 spin_lock(&state->state_lock); 1273 spin_lock(&state->state_lock);
1223 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1274 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1224 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1275 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
1225 printk("%s: Lock reclaim failed!\n", 1276 pr_warn_ratelimited("NFS: "
1226 __func__); 1277 "%s: Lock reclaim "
1278 "failed!\n", __func__);
1227 } 1279 }
1228 spin_unlock(&state->state_lock); 1280 spin_unlock(&state->state_lock);
1229 nfs4_put_open_state(state); 1281 nfs4_put_open_state(state);
@@ -1232,8 +1284,8 @@ restart:
1232 } 1284 }
1233 switch (status) { 1285 switch (status) {
1234 default: 1286 default:
1235 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 1287 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1236 __func__, status); 1288 "Zeroing state\n", __func__, status);
1237 case -ENOENT: 1289 case -ENOENT:
1238 case -ENOMEM: 1290 case -ENOMEM:
1239 case -ESTALE: 1291 case -ESTALE:
@@ -1241,8 +1293,8 @@ restart:
1241 * Open state on this file cannot be recovered 1293 * Open state on this file cannot be recovered
1242 * All we can do is revert to using the zero stateid. 1294 * All we can do is revert to using the zero stateid.
1243 */ 1295 */
1244 memset(state->stateid.data, 0, 1296 memset(&state->stateid, 0,
1245 sizeof(state->stateid.data)); 1297 sizeof(state->stateid));
1246 /* Mark the file as being 'closed' */ 1298 /* Mark the file as being 'closed' */
1247 state->state = 0; 1299 state->state = 0;
1248 break; 1300 break;
@@ -1420,7 +1472,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1420 case 0: 1472 case 0:
1421 break; 1473 break;
1422 case -NFS4ERR_CB_PATH_DOWN: 1474 case -NFS4ERR_CB_PATH_DOWN:
1423 nfs_handle_cb_pathdown(clp); 1475 nfs40_handle_cb_pathdown(clp);
1424 break; 1476 break;
1425 case -NFS4ERR_NO_GRACE: 1477 case -NFS4ERR_NO_GRACE:
1426 nfs4_state_end_reclaim_reboot(clp); 1478 nfs4_state_end_reclaim_reboot(clp);
@@ -1801,7 +1853,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1801 } while (atomic_read(&clp->cl_count) > 1); 1853 } while (atomic_read(&clp->cl_count) > 1);
1802 return; 1854 return;
1803out_error: 1855out_error:
1804 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1856 pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s"
1805 " with error %d\n", clp->cl_hostname, -status); 1857 " with error %d\n", clp->cl_hostname, -status);
1806 nfs4_end_drain_session(clp); 1858 nfs4_end_drain_session(clp);
1807 nfs4_clear_state_manager_bit(clp); 1859 nfs4_clear_state_manager_bit(clp);
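
[Reviewer note] This hunk, like the earlier ones, converts bare printk() calls to pr_warn_ratelimited() with an explicit "NFS: " prefix, so a misbehaving or unreachable server cannot flood the log with identical warnings. The patch spells the prefix out at each call site; the common alternative is a pr_fmt() definition, which prepends it to every pr_*() call in the file. A hedged sketch of that idiom:

    /* pr_fmt() must be defined before printk.h is pulled in. */
    #define pr_fmt(fmt) "NFS: " fmt

    #include <linux/printk.h>

    static void report_manager_failure(const char *host, int status)
    {
            /* Emits a limited burst, then suppresses repeats. */
            pr_warn_ratelimited("state manager failed on NFSv4 server %s"
                                " with error %d\n", host, -status);
    }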
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 33bd8d0f745..c54aae364be 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -44,6 +44,8 @@
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/module.h>
48#include <linux/utsname.h>
47#include <linux/sunrpc/clnt.h> 49#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 50#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h> 51#include <linux/sunrpc/gss_api.h>
@@ -72,7 +74,7 @@ static int nfs4_stat_to_errno(int);
72/* lock,open owner id: 74/* lock,open owner id:
73 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) 75 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
74 */ 76 */
75#define open_owner_id_maxsz (1 + 1 + 4) 77#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
76#define lock_owner_id_maxsz (1 + 1 + 4) 78#define lock_owner_id_maxsz (1 + 1 + 4)
77#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 79#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
78#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 80#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int);
271 1 /* flags */ + \ 273 1 /* flags */ + \
272 1 /* spa_how */ + \ 274 1 /* spa_how */ + \
273 0 /* SP4_NONE (for now) */ + \ 275 0 /* SP4_NONE (for now) */ + \
274 1 /* zero implemetation id array */) 276 1 /* implementation id array of size 1 */ + \
277 1 /* nii_domain */ + \
278 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
279 1 /* nii_name */ + \
280 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
281 3 /* nii_date */)
275#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 282#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
276 2 /* eir_clientid */ + \ 283 2 /* eir_clientid */ + \
277 1 /* eir_sequenceid */ + \ 284 1 /* eir_sequenceid */ + \
@@ -284,7 +291,11 @@ static int nfs4_stat_to_errno(int);
284 /* eir_server_scope<> */ \ 291 /* eir_server_scope<> */ \
285 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ 292 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
286 1 /* eir_server_impl_id array length */ + \ 293 1 /* eir_server_impl_id array length */ + \
287 0 /* ignored eir_server_impl_id contents */) 294 1 /* nii_domain */ + \
295 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
296 1 /* nii_name */ + \
297 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
298 3 /* nii_date */)
288#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) 299#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */)
289#define decode_channel_attrs_maxsz (6 + \ 300#define decode_channel_attrs_maxsz (6 + \
290 1 /* ca_rdma_ird.len */ + \ 301 1 /* ca_rdma_ird.len */ + \
@@ -838,6 +849,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
838 XDR_UNIT); 849 XDR_UNIT);
839#endif /* CONFIG_NFS_V4_1 */ 850#endif /* CONFIG_NFS_V4_1 */
840 851
852static unsigned short send_implementation_id = 1;
853
854module_param(send_implementation_id, ushort, 0644);
855MODULE_PARM_DESC(send_implementation_id,
856 "Send implementation ID with NFSv4.1 exchange_id");
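
[Reviewer note] A new load-time and runtime tunable: mode 0644 makes the value world-readable and root-writable under sysfs (on a typical build, /sys/module/nfs/parameters/send_implementation_id; the exact module name depends on how nfs4xdr.c is linked), and it can be set at load time, e.g. "modprobe nfs send_implementation_id=0". The general declaration pattern:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    /* Hypothetical knob, same shape as send_implementation_id. */
    static unsigned short my_feature = 1;
    module_param(my_feature, ushort, 0644);
    MODULE_PARM_DESC(my_feature, "Enable the optional feature (0|1)");

    static bool my_feature_active(void)
    {
            return my_feature != 0; /* reread: root may flip it at runtime */
    }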
857
841static const umode_t nfs_type2fmt[] = { 858static const umode_t nfs_type2fmt[] = {
842 [NF4BAD] = 0, 859 [NF4BAD] = 0,
843 [NF4REG] = S_IFREG, 860 [NF4REG] = S_IFREG,
@@ -868,15 +885,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
868 return p; 885 return p;
869} 886}
870 887
888static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
889{
890 __be32 *p;
891
892 p = xdr_reserve_space(xdr, len);
893 xdr_encode_opaque_fixed(p, buf, len);
894}
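
[Reviewer note] encode_opaque_fixed() factors out the reserve-then-encode step for fixed-length XDR opaques; unlike variable-length opaques there is no leading 4-byte length word, but the payload is still padded to a 4-byte boundary on the wire. The *_maxsz macros at the top of the file count in those 4-byte XDR words, via the real XDR_QUADLEN() macro from linux/sunrpc/xdr.h:

    #include <linux/sunrpc/xdr.h>

    /* XDR_QUADLEN(l) == ((l) + 3) >> 2: 4-byte words needed for l bytes. */
    static size_t fixed_opaque_words(size_t len)
    {
            return XDR_QUADLEN(len);        /* 16-byte stateid -> 4 words */
    }

    static size_t variable_opaque_words(size_t len)
    {
            return 1 + XDR_QUADLEN(len);    /* plus one word for the length */
    }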
895
871static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 896static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
872{ 897{
873 __be32 *p; 898 __be32 *p;
874 899
875 p = xdr_reserve_space(xdr, 4 + len); 900 p = reserve_space(xdr, 4 + len);
876 BUG_ON(p == NULL);
877 xdr_encode_opaque(p, str, len); 901 xdr_encode_opaque(p, str, len);
878} 902}
879 903
904static void encode_uint32(struct xdr_stream *xdr, u32 n)
905{
906 __be32 *p;
907
908 p = reserve_space(xdr, 4);
909 *p = cpu_to_be32(n);
910}
911
912static void encode_uint64(struct xdr_stream *xdr, u64 n)
913{
914 __be32 *p;
915
916 p = reserve_space(xdr, 8);
917 xdr_encode_hyper(p, n);
918}
919
920static void encode_nfs4_seqid(struct xdr_stream *xdr,
921 const struct nfs_seqid *seqid)
922{
923 encode_uint32(xdr, seqid->sequence->counter);
924}
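
[Reviewer note] These tiny typed helpers (encode_uint32(), encode_uint64(), encode_nfs4_seqid()) replace the repeated reserve_space()-plus-store sequences, and the bulk of this patch rewrites each operation encoder as a flat list of such calls. Composed, a hypothetical operation body becomes self-describing:

    /* Sketch: a body consisting of a 64-bit offset and a 32-bit count. */
    static void encode_example_body(struct xdr_stream *xdr, u64 offset, u32 count)
    {
            encode_uint64(xdr, offset);     /* XDR hyper, big-endian */
            encode_uint32(xdr, count);      /* XDR unsigned int */
    }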
925
880static void encode_compound_hdr(struct xdr_stream *xdr, 926static void encode_compound_hdr(struct xdr_stream *xdr,
881 struct rpc_rqst *req, 927 struct rpc_rqst *req,
882 struct compound_hdr *hdr) 928 struct compound_hdr *hdr)
@@ -889,28 +935,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
889 * but this is not required as a MUST for the server to do so. */ 935 * but this is not required as a MUST for the server to do so. */
890 hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; 936 hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
891 937
892 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
893 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 938 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
894 p = reserve_space(xdr, 4 + hdr->taglen + 8); 939 encode_string(xdr, hdr->taglen, hdr->tag);
895 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); 940 p = reserve_space(xdr, 8);
896 *p++ = cpu_to_be32(hdr->minorversion); 941 *p++ = cpu_to_be32(hdr->minorversion);
897 hdr->nops_p = p; 942 hdr->nops_p = p;
898 *p = cpu_to_be32(hdr->nops); 943 *p = cpu_to_be32(hdr->nops);
899} 944}
900 945
946static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op,
947 uint32_t replen,
948 struct compound_hdr *hdr)
949{
950 encode_uint32(xdr, op);
951 hdr->nops++;
952 hdr->replen += replen;
953}
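
[Reviewer note] encode_op_hdr() is the centerpiece of the cleanup: every operation encoder used to open-code the opcode word plus the hdr->nops++ and hdr->replen bookkeeping, and the remaining hunks delete those triplets one by one. The shape of the transformation on a hypothetical operation:

    /* Before: three lines of boilerplate per encoder. */
    static void encode_example_old(struct xdr_stream *xdr, struct compound_hdr *hdr)
    {
            __be32 *p = reserve_space(xdr, 4);

            *p = cpu_to_be32(OP_EXAMPLE);           /* hypothetical opcode */
            hdr->nops++;
            hdr->replen += decode_example_maxsz;    /* hypothetical reply size */
    }

    /* After: one call covers opcode, op count, and reply-size accounting. */
    static void encode_example_new(struct xdr_stream *xdr, struct compound_hdr *hdr)
    {
            encode_op_hdr(xdr, OP_EXAMPLE, decode_example_maxsz, hdr);
    }

Besides being shorter, this makes it much harder to bump nops without accounting for the reply size, or vice versa.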
954
901static void encode_nops(struct compound_hdr *hdr) 955static void encode_nops(struct compound_hdr *hdr)
902{ 956{
903 BUG_ON(hdr->nops > NFS4_MAX_OPS); 957 BUG_ON(hdr->nops > NFS4_MAX_OPS);
904 *hdr->nops_p = htonl(hdr->nops); 958 *hdr->nops_p = htonl(hdr->nops);
905} 959}
906 960
907static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) 961static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
908{ 962{
909 __be32 *p; 963 encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
964}
910 965
911 p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); 966static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
912 BUG_ON(p == NULL); 967{
913 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); 968 encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
914} 969}
915 970
916static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) 971static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
@@ -1023,7 +1078,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1023 * Now we backfill the bitmap and the attribute buffer length. 1078 * Now we backfill the bitmap and the attribute buffer length.
1024 */ 1079 */
1025 if (len != ((char *)p - (char *)q) + 4) { 1080 if (len != ((char *)p - (char *)q) + 4) {
1026 printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", 1081 printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
1027 len, ((char *)p - (char *)q) + 4); 1082 len, ((char *)p - (char *)q) + 4);
1028 BUG(); 1083 BUG();
1029 } 1084 }
@@ -1037,46 +1092,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1037 1092
1038static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) 1093static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
1039{ 1094{
1040 __be32 *p; 1095 encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr);
1041 1096 encode_uint32(xdr, access);
1042 p = reserve_space(xdr, 8);
1043 *p++ = cpu_to_be32(OP_ACCESS);
1044 *p = cpu_to_be32(access);
1045 hdr->nops++;
1046 hdr->replen += decode_access_maxsz;
1047} 1097}
1048 1098
1049static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 1099static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1050{ 1100{
1051 __be32 *p; 1101 encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
1052 1102 encode_nfs4_seqid(xdr, arg->seqid);
1053 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); 1103 encode_nfs4_stateid(xdr, arg->stateid);
1054 *p++ = cpu_to_be32(OP_CLOSE);
1055 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
1056 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1057 hdr->nops++;
1058 hdr->replen += decode_close_maxsz;
1059} 1104}
1060 1105
1061static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1062{ 1107{
1063 __be32 *p; 1108 __be32 *p;
1064 1109
1065 p = reserve_space(xdr, 16); 1110 encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
1066 *p++ = cpu_to_be32(OP_COMMIT); 1111 p = reserve_space(xdr, 12);
1067 p = xdr_encode_hyper(p, args->offset); 1112 p = xdr_encode_hyper(p, args->offset);
1068 *p = cpu_to_be32(args->count); 1113 *p = cpu_to_be32(args->count);
1069 hdr->nops++;
1070 hdr->replen += decode_commit_maxsz;
1071} 1114}
1072 1115
1073static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) 1116static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
1074{ 1117{
1075 __be32 *p; 1118 __be32 *p;
1076 1119
1077 p = reserve_space(xdr, 8); 1120 encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr);
1078 *p++ = cpu_to_be32(OP_CREATE); 1121 encode_uint32(xdr, create->ftype);
1079 *p = cpu_to_be32(create->ftype);
1080 1122
1081 switch (create->ftype) { 1123 switch (create->ftype) {
1082 case NF4LNK: 1124 case NF4LNK:
@@ -1096,9 +1138,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
1096 } 1138 }
1097 1139
1098 encode_string(xdr, create->name->len, create->name->name); 1140 encode_string(xdr, create->name->len, create->name->name);
1099 hdr->nops++;
1100 hdr->replen += decode_create_maxsz;
1101
1102 encode_attrs(xdr, create->attrs, create->server); 1141 encode_attrs(xdr, create->attrs, create->server);
1103} 1142}
1104 1143
@@ -1106,25 +1145,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
1106{ 1145{
1107 __be32 *p; 1146 __be32 *p;
1108 1147
1109 p = reserve_space(xdr, 12); 1148 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1110 *p++ = cpu_to_be32(OP_GETATTR); 1149 p = reserve_space(xdr, 8);
1111 *p++ = cpu_to_be32(1); 1150 *p++ = cpu_to_be32(1);
1112 *p = cpu_to_be32(bitmap); 1151 *p = cpu_to_be32(bitmap);
1113 hdr->nops++;
1114 hdr->replen += decode_getattr_maxsz;
1115} 1152}
1116 1153
1117static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) 1154static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
1118{ 1155{
1119 __be32 *p; 1156 __be32 *p;
1120 1157
1121 p = reserve_space(xdr, 16); 1158 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1122 *p++ = cpu_to_be32(OP_GETATTR); 1159 p = reserve_space(xdr, 12);
1123 *p++ = cpu_to_be32(2); 1160 *p++ = cpu_to_be32(2);
1124 *p++ = cpu_to_be32(bm0); 1161 *p++ = cpu_to_be32(bm0);
1125 *p = cpu_to_be32(bm1); 1162 *p = cpu_to_be32(bm1);
1126 hdr->nops++;
1127 hdr->replen += decode_getattr_maxsz;
1128} 1163}
1129 1164
1130static void 1165static void
@@ -1134,8 +1169,7 @@ encode_getattr_three(struct xdr_stream *xdr,
1134{ 1169{
1135 __be32 *p; 1170 __be32 *p;
1136 1171
1137 p = reserve_space(xdr, 4); 1172 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1138 *p = cpu_to_be32(OP_GETATTR);
1139 if (bm2) { 1173 if (bm2) {
1140 p = reserve_space(xdr, 16); 1174 p = reserve_space(xdr, 16);
1141 *p++ = cpu_to_be32(3); 1175 *p++ = cpu_to_be32(3);
@@ -1152,8 +1186,6 @@ encode_getattr_three(struct xdr_stream *xdr,
1152 *p++ = cpu_to_be32(1); 1186 *p++ = cpu_to_be32(1);
1153 *p = cpu_to_be32(bm0); 1187 *p = cpu_to_be32(bm0);
1154 } 1188 }
1155 hdr->nops++;
1156 hdr->replen += decode_getattr_maxsz;
1157} 1189}
1158 1190
1159static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1191static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -1179,23 +1211,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru
1179 1211
1180static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1212static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1181{ 1213{
1182 __be32 *p; 1214 encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr);
1183
1184 p = reserve_space(xdr, 4);
1185 *p = cpu_to_be32(OP_GETFH);
1186 hdr->nops++;
1187 hdr->replen += decode_getfh_maxsz;
1188} 1215}
1189 1216
1190static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1217static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1191{ 1218{
1192 __be32 *p; 1219 encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr);
1193 1220 encode_string(xdr, name->len, name->name);
1194 p = reserve_space(xdr, 8 + name->len);
1195 *p++ = cpu_to_be32(OP_LINK);
1196 xdr_encode_opaque(p, name->name, name->len);
1197 hdr->nops++;
1198 hdr->replen += decode_link_maxsz;
1199} 1221}
1200 1222
1201static inline int nfs4_lock_type(struct file_lock *fl, int block) 1223static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -1232,79 +1254,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1232{ 1254{
1233 __be32 *p; 1255 __be32 *p;
1234 1256
1235 p = reserve_space(xdr, 32); 1257 encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr);
1236 *p++ = cpu_to_be32(OP_LOCK); 1258 p = reserve_space(xdr, 28);
1237 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); 1259 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1238 *p++ = cpu_to_be32(args->reclaim); 1260 *p++ = cpu_to_be32(args->reclaim);
1239 p = xdr_encode_hyper(p, args->fl->fl_start); 1261 p = xdr_encode_hyper(p, args->fl->fl_start);
1240 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1262 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1241 *p = cpu_to_be32(args->new_lock_owner); 1263 *p = cpu_to_be32(args->new_lock_owner);
1242 if (args->new_lock_owner){ 1264 if (args->new_lock_owner){
1243 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1265 encode_nfs4_seqid(xdr, args->open_seqid);
1244 *p++ = cpu_to_be32(args->open_seqid->sequence->counter); 1266 encode_nfs4_stateid(xdr, args->open_stateid);
1245 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); 1267 encode_nfs4_seqid(xdr, args->lock_seqid);
1246 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1247 encode_lockowner(xdr, &args->lock_owner); 1268 encode_lockowner(xdr, &args->lock_owner);
1248 } 1269 }
1249 else { 1270 else {
1250 p = reserve_space(xdr, NFS4_STATEID_SIZE+4); 1271 encode_nfs4_stateid(xdr, args->lock_stateid);
1251 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); 1272 encode_nfs4_seqid(xdr, args->lock_seqid);
1252 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1253 } 1273 }
1254 hdr->nops++;
1255 hdr->replen += decode_lock_maxsz;
1256} 1274}
1257 1275
1258static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) 1276static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
1259{ 1277{
1260 __be32 *p; 1278 __be32 *p;
1261 1279
1262 p = reserve_space(xdr, 24); 1280 encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr);
1263 *p++ = cpu_to_be32(OP_LOCKT); 1281 p = reserve_space(xdr, 20);
1264 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1282 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1265 p = xdr_encode_hyper(p, args->fl->fl_start); 1283 p = xdr_encode_hyper(p, args->fl->fl_start);
1266 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1284 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1267 encode_lockowner(xdr, &args->lock_owner); 1285 encode_lockowner(xdr, &args->lock_owner);
1268 hdr->nops++;
1269 hdr->replen += decode_lockt_maxsz;
1270} 1286}
1271 1287
1272static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) 1288static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
1273{ 1289{
1274 __be32 *p; 1290 __be32 *p;
1275 1291
1276 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); 1292 encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr);
1277 *p++ = cpu_to_be32(OP_LOCKU); 1293 encode_uint32(xdr, nfs4_lock_type(args->fl, 0));
1278 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1294 encode_nfs4_seqid(xdr, args->seqid);
1279 *p++ = cpu_to_be32(args->seqid->sequence->counter); 1295 encode_nfs4_stateid(xdr, args->stateid);
1280 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); 1296 p = reserve_space(xdr, 16);
1281 p = xdr_encode_hyper(p, args->fl->fl_start); 1297 p = xdr_encode_hyper(p, args->fl->fl_start);
1282 xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1298 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1283 hdr->nops++;
1284 hdr->replen += decode_locku_maxsz;
1285} 1299}
1286 1300
1287static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) 1301static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
1288{ 1302{
1289 __be32 *p; 1303 encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr);
1290
1291 p = reserve_space(xdr, 4);
1292 *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
1293 encode_lockowner(xdr, lowner); 1304 encode_lockowner(xdr, lowner);
1294 hdr->nops++;
1295 hdr->replen += decode_release_lockowner_maxsz;
1296} 1305}
1297 1306
1298static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1307static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1299{ 1308{
1300 int len = name->len; 1309 encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr);
1301 __be32 *p; 1310 encode_string(xdr, name->len, name->name);
1302
1303 p = reserve_space(xdr, 8 + len);
1304 *p++ = cpu_to_be32(OP_LOOKUP);
1305 xdr_encode_opaque(p, name->name, len);
1306 hdr->nops++;
1307 hdr->replen += decode_lookup_maxsz;
1308} 1311}
1309 1312
1310static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) 1313static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1335,16 +1338,15 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1335 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1338 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1336 * owner 4 = 32 1339 * owner 4 = 32
1337 */ 1340 */
1338 p = reserve_space(xdr, 8); 1341 encode_nfs4_seqid(xdr, arg->seqid);
1339 *p++ = cpu_to_be32(OP_OPEN);
1340 *p = cpu_to_be32(arg->seqid->sequence->counter);
1341 encode_share_access(xdr, arg->fmode); 1342 encode_share_access(xdr, arg->fmode);
1342 p = reserve_space(xdr, 32); 1343 p = reserve_space(xdr, 36);
1343 p = xdr_encode_hyper(p, arg->clientid); 1344 p = xdr_encode_hyper(p, arg->clientid);
1344 *p++ = cpu_to_be32(20); 1345 *p++ = cpu_to_be32(24);
1345 p = xdr_encode_opaque_fixed(p, "open id:", 8); 1346 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1346 *p++ = cpu_to_be32(arg->server->s_dev); 1347 *p++ = cpu_to_be32(arg->server->s_dev);
1347 xdr_encode_hyper(p, arg->id); 1348 *p++ = cpu_to_be32(arg->id.uniquifier);
1349 xdr_encode_hyper(p, arg->id.create_time);
1348} 1350}
1349 1351
1350static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1352static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1437,14 +1439,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1437{ 1439{
1438 __be32 *p; 1440 __be32 *p;
1439 1441
1440 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); 1442 p = reserve_space(xdr, 4);
1441 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1443 *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1442 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); 1444 encode_nfs4_stateid(xdr, stateid);
1443 encode_string(xdr, name->len, name->name); 1445 encode_string(xdr, name->len, name->name);
1444} 1446}
1445 1447
1446static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) 1448static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
1447{ 1449{
1450 encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
1448 encode_openhdr(xdr, arg); 1451 encode_openhdr(xdr, arg);
1449 encode_opentype(xdr, arg); 1452 encode_opentype(xdr, arg);
1450 switch (arg->claim) { 1453 switch (arg->claim) {
@@ -1460,88 +1463,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
1460 default: 1463 default:
1461 BUG(); 1464 BUG();
1462 } 1465 }
1463 hdr->nops++;
1464 hdr->replen += decode_open_maxsz;
1465} 1466}
1466 1467
1467static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) 1468static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
1468{ 1469{
1469 __be32 *p; 1470 encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr);
1470 1471 encode_nfs4_stateid(xdr, arg->stateid);
1471 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1472 encode_nfs4_seqid(xdr, arg->seqid);
1472 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1473 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1474 *p = cpu_to_be32(arg->seqid->sequence->counter);
1475 hdr->nops++;
1476 hdr->replen += decode_open_confirm_maxsz;
1477} 1473}
1478 1474
1479static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 1475static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1480{ 1476{
1481 __be32 *p; 1477 encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
1482 1478 encode_nfs4_stateid(xdr, arg->stateid);
1483 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1479 encode_nfs4_seqid(xdr, arg->seqid);
1484 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1485 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1486 *p = cpu_to_be32(arg->seqid->sequence->counter);
1487 encode_share_access(xdr, arg->fmode); 1480 encode_share_access(xdr, arg->fmode);
1488 hdr->nops++;
1489 hdr->replen += decode_open_downgrade_maxsz;
1490} 1481}
1491 1482
1492static void 1483static void
1493encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) 1484encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
1494{ 1485{
1495 int len = fh->size; 1486 encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr);
1496 __be32 *p; 1487 encode_string(xdr, fh->size, fh->data);
1497
1498 p = reserve_space(xdr, 8 + len);
1499 *p++ = cpu_to_be32(OP_PUTFH);
1500 xdr_encode_opaque(p, fh->data, len);
1501 hdr->nops++;
1502 hdr->replen += decode_putfh_maxsz;
1503} 1488}
1504 1489
1505static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1490static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1506{ 1491{
1507 __be32 *p; 1492 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
1508
1509 p = reserve_space(xdr, 4);
1510 *p = cpu_to_be32(OP_PUTROOTFH);
1511 hdr->nops++;
1512 hdr->replen += decode_putrootfh_maxsz;
1513} 1493}
1514 1494
1515static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) 1495static void encode_open_stateid(struct xdr_stream *xdr,
1496 const struct nfs_open_context *ctx,
1497 const struct nfs_lock_context *l_ctx,
1498 fmode_t fmode,
1499 int zero_seqid)
1516{ 1500{
1517 nfs4_stateid stateid; 1501 nfs4_stateid stateid;
1518 __be32 *p;
1519 1502
1520 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1521 if (ctx->state != NULL) { 1503 if (ctx->state != NULL) {
1522 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1504 nfs4_select_rw_stateid(&stateid, ctx->state,
1505 fmode, l_ctx->lockowner, l_ctx->pid);
1523 if (zero_seqid) 1506 if (zero_seqid)
1524 stateid.stateid.seqid = 0; 1507 stateid.seqid = 0;
1525 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1508 encode_nfs4_stateid(xdr, &stateid);
1526 } else 1509 } else
1527 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1510 encode_nfs4_stateid(xdr, &zero_stateid);
1528} 1511}
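
[Reviewer note] encode_open_stateid() also gains an fmode argument so the delegation lookup inside nfs4_select_rw_stateid() can distinguish READ from WRITE, and callers pass hdr->minorversion as zero_seqid: under NFSv4.1 sessions a stateid seqid of zero is the "use the most recent stateid" special value, so the client clears it for READ/WRITE rather than risk stale-seqid errors. The call-site shape, as a sketch (field names from this patch, helper hypothetical):

    /* Sketch: zero the seqid on the wire when sessions are in use. */
    static void put_rw_stateid(struct xdr_stream *xdr, nfs4_stateid *sid,
                               u32 minorversion)
    {
            if (minorversion)
                    sid->seqid = 0; /* RFC 5661: seqid 0 == most recent */
            encode_nfs4_stateid(xdr, sid);
    }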
1529 1512
1530static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1513static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1531{ 1514{
1532 __be32 *p; 1515 __be32 *p;
1533 1516
1534 p = reserve_space(xdr, 4); 1517 encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
1535 *p = cpu_to_be32(OP_READ); 1518 encode_open_stateid(xdr, args->context, args->lock_context,
1536 1519 FMODE_READ, hdr->minorversion);
1537 encode_stateid(xdr, args->context, args->lock_context,
1538 hdr->minorversion);
1539 1520
1540 p = reserve_space(xdr, 12); 1521 p = reserve_space(xdr, 12);
1541 p = xdr_encode_hyper(p, args->offset); 1522 p = xdr_encode_hyper(p, args->offset);
1542 *p = cpu_to_be32(args->count); 1523 *p = cpu_to_be32(args->count);
1543 hdr->nops++;
1544 hdr->replen += decode_read_maxsz;
1545} 1524}
1546 1525
1547static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1526static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1551,7 +1530,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1551 FATTR4_WORD1_MOUNTED_ON_FILEID, 1530 FATTR4_WORD1_MOUNTED_ON_FILEID,
1552 }; 1531 };
1553 uint32_t dircount = readdir->count >> 1; 1532 uint32_t dircount = readdir->count >> 1;
1554 __be32 *p; 1533 __be32 *p, verf[2];
1555 1534
1556 if (readdir->plus) { 1535 if (readdir->plus) {
1557 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| 1536 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1566,80 +1545,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1566 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) 1545 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
1567 attrs[0] |= FATTR4_WORD0_FILEID; 1546 attrs[0] |= FATTR4_WORD0_FILEID;
1568 1547
1569 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1548 encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
1570 *p++ = cpu_to_be32(OP_READDIR); 1549 encode_uint64(xdr, readdir->cookie);
1571 p = xdr_encode_hyper(p, readdir->cookie); 1550 encode_nfs4_verifier(xdr, &readdir->verifier);
1572 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1551 p = reserve_space(xdr, 20);
1573 *p++ = cpu_to_be32(dircount); 1552 *p++ = cpu_to_be32(dircount);
1574 *p++ = cpu_to_be32(readdir->count); 1553 *p++ = cpu_to_be32(readdir->count);
1575 *p++ = cpu_to_be32(2); 1554 *p++ = cpu_to_be32(2);
1576 1555
1577 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1556 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1578 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1557 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1579 hdr->nops++; 1558 memcpy(verf, readdir->verifier.data, sizeof(verf));
1580 hdr->replen += decode_readdir_maxsz;
1581 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1559 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1582 __func__, 1560 __func__,
1583 (unsigned long long)readdir->cookie, 1561 (unsigned long long)readdir->cookie,
1584 ((u32 *)readdir->verifier.data)[0], 1562 verf[0], verf[1],
1585 ((u32 *)readdir->verifier.data)[1],
1586 attrs[0] & readdir->bitmask[0], 1563 attrs[0] & readdir->bitmask[0],
1587 attrs[1] & readdir->bitmask[1]); 1564 attrs[1] & readdir->bitmask[1]);
1588} 1565}
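
[Reviewer note] The verf[2] copy replaces the ((u32 *)readdir->verifier.data)[0] casts in the dprintk(): copying the 8-byte opaque verifier into a properly typed local avoids type-punning through a cast (a sparse and strict-aliasing irritant) and any alignment assumptions about the opaque buffer. Generic form of the same trick:

    #include <linux/string.h>

    /* Print an 8-byte opaque verifier without casting through u32 *. */
    static void print_verifier(const void *opaque8)
    {
            __be32 verf[2];

            BUILD_BUG_ON(sizeof(verf) != 8);
            memcpy(verf, opaque8, sizeof(verf));
            pr_debug("verifier = %08x:%08x\n",
                     be32_to_cpu(verf[0]), be32_to_cpu(verf[1]));
    }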
1589 1566
1590static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) 1567static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
1591{ 1568{
1592 __be32 *p; 1569 encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr);
1593
1594 p = reserve_space(xdr, 4);
1595 *p = cpu_to_be32(OP_READLINK);
1596 hdr->nops++;
1597 hdr->replen += decode_readlink_maxsz;
1598} 1570}
1599 1571
1600static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1572static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1601{ 1573{
1602 __be32 *p; 1574 encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr);
1603 1575 encode_string(xdr, name->len, name->name);
1604 p = reserve_space(xdr, 8 + name->len);
1605 *p++ = cpu_to_be32(OP_REMOVE);
1606 xdr_encode_opaque(p, name->name, name->len);
1607 hdr->nops++;
1608 hdr->replen += decode_remove_maxsz;
1609} 1576}
1610 1577
1611static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) 1578static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
1612{ 1579{
1613 __be32 *p; 1580 encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr);
1614
1615 p = reserve_space(xdr, 4);
1616 *p = cpu_to_be32(OP_RENAME);
1617 encode_string(xdr, oldname->len, oldname->name); 1581 encode_string(xdr, oldname->len, oldname->name);
1618 encode_string(xdr, newname->len, newname->name); 1582 encode_string(xdr, newname->len, newname->name);
1619 hdr->nops++;
1620 hdr->replen += decode_rename_maxsz;
1621} 1583}
1622 1584
1623static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) 1585static void encode_renew(struct xdr_stream *xdr, clientid4 clid,
1586 struct compound_hdr *hdr)
1624{ 1587{
1625 __be32 *p; 1588 encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr);
1626 1589 encode_uint64(xdr, clid);
1627 p = reserve_space(xdr, 12);
1628 *p++ = cpu_to_be32(OP_RENEW);
1629 xdr_encode_hyper(p, client_stateid->cl_clientid);
1630 hdr->nops++;
1631 hdr->replen += decode_renew_maxsz;
1632} 1590}
1633 1591
1634static void 1592static void
1635encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1593encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1636{ 1594{
1637 __be32 *p; 1595 encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr);
1638
1639 p = reserve_space(xdr, 4);
1640 *p = cpu_to_be32(OP_RESTOREFH);
1641 hdr->nops++;
1642 hdr->replen += decode_restorefh_maxsz;
1643} 1596}
1644 1597
1645static void 1598static void
@@ -1647,9 +1600,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1647{ 1600{
1648 __be32 *p; 1601 __be32 *p;
1649 1602
1650 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); 1603 encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
1651 *p++ = cpu_to_be32(OP_SETATTR); 1604 encode_nfs4_stateid(xdr, &zero_stateid);
1652 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1653 p = reserve_space(xdr, 2*4); 1605 p = reserve_space(xdr, 2*4);
1654 *p++ = cpu_to_be32(1); 1606 *p++ = cpu_to_be32(1);
1655 *p = cpu_to_be32(FATTR4_WORD0_ACL); 1607 *p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1657,30 +1609,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1657 p = reserve_space(xdr, 4); 1609 p = reserve_space(xdr, 4);
1658 *p = cpu_to_be32(arg->acl_len); 1610 *p = cpu_to_be32(arg->acl_len);
1659 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1611 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1660 hdr->nops++;
1661 hdr->replen += decode_setacl_maxsz;
1662} 1612}
1663 1613
1664static void 1614static void
1665encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1615encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1666{ 1616{
1667 __be32 *p; 1617 encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr);
1668
1669 p = reserve_space(xdr, 4);
1670 *p = cpu_to_be32(OP_SAVEFH);
1671 hdr->nops++;
1672 hdr->replen += decode_savefh_maxsz;
1673} 1618}
1674 1619
1675static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) 1620static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
1676{ 1621{
1677 __be32 *p; 1622 encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
1678 1623 encode_nfs4_stateid(xdr, &arg->stateid);
1679 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1680 *p++ = cpu_to_be32(OP_SETATTR);
1681 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1682 hdr->nops++;
1683 hdr->replen += decode_setattr_maxsz;
1684 encode_attrs(xdr, arg->iap, server); 1624 encode_attrs(xdr, arg->iap, server);
1685} 1625}
1686 1626
@@ -1688,9 +1628,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1688{ 1628{
1689 __be32 *p; 1629 __be32 *p;
1690 1630
1691 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); 1631 encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
1692 *p++ = cpu_to_be32(OP_SETCLIENTID); 1632 encode_nfs4_verifier(xdr, setclientid->sc_verifier);
1693 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1694 1633
1695 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1634 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1696 p = reserve_space(xdr, 4); 1635 p = reserve_space(xdr, 4);
@@ -1699,31 +1638,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1699 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1638 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1700 p = reserve_space(xdr, 4); 1639 p = reserve_space(xdr, 4);
1701 *p = cpu_to_be32(setclientid->sc_cb_ident); 1640 *p = cpu_to_be32(setclientid->sc_cb_ident);
1702 hdr->nops++;
1703 hdr->replen += decode_setclientid_maxsz;
1704} 1641}
1705 1642
1706static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) 1643static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
1707{ 1644{
1708 __be32 *p; 1645 encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM,
1709 1646 decode_setclientid_confirm_maxsz, hdr);
1710 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); 1647 encode_uint64(xdr, arg->clientid);
1711 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); 1648 encode_nfs4_verifier(xdr, &arg->confirm);
1712 p = xdr_encode_hyper(p, arg->clientid);
1713 xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
1714 hdr->nops++;
1715 hdr->replen += decode_setclientid_confirm_maxsz;
1716} 1649}
1717 1650
1718static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1651static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1719{ 1652{
1720 __be32 *p; 1653 __be32 *p;
1721 1654
1722 p = reserve_space(xdr, 4); 1655 encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
1723 *p = cpu_to_be32(OP_WRITE); 1656 encode_open_stateid(xdr, args->context, args->lock_context,
1724 1657 FMODE_WRITE, hdr->minorversion);
1725 encode_stateid(xdr, args->context, args->lock_context,
1726 hdr->minorversion);
1727 1658
1728 p = reserve_space(xdr, 16); 1659 p = reserve_space(xdr, 16);
1729 p = xdr_encode_hyper(p, args->offset); 1660 p = xdr_encode_hyper(p, args->offset);
@@ -1731,32 +1662,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1731 *p = cpu_to_be32(args->count); 1662 *p = cpu_to_be32(args->count);
1732 1663
1733 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1664 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1734 hdr->nops++;
1735 hdr->replen += decode_write_maxsz;
1736} 1665}
1737 1666
1738static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) 1667static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
1739{ 1668{
1740 __be32 *p; 1669 encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr);
1741 1670 encode_nfs4_stateid(xdr, stateid);
1742 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1743
1744 *p++ = cpu_to_be32(OP_DELEGRETURN);
1745 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1746 hdr->nops++;
1747 hdr->replen += decode_delegreturn_maxsz;
1748} 1671}
1749 1672
1750static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1673static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1751{ 1674{
1752 int len = name->len; 1675 encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr);
1753 __be32 *p; 1676 encode_string(xdr, name->len, name->name);
1754
1755 p = reserve_space(xdr, 8 + len);
1756 *p++ = cpu_to_be32(OP_SECINFO);
1757 xdr_encode_opaque(p, name->name, len);
1758 hdr->nops++;
1759 hdr->replen += decode_secinfo_maxsz;
1760} 1677}
1761 1678
1762#if defined(CONFIG_NFS_V4_1) 1679#if defined(CONFIG_NFS_V4_1)
@@ -1766,19 +1683,39 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1766 struct compound_hdr *hdr) 1683 struct compound_hdr *hdr)
1767{ 1684{
1768 __be32 *p; 1685 __be32 *p;
1686 char impl_name[NFS4_OPAQUE_LIMIT];
1687 int len = 0;
1769 1688
1770 p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); 1689 encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
1771 *p++ = cpu_to_be32(OP_EXCHANGE_ID); 1690 encode_nfs4_verifier(xdr, args->verifier);
1772 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1773 1691
1774 encode_string(xdr, args->id_len, args->id); 1692 encode_string(xdr, args->id_len, args->id);
1775 1693
1776 p = reserve_space(xdr, 12); 1694 p = reserve_space(xdr, 12);
1777 *p++ = cpu_to_be32(args->flags); 1695 *p++ = cpu_to_be32(args->flags);
1778 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ 1696 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1779 *p = cpu_to_be32(0); /* zero length implementation id array */ 1697
1780 hdr->nops++; 1698 if (send_implementation_id &&
1781 hdr->replen += decode_exchange_id_maxsz; 1699 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
1700 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN)
1701 <= NFS4_OPAQUE_LIMIT + 1)
1702 len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s",
1703 utsname()->sysname, utsname()->release,
1704 utsname()->version, utsname()->machine);
1705
1706 if (len > 0) {
1707 *p = cpu_to_be32(1); /* implementation id array length=1 */
1708
1709 encode_string(xdr,
1710 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
1711 CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN);
1712 encode_string(xdr, len, impl_name);
1713 /* just send zeros for nii_date - the date is in nii_name */
1714 p = reserve_space(xdr, 12);
1715 p = xdr_encode_hyper(p, 0);
1716 *p = cpu_to_be32(0);
1717 } else
1718 *p = cpu_to_be32(0); /* implementation id array length=0 */
1782} 1719}
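
[Reviewer note] Instead of the empty implementation-id array, EXCHANGE_ID now carries one nfs_impl_id4 (RFC 5661: nii_domain, nii_name, nii_date): the domain comes from the CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN build option, the name is assembled from utsname(), and nii_date is sent as zeros since the same information is embedded in the name. Building such a name into a bounded buffer, sketched with an explicit truncation check (the patch itself relies on snprintf's return value):

    #include <linux/utsname.h>

    /* Sketch: compose a "Linux <release> <version> <machine>"-style name. */
    static int build_impl_name(char *buf, size_t size)
    {
            int len = snprintf(buf, size, "%s %s %s %s",
                               utsname()->sysname, utsname()->release,
                               utsname()->version, utsname()->machine);

            if (len <= 0 || len >= (int)size)
                    return 0;       /* truncated: send a zero-length array */
            return len;
    }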
1783 1720
1784static void encode_create_session(struct xdr_stream *xdr, 1721static void encode_create_session(struct xdr_stream *xdr,
@@ -1801,8 +1738,8 @@ static void encode_create_session(struct xdr_stream *xdr,
1801 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1738 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1802 clp->cl_ipaddr); 1739 clp->cl_ipaddr);
1803 1740
1804 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1741 encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
1805 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1742 p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12);
1806 p = xdr_encode_hyper(p, clp->cl_clientid); 1743 p = xdr_encode_hyper(p, clp->cl_clientid);
1807 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1744 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1808 *p++ = cpu_to_be32(args->flags); /*flags */ 1745 *p++ = cpu_to_be32(args->flags); /*flags */
@@ -1835,33 +1772,22 @@ static void encode_create_session(struct xdr_stream *xdr,
1835 *p++ = cpu_to_be32(0); /* UID */ 1772 *p++ = cpu_to_be32(0); /* UID */
1836 *p++ = cpu_to_be32(0); /* GID */ 1773 *p++ = cpu_to_be32(0); /* GID */
1837 *p = cpu_to_be32(0); /* No more gids */ 1774 *p = cpu_to_be32(0); /* No more gids */
1838 hdr->nops++;
1839 hdr->replen += decode_create_session_maxsz;
1840} 1775}
1841 1776
1842static void encode_destroy_session(struct xdr_stream *xdr, 1777static void encode_destroy_session(struct xdr_stream *xdr,
1843 struct nfs4_session *session, 1778 struct nfs4_session *session,
1844 struct compound_hdr *hdr) 1779 struct compound_hdr *hdr)
1845{ 1780{
1846 __be32 *p; 1781 encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
1847 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); 1782 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1848 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1849 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1850 hdr->nops++;
1851 hdr->replen += decode_destroy_session_maxsz;
1852} 1783}
1853 1784
1854static void encode_reclaim_complete(struct xdr_stream *xdr, 1785static void encode_reclaim_complete(struct xdr_stream *xdr,
1855 struct nfs41_reclaim_complete_args *args, 1786 struct nfs41_reclaim_complete_args *args,
1856 struct compound_hdr *hdr) 1787 struct compound_hdr *hdr)
1857{ 1788{
1858 __be32 *p; 1789 encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
1859 1790 encode_uint32(xdr, args->one_fs);
1860 p = reserve_space(xdr, 8);
1861 *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
1862 *p++ = cpu_to_be32(args->one_fs);
1863 hdr->nops++;
1864 hdr->replen += decode_reclaim_complete_maxsz;
1865} 1791}
1866#endif /* CONFIG_NFS_V4_1 */ 1792#endif /* CONFIG_NFS_V4_1 */
1867 1793
@@ -1883,8 +1809,7 @@ static void encode_sequence(struct xdr_stream *xdr,
1883 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1809 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1884 slot = tp->slots + args->sa_slotid; 1810 slot = tp->slots + args->sa_slotid;
1885 1811
1886 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); 1812 encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
1887 *p++ = cpu_to_be32(OP_SEQUENCE);
1888 1813
1889 /* 1814 /*
1890 * Sessionid + seqid + slotid + max slotid + cache_this 1815 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1898,13 +1823,12 @@ static void encode_sequence(struct xdr_stream *xdr,
1898 ((u32 *)session->sess_id.data)[3], 1823 ((u32 *)session->sess_id.data)[3],
1899 slot->seq_nr, args->sa_slotid, 1824 slot->seq_nr, args->sa_slotid,
1900 tp->highest_used_slotid, args->sa_cache_this); 1825 tp->highest_used_slotid, args->sa_cache_this);
1826 p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
1901 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1827 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1902 *p++ = cpu_to_be32(slot->seq_nr); 1828 *p++ = cpu_to_be32(slot->seq_nr);
1903 *p++ = cpu_to_be32(args->sa_slotid); 1829 *p++ = cpu_to_be32(args->sa_slotid);
1904 *p++ = cpu_to_be32(tp->highest_used_slotid); 1830 *p++ = cpu_to_be32(tp->highest_used_slotid);
1905 *p = cpu_to_be32(args->sa_cache_this); 1831 *p = cpu_to_be32(args->sa_cache_this);
1906 hdr->nops++;
1907 hdr->replen += decode_sequence_maxsz;
1908#endif /* CONFIG_NFS_V4_1 */ 1832#endif /* CONFIG_NFS_V4_1 */
1909} 1833}
1910 1834
@@ -1919,14 +1843,12 @@ encode_getdevicelist(struct xdr_stream *xdr,
1919 .data = "dummmmmy", 1843 .data = "dummmmmy",
1920 }; 1844 };
1921 1845
1922 p = reserve_space(xdr, 20); 1846 encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr);
1923 *p++ = cpu_to_be32(OP_GETDEVICELIST); 1847 p = reserve_space(xdr, 16);
1924 *p++ = cpu_to_be32(args->layoutclass); 1848 *p++ = cpu_to_be32(args->layoutclass);
1925 *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); 1849 *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
1926 xdr_encode_hyper(p, 0ULL); /* cookie */ 1850 xdr_encode_hyper(p, 0ULL); /* cookie */
1927 encode_nfs4_verifier(xdr, &dummy); 1851 encode_nfs4_verifier(xdr, &dummy);
1928 hdr->nops++;
1929 hdr->replen += decode_getdevicelist_maxsz;
1930} 1852}
1931 1853
1932static void 1854static void
@@ -1936,15 +1858,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
1936{ 1858{
1937 __be32 *p; 1859 __be32 *p;
1938 1860
1939 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); 1861 encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr);
1940 *p++ = cpu_to_be32(OP_GETDEVICEINFO); 1862 p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE);
1941 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, 1863 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1942 NFS4_DEVICEID4_SIZE); 1864 NFS4_DEVICEID4_SIZE);
1943 *p++ = cpu_to_be32(args->pdev->layout_type); 1865 *p++ = cpu_to_be32(args->pdev->layout_type);
1944 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ 1866 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1945 *p++ = cpu_to_be32(0); /* bitmap length 0 */ 1867 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1946 hdr->nops++;
1947 hdr->replen += decode_getdeviceinfo_maxsz;
1948} 1868}
1949 1869
1950static void 1870static void
@@ -1954,16 +1874,16 @@ encode_layoutget(struct xdr_stream *xdr,
1954{ 1874{
1955 __be32 *p; 1875 __be32 *p;
1956 1876
1957 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); 1877 encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr);
1958 *p++ = cpu_to_be32(OP_LAYOUTGET); 1878 p = reserve_space(xdr, 36);
1959 *p++ = cpu_to_be32(0); /* Signal layout available */ 1879 *p++ = cpu_to_be32(0); /* Signal layout available */
1960 *p++ = cpu_to_be32(args->type); 1880 *p++ = cpu_to_be32(args->type);
1961 *p++ = cpu_to_be32(args->range.iomode); 1881 *p++ = cpu_to_be32(args->range.iomode);
1962 p = xdr_encode_hyper(p, args->range.offset); 1882 p = xdr_encode_hyper(p, args->range.offset);
1963 p = xdr_encode_hyper(p, args->range.length); 1883 p = xdr_encode_hyper(p, args->range.length);
1964 p = xdr_encode_hyper(p, args->minlength); 1884 p = xdr_encode_hyper(p, args->minlength);
1965 p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); 1885 encode_nfs4_stateid(xdr, &args->stateid);
1966 *p = cpu_to_be32(args->maxcount); 1886 encode_uint32(xdr, args->maxcount);
1967 1887
1968 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", 1888 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1969 __func__, 1889 __func__,
@@ -1972,8 +1892,6 @@ encode_layoutget(struct xdr_stream *xdr,
1972 (unsigned long)args->range.offset, 1892 (unsigned long)args->range.offset,
1973 (unsigned long)args->range.length, 1893 (unsigned long)args->range.length,
1974 args->maxcount); 1894 args->maxcount);
1975 hdr->nops++;
1976 hdr->replen += decode_layoutget_maxsz;
1977} 1895}
1978 1896
1979static int 1897static int
@@ -1987,13 +1905,14 @@ encode_layoutcommit(struct xdr_stream *xdr,
1987 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, 1905 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1988 NFS_SERVER(args->inode)->pnfs_curr_ld->id); 1906 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1989 1907
1990 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); 1908 encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr);
1991 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); 1909 p = reserve_space(xdr, 20);
1992 /* Only whole file layouts */ 1910 /* Only whole file layouts */
1993 p = xdr_encode_hyper(p, 0); /* offset */ 1911 p = xdr_encode_hyper(p, 0); /* offset */
1994 p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ 1912 p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */
1995 *p++ = cpu_to_be32(0); /* reclaim */ 1913 *p = cpu_to_be32(0); /* reclaim */
1996 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); 1914 encode_nfs4_stateid(xdr, &args->stateid);
1915 p = reserve_space(xdr, 20);
1997 *p++ = cpu_to_be32(1); /* newoffset = TRUE */ 1916 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1998 p = xdr_encode_hyper(p, args->lastbytewritten); 1917 p = xdr_encode_hyper(p, args->lastbytewritten);
1999 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ 1918 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
@@ -2002,13 +1921,9 @@ encode_layoutcommit(struct xdr_stream *xdr,
2002 if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) 1921 if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
2003 NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( 1922 NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
2004 NFS_I(inode)->layout, xdr, args); 1923 NFS_I(inode)->layout, xdr, args);
2005 else { 1924 else
2006 p = reserve_space(xdr, 4); 1925 encode_uint32(xdr, 0); /* no layout-type payload */
2007 *p = cpu_to_be32(0); /* no layout-type payload */
2008 }
2009 1926
2010 hdr->nops++;
2011 hdr->replen += decode_layoutcommit_maxsz;
2012 return 0; 1927 return 0;
2013} 1928}
2014 1929
@@ -2019,27 +1934,23 @@ encode_layoutreturn(struct xdr_stream *xdr,
2019{ 1934{
2020 __be32 *p; 1935 __be32 *p;
2021 1936
2022 p = reserve_space(xdr, 20); 1937 encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
2023 *p++ = cpu_to_be32(OP_LAYOUTRETURN); 1938 p = reserve_space(xdr, 16);
 2024 *p++ = cpu_to_be32(0); /* reclaim; always 0 for now */ 1939 *p++ = cpu_to_be32(0); /* reclaim; always 0 for now */
2025 *p++ = cpu_to_be32(args->layout_type); 1940 *p++ = cpu_to_be32(args->layout_type);
2026 *p++ = cpu_to_be32(IOMODE_ANY); 1941 *p++ = cpu_to_be32(IOMODE_ANY);
2027 *p = cpu_to_be32(RETURN_FILE); 1942 *p = cpu_to_be32(RETURN_FILE);
2028 p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); 1943 p = reserve_space(xdr, 16);
2029 p = xdr_encode_hyper(p, 0); 1944 p = xdr_encode_hyper(p, 0);
2030 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); 1945 p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
2031 spin_lock(&args->inode->i_lock); 1946 spin_lock(&args->inode->i_lock);
2032 xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); 1947 encode_nfs4_stateid(xdr, &args->stateid);
2033 spin_unlock(&args->inode->i_lock); 1948 spin_unlock(&args->inode->i_lock);
2034 if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { 1949 if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
2035 NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( 1950 NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
2036 NFS_I(args->inode)->layout, xdr, args); 1951 NFS_I(args->inode)->layout, xdr, args);
2037 } else { 1952 } else
2038 p = reserve_space(xdr, 4); 1953 encode_uint32(xdr, 0);
2039 *p = cpu_to_be32(0);
2040 }
2041 hdr->nops++;
2042 hdr->replen += decode_layoutreturn_maxsz;
2043} 1954}
2044 1955
2045static int 1956static int
@@ -2047,12 +1958,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
2047 const struct nfs41_secinfo_no_name_args *args, 1958 const struct nfs41_secinfo_no_name_args *args,
2048 struct compound_hdr *hdr) 1959 struct compound_hdr *hdr)
2049{ 1960{
2050 __be32 *p; 1961 encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr);
2051 p = reserve_space(xdr, 8); 1962 encode_uint32(xdr, args->style);
2052 *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
2053 *p++ = cpu_to_be32(args->style);
2054 hdr->nops++;
2055 hdr->replen += decode_secinfo_no_name_maxsz;
2056 return 0; 1963 return 0;
2057} 1964}
2058 1965
@@ -2060,26 +1967,17 @@ static void encode_test_stateid(struct xdr_stream *xdr,
2060 struct nfs41_test_stateid_args *args, 1967 struct nfs41_test_stateid_args *args,
2061 struct compound_hdr *hdr) 1968 struct compound_hdr *hdr)
2062{ 1969{
2063 __be32 *p; 1970 encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
2064 1971 encode_uint32(xdr, 1);
2065 p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); 1972 encode_nfs4_stateid(xdr, args->stateid);
2066 *p++ = cpu_to_be32(OP_TEST_STATEID);
2067 *p++ = cpu_to_be32(1);
2068 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2069 hdr->nops++;
2070 hdr->replen += decode_test_stateid_maxsz;
2071} 1973}
2072 1974
2073static void encode_free_stateid(struct xdr_stream *xdr, 1975static void encode_free_stateid(struct xdr_stream *xdr,
2074 struct nfs41_free_stateid_args *args, 1976 struct nfs41_free_stateid_args *args,
2075 struct compound_hdr *hdr) 1977 struct compound_hdr *hdr)
2076{ 1978{
2077 __be32 *p; 1979 encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
2078 p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); 1980 encode_nfs4_stateid(xdr, args->stateid);
2079 *p++ = cpu_to_be32(OP_FREE_STATEID);
2080 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2081 hdr->nops++;
2082 hdr->replen += decode_free_stateid_maxsz;
2083} 1981}
2084#endif /* CONFIG_NFS_V4_1 */ 1982#endif /* CONFIG_NFS_V4_1 */
2085 1983
@@ -2633,6 +2531,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
2633 encode_sequence(xdr, &args->seq_args, &hdr); 2531 encode_sequence(xdr, &args->seq_args, &hdr);
2634 encode_putfh(xdr, args->fhandle, &hdr); 2532 encode_putfh(xdr, args->fhandle, &hdr);
2635 encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| 2533 encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2534 FATTR4_WORD0_FH_EXPIRE_TYPE|
2636 FATTR4_WORD0_LINK_SUPPORT| 2535 FATTR4_WORD0_LINK_SUPPORT|
2637 FATTR4_WORD0_SYMLINK_SUPPORT| 2536 FATTR4_WORD0_SYMLINK_SUPPORT|
2638 FATTR4_WORD0_ACLSUPPORT, &hdr); 2537 FATTR4_WORD0_ACLSUPPORT, &hdr);
@@ -2650,7 +2549,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
2650 }; 2549 };
2651 2550
2652 encode_compound_hdr(xdr, req, &hdr); 2551 encode_compound_hdr(xdr, req, &hdr);
2653 encode_renew(xdr, clp, &hdr); 2552 encode_renew(xdr, clp->cl_clientid, &hdr);
2654 encode_nops(&hdr); 2553 encode_nops(&hdr);
2655} 2554}
2656 2555
@@ -3180,6 +3079,28 @@ out_overflow:
3180 return -EIO; 3079 return -EIO;
3181} 3080}
3182 3081
3082static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
3083 uint32_t *bitmap, uint32_t *type)
3084{
3085 __be32 *p;
3086
3087 *type = 0;
3088 if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U)))
3089 return -EIO;
3090 if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) {
3091 p = xdr_inline_decode(xdr, 4);
3092 if (unlikely(!p))
3093 goto out_overflow;
3094 *type = be32_to_cpup(p);
3095 bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE;
3096 }
3097 dprintk("%s: expire type=0x%x\n", __func__, *type);
3098 return 0;
3099out_overflow:
3100 print_overflow_msg(__func__, xdr);
3101 return -EIO;
3102}
3103
3183static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 3104static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
3184{ 3105{
3185 __be32 *p; 3106 __be32 *p;
@@ -3513,16 +3434,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3513 n = be32_to_cpup(p); 3434 n = be32_to_cpup(p);
3514 if (n == 0) 3435 if (n == 0)
3515 goto root_path; 3436 goto root_path;
3516 dprintk("path "); 3437 dprintk("pathname4: ");
3517 path->ncomponents = 0; 3438 path->ncomponents = 0;
3518 while (path->ncomponents < n) { 3439 while (path->ncomponents < n) {
3519 struct nfs4_string *component = &path->components[path->ncomponents]; 3440 struct nfs4_string *component = &path->components[path->ncomponents];
3520 status = decode_opaque_inline(xdr, &component->len, &component->data); 3441 status = decode_opaque_inline(xdr, &component->len, &component->data);
3521 if (unlikely(status != 0)) 3442 if (unlikely(status != 0))
3522 goto out_eio; 3443 goto out_eio;
3523 if (path->ncomponents != n) 3444 ifdebug (XDR)
3524 dprintk("/"); 3445 pr_cont("%s%.*s ",
3525 dprintk("%s", component->data); 3446 (path->ncomponents != n ? "/ " : ""),
3447 component->len, component->data);
3526 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) 3448 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
3527 path->ncomponents++; 3449 path->ncomponents++;
3528 else { 3450 else {
@@ -3531,14 +3453,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3531 } 3453 }
3532 } 3454 }
3533out: 3455out:
3534 dprintk("\n");
3535 return status; 3456 return status;
3536root_path: 3457root_path:
3537/* a root pathname is sent as a zero component4 */ 3458/* a root pathname is sent as a zero component4 */
3538 path->ncomponents = 1; 3459 path->ncomponents = 1;
3539 path->components[0].len=0; 3460 path->components[0].len=0;
3540 path->components[0].data=NULL; 3461 path->components[0].data=NULL;
3541 dprintk("path /\n"); 3462 dprintk("pathname4: /\n");
3542 goto out; 3463 goto out;
3543out_eio: 3464out_eio:
3544 dprintk(" status %d", status); 3465 dprintk(" status %d", status);
@@ -3560,7 +3481,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3560 status = 0; 3481 status = 0;
3561 if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) 3482 if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
3562 goto out; 3483 goto out;
3563 dprintk("%s: fsroot ", __func__); 3484 status = -EIO;
 3485 /* Ignore broken servers that return unrequested attrs */
3486 if (unlikely(res == NULL))
3487 goto out;
3488 dprintk("%s: fsroot:\n", __func__);
3564 status = decode_pathname(xdr, &res->fs_path); 3489 status = decode_pathname(xdr, &res->fs_path);
3565 if (unlikely(status != 0)) 3490 if (unlikely(status != 0))
3566 goto out; 3491 goto out;
@@ -3581,7 +3506,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3581 m = be32_to_cpup(p); 3506 m = be32_to_cpup(p);
3582 3507
3583 loc->nservers = 0; 3508 loc->nservers = 0;
3584 dprintk("%s: servers ", __func__); 3509 dprintk("%s: servers:\n", __func__);
3585 while (loc->nservers < m) { 3510 while (loc->nservers < m) {
3586 struct nfs4_string *server = &loc->servers[loc->nservers]; 3511 struct nfs4_string *server = &loc->servers[loc->nservers];
3587 status = decode_opaque_inline(xdr, &server->len, &server->data); 3512 status = decode_opaque_inline(xdr, &server->len, &server->data);
@@ -3613,7 +3538,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3613 res->nlocations++; 3538 res->nlocations++;
3614 } 3539 }
3615 if (res->nlocations != 0) 3540 if (res->nlocations != 0)
3616 status = NFS_ATTR_FATTR_V4_REFERRAL; 3541 status = NFS_ATTR_FATTR_V4_LOCATIONS;
3617out: 3542out:
3618 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 3543 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
3619 return status; 3544 return status;
@@ -4157,7 +4082,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
4157 4082
4158static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) 4083static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
4159{ 4084{
4160 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); 4085 return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
4161} 4086}
4162 4087
4163static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 4088static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -4174,7 +4099,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
4174 4099
4175static int decode_verifier(struct xdr_stream *xdr, void *verifier) 4100static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4176{ 4101{
4177 return decode_opaque_fixed(xdr, verifier, 8); 4102 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4178} 4103}
4179 4104
4180static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4105static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
@@ -4224,6 +4149,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
4224 goto xdr_error; 4149 goto xdr_error;
4225 if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) 4150 if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
4226 goto xdr_error; 4151 goto xdr_error;
4152 if ((status = decode_attr_fh_expire_type(xdr, bitmap,
4153 &res->fh_expire_type)) != 0)
4154 goto xdr_error;
4227 if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) 4155 if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
4228 goto xdr_error; 4156 goto xdr_error;
4229 if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) 4157 if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
@@ -4294,6 +4222,7 @@ xdr_error:
4294 4222
4295static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4223static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4296 struct nfs_fattr *fattr, struct nfs_fh *fh, 4224 struct nfs_fattr *fattr, struct nfs_fh *fh,
4225 struct nfs4_fs_locations *fs_loc,
4297 const struct nfs_server *server) 4226 const struct nfs_server *server)
4298{ 4227{
4299 int status; 4228 int status;
@@ -4329,8 +4258,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4329 status = decode_attr_error(xdr, bitmap, &err); 4258 status = decode_attr_error(xdr, bitmap, &err);
4330 if (status < 0) 4259 if (status < 0)
4331 goto xdr_error; 4260 goto xdr_error;
4332 if (err == -NFS4ERR_WRONGSEC)
4333 nfs_fixup_secinfo_attributes(fattr, fh);
4334 4261
4335 status = decode_attr_filehandle(xdr, bitmap, fh); 4262 status = decode_attr_filehandle(xdr, bitmap, fh);
4336 if (status < 0) 4263 if (status < 0)
@@ -4341,9 +4268,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4341 goto xdr_error; 4268 goto xdr_error;
4342 fattr->valid |= status; 4269 fattr->valid |= status;
4343 4270
4344 status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, 4271 status = decode_attr_fs_locations(xdr, bitmap, fs_loc);
4345 struct nfs4_fs_locations,
4346 fattr));
4347 if (status < 0) 4272 if (status < 0)
4348 goto xdr_error; 4273 goto xdr_error;
4349 fattr->valid |= status; 4274 fattr->valid |= status;
@@ -4407,7 +4332,8 @@ xdr_error:
4407} 4332}
4408 4333
4409static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4334static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4410 struct nfs_fh *fh, const struct nfs_server *server) 4335 struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
4336 const struct nfs_server *server)
4411{ 4337{
4412 __be32 *savep; 4338 __be32 *savep;
4413 uint32_t attrlen, 4339 uint32_t attrlen,
@@ -4426,7 +4352,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
4426 if (status < 0) 4352 if (status < 0)
4427 goto xdr_error; 4353 goto xdr_error;
4428 4354
4429 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); 4355 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server);
4430 if (status < 0) 4356 if (status < 0)
4431 goto xdr_error; 4357 goto xdr_error;
4432 4358
@@ -4439,7 +4365,7 @@ xdr_error:
4439static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4365static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4440 const struct nfs_server *server) 4366 const struct nfs_server *server)
4441{ 4367{
4442 return decode_getfattr_generic(xdr, fattr, NULL, server); 4368 return decode_getfattr_generic(xdr, fattr, NULL, NULL, server);
4443} 4369}
4444 4370
4445/* 4371/*
@@ -4463,8 +4389,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4463 return 0; 4389 return 0;
4464 } 4390 }
4465 if (num > 1) 4391 if (num > 1)
4466 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " 4392 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
4467 "per filesystem not supported\n", __func__); 4393 "drivers per filesystem not supported\n", __func__);
4468 4394
4469 /* Decode and set first layout type, move xdr->p past unused types */ 4395 /* Decode and set first layout type, move xdr->p past unused types */
4470 p = xdr_inline_decode(xdr, num * 4); 4396 p = xdr_inline_decode(xdr, num * 4);
@@ -4863,17 +4789,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4863 size_t hdrlen; 4789 size_t hdrlen;
4864 u32 recvd, pglen = rcvbuf->page_len; 4790 u32 recvd, pglen = rcvbuf->page_len;
4865 int status; 4791 int status;
4792 __be32 verf[2];
4866 4793
4867 status = decode_op_hdr(xdr, OP_READDIR); 4794 status = decode_op_hdr(xdr, OP_READDIR);
4868 if (!status) 4795 if (!status)
4869 status = decode_verifier(xdr, readdir->verifier.data); 4796 status = decode_verifier(xdr, readdir->verifier.data);
4870 if (unlikely(status)) 4797 if (unlikely(status))
4871 return status; 4798 return status;
4799 memcpy(verf, readdir->verifier.data, sizeof(verf));
4872 dprintk("%s: verifier = %08x:%08x\n", 4800 dprintk("%s: verifier = %08x:%08x\n",
4873 __func__, 4801 __func__, verf[0], verf[1]);
4874 ((u32 *)readdir->verifier.data)[0],
4875 ((u32 *)readdir->verifier.data)[1]);
4876
4877 4802
4878 hdrlen = (char *) xdr->p - (char *) iov->iov_base; 4803 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
4879 recvd = rcvbuf->len - hdrlen; 4804 recvd = rcvbuf->len - hdrlen;
@@ -4975,11 +4900,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
4975 bitmap[3] = {0}; 4900 bitmap[3] = {0};
4976 struct kvec *iov = req->rq_rcv_buf.head; 4901 struct kvec *iov = req->rq_rcv_buf.head;
4977 int status; 4902 int status;
4903 size_t page_len = xdr->buf->page_len;
4978 4904
4979 res->acl_len = 0; 4905 res->acl_len = 0;
4980 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 4906 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
4981 goto out; 4907 goto out;
4908
4982 bm_p = xdr->p; 4909 bm_p = xdr->p;
4910 res->acl_data_offset = be32_to_cpup(bm_p) + 2;
4911 res->acl_data_offset <<= 2;
4912 /* Check if the acl data starts beyond the allocated buffer */
4913 if (res->acl_data_offset > page_len)
4914 return -ERANGE;
4915
4983 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 4916 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
4984 goto out; 4917 goto out;
4985 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) 4918 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4989,28 +4922,24 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
4989 return -EIO; 4922 return -EIO;
4990 if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { 4923 if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
4991 size_t hdrlen; 4924 size_t hdrlen;
4992 u32 recvd;
4993 4925
4994 /* The bitmap (xdr len + bitmaps) and the attr xdr len words 4926 /* The bitmap (xdr len + bitmaps) and the attr xdr len words
4995 * are stored with the acl data to handle the problem of 4927 * are stored with the acl data to handle the problem of
4996 * variable length bitmaps.*/ 4928 * variable length bitmaps.*/
4997 xdr->p = bm_p; 4929 xdr->p = bm_p;
4998 res->acl_data_offset = be32_to_cpup(bm_p) + 2;
4999 res->acl_data_offset <<= 2;
5000 4930
5001 /* We ignore &savep and don't do consistency checks on 4931 /* We ignore &savep and don't do consistency checks on
5002 * the attr length. Let userspace figure it out.... */ 4932 * the attr length. Let userspace figure it out.... */
5003 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; 4933 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
5004 attrlen += res->acl_data_offset; 4934 attrlen += res->acl_data_offset;
5005 recvd = req->rq_rcv_buf.len - hdrlen; 4935 if (attrlen > page_len) {
5006 if (attrlen > recvd) {
5007 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { 4936 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
5008 /* getxattr interface called with a NULL buf */ 4937 /* getxattr interface called with a NULL buf */
5009 res->acl_len = attrlen; 4938 res->acl_len = attrlen;
5010 goto out; 4939 goto out;
5011 } 4940 }
5012 dprintk("NFS: acl reply: attrlen %u > recvd %u\n", 4941 dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
5013 attrlen, recvd); 4942 attrlen, page_len);
5014 return -EINVAL; 4943 return -EINVAL;
5015 } 4944 }
5016 xdr_read_pages(xdr, attrlen); 4945 xdr_read_pages(xdr, attrlen);
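The acl_data_offset arithmetic in the hunk above is terse: the words that precede the acl payload are the bitmap length word, the bitmap words themselves, and the attribute length word, so (count + 2) words shifted left by 2 gives the byte offset. A minimal standalone sketch of the same computation, assuming a two-word bitmap (names here are illustrative, not taken from nfs4xdr.c):

/* Standalone model of the offset math above, under the assumption that
 * be32_to_cpup(bm_p) read a bitmap word count of 2:
 *   1 word  - bitmap length
 *   2 words - the bitmap itself
 *   1 word  - attribute length
 * (2 + 2) words << 2 = 16 bytes before the acl data starts.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bitmap_count = 2;	/* assumed wire value */
	size_t acl_data_offset = (bitmap_count + 2) << 2;

	printf("acl data starts at byte %zu\n", acl_data_offset);
	return 0;
}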
@@ -5120,7 +5049,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
5120 goto out_overflow; 5049 goto out_overflow;
5121 res->count = be32_to_cpup(p++); 5050 res->count = be32_to_cpup(p++);
5122 res->verf->committed = be32_to_cpup(p++); 5051 res->verf->committed = be32_to_cpup(p++);
5123 memcpy(res->verf->verifier, p, 8); 5052 memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE);
5124 return 0; 5053 return 0;
5125out_overflow: 5054out_overflow:
5126 print_overflow_msg(__func__, xdr); 5055 print_overflow_msg(__func__, xdr);
@@ -5163,16 +5092,13 @@ out_err:
5163 return -EINVAL; 5092 return -EINVAL;
5164} 5093}
5165 5094
5166static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) 5095static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
5167{ 5096{
5168 struct nfs4_secinfo_flavor *sec_flavor; 5097 struct nfs4_secinfo_flavor *sec_flavor;
5169 int status; 5098 int status;
5170 __be32 *p; 5099 __be32 *p;
5171 int i, num_flavors; 5100 int i, num_flavors;
5172 5101
5173 status = decode_op_hdr(xdr, OP_SECINFO);
5174 if (status)
5175 goto out;
5176 p = xdr_inline_decode(xdr, 4); 5102 p = xdr_inline_decode(xdr, 4);
5177 if (unlikely(!p)) 5103 if (unlikely(!p))
5178 goto out_overflow; 5104 goto out_overflow;
@@ -5198,6 +5124,7 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
5198 res->flavors->num_flavors++; 5124 res->flavors->num_flavors++;
5199 } 5125 }
5200 5126
5127 status = 0;
5201out: 5128out:
5202 return status; 5129 return status;
5203out_overflow: 5130out_overflow:
@@ -5205,7 +5132,23 @@ out_overflow:
5205 return -EIO; 5132 return -EIO;
5206} 5133}
5207 5134
5135static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
5136{
5137 int status = decode_op_hdr(xdr, OP_SECINFO);
5138 if (status)
5139 return status;
5140 return decode_secinfo_common(xdr, res);
5141}
5142
5208#if defined(CONFIG_NFS_V4_1) 5143#if defined(CONFIG_NFS_V4_1)
5144static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
5145{
5146 int status = decode_op_hdr(xdr, OP_SECINFO_NO_NAME);
5147 if (status)
5148 return status;
5149 return decode_secinfo_common(xdr, res);
5150}
5151
5209static int decode_exchange_id(struct xdr_stream *xdr, 5152static int decode_exchange_id(struct xdr_stream *xdr,
5210 struct nfs41_exchange_id_res *res) 5153 struct nfs41_exchange_id_res *res)
5211{ 5154{
@@ -5214,6 +5157,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5214 char *dummy_str; 5157 char *dummy_str;
5215 int status; 5158 int status;
5216 struct nfs_client *clp = res->client; 5159 struct nfs_client *clp = res->client;
5160 uint32_t impl_id_count;
5217 5161
5218 status = decode_op_hdr(xdr, OP_EXCHANGE_ID); 5162 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
5219 if (status) 5163 if (status)
@@ -5255,11 +5199,38 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5255 memcpy(res->server_scope->server_scope, dummy_str, dummy); 5199 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5256 res->server_scope->server_scope_sz = dummy; 5200 res->server_scope->server_scope_sz = dummy;
5257 5201
5258 /* Throw away Implementation id array */ 5202 /* Implementation Id */
5259 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5203 p = xdr_inline_decode(xdr, 4);
5260 if (unlikely(status)) 5204 if (unlikely(!p))
5261 return status; 5205 goto out_overflow;
5206 impl_id_count = be32_to_cpup(p++);
5207
5208 if (impl_id_count) {
5209 /* nii_domain */
5210 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5211 if (unlikely(status))
5212 return status;
5213 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5214 return -EIO;
5215 memcpy(res->impl_id->domain, dummy_str, dummy);
5262 5216
5217 /* nii_name */
5218 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5219 if (unlikely(status))
5220 return status;
5221 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5222 return -EIO;
5223 memcpy(res->impl_id->name, dummy_str, dummy);
5224
5225 /* nii_date */
5226 p = xdr_inline_decode(xdr, 12);
5227 if (unlikely(!p))
5228 goto out_overflow;
5229 p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
5230 res->impl_id->date.nseconds = be32_to_cpup(p);
5231
5232 /* if there's more than one entry, ignore the rest */
5233 }
5263 return 0; 5234 return 0;
5264out_overflow: 5235out_overflow:
5265 print_overflow_msg(__func__, xdr); 5236 print_overflow_msg(__func__, xdr);
@@ -5285,8 +5256,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
5285 attrs->max_reqs = be32_to_cpup(p++); 5256 attrs->max_reqs = be32_to_cpup(p++);
5286 nr_attrs = be32_to_cpup(p); 5257 nr_attrs = be32_to_cpup(p);
5287 if (unlikely(nr_attrs > 1)) { 5258 if (unlikely(nr_attrs > 1)) {
5288 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 5259 printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs "
5289 __func__, nr_attrs); 5260 "count %u\n", __func__, nr_attrs);
5290 return -EINVAL; 5261 return -EINVAL;
5291 } 5262 }
5292 if (nr_attrs == 1) { 5263 if (nr_attrs == 1) {
@@ -5436,14 +5407,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
5436 p += 2; 5407 p += 2;
5437 5408
5438 /* Read verifier */ 5409 /* Read verifier */
5439 p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); 5410 p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE);
5440 5411
5441 res->num_devs = be32_to_cpup(p); 5412 res->num_devs = be32_to_cpup(p);
5442 5413
5443 dprintk("%s: num_dev %d\n", __func__, res->num_devs); 5414 dprintk("%s: num_dev %d\n", __func__, res->num_devs);
5444 5415
5445 if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { 5416 if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) {
5446 printk(KERN_ERR "%s too many result dev_num %u\n", 5417 printk(KERN_ERR "NFS: %s too many result dev_num %u\n",
5447 __func__, res->num_devs); 5418 __func__, res->num_devs);
5448 return -EIO; 5419 return -EIO;
5449 } 5420 }
@@ -5537,11 +5508,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5537 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5508 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5538 if (status) 5509 if (status)
5539 return status; 5510 return status;
5540 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); 5511 p = xdr_inline_decode(xdr, 4);
5512 if (unlikely(!p))
5513 goto out_overflow;
5514 res->return_on_close = be32_to_cpup(p);
5515 decode_stateid(xdr, &res->stateid);
5516 p = xdr_inline_decode(xdr, 4);
5541 if (unlikely(!p)) 5517 if (unlikely(!p))
5542 goto out_overflow; 5518 goto out_overflow;
5543 res->return_on_close = be32_to_cpup(p++);
5544 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5545 layout_count = be32_to_cpup(p); 5519 layout_count = be32_to_cpup(p);
5546 if (!layout_count) { 5520 if (!layout_count) {
5547 dprintk("%s: server responded with empty layout array\n", 5521 dprintk("%s: server responded with empty layout array\n",
@@ -5666,7 +5640,8 @@ static int decode_test_stateid(struct xdr_stream *xdr,
5666 if (unlikely(!p)) 5640 if (unlikely(!p))
5667 goto out_overflow; 5641 goto out_overflow;
5668 res->status = be32_to_cpup(p++); 5642 res->status = be32_to_cpup(p++);
5669 return res->status; 5643
5644 return status;
5670out_overflow: 5645out_overflow:
5671 print_overflow_msg(__func__, xdr); 5646 print_overflow_msg(__func__, xdr);
5672out: 5647out:
@@ -6583,8 +6558,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
6583 if (status) 6558 if (status)
6584 goto out; 6559 goto out;
6585 xdr_enter_page(xdr, PAGE_SIZE); 6560 xdr_enter_page(xdr, PAGE_SIZE);
6586 status = decode_getfattr(xdr, &res->fs_locations->fattr, 6561 status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
6587 res->fs_locations->server); 6562 NULL, res->fs_locations,
6563 res->fs_locations->server);
6588out: 6564out:
6589 return status; 6565 return status;
6590} 6566}
@@ -6857,7 +6833,7 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
6857 status = decode_putrootfh(xdr); 6833 status = decode_putrootfh(xdr);
6858 if (status) 6834 if (status)
6859 goto out; 6835 goto out;
6860 status = decode_secinfo(xdr, res); 6836 status = decode_secinfo_no_name(xdr, res);
6861out: 6837out:
6862 return status; 6838 return status;
6863} 6839}
@@ -6964,7 +6940,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6964 goto out_overflow; 6940 goto out_overflow;
6965 6941
6966 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6942 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6967 entry->server) < 0) 6943 NULL, entry->server) < 0)
6968 goto out_overflow; 6944 goto out_overflow;
6969 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) 6945 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6970 entry->ino = entry->fattr->mounted_on_fileid; 6946 entry->ino = entry->fattr->mounted_on_fileid;
@@ -7112,7 +7088,7 @@ struct rpc_procinfo nfs4_procedures[] = {
7112#endif /* CONFIG_NFS_V4_1 */ 7088#endif /* CONFIG_NFS_V4_1 */
7113}; 7089};
7114 7090
7115struct rpc_version nfs_version4 = { 7091const struct rpc_version nfs_version4 = {
7116 .number = 4, 7092 .number = 4,
7117 .nrprocs = ARRAY_SIZE(nfs4_procedures), 7093 .nrprocs = ARRAY_SIZE(nfs4_procedures),
7118 .procs = nfs4_procedures 7094 .procs = nfs4_procedures
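The hunks above repeatedly replace open-coded opcode emission and nops/replen bookkeeping with encode_op_hdr(), encode_uint32() and encode_nfs4_stateid(). For readers without the rest of the file, a sketch of what those helpers are assumed to do (their actual definitions sit earlier in fs/nfs/nfs4xdr.c and may differ in detail):

/* Assumed shape of the helpers relied on by the hunks above. */
static void encode_uint32(struct xdr_stream *xdr, u32 n)
{
	__be32 *p = reserve_space(xdr, 4);

	*p = cpu_to_be32(n);
}

static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op,
			  uint32_t replen, struct compound_hdr *hdr)
{
	encode_uint32(xdr, op);	/* the 4-byte operation number */
	hdr->nops++;		/* accounting each caller used to open-code */
	hdr->replen += replen;
}

static void encode_nfs4_stateid(struct xdr_stream *xdr,
				const nfs4_stateid *stateid)
{
	encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
}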
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c4744e1d513..cd3c910d2d1 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
104/* server:export path string passed to super.c */ 104/* server:export path string passed to super.c */
105static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; 105static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
106 106
107#ifdef RPC_DEBUG 107#ifdef NFS_DEBUG
108/* 108/*
109 * When the "nfsrootdebug" kernel command line option is specified, 109 * When the "nfsrootdebug" kernel command line option is specified,
110 * enable debugging messages for NFSROOT. 110 * enable debugging messages for NFSROOT.
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 55d01280a60..4bff4a3dab4 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
137 struct objio_dev_ent *ode; 137 struct objio_dev_ent *ode;
138 struct osd_dev *od; 138 struct osd_dev *od;
139 struct osd_dev_info odi; 139 struct osd_dev_info odi;
140 bool retry_flag = true;
140 int err; 141 int err;
141 142
142 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); 143 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
@@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
171 goto out; 172 goto out;
172 } 173 }
173 174
175retry_lookup:
174 od = osduld_info_lookup(&odi); 176 od = osduld_info_lookup(&odi);
175 if (unlikely(IS_ERR(od))) { 177 if (unlikely(IS_ERR(od))) {
176 err = PTR_ERR(od); 178 err = PTR_ERR(od);
177 dprintk("%s: osduld_info_lookup => %d\n", __func__, err); 179 dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
180 if (err == -ENODEV && retry_flag) {
181 err = objlayout_autologin(deviceaddr);
182 if (likely(!err)) {
183 retry_flag = false;
184 goto retry_lookup;
185 }
186 }
178 goto out; 187 goto out;
179 } 188 }
180 189
@@ -205,25 +214,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
205int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
206 struct objio_segment **pseg) 215 struct objio_segment **pseg)
207{ 216{
208 struct __alloc_objio_segment { 217/* This is the in memory structure of the objio_segment
209 struct objio_segment olseg; 218 *
210 struct ore_dev *ods[numdevs]; 219 * struct __alloc_objio_segment {
211 struct ore_comp comps[numdevs]; 220 * struct objio_segment olseg;
212 } *aolseg; 221 * struct ore_dev *ods[numdevs];
213 222 * struct ore_comp comps[numdevs];
214 aolseg = kzalloc(sizeof(*aolseg), gfp_flags); 223 * } *aolseg;
215 if (unlikely(!aolseg)) { 224 * NOTE: The code as above compiles and runs perfectly. It is elegant,
225 * type safe and compact. At some Past time Linus has decided he does not
226 * like variable length arrays, For the sake of this principal we uglify
227 * the code as below.
228 */
229 struct objio_segment *lseg;
230 size_t lseg_size = sizeof(*lseg) +
231 numdevs * sizeof(lseg->oc.ods[0]) +
232 numdevs * sizeof(*lseg->oc.comps);
233
234 lseg = kzalloc(lseg_size, gfp_flags);
235 if (unlikely(!lseg)) {
 216 dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__, 236 dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
217 numdevs, sizeof(*aolseg)); 237 numdevs, lseg_size);
218 return -ENOMEM; 238 return -ENOMEM;
219 } 239 }
220 240
221 aolseg->olseg.oc.numdevs = numdevs; 241 lseg->oc.numdevs = numdevs;
222 aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; 242 lseg->oc.single_comp = EC_MULTPLE_COMPS;
223 aolseg->olseg.oc.comps = aolseg->comps; 243 lseg->oc.ods = (void *)(lseg + 1);
224 aolseg->olseg.oc.ods = aolseg->ods; 244 lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
225 245
226 *pseg = &aolseg->olseg; 246 *pseg = lseg;
227 return 0; 247 return 0;
228} 248}
229 249
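The NOTE in the hunk above motivates the rewrite of __alloc_objio_seg(): one kzalloc() carries the header plus two trailing arrays, with the array pointers computed by hand instead of declared as variable-length array members. A self-contained userspace model of that single-allocation pattern (all names hypothetical; only the pointer arithmetic mirrors the kernel code):

/* Illustrative model: one allocation holding a header followed by two
 * arrays of 'numdevs' elements each, zeroed up front. The trailing-array
 * pointers are derived from the header address, as in __alloc_objio_seg().
 */
#include <stdlib.h>

struct hdr { unsigned numdevs; void **ods; int *comps; };

static struct hdr *alloc_hdr(unsigned numdevs)
{
	struct hdr *h;
	size_t sz = sizeof(*h) + numdevs * sizeof(void *)
			       + numdevs * sizeof(int);

	h = calloc(1, sz);
	if (!h)
		return NULL;
	h->numdevs = numdevs;
	h->ods = (void **)(h + 1);		/* first trailing array */
	h->comps = (int *)(h->ods + numdevs);	/* second trailing array */
	return h;
}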
@@ -582,10 +602,10 @@ objlayout_init(void)
582 602
583 if (ret) 603 if (ret)
584 printk(KERN_INFO 604 printk(KERN_INFO
585 "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", 605 "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
586 __func__, ret); 606 __func__, ret);
587 else 607 else
588 printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", 608 printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
589 __func__); 609 __func__);
590 return ret; 610 return ret;
591} 611}
@@ -594,7 +614,7 @@ static void __exit
594objlayout_exit(void) 614objlayout_exit(void)
595{ 615{
596 pnfs_unregister_layoutdriver(&objlayout_type); 616 pnfs_unregister_layoutdriver(&objlayout_type);
597 printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", 617 printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
598 __func__); 618 __func__);
599} 619}
600 620
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index b3c29039f5b..595c5fc21a1 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -37,6 +37,9 @@
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */ 38 */
39 39
40#include <linux/kmod.h>
41#include <linux/moduleparam.h>
42#include <linux/ratelimit.h>
40#include <scsi/osd_initiator.h> 43#include <scsi/osd_initiator.h>
41#include "objlayout.h" 44#include "objlayout.h"
42 45
@@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len)
156 return end > start ? end - 1 : NFS4_MAX_UINT64; 159 return end > start ? end - 1 : NFS4_MAX_UINT64;
157} 160}
158 161
159void _fix_verify_io_params(struct pnfs_layout_segment *lseg, 162static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
160 struct page ***p_pages, unsigned *p_pgbase, 163 struct page ***p_pages, unsigned *p_pgbase,
161 u64 offset, unsigned long count) 164 u64 offset, unsigned long count)
162{ 165{
@@ -490,9 +493,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
490 if (!ioerr->oer_errno) 493 if (!ioerr->oer_errno)
491 continue; 494 continue;
492 495
493 printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " 496 printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
494 "dev(%llx:%llx) par=0x%llx obj=0x%llx " 497 "is_write=%d dev(%llx:%llx) par=0x%llx "
495 "offset=0x%llx length=0x%llx\n", 498 "obj=0x%llx offset=0x%llx length=0x%llx\n",
496 __func__, i, ioerr->oer_errno, 499 __func__, i, ioerr->oer_errno,
497 ioerr->oer_iswrite, 500 ioerr->oer_iswrite,
498 _DEVID_LO(&ioerr->oer_component.oid_device_id), 501 _DEVID_LO(&ioerr->oer_component.oid_device_id),
@@ -601,7 +604,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
601{ 604{
602 struct objlayout_deviceinfo *odi; 605 struct objlayout_deviceinfo *odi;
603 struct pnfs_device pd; 606 struct pnfs_device pd;
604 struct super_block *sb;
605 struct page *page, **pages; 607 struct page *page, **pages;
606 u32 *p; 608 u32 *p;
607 int err; 609 int err;
@@ -620,7 +622,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
620 pd.pglen = PAGE_SIZE; 622 pd.pglen = PAGE_SIZE;
621 pd.mincount = 0; 623 pd.mincount = 0;
622 624
623 sb = pnfslay->plh_inode->i_sb;
624 err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); 625 err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
625 dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); 626 dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
626 if (err) 627 if (err)
@@ -651,3 +652,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
651 __free_page(odi->page); 652 __free_page(odi->page);
652 kfree(odi); 653 kfree(odi);
653} 654}
655
656enum {
657 OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
658 OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
659 OSD_LOGIN_UPCALL_PATHLEN = 256
660};
661
662static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
663
664module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
665 0600);
666MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
667
668struct __auto_login {
669 char uri[OBJLAYOUT_MAX_URI_LEN];
670 char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
671 char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
672};
673
674static int __objlayout_upcall(struct __auto_login *login)
675{
676 static char *envp[] = { "HOME=/",
677 "TERM=linux",
678 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
679 NULL
680 };
681 char *argv[8];
682 int ret;
683
684 if (unlikely(!osd_login_prog[0])) {
685 dprintk("%s: osd_login_prog is disabled\n", __func__);
686 return -EACCES;
687 }
688
689 dprintk("%s uri: %s\n", __func__, login->uri);
690 dprintk("%s osdname %s\n", __func__, login->osdname);
691 dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
692
693 argv[0] = (char *)osd_login_prog;
694 argv[1] = "-u";
695 argv[2] = login->uri;
696 argv[3] = "-o";
697 argv[4] = login->osdname;
698 argv[5] = "-s";
699 argv[6] = login->systemid_hex;
700 argv[7] = NULL;
701
702 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
703 /*
704 * Disable the upcall mechanism if we're getting an ENOENT or
705 * EACCES error. The admin can re-enable it on the fly by using
706 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
707 * the problem has been fixed.
708 */
709 if (ret == -ENOENT || ret == -EACCES) {
 710 printk(KERN_ERR "PNFS-OBJ: %s was not found, please set "
711 "objlayoutdriver.osd_login_prog kernel parameter!\n",
712 osd_login_prog);
713 osd_login_prog[0] = '\0';
714 }
715 dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
716
717 return ret;
718}
719
720/* Assume dest is all zeros */
721static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
722 char *dest, int max_len,
723 const char *var_name)
724{
725 if (!s.len)
726 return;
727
728 if (s.len >= max_len) {
729 pr_warn_ratelimited(
730 "objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
731 var_name, s.len, max_len);
732 s.len = max_len - 1; /* space for null terminator */
733 }
734
735 memcpy(dest, s.data, s.len);
736}
737
738/* Assume sysid is all zeros */
739static void _sysid_2_hex(struct nfs4_string s,
740 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
741{
742 int i;
743 char *cur;
744
745 if (!s.len)
746 return;
747
748 if (s.len != OSD_SYSTEMID_LEN) {
749 pr_warn_ratelimited(
750 "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
751 s.len);
752 if (s.len > OSD_SYSTEMID_LEN)
753 s.len = OSD_SYSTEMID_LEN;
754 }
755
756 cur = sysid;
757 for (i = 0; i < s.len; i++)
758 cur = hex_byte_pack(cur, s.data[i]);
759}
760
761int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
762{
763 int rc;
764 struct __auto_login login;
765
766 if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
767 return -ENODEV;
768
769 memset(&login, 0, sizeof(login));
770 __copy_nfsS_and_zero_terminate(
771 deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
772 login.uri, sizeof(login.uri), "URI");
773
774 __copy_nfsS_and_zero_terminate(
775 deviceaddr->oda_osdname,
776 login.osdname, sizeof(login.osdname), "OSDNAME");
777
778 _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
779
780 rc = __objlayout_upcall(&login);
781 if (rc > 0) /* script returns positive values */
782 rc = -ENODEV;
783
784 return rc;
785}
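objlayout_autologin() above ends up exec'ing osd_login_prog as "osd_login -u <uri> -o <osdname> -s <systemid_hex>" and maps a positive exit status to -ENODEV. A hypothetical skeleton of such a helper, showing only that argument contract; a real /sbin/osd_login would perform the actual OSD target login:

/* Hypothetical osd_login stub. Only the -u/-o/-s interface and the
 * exit-status convention (0 on success, non-zero on failure) come from
 * the upcall code above; everything else is a placeholder.
 */
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *uri = NULL, *osdname = NULL, *sysid = NULL;
	int c;

	while ((c = getopt(argc, argv, "u:o:s:")) != -1) {
		switch (c) {
		case 'u': uri = optarg; break;
		case 'o': osdname = optarg; break;
		case 's': sysid = optarg; break;
		default: return 1;
		}
	}
	if (!uri || !osdname || !sysid)
		return 1;
	fprintf(stderr, "login uri=%s osd=%s sysid=%s\n", uri, osdname, sysid);
	/* ... perform the actual target login here ... */
	return 0;
}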
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 8ec34727ed2..880ba086be9 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn(
184 struct xdr_stream *, 184 struct xdr_stream *,
185 const struct nfs4_layoutreturn_args *); 185 const struct nfs4_layoutreturn_args *);
186 186
187extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
188
187#endif /* _OBJLAYOUT_H */ 189#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5668f7c54c4..d21fceaa9f6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
16#include <linux/nfs.h>
16#include <linux/nfs3.h> 17#include <linux/nfs3.h>
17#include <linux/nfs4.h> 18#include <linux/nfs4.h>
18#include <linux/nfs_page.h> 19#include <linux/nfs_page.h>
@@ -106,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req)
106 nfs_release_request(req); 107 nfs_release_request(req);
107} 108}
108 109
109/**
110 * nfs_set_page_tag_locked - Tag a request as locked
111 * @req:
112 */
113int nfs_set_page_tag_locked(struct nfs_page *req)
114{
115 if (!nfs_lock_request_dontget(req))
116 return 0;
117 if (test_bit(PG_MAPPED, &req->wb_flags))
118 radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
119 return 1;
120}
121
122/**
123 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
124 */
125void nfs_clear_page_tag_locked(struct nfs_page *req)
126{
127 if (test_bit(PG_MAPPED, &req->wb_flags)) {
128 struct inode *inode = req->wb_context->dentry->d_inode;
129 struct nfs_inode *nfsi = NFS_I(inode);
130
131 spin_lock(&inode->i_lock);
132 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
133 nfs_unlock_request(req);
134 spin_unlock(&inode->i_lock);
135 } else
136 nfs_unlock_request(req);
137}
138
139/* 110/*
140 * nfs_clear_request - Free up all resources allocated to the request 111 * nfs_clear_request - Free up all resources allocated to the request
141 * @req: 112 * @req:
@@ -425,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
425 } 396 }
426} 397}
427 398
428#define NFS_SCAN_MAXENTRIES 16
429/**
430 * nfs_scan_list - Scan a list for matching requests
431 * @nfsi: NFS inode
432 * @dst: Destination list
433 * @idx_start: lower bound of page->index to scan
434 * @npages: idx_start + npages sets the upper bound to scan.
435 * @tag: tag to scan for
436 *
437 * Moves elements from one of the inode request lists.
438 * If the number of requests is set to 0, the entire address_space
439 * starting at index idx_start, is scanned.
440 * The requests are *not* checked to ensure that they form a contiguous set.
441 * You must be holding the inode's i_lock when calling this function
442 */
443int nfs_scan_list(struct nfs_inode *nfsi,
444 struct list_head *dst, pgoff_t idx_start,
445 unsigned int npages, int tag)
446{
447 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
448 struct nfs_page *req;
449 pgoff_t idx_end;
450 int found, i;
451 int res;
452 struct list_head *list;
453
454 res = 0;
455 if (npages == 0)
456 idx_end = ~0;
457 else
458 idx_end = idx_start + npages - 1;
459
460 for (;;) {
461 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
462 (void **)&pgvec[0], idx_start,
463 NFS_SCAN_MAXENTRIES, tag);
464 if (found <= 0)
465 break;
466 for (i = 0; i < found; i++) {
467 req = pgvec[i];
468 if (req->wb_index > idx_end)
469 goto out;
470 idx_start = req->wb_index + 1;
471 if (nfs_set_page_tag_locked(req)) {
472 kref_get(&req->wb_kref);
473 radix_tree_tag_clear(&nfsi->nfs_page_tree,
474 req->wb_index, tag);
475 list = pnfs_choose_commit_list(req, dst);
476 nfs_list_add_request(req, list);
477 res++;
478 if (res == INT_MAX)
479 goto out;
480 }
481 }
482 /* for latency reduction */
483 cond_resched_lock(&nfsi->vfs_inode.i_lock);
484 }
485out:
486 return res;
487}
488
489int __init nfs_init_nfspagecache(void) 399int __init nfs_init_nfspagecache(void)
490{ 400{
491 nfs_page_cachep = kmem_cache_create("nfs_page", 401 nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 17149a49006..38512bcd2e9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
101 goto out_no_driver; 101 goto out_no_driver;
102 if (!(server->nfs_client->cl_exchange_flags & 102 if (!(server->nfs_client->cl_exchange_flags &
103 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 103 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
104 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, 104 printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
105 id, server->nfs_client->cl_exchange_flags); 105 __func__, id, server->nfs_client->cl_exchange_flags);
106 goto out_no_driver; 106 goto out_no_driver;
107 } 107 }
108 ld_type = find_pnfs_driver(id); 108 ld_type = find_pnfs_driver(id);
@@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
122 server->pnfs_curr_ld = ld_type; 122 server->pnfs_curr_ld = ld_type;
123 if (ld_type->set_layoutdriver 123 if (ld_type->set_layoutdriver
124 && ld_type->set_layoutdriver(server, mntfh)) { 124 && ld_type->set_layoutdriver(server, mntfh)) {
125 printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", 125 printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
126 __func__, id); 126 "driver %u.\n", __func__, id);
127 module_put(ld_type->owner); 127 module_put(ld_type->owner);
128 goto out_no_driver; 128 goto out_no_driver;
129 } 129 }
@@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
143 struct pnfs_layoutdriver_type *tmp; 143 struct pnfs_layoutdriver_type *tmp;
144 144
145 if (ld_type->id == 0) { 145 if (ld_type->id == 0) {
146 printk(KERN_ERR "%s id 0 is reserved\n", __func__); 146 printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
147 return status; 147 return status;
148 } 148 }
149 if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 149 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
150 printk(KERN_ERR "%s Layout driver must provide " 150 printk(KERN_ERR "NFS: %s Layout driver must provide "
151 "alloc_lseg and free_lseg.\n", __func__); 151 "alloc_lseg and free_lseg.\n", __func__);
152 return status; 152 return status;
153 } 153 }
@@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
160 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 160 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
161 ld_type->name); 161 ld_type->name);
162 } else { 162 } else {
163 printk(KERN_ERR "%s Module with id %d already loaded!\n", 163 printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
164 __func__, ld_type->id); 164 __func__, ld_type->id);
165 } 165 }
166 spin_unlock(&pnfs_spinlock); 166 spin_unlock(&pnfs_spinlock);
@@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
496{ 496{
497 u32 oldseq, newseq; 497 u32 oldseq, newseq;
498 498
499 oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); 499 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
500 newseq = be32_to_cpu(new->stateid.seqid); 500 newseq = be32_to_cpu(new->seqid);
501 if ((int)(newseq - oldseq) > 0) { 501 if ((int)(newseq - oldseq) > 0) {
502 memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); 502 nfs4_stateid_copy(&lo->plh_stateid, new);
503 if (update_barrier) { 503 if (update_barrier) {
504 u32 new_barrier = be32_to_cpu(new->stateid.seqid); 504 u32 new_barrier = be32_to_cpu(new->seqid);
505 505
506 if ((int)(new_barrier - lo->plh_barrier)) 506 if ((int)(new_barrier - lo->plh_barrier))
507 lo->plh_barrier = new_barrier; 507 lo->plh_barrier = new_barrier;
@@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
525 int lget) 525 int lget)
526{ 526{
527 if ((stateid) && 527 if ((stateid) &&
528 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 528 (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
529 return true; 529 return true;
530 return lo->plh_block_lgets || 530 return lo->plh_block_lgets ||
531 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || 531 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
@@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
549 549
550 do { 550 do {
551 seq = read_seqbegin(&open_state->seqlock); 551 seq = read_seqbegin(&open_state->seqlock);
552 memcpy(dst->data, open_state->stateid.data, 552 nfs4_stateid_copy(dst, &open_state->stateid);
553 sizeof(open_state->stateid.data));
554 } while (read_seqretry(&open_state->seqlock, seq)); 553 } while (read_seqretry(&open_state->seqlock, seq));
555 } else 554 } else
556 memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); 555 nfs4_stateid_copy(dst, &lo->plh_stateid);
557 spin_unlock(&lo->plh_inode->i_lock); 556 spin_unlock(&lo->plh_inode->i_lock);
558 dprintk("<-- %s\n", __func__); 557 dprintk("<-- %s\n", __func__);
559 return status; 558 return status;
@@ -588,9 +587,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
588 587
589 /* allocate pages for xdr post processing */ 588 /* allocate pages for xdr post processing */
590 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 589 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
591 max_pages = max_resp_sz >> PAGE_SHIFT; 590 max_pages = nfs_page_array_len(0, max_resp_sz);
592 591
593 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 592 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
594 if (!pages) 593 if (!pages)
595 goto out_err_free; 594 goto out_err_free;
596 595
@@ -760,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
760 } 759 }
761 if (!found) { 760 if (!found) {
762 struct pnfs_layout_hdr *lo = nfsi->layout; 761 struct pnfs_layout_hdr *lo = nfsi->layout;
763 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); 762 u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
764 763
765 /* Since close does not return a layout stateid for use as 764 /* Since close does not return a layout stateid for use as
766 * a barrier, we choose the worst-case barrier. 765 * a barrier, we choose the worst-case barrier.
@@ -966,8 +965,7 @@ pnfs_update_layout(struct inode *ino,
966 } 965 }
967 966
968 /* Do we even need to bother with this? */ 967 /* Do we even need to bother with this? */
969 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || 968 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
970 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
971 dprintk("%s matches recall, use MDS\n", __func__); 969 dprintk("%s matches recall, use MDS\n", __func__);
972 goto out_unlock; 970 goto out_unlock;
973 } 971 }
@@ -1032,7 +1030,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1032 struct nfs4_layoutget_res *res = &lgp->res; 1030 struct nfs4_layoutget_res *res = &lgp->res;
1033 struct pnfs_layout_segment *lseg; 1031 struct pnfs_layout_segment *lseg;
1034 struct inode *ino = lo->plh_inode; 1032 struct inode *ino = lo->plh_inode;
1035 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
1036 int status = 0; 1033 int status = 0;
1037 1034
1038 /* Inject layout blob into I/O device driver */ 1035 /* Inject layout blob into I/O device driver */
@@ -1048,8 +1045,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1048 } 1045 }
1049 1046
1050 spin_lock(&ino->i_lock); 1047 spin_lock(&ino->i_lock);
1051 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || 1048 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1052 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1053 dprintk("%s forget reply due to recall\n", __func__); 1049 dprintk("%s forget reply due to recall\n", __func__);
1054 goto out_forget_reply; 1050 goto out_forget_reply;
1055 } 1051 }
@@ -1214,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
1214 } 1210 }
1215 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); 1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1216 } 1212 }
1213 put_lseg(data->lseg);
1217 data->mds_ops->rpc_release(data); 1214 data->mds_ops->rpc_release(data);
1218} 1215}
1219EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1227,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1227 nfs_list_add_request(data->req, &desc->pg_list); 1224 nfs_list_add_request(data->req, &desc->pg_list);
1228 nfs_pageio_reset_write_mds(desc); 1225 nfs_pageio_reset_write_mds(desc);
1229 desc->pg_recoalesce = 1; 1226 desc->pg_recoalesce = 1;
1227 put_lseg(data->lseg);
1230 nfs_writedata_release(data); 1228 nfs_writedata_release(data);
1231} 1229}
1232 1230
@@ -1327,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
1327 data->mds_ops->rpc_call_done(&data->task, data); 1325 data->mds_ops->rpc_call_done(&data->task, data);
1328 } else 1326 } else
1329 pnfs_ld_handle_read_error(data); 1327 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg);
1330 data->mds_ops->rpc_release(data); 1329 data->mds_ops->rpc_release(data);
1331} 1330}
1332EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
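The put_lseg() calls added in the three hunks above pair with the removals from nfs_readdata_release() and nfs_writedata_release() further down in this diff: the layout-segment reference is now dropped by the pNFS completion paths instead of the generic release helpers. That keeps the resend-to-MDS branches safe, since they release the data through those same generic helpers and would otherwise put the reference a second time. The resulting ownership rule, as a comment sketch (assuming the reference is taken when the I/O is handed to the layout driver):

        /* lseg lifetime after this patch:
         *   get_lseg() - taken when the request is routed to the layout driver
         *   put_lseg() - dropped exactly once, in pnfs_ld_{read,write}_done() or
         *                pnfs_write_through_mds(), before rpc_release() runs
         */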
@@ -1530,8 +1529,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1530 end_pos = nfsi->layout->plh_lwb; 1529 end_pos = nfsi->layout->plh_lwb;
1531 nfsi->layout->plh_lwb = 0; 1530 nfsi->layout->plh_lwb = 0;
1532 1531
1533 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, 1532 nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
1534 sizeof(nfsi->layout->plh_stateid.data));
1535 spin_unlock(&inode->i_lock); 1533 spin_unlock(&inode->i_lock);
1536 1534
1537 data->args.inode = inode; 1535 data->args.inode = inode;
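nfs4_stateid_copy() replaces the open-coded memcpy of plh_stateid.data, so the sizeof expression is no longer repeated at each call site (and the earlier seqid access change in pnfs_roc_drain() suggests the stateid structure was flattened in the same series). The helper is presumably a thin typed inline along these lines (not shown in this hunk):

        static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
        {
                memcpy(dst, src, sizeof(*dst));
        }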
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a0a4f..442ebf68eee 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 94 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 95 const struct nfs_pageio_ops *pg_write_ops;
96 96
97 /* Returns true if layoutdriver wants to divert this request to 97 void (*mark_request_commit) (struct nfs_page *req,
98 * driver's commit routine. 98 struct pnfs_layout_segment *lseg);
99 */ 99 void (*clear_request_commit) (struct nfs_page *req);
100 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); 100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
101 struct list_head * (*choose_commit_list) (struct nfs_page *req);
102 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
103 102
104 /* 103 /*
@@ -229,7 +228,6 @@ struct nfs4_deviceid_node {
229 atomic_t ref; 228 atomic_t ref;
230}; 229};
231 230
232void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
233struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 231struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
234void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 232void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
235void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 233void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
@@ -262,20 +260,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
262 return nfss->pnfs_curr_ld != NULL; 260 return nfss->pnfs_curr_ld != NULL;
263} 261}
264 262
265static inline void
266pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
267{
268 if (lseg) {
269 struct pnfs_layoutdriver_type *ld;
270
271 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
272 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
273 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
274 req->wb_commit_lseg = get_lseg(lseg);
275 }
276 }
277}
278
279static inline int 263static inline int
280pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
281{ 265{
@@ -284,27 +268,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
284 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
285} 269}
286 270
287static inline struct list_head * 271static inline bool
288pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
289{ 273{
290 struct list_head *rv; 274 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
291 276
292 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { 277 if (lseg == NULL || ld->mark_request_commit == NULL)
293 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; 278 return false;
279 ld->mark_request_commit(req, lseg);
280 return true;
281}
294 282
295 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 283static inline bool
296 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); 284pnfs_clear_request_commit(struct nfs_page *req)
297 /* matched by ref taken when PG_PNFS_COMMIT is set */ 285{
298 put_lseg(req->wb_commit_lseg); 286 struct inode *inode = req->wb_context->dentry->d_inode;
299 } else 287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
300 rv = mds; 288
301 return rv; 289 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false;
291 ld->clear_request_commit(req);
292 return true;
302} 293}
303 294
304static inline void pnfs_clear_request_commit(struct nfs_page *req) 295static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
305{ 297{
306 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) 298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
307 put_lseg(req->wb_commit_lseg); 299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock);
304 if (ret != 0)
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
306 return ret;
308} 307}
309 308
310/* Should the pNFS client commit and return the layout upon a setattr */ 309/* Should the pNFS client commit and return the layout upon a setattr */
@@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino)
328 return 0; 327 return 0;
329} 328}
330 329
330#ifdef NFS_DEBUG
331void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
332#else
333static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id)
334{
335}
336#endif /* NFS_DEBUG */
331#else /* CONFIG_NFS_V4_1 */ 337#else /* CONFIG_NFS_V4_1 */
332 338
333static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 339static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -400,35 +406,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
400 return false; 406 return false;
401} 407}
402 408
403static inline void
404pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
405{
406}
407
408static inline int 409static inline int
409pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
410{ 411{
411 return PNFS_NOT_ATTEMPTED; 412 return PNFS_NOT_ATTEMPTED;
412} 413}
413 414
414static inline struct list_head * 415static inline bool
415pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
416{ 417{
417 return mds; 418 return false;
418} 419}
419 420
420static inline void pnfs_clear_request_commit(struct nfs_page *req) 421static inline bool
422pnfs_clear_request_commit(struct nfs_page *req)
421{ 423{
424 return false;
422} 425}
423 426
424static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 427static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
425{ 429{
426 return 0; 430 return 0;
427} 431}
428 432
429static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) 433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
430{ 434{
435 return 0;
431} 436}
437
432#endif /* CONFIG_NFS_V4_1 */ 438#endif /* CONFIG_NFS_V4_1 */
433 439
434#endif /* FS_NFS_PNFS_H */ 440#endif /* FS_NFS_PNFS_H */
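Taken together, the pnfs.h changes replace the old mark_pnfs_commit/choose_commit_list pair with three optional hooks (mark_request_commit, clear_request_commit, scan_commit_lists), each guarded by a NULL check in the inline wrappers so that the generic commit path remains the fallback. A hypothetical layout driver would wire them up like this (the hld_* names are illustrative, not from this diff):

        static struct pnfs_layoutdriver_type hld_type = {
                .mark_request_commit  = hld_mark_request_commit,  /* divert req to a driver list */
                .clear_request_commit = hld_clear_request_commit, /* undo the diversion */
                .scan_commit_lists    = hld_scan_commit_lists,    /* hand reqs back for COMMIT */
                .commit_pagelist      = hld_commit_pagelist,
        };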
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2a26e..73f701f1f4d 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -43,6 +43,7 @@
43static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; 43static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
44static DEFINE_SPINLOCK(nfs4_deviceid_lock); 44static DEFINE_SPINLOCK(nfs4_deviceid_lock);
45 45
46#ifdef NFS_DEBUG
46void 47void
47nfs4_print_deviceid(const struct nfs4_deviceid *id) 48nfs4_print_deviceid(const struct nfs4_deviceid *id)
48{ 49{
@@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id)
52 p[0], p[1], p[2], p[3]); 53 p[0], p[1], p[2], p[3]);
53} 54}
54EXPORT_SYMBOL_GPL(nfs4_print_deviceid); 55EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
56#endif
55 57
56static inline u32 58static inline u32
57nfs4_deviceid_hash(const struct nfs4_deviceid *id) 59nfs4_deviceid_hash(const struct nfs4_deviceid *id)
@@ -92,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
92 * @clp nfs_client associated with deviceid 94 * @clp nfs_client associated with deviceid
93 * @id deviceid to look up 95 * @id deviceid to look up
94 */ 96 */
95struct nfs4_deviceid_node * 97static struct nfs4_deviceid_node *
96_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, 98_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
97 const struct nfs_client *clp, const struct nfs4_deviceid *id, 99 const struct nfs_client *clp, const struct nfs4_deviceid *id,
98 long hash) 100 long hash)
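The NFS_DEBUG guard added around nfs4_print_deviceid() matches the header change in the pnfs.h hunk above: with debugging configured, callers link against the real exported function; otherwise they compile against a static inline no-op, so call sites never need their own #ifdef:

        /* unconditional at the call site; compiles away unless NFS_DEBUG is set */
        nfs4_print_deviceid(&node->deviceid);        /* node: hypothetical nfs4_deviceid_node * */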
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0c672588fe5..b63b6f4d14f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
358 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; 358 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
359} 359}
360 360
361static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
362{
363 rpc_call_start(task);
364}
365
361static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) 366static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
362{ 367{
363 if (nfs_async_handle_expired_key(task)) 368 if (nfs_async_handle_expired_key(task))
@@ -372,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
372 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; 377 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
373} 378}
374 379
380static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
381{
382 rpc_call_start(task);
383}
384
375static int 385static int
376nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 386nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
377 struct inode *new_dir) 387 struct inode *new_dir)
@@ -651,6 +661,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *
651 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 661 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
652} 662}
653 663
664static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
665{
666 rpc_call_start(task);
667}
668
654static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 669static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
655{ 670{
656 if (nfs_async_handle_expired_key(task)) 671 if (nfs_async_handle_expired_key(task))
@@ -668,6 +683,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message
668 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 683 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
669} 684}
670 685
686static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
687{
688 rpc_call_start(task);
689}
690
671static void 691static void
672nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 692nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
673{ 693{
@@ -721,9 +741,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
721 .create = nfs_proc_create, 741 .create = nfs_proc_create,
722 .remove = nfs_proc_remove, 742 .remove = nfs_proc_remove,
723 .unlink_setup = nfs_proc_unlink_setup, 743 .unlink_setup = nfs_proc_unlink_setup,
744 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
724 .unlink_done = nfs_proc_unlink_done, 745 .unlink_done = nfs_proc_unlink_done,
725 .rename = nfs_proc_rename, 746 .rename = nfs_proc_rename,
726 .rename_setup = nfs_proc_rename_setup, 747 .rename_setup = nfs_proc_rename_setup,
748 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
727 .rename_done = nfs_proc_rename_done, 749 .rename_done = nfs_proc_rename_done,
728 .link = nfs_proc_link, 750 .link = nfs_proc_link,
729 .symlink = nfs_proc_symlink, 751 .symlink = nfs_proc_symlink,
@@ -736,8 +758,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
736 .pathconf = nfs_proc_pathconf, 758 .pathconf = nfs_proc_pathconf,
737 .decode_dirent = nfs2_decode_dirent, 759 .decode_dirent = nfs2_decode_dirent,
738 .read_setup = nfs_proc_read_setup, 760 .read_setup = nfs_proc_read_setup,
761 .read_rpc_prepare = nfs_proc_read_rpc_prepare,
739 .read_done = nfs_read_done, 762 .read_done = nfs_read_done,
740 .write_setup = nfs_proc_write_setup, 763 .write_setup = nfs_proc_write_setup,
764 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
741 .write_done = nfs_write_done, 765 .write_done = nfs_write_done,
742 .commit_setup = nfs_proc_commit_setup, 766 .commit_setup = nfs_proc_commit_setup,
743 .lock = nfs_proc_lock, 767 .lock = nfs_proc_lock,
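The new nfs_proc_*_rpc_prepare() stubs exist because the generic read, write, unlink and rename paths (see the read.c, unlink.c and write.c hunks below) now set .rpc_call_prepare unconditionally and dispatch through the per-version nfs_rpc_ops vector. NFSv2 has no session state to sequence, so its entire contribution to the prepare phase is:

        static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
        {
                rpc_call_start(task);
        }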
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfa175c223d..0a4be28c2ea 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -20,7 +20,6 @@
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/module.h> 21#include <linux/module.h>
22 22
23#include <asm/system.h>
24#include "pnfs.h" 23#include "pnfs.h"
25 24
26#include "nfs4_fs.h" 25#include "nfs4_fs.h"
@@ -66,7 +65,6 @@ void nfs_readdata_free(struct nfs_read_data *p)
66 65
67void nfs_readdata_release(struct nfs_read_data *rdata) 66void nfs_readdata_release(struct nfs_read_data *rdata)
68{ 67{
69 put_lseg(rdata->lseg);
70 put_nfs_open_context(rdata->args.context); 68 put_nfs_open_context(rdata->args.context);
71 nfs_readdata_free(rdata); 69 nfs_readdata_free(rdata);
72} 70}
@@ -324,7 +322,7 @@ out_bad:
324 while (!list_empty(res)) { 322 while (!list_empty(res)) {
325 data = list_entry(res->next, struct nfs_read_data, list); 323 data = list_entry(res->next, struct nfs_read_data, list);
326 list_del(&data->list); 324 list_del(&data->list);
327 nfs_readdata_free(data); 325 nfs_readdata_release(data);
328 } 326 }
329 nfs_readpage_release(req); 327 nfs_readpage_release(req);
330 return -ENOMEM; 328 return -ENOMEM;
@@ -465,23 +463,14 @@ static void nfs_readpage_release_partial(void *calldata)
465 nfs_readdata_release(calldata); 463 nfs_readdata_release(calldata);
466} 464}
467 465
468#if defined(CONFIG_NFS_V4_1)
469void nfs_read_prepare(struct rpc_task *task, void *calldata) 466void nfs_read_prepare(struct rpc_task *task, void *calldata)
470{ 467{
471 struct nfs_read_data *data = calldata; 468 struct nfs_read_data *data = calldata;
472 469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
473 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
474 &data->args.seq_args, &data->res.seq_res,
475 0, task))
476 return;
477 rpc_call_start(task);
478} 470}
479#endif /* CONFIG_NFS_V4_1 */
480 471
481static const struct rpc_call_ops nfs_read_partial_ops = { 472static const struct rpc_call_ops nfs_read_partial_ops = {
482#if defined(CONFIG_NFS_V4_1)
483 .rpc_call_prepare = nfs_read_prepare, 473 .rpc_call_prepare = nfs_read_prepare,
484#endif /* CONFIG_NFS_V4_1 */
485 .rpc_call_done = nfs_readpage_result_partial, 474 .rpc_call_done = nfs_readpage_result_partial,
486 .rpc_release = nfs_readpage_release_partial, 475 .rpc_release = nfs_readpage_release_partial,
487}; 476};
@@ -545,9 +534,7 @@ static void nfs_readpage_release_full(void *calldata)
545} 534}
546 535
547static const struct rpc_call_ops nfs_read_full_ops = { 536static const struct rpc_call_ops nfs_read_full_ops = {
548#if defined(CONFIG_NFS_V4_1)
549 .rpc_call_prepare = nfs_read_prepare, 537 .rpc_call_prepare = nfs_read_prepare,
550#endif /* CONFIG_NFS_V4_1 */
551 .rpc_call_done = nfs_readpage_result_full, 538 .rpc_call_done = nfs_readpage_result_full,
552 .rpc_release = nfs_readpage_release_full, 539 .rpc_release = nfs_readpage_release_full,
553}; 540};
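With the CONFIG_NFS_V4_1 conditionals gone, nfs_read_prepare() is built for every NFS version and the version-specific work hides behind NFS_PROTO(inode)->read_rpc_prepare. The v4 implementation presumably keeps the session sequencing that the deleted block did inline; a reconstruction from the removed lines (the actual nfs4proc.c change is not shown here):

        static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
        {
                if (nfs4_setup_sequence(NFS_SERVER(data->inode),
                                        &data->args.seq_args, &data->res.seq_res, 0, task))
                        return;        /* task sleeps until a session slot is free */
                rpc_call_start(task);
        }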
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3dfa4f112c0..4ac7fca7e4b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -52,8 +52,9 @@
52#include <linux/nfs_xdr.h> 52#include <linux/nfs_xdr.h>
53#include <linux/magic.h> 53#include <linux/magic.h>
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h>
55 57
56#include <asm/system.h>
57#include <asm/uaccess.h> 58#include <asm/uaccess.h>
58 59
59#include "nfs4_fs.h" 60#include "nfs4_fs.h"
@@ -79,7 +80,6 @@ enum {
79 Opt_cto, Opt_nocto, 80 Opt_cto, Opt_nocto,
80 Opt_ac, Opt_noac, 81 Opt_ac, Opt_noac,
81 Opt_lock, Opt_nolock, 82 Opt_lock, Opt_nolock,
82 Opt_v2, Opt_v3, Opt_v4,
83 Opt_udp, Opt_tcp, Opt_rdma, 83 Opt_udp, Opt_tcp, Opt_rdma,
84 Opt_acl, Opt_noacl, 84 Opt_acl, Opt_noacl,
85 Opt_rdirplus, Opt_nordirplus, 85 Opt_rdirplus, Opt_nordirplus,
@@ -97,10 +97,10 @@ enum {
97 Opt_namelen, 97 Opt_namelen,
98 Opt_mountport, 98 Opt_mountport,
99 Opt_mountvers, 99 Opt_mountvers,
100 Opt_nfsvers,
101 Opt_minorversion, 100 Opt_minorversion,
102 101
103 /* Mount options that take string arguments */ 102 /* Mount options that take string arguments */
103 Opt_nfsvers,
104 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 104 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
105 Opt_addr, Opt_mountaddr, Opt_clientaddr, 105 Opt_addr, Opt_mountaddr, Opt_clientaddr,
106 Opt_lookupcache, 106 Opt_lookupcache,
@@ -132,9 +132,6 @@ static const match_table_t nfs_mount_option_tokens = {
132 { Opt_noac, "noac" }, 132 { Opt_noac, "noac" },
133 { Opt_lock, "lock" }, 133 { Opt_lock, "lock" },
134 { Opt_nolock, "nolock" }, 134 { Opt_nolock, "nolock" },
135 { Opt_v2, "v2" },
136 { Opt_v3, "v3" },
137 { Opt_v4, "v4" },
138 { Opt_udp, "udp" }, 135 { Opt_udp, "udp" },
139 { Opt_tcp, "tcp" }, 136 { Opt_tcp, "tcp" },
140 { Opt_rdma, "rdma" }, 137 { Opt_rdma, "rdma" },
@@ -163,9 +160,10 @@ static const match_table_t nfs_mount_option_tokens = {
163 { Opt_namelen, "namlen=%s" }, 160 { Opt_namelen, "namlen=%s" },
164 { Opt_mountport, "mountport=%s" }, 161 { Opt_mountport, "mountport=%s" },
165 { Opt_mountvers, "mountvers=%s" }, 162 { Opt_mountvers, "mountvers=%s" },
163 { Opt_minorversion, "minorversion=%s" },
164
166 { Opt_nfsvers, "nfsvers=%s" }, 165 { Opt_nfsvers, "nfsvers=%s" },
167 { Opt_nfsvers, "vers=%s" }, 166 { Opt_nfsvers, "vers=%s" },
168 { Opt_minorversion, "minorversion=%s" },
169 167
170 { Opt_sec, "sec=%s" }, 168 { Opt_sec, "sec=%s" },
171 { Opt_proto, "proto=%s" }, 169 { Opt_proto, "proto=%s" },
@@ -179,6 +177,9 @@ static const match_table_t nfs_mount_option_tokens = {
179 { Opt_fscache_uniq, "fsc=%s" }, 177 { Opt_fscache_uniq, "fsc=%s" },
180 { Opt_local_lock, "local_lock=%s" }, 178 { Opt_local_lock, "local_lock=%s" },
181 179
180 /* The following needs to be listed after all other options */
181 { Opt_nfsvers, "v%s" },
182
182 { Opt_err, NULL } 183 { Opt_err, NULL }
183}; 184};
184 185
@@ -259,6 +260,22 @@ static match_table_t nfs_local_lock_tokens = {
259 { Opt_local_lock_err, NULL } 260 { Opt_local_lock_err, NULL }
260}; 261};
261 262
263enum {
264 Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
265 Opt_vers_4_1,
266
267 Opt_vers_err
268};
269
270static match_table_t nfs_vers_tokens = {
271 { Opt_vers_2, "2" },
272 { Opt_vers_3, "3" },
273 { Opt_vers_4, "4" },
274 { Opt_vers_4_0, "4.0" },
275 { Opt_vers_4_1, "4.1" },
276
277 { Opt_vers_err, NULL }
278};
262 279
263static void nfs_umount_begin(struct super_block *); 280static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 281static int nfs_statfs(struct dentry *, struct kstatfs *);
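Ordering matters in these tables because match_token() scans entries in order and returns the first pattern that matches. That is why the bare "v%s" catch-all above must be listed last: placed earlier, it would swallow strings like "vers=3" before the more specific "vers=%s" entry could see them. Moving Opt_nfsvers into the string-argument group also reflects that values such as "4.1" can no longer be parsed as a single integer. A sketch of the lookup, with p and args as in the surrounding parser:

        substring_t args[MAX_OPT_ARGS];
        int token = match_token(p, nfs_mount_option_tokens, args);
        /* "vers=4.1", "nfsvers=3" and the legacy "v3" all yield Opt_nfsvers */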
@@ -620,7 +637,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
620 struct nfs_client *clp = nfss->nfs_client; 637 struct nfs_client *clp = nfss->nfs_client;
621 638
622 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); 639 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
623 seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
624} 640}
625#else 641#else
626static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, 642static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
@@ -629,6 +645,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
629} 645}
630#endif 646#endif
631 647
648static void nfs_show_nfs_version(struct seq_file *m,
649 unsigned int version,
650 unsigned int minorversion)
651{
652 seq_printf(m, ",vers=%u", version);
653 if (version == 4)
654 seq_printf(m, ".%u", minorversion);
655}
656
632/* 657/*
633 * Describe the mount options in force on this server representation 658 * Describe the mount options in force on this server representation
634 */ 659 */
@@ -656,7 +681,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
656 u32 version = clp->rpc_ops->version; 681 u32 version = clp->rpc_ops->version;
657 int local_flock, local_fcntl; 682 int local_flock, local_fcntl;
658 683
659 seq_printf(m, ",vers=%u", version); 684 nfs_show_nfs_version(m, version, clp->cl_minorversion);
660 seq_printf(m, ",rsize=%u", nfss->rsize); 685 seq_printf(m, ",rsize=%u", nfss->rsize);
661 seq_printf(m, ",wsize=%u", nfss->wsize); 686 seq_printf(m, ",wsize=%u", nfss->wsize);
662 if (nfss->bsize != 0) 687 if (nfss->bsize != 0)
@@ -676,8 +701,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
676 else 701 else
677 seq_puts(m, nfs_infop->nostr); 702 seq_puts(m, nfs_infop->nostr);
678 } 703 }
704 rcu_read_lock();
679 seq_printf(m, ",proto=%s", 705 seq_printf(m, ",proto=%s",
680 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); 706 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
707 rcu_read_unlock();
681 if (version == 4) { 708 if (version == 4) {
682 if (nfss->port != NFS_PORT) 709 if (nfss->port != NFS_PORT)
683 seq_printf(m, ",port=%u", nfss->port); 710 seq_printf(m, ",port=%u", nfss->port);
@@ -726,9 +753,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root)
726 753
727 nfs_show_mount_options(m, nfss, 0); 754 nfs_show_mount_options(m, nfss, 0);
728 755
756 rcu_read_lock();
729 seq_printf(m, ",addr=%s", 757 seq_printf(m, ",addr=%s",
730 rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, 758 rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
731 RPC_DISPLAY_ADDR)); 759 RPC_DISPLAY_ADDR));
760 rcu_read_unlock();
732 761
733 return 0; 762 return 0;
734} 763}
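The rcu_read_lock()/rcu_read_unlock() pairs added around rpc_peeraddr2str() in these two hunks suggest the RPC client's transport is now swappable under RCU, so the returned address string is only stable inside the read-side critical section; keeping the pointer past rcu_read_unlock() would risk a use-after-free. Hence the string is formatted into the seq_file before the section ends:

        rcu_read_lock();
        seq_printf(m, ",proto=%s",
                   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
        rcu_read_unlock();        /* do not hold on to the string past this point */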
@@ -745,7 +774,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {}
745#endif 774#endif
746#endif 775#endif
747 776
748#ifdef CONFIG_NFS_V4
749#ifdef CONFIG_NFS_V4_1 777#ifdef CONFIG_NFS_V4_1
750static void show_pnfs(struct seq_file *m, struct nfs_server *server) 778static void show_pnfs(struct seq_file *m, struct nfs_server *server)
751{ 779{
@@ -755,9 +783,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
755 else 783 else
756 seq_printf(m, "not configured"); 784 seq_printf(m, "not configured");
757} 785}
786
787static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
788{
789 if (nfss->nfs_client && nfss->nfs_client->impl_id) {
790 struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
791 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
792 "date='%llu,%u'",
793 impl_id->name, impl_id->domain,
794 impl_id->date.seconds, impl_id->date.nseconds);
795 }
796}
758#else 797#else
759static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} 798#ifdef CONFIG_NFS_V4
799static void show_pnfs(struct seq_file *m, struct nfs_server *server)
800{
801}
760#endif 802#endif
803static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
804{
805}
761#endif 806#endif
762 807
763static int nfs_show_devname(struct seq_file *m, struct dentry *root) 808static int nfs_show_devname(struct seq_file *m, struct dentry *root)
@@ -806,6 +851,8 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
806 851
807 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); 852 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
808 853
854 show_implementation_id(m, nfss);
855
809 seq_printf(m, "\n\tcaps:\t"); 856 seq_printf(m, "\n\tcaps:\t");
810 seq_printf(m, "caps=0x%x", nfss->caps); 857 seq_printf(m, "caps=0x%x", nfss->caps);
811 seq_printf(m, ",wtmult=%u", nfss->wtmult); 858 seq_printf(m, ",wtmult=%u", nfss->wtmult);
@@ -908,6 +955,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
908 data->auth_flavor_len = 1; 955 data->auth_flavor_len = 1;
909 data->version = version; 956 data->version = version;
910 data->minorversion = 0; 957 data->minorversion = 0;
958 data->net = current->nsproxy->net_ns;
911 security_init_mnt_opts(&data->lsm_opts); 959 security_init_mnt_opts(&data->lsm_opts);
912 } 960 }
913 return data; 961 return data;
@@ -1052,6 +1100,40 @@ static int nfs_parse_security_flavors(char *value,
1052 return 1; 1100 return 1;
1053} 1101}
1054 1102
1103static int nfs_parse_version_string(char *string,
1104 struct nfs_parsed_mount_data *mnt,
1105 substring_t *args)
1106{
1107 mnt->flags &= ~NFS_MOUNT_VER3;
1108 switch (match_token(string, nfs_vers_tokens, args)) {
1109 case Opt_vers_2:
1110 mnt->version = 2;
1111 break;
1112 case Opt_vers_3:
1113 mnt->flags |= NFS_MOUNT_VER3;
1114 mnt->version = 3;
1115 break;
1116 case Opt_vers_4:
1117 /* Backward compatibility option. In future,
1118 * the mount program should always supply
1119 * an NFSv4 minor version number.
1120 */
1121 mnt->version = 4;
1122 break;
1123 case Opt_vers_4_0:
1124 mnt->version = 4;
1125 mnt->minorversion = 0;
1126 break;
1127 case Opt_vers_4_1:
1128 mnt->version = 4;
1129 mnt->minorversion = 1;
1130 break;
1131 default:
1132 return 0;
1133 }
1134 return 1;
1135}
1136
1055static int nfs_get_option_str(substring_t args[], char **option) 1137static int nfs_get_option_str(substring_t args[], char **option)
1056{ 1138{
1057 kfree(*option); 1139 kfree(*option);
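nfs_parse_version_string() is the second stage of the parse: the outer table only isolates the value, and the version/minorversion split is decided here against nfs_vers_tokens, which is how "4.1" sets both fields from one option. The mismatch check a few hunks below then rejects combinations like vers=3 plus minorversion=1. A minimal sketch of a call, assuming mnt and args from the surrounding code:

        char buf[] = "4.1";        /* value already isolated by the outer match_token() */
        if (!nfs_parse_version_string(buf, mnt, args))
                goto out_invalid_value;        /* unrecognized version string */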
@@ -1157,18 +1239,6 @@ static int nfs_parse_mount_options(char *raw,
1157 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | 1239 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1158 NFS_MOUNT_LOCAL_FCNTL); 1240 NFS_MOUNT_LOCAL_FCNTL);
1159 break; 1241 break;
1160 case Opt_v2:
1161 mnt->flags &= ~NFS_MOUNT_VER3;
1162 mnt->version = 2;
1163 break;
1164 case Opt_v3:
1165 mnt->flags |= NFS_MOUNT_VER3;
1166 mnt->version = 3;
1167 break;
1168 case Opt_v4:
1169 mnt->flags &= ~NFS_MOUNT_VER3;
1170 mnt->version = 4;
1171 break;
1172 case Opt_udp: 1242 case Opt_udp:
1173 mnt->flags &= ~NFS_MOUNT_TCP; 1243 mnt->flags &= ~NFS_MOUNT_TCP;
1174 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1244 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1295,26 +1365,6 @@ static int nfs_parse_mount_options(char *raw,
1295 goto out_invalid_value; 1365 goto out_invalid_value;
1296 mnt->mount_server.version = option; 1366 mnt->mount_server.version = option;
1297 break; 1367 break;
1298 case Opt_nfsvers:
1299 if (nfs_get_option_ul(args, &option))
1300 goto out_invalid_value;
1301 switch (option) {
1302 case NFS2_VERSION:
1303 mnt->flags &= ~NFS_MOUNT_VER3;
1304 mnt->version = 2;
1305 break;
1306 case NFS3_VERSION:
1307 mnt->flags |= NFS_MOUNT_VER3;
1308 mnt->version = 3;
1309 break;
1310 case NFS4_VERSION:
1311 mnt->flags &= ~NFS_MOUNT_VER3;
1312 mnt->version = 4;
1313 break;
1314 default:
1315 goto out_invalid_value;
1316 }
1317 break;
1318 case Opt_minorversion: 1368 case Opt_minorversion:
1319 if (nfs_get_option_ul(args, &option)) 1369 if (nfs_get_option_ul(args, &option))
1320 goto out_invalid_value; 1370 goto out_invalid_value;
@@ -1326,6 +1376,15 @@ static int nfs_parse_mount_options(char *raw,
1326 /* 1376 /*
1327 * options that take text values 1377 * options that take text values
1328 */ 1378 */
1379 case Opt_nfsvers:
1380 string = match_strdup(args);
1381 if (string == NULL)
1382 goto out_nomem;
1383 rc = nfs_parse_version_string(string, mnt, args);
1384 kfree(string);
1385 if (!rc)
1386 goto out_invalid_value;
1387 break;
1329 case Opt_sec: 1388 case Opt_sec:
1330 string = match_strdup(args); 1389 string = match_strdup(args);
1331 if (string == NULL) 1390 if (string == NULL)
@@ -1405,7 +1464,7 @@ static int nfs_parse_mount_options(char *raw,
1405 if (string == NULL) 1464 if (string == NULL)
1406 goto out_nomem; 1465 goto out_nomem;
1407 mnt->nfs_server.addrlen = 1466 mnt->nfs_server.addrlen =
1408 rpc_pton(string, strlen(string), 1467 rpc_pton(mnt->net, string, strlen(string),
1409 (struct sockaddr *) 1468 (struct sockaddr *)
1410 &mnt->nfs_server.address, 1469 &mnt->nfs_server.address,
1411 sizeof(mnt->nfs_server.address)); 1470 sizeof(mnt->nfs_server.address));
@@ -1427,7 +1486,7 @@ static int nfs_parse_mount_options(char *raw,
1427 if (string == NULL) 1486 if (string == NULL)
1428 goto out_nomem; 1487 goto out_nomem;
1429 mnt->mount_server.addrlen = 1488 mnt->mount_server.addrlen =
1430 rpc_pton(string, strlen(string), 1489 rpc_pton(mnt->net, string, strlen(string),
1431 (struct sockaddr *) 1490 (struct sockaddr *)
1432 &mnt->mount_server.address, 1491 &mnt->mount_server.address,
1433 sizeof(mnt->mount_server.address)); 1492 sizeof(mnt->mount_server.address));
@@ -1516,6 +1575,9 @@ static int nfs_parse_mount_options(char *raw,
1516 if (!sloppy && invalid_option) 1575 if (!sloppy && invalid_option)
1517 return 0; 1576 return 0;
1518 1577
1578 if (mnt->minorversion && mnt->version != 4)
1579 goto out_minorversion_mismatch;
1580
1519 /* 1581 /*
1520 * verify that any proto=/mountproto= options match the address 1582 * verify that any proto=/mountproto= options match the address
1521 * families in the addr=/mountaddr= options. 1583 * families in the addr=/mountaddr= options.
@@ -1549,6 +1611,10 @@ out_invalid_address:
1549out_invalid_value: 1611out_invalid_value:
1550 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); 1612 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
1551 return 0; 1613 return 0;
1614out_minorversion_mismatch:
1615 printk(KERN_INFO "NFS: mount option vers=%u does not support "
1616 "minorversion=%u\n", mnt->version, mnt->minorversion);
1617 return 0;
1552out_nomem: 1618out_nomem:
1553 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1619 printk(KERN_INFO "NFS: not enough memory to parse option\n");
1554 return 0; 1620 return 0;
@@ -1622,6 +1688,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1622 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1688 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1623 .auth_flav_len = &server_authlist_len, 1689 .auth_flav_len = &server_authlist_len,
1624 .auth_flavs = server_authlist, 1690 .auth_flavs = server_authlist,
1691 .net = args->net,
1625 }; 1692 };
1626 int status; 1693 int status;
1627 1694
@@ -2047,7 +2114,7 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2047 2114
2048 /* We probably want something more informative here */ 2115 /* We probably want something more informative here */
2049 snprintf(sb->s_id, sizeof(sb->s_id), 2116 snprintf(sb->s_id, sizeof(sb->s_id),
2050 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); 2117 "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
2051 2118
2052 if (sb->s_blocksize == 0) 2119 if (sb->s_blocksize == 0)
2053 sb->s_blocksize = nfs_block_bits(server->wsize, 2120 sb->s_blocksize = nfs_block_bits(server->wsize,
@@ -2361,7 +2428,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2361 dprintk("--> nfs_xdev_mount()\n"); 2428 dprintk("--> nfs_xdev_mount()\n");
2362 2429
2363 /* create a new volume representation */ 2430 /* create a new volume representation */
2364 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2431 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2365 if (IS_ERR(server)) { 2432 if (IS_ERR(server)) {
2366 error = PTR_ERR(server); 2433 error = PTR_ERR(server);
2367 goto out_err_noserver; 2434 goto out_err_noserver;
@@ -2499,12 +2566,6 @@ static int nfs4_validate_text_mount_data(void *options,
2499 return -EINVAL; 2566 return -EINVAL;
2500 } 2567 }
2501 2568
2502 if (args->client_address == NULL) {
2503 dfprintk(MOUNT,
2504 "NFS4: mount program didn't pass callback address\n");
2505 return -EINVAL;
2506 }
2507
2508 return nfs_parse_devname(dev_name, 2569 return nfs_parse_devname(dev_name,
2509 &args->nfs_server.hostname, 2570 &args->nfs_server.hostname,
2510 NFS4_MAXNAMLEN, 2571 NFS4_MAXNAMLEN,
@@ -2663,8 +2724,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2663 if (!s->s_root) { 2724 if (!s->s_root) {
2664 /* initial superblock/root creation */ 2725 /* initial superblock/root creation */
2665 nfs4_fill_super(s); 2726 nfs4_fill_super(s);
2666 nfs_fscache_get_super_cookie( 2727 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2667 s, data ? data->fscache_uniq : NULL, NULL);
2668 } 2728 }
2669 2729
2670 mntroot = nfs4_get_root(s, mntfh, dev_name); 2730 mntroot = nfs4_get_root(s, mntfh, dev_name);
@@ -2707,11 +2767,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2707 char *root_devname; 2767 char *root_devname;
2708 size_t len; 2768 size_t len;
2709 2769
2710 len = strlen(hostname) + 3; 2770 len = strlen(hostname) + 5;
2711 root_devname = kmalloc(len, GFP_KERNEL); 2771 root_devname = kmalloc(len, GFP_KERNEL);
2712 if (root_devname == NULL) 2772 if (root_devname == NULL)
2713 return ERR_PTR(-ENOMEM); 2773 return ERR_PTR(-ENOMEM);
2714 snprintf(root_devname, len, "%s:/", hostname); 2774 /* Does hostname need to be enclosed in brackets? */
2775 if (strchr(hostname, ':'))
2776 snprintf(root_devname, len, "[%s]:/", hostname);
2777 else
2778 snprintf(root_devname, len, "%s:/", hostname);
2715 root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); 2779 root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
2716 kfree(root_devname); 2780 kfree(root_devname);
2717 return root_mnt; 2781 return root_mnt;
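The length change from strlen(hostname) + 3 to + 5 covers the worst case of the bracketed form: '[', ']', ':', '/' and the terminating NUL. A colon can only appear in a raw IPv6 address, so strchr() is a sufficient test for whether the "host:/" devname needs brackets. The same choice, folded into one call:

        /* sketch: fe80::1 -> "[fe80::1]:/", example.org -> "example.org:/" */
        snprintf(root_devname, len,
                 strchr(hostname, ':') ? "[%s]:/" : "%s:/", hostname);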
@@ -2891,7 +2955,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2891 dprintk("--> nfs4_xdev_mount()\n"); 2955 dprintk("--> nfs4_xdev_mount()\n");
2892 2956
2893 /* create a new volume representation */ 2957 /* create a new volume representation */
2894 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2958 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2895 if (IS_ERR(server)) { 2959 if (IS_ERR(server)) {
2896 error = PTR_ERR(server); 2960 error = PTR_ERR(server);
2897 goto out_err_noserver; 2961 goto out_err_noserver;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 978aaeb8a09..ad4d2e787b2 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
36 { 35 {
37 .procname = "idmap_cache_timeout", 36 .procname = "idmap_cache_timeout",
38 .data = &nfs_idmap_cache_timeout, 37 .data = &nfs_idmap_cache_timeout,
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
40 .mode = 0644, 39 .mode = 0644,
41 .proc_handler = proc_dointvec_jiffies, 40 .proc_handler = proc_dointvec_jiffies,
42 }, 41 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
44#endif 42#endif
45 { 43 {
46 .procname = "nfs_mountpoint_timeout", 44 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 4f9319a2e56..3210a03342f 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -20,15 +20,6 @@
20#include "iostat.h" 20#include "iostat.h"
21#include "delegation.h" 21#include "delegation.h"
22 22
23struct nfs_unlinkdata {
24 struct hlist_node list;
25 struct nfs_removeargs args;
26 struct nfs_removeres res;
27 struct inode *dir;
28 struct rpc_cred *cred;
29 struct nfs_fattr dir_attr;
30};
31
32/** 23/**
33 * nfs_free_unlinkdata - release data from a sillydelete operation. 24 * nfs_free_unlinkdata - release data from a sillydelete operation.
34 * @data: pointer to unlink structure. 25 * @data: pointer to unlink structure.
@@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata)
107 nfs_sb_deactive(sb); 98 nfs_sb_deactive(sb);
108} 99}
109 100
110#if defined(CONFIG_NFS_V4_1) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
111void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
112{ 102{
113 struct nfs_unlinkdata *data = calldata; 103 struct nfs_unlinkdata *data = calldata;
114 struct nfs_server *server = NFS_SERVER(data->dir); 104 NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
115
116 if (nfs4_setup_sequence(server, &data->args.seq_args,
117 &data->res.seq_res, 1, task))
118 return;
119 rpc_call_start(task);
120} 105}
121#endif /* CONFIG_NFS_V4_1 */
122 106
123static const struct rpc_call_ops nfs_unlink_ops = { 107static const struct rpc_call_ops nfs_unlink_ops = {
124 .rpc_call_done = nfs_async_unlink_done, 108 .rpc_call_done = nfs_async_unlink_done,
125 .rpc_release = nfs_async_unlink_release, 109 .rpc_release = nfs_async_unlink_release,
126#if defined(CONFIG_NFS_V4_1)
127 .rpc_call_prepare = nfs_unlink_prepare, 110 .rpc_call_prepare = nfs_unlink_prepare,
128#endif /* CONFIG_NFS_V4_1 */
129}; 111};
130 112
131static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) 113static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
@@ -341,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry)
341 spin_unlock(&dentry->d_lock); 323 spin_unlock(&dentry->d_lock);
342} 324}
343 325
344struct nfs_renamedata {
345 struct nfs_renameargs args;
346 struct nfs_renameres res;
347 struct rpc_cred *cred;
348 struct inode *old_dir;
349 struct dentry *old_dentry;
350 struct nfs_fattr old_fattr;
351 struct inode *new_dir;
352 struct dentry *new_dentry;
353 struct nfs_fattr new_fattr;
354};
355
356/** 326/**
357 * nfs_async_rename_done - Sillyrename post-processing 327 * nfs_async_rename_done - Sillyrename post-processing
358 * @task: rpc_task of the sillyrename 328 * @task: rpc_task of the sillyrename
@@ -403,25 +373,16 @@ static void nfs_async_rename_release(void *calldata)
403 kfree(data); 373 kfree(data);
404} 374}
405 375
406#if defined(CONFIG_NFS_V4_1)
407static void nfs_rename_prepare(struct rpc_task *task, void *calldata) 376static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
408{ 377{
409 struct nfs_renamedata *data = calldata; 378 struct nfs_renamedata *data = calldata;
410 struct nfs_server *server = NFS_SERVER(data->old_dir); 379 NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data);
411
412 if (nfs4_setup_sequence(server, &data->args.seq_args,
413 &data->res.seq_res, 1, task))
414 return;
415 rpc_call_start(task);
416} 380}
417#endif /* CONFIG_NFS_V4_1 */
418 381
419static const struct rpc_call_ops nfs_rename_ops = { 382static const struct rpc_call_ops nfs_rename_ops = {
420 .rpc_call_done = nfs_async_rename_done, 383 .rpc_call_done = nfs_async_rename_done,
421 .rpc_release = nfs_async_rename_release, 384 .rpc_release = nfs_async_rename_release,
422#if defined(CONFIG_NFS_V4_1)
423 .rpc_call_prepare = nfs_rename_prepare, 385 .rpc_call_prepare = nfs_rename_prepare,
424#endif /* CONFIG_NFS_V4_1 */
425}; 386};
426 387
427/** 388/**
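With their definitions removed from unlink.c, struct nfs_unlinkdata and struct nfs_renamedata presumably move to a shared header (not shown in this diff) so the per-version unlink_rpc_prepare/rename_rpc_prepare prototypes can name them. The effect mirrors read.c: the ops vectors set .rpc_call_prepare unconditionally and the CONFIG_NFS_V4_1 seams disappear from generic code:

        static const struct rpc_call_ops nfs_unlink_ops = {
                .rpc_call_done    = nfs_async_unlink_done,
                .rpc_release      = nfs_async_unlink_release,
                .rpc_call_prepare = nfs_unlink_prepare,        /* per-version via NFS_PROTO() */
        };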
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 834f0fe96f8..c07462320f6 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p)
100 100
101void nfs_writedata_release(struct nfs_write_data *wdata) 101void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 102{
103 put_lseg(wdata->lseg);
104 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
105 nfs_writedata_free(wdata); 104 nfs_writedata_free(wdata);
106} 105}
@@ -236,10 +235,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
236 req = nfs_page_find_request_locked(page); 235 req = nfs_page_find_request_locked(page);
237 if (req == NULL) 236 if (req == NULL)
238 break; 237 break;
239 if (nfs_set_page_tag_locked(req)) 238 if (nfs_lock_request_dontget(req))
240 break; 239 break;
241 /* Note: If we hold the page lock, as is the case in nfs_writepage, 240 /* Note: If we hold the page lock, as is the case in nfs_writepage,
242 * then the call to nfs_set_page_tag_locked() will always 241 * then the call to nfs_lock_request_dontget() will always
243 * succeed provided that someone hasn't already marked the 242 * succeed provided that someone hasn't already marked the
244 * request as dirty (in which case we don't care). 243 * request as dirty (in which case we don't care).
245 */ 244 */
@@ -375,21 +374,14 @@ out_err:
375/* 374/*
376 * Insert a write request into an inode 375 * Insert a write request into an inode
377 */ 376 */
378static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 377static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379{ 378{
380 struct nfs_inode *nfsi = NFS_I(inode); 379 struct nfs_inode *nfsi = NFS_I(inode);
381 int error;
382
383 error = radix_tree_preload(GFP_NOFS);
384 if (error != 0)
385 goto out;
386 380
387 /* Lock the request! */ 381 /* Lock the request! */
388 nfs_lock_request_dontget(req); 382 nfs_lock_request_dontget(req);
389 383
390 spin_lock(&inode->i_lock); 384 spin_lock(&inode->i_lock);
391 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
392 BUG_ON(error);
393 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
394 inode->i_version++; 386 inode->i_version++;
395 set_bit(PG_MAPPED, &req->wb_flags); 387 set_bit(PG_MAPPED, &req->wb_flags);
@@ -397,12 +389,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
397 set_page_private(req->wb_page, (unsigned long)req); 389 set_page_private(req->wb_page, (unsigned long)req);
398 nfsi->npages++; 390 nfsi->npages++;
399 kref_get(&req->wb_kref); 391 kref_get(&req->wb_kref);
400 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
401 NFS_PAGE_TAG_LOCKED);
402 spin_unlock(&inode->i_lock); 392 spin_unlock(&inode->i_lock);
403 radix_tree_preload_end();
404out:
405 return error;
406} 393}
407 394
408/* 395/*
@@ -419,7 +406,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
419 set_page_private(req->wb_page, 0); 406 set_page_private(req->wb_page, 0);
420 ClearPagePrivate(req->wb_page); 407 ClearPagePrivate(req->wb_page);
421 clear_bit(PG_MAPPED, &req->wb_flags); 408 clear_bit(PG_MAPPED, &req->wb_flags);
422 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
423 nfsi->npages--; 409 nfsi->npages--;
424 spin_unlock(&inode->i_lock); 410 spin_unlock(&inode->i_lock);
425 nfs_release_request(req); 411 nfs_release_request(req);
@@ -432,39 +418,90 @@ nfs_mark_request_dirty(struct nfs_page *req)
432} 418}
433 419
434#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 420#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
435/* 421/**
436 * Add a request to the inode's commit list. 422 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page
424 * @head: commit list head
425 *
426 * This sets the PG_CLEAN bit, updates the inode's global count of
427 * outstanding requests requiring a commit, as well as
428 * the MM page stats.
429 *
430 * The caller must _not_ hold the inode->i_lock, but must be
431 * holding the nfs_page lock.
437 */ 432 */
438static void 433void
439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
440{ 435{
441 struct inode *inode = req->wb_context->dentry->d_inode; 436 struct inode *inode = req->wb_context->dentry->d_inode;
442 struct nfs_inode *nfsi = NFS_I(inode);
443 437
444 spin_lock(&inode->i_lock);
445 set_bit(PG_CLEAN, &(req)->wb_flags); 438 set_bit(PG_CLEAN, &(req)->wb_flags);
446 radix_tree_tag_set(&nfsi->nfs_page_tree, 439 spin_lock(&inode->i_lock);
447 req->wb_index, 440 nfs_list_add_request(req, head);
448 NFS_PAGE_TAG_COMMIT); 441 NFS_I(inode)->ncommit++;
449 nfsi->ncommit++;
450 spin_unlock(&inode->i_lock); 442 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
455} 446}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
456 448
457static int 449/**
450 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to an nfs_page
452 *
453 * This clears the PG_CLEAN bit and updates the inode's global count of
454 * outstanding requests requiring a commit.
455 * It does not update the MM page stats.
456 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock.
458 */
459void
460nfs_request_remove_commit_list(struct nfs_page *req)
461{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return;
466 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--;
468}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470
471
472/*
473 * Add a request to the inode's commit list.
474 */
475static void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
477{
478 struct inode *inode = req->wb_context->dentry->d_inode;
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
483}
484
485static void
486nfs_clear_page_commit(struct page *page)
487{
488 dec_zone_page_state(page, NR_UNSTABLE_NFS);
489 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
490}
491
492static void
458nfs_clear_request_commit(struct nfs_page *req) 493nfs_clear_request_commit(struct nfs_page *req)
459{ 494{
460 struct page *page = req->wb_page; 495 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode;
461 497
462 if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { 498 if (!pnfs_clear_request_commit(req)) {
463 dec_zone_page_state(page, NR_UNSTABLE_NFS); 499 spin_lock(&inode->i_lock);
464 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); 500 nfs_request_remove_commit_list(req);
465 return 1; 501 spin_unlock(&inode->i_lock);
502 }
503 nfs_clear_page_commit(req->wb_page);
466 } 504 }
467 return 0;
468} 505}
469 506
470static inline 507static inline
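This is the core of the write.c rework: commit-pending requests were previously found by scanning radix-tree tags (NFS_PAGE_TAG_COMMIT); they now live on an explicit commit_list under inode->i_lock, with NFS_I(inode)->ncommit as the cheap non-empty test (see the nfs_need_commit() hunk just below). Exporting the add/remove helpers suggests pNFS layout drivers are meant to keep their own per-device commit buckets while reusing the same locking and accounting; a hypothetical driver hook (hld_* names are illustrative):

        static void hld_mark_request_commit(struct nfs_page *req,
                                            struct pnfs_layout_segment *lseg)
        {
                struct list_head *bucket = hld_choose_bucket(lseg, req); /* hypothetical */

                nfs_request_add_commit_list(req, bucket);
        }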
@@ -491,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
491 return 0; 528 return 0;
492} 529}
493#else 530#else
494static inline void 531static void
495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 533{
497} 534}
498 535
499static inline int 536static void
500nfs_clear_request_commit(struct nfs_page *req) 537nfs_clear_request_commit(struct nfs_page *req)
501{ 538{
502 return 0;
503} 539}
504 540
505static inline 541static inline
@@ -520,46 +556,65 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
520static int 556static int
521nfs_need_commit(struct nfs_inode *nfsi) 557nfs_need_commit(struct nfs_inode *nfsi)
522{ 558{
523 return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); 559 return nfsi->ncommit > 0;
560}
561
562/* i_lock held by caller */
563static int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
565 spinlock_t *lock)
566{
567 struct nfs_page *req, *tmp;
568 int ret = 0;
569
570 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req))
572 continue;
573 if (cond_resched_lock(lock))
574 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req);
576 nfs_list_add_request(req, dst);
577 ret++;
578 if (ret == max)
579 break;
580 }
581 return ret;
524} 582}
525 583
526/* 584/*
527 * nfs_scan_commit - Scan an inode for commit requests 585 * nfs_scan_commit - Scan an inode for commit requests
528 * @inode: NFS inode to scan 586 * @inode: NFS inode to scan
529 * @dst: destination list 587 * @dst: destination list
530 * @idx_start: lower bound of page->index to scan.
531 * @npages: idx_start + npages sets the upper bound to scan.
532 * 588 *
533 * Moves requests from the inode's 'commit' request list. 589 * Moves requests from the inode's 'commit' request list.
534 * The requests are *not* checked to ensure that they form a contiguous set. 590 * The requests are *not* checked to ensure that they form a contiguous set.
535 */ 591 */
536static int 592static int
537nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 593nfs_scan_commit(struct inode *inode, struct list_head *dst)
538{ 594{
539 struct nfs_inode *nfsi = NFS_I(inode); 595 struct nfs_inode *nfsi = NFS_I(inode);
540 int ret; 596 int ret = 0;
541
542 if (!nfs_need_commit(nfsi))
543 return 0;
544 597
545 spin_lock(&inode->i_lock); 598 spin_lock(&inode->i_lock);
546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 599 if (nfsi->ncommit > 0) {
547 if (ret > 0) 600 const int max = INT_MAX;
548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
551 if (nfs_need_commit(NFS_I(inode)))
552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553 601
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
603 &inode->i_lock);
604 ret += pnfs_scan_commit_lists(inode, max - ret,
605 &inode->i_lock);
606 }
607 spin_unlock(&inode->i_lock);
554 return ret; 608 return ret;
555} 609}
610
556#else 611#else
557static inline int nfs_need_commit(struct nfs_inode *nfsi) 612static inline int nfs_need_commit(struct nfs_inode *nfsi)
558{ 613{
559 return 0; 614 return 0;
560} 615}
561 616
562static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
563{ 618{
564 return 0; 619 return 0;
565} 620}
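nfs_scan_commit_list() walks the commit list with inode->i_lock held, and the list can be long, so it calls cond_resched_lock(). When that drops and retakes the lock, the next pointer cached by list_for_each_entry_safe() may have gone stale (req itself is safe because it was just locked), and list_safe_reset_next() recomputes it from req. The shape of the idiom:

        list_for_each_entry_safe(req, tmp, src, wb_list) {
                if (!nfs_lock_request(req))
                        continue;                /* someone else owns it; skip */
                if (cond_resched_lock(lock))     /* returns nonzero if the lock was dropped */
                        list_safe_reset_next(req, tmp, wb_list);
                nfs_request_remove_commit_list(req);
                nfs_list_add_request(req, dst);
        }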
@@ -604,7 +659,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
604 || end < req->wb_offset) 659 || end < req->wb_offset)
605 goto out_flushme; 660 goto out_flushme;
606 661
607 if (nfs_set_page_tag_locked(req)) 662 if (nfs_lock_request_dontget(req))
608 break; 663 break;
609 664
610 /* The request is locked, so wait and then retry */ 665 /* The request is locked, so wait and then retry */
@@ -616,13 +671,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
616 spin_lock(&inode->i_lock); 671 spin_lock(&inode->i_lock);
617 } 672 }
618 673
619 if (nfs_clear_request_commit(req) &&
620 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
621 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
622 NFS_I(inode)->ncommit--;
623 pnfs_clear_request_commit(req);
624 }
625
626 /* Okay, the request matches. Update the region */ 674 /* Okay, the request matches. Update the region */
627 if (offset < req->wb_offset) { 675 if (offset < req->wb_offset) {
628 req->wb_offset = offset; 676 req->wb_offset = offset;
@@ -634,6 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
634 req->wb_bytes = rqend - req->wb_offset; 682 req->wb_bytes = rqend - req->wb_offset;
635out_unlock: 683out_unlock:
636 spin_unlock(&inode->i_lock); 684 spin_unlock(&inode->i_lock);
685 if (req)
686 nfs_clear_request_commit(req);
637 return req; 687 return req;
638out_flushme: 688out_flushme:
639 spin_unlock(&inode->i_lock); 689 spin_unlock(&inode->i_lock);
@@ -655,7 +705,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
655{ 705{
656 struct inode *inode = page->mapping->host; 706 struct inode *inode = page->mapping->host;
657 struct nfs_page *req; 707 struct nfs_page *req;
658 int error;
659 708
660 req = nfs_try_to_update_request(inode, page, offset, bytes); 709 req = nfs_try_to_update_request(inode, page, offset, bytes);
661 if (req != NULL) 710 if (req != NULL)
@@ -663,11 +712,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
663 req = nfs_create_request(ctx, inode, page, offset, bytes); 712 req = nfs_create_request(ctx, inode, page, offset, bytes);
664 if (IS_ERR(req)) 713 if (IS_ERR(req))
665 goto out; 714 goto out;
666 error = nfs_inode_add_request(inode, req); 715 nfs_inode_add_request(inode, req);
667 if (error != 0) {
668 nfs_release_request(req);
669 req = ERR_PTR(error);
670 }
671out: 716out:
672 return req; 717 return req;
673} 718}
@@ -684,7 +729,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
684 nfs_grow_file(page, offset, count); 729 nfs_grow_file(page, offset, count);
685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 730 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
686 nfs_mark_request_dirty(req); 731 nfs_mark_request_dirty(req);
687 nfs_clear_page_tag_locked(req); 732 nfs_unlock_request(req);
688 return 0; 733 return 0;
689} 734}
690 735
@@ -777,7 +822,7 @@ static void nfs_writepage_release(struct nfs_page *req,
777 822
778 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) 823 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
779 nfs_inode_remove_request(req); 824 nfs_inode_remove_request(req);
780 nfs_clear_page_tag_locked(req); 825 nfs_unlock_request(req);
781 nfs_end_page_writeback(page); 826 nfs_end_page_writeback(page);
782} 827}
783 828
@@ -925,7 +970,7 @@ static void nfs_redirty_request(struct nfs_page *req)
925 struct page *page = req->wb_page; 970 struct page *page = req->wb_page;
926 971
927 nfs_mark_request_dirty(req); 972 nfs_mark_request_dirty(req);
928 nfs_clear_page_tag_locked(req); 973 nfs_unlock_request(req);
929 nfs_end_page_writeback(page); 974 nfs_end_page_writeback(page);
930} 975}
931 976
@@ -974,7 +1019,7 @@ out_bad:
974 while (!list_empty(res)) { 1019 while (!list_empty(res)) {
975 data = list_entry(res->next, struct nfs_write_data, list); 1020 data = list_entry(res->next, struct nfs_write_data, list);
976 list_del(&data->list); 1021 list_del(&data->list);
977 nfs_writedata_free(data); 1022 nfs_writedata_release(data);
978 } 1023 }
979 nfs_redirty_request(req); 1024 nfs_redirty_request(req);
980 return -ENOMEM; 1025 return -ENOMEM;
@@ -1128,23 +1173,14 @@ out:
1128 nfs_writedata_release(calldata); 1173 nfs_writedata_release(calldata);
1129} 1174}
1130 1175
1131#if defined(CONFIG_NFS_V4_1)
1132void nfs_write_prepare(struct rpc_task *task, void *calldata) 1176void nfs_write_prepare(struct rpc_task *task, void *calldata)
1133{ 1177{
1134 struct nfs_write_data *data = calldata; 1178 struct nfs_write_data *data = calldata;
1135 1179 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1136 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
1137 &data->args.seq_args,
1138 &data->res.seq_res, 1, task))
1139 return;
1140 rpc_call_start(task);
1141} 1180}
1142#endif /* CONFIG_NFS_V4_1 */
1143 1181
1144static const struct rpc_call_ops nfs_write_partial_ops = { 1182static const struct rpc_call_ops nfs_write_partial_ops = {
1145#if defined(CONFIG_NFS_V4_1)
1146 .rpc_call_prepare = nfs_write_prepare, 1183 .rpc_call_prepare = nfs_write_prepare,
1147#endif /* CONFIG_NFS_V4_1 */
1148 .rpc_call_done = nfs_writeback_done_partial, 1184 .rpc_call_done = nfs_writeback_done_partial,
1149 .rpc_release = nfs_writeback_release_partial, 1185 .rpc_release = nfs_writeback_release_partial,
1150}; 1186};
@@ -1199,16 +1235,14 @@ static void nfs_writeback_release_full(void *calldata)
1199remove_request: 1235remove_request:
1200 nfs_inode_remove_request(req); 1236 nfs_inode_remove_request(req);
1201 next: 1237 next:
1202 nfs_clear_page_tag_locked(req); 1238 nfs_unlock_request(req);
1203 nfs_end_page_writeback(page); 1239 nfs_end_page_writeback(page);
1204 } 1240 }
1205 nfs_writedata_release(calldata); 1241 nfs_writedata_release(calldata);
1206} 1242}
1207 1243
1208static const struct rpc_call_ops nfs_write_full_ops = { 1244static const struct rpc_call_ops nfs_write_full_ops = {
1209#if defined(CONFIG_NFS_V4_1)
1210 .rpc_call_prepare = nfs_write_prepare, 1245 .rpc_call_prepare = nfs_write_prepare,
1211#endif /* CONFIG_NFS_V4_1 */
1212 .rpc_call_done = nfs_writeback_done_full, 1246 .rpc_call_done = nfs_writeback_done_full,
1213 .rpc_release = nfs_writeback_release_full, 1247 .rpc_release = nfs_writeback_release_full,
1214}; 1248};
@@ -1325,7 +1359,6 @@ void nfs_commitdata_release(void *data)
1325{ 1359{
1326 struct nfs_write_data *wdata = data; 1360 struct nfs_write_data *wdata = data;
1327 1361
1328 put_lseg(wdata->lseg);
1329 put_nfs_open_context(wdata->args.context); 1362 put_nfs_open_context(wdata->args.context);
1330 nfs_commit_free(wdata); 1363 nfs_commit_free(wdata);
1331} 1364}
@@ -1411,7 +1444,7 @@ void nfs_retry_commit(struct list_head *page_list,
1411 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1444 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1412 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1445 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1413 BDI_RECLAIMABLE); 1446 BDI_RECLAIMABLE);
1414 nfs_clear_page_tag_locked(req); 1447 nfs_unlock_request(req);
1415 } 1448 }
1416} 1449}
1417EXPORT_SYMBOL_GPL(nfs_retry_commit); 1450EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1460,7 +1493,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1460 while (!list_empty(&data->pages)) { 1493 while (!list_empty(&data->pages)) {
1461 req = nfs_list_entry(data->pages.next); 1494 req = nfs_list_entry(data->pages.next);
1462 nfs_list_remove_request(req); 1495 nfs_list_remove_request(req);
1463 nfs_clear_request_commit(req); 1496 nfs_clear_page_commit(req->wb_page);
1464 1497
1465 dprintk("NFS: commit (%s/%lld %d@%lld)", 1498 dprintk("NFS: commit (%s/%lld %d@%lld)",
1466 req->wb_context->dentry->d_sb->s_id, 1499 req->wb_context->dentry->d_sb->s_id,
@@ -1486,7 +1519,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1486 dprintk(" mismatch\n"); 1519 dprintk(" mismatch\n");
1487 nfs_mark_request_dirty(req); 1520 nfs_mark_request_dirty(req);
1488 next: 1521 next:
1489 nfs_clear_page_tag_locked(req); 1522 nfs_unlock_request(req);
1490 } 1523 }
1491} 1524}
1492EXPORT_SYMBOL_GPL(nfs_commit_release_pages); 1525EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
@@ -1501,9 +1534,7 @@ static void nfs_commit_release(void *calldata)
1501} 1534}
1502 1535
1503static const struct rpc_call_ops nfs_commit_ops = { 1536static const struct rpc_call_ops nfs_commit_ops = {
1504#if defined(CONFIG_NFS_V4_1)
1505 .rpc_call_prepare = nfs_write_prepare, 1537 .rpc_call_prepare = nfs_write_prepare,
1506#endif /* CONFIG_NFS_V4_1 */
1507 .rpc_call_done = nfs_commit_done, 1538 .rpc_call_done = nfs_commit_done,
1508 .rpc_release = nfs_commit_release, 1539 .rpc_release = nfs_commit_release,
1509}; 1540};
@@ -1517,7 +1548,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1517 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1548 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1518 if (res <= 0) 1549 if (res <= 0)
1519 goto out_mark_dirty; 1550 goto out_mark_dirty;
1520 res = nfs_scan_commit(inode, &head, 0, 0); 1551 res = nfs_scan_commit(inode, &head);
1521 if (res) { 1552 if (res) {
1522 int error; 1553 int error;
1523 1554
@@ -1635,6 +1666,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1635 if (req == NULL) 1666 if (req == NULL)
1636 break; 1667 break;
1637 if (nfs_lock_request_dontget(req)) { 1668 if (nfs_lock_request_dontget(req)) {
1669 nfs_clear_request_commit(req);
1638 nfs_inode_remove_request(req); 1670 nfs_inode_remove_request(req);
1639 /* 1671 /*
1640 * In case nfs_inode_remove_request has marked the 1672 * In case nfs_inode_remove_request has marked the
diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
new file mode 100644
index 00000000000..4123551208d
--- /dev/null
+++ b/fs/nfsd/current_stateid.h
@@ -0,0 +1,28 @@
1#ifndef _NFSD4_CURRENT_STATE_H
2#define _NFSD4_CURRENT_STATE_H
3
4#include "state.h"
5#include "xdr4.h"
6
7extern void clear_current_stateid(struct nfsd4_compound_state *cstate);
8/*
9 * functions to set current state id
10 */
11extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
12extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *);
13extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *);
14extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
15
16/*
17 * functions to consume current state id
18 */
19extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
20extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *);
21extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *);
22extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *);
23extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
24extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *);
25extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *);
26extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *);
27
28#endif /* _NFSD4_CURRENT_STATE_H */
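
The header above only declares the hooks; the idea is that each NFSv4 operation can publish the stateid it produced (OPEN, LOCK, CLOSE, ...) or consume the one published by an earlier op in the same compound (READ, WRITE, SETATTR, ...). A minimal model of that flow, with deliberately simplified stand-in types (the stateid_t layout and cstate fields below are assumptions, not the nfsd definitions):

	typedef struct { unsigned char data[16]; } stateid_t;

	struct cstate {
		stateid_t current_stateid;
		int have_current;	/* CURRENT_STATE_ID_FLAG analog */
	};

	/* an op that produced a stateid publishes it ... */
	static void set_current(struct cstate *cs, const stateid_t *sid)
	{
		cs->current_stateid = *sid;
		cs->have_current = 1;
	}

	/* ... and a later op in the same compound can consume it */
	static void get_current(struct cstate *cs, stateid_t *sid)
	{
		if (cs->have_current)
			*sid = cs->current_stateid;
	}
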
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cf8a6bd062f..8e9689abbc0 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
87 struct svc_expkey key; 87 struct svc_expkey key;
88 struct svc_expkey *ek = NULL; 88 struct svc_expkey *ek = NULL;
89 89
90 if (mlen < 1 || mesg[mlen-1] != '\n') 90 if (mesg[mlen - 1] != '\n')
91 return -EINVAL; 91 return -EINVAL;
92 mesg[mlen-1] = 0; 92 mesg[mlen-1] = 0;
93 93
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index ce7f0758d84..9559ce46873 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -72,7 +72,7 @@ int nfsd_fault_inject_init(void)
72{ 72{
73 unsigned int i; 73 unsigned int i;
74 struct nfsd_fault_inject_op *op; 74 struct nfsd_fault_inject_op *op;
75 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 75 umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
76 76
77 debug_dir = debugfs_create_dir("nfsd", NULL); 77 debug_dir = debugfs_create_dir("nfsd", NULL);
78 if (!debug_dir) 78 if (!debug_dir)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
new file mode 100644
index 00000000000..12e0cff435b
--- /dev/null
+++ b/fs/nfsd/netns.h
@@ -0,0 +1,34 @@
1/*
2 * per net namespace data structures for nfsd
3 *
4 * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21#ifndef __NFSD_NETNS_H__
22#define __NFSD_NETNS_H__
23
24#include <net/net_namespace.h>
25#include <net/netns/generic.h>
26
27struct cld_net;
28
29struct nfsd_net {
30 struct cld_net *cld_net;
31};
32
33extern int nfsd_net_id;
34#endif /* __NFSD_NETNS_H__ */
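
netns.h gives nfsd its first piece of per-network-namespace state; nfsd_net_id is the key handed back by the generic netns allocator. A sketch of the usual registration boilerplate for such a structure (the init/exit bodies here are placeholders, not the nfsd implementation):

	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	int nfsd_net_id;

	static __net_init int nfsd_net_init(struct net *net)
	{
		struct nfsd_net *nn = net_generic(net, nfsd_net_id);

		nn->cld_net = NULL;	/* filled in by the cld pipe code */
		return 0;
	}

	static __net_exit void nfsd_net_exit(struct net *net)
	{
		/* per-net teardown would go here */
	}

	static struct pernet_operations nfsd_net_ops = {
		.init = nfsd_net_init,
		.exit = nfsd_net_exit,
		.id   = &nfsd_net_id,
		.size = sizeof(struct nfsd_net),	/* allocated per net */
	};

	/* register_pernet_subsys(&nfsd_net_ops) at module load,
	 * unregister_pernet_subsys(&nfsd_net_ops) at unload */
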
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 08c6e36ab2e..43f46cd9ede 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -803,13 +803,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
803 return p; 803 return p;
804} 804}
805 805
806static int 806static __be32
807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
808 const char *name, int namlen) 808 const char *name, int namlen)
809{ 809{
810 struct svc_export *exp; 810 struct svc_export *exp;
811 struct dentry *dparent, *dchild; 811 struct dentry *dparent, *dchild;
812 int rv = 0; 812 __be32 rv = nfserr_noent;
813 813
814 dparent = cd->fh.fh_dentry; 814 dparent = cd->fh.fh_dentry;
815 exp = cd->fh.fh_export; 815 exp = cd->fh.fh_export;
@@ -817,26 +817,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
817 if (isdotent(name, namlen)) { 817 if (isdotent(name, namlen)) {
818 if (namlen == 2) { 818 if (namlen == 2) {
819 dchild = dget_parent(dparent); 819 dchild = dget_parent(dparent);
820 if (dchild == dparent) { 820 /* filesystem root - cannot return filehandle for ".." */
821 /* filesystem root - cannot return filehandle for ".." */ 821 if (dchild == dparent)
822 dput(dchild); 822 goto out;
823 return -ENOENT;
824 }
825 } else 823 } else
826 dchild = dget(dparent); 824 dchild = dget(dparent);
827 } else 825 } else
828 dchild = lookup_one_len(name, dparent, namlen); 826 dchild = lookup_one_len(name, dparent, namlen);
829 if (IS_ERR(dchild)) 827 if (IS_ERR(dchild))
830 return -ENOENT; 828 return rv;
831 rv = -ENOENT;
832 if (d_mountpoint(dchild)) 829 if (d_mountpoint(dchild))
833 goto out; 830 goto out;
834 rv = fh_compose(fhp, exp, dchild, &cd->fh);
835 if (rv)
836 goto out;
837 if (!dchild->d_inode) 831 if (!dchild->d_inode)
838 goto out; 832 goto out;
839 rv = 0; 833 rv = fh_compose(fhp, exp, dchild, &cd->fh);
840out: 834out:
841 dput(dchild); 835 dput(dchild);
842 return rv; 836 return rv;
@@ -845,7 +839,7 @@ out:
845static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 839static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
846{ 840{
847 struct svc_fh fh; 841 struct svc_fh fh;
848 int err; 842 __be32 err;
849 843
850 fh_init(&fh, NFS3_FHSIZE); 844 fh_init(&fh, NFS3_FHSIZE);
851 err = compose_entry_fh(cd, &fh, name, namlen); 845 err = compose_entry_fh(cd, &fh, name, namlen);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 6f3ebb48b12..c8e9f637153 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = {
605 .procs = nfs4_cb_procedures 605 .procs = nfs4_cb_procedures
606}; 606};
607 607
608static struct rpc_version *nfs_cb_version[] = { 608static const struct rpc_version *nfs_cb_version[] = {
609 &nfs_cb_version4, 609 &nfs_cb_version4,
610}; 610};
611 611
612static struct rpc_program cb_program; 612static const struct rpc_program cb_program;
613 613
614static struct rpc_stat cb_stats = { 614static struct rpc_stat cb_stats = {
615 .program = &cb_program 615 .program = &cb_program
616}; 616};
617 617
618#define NFS4_CALLBACK 0x40000000 618#define NFS4_CALLBACK 0x40000000
619static struct rpc_program cb_program = { 619static const struct rpc_program cb_program = {
620 .name = "nfs4_cb", 620 .name = "nfs4_cb",
621 .number = NFS4_CALLBACK, 621 .number = NFS4_CALLBACK,
622 .nrvers = ARRAY_SIZE(nfs_cb_version), 622 .nrvers = ARRAY_SIZE(nfs_cb_version),
623 .version = nfs_cb_version, 623 .version = nfs_cb_version,
624 .stats = &cb_stats, 624 .stats = &cb_stats,
625 .pipe_dir_name = "/nfsd4_cb", 625 .pipe_dir_name = "nfsd4_cb",
626}; 626};
627 627
628static int max_cb_time(void) 628static int max_cb_time(void)
@@ -645,7 +645,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
645 .timeout = &timeparms, 645 .timeout = &timeparms,
646 .program = &cb_program, 646 .program = &cb_program,
647 .version = 0, 647 .version = 0,
648 .authflavor = clp->cl_flavor,
649 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 648 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
650 }; 649 };
651 struct rpc_clnt *client; 650 struct rpc_clnt *client;
@@ -656,6 +655,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
656 args.client_name = clp->cl_principal; 655 args.client_name = clp->cl_principal;
657 args.prognumber = conn->cb_prog, 656 args.prognumber = conn->cb_prog,
658 args.protocol = XPRT_TRANSPORT_TCP; 657 args.protocol = XPRT_TRANSPORT_TCP;
658 args.authflavor = clp->cl_flavor;
659 clp->cl_cb_ident = conn->cb_ident; 659 clp->cl_cb_ident = conn->cb_ident;
660 } else { 660 } else {
661 if (!conn->cb_xprt) 661 if (!conn->cb_xprt)
@@ -665,6 +665,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
665 args.bc_xprt = conn->cb_xprt; 665 args.bc_xprt = conn->cb_xprt;
666 args.prognumber = clp->cl_cb_session->se_cb_prog; 666 args.prognumber = clp->cl_cb_session->se_cb_prog;
667 args.protocol = XPRT_TRANSPORT_BC_TCP; 667 args.protocol = XPRT_TRANSPORT_BC_TCP;
668 args.authflavor = RPC_AUTH_UNIX;
668 } 669 }
669 /* Create RPC client */ 670 /* Create RPC client */
670 client = rpc_create(&args); 671 client = rpc_create(&args);
@@ -754,9 +755,9 @@ static void do_probe_callback(struct nfs4_client *clp)
754 */ 755 */
755void nfsd4_probe_callback(struct nfs4_client *clp) 756void nfsd4_probe_callback(struct nfs4_client *clp)
756{ 757{
757 /* XXX: atomicity? Also, should we be using cl_cb_flags? */ 758 /* XXX: atomicity? Also, should we be using cl_flags? */
758 clp->cl_cb_state = NFSD4_CB_UNKNOWN; 759 clp->cl_cb_state = NFSD4_CB_UNKNOWN;
759 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); 760 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
760 do_probe_callback(clp); 761 do_probe_callback(clp);
761} 762}
762 763
@@ -915,7 +916,7 @@ void nfsd4_destroy_callback_queue(void)
915/* must be called under the state lock */ 916/* must be called under the state lock */
916void nfsd4_shutdown_callback(struct nfs4_client *clp) 917void nfsd4_shutdown_callback(struct nfs4_client *clp)
917{ 918{
918 set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); 919 set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
919 /* 920 /*
920 * Note this won't actually result in a null callback; 921 * Note this won't actually result in a null callback;
921 * instead, nfsd4_do_callback_rpc() will detect the killed 922 * instead, nfsd4_do_callback_rpc() will detect the killed
@@ -966,15 +967,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
966 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 967 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
967 clp->cl_cb_conn.cb_xprt = NULL; 968 clp->cl_cb_conn.cb_xprt = NULL;
968 } 969 }
969 if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) 970 if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
970 return; 971 return;
971 spin_lock(&clp->cl_lock); 972 spin_lock(&clp->cl_lock);
972 /* 973 /*
973 * Only serialized callback code is allowed to clear these 974 * Only serialized callback code is allowed to clear these
974 * flags; main nfsd code can only set them: 975 * flags; main nfsd code can only set them:
975 */ 976 */
976 BUG_ON(!clp->cl_cb_flags); 977 BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
977 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); 978 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
978 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); 979 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
979 c = __nfsd4_find_backchannel(clp); 980 c = __nfsd4_find_backchannel(clp);
980 if (c) { 981 if (c) {
@@ -986,7 +987,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
986 987
987 err = setup_callback_client(clp, &conn, ses); 988 err = setup_callback_client(clp, &conn, ses);
988 if (err) { 989 if (err) {
989 warn_no_callback_path(clp, err); 990 nfsd4_mark_cb_down(clp, err);
990 return; 991 return;
991 } 992 }
992 /* Yay, the callback channel's back! Restart any callbacks: */ 993 /* Yay, the callback channel's back! Restart any callbacks: */
@@ -1000,7 +1001,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
1000 struct nfs4_client *clp = cb->cb_clp; 1001 struct nfs4_client *clp = cb->cb_clp;
1001 struct rpc_clnt *clnt; 1002 struct rpc_clnt *clnt;
1002 1003
1003 if (clp->cl_cb_flags) 1004 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
1004 nfsd4_process_cb_update(cb); 1005 nfsd4_process_cb_update(cb);
1005 1006
1006 clnt = clp->cl_cb_client; 1007 clnt = clp->cl_cb_client;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 94096273cd6..322d11ce06a 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -41,6 +41,14 @@
41#include "nfsd.h" 41#include "nfsd.h"
42 42
43/* 43/*
44 * Turn off idmapping when using AUTH_SYS.
45 */
46static bool nfs4_disable_idmapping = true;
47module_param(nfs4_disable_idmapping, bool, 0644);
48MODULE_PARM_DESC(nfs4_disable_idmapping,
49 "Turn off server's NFSv4 idmapping when using 'sec=sys'");
50
51/*
44 * Cache entry 52 * Cache entry
45 */ 53 */
46 54
@@ -561,28 +569,65 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
561 return ret; 569 return ret;
562} 570}
563 571
572static bool
573numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
574{
575 int ret;
576 char buf[11];
577
578 if (namelen + 1 > sizeof(buf))
579 /* too long to represent a 32-bit id: */
580 return false;
581 /* Just to make sure it's null-terminated: */
582 memcpy(buf, name, namelen);
583 buf[namelen] = '\0';
584 ret = kstrtouint(buf, 10, id);
585 return ret == 0;
586}
587
588static __be32
589do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
590{
591 if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
592 if (numeric_name_to_id(rqstp, type, name, namelen, id))
593 return 0;
594 /*
595 * otherwise, fall through and try idmapping, for
596 * backwards compatibility with clients sending names:
597 */
598 return idmap_name_to_id(rqstp, type, name, namelen, id);
599}
600
601static int
602do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
603{
604 if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
605 return sprintf(name, "%u", id);
606 return idmap_id_to_name(rqstp, type, id, name);
607}
608
564__be32 609__be32
565nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, 610nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
566 __u32 *id) 611 __u32 *id)
567{ 612{
568 return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); 613 return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
569} 614}
570 615
571__be32 616__be32
572nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, 617nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
573 __u32 *id) 618 __u32 *id)
574{ 619{
575 return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); 620 return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
576} 621}
577 622
578int 623int
579nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 624nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
580{ 625{
581 return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); 626 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
582} 627}
583 628
584int 629int
585nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 630nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
586{ 631{
587 return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); 632 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
588} 633}
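
The fallback above only engages for AUTH_SYS-style flavors (rq_flavor < RPC_AUTH_GSS): names that are pure decimal strings are treated as raw ids instead of being pushed through the idmapper. A userspace analog of the bounded parse, illustrating why the copy into a local buffer matters (the incoming name is not guaranteed NUL-terminated); this helper is illustrative, not kernel code:

	#include <errno.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	static bool parse_numeric_id(const char *name, size_t namelen,
				     uint32_t *id)
	{
		char buf[11];		/* 10 digits + NUL covers 2^32 - 1 */
		char *end;
		unsigned long val;

		if (namelen + 1 > sizeof(buf))
			return false;	/* too long for a 32-bit id */
		memcpy(buf, name, namelen);
		buf[namelen] = '\0';	/* input may lack a terminator */
		errno = 0;
		val = strtoul(buf, &end, 10);
		if (errno || end == buf || *end != '\0' || val > UINT32_MAX)
			return false;
		*id = (uint32_t)val;
		return true;
	}
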
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 896da74ec56..987e719fbae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -39,6 +39,7 @@
39#include "cache.h" 39#include "cache.h"
40#include "xdr4.h" 40#include "xdr4.h"
41#include "vfs.h" 41#include "vfs.h"
42#include "current_stateid.h"
42 43
43#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
44 45
@@ -192,10 +193,13 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
192static __be32 193static __be32
193do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 194do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
194{ 195{
195 struct svc_fh resfh; 196 struct svc_fh *resfh;
196 __be32 status; 197 __be32 status;
197 198
198 fh_init(&resfh, NFS4_FHSIZE); 199 resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
200 if (!resfh)
201 return nfserr_jukebox;
202 fh_init(resfh, NFS4_FHSIZE);
199 open->op_truncate = 0; 203 open->op_truncate = 0;
200 204
201 if (open->op_create) { 205 if (open->op_create) {
@@ -220,7 +224,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
220 */ 224 */
221 status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, 225 status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
222 open->op_fname.len, &open->op_iattr, 226 open->op_fname.len, &open->op_iattr,
223 &resfh, open->op_createmode, 227 resfh, open->op_createmode,
224 (u32 *)open->op_verf.data, 228 (u32 *)open->op_verf.data,
225 &open->op_truncate, &open->op_created); 229 &open->op_truncate, &open->op_created);
226 230
@@ -231,33 +235,32 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
231 */ 235 */
232 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) 236 if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
233 open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | 237 open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
234 FATTR4_WORD1_TIME_MODIFY); 238 FATTR4_WORD1_TIME_MODIFY);
235 } else { 239 } else {
236 status = nfsd_lookup(rqstp, current_fh, 240 status = nfsd_lookup(rqstp, current_fh,
237 open->op_fname.data, open->op_fname.len, &resfh); 241 open->op_fname.data, open->op_fname.len, resfh);
238 fh_unlock(current_fh); 242 fh_unlock(current_fh);
239 if (status)
240 goto out;
241 status = nfsd_check_obj_isreg(&resfh);
242 } 243 }
243 if (status) 244 if (status)
244 goto out; 245 goto out;
246 status = nfsd_check_obj_isreg(resfh);
247 if (status)
248 goto out;
245 249
246 if (is_create_with_attrs(open) && open->op_acl != NULL) 250 if (is_create_with_attrs(open) && open->op_acl != NULL)
247 do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); 251 do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval);
248
249 set_change_info(&open->op_cinfo, current_fh);
250 fh_dup2(current_fh, &resfh);
251 252
252 /* set reply cache */ 253 /* set reply cache */
253 fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, 254 fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
254 &resfh.fh_handle); 255 &resfh->fh_handle);
255 if (!open->op_created) 256 if (!open->op_created)
256 status = do_open_permission(rqstp, current_fh, open, 257 status = do_open_permission(rqstp, resfh, open,
257 NFSD_MAY_NOP); 258 NFSD_MAY_NOP);
258 259 set_change_info(&open->op_cinfo, current_fh);
260 fh_dup2(current_fh, resfh);
259out: 261out:
260 fh_put(&resfh); 262 fh_put(resfh);
263 kfree(resfh);
261 return status; 264 return status;
262} 265}
263 266
@@ -310,16 +313,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
310 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) 313 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
311 return nfserr_inval; 314 return nfserr_inval;
312 315
313 /* We don't yet support WANT bits: */
314 open->op_share_access &= NFS4_SHARE_ACCESS_MASK;
315
316 open->op_created = 0; 316 open->op_created = 0;
317 /* 317 /*
318 * RFC5661 18.51.3 318 * RFC5661 18.51.3
319 * Before RECLAIM_COMPLETE done, server should deny new lock 319 * Before RECLAIM_COMPLETE done, server should deny new lock
320 */ 320 */
321 if (nfsd4_has_session(cstate) && 321 if (nfsd4_has_session(cstate) &&
322 !cstate->session->se_client->cl_firststate && 322 !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
323 &cstate->session->se_client->cl_flags) &&
323 open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) 324 open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
324 return nfserr_grace; 325 return nfserr_grace;
325 326
@@ -452,6 +453,10 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
452 return nfserr_restorefh; 453 return nfserr_restorefh;
453 454
454 fh_dup2(&cstate->current_fh, &cstate->save_fh); 455 fh_dup2(&cstate->current_fh, &cstate->save_fh);
456 if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
457 memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
458 SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
459 }
455 return nfs_ok; 460 return nfs_ok;
456} 461}
457 462
@@ -463,6 +468,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
463 return nfserr_nofilehandle; 468 return nfserr_nofilehandle;
464 469
465 fh_dup2(&cstate->save_fh, &cstate->current_fh); 470 fh_dup2(&cstate->save_fh, &cstate->current_fh);
471 if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
472 memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
473 SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
474 }
466 return nfs_ok; 475 return nfs_ok;
467} 476}
468 477
@@ -481,14 +490,20 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
481 &access->ac_supported); 490 &access->ac_supported);
482} 491}
483 492
493static void gen_boot_verifier(nfs4_verifier *verifier)
494{
495 __be32 verf[2];
496
497 verf[0] = (__be32)nfssvc_boot.tv_sec;
498 verf[1] = (__be32)nfssvc_boot.tv_usec;
499 memcpy(verifier->data, verf, sizeof(verifier->data));
500}
501
484static __be32 502static __be32
485nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 503nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
486 struct nfsd4_commit *commit) 504 struct nfsd4_commit *commit)
487{ 505{
488 u32 *p = (u32 *)commit->co_verf.data; 506 gen_boot_verifier(&commit->co_verf);
489 *p++ = nfssvc_boot.tv_sec;
490 *p++ = nfssvc_boot.tv_usec;
491
492 return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, 507 return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
493 commit->co_count); 508 commit->co_count);
494} 509}
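
gen_boot_verifier() centralizes what WRITE and COMMIT previously open-coded: the verifier is simply the server boot time, so it changes exactly when unstable data may have been lost. On the client side the contract looks roughly like this (illustrative helper, not nfsd code; NFS4_VERIFIER_SIZE is the 8-byte protocol verifier):

	#include <stdbool.h>
	#include <string.h>

	#define NFS4_VERIFIER_SIZE 8

	struct nfs4_verifier { unsigned char data[NFS4_VERIFIER_SIZE]; };

	/*
	 * If the verifier returned by COMMIT differs from the one seen in
	 * the WRITE reply, the server rebooted in between and any data
	 * written UNSTABLE must be resent.
	 */
	static bool must_resend_writes(const struct nfs4_verifier *write_verf,
				       const struct nfs4_verifier *commit_verf)
	{
		return memcmp(write_verf->data, commit_verf->data,
			      NFS4_VERIFIER_SIZE) != 0;
	}
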
@@ -826,6 +841,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
826 struct nfsd4_setattr *setattr) 841 struct nfsd4_setattr *setattr)
827{ 842{
828 __be32 status = nfs_ok; 843 __be32 status = nfs_ok;
844 int err;
829 845
830 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 846 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
831 nfs4_lock_state(); 847 nfs4_lock_state();
@@ -837,9 +853,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
837 return status; 853 return status;
838 } 854 }
839 } 855 }
840 status = fh_want_write(&cstate->current_fh); 856 err = fh_want_write(&cstate->current_fh);
841 if (status) 857 if (err)
842 return status; 858 return nfserrno(err);
843 status = nfs_ok; 859 status = nfs_ok;
844 860
845 status = check_attr_support(rqstp, cstate, setattr->sa_bmval, 861 status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
@@ -865,7 +881,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
865{ 881{
866 stateid_t *stateid = &write->wr_stateid; 882 stateid_t *stateid = &write->wr_stateid;
867 struct file *filp = NULL; 883 struct file *filp = NULL;
868 u32 *p;
869 __be32 status = nfs_ok; 884 __be32 status = nfs_ok;
870 unsigned long cnt; 885 unsigned long cnt;
871 886
@@ -887,9 +902,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
887 902
888 cnt = write->wr_buflen; 903 cnt = write->wr_buflen;
889 write->wr_how_written = write->wr_stable_how; 904 write->wr_how_written = write->wr_stable_how;
890 p = (u32 *)write->wr_verifier.data; 905 gen_boot_verifier(&write->wr_verifier);
891 *p++ = nfssvc_boot.tv_sec;
892 *p++ = nfssvc_boot.tv_usec;
893 906
894 status = nfsd_write(rqstp, &cstate->current_fh, filp, 907 status = nfsd_write(rqstp, &cstate->current_fh, filp,
895 write->wr_offset, rqstp->rq_vec, write->wr_vlen, 908 write->wr_offset, rqstp->rq_vec, write->wr_vlen,
@@ -1000,6 +1013,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
1000typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, 1013typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
1001 void *); 1014 void *);
1002typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); 1015typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
1016typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
1017typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
1003 1018
1004enum nfsd4_op_flags { 1019enum nfsd4_op_flags {
1005 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 1020 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
@@ -1025,6 +1040,10 @@ enum nfsd4_op_flags {
1025 * the v4.0 case). 1040 * the v4.0 case).
1026 */ 1041 */
1027 OP_CACHEME = 1 << 6, 1042 OP_CACHEME = 1 << 6,
1043 /*
1044 * These are ops which clear current state id.
1045 */
1046 OP_CLEAR_STATEID = 1 << 7,
1028}; 1047};
1029 1048
1030struct nfsd4_operation { 1049struct nfsd4_operation {
@@ -1033,11 +1052,15 @@ struct nfsd4_operation {
1033 char *op_name; 1052 char *op_name;
1034 /* Try to get response size before operation */ 1053 /* Try to get response size before operation */
1035 nfsd4op_rsize op_rsize_bop; 1054 nfsd4op_rsize op_rsize_bop;
1055 stateid_getter op_get_currentstateid;
1056 stateid_setter op_set_currentstateid;
1036}; 1057};
1037 1058
1038static struct nfsd4_operation nfsd4_ops[]; 1059static struct nfsd4_operation nfsd4_ops[];
1039 1060
1061#ifdef NFSD_DEBUG
1040static const char *nfsd4_op_name(unsigned opnum); 1062static const char *nfsd4_op_name(unsigned opnum);
1063#endif
1041 1064
1042/* 1065/*
1043 * Enforce NFSv4.1 COMPOUND ordering rules: 1066 * Enforce NFSv4.1 COMPOUND ordering rules:
@@ -1215,13 +1238,23 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1215 if (op->status) 1238 if (op->status)
1216 goto encode_op; 1239 goto encode_op;
1217 1240
1218 if (opdesc->op_func) 1241 if (opdesc->op_func) {
1242 if (opdesc->op_get_currentstateid)
1243 opdesc->op_get_currentstateid(cstate, &op->u);
1219 op->status = opdesc->op_func(rqstp, cstate, &op->u); 1244 op->status = opdesc->op_func(rqstp, cstate, &op->u);
1220 else 1245 } else
1221 BUG_ON(op->status == nfs_ok); 1246 BUG_ON(op->status == nfs_ok);
1222 1247
1223 if (!op->status && need_wrongsec_check(rqstp)) 1248 if (!op->status) {
1224 op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); 1249 if (opdesc->op_set_currentstateid)
1250 opdesc->op_set_currentstateid(cstate, &op->u);
1251
1252 if (opdesc->op_flags & OP_CLEAR_STATEID)
1253 clear_current_stateid(cstate);
1254
1255 if (need_wrongsec_check(rqstp))
1256 op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
1257 }
1225 1258
1226encode_op: 1259encode_op:
1227 /* Only from SEQUENCE */ 1260 /* Only from SEQUENCE */
@@ -1413,6 +1446,8 @@ static struct nfsd4_operation nfsd4_ops[] = {
1413 .op_flags = OP_MODIFIES_SOMETHING, 1446 .op_flags = OP_MODIFIES_SOMETHING,
1414 .op_name = "OP_CLOSE", 1447 .op_name = "OP_CLOSE",
1415 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, 1448 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1449 .op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
1450 .op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid,
1416 }, 1451 },
1417 [OP_COMMIT] = { 1452 [OP_COMMIT] = {
1418 .op_func = (nfsd4op_func)nfsd4_commit, 1453 .op_func = (nfsd4op_func)nfsd4_commit,
@@ -1422,7 +1457,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1422 }, 1457 },
1423 [OP_CREATE] = { 1458 [OP_CREATE] = {
1424 .op_func = (nfsd4op_func)nfsd4_create, 1459 .op_func = (nfsd4op_func)nfsd4_create,
1425 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 1460 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
1426 .op_name = "OP_CREATE", 1461 .op_name = "OP_CREATE",
1427 .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, 1462 .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
1428 }, 1463 },
@@ -1431,6 +1466,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1431 .op_flags = OP_MODIFIES_SOMETHING, 1466 .op_flags = OP_MODIFIES_SOMETHING,
1432 .op_name = "OP_DELEGRETURN", 1467 .op_name = "OP_DELEGRETURN",
1433 .op_rsize_bop = nfsd4_only_status_rsize, 1468 .op_rsize_bop = nfsd4_only_status_rsize,
1469 .op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid,
1434 }, 1470 },
1435 [OP_GETATTR] = { 1471 [OP_GETATTR] = {
1436 .op_func = (nfsd4op_func)nfsd4_getattr, 1472 .op_func = (nfsd4op_func)nfsd4_getattr,
@@ -1453,6 +1489,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1453 .op_flags = OP_MODIFIES_SOMETHING, 1489 .op_flags = OP_MODIFIES_SOMETHING,
1454 .op_name = "OP_LOCK", 1490 .op_name = "OP_LOCK",
1455 .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, 1491 .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
1492 .op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid,
1456 }, 1493 },
1457 [OP_LOCKT] = { 1494 [OP_LOCKT] = {
1458 .op_func = (nfsd4op_func)nfsd4_lockt, 1495 .op_func = (nfsd4op_func)nfsd4_lockt,
@@ -1463,15 +1500,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
1463 .op_flags = OP_MODIFIES_SOMETHING, 1500 .op_flags = OP_MODIFIES_SOMETHING,
1464 .op_name = "OP_LOCKU", 1501 .op_name = "OP_LOCKU",
1465 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, 1502 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1503 .op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid,
1466 }, 1504 },
1467 [OP_LOOKUP] = { 1505 [OP_LOOKUP] = {
1468 .op_func = (nfsd4op_func)nfsd4_lookup, 1506 .op_func = (nfsd4op_func)nfsd4_lookup,
1469 .op_flags = OP_HANDLES_WRONGSEC, 1507 .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
1470 .op_name = "OP_LOOKUP", 1508 .op_name = "OP_LOOKUP",
1471 }, 1509 },
1472 [OP_LOOKUPP] = { 1510 [OP_LOOKUPP] = {
1473 .op_func = (nfsd4op_func)nfsd4_lookupp, 1511 .op_func = (nfsd4op_func)nfsd4_lookupp,
1474 .op_flags = OP_HANDLES_WRONGSEC, 1512 .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
1475 .op_name = "OP_LOOKUPP", 1513 .op_name = "OP_LOOKUPP",
1476 }, 1514 },
1477 [OP_NVERIFY] = { 1515 [OP_NVERIFY] = {
@@ -1483,6 +1521,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1483 .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, 1521 .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
1484 .op_name = "OP_OPEN", 1522 .op_name = "OP_OPEN",
1485 .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, 1523 .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
1524 .op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid,
1486 }, 1525 },
1487 [OP_OPEN_CONFIRM] = { 1526 [OP_OPEN_CONFIRM] = {
1488 .op_func = (nfsd4op_func)nfsd4_open_confirm, 1527 .op_func = (nfsd4op_func)nfsd4_open_confirm,
@@ -1495,25 +1534,30 @@ static struct nfsd4_operation nfsd4_ops[] = {
1495 .op_flags = OP_MODIFIES_SOMETHING, 1534 .op_flags = OP_MODIFIES_SOMETHING,
1496 .op_name = "OP_OPEN_DOWNGRADE", 1535 .op_name = "OP_OPEN_DOWNGRADE",
1497 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, 1536 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1537 .op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
1538 .op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid,
1498 }, 1539 },
1499 [OP_PUTFH] = { 1540 [OP_PUTFH] = {
1500 .op_func = (nfsd4op_func)nfsd4_putfh, 1541 .op_func = (nfsd4op_func)nfsd4_putfh,
1501 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1542 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1502 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, 1543 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
1544 | OP_CLEAR_STATEID,
1503 .op_name = "OP_PUTFH", 1545 .op_name = "OP_PUTFH",
1504 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1546 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1505 }, 1547 },
1506 [OP_PUTPUBFH] = { 1548 [OP_PUTPUBFH] = {
1507 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1549 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1508 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1550 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1509 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, 1551 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
1552 | OP_CLEAR_STATEID,
1510 .op_name = "OP_PUTPUBFH", 1553 .op_name = "OP_PUTPUBFH",
1511 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1554 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1512 }, 1555 },
1513 [OP_PUTROOTFH] = { 1556 [OP_PUTROOTFH] = {
1514 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1557 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1515 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1558 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1516 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, 1559 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
1560 | OP_CLEAR_STATEID,
1517 .op_name = "OP_PUTROOTFH", 1561 .op_name = "OP_PUTROOTFH",
1518 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1562 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1519 }, 1563 },
@@ -1522,6 +1566,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1522 .op_flags = OP_MODIFIES_SOMETHING, 1566 .op_flags = OP_MODIFIES_SOMETHING,
1523 .op_name = "OP_READ", 1567 .op_name = "OP_READ",
1524 .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, 1568 .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
1569 .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
1525 }, 1570 },
1526 [OP_READDIR] = { 1571 [OP_READDIR] = {
1527 .op_func = (nfsd4op_func)nfsd4_readdir, 1572 .op_func = (nfsd4op_func)nfsd4_readdir,
@@ -1576,6 +1621,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1576 .op_name = "OP_SETATTR", 1621 .op_name = "OP_SETATTR",
1577 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 1622 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1578 .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, 1623 .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
1624 .op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid,
1579 }, 1625 },
1580 [OP_SETCLIENTID] = { 1626 [OP_SETCLIENTID] = {
1581 .op_func = (nfsd4op_func)nfsd4_setclientid, 1627 .op_func = (nfsd4op_func)nfsd4_setclientid,
@@ -1600,6 +1646,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1600 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 1646 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1601 .op_name = "OP_WRITE", 1647 .op_name = "OP_WRITE",
1602 .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, 1648 .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
1649 .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
1603 }, 1650 },
1604 [OP_RELEASE_LOCKOWNER] = { 1651 [OP_RELEASE_LOCKOWNER] = {
1605 .op_func = (nfsd4op_func)nfsd4_release_lockowner, 1652 .op_func = (nfsd4op_func)nfsd4_release_lockowner,
@@ -1674,12 +1721,14 @@ static struct nfsd4_operation nfsd4_ops[] = {
1674 }, 1721 },
1675}; 1722};
1676 1723
1724#ifdef NFSD_DEBUG
1677static const char *nfsd4_op_name(unsigned opnum) 1725static const char *nfsd4_op_name(unsigned opnum)
1678{ 1726{
1679 if (opnum < ARRAY_SIZE(nfsd4_ops)) 1727 if (opnum < ARRAY_SIZE(nfsd4_ops))
1680 return nfsd4_ops[opnum].op_name; 1728 return nfsd4_ops[opnum].op_name;
1681 return "unknown_operation"; 1729 return "unknown_operation";
1682} 1730}
1731#endif
1683 1732
1684#define nfsd4_voidres nfsd4_voidargs 1733#define nfsd4_voidres nfsd4_voidargs
1685struct nfsd4_voidargs { int dummy; }; 1734struct nfsd4_voidargs { int dummy; };
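
Taken together, the nfs4proc.c changes above bolt two optional hooks and one flag onto the op table: a pre-hook that fills the op's stateid argument from the compound's current stateid, a post-hook that publishes the op's result stateid, and OP_CLEAR_STATEID for filehandle-changing ops. A compressed model of that dispatch (types and names simplified; this is not the actual nfsd4_proc_compound code):

	struct op_desc {
		int  (*func)(void *cstate, void *args);
		void (*get_current)(void *cstate, void *args);	/* pre  */
		void (*set_current)(void *cstate, void *args);	/* post */
		unsigned int flags;
	#define OPF_CLEAR_STATEID 0x1
	};

	static void clear_current(void *cstate) { /* drop saved stateid */ }

	static int dispatch(const struct op_desc *op, void *cstate, void *args)
	{
		int status;

		if (op->get_current)
			op->get_current(cstate, args);	/* consume stateid */
		status = op->func(cstate, args);
		if (status == 0) {
			if (op->set_current)
				op->set_current(cstate, args);
			if (op->flags & OPF_CLEAR_STATEID)
				clear_current(cstate);	/* fh changed */
		}
		return status;
	}
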
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 0b3e875d1ab..ed3f9206a0e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,5 +1,6 @@
1/* 1/*
2* Copyright (c) 2004 The Regents of the University of Michigan. 2* Copyright (c) 2004 The Regents of the University of Michigan.
3* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com>
3* All rights reserved. 4* All rights reserved.
4* 5*
5* Andy Adamson <andros@citi.umich.edu> 6* Andy Adamson <andros@citi.umich.edu>
@@ -36,16 +37,34 @@
36#include <linux/namei.h> 37#include <linux/namei.h>
37#include <linux/crypto.h> 38#include <linux/crypto.h>
38#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/fs.h>
41#include <linux/module.h>
42#include <net/net_namespace.h>
43#include <linux/sunrpc/rpc_pipe_fs.h>
44#include <linux/sunrpc/clnt.h>
45#include <linux/nfsd/cld.h>
39 46
40#include "nfsd.h" 47#include "nfsd.h"
41#include "state.h" 48#include "state.h"
42#include "vfs.h" 49#include "vfs.h"
50#include "netns.h"
43 51
44#define NFSDDBG_FACILITY NFSDDBG_PROC 52#define NFSDDBG_FACILITY NFSDDBG_PROC
45 53
54/* Declarations */
55struct nfsd4_client_tracking_ops {
56 int (*init)(struct net *);
57 void (*exit)(struct net *);
58 void (*create)(struct nfs4_client *);
59 void (*remove)(struct nfs4_client *);
60 int (*check)(struct nfs4_client *);
61 void (*grace_done)(struct net *, time_t);
62};
63
46/* Globals */ 64/* Globals */
47static struct file *rec_file; 65static struct file *rec_file;
48static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; 66static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
67static struct nfsd4_client_tracking_ops *client_tracking_ops;
49 68
50static int 69static int
51nfs4_save_creds(const struct cred **original_creds) 70nfs4_save_creds(const struct cred **original_creds)
@@ -117,7 +136,8 @@ out_no_tfm:
117 return status; 136 return status;
118} 137}
119 138
120void nfsd4_create_clid_dir(struct nfs4_client *clp) 139static void
140nfsd4_create_clid_dir(struct nfs4_client *clp)
121{ 141{
122 const struct cred *original_cred; 142 const struct cred *original_cred;
123 char *dname = clp->cl_recdir; 143 char *dname = clp->cl_recdir;
@@ -126,9 +146,8 @@ void nfsd4_create_clid_dir(struct nfs4_client *clp)
126 146
127 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); 147 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
128 148
129 if (clp->cl_firststate) 149 if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
130 return; 150 return;
131 clp->cl_firststate = 1;
132 if (!rec_file) 151 if (!rec_file)
133 return; 152 return;
134 status = nfs4_save_creds(&original_cred); 153 status = nfs4_save_creds(&original_cred);
@@ -265,19 +284,19 @@ out_unlock:
265 return status; 284 return status;
266} 285}
267 286
268void 287static void
269nfsd4_remove_clid_dir(struct nfs4_client *clp) 288nfsd4_remove_clid_dir(struct nfs4_client *clp)
270{ 289{
271 const struct cred *original_cred; 290 const struct cred *original_cred;
272 int status; 291 int status;
273 292
274 if (!rec_file || !clp->cl_firststate) 293 if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
275 return; 294 return;
276 295
277 status = mnt_want_write_file(rec_file); 296 status = mnt_want_write_file(rec_file);
278 if (status) 297 if (status)
279 goto out; 298 goto out;
280 clp->cl_firststate = 0; 299 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
281 300
282 status = nfs4_save_creds(&original_cred); 301 status = nfs4_save_creds(&original_cred);
283 if (status < 0) 302 if (status < 0)
@@ -292,7 +311,6 @@ out:
292 if (status) 311 if (status)
293 printk("NFSD: Failed to remove expired client state directory" 312 printk("NFSD: Failed to remove expired client state directory"
294 " %.*s\n", HEXDIR_LEN, clp->cl_recdir); 313 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
295 return;
296} 314}
297 315
298static int 316static int
@@ -311,8 +329,9 @@ purge_old(struct dentry *parent, struct dentry *child)
311 return 0; 329 return 0;
312} 330}
313 331
314void 332static void
315nfsd4_recdir_purge_old(void) { 333nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
334{
316 int status; 335 int status;
317 336
318 if (!rec_file) 337 if (!rec_file)
@@ -343,7 +362,7 @@ load_recdir(struct dentry *parent, struct dentry *child)
343 return 0; 362 return 0;
344} 363}
345 364
346int 365static int
347nfsd4_recdir_load(void) { 366nfsd4_recdir_load(void) {
348 int status; 367 int status;
349 368
@@ -361,8 +380,8 @@ nfsd4_recdir_load(void) {
361 * Hold reference to the recovery directory. 380 * Hold reference to the recovery directory.
362 */ 381 */
363 382
364void 383static int
365nfsd4_init_recdir() 384nfsd4_init_recdir(void)
366{ 385{
367 const struct cred *original_cred; 386 const struct cred *original_cred;
368 int status; 387 int status;
@@ -377,20 +396,44 @@ nfsd4_init_recdir()
377 printk("NFSD: Unable to change credentials to find recovery" 396 printk("NFSD: Unable to change credentials to find recovery"
378 " directory: error %d\n", 397 " directory: error %d\n",
379 status); 398 status);
380 return; 399 return status;
381 } 400 }
382 401
383 rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); 402 rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
384 if (IS_ERR(rec_file)) { 403 if (IS_ERR(rec_file)) {
385 printk("NFSD: unable to find recovery directory %s\n", 404 printk("NFSD: unable to find recovery directory %s\n",
386 user_recovery_dirname); 405 user_recovery_dirname);
406 status = PTR_ERR(rec_file);
387 rec_file = NULL; 407 rec_file = NULL;
388 } 408 }
389 409
390 nfs4_reset_creds(original_cred); 410 nfs4_reset_creds(original_cred);
411 return status;
391} 412}
392 413
393void 414static int
415nfsd4_load_reboot_recovery_data(struct net *net)
416{
417 int status;
418
419 /* XXX: The legacy code won't work in a container */
420 if (net != &init_net) {
421 WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
422 "tracking in a container!\n");
423 return -EINVAL;
424 }
425
426 nfs4_lock_state();
427 status = nfsd4_init_recdir();
428 if (!status)
429 status = nfsd4_recdir_load();
430 nfs4_unlock_state();
431 if (status)
432 printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
433 return status;
434}
435
436static void
394nfsd4_shutdown_recdir(void) 437nfsd4_shutdown_recdir(void)
395{ 438{
396 if (!rec_file) 439 if (!rec_file)
@@ -399,6 +442,13 @@ nfsd4_shutdown_recdir(void)
399 rec_file = NULL; 442 rec_file = NULL;
400} 443}
401 444
445static void
446nfsd4_legacy_tracking_exit(struct net *net)
447{
448 nfs4_release_reclaim();
449 nfsd4_shutdown_recdir();
450}
451
402/* 452/*
403 * Change the NFSv4 recovery directory to recdir. 453 * Change the NFSv4 recovery directory to recdir.
404 */ 454 */
@@ -425,3 +475,572 @@ nfs4_recoverydir(void)
425{ 475{
426 return user_recovery_dirname; 476 return user_recovery_dirname;
427} 477}
478
479static int
480nfsd4_check_legacy_client(struct nfs4_client *clp)
481{
482 /* did we already find that this client is stable? */
483 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
484 return 0;
485
486 /* look for it in the reclaim hashtable otherwise */
487 if (nfsd4_find_reclaim_client(clp)) {
488 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
489 return 0;
490 }
491
492 return -ENOENT;
493}
494
495static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
496 .init = nfsd4_load_reboot_recovery_data,
497 .exit = nfsd4_legacy_tracking_exit,
498 .create = nfsd4_create_clid_dir,
499 .remove = nfsd4_remove_clid_dir,
500 .check = nfsd4_check_legacy_client,
501 .grace_done = nfsd4_recdir_purge_old,
502};
503
504/* Globals */
505#define NFSD_PIPE_DIR "nfsd"
506#define NFSD_CLD_PIPE "cld"
507
508/* per-net-ns structure for holding cld upcall info */
509struct cld_net {
510 struct rpc_pipe *cn_pipe;
511 spinlock_t cn_lock;
512 struct list_head cn_list;
513 unsigned int cn_xid;
514};
515
516struct cld_upcall {
517 struct list_head cu_list;
518 struct cld_net *cu_net;
519 struct task_struct *cu_task;
520 struct cld_msg cu_msg;
521};
522
523static int
524__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
525{
526 int ret;
527 struct rpc_pipe_msg msg;
528
529 memset(&msg, 0, sizeof(msg));
530 msg.data = cmsg;
531 msg.len = sizeof(*cmsg);
532
533 /*
534 * Set task state before we queue the upcall. That prevents
535 * wake_up_process in the downcall from racing with schedule.
536 */
537 set_current_state(TASK_UNINTERRUPTIBLE);
538 ret = rpc_queue_upcall(pipe, &msg);
539 if (ret < 0) {
540 set_current_state(TASK_RUNNING);
541 goto out;
542 }
543
544 schedule();
545 set_current_state(TASK_RUNNING);
546
547 if (msg.errno < 0)
548 ret = msg.errno;
549out:
550 return ret;
551}
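
The comment in __cld_pipe_upcall() captures the classic lost-wakeup rule: mark the task sleeping before publishing the request. If the downcall's wake_up_process() lands between queueing and schedule(), it merely flips the task back to TASK_RUNNING and schedule() returns promptly instead of sleeping forever. Condensed to its skeleton (publish_request() stands in for rpc_queue_upcall(), and sleeper_task for the recorded cu_task):

	/* sleeper */
	set_current_state(TASK_UNINTERRUPTIBLE);  /* 1: declare intent       */
	publish_request();                        /* 2: waker can now see it */
	schedule();                               /* 3: an early wakeup just */
	                                          /*    cancels the sleep    */

	/* waker (the downcall), after filling in the reply */
	wake_up_process(sleeper_task);
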
552
553static int
554cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
555{
556 int ret;
557
558 /*
559 * -EAGAIN occurs when pipe is closed and reopened while there are
560 * upcalls queued.
561 */
562 do {
563 ret = __cld_pipe_upcall(pipe, cmsg);
564 } while (ret == -EAGAIN);
565
566 return ret;
567}
568
569static ssize_t
570cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
571{
572 struct cld_upcall *tmp, *cup;
573 struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
574 uint32_t xid;
575 struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
576 nfsd_net_id);
577 struct cld_net *cn = nn->cld_net;
578
579 if (mlen != sizeof(*cmsg)) {
580 dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
581 sizeof(*cmsg));
582 return -EINVAL;
583 }
584
585 /* copy just the xid so we can try to find that */
586 if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
587 dprintk("%s: error when copying xid from userspace", __func__);
588 return -EFAULT;
589 }
590
591 /* walk the list and find corresponding xid */
592 cup = NULL;
593 spin_lock(&cn->cn_lock);
594 list_for_each_entry(tmp, &cn->cn_list, cu_list) {
595 if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
596 cup = tmp;
597 list_del_init(&cup->cu_list);
598 break;
599 }
600 }
601 spin_unlock(&cn->cn_lock);
602
603 /* couldn't find upcall? */
604 if (!cup) {
605 dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid);
606 return -EINVAL;
607 }
608
609 if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
610 return -EFAULT;
611
612 wake_up_process(cup->cu_task);
613 return mlen;
614}
615
616static void
617cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
618{
619 struct cld_msg *cmsg = msg->data;
620 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
621 cu_msg);
622
623 /* errno >= 0 means we got a downcall */
624 if (msg->errno >= 0)
625 return;
626
627 wake_up_process(cup->cu_task);
628}
629
630static const struct rpc_pipe_ops cld_upcall_ops = {
631 .upcall = rpc_pipe_generic_upcall,
632 .downcall = cld_pipe_downcall,
633 .destroy_msg = cld_pipe_destroy_msg,
634};
635
636static struct dentry *
637nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
638{
639 struct dentry *dir, *dentry;
640
641 dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
642 if (dir == NULL)
643 return ERR_PTR(-ENOENT);
644 dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
645 dput(dir);
646 return dentry;
647}
648
649static void
650nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
651{
652 if (pipe->dentry)
653 rpc_unlink(pipe->dentry);
654}
655
656static struct dentry *
657nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
658{
659 struct super_block *sb;
660 struct dentry *dentry;
661
662 sb = rpc_get_sb_net(net);
663 if (!sb)
664 return NULL;
665 dentry = nfsd4_cld_register_sb(sb, pipe);
666 rpc_put_sb_net(net);
667 return dentry;
668}
669
670static void
671nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
672{
673 struct super_block *sb;
674
675 sb = rpc_get_sb_net(net);
676 if (sb) {
677 nfsd4_cld_unregister_sb(pipe);
678 rpc_put_sb_net(net);
679 }
680}
681
682/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
683static int
684nfsd4_init_cld_pipe(struct net *net)
685{
686 int ret;
687 struct dentry *dentry;
688 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
689 struct cld_net *cn;
690
691 if (nn->cld_net)
692 return 0;
693
694 cn = kzalloc(sizeof(*cn), GFP_KERNEL);
695 if (!cn) {
696 ret = -ENOMEM;
697 goto err;
698 }
699
700 cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
701 if (IS_ERR(cn->cn_pipe)) {
702 ret = PTR_ERR(cn->cn_pipe);
703 goto err;
704 }
705 spin_lock_init(&cn->cn_lock);
706 INIT_LIST_HEAD(&cn->cn_list);
707
708 dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
709 if (IS_ERR(dentry)) {
710 ret = PTR_ERR(dentry);
711 goto err_destroy_data;
712 }
713
714 cn->cn_pipe->dentry = dentry;
715 nn->cld_net = cn;
716 return 0;
717
718err_destroy_data:
719 rpc_destroy_pipe_data(cn->cn_pipe);
720err:
721 kfree(cn);
722 printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
723 ret);
724 return ret;
725}
726
727static void
728nfsd4_remove_cld_pipe(struct net *net)
729{
730 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
731 struct cld_net *cn = nn->cld_net;
732
733 nfsd4_cld_unregister_net(net, cn->cn_pipe);
734 rpc_destroy_pipe_data(cn->cn_pipe);
735 kfree(nn->cld_net);
736 nn->cld_net = NULL;
737}
738
739static struct cld_upcall *
740alloc_cld_upcall(struct cld_net *cn)
741{
742 struct cld_upcall *new, *tmp;
743
744 new = kzalloc(sizeof(*new), GFP_KERNEL);
745 if (!new)
746 return new;
747
748 /* FIXME: hard cap on number in flight? */
749restart_search:
750 spin_lock(&cn->cn_lock);
751 list_for_each_entry(tmp, &cn->cn_list, cu_list) {
752 if (tmp->cu_msg.cm_xid == cn->cn_xid) {
753 cn->cn_xid++;
754 spin_unlock(&cn->cn_lock);
755 goto restart_search;
756 }
757 }
758 new->cu_task = current;
759 new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
760 put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
761 new->cu_net = cn;
762 list_add(&new->cu_list, &cn->cn_list);
763 spin_unlock(&cn->cn_lock);
764
765 dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
766
767 return new;
768}
769
770static void
771free_cld_upcall(struct cld_upcall *victim)
772{
773 struct cld_net *cn = victim->cu_net;
774
775 spin_lock(&cn->cn_lock);
776 list_del(&victim->cu_list);
777 spin_unlock(&cn->cn_lock);
778 kfree(victim);
779}
780
781/* Ask daemon to create a new record */
782static void
783nfsd4_cld_create(struct nfs4_client *clp)
784{
785 int ret;
786 struct cld_upcall *cup;
787 /* FIXME: determine net from clp */
788 struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
789 struct cld_net *cn = nn->cld_net;
790
791 /* Don't upcall if it's already stored */
792 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
793 return;
794
795 cup = alloc_cld_upcall(cn);
796 if (!cup) {
797 ret = -ENOMEM;
798 goto out_err;
799 }
800
801 cup->cu_msg.cm_cmd = Cld_Create;
802 cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
803 memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
804 clp->cl_name.len);
805
806 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
807 if (!ret) {
808 ret = cup->cu_msg.cm_status;
809 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
810 }
811
812 free_cld_upcall(cup);
813out_err:
814 if (ret)
815 printk(KERN_ERR "NFSD: Unable to create client "
816 "record on stable storage: %d\n", ret);
817}
818
819/* Ask daemon to remove a record */
820static void
821nfsd4_cld_remove(struct nfs4_client *clp)
822{
823 int ret;
824 struct cld_upcall *cup;
825 /* FIXME: determine net from clp */
826 struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
827 struct cld_net *cn = nn->cld_net;
828
829 /* Don't upcall if it's already removed */
830 if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
831 return;
832
833 cup = alloc_cld_upcall(cn);
834 if (!cup) {
835 ret = -ENOMEM;
836 goto out_err;
837 }
838
839 cup->cu_msg.cm_cmd = Cld_Remove;
840 cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
841 memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
842 clp->cl_name.len);
843
844 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
845 if (!ret) {
846 ret = cup->cu_msg.cm_status;
847 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
848 }
849
850 free_cld_upcall(cup);
851out_err:
852 if (ret)
853 printk(KERN_ERR "NFSD: Unable to remove client "
854 "record from stable storage: %d\n", ret);
855}
856
857/* Check for presence of a record, and update its timestamp */
858static int
859nfsd4_cld_check(struct nfs4_client *clp)
860{
861 int ret;
862 struct cld_upcall *cup;
863 /* FIXME: determine net from clp */
864 struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
865 struct cld_net *cn = nn->cld_net;
866
867 /* Don't upcall if one was already stored during this grace pd */
+	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+		return 0;
+
+	cup = alloc_cld_upcall(cn);
+	if (!cup) {
+		printk(KERN_ERR "NFSD: Unable to check client record on "
+				"stable storage: %d\n", -ENOMEM);
+		return -ENOMEM;
+	}
+
+	cup->cu_msg.cm_cmd = Cld_Check;
+	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+			clp->cl_name.len);
+
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	if (!ret) {
+		ret = cup->cu_msg.cm_status;
+		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+	}
+
+	free_cld_upcall(cup);
+	return ret;
+}
+
+static void
+nfsd4_cld_grace_done(struct net *net, time_t boot_time)
+{
+	int ret;
+	struct cld_upcall *cup;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+
+	cup = alloc_cld_upcall(cn);
+	if (!cup) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	cup->cu_msg.cm_cmd = Cld_GraceDone;
+	cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	if (!ret)
+		ret = cup->cu_msg.cm_status;
+
+	free_cld_upcall(cup);
+out_err:
+	if (ret)
+		printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+	.init		= nfsd4_init_cld_pipe,
+	.exit		= nfsd4_remove_cld_pipe,
+	.create		= nfsd4_cld_create,
+	.remove		= nfsd4_cld_remove,
+	.check		= nfsd4_cld_check,
+	.grace_done	= nfsd4_cld_grace_done,
+};
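nfsd4_cld_tracking_ops is one of two interchangeable method tables; the rest of nfsd only calls through the nfsd4_client_record_*/nfsd4_client_tracking_* wrappers below, so a backend is selected simply by pointing client_tracking_ops at the right table. A hedged sketch of what a hypothetical third backend would have to supply (the my_* names are illustrative stubs, not part of the patch):

static int my_tracker_init(struct net *net) { return 0; }
static void my_tracker_exit(struct net *net) { }
static void my_tracker_create(struct nfs4_client *clp) { }
static void my_tracker_remove(struct nfs4_client *clp) { }
/* nonzero from ->check means "no record"; see nfs4_check_open_reclaim() */
static int my_tracker_check(struct nfs4_client *clp) { return -ENOENT; }
static void my_tracker_grace_done(struct net *net, time_t boot_time) { }

static struct nfsd4_client_tracking_ops my_tracking_ops = {
	.init		= my_tracker_init,
	.exit		= my_tracker_exit,
	.create		= my_tracker_create,
	.remove		= my_tracker_remove,
	.check		= my_tracker_check,
	.grace_done	= my_tracker_grace_done,
};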
+
+int
+nfsd4_client_tracking_init(struct net *net)
+{
+	int status;
+	struct path path;
+
+	if (!client_tracking_ops) {
+		client_tracking_ops = &nfsd4_cld_tracking_ops;
+		status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+		if (!status) {
+			if (S_ISDIR(path.dentry->d_inode->i_mode))
+				client_tracking_ops =
+						&nfsd4_legacy_tracking_ops;
+			path_put(&path);
+		}
+	}
+
+	status = client_tracking_ops->init(net);
+	if (status) {
+		printk(KERN_WARNING "NFSD: Unable to initialize client "
+				"recovery tracking! (%d)\n", status);
+		client_tracking_ops = NULL;
+	}
+	return status;
+}
+
+void
+nfsd4_client_tracking_exit(struct net *net)
+{
+	if (client_tracking_ops) {
+		client_tracking_ops->exit(net);
+		client_tracking_ops = NULL;
+	}
+}
+
+void
+nfsd4_client_record_create(struct nfs4_client *clp)
+{
+	if (client_tracking_ops)
+		client_tracking_ops->create(clp);
+}
+
+void
+nfsd4_client_record_remove(struct nfs4_client *clp)
+{
+	if (client_tracking_ops)
+		client_tracking_ops->remove(clp);
+}
+
+int
+nfsd4_client_record_check(struct nfs4_client *clp)
+{
+	if (client_tracking_ops)
+		return client_tracking_ops->check(clp);
+
+	return -EOPNOTSUPP;
+}
+
+void
+nfsd4_record_grace_done(struct net *net, time_t boot_time)
+{
+	if (client_tracking_ops)
+		client_tracking_ops->grace_done(net, boot_time);
+}
+
+static int
+rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct super_block *sb = ptr;
+	struct net *net = sb->s_fs_info;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+	struct dentry *dentry;
+	int ret = 0;
+
+	if (!try_module_get(THIS_MODULE))
+		return 0;
+
+	if (!cn) {
+		module_put(THIS_MODULE);
+		return 0;
+	}
+
+	switch (event) {
+	case RPC_PIPEFS_MOUNT:
+		dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
+		if (IS_ERR(dentry)) {
+			ret = PTR_ERR(dentry);
+			break;
+		}
+		cn->cn_pipe->dentry = dentry;
+		break;
+	case RPC_PIPEFS_UMOUNT:
+		if (cn->cn_pipe->dentry)
+			nfsd4_cld_unregister_sb(cn->cn_pipe);
+		break;
+	default:
+		ret = -ENOTSUPP;
+		break;
+	}
+	module_put(THIS_MODULE);
+	return ret;
+}
+
+struct notifier_block nfsd4_cld_block = {
+	.notifier_call = rpc_pipefs_event,
+};
+
+int
+register_cld_notifier(void)
+{
+	return rpc_pipefs_notifier_register(&nfsd4_cld_block);
+}
+
+void
+unregister_cld_notifier(void)
+{
+	rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
+}
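The notifier above exists because rpc_pipefs can be mounted and unmounted while nfsd holds a pipe; the MOUNT/UMOUNT events let nfsd attach and detach the pipe's dentry on the fly. The registration shape is the stock notifier_block pattern; a hedged sketch of the same shape for an arbitrary subscriber (the my_* names are illustrative):

#include <linux/notifier.h>
#include <linux/sunrpc/rpc_pipe_fs.h>

static int my_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	switch (event) {
	case RPC_PIPEFS_MOUNT:		/* superblock in ptr: wire up dentries */
	case RPC_PIPEFS_UMOUNT:		/* tear them down again */
		return 0;
	default:
		return -ENOTSUPP;
	}
}

static struct notifier_block my_block = {
	.notifier_call	= my_event,
};

/* paired with rpc_pipefs_notifier_register(&my_block) and
 * rpc_pipefs_notifier_unregister(&my_block), exactly as
 * register_cld_notifier()/unregister_cld_notifier() do above. */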
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e8c98f00967..7f71c69cdcd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -58,11 +58,15 @@ static const stateid_t one_stateid = {
 static const stateid_t zero_stateid = {
 	/* all fields zero */
 };
+static const stateid_t currentstateid = {
+	.si_generation = 1,
+};
 
 static u64 current_sessionid = 1;
 
 #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
 #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
+#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
 
 /* forward declarations */
 static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -91,6 +95,19 @@ nfs4_lock_state(void)
 	mutex_lock(&client_mutex);
 }
 
+static void free_session(struct kref *);
+
+/* Must be called under the client_lock */
+static void nfsd4_put_session_locked(struct nfsd4_session *ses)
+{
+	kref_put(&ses->se_ref, free_session);
+}
+
+static void nfsd4_get_session(struct nfsd4_session *ses)
+{
+	kref_get(&ses->se_ref);
+}
+
 void
 nfs4_unlock_state(void)
 {
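The hunk above arranges for the final kref_put on a session to always run with client_lock held (free_session asserts this a few hunks later), with nfsd4_put_session() added as the take-the-lock-yourself wrapper. The underlying idiom, sketched in isolation with an illustrative struct and lock (not patch code):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj { struct kref ref; };
static DEFINE_SPINLOCK(obj_lock);

static void obj_free(struct kref *k)
{
	/* caller guarantees obj_lock is held, as free_session() does */
	kfree(container_of(k, struct obj, ref));
}

static void obj_put(struct obj *o)
{
	spin_lock(&obj_lock);
	kref_put(&o->ref, obj_free);	/* release callback runs under the lock */
	spin_unlock(&obj_lock);
}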
@@ -605,12 +622,20 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
 	return sid->sequence % SESSION_HASH_SIZE;
 }
 
+#ifdef NFSD_DEBUG
 static inline void
 dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 {
 	u32 *ptr = (u32 *)(&sessionid->data[0]);
 	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
 }
+#else
+static inline void
+dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
+{
+}
+#endif
+
 
 static void
 gen_sessionid(struct nfsd4_session *ses)
@@ -832,11 +857,12 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 	spin_unlock(&clp->cl_lock);
 }
 
-void free_session(struct kref *kref)
+static void free_session(struct kref *kref)
 {
 	struct nfsd4_session *ses;
 	int mem;
 
+	BUG_ON(!spin_is_locked(&client_lock));
 	ses = container_of(kref, struct nfsd4_session, se_ref);
 	nfsd4_del_conns(ses);
 	spin_lock(&nfsd_drc_lock);
@@ -847,6 +873,13 @@ void free_session(struct kref *kref)
 	kfree(ses);
 }
 
+void nfsd4_put_session(struct nfsd4_session *ses)
+{
+	spin_lock(&client_lock);
+	nfsd4_put_session_locked(ses);
+	spin_unlock(&client_lock);
+}
+
 static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	struct nfsd4_session *new;
@@ -894,7 +927,9 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n
 	status = nfsd4_new_conn_from_crses(rqstp, new);
 	/* whoops: benny points out, status is ignored! (err, or bogus) */
 	if (status) {
+		spin_lock(&client_lock);
 		free_session(&new->se_ref);
+		spin_unlock(&client_lock);
 		return NULL;
 	}
 	if (cses->flags & SESSION4_BACK_CHAN) {
@@ -1006,12 +1041,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static inline void
 free_client(struct nfs4_client *clp)
 {
+	BUG_ON(!spin_is_locked(&client_lock));
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session *ses;
 		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
 				se_perclnt);
 		list_del(&ses->se_perclnt);
-		nfsd4_put_session(ses);
+		nfsd4_put_session_locked(ses);
 	}
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
@@ -1138,12 +1174,12 @@ static void gen_clid(struct nfs4_client *clp)
 
 static void gen_confirm(struct nfs4_client *clp)
 {
+	__be32 verf[2];
 	static u32 i;
-	u32 *p;
 
-	p = (u32 *)clp->cl_confirm.data;
-	*p++ = get_seconds();
-	*p++ = i++;
+	verf[0] = (__be32)get_seconds();
+	verf[1] = (__be32)i++;
+	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
 }
 
 static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
@@ -1180,7 +1216,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	if (princ) {
 		clp->cl_principal = kstrdup(princ, GFP_KERNEL);
 		if (clp->cl_principal == NULL) {
+			spin_lock(&client_lock);
 			free_client(clp);
+			spin_unlock(&client_lock);
 			return NULL;
 		}
 	}
@@ -1308,7 +1346,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
 	else
 		goto out_err;
 
-	conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+	conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val,
 					se->se_callback_addr_len,
 					(struct sockaddr *)&conn->cb_addr,
 					sizeof(conn->cb_addr));
@@ -1347,6 +1385,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 	slot->sl_opcnt = resp->opcnt;
 	slot->sl_status = resp->cstate.status;
 
+	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
 	if (nfsd4_not_cached(resp)) {
 		slot->sl_datalen = 0;
 		return;
@@ -1374,15 +1413,12 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 	struct nfsd4_op *op;
 	struct nfsd4_slot *slot = resp->cstate.slot;
 
-	dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
-		resp->opcnt, resp->cstate.slot->sl_cachethis);
-
 	/* Encode the replayed sequence operation */
 	op = &args->ops[resp->opcnt - 1];
 	nfsd4_encode_operation(resp, op);
 
 	/* Return nfserr_retry_uncached_rep in next operation. */
-	if (args->opcnt > 1 && slot->sl_cachethis == 0) {
+	if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
 		op = &args->ops[resp->opcnt++];
 		op->status = nfserr_retry_uncached_rep;
 		nfsd4_encode_operation(resp, op);
@@ -1575,16 +1611,11 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
 	else
 		return nfserr_seq_misordered;
 	}
-	/* Normal */
+	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
-	/* Replay */
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
-	/* Wraparound */
-	if (seqid == 1 && (slot_seqid + 1) == 0)
-		return nfs_ok;
-	/* Misordered replay or misordered new request */
 	return nfserr_seq_misordered;
 }
 
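The check_slot_seqid simplification above leans on C's unsigned arithmetic:

	u32 slot_seqid = 0xffffffffU;
	/* slot_seqid + 1 == 0 in u32 arithmetic, so a client sending
	 * seqid 0 after the wrap still matches the "normal" test,
	 * with no dedicated wraparound branch needed. */

Worth noting when reading the hunk: the deleted branch had additionally accepted seqid 1 as the successor of 0xffffffff, whereas the rewritten test accepts only 0.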
@@ -1815,9 +1846,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	nfsd4_probe_callback_sync(ses->se_client);
 	nfs4_unlock_state();
 
+	spin_lock(&client_lock);
 	nfsd4_del_conns(ses);
-
-	nfsd4_put_session(ses);
+	nfsd4_put_session_locked(ses);
+	spin_unlock(&client_lock);
 	status = nfs_ok;
 out:
 	dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1921,8 +1953,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	 * sr_highest_slotid and the sr_target_slot id to maxslots */
 	seq->maxslots = session->se_fchannel.maxreqs;
 
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
+					slot->sl_flags & NFSD4_SLOT_INUSE);
 	if (status == nfserr_replay_cache) {
+		status = nfserr_seq_misordered;
+		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
+			goto out;
 		cstate->slot = slot;
 		cstate->session = session;
 		/* Return the cached reply status and set cstate->status
@@ -1938,9 +1974,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	conn = NULL;
 
 	/* Success! bump slot seqid */
-	slot->sl_inuse = true;
 	slot->sl_seqid = seq->seqid;
-	slot->sl_cachethis = seq->cachethis;
+	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	if (seq->cachethis)
+		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
+	else
+		slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
 
 	cstate->slot = slot;
 	cstate->session = session;
@@ -2030,7 +2069,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 
 	nfs4_lock_state();
 	status = nfserr_complete_already;
-	if (cstate->session->se_client->cl_firststate)
+	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
+			&cstate->session->se_client->cl_flags))
 		goto out;
 
 	status = nfserr_stale_clientid;
@@ -2045,7 +2085,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 		goto out;
 
 	status = nfs_ok;
-	nfsd4_create_clid_dir(cstate->session->se_client);
+	nfsd4_client_record_create(cstate->session->se_client);
 out:
 	nfs4_unlock_state();
 	return status;
@@ -2240,7 +2280,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 			conf = find_confirmed_client_by_str(unconf->cl_recdir,
 							hash);
 			if (conf) {
-				nfsd4_remove_clid_dir(conf);
+				nfsd4_client_record_remove(conf);
 				expire_client(conf);
 			}
 			move_to_confirmed(unconf);
@@ -2633,8 +2673,6 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 
 static int share_access_to_flags(u32 share_access)
 {
-	share_access &= ~NFS4_SHARE_WANT_MASK;
-
 	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
 }
 
@@ -2776,10 +2814,9 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
 
 
 static void
-nfs4_set_claim_prev(struct nfsd4_open *open)
+nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
 {
 	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-	open->op_openowner->oo_owner.so_client->cl_firststate = 1;
 }
 
 /* Should we give out recallable state?: */
@@ -2855,6 +2892,27 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
 	return 0;
 }
 
+static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
+{
+	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+	if (status == -EAGAIN)
+		open->op_why_no_deleg = WND4_CONTENTION;
+	else {
+		open->op_why_no_deleg = WND4_RESOURCE;
+		switch (open->op_deleg_want) {
+		case NFS4_SHARE_WANT_READ_DELEG:
+		case NFS4_SHARE_WANT_WRITE_DELEG:
+		case NFS4_SHARE_WANT_ANY_DELEG:
+			break;
+		case NFS4_SHARE_WANT_CANCEL:
+			open->op_why_no_deleg = WND4_CANCELLED;
+			break;
+		case NFS4_SHARE_WANT_NO_DELEG:
+			BUG();	/* not supposed to get here */
+		}
+	}
+}
+
 /*
  * Attempt to hand out a delegation.
  */
@@ -2864,7 +2922,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
 	struct nfs4_delegation *dp;
 	struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
 	int cb_up;
-	int status, flag = 0;
+	int status = 0, flag = 0;
 
 	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
 	flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2905,11 +2963,16 @@
 	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
 		STATEID_VAL(&dp->dl_stid.sc_stateid));
 out:
-	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
-			&& flag == NFS4_OPEN_DELEGATE_NONE
-			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
-		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
 	open->op_delegate_type = flag;
+	if (flag == NFS4_OPEN_DELEGATE_NONE) {
+		if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
+		    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+			dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+
+		/* 4.1 client asking for a delegation? */
+		if (open->op_deleg_want)
+			nfsd4_open_deleg_none_ext(open, status);
+	}
 	return;
 out_free:
 	nfs4_put_delegation(dp);
@@ -2918,6 +2981,24 @@ out_no_deleg:
 	goto out;
 }
 
+static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
+					struct nfs4_delegation *dp)
+{
+	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
+	    dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
+		   dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+	}
+	/* Otherwise the client must be confused wanting a delegation
+	 * it already has, therefore we don't return
+	 * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+	 */
+}
+
 /*
  * called with nfs4_lock_state() held.
  */
@@ -2979,24 +3060,36 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
-	if (nfsd4_has_session(&resp->cstate))
+	if (nfsd4_has_session(&resp->cstate)) {
 		open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 
+		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+			open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+			open->op_why_no_deleg = WND4_NOT_WANTED;
+			goto nodeleg;
+		}
+	}
+
 	/*
 	 * Attempt to hand out a delegation. No error return, because the
 	 * OPEN succeeds even if we fail.
 	 */
 	nfs4_open_delegation(current_fh, open, stp);
-
+nodeleg:
 	status = nfs_ok;
 
 	dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
 		STATEID_VAL(&stp->st_stid.sc_stateid));
 out:
+	/* 4.1 client trying to upgrade/downgrade delegation? */
+	if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+	    open->op_deleg_want)
+		nfsd4_deleg_xgrade_none_ext(open, dp);
+
 	if (fp)
 		put_nfs4_file(fp);
 	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
-		nfs4_set_claim_prev(open);
+		nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
 	/*
 	 * To finish the open response, we just need to set the rflags.
 	 */
@@ -3066,7 +3159,7 @@ static void
 nfsd4_end_grace(void)
 {
 	dprintk("NFSD: end of grace period\n");
-	nfsd4_recdir_purge_old();
+	nfsd4_record_grace_done(&init_net, boot_time);
 	locks_end_grace(&nfsd4_manager);
 	/*
 	 * Now that every NFSv4 client has had the chance to recover and
@@ -3115,7 +3208,7 @@ nfs4_laundromat(void)
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
-		nfsd4_remove_clid_dir(clp);
+		nfsd4_client_record_remove(clp);
 		expire_client(clp);
 	}
 	spin_lock(&recall_lock);
@@ -3400,7 +3493,14 @@ __be32
 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		   struct nfsd4_test_stateid *test_stateid)
 {
-	/* real work is done during encoding */
+	struct nfsd4_test_stateid_id *stateid;
+	struct nfs4_client *cl = cstate->session->se_client;
+
+	nfs4_lock_state();
+	list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
+		stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid);
+	nfs4_unlock_state();
+
 	return nfs_ok;
 }
 
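With this change TEST_STATEID is handled in three decoupled stages: nfsd4_decode_test_stateid() (in nfs4xdr.c, later in this patch) builds ts_stateid_list, the function above validates each entry under the state lock and records the result in ts_id_status, and the encoder simply walks the list. The list node the stages share looks like this (field names as used above; the actual definition lives in the xdr headers and is reconstructed here for reference):

struct nfsd4_test_stateid_id {
	__be32			ts_id_status;	/* filled in above */
	stateid_t		ts_id_stateid;	/* filled in by the decoder */
	struct list_head	ts_id_list;	/* linked on ts_stateid_list */
};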
@@ -3539,7 +3639,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
 		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
 
-	nfsd4_create_clid_dir(oo->oo_owner.so_client);
+	nfsd4_client_record_create(oo->oo_owner.so_client);
 	status = nfs_ok;
 out:
 	if (!cstate->replay_owner)
@@ -3596,7 +3696,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 		cstate->current_fh.fh_dentry->d_name.name);
 
 	/* We don't yet support WANT bits: */
-	od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
+	if (od->od_deleg_want)
+		dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
+			od->od_deleg_want);
 
 	nfs4_lock_state();
 	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
@@ -4109,16 +4211,14 @@ out:
  * vfs_test_lock. (Arguably perhaps test_lock should be done with an
  * inode operation.)
  */
-static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
 	struct file *file;
-	int err;
-
-	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
-	if (err)
-		return err;
-	err = vfs_test_lock(file, lock);
-	nfsd_close(file);
+	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+	if (!err) {
+		err = nfserrno(vfs_test_lock(file, lock));
+		nfsd_close(file);
+	}
 	return err;
 }
 
@@ -4132,7 +4232,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct inode *inode;
 	struct file_lock file_lock;
 	struct nfs4_lockowner *lo;
-	int error;
 	__be32 status;
 
 	if (locks_in_grace())
@@ -4178,12 +4277,10 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	nfs4_transform_lock_offset(&file_lock);
 
-	status = nfs_ok;
-	error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
-	if (error) {
-		status = nfserrno(error);
+	status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
+	if (status)
 		goto out;
-	}
+
 	if (file_lock.fl_type != F_UNLCK) {
 		status = nfserr_denied;
 		nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
@@ -4353,7 +4450,9 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
 	struct nfs4_client *clp;
 
 	clp = find_confirmed_client_by_str(name, strhashval);
-	return clp ? 1 : 0;
+	if (!clp)
+		return 0;
+	return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 }
 
 /*
@@ -4377,7 +4476,7 @@ nfs4_client_to_reclaim(const char *name)
 	return 1;
 }
 
-static void
+void
 nfs4_release_reclaim(void)
 {
 	struct nfs4_client_reclaim *crp = NULL;
@@ -4397,19 +4496,12 @@ nfs4_release_reclaim(void)
 
 /*
 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-static struct nfs4_client_reclaim *
-nfs4_find_reclaim_client(clientid_t *clid)
+struct nfs4_client_reclaim *
+nfsd4_find_reclaim_client(struct nfs4_client *clp)
 {
 	unsigned int strhashval;
-	struct nfs4_client *clp;
 	struct nfs4_client_reclaim *crp = NULL;
 
-
-	/* find clientid in conf_id_hashtbl */
-	clp = find_confirmed_client(clid);
-	if (clp == NULL)
-		return NULL;
-
 	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
 		clp->cl_name.len, clp->cl_name.data,
 		clp->cl_recdir);
@@ -4430,7 +4522,14 @@
 __be32
 nfs4_check_open_reclaim(clientid_t *clid)
 {
-	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
+	struct nfs4_client *clp;
+
+	/* find clientid in conf_id_hashtbl */
+	clp = find_confirmed_client(clid);
+	if (clp == NULL)
+		return nfserr_reclaim_bad;
+
+	return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok;
 }
 
 #ifdef CONFIG_NFSD_FAULT_INJECTION
@@ -4442,7 +4541,7 @@ void nfsd_forget_clients(u64 num)
 
 	nfs4_lock_state();
 	list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
-		nfsd4_remove_clid_dir(clp);
+		nfsd4_client_record_remove(clp);
 		expire_client(clp);
 		if (++count == num)
 			break;
@@ -4577,19 +4676,6 @@ nfs4_state_init(void)
 	reclaim_str_hashtbl_size = 0;
 }
 
-static void
-nfsd4_load_reboot_recovery_data(void)
-{
-	int status;
-
-	nfs4_lock_state();
-	nfsd4_init_recdir();
-	status = nfsd4_recdir_load();
-	nfs4_unlock_state();
-	if (status)
-		printk("NFSD: Failure reading reboot recovery data\n");
-}
-
 /*
  * Since the lifetime of a delegation isn't limited to that of an open, a
  * client may quite reasonably hang on to a delegation as long as it has
@@ -4613,21 +4699,34 @@ set_max_delegations(void)
 
 /* initialization to perform when the nfsd service is started: */
 
-static int
-__nfs4_state_start(void)
+int
+nfs4_state_start(void)
 {
 	int ret;
 
+	/*
+	 * FIXME: For now, we hang most of the pernet global stuff off of
+	 * init_net until nfsd is fully containerized. Eventually, we'll
+	 * need to pass a net pointer into this function, take a reference
+	 * to that instead and then do most of the rest of this on a per-net
	 * basis.
+	 */
+	get_net(&init_net);
+	nfsd4_client_tracking_init(&init_net);
 	boot_time = get_seconds();
 	locks_start_grace(&nfsd4_manager);
 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
 	       nfsd4_grace);
 	ret = set_callback_cred();
-	if (ret)
-		return -ENOMEM;
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_recovery;
+	}
 	laundry_wq = create_singlethread_workqueue("nfsd4");
-	if (laundry_wq == NULL)
-		return -ENOMEM;
+	if (laundry_wq == NULL) {
+		ret = -ENOMEM;
+		goto out_recovery;
+	}
 	ret = nfsd4_create_callback_queue();
 	if (ret)
 		goto out_free_laundry;
@@ -4636,16 +4735,12 @@ __nfs4_state_start(void)
 	return 0;
 out_free_laundry:
 	destroy_workqueue(laundry_wq);
+out_recovery:
+	nfsd4_client_tracking_exit(&init_net);
+	put_net(&init_net);
 	return ret;
 }
 
-int
-nfs4_state_start(void)
-{
-	nfsd4_load_reboot_recovery_data();
-	return __nfs4_state_start();
-}
-
 static void
 __nfs4_state_shutdown(void)
 {
@@ -4676,7 +4771,8 @@ __nfs4_state_shutdown(void)
 		unhash_delegation(dp);
 	}
 
-	nfsd4_shutdown_recdir();
+	nfsd4_client_tracking_exit(&init_net);
+	put_net(&init_net);
 }
 
 void
@@ -4686,8 +4782,108 @@ nfs4_state_shutdown(void)
 	destroy_workqueue(laundry_wq);
 	locks_end_grace(&nfsd4_manager);
 	nfs4_lock_state();
-	nfs4_release_reclaim();
 	__nfs4_state_shutdown();
 	nfs4_unlock_state();
 	nfsd4_destroy_callback_queue();
 }
+
+static void
+get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+		memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
+}
+
+static void
+put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+	if (cstate->minorversion) {
+		memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
+		SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+	}
+}
+
+void
+clear_current_stateid(struct nfsd4_compound_state *cstate)
+{
+	CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+}
+
+/*
+ * functions to set current state id
+ */
+void
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+	put_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+{
+	put_stateid(cstate, &open->op_stateid);
+}
+
+void
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+	put_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+{
+	put_stateid(cstate, &lock->lk_resp_stateid);
+}
+
+/*
+ * functions to consume current state id
+ */
+
+void
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+	get_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+{
+	get_stateid(cstate, &drp->dr_stateid);
+}
+
+void
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+{
+	get_stateid(cstate, &fsp->fr_stateid);
+}
+
+void
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+{
+	get_stateid(cstate, &setattr->sa_stateid);
+}
+
+void
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+	get_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+{
+	get_stateid(cstate, &locku->lu_stateid);
+}
+
+void
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+{
+	get_stateid(cstate, &read->rd_stateid);
+}
+
+void
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+{
+	get_stateid(cstate, &write->wr_stateid);
+}
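The block of helpers above implements the NFSv4.1 "current stateid": put_stateid() records the stateid an operation just produced, and get_stateid() substitutes it when a later operation in the same compound carries the special stateid with si_generation == 1 and all other fields zero (currentstateid, defined near the top of this file). A worked example of the flow, as comments:

/* Compound from a 4.1 client: PUTFH; OPEN; WRITE
 * - OPEN succeeds; nfsd4_set_openstateid() saves op_stateid into
 *   cstate->current_stateid and sets CURRENT_STATE_ID_FLAG.
 * - WRITE carries {si_generation = 1, everything else 0}; that matches
 *   CURRENT_STATEID(), so nfsd4_get_writestateid() copies
 *   cstate->current_stateid into wr_stateid before the write runs,
 *   i.e. the WRITE uses the stateid the OPEN just returned.
 */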
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0ec5a1b9700..74c00bc92b9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -133,22 +133,6 @@ xdr_error: \
 	} \
 } while (0)
 
-static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
-{
-	savep->p = argp->p;
-	savep->end = argp->end;
-	savep->pagelen = argp->pagelen;
-	savep->pagelist = argp->pagelist;
-}
-
-static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
-{
-	argp->p = savep->p;
-	argp->end = savep->end;
-	argp->pagelen = savep->pagelen;
-	argp->pagelist = savep->pagelist;
-}
-
 static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 {
 	/* We want more bytes than seem to be available.
@@ -638,14 +622,18 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
 	DECODE_TAIL;
 }
 
-static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
+static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when)
 {
 	__be32 *p;
 	u32 w;
 
 	READ_BUF(4);
 	READ32(w);
-	*x = w;
+	*share_access = w & NFS4_SHARE_ACCESS_MASK;
+	*deleg_want = w & NFS4_SHARE_WANT_MASK;
+	if (deleg_when)
+		*deleg_when = w & NFS4_SHARE_WHEN_MASK;
+
 	switch (w & NFS4_SHARE_ACCESS_MASK) {
 	case NFS4_SHARE_ACCESS_READ:
 	case NFS4_SHARE_ACCESS_WRITE:
@@ -673,6 +661,9 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
 	w &= ~NFS4_SHARE_WANT_MASK;
 	if (!w)
 		return nfs_ok;
+
+	if (!deleg_when)	/* open_downgrade */
+		return nfserr_inval;
 	switch (w) {
 	case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL:
 	case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED:
@@ -719,6 +710,7 @@ static __be32
 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 {
 	DECODE_HEAD;
+	u32 dummy;
 
 	memset(open->op_bmval, 0, sizeof(open->op_bmval));
 	open->op_iattr.ia_valid = 0;
@@ -727,7 +719,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 	/* seqid, share_access, share_deny, clientid, ownerlen */
 	READ_BUF(4);
 	READ32(open->op_seqid);
-	status = nfsd4_decode_share_access(argp, &open->op_share_access);
+	/* decode, yet ignore deleg_when until supported */
+	status = nfsd4_decode_share_access(argp, &open->op_share_access,
+					&open->op_deleg_want, &dummy);
 	if (status)
 		goto xdr_error;
 	status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
@@ -755,14 +749,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 			goto out;
 		break;
 	case NFS4_CREATE_EXCLUSIVE:
-		READ_BUF(8);
-		COPYMEM(open->op_verf.data, 8);
+		READ_BUF(NFS4_VERIFIER_SIZE);
+		COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
 		break;
 	case NFS4_CREATE_EXCLUSIVE4_1:
 		if (argp->minorversion < 1)
 			goto xdr_error;
-		READ_BUF(8);
-		COPYMEM(open->op_verf.data, 8);
+		READ_BUF(NFS4_VERIFIER_SIZE);
+		COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
 		status = nfsd4_decode_fattr(argp, open->op_bmval,
 			&open->op_iattr, &open->op_acl);
 		if (status)
@@ -848,7 +842,8 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
 		return status;
 	READ_BUF(4);
 	READ32(open_down->od_seqid);
-	status = nfsd4_decode_share_access(argp, &open_down->od_share_access);
+	status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
+					&open_down->od_deleg_want, NULL);
 	if (status)
 		return status;
 	status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
@@ -994,8 +989,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
 {
 	DECODE_HEAD;
 
-	READ_BUF(8);
-	COPYMEM(setclientid->se_verf.data, 8);
+	READ_BUF(NFS4_VERIFIER_SIZE);
+	COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
 
 	status = nfsd4_decode_opaque(argp, &setclientid->se_name);
 	if (status)
@@ -1020,9 +1015,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
 {
 	DECODE_HEAD;
 
-	READ_BUF(8 + sizeof(nfs4_verifier));
+	READ_BUF(8 + NFS4_VERIFIER_SIZE);
 	COPYMEM(&scd_c->sc_clientid, 8);
-	COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier));
+	COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
 
 	DECODE_TAIL;
 }
@@ -1385,26 +1380,29 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 static __be32
 nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
 {
-	unsigned int nbytes;
-	stateid_t si;
 	int i;
-	__be32 *p;
-	__be32 status;
+	__be32 *p, status;
+	struct nfsd4_test_stateid_id *stateid;
 
 	READ_BUF(4);
 	test_stateid->ts_num_ids = ntohl(*p++);
 
-	nbytes = test_stateid->ts_num_ids * sizeof(stateid_t);
-	if (nbytes > (u32)((char *)argp->end - (char *)argp->p))
-		goto xdr_error;
-
-	test_stateid->ts_saved_args = argp;
-	save_buf(argp, &test_stateid->ts_savedp);
+	INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
 
 	for (i = 0; i < test_stateid->ts_num_ids; i++) {
-		status = nfsd4_decode_stateid(argp, &si);
+		stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL);
+		if (!stateid) {
+			status = nfserrno(-ENOMEM);
+			goto out;
+		}
+
+		defer_free(argp, kfree, stateid);
+		INIT_LIST_HEAD(&stateid->ts_id_list);
+		list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+
+		status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid);
 		if (status)
-			return status;
+			goto out;
 	}
 
 	status = 0;
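Each list node above is kmalloc'd and immediately handed to defer_free(), so the allocation is released along with the compound arguments even when decoding bails out early, and the encode path can walk the list without owning the memory. The idiom in isolation (hedged: defer_free() is a local nfs4xdr.c helper that queues a release callback on the compoundargs):

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return nfserrno(-ENOMEM);
	defer_free(argp, kfree, item);	/* freed when the compound args go away */
	/* item can now be linked into lists and used for the rest of
	 * the compound without any matching kfree() on the error paths */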
@@ -2661,8 +2659,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(8);
-		WRITEMEM(commit->co_verf.data, 8);
+		RESERVE_SPACE(NFS4_VERIFIER_SIZE);
+		WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2851,6 +2849,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
 		WRITE32(0);	/* XXX: is NULL principal ok? */
 		ADJUST_ARGS();
 		break;
+	case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */
+		switch (open->op_why_no_deleg) {
+		case WND4_CONTENTION:
+		case WND4_RESOURCE:
+			RESERVE_SPACE(8);
+			WRITE32(open->op_why_no_deleg);
+			WRITE32(0);	/* deleg signaling not supported yet */
+			break;
+		default:
+			RESERVE_SPACE(4);
+			WRITE32(open->op_why_no_deleg);
+		}
+		ADJUST_ARGS();
+		break;
 	default:
 		BUG();
 	}
@@ -3008,7 +3020,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
 
-	RESERVE_SPACE(8);  /* verifier */
+	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
 	savep = p;
 
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
@@ -3209,9 +3221,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
 	__be32 *p;
 
 	if (!nfserr) {
-		RESERVE_SPACE(8 + sizeof(nfs4_verifier));
+		RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE);
 		WRITEMEM(&scd->se_clientid, 8);
-		WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier));
+		WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE);
 		ADJUST_ARGS();
 	}
 	else if (nfserr == nfserr_clid_inuse) {
@@ -3232,7 +3244,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
 		RESERVE_SPACE(16);
 		WRITE32(write->wr_bytes_written);
 		WRITE32(write->wr_how_written);
-		WRITEMEM(write->wr_verifier.data, 8);
+		WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -3391,30 +3403,17 @@ __be32
 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
 			  struct nfsd4_test_stateid *test_stateid)
 {
-	struct nfsd4_compoundargs *argp;
-	struct nfs4_client *cl = resp->cstate.session->se_client;
-	stateid_t si;
+	struct nfsd4_test_stateid_id *stateid, *next;
 	__be32 *p;
-	int i;
-	int valid;
-
-	restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp);
-	argp = test_stateid->ts_saved_args;
 
-	RESERVE_SPACE(4);
+	RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
 	*p++ = htonl(test_stateid->ts_num_ids);
-	resp->p = p;
 
-	nfs4_lock_state();
-	for (i = 0; i < test_stateid->ts_num_ids; i++) {
-		nfsd4_decode_stateid(argp, &si);
-		valid = nfs4_validate_stateid(cl, &si);
-		RESERVE_SPACE(4);
-		*p++ = htonl(valid);
-		resp->p = p;
+	list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
+		*p++ = stateid->ts_id_status;
 	}
-	nfs4_unlock_state();
 
+	ADJUST_ARGS();
 	return nfserr;
 }
 
@@ -3532,7 +3531,7 @@ int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
 	if (length > session->se_fchannel.maxresp_sz)
 		return nfserr_rep_too_big;
 
-	if (slot->sl_cachethis == 1 &&
+	if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) &&
 	    length > session->se_fchannel.maxresp_cached)
 		return nfserr_rep_too_big_to_cache;
 
@@ -3656,8 +3655,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	if (nfsd4_has_session(cs)) {
 		if (cs->status != nfserr_replay_cache) {
 			nfsd4_store_cache_entry(resp);
-			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-			cs->slot->sl_inuse = false;
+			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
 		}
 		/* Renew the clientid on success and on replay */
 		release_session_client(cs->session);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 748eda93ce5..2c53be6d357 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,12 +13,14 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/gss_krb5_enctypes.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/module.h>
 
 #include "idmap.h"
 #include "nfsd.h"
 #include "cache.h"
 #include "fault_inject.h"
+#include "netns.h"
 
 /*
  * We have a single directory with several nodes in it.
@@ -223,7 +225,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
 	if (qword_get(&buf, fo_path, size) < 0)
 		return -EINVAL;
 
-	if (rpc_pton(fo_path, size, sap, salen) == 0)
+	if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0)
 		return -EINVAL;
 
 	return nlmsvc_unlock_all_by_ip(sap);
@@ -722,7 +724,7 @@ static ssize_t __write_ports_addxprt(char *buf)
 	nfsd_serv->sv_nrthreads--;
 	return 0;
 out_close:
-	xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port);
+	xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port);
 	if (xprt != NULL) {
 		svc_close_xprt(xprt);
 		svc_xprt_put(xprt);
@@ -748,7 +750,7 @@ static ssize_t __write_ports_delxprt(char *buf)
 	if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
 		return -EINVAL;
 
-	xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
+	xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port);
 	if (xprt == NULL)
 		return -ENOTCONN;
 
@@ -1124,14 +1126,26 @@ static int create_proc_exports_entry(void)
 }
 #endif
 
+int nfsd_net_id;
+static struct pernet_operations nfsd_net_ops = {
+	.id = &nfsd_net_id,
+	.size = sizeof(struct nfsd_net),
+};
+
 static int __init init_nfsd(void)
 {
 	int retval;
 	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
-	retval = nfsd4_init_slabs();
+	retval = register_cld_notifier();
 	if (retval)
 		return retval;
+	retval = register_pernet_subsys(&nfsd_net_ops);
+	if (retval < 0)
+		goto out_unregister_notifier;
+	retval = nfsd4_init_slabs();
+	if (retval)
+		goto out_unregister_pernet;
 	nfs4_state_init();
 	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
 	if (retval)
@@ -1169,6 +1183,10 @@ out_free_stat:
 	nfsd_fault_inject_cleanup();
 out_free_slabs:
 	nfsd4_free_slabs();
+out_unregister_pernet:
+	unregister_pernet_subsys(&nfsd_net_ops);
+out_unregister_notifier:
+	unregister_cld_notifier();
 	return retval;
 }
 
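The error ladder above unwinds the registrations in the reverse of the order they were made, and exit_nfsd (next hunk) tears everything down in that same reverse order. The shape, reduced to a sketch (step_a/step_b/undo_a are illustrative stand-ins, not patch code):

static int init_example(void)
{
	int err;

	err = step_a();			/* e.g. register_cld_notifier() */
	if (err)
		return err;
	err = step_b();			/* e.g. register_pernet_subsys() */
	if (err)
		goto out_undo_a;
	return 0;
out_undo_a:
	undo_a();			/* e.g. unregister_cld_notifier() */
	return err;
}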
@@ -1184,6 +1202,8 @@ static void __exit exit_nfsd(void)
 	nfsd4_free_slabs();
 	nfsd_fault_inject_cleanup();
 	unregister_filesystem(&nfsd_fs_type);
+	unregister_pernet_subsys(&nfsd_net_ops);
+	unregister_cld_notifier();
 }
 
 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1d1e8589b4c..1671429ffa6 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -364,12 +364,17 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
 	NFSD_WRITEABLE_ATTRS_WORD2
 
 extern int nfsd4_is_junction(struct dentry *dentry);
-#else
+extern int register_cld_notifier(void);
+extern void unregister_cld_notifier(void);
+#else /* CONFIG_NFSD_V4 */
 static inline int nfsd4_is_junction(struct dentry *dentry)
 {
 	return 0;
 }
 
+#define register_cld_notifier() 0
+#define unregister_cld_notifier() do { } while(0)
+
 #endif /* CONFIG_NFSD_V4 */
 
 #endif /* LINUX_NFSD_NFSD_H */
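When CONFIG_NFSD_V4 is off, the stubs above compile the notifier calls away: the literal 0 lets `retval = register_cld_notifier();` succeed unconditionally, and `do { } while(0)` keeps the no-op well-behaved anywhere a single statement is expected, e.g. (illustrative):

	if (cleanup_needed)
		unregister_cld_notifier();	/* still one statement before the else */
	else
		do_something_else();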
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index eda7d7e55e0..28dfad39f0c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -251,13 +251,13 @@ static void nfsd_shutdown(void)
251 nfsd_up = false; 251 nfsd_up = false;
252} 252}
253 253
254static void nfsd_last_thread(struct svc_serv *serv) 254static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
255{ 255{
256 /* When last nfsd thread exits we need to do some clean-up */ 256 /* When last nfsd thread exits we need to do some clean-up */
257 nfsd_serv = NULL; 257 nfsd_serv = NULL;
258 nfsd_shutdown(); 258 nfsd_shutdown();
259 259
260 svc_rpcb_cleanup(serv); 260 svc_rpcb_cleanup(serv, net);
261 261
262 printk(KERN_WARNING "nfsd: last server has exited, flushing export " 262 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
263 "cache\n"); 263 "cache\n");
@@ -307,33 +307,37 @@ static void set_max_drc(void)
307 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 307 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
308} 308}
309 309
310int nfsd_create_serv(void) 310static int nfsd_get_default_max_blksize(void)
311{ 311{
312 int err = 0; 312 struct sysinfo i;
313 unsigned long long target;
314 unsigned long ret;
315
316 si_meminfo(&i);
317 target = (i.totalram - i.totalhigh) << PAGE_SHIFT;
318 /*
319 * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig
320 * machines, but only uses 32K on 128M machines. Bottom out at
321 * 8K on 32M and smaller. Of course, this is only a default.
322 */
323 target >>= 12;
324
325 ret = NFSSVC_MAXBLKSIZE;
326 while (ret > target && ret >= 8*1024*2)
327 ret /= 2;
328 return ret;
329}
313 330
331int nfsd_create_serv(void)
332{
314 WARN_ON(!mutex_is_locked(&nfsd_mutex)); 333 WARN_ON(!mutex_is_locked(&nfsd_mutex));
315 if (nfsd_serv) { 334 if (nfsd_serv) {
316 svc_get(nfsd_serv); 335 svc_get(nfsd_serv);
317 return 0; 336 return 0;
318 } 337 }
319 if (nfsd_max_blksize == 0) { 338 if (nfsd_max_blksize == 0)
320 /* choose a suitable default */ 339 nfsd_max_blksize = nfsd_get_default_max_blksize();
321 struct sysinfo i;
322 si_meminfo(&i);
323 /* Aim for 1/4096 of memory per thread
324 * This gives 1MB on 4Gig machines
325 * But only uses 32K on 128M machines.
326 * Bottom out at 8K on 32M and smaller.
327 * Of course, this is only a default.
328 */
329 nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
330 i.totalram <<= PAGE_SHIFT - 12;
331 while (nfsd_max_blksize > i.totalram &&
332 nfsd_max_blksize >= 8*1024*2)
333 nfsd_max_blksize /= 2;
334 }
335 nfsd_reset_versions(); 340 nfsd_reset_versions();
336
337 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 341 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
338 nfsd_last_thread, nfsd, THIS_MODULE); 342 nfsd_last_thread, nfsd, THIS_MODULE);
339 if (nfsd_serv == NULL) 343 if (nfsd_serv == NULL)
@@ -341,7 +345,7 @@ int nfsd_create_serv(void)
341 345
342 set_max_drc(); 346 set_max_drc();
343 do_gettimeofday(&nfssvc_boot); /* record boot time */ 347 do_gettimeofday(&nfssvc_boot); /* record boot time */
344 return err; 348 return 0;
345} 349}
346 350
347int nfsd_nrpools(void) 351int nfsd_nrpools(void)
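[Note on the nfssvc.c hunks above] Factoring the heuristic out into nfsd_get_default_max_blksize() also untangles it from struct sysinfo: the target is 1/4096 of low memory, and the maximum payload is halved until it fits, with an 8K floor. A standalone sketch of the same arithmetic, assuming NFSSVC_MAXBLKSIZE is 1MB (its usual value from the sunrpc headers):

	static unsigned long default_max_blksize_sketch(unsigned long long lowmem)
	{
		unsigned long long target = lowmem >> 12;	/* 1/4096 of low memory */
		unsigned long ret = 1024 * 1024;	/* assumed NFSSVC_MAXBLKSIZE */

		while (ret > target && ret >= 8 * 1024 * 2)
			ret /= 2;
		return ret;	/* 1MB at 4GB, 32K at 128MB, never below 8K */
	}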
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ffb5df1db94..89ab137d379 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -128,12 +128,14 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
128 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) 128 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
129 129
130struct nfsd4_slot { 130struct nfsd4_slot {
131 bool sl_inuse;
132 bool sl_cachethis;
133 u16 sl_opcnt;
134 u32 sl_seqid; 131 u32 sl_seqid;
135 __be32 sl_status; 132 __be32 sl_status;
136 u32 sl_datalen; 133 u32 sl_datalen;
134 u16 sl_opcnt;
135#define NFSD4_SLOT_INUSE (1 << 0)
136#define NFSD4_SLOT_CACHETHIS (1 << 1)
137#define NFSD4_SLOT_INITIALIZED (1 << 2)
138 u8 sl_flags;
137 char sl_data[]; 139 char sl_data[];
138}; 140};
139 141
@@ -196,18 +198,7 @@ struct nfsd4_session {
196 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 198 struct nfsd4_slot *se_slots[]; /* forward channel slots */
197}; 199};
198 200
199static inline void 201extern void nfsd4_put_session(struct nfsd4_session *ses);
200nfsd4_put_session(struct nfsd4_session *ses)
201{
202 extern void free_session(struct kref *kref);
203 kref_put(&ses->se_ref, free_session);
204}
205
206static inline void
207nfsd4_get_session(struct nfsd4_session *ses)
208{
209 kref_get(&ses->se_ref);
210}
211 202
212/* formatted contents of nfs4_sessionid */ 203/* formatted contents of nfs4_sessionid */
213struct nfsd4_sessionid { 204struct nfsd4_sessionid {
@@ -245,14 +236,17 @@ struct nfs4_client {
245 struct svc_cred cl_cred; /* setclientid principal */ 236 struct svc_cred cl_cred; /* setclientid principal */
246 clientid_t cl_clientid; /* generated by server */ 237 clientid_t cl_clientid; /* generated by server */
247 nfs4_verifier cl_confirm; /* generated by server */ 238 nfs4_verifier cl_confirm; /* generated by server */
248 u32 cl_firststate; /* recovery dir creation */
249 u32 cl_minorversion; 239 u32 cl_minorversion;
250 240
251 /* for v4.0 and v4.1 callbacks: */ 241 /* for v4.0 and v4.1 callbacks: */
252 struct nfs4_cb_conn cl_cb_conn; 242 struct nfs4_cb_conn cl_cb_conn;
253#define NFSD4_CLIENT_CB_UPDATE 1 243#define NFSD4_CLIENT_CB_UPDATE (0)
254#define NFSD4_CLIENT_KILL 2 244#define NFSD4_CLIENT_CB_KILL (1)
255 unsigned long cl_cb_flags; 245#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
246#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
247#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
248 1 << NFSD4_CLIENT_CB_KILL)
249 unsigned long cl_flags;
256 struct rpc_clnt *cl_cb_client; 250 struct rpc_clnt *cl_cb_client;
257 u32 cl_cb_ident; 251 u32 cl_cb_ident;
258#define NFSD4_CB_UP 0 252#define NFSD4_CB_UP 0
@@ -463,6 +457,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
463extern void nfs4_lock_state(void); 457extern void nfs4_lock_state(void);
464extern void nfs4_unlock_state(void); 458extern void nfs4_unlock_state(void);
465extern int nfs4_in_grace(void); 459extern int nfs4_in_grace(void);
460extern void nfs4_release_reclaim(void);
461extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp);
466extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 462extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
467extern void nfs4_free_openowner(struct nfs4_openowner *); 463extern void nfs4_free_openowner(struct nfs4_openowner *);
468extern void nfs4_free_lockowner(struct nfs4_lockowner *); 464extern void nfs4_free_lockowner(struct nfs4_lockowner *);
@@ -477,16 +473,17 @@ extern void nfsd4_destroy_callback_queue(void);
477extern void nfsd4_shutdown_callback(struct nfs4_client *); 473extern void nfsd4_shutdown_callback(struct nfs4_client *);
478extern void nfs4_put_delegation(struct nfs4_delegation *dp); 474extern void nfs4_put_delegation(struct nfs4_delegation *dp);
479extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 475extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
480extern void nfsd4_init_recdir(void);
481extern int nfsd4_recdir_load(void);
482extern void nfsd4_shutdown_recdir(void);
483extern int nfs4_client_to_reclaim(const char *name); 476extern int nfs4_client_to_reclaim(const char *name);
484extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); 477extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
485extern void nfsd4_recdir_purge_old(void);
486extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
487extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
488extern void release_session_client(struct nfsd4_session *); 478extern void release_session_client(struct nfsd4_session *);
489extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); 479extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
490extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); 480extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
491 481
482/* nfs4recover operations */
483extern int nfsd4_client_tracking_init(struct net *net);
484extern void nfsd4_client_tracking_exit(struct net *net);
485extern void nfsd4_client_record_create(struct nfs4_client *clp);
486extern void nfsd4_client_record_remove(struct nfs4_client *clp);
487extern int nfsd4_client_record_check(struct nfs4_client *clp);
488extern void nfsd4_record_grace_done(struct net *net, time_t boot_time);
492#endif /* NFSD4_STATE_H */ 489#endif /* NFSD4_STATE_H */
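[Note on the state.h hunks above] Two flag conversions happen here. The three slot booleans collapse into an sl_flags byte tested with plain bitwise ops, while the client constants change from values to bit numbers, the form the atomic set_bit()/test_bit() helpers take; NFSD4_CLIENT_CB_FLAG_MASK shifts those bit numbers back into a mask. Presumed usage, with the helper names being illustrative:

	/* slot flags: plain byte, updated under the session/state locks */
	slot->sl_flags |= NFSD4_SLOT_INUSE;
	if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
		cache_this_reply(resp);			/* illustrative */

	/* client flags: bit numbers for the atomic bitops */
	set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
	if (!test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
		handle_pending_reclaim(clp);		/* illustrative */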
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index a2e2402b2af..6d4521feb6e 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
27#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
28#include <net/net_namespace.h>
28 29
29#include "nfsd.h" 30#include "nfsd.h"
30 31
@@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = {
94void 95void
95nfsd_stat_init(void) 96nfsd_stat_init(void)
96{ 97{
97 svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); 98 svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
98} 99}
99 100
100void 101void
101nfsd_stat_shutdown(void) 102nfsd_stat_shutdown(void)
102{ 103{
103 svc_proc_unregister("nfsd"); 104 svc_proc_unregister(&init_net, "nfsd");
104} 105}
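[Note on the stats.c hunk above] Passing init_net into svc_proc_register()/svc_proc_unregister() belongs to the same containerization series as the nfsd_net_ops registration in init_nfsd(): formerly-global state migrates under struct net, with init_net preserving today's single-namespace behaviour. The pernet subsystem itself follows the stock pattern; a minimal sketch with placeholder bodies (the real nfsd_net_ops presumably also sets the .id/.size fields so the core allocates its per-net struct):

	static __net_init int nfsd_net_init_sketch(struct net *net)
	{
		/* set up per-namespace nfsd state */
		return 0;
	}

	static __net_exit void nfsd_net_exit_sketch(struct net *net)
	{
		/* tear down per-namespace nfsd state */
	}

	static struct pernet_operations nfsd_net_ops_sketch = {
		.init = nfsd_net_init_sketch,
		.exit = nfsd_net_exit_sketch,
	};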
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index edf6d3ed877..568666156ea 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
737 737
738/* 738/*
739 * Open an existing file or directory. 739 * Open an existing file or directory.
740 * The access argument indicates the type of open (read/write/lock) 740 * The may_flags argument indicates the type of open (read/write/lock)
741 * and additional flags.
741 * N.B. After this call fhp needs an fh_put 742 * N.B. After this call fhp needs an fh_put
742 */ 743 */
743__be32 744__be32
744nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 745nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
745 int access, struct file **filp) 746 int may_flags, struct file **filp)
746{ 747{
747 struct dentry *dentry; 748 struct dentry *dentry;
748 struct inode *inode; 749 struct inode *inode;
@@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
757 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 758 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
758 * in case a chmod has now revoked permission. 759 * in case a chmod has now revoked permission.
759 */ 760 */
760 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); 761 err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
761 if (err) 762 if (err)
762 goto out; 763 goto out;
763 764
@@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
768 * or any access when mandatory locking enabled 769 * or any access when mandatory locking enabled
769 */ 770 */
770 err = nfserr_perm; 771 err = nfserr_perm;
771 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) 772 if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
772 goto out; 773 goto out;
773 /* 774 /*
774 * We must ignore files (but only files) which might have mandatory 775 * We must ignore files (but only files) which might have mandatory
@@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
781 if (!inode->i_fop) 782 if (!inode->i_fop)
782 goto out; 783 goto out;
783 784
784 host_err = nfsd_open_break_lease(inode, access); 785 host_err = nfsd_open_break_lease(inode, may_flags);
785 if (host_err) /* NOMEM or WOULDBLOCK */ 786 if (host_err) /* NOMEM or WOULDBLOCK */
786 goto out_nfserr; 787 goto out_nfserr;
787 788
788 if (access & NFSD_MAY_WRITE) { 789 if (may_flags & NFSD_MAY_WRITE) {
789 if (access & NFSD_MAY_READ) 790 if (may_flags & NFSD_MAY_READ)
790 flags = O_RDWR|O_LARGEFILE; 791 flags = O_RDWR|O_LARGEFILE;
791 else 792 else
792 flags = O_WRONLY|O_LARGEFILE; 793 flags = O_WRONLY|O_LARGEFILE;
@@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
795 flags, current_cred()); 796 flags, current_cred());
796 if (IS_ERR(*filp)) 797 if (IS_ERR(*filp))
797 host_err = PTR_ERR(*filp); 798 host_err = PTR_ERR(*filp);
798 else 799 else {
799 host_err = ima_file_check(*filp, access); 800 host_err = ima_file_check(*filp, may_flags);
801
802 if (may_flags & NFSD_MAY_64BIT_COOKIE)
803 (*filp)->f_mode |= FMODE_64BITHASH;
804 else
805 (*filp)->f_mode |= FMODE_32BITHASH;
806 }
807
800out_nfserr: 808out_nfserr:
801 err = nfserrno(host_err); 809 err = nfserrno(host_err);
802out: 810out:
@@ -1450,7 +1458,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1450 switch (createmode) { 1458 switch (createmode) {
1451 case NFS3_CREATE_UNCHECKED: 1459 case NFS3_CREATE_UNCHECKED:
1452 if (! S_ISREG(dchild->d_inode->i_mode)) 1460 if (! S_ISREG(dchild->d_inode->i_mode))
1453 err = nfserr_exist; 1461 goto out;
1454 else if (truncp) { 1462 else if (truncp) {
1455 /* in nfsv4, we need to treat this case a little 1463 /* in nfsv4, we need to treat this case a little
1456 * differently. we don't want to truncate the 1464 * differently. we don't want to truncate the
@@ -1541,30 +1549,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1541__be32 1549__be32
1542nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1550nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1543{ 1551{
1544 struct dentry *dentry;
1545 struct inode *inode; 1552 struct inode *inode;
1546 mm_segment_t oldfs; 1553 mm_segment_t oldfs;
1547 __be32 err; 1554 __be32 err;
1548 int host_err; 1555 int host_err;
1556 struct path path;
1549 1557
1550 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); 1558 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1551 if (err) 1559 if (err)
1552 goto out; 1560 goto out;
1553 1561
1554 dentry = fhp->fh_dentry; 1562 path.mnt = fhp->fh_export->ex_path.mnt;
1555 inode = dentry->d_inode; 1563 path.dentry = fhp->fh_dentry;
1564 inode = path.dentry->d_inode;
1556 1565
1557 err = nfserr_inval; 1566 err = nfserr_inval;
1558 if (!inode->i_op->readlink) 1567 if (!inode->i_op->readlink)
1559 goto out; 1568 goto out;
1560 1569
1561 touch_atime(fhp->fh_export->ex_path.mnt, dentry); 1570 touch_atime(&path);
1562 /* N.B. Why does this call need a get_fs()?? 1571 /* N.B. Why does this call need a get_fs()??
1563 * Remove the set_fs and watch the fireworks:-) --okir 1572 * Remove the set_fs and watch the fireworks:-) --okir
1564 */ 1573 */
1565 1574
1566 oldfs = get_fs(); set_fs(KERNEL_DS); 1575 oldfs = get_fs(); set_fs(KERNEL_DS);
1567 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1576 host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
1568 set_fs(oldfs); 1577 set_fs(oldfs);
1569 1578
1570 if (host_err < 0) 1579 if (host_err < 0)
@@ -2020,8 +2029,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
2020 __be32 err; 2029 __be32 err;
2021 struct file *file; 2030 struct file *file;
2022 loff_t offset = *offsetp; 2031 loff_t offset = *offsetp;
2032 int may_flags = NFSD_MAY_READ;
2033
2034 /* NFSv2 only supports 32 bit cookies */
2035 if (rqstp->rq_vers > 2)
2036 may_flags |= NFSD_MAY_64BIT_COOKIE;
2023 2037
2024 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); 2038 err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
2025 if (err) 2039 if (err)
2026 goto out; 2040 goto out;
2027 2041
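[Note on the vfs.c hunks above] The FMODE_32BITHASH/FMODE_64BITHASH tagging in nfsd_open() exists for the readdir path: NFSv2 carries 32-bit directory cookies on the wire, so only v3 and later set NFSD_MAY_64BIT_COOKIE, and a filesystem with hashed directories can then pick a cookie width the client can actually round-trip. From the filesystem's side the hint reads roughly as follows (sketch; the hash helpers are illustrative):

	/* inside a filesystem's readdir implementation */
	if (file->f_mode & FMODE_32BITHASH)
		cookie = dir_hash32(name, len);		/* illustrative */
	else
		cookie = dir_hash64(name, len);		/* illustrative */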
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1dcd238e11a..ec0611b2b73 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -27,6 +27,8 @@
27#define NFSD_MAY_BYPASS_GSS 0x400 27#define NFSD_MAY_BYPASS_GSS 0x400
28#define NFSD_MAY_READ_IF_EXEC 0x800 28#define NFSD_MAY_READ_IF_EXEC 0x800
29 29
30#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
31
30#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 32#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
31#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 33#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
32 34
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2364747ee97..1b3501598ab 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -43,6 +43,13 @@
43#define NFSD4_MAX_TAGLEN 128 43#define NFSD4_MAX_TAGLEN 128
44#define XDR_LEN(n) (((n) + 3) & ~3) 44#define XDR_LEN(n) (((n) + 3) & ~3)
45 45
46#define CURRENT_STATE_ID_FLAG (1<<0)
47#define SAVED_STATE_ID_FLAG (1<<1)
48
49#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
50#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
51#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
52
46struct nfsd4_compound_state { 53struct nfsd4_compound_state {
47 struct svc_fh current_fh; 54 struct svc_fh current_fh;
48 struct svc_fh save_fh; 55 struct svc_fh save_fh;
@@ -54,6 +61,10 @@ struct nfsd4_compound_state {
54 size_t iovlen; 61 size_t iovlen;
55 u32 minorversion; 62 u32 minorversion;
56 u32 status; 63 u32 status;
64 stateid_t current_stateid;
65 stateid_t save_stateid;
 66 /* to indicate whether the current and saved stateids are present */
67 u32 sid_flags;
57}; 68};
58 69
59static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) 70static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
@@ -212,16 +223,19 @@ struct nfsd4_open {
212 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ 223 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
213 u32 op_delegate_type; /* request - CLAIM_PREV only */ 224 u32 op_delegate_type; /* request - CLAIM_PREV only */
214 stateid_t op_delegate_stateid; /* request - response */ 225 stateid_t op_delegate_stateid; /* request - response */
226 u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
215 u32 op_create; /* request */ 227 u32 op_create; /* request */
216 u32 op_createmode; /* request */ 228 u32 op_createmode; /* request */
217 u32 op_bmval[3]; /* request */ 229 u32 op_bmval[3]; /* request */
218 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ 230 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
219 nfs4_verifier verf; /* EXCLUSIVE4 */ 231 nfs4_verifier op_verf __attribute__((aligned(32)));
232 /* EXCLUSIVE4 */
220 clientid_t op_clientid; /* request */ 233 clientid_t op_clientid; /* request */
221 struct xdr_netobj op_owner; /* request */ 234 struct xdr_netobj op_owner; /* request */
222 u32 op_seqid; /* request */ 235 u32 op_seqid; /* request */
223 u32 op_share_access; /* request */ 236 u32 op_share_access; /* request */
224 u32 op_share_deny; /* request */ 237 u32 op_share_deny; /* request */
238 u32 op_deleg_want; /* request */
225 stateid_t op_stateid; /* response */ 239 stateid_t op_stateid; /* response */
226 u32 op_recall; /* recall */ 240 u32 op_recall; /* recall */
227 struct nfsd4_change_info op_cinfo; /* response */ 241 struct nfsd4_change_info op_cinfo; /* response */
@@ -234,7 +248,6 @@ struct nfsd4_open {
234 struct nfs4_acl *op_acl; 248 struct nfs4_acl *op_acl;
235}; 249};
236#define op_iattr iattr 250#define op_iattr iattr
237#define op_verf verf
238 251
239struct nfsd4_open_confirm { 252struct nfsd4_open_confirm {
240 stateid_t oc_req_stateid /* request */; 253 stateid_t oc_req_stateid /* request */;
@@ -245,8 +258,9 @@ struct nfsd4_open_confirm {
245struct nfsd4_open_downgrade { 258struct nfsd4_open_downgrade {
246 stateid_t od_stateid; 259 stateid_t od_stateid;
247 u32 od_seqid; 260 u32 od_seqid;
248 u32 od_share_access; 261 u32 od_share_access; /* request */
249 u32 od_share_deny; 262 u32 od_deleg_want; /* request */
263 u32 od_share_deny; /* request */
250}; 264};
251 265
252 266
@@ -343,10 +357,15 @@ struct nfsd4_saved_compoundargs {
343 struct page **pagelist; 357 struct page **pagelist;
344}; 358};
345 359
360struct nfsd4_test_stateid_id {
361 __be32 ts_id_status;
362 stateid_t ts_id_stateid;
363 struct list_head ts_id_list;
364};
365
346struct nfsd4_test_stateid { 366struct nfsd4_test_stateid {
347 __be32 ts_num_ids; 367 __be32 ts_num_ids;
348 struct nfsd4_compoundargs *ts_saved_args; 368 struct list_head ts_stateid_list;
349 struct nfsd4_saved_compoundargs ts_savedp;
350}; 369};
351 370
352struct nfsd4_free_stateid { 371struct nfsd4_free_stateid {
@@ -503,7 +522,8 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
503 522
504static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) 523static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
505{ 524{
506 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); 525 return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
526 || nfsd4_is_solo_sequence(resp);
507} 527}
508 528
509#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) 529#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
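[Note on the xdr4.h hunks above] The sid_flags machinery in nfsd4_compound_state records whether current_stateid/save_stateid hold valid values, which the NFSv4.1 special "current" and "saved" stateids need when one op in a compound consumes the stateid an earlier op produced. The call pattern the macros imply (the op-handler context is illustrative):

	/* producer, e.g. after a successful OPEN: */
	cstate->current_stateid = open->op_stateid;
	SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);

	/* consumer, e.g. a later op resolving the special stateid: */
	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG))
		stateid = &cstate->current_stateid;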
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index c9b342c8b50..dab5c4c6dfa 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -218,11 +218,11 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
218 kaddr, 1); 218 kaddr, 1);
219 mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr);
226 mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
@@ -313,7 +313,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
313 continue; 313 continue;
314 } 314 }
315 315
316 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 316 kaddr = kmap_atomic(cp_bh->b_page);
317 cp = nilfs_cpfile_block_get_checkpoint( 317 cp = nilfs_cpfile_block_get_checkpoint(
318 cpfile, cno, cp_bh, kaddr); 318 cpfile, cno, cp_bh, kaddr);
319 nicps = 0; 319 nicps = 0;
@@ -334,7 +334,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
334 cpfile, cp_bh, kaddr, nicps); 334 cpfile, cp_bh, kaddr, nicps);
335 if (count == 0) { 335 if (count == 0) {
336 /* make hole */ 336 /* make hole */
337 kunmap_atomic(kaddr, KM_USER0); 337 kunmap_atomic(kaddr);
338 brelse(cp_bh); 338 brelse(cp_bh);
339 ret = 339 ret =
340 nilfs_cpfile_delete_checkpoint_block( 340 nilfs_cpfile_delete_checkpoint_block(
@@ -349,18 +349,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
349 } 349 }
350 } 350 }
351 351
352 kunmap_atomic(kaddr, KM_USER0); 352 kunmap_atomic(kaddr);
353 brelse(cp_bh); 353 brelse(cp_bh);
354 } 354 }
355 355
356 if (tnicps > 0) { 356 if (tnicps > 0) {
357 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 357 kaddr = kmap_atomic(header_bh->b_page);
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr);
364 } 364 }
365 365
366 brelse(header_bh); 366 brelse(header_bh);
@@ -408,7 +408,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
408 continue; /* skip hole */ 408 continue; /* skip hole */
409 } 409 }
410 410
411 kaddr = kmap_atomic(bh->b_page, KM_USER0); 411 kaddr = kmap_atomic(bh->b_page);
412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { 413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
414 if (!nilfs_checkpoint_invalid(cp)) { 414 if (!nilfs_checkpoint_invalid(cp)) {
@@ -418,7 +418,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
418 n++; 418 n++;
419 } 419 }
420 } 420 }
421 kunmap_atomic(kaddr, KM_USER0); 421 kunmap_atomic(kaddr);
422 brelse(bh); 422 brelse(bh);
423 } 423 }
424 424
@@ -451,10 +451,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
451 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 451 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
452 if (ret < 0) 452 if (ret < 0)
453 goto out; 453 goto out;
454 kaddr = kmap_atomic(bh->b_page, KM_USER0); 454 kaddr = kmap_atomic(bh->b_page);
455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); 456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
457 kunmap_atomic(kaddr, KM_USER0); 457 kunmap_atomic(kaddr);
458 brelse(bh); 458 brelse(bh);
459 if (curr == 0) { 459 if (curr == 0) {
460 ret = 0; 460 ret = 0;
@@ -472,7 +472,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
472 ret = 0; /* No snapshots (started from a hole block) */ 472 ret = 0; /* No snapshots (started from a hole block) */
473 goto out; 473 goto out;
474 } 474 }
475 kaddr = kmap_atomic(bh->b_page, KM_USER0); 475 kaddr = kmap_atomic(bh->b_page);
476 while (n < nci) { 476 while (n < nci) {
477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); 477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
478 curr = ~(__u64)0; /* Terminator */ 478 curr = ~(__u64)0; /* Terminator */
@@ -488,7 +488,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
488 488
489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); 489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
490 if (curr_blkoff != next_blkoff) { 490 if (curr_blkoff != next_blkoff) {
491 kunmap_atomic(kaddr, KM_USER0); 491 kunmap_atomic(kaddr);
492 brelse(bh); 492 brelse(bh);
493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
494 0, &bh); 494 0, &bh);
@@ -496,12 +496,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
496 WARN_ON(ret == -ENOENT); 496 WARN_ON(ret == -ENOENT);
497 goto out; 497 goto out;
498 } 498 }
499 kaddr = kmap_atomic(bh->b_page, KM_USER0); 499 kaddr = kmap_atomic(bh->b_page);
500 } 500 }
501 curr = next; 501 curr = next;
502 curr_blkoff = next_blkoff; 502 curr_blkoff = next_blkoff;
503 } 503 }
504 kunmap_atomic(kaddr, KM_USER0); 504 kunmap_atomic(kaddr);
505 brelse(bh); 505 brelse(bh);
506 *cnop = curr; 506 *cnop = curr;
507 ret = n; 507 ret = n;
@@ -592,24 +592,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
593 if (ret < 0) 593 if (ret < 0)
594 goto out_sem; 594 goto out_sem;
595 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 595 kaddr = kmap_atomic(cp_bh->b_page);
596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
597 if (nilfs_checkpoint_invalid(cp)) { 597 if (nilfs_checkpoint_invalid(cp)) {
598 ret = -ENOENT; 598 ret = -ENOENT;
599 kunmap_atomic(kaddr, KM_USER0); 599 kunmap_atomic(kaddr);
600 goto out_cp; 600 goto out_cp;
601 } 601 }
602 if (nilfs_checkpoint_snapshot(cp)) { 602 if (nilfs_checkpoint_snapshot(cp)) {
603 ret = 0; 603 ret = 0;
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 goto out_cp; 605 goto out_cp;
606 } 606 }
607 kunmap_atomic(kaddr, KM_USER0); 607 kunmap_atomic(kaddr);
608 608
609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
610 if (ret < 0) 610 if (ret < 0)
611 goto out_cp; 611 goto out_cp;
612 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 612 kaddr = kmap_atomic(header_bh->b_page);
613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
614 list = &header->ch_snapshot_list; 614 list = &header->ch_snapshot_list;
615 curr_bh = header_bh; 615 curr_bh = header_bh;
@@ -621,13 +621,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); 621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
622 curr = prev; 622 curr = prev;
623 if (curr_blkoff != prev_blkoff) { 623 if (curr_blkoff != prev_blkoff) {
624 kunmap_atomic(kaddr, KM_USER0); 624 kunmap_atomic(kaddr);
625 brelse(curr_bh); 625 brelse(curr_bh);
626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
627 0, &curr_bh); 627 0, &curr_bh);
628 if (ret < 0) 628 if (ret < 0)
629 goto out_header; 629 goto out_header;
630 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 630 kaddr = kmap_atomic(curr_bh->b_page);
631 } 631 }
632 curr_blkoff = prev_blkoff; 632 curr_blkoff = prev_blkoff;
633 cp = nilfs_cpfile_block_get_checkpoint( 633 cp = nilfs_cpfile_block_get_checkpoint(
@@ -635,7 +635,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
635 list = &cp->cp_snapshot_list; 635 list = &cp->cp_snapshot_list;
636 prev = le64_to_cpu(list->ssl_prev); 636 prev = le64_to_cpu(list->ssl_prev);
637 } 637 }
638 kunmap_atomic(kaddr, KM_USER0); 638 kunmap_atomic(kaddr);
639 639
640 if (prev != 0) { 640 if (prev != 0) {
641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, 641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
@@ -647,29 +647,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
647 get_bh(prev_bh); 647 get_bh(prev_bh);
648 } 648 }
649 649
650 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 650 kaddr = kmap_atomic(curr_bh->b_page);
651 list = nilfs_cpfile_block_get_snapshot_list( 651 list = nilfs_cpfile_block_get_snapshot_list(
652 cpfile, curr, curr_bh, kaddr); 652 cpfile, curr, curr_bh, kaddr);
653 list->ssl_prev = cpu_to_le64(cno); 653 list->ssl_prev = cpu_to_le64(cno);
654 kunmap_atomic(kaddr, KM_USER0); 654 kunmap_atomic(kaddr);
655 655
656 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 656 kaddr = kmap_atomic(cp_bh->b_page);
657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); 658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); 659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
660 nilfs_checkpoint_set_snapshot(cp); 660 nilfs_checkpoint_set_snapshot(cp);
661 kunmap_atomic(kaddr, KM_USER0); 661 kunmap_atomic(kaddr);
662 662
663 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 663 kaddr = kmap_atomic(prev_bh->b_page);
664 list = nilfs_cpfile_block_get_snapshot_list( 664 list = nilfs_cpfile_block_get_snapshot_list(
665 cpfile, prev, prev_bh, kaddr); 665 cpfile, prev, prev_bh, kaddr);
666 list->ssl_next = cpu_to_le64(cno); 666 list->ssl_next = cpu_to_le64(cno);
667 kunmap_atomic(kaddr, KM_USER0); 667 kunmap_atomic(kaddr);
668 668
669 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 669 kaddr = kmap_atomic(header_bh->b_page);
670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr);
673 673
674 mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
@@ -710,23 +710,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
711 if (ret < 0) 711 if (ret < 0)
712 goto out_sem; 712 goto out_sem;
713 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 713 kaddr = kmap_atomic(cp_bh->b_page);
714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
715 if (nilfs_checkpoint_invalid(cp)) { 715 if (nilfs_checkpoint_invalid(cp)) {
716 ret = -ENOENT; 716 ret = -ENOENT;
717 kunmap_atomic(kaddr, KM_USER0); 717 kunmap_atomic(kaddr);
718 goto out_cp; 718 goto out_cp;
719 } 719 }
720 if (!nilfs_checkpoint_snapshot(cp)) { 720 if (!nilfs_checkpoint_snapshot(cp)) {
721 ret = 0; 721 ret = 0;
722 kunmap_atomic(kaddr, KM_USER0); 722 kunmap_atomic(kaddr);
723 goto out_cp; 723 goto out_cp;
724 } 724 }
725 725
726 list = &cp->cp_snapshot_list; 726 list = &cp->cp_snapshot_list;
727 next = le64_to_cpu(list->ssl_next); 727 next = le64_to_cpu(list->ssl_next);
728 prev = le64_to_cpu(list->ssl_prev); 728 prev = le64_to_cpu(list->ssl_prev);
729 kunmap_atomic(kaddr, KM_USER0); 729 kunmap_atomic(kaddr);
730 730
731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
732 if (ret < 0) 732 if (ret < 0)
@@ -750,29 +750,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
750 get_bh(prev_bh); 750 get_bh(prev_bh);
751 } 751 }
752 752
753 kaddr = kmap_atomic(next_bh->b_page, KM_USER0); 753 kaddr = kmap_atomic(next_bh->b_page);
754 list = nilfs_cpfile_block_get_snapshot_list( 754 list = nilfs_cpfile_block_get_snapshot_list(
755 cpfile, next, next_bh, kaddr); 755 cpfile, next, next_bh, kaddr);
756 list->ssl_prev = cpu_to_le64(prev); 756 list->ssl_prev = cpu_to_le64(prev);
757 kunmap_atomic(kaddr, KM_USER0); 757 kunmap_atomic(kaddr);
758 758
759 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 759 kaddr = kmap_atomic(prev_bh->b_page);
760 list = nilfs_cpfile_block_get_snapshot_list( 760 list = nilfs_cpfile_block_get_snapshot_list(
761 cpfile, prev, prev_bh, kaddr); 761 cpfile, prev, prev_bh, kaddr);
762 list->ssl_next = cpu_to_le64(next); 762 list->ssl_next = cpu_to_le64(next);
763 kunmap_atomic(kaddr, KM_USER0); 763 kunmap_atomic(kaddr);
764 764
765 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 765 kaddr = kmap_atomic(cp_bh->b_page);
766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); 767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); 768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
769 nilfs_checkpoint_clear_snapshot(cp); 769 nilfs_checkpoint_clear_snapshot(cp);
770 kunmap_atomic(kaddr, KM_USER0); 770 kunmap_atomic(kaddr);
771 771
772 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 772 kaddr = kmap_atomic(header_bh->b_page);
773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr);
776 776
777 mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
@@ -829,13 +829,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
830 if (ret < 0) 830 if (ret < 0)
831 goto out; 831 goto out;
832 kaddr = kmap_atomic(bh->b_page, KM_USER0); 832 kaddr = kmap_atomic(bh->b_page);
833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
834 if (nilfs_checkpoint_invalid(cp)) 834 if (nilfs_checkpoint_invalid(cp))
835 ret = -ENOENT; 835 ret = -ENOENT;
836 else 836 else
837 ret = nilfs_checkpoint_snapshot(cp); 837 ret = nilfs_checkpoint_snapshot(cp);
838 kunmap_atomic(kaddr, KM_USER0); 838 kunmap_atomic(kaddr);
839 brelse(bh); 839 brelse(bh);
840 840
841 out: 841 out:
@@ -912,12 +912,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
912 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 912 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
913 if (ret < 0) 913 if (ret < 0)
914 goto out_sem; 914 goto out_sem;
915 kaddr = kmap_atomic(bh->b_page, KM_USER0); 915 kaddr = kmap_atomic(bh->b_page);
916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
917 cpstat->cs_cno = nilfs_mdt_cno(cpfile); 917 cpstat->cs_cno = nilfs_mdt_cno(cpfile);
918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); 918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); 919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
920 kunmap_atomic(kaddr, KM_USER0); 920 kunmap_atomic(kaddr);
921 brelse(bh); 921 brelse(bh);
922 922
923 out_sem: 923 out_sem:
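[Note on the nilfs2 hunks from here on] These are mechanical fallout from the mm-wide change that removed kmap_atomic()'s KM_* slot argument: atomic kmaps became a small per-CPU stack, so callers no longer name a slot and need only honour LIFO unmap order (visible later in nilfs_copy_buffer(), where kaddr1, mapped second, is unmapped first). The conversion is a fixed two-line pattern:

	kaddr = kmap_atomic(page);	/* was: kmap_atomic(page, KM_USER0) */
	/* ... touch the mapping ... */
	kunmap_atomic(kaddr);		/* was: kunmap_atomic(kaddr, KM_USER0) */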
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fcc2f869af1..b5c13f3576b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -85,13 +85,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
85 struct nilfs_dat_entry *entry; 85 struct nilfs_dat_entry *entry;
86 void *kaddr; 86 void *kaddr;
87 87
88 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 88 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
90 req->pr_entry_bh, kaddr); 90 req->pr_entry_bh, kaddr);
91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
93 entry->de_blocknr = cpu_to_le64(0); 93 entry->de_blocknr = cpu_to_le64(0);
94 kunmap_atomic(kaddr, KM_USER0); 94 kunmap_atomic(kaddr);
95 95
96 nilfs_palloc_commit_alloc_entry(dat, req); 96 nilfs_palloc_commit_alloc_entry(dat, req);
97 nilfs_dat_commit_entry(dat, req); 97 nilfs_dat_commit_entry(dat, req);
@@ -109,13 +109,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
109 struct nilfs_dat_entry *entry; 109 struct nilfs_dat_entry *entry;
110 void *kaddr; 110 void *kaddr;
111 111
112 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 112 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
114 req->pr_entry_bh, kaddr); 114 req->pr_entry_bh, kaddr);
115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
117 entry->de_blocknr = cpu_to_le64(0); 117 entry->de_blocknr = cpu_to_le64(0);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 nilfs_dat_commit_entry(dat, req); 120 nilfs_dat_commit_entry(dat, req);
121 nilfs_palloc_commit_free_entry(dat, req); 121 nilfs_palloc_commit_free_entry(dat, req);
@@ -136,12 +136,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
136 struct nilfs_dat_entry *entry; 136 struct nilfs_dat_entry *entry;
137 void *kaddr; 137 void *kaddr;
138 138
139 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 139 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
141 req->pr_entry_bh, kaddr); 141 req->pr_entry_bh, kaddr);
142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
143 entry->de_blocknr = cpu_to_le64(blocknr); 143 entry->de_blocknr = cpu_to_le64(blocknr);
144 kunmap_atomic(kaddr, KM_USER0); 144 kunmap_atomic(kaddr);
145 145
146 nilfs_dat_commit_entry(dat, req); 146 nilfs_dat_commit_entry(dat, req);
147} 147}
@@ -160,12 +160,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
160 return ret; 160 return ret;
161 } 161 }
162 162
163 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 163 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
165 req->pr_entry_bh, kaddr); 165 req->pr_entry_bh, kaddr);
166 start = le64_to_cpu(entry->de_start); 166 start = le64_to_cpu(entry->de_start);
167 blocknr = le64_to_cpu(entry->de_blocknr); 167 blocknr = le64_to_cpu(entry->de_blocknr);
168 kunmap_atomic(kaddr, KM_USER0); 168 kunmap_atomic(kaddr);
169 169
170 if (blocknr == 0) { 170 if (blocknr == 0) {
171 ret = nilfs_palloc_prepare_free_entry(dat, req); 171 ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -186,7 +186,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
186 sector_t blocknr; 186 sector_t blocknr;
187 void *kaddr; 187 void *kaddr;
188 188
189 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 189 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
191 req->pr_entry_bh, kaddr); 191 req->pr_entry_bh, kaddr);
192 end = start = le64_to_cpu(entry->de_start); 192 end = start = le64_to_cpu(entry->de_start);
@@ -196,7 +196,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
196 } 196 }
197 entry->de_end = cpu_to_le64(end); 197 entry->de_end = cpu_to_le64(end);
198 blocknr = le64_to_cpu(entry->de_blocknr); 198 blocknr = le64_to_cpu(entry->de_blocknr);
199 kunmap_atomic(kaddr, KM_USER0); 199 kunmap_atomic(kaddr);
200 200
201 if (blocknr == 0) 201 if (blocknr == 0)
202 nilfs_dat_commit_free(dat, req); 202 nilfs_dat_commit_free(dat, req);
@@ -211,12 +211,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
211 sector_t blocknr; 211 sector_t blocknr;
212 void *kaddr; 212 void *kaddr;
213 213
214 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 214 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
216 req->pr_entry_bh, kaddr); 216 req->pr_entry_bh, kaddr);
217 start = le64_to_cpu(entry->de_start); 217 start = le64_to_cpu(entry->de_start);
218 blocknr = le64_to_cpu(entry->de_blocknr); 218 blocknr = le64_to_cpu(entry->de_blocknr);
219 kunmap_atomic(kaddr, KM_USER0); 219 kunmap_atomic(kaddr);
220 220
221 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 221 if (start == nilfs_mdt_cno(dat) && blocknr == 0)
222 nilfs_palloc_abort_free_entry(dat, req); 222 nilfs_palloc_abort_free_entry(dat, req);
@@ -346,20 +346,20 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
346 } 346 }
347 } 347 }
348 348
349 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 349 kaddr = kmap_atomic(entry_bh->b_page);
350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, 352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
353 (unsigned long long)vblocknr, 353 (unsigned long long)vblocknr,
354 (unsigned long long)le64_to_cpu(entry->de_start), 354 (unsigned long long)le64_to_cpu(entry->de_start),
355 (unsigned long long)le64_to_cpu(entry->de_end)); 355 (unsigned long long)le64_to_cpu(entry->de_end));
356 kunmap_atomic(kaddr, KM_USER0); 356 kunmap_atomic(kaddr);
357 brelse(entry_bh); 357 brelse(entry_bh);
358 return -EINVAL; 358 return -EINVAL;
359 } 359 }
360 WARN_ON(blocknr == 0); 360 WARN_ON(blocknr == 0);
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr);
363 363
364 mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
@@ -409,7 +409,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
409 } 409 }
410 } 410 }
411 411
412 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 412 kaddr = kmap_atomic(entry_bh->b_page);
413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
414 blocknr = le64_to_cpu(entry->de_blocknr); 414 blocknr = le64_to_cpu(entry->de_blocknr);
415 if (blocknr == 0) { 415 if (blocknr == 0) {
@@ -419,7 +419,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
419 *blocknrp = blocknr; 419 *blocknrp = blocknr;
420 420
421 out: 421 out:
422 kunmap_atomic(kaddr, KM_USER0); 422 kunmap_atomic(kaddr);
423 brelse(entry_bh); 423 brelse(entry_bh);
424 return ret; 424 return ret;
425} 425}
@@ -440,7 +440,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
440 0, &entry_bh); 440 0, &entry_bh);
441 if (ret < 0) 441 if (ret < 0)
442 return ret; 442 return ret;
443 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 443 kaddr = kmap_atomic(entry_bh->b_page);
444 /* last virtual block number in this block */ 444 /* last virtual block number in this block */
445 first = vinfo->vi_vblocknr; 445 first = vinfo->vi_vblocknr;
446 do_div(first, entries_per_block); 446 do_div(first, entries_per_block);
@@ -456,7 +456,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
456 vinfo->vi_end = le64_to_cpu(entry->de_end); 456 vinfo->vi_end = le64_to_cpu(entry->de_end);
457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
458 } 458 }
459 kunmap_atomic(kaddr, KM_USER0); 459 kunmap_atomic(kaddr);
460 brelse(entry_bh); 460 brelse(entry_bh);
461 } 461 }
462 462
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index ca35b3a46d1..df1a7fb238d 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -602,7 +602,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
602 unlock_page(page); 602 unlock_page(page);
603 goto fail; 603 goto fail;
604 } 604 }
605 kaddr = kmap_atomic(page, KM_USER0); 605 kaddr = kmap_atomic(page);
606 memset(kaddr, 0, chunk_size); 606 memset(kaddr, 0, chunk_size);
607 de = (struct nilfs_dir_entry *)kaddr; 607 de = (struct nilfs_dir_entry *)kaddr;
608 de->name_len = 1; 608 de->name_len = 1;
@@ -617,7 +617,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
617 de->inode = cpu_to_le64(parent->i_ino); 617 de->inode = cpu_to_le64(parent->i_ino);
618 memcpy(de->name, "..\0", 4); 618 memcpy(de->name, "..\0", 4);
619 nilfs_set_de_type(de, inode); 619 nilfs_set_de_type(de, inode);
620 kunmap_atomic(kaddr, KM_USER0); 620 kunmap_atomic(kaddr);
621 nilfs_commit_chunk(page, mapping, 0, chunk_size); 621 nilfs_commit_chunk(page, mapping, 0, chunk_size);
622fail: 622fail:
623 page_cache_release(page); 623 page_cache_release(page);
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 684d76300a8..5a48df79d67 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -122,11 +122,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
122 return ret; 122 return ret;
123 } 123 }
124 124
125 kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); 125 kaddr = kmap_atomic(req.pr_entry_bh->b_page);
126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, 126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
127 req.pr_entry_bh, kaddr); 127 req.pr_entry_bh, kaddr);
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr);
130 130
131 mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 800e8d78a83..f9897d09c69 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -58,12 +58,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
58 58
59 set_buffer_mapped(bh); 59 set_buffer_mapped(bh);
60 60
61 kaddr = kmap_atomic(bh->b_page, KM_USER0); 61 kaddr = kmap_atomic(bh->b_page);
62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
63 if (init_block) 63 if (init_block)
64 init_block(inode, bh, kaddr); 64 init_block(inode, bh, kaddr);
65 flush_dcache_page(bh->b_page); 65 flush_dcache_page(bh->b_page);
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1cd3f624dff..fce2bbee66d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -193,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
193 struct nilfs_transaction_info ti; 193 struct nilfs_transaction_info ti;
194 int err; 194 int err;
195 195
196 if (inode->i_nlink >= NILFS_LINK_MAX)
197 return -EMLINK;
198
199 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 196 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
200 if (err) 197 if (err)
201 return err; 198 return err;
@@ -219,9 +216,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
219 struct nilfs_transaction_info ti; 216 struct nilfs_transaction_info ti;
220 int err; 217 int err;
221 218
222 if (dir->i_nlink >= NILFS_LINK_MAX)
223 return -EMLINK;
224
225 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 219 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
226 if (err) 220 if (err)
227 return err; 221 return err;
@@ -400,11 +394,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
400 drop_nlink(new_inode); 394 drop_nlink(new_inode);
401 nilfs_mark_inode_dirty(new_inode); 395 nilfs_mark_inode_dirty(new_inode);
402 } else { 396 } else {
403 if (dir_de) {
404 err = -EMLINK;
405 if (new_dir->i_nlink >= NILFS_LINK_MAX)
406 goto out_dir;
407 }
408 err = nilfs_add_link(new_dentry, old_inode); 397 err = nilfs_add_link(new_dentry, old_inode);
409 if (err) 398 if (err)
410 goto out_dir; 399 goto out_dir;
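[Note on the namei.c hunks above] The dropped NILFS_LINK_MAX checks in nilfs_link(), nilfs_mkdir() and nilfs_rename() look like a relocation rather than a behaviour change: this release moved link-count limiting into the VFS, which returns -EMLINK when i_nlink would exceed the superblock's s_max_links. Presumably nilfs2 sets the limit once at mount elsewhere in this series, roughly:

	/* sketch: mount-time setup, assuming the s_max_links mechanism */
	sb->s_max_links = NILFS_LINK_MAX;
	/* vfs_link()/vfs_mkdir()/vfs_rename() then enforce -EMLINK generically */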
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 65221a04c6f..3e7b2a0dc0c 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -119,11 +119,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
119 struct page *spage = sbh->b_page, *dpage = dbh->b_page; 119 struct page *spage = sbh->b_page, *dpage = dbh->b_page;
120 struct buffer_head *bh; 120 struct buffer_head *bh;
121 121
122 kaddr0 = kmap_atomic(spage, KM_USER0); 122 kaddr0 = kmap_atomic(spage);
123 kaddr1 = kmap_atomic(dpage, KM_USER1); 123 kaddr1 = kmap_atomic(dpage);
124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); 124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
125 kunmap_atomic(kaddr1, KM_USER1); 125 kunmap_atomic(kaddr1);
126 kunmap_atomic(kaddr0, KM_USER0); 126 kunmap_atomic(kaddr0);
127 127
128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; 128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
129 dbh->b_blocknr = sbh->b_blocknr; 129 dbh->b_blocknr = sbh->b_blocknr;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index a604ac0331b..f1626f5011c 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -493,9 +493,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
493 if (unlikely(!bh_org)) 493 if (unlikely(!bh_org))
494 return -EIO; 494 return -EIO;
495 495
496 kaddr = kmap_atomic(page, KM_USER0); 496 kaddr = kmap_atomic(page);
497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); 497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
498 kunmap_atomic(kaddr, KM_USER0); 498 kunmap_atomic(kaddr);
499 brelse(bh_org); 499 brelse(bh_org);
500 return 0; 500 return 0;
501} 501}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 850a7c0228f..dc9a913784a 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
227 crc = crc32_le(crc, bh->b_data, bh->b_size); 227 crc = crc32_le(crc, bh->b_data, bh->b_size);
228 } 228 }
229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
230 kaddr = kmap_atomic(bh->b_page, KM_USER0); 230 kaddr = kmap_atomic(bh->b_page);
231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); 231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
232 kunmap_atomic(kaddr, KM_USER0); 232 kunmap_atomic(kaddr);
233 } 233 }
234 raw_sum->ss_datasum = cpu_to_le32(crc); 234 raw_sum->ss_datasum = cpu_to_le32(crc);
235} 235}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 0a0aba617d8..c5b7653a439 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -111,11 +111,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
111 struct nilfs_sufile_header *header; 111 struct nilfs_sufile_header *header;
112 void *kaddr; 112 void *kaddr;
113 113
114 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 114 kaddr = kmap_atomic(header_bh->b_page);
115 header = kaddr + bh_offset(header_bh); 115 header = kaddr + bh_offset(header_bh);
116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd); 116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
121} 121}
@@ -319,11 +319,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 ret = nilfs_sufile_get_header_block(sufile, &header_bh); 319 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
320 if (ret < 0) 320 if (ret < 0)
321 goto out_sem; 321 goto out_sem;
322 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 322 kaddr = kmap_atomic(header_bh->b_page);
323 header = kaddr + bh_offset(header_bh); 323 header = kaddr + bh_offset(header_bh);
324 ncleansegs = le64_to_cpu(header->sh_ncleansegs); 324 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
325 last_alloc = le64_to_cpu(header->sh_last_alloc); 325 last_alloc = le64_to_cpu(header->sh_last_alloc);
326 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr);
327 327
328 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax; 329 maxsegnum = sui->allocmax;
@@ -356,7 +356,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
356 &su_bh); 356 &su_bh);
357 if (ret < 0) 357 if (ret < 0)
358 goto out_header; 358 goto out_header;
359 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 359 kaddr = kmap_atomic(su_bh->b_page);
360 su = nilfs_sufile_block_get_segment_usage( 360 su = nilfs_sufile_block_get_segment_usage(
361 sufile, segnum, su_bh, kaddr); 361 sufile, segnum, su_bh, kaddr);
362 362
@@ -367,14 +367,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
367 continue; 367 continue;
368 /* found a clean segment */ 368 /* found a clean segment */
369 nilfs_segment_usage_set_dirty(su); 369 nilfs_segment_usage_set_dirty(su);
370 kunmap_atomic(kaddr, KM_USER0); 370 kunmap_atomic(kaddr);
371 371
372 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 372 kaddr = kmap_atomic(header_bh->b_page);
373 header = kaddr + bh_offset(header_bh); 373 header = kaddr + bh_offset(header_bh);
374 le64_add_cpu(&header->sh_ncleansegs, -1); 374 le64_add_cpu(&header->sh_ncleansegs, -1);
375 le64_add_cpu(&header->sh_ndirtysegs, 1); 375 le64_add_cpu(&header->sh_ndirtysegs, 1);
376 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
377 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr);
378 378
379 sui->ncleansegs--; 379 sui->ncleansegs--;
380 mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
@@ -385,7 +385,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
385 goto out_header; 385 goto out_header;
386 } 386 }
387 387
388 kunmap_atomic(kaddr, KM_USER0); 388 kunmap_atomic(kaddr);
389 brelse(su_bh); 389 brelse(su_bh);
390 } 390 }
391 391
@@ -407,16 +407,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
407 struct nilfs_segment_usage *su; 407 struct nilfs_segment_usage *su;
408 void *kaddr; 408 void *kaddr;
409 409
410 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 410 kaddr = kmap_atomic(su_bh->b_page);
411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
412 if (unlikely(!nilfs_segment_usage_clean(su))) { 412 if (unlikely(!nilfs_segment_usage_clean(su))) {
413 printk(KERN_WARNING "%s: segment %llu must be clean\n", 413 printk(KERN_WARNING "%s: segment %llu must be clean\n",
414 __func__, (unsigned long long)segnum); 414 __func__, (unsigned long long)segnum);
415 kunmap_atomic(kaddr, KM_USER0); 415 kunmap_atomic(kaddr);
416 return; 416 return;
417 } 417 }
418 nilfs_segment_usage_set_dirty(su); 418 nilfs_segment_usage_set_dirty(su);
419 kunmap_atomic(kaddr, KM_USER0); 419 kunmap_atomic(kaddr);
420 420
421 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
422 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
@@ -433,11 +433,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
433 void *kaddr; 433 void *kaddr;
434 int clean, dirty; 434 int clean, dirty;
435 435
436 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 436 kaddr = kmap_atomic(su_bh->b_page);
437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && 438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
439 su->su_nblocks == cpu_to_le32(0)) { 439 su->su_nblocks == cpu_to_le32(0)) {
440 kunmap_atomic(kaddr, KM_USER0); 440 kunmap_atomic(kaddr);
441 return; 441 return;
442 } 442 }
443 clean = nilfs_segment_usage_clean(su); 443 clean = nilfs_segment_usage_clean(su);
@@ -447,7 +447,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
447 su->su_lastmod = cpu_to_le64(0); 447 su->su_lastmod = cpu_to_le64(0);
448 su->su_nblocks = cpu_to_le32(0); 448 su->su_nblocks = cpu_to_le32(0);
449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); 449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
450 kunmap_atomic(kaddr, KM_USER0); 450 kunmap_atomic(kaddr);
451 451
452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
453 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -464,12 +464,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
464 void *kaddr; 464 void *kaddr;
465 int sudirty; 465 int sudirty;
466 466
467 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 467 kaddr = kmap_atomic(su_bh->b_page);
468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
469 if (nilfs_segment_usage_clean(su)) { 469 if (nilfs_segment_usage_clean(su)) {
470 printk(KERN_WARNING "%s: segment %llu is already clean\n", 470 printk(KERN_WARNING "%s: segment %llu is already clean\n",
471 __func__, (unsigned long long)segnum); 471 __func__, (unsigned long long)segnum);
472 kunmap_atomic(kaddr, KM_USER0); 472 kunmap_atomic(kaddr);
473 return; 473 return;
474 } 474 }
475 WARN_ON(nilfs_segment_usage_error(su)); 475 WARN_ON(nilfs_segment_usage_error(su));
@@ -477,7 +477,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
477 477
478 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
479 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
480 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr);
481 mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
482 482
483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -525,13 +525,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
525 if (ret < 0) 525 if (ret < 0)
526 goto out_sem; 526 goto out_sem;
527 527
528 kaddr = kmap_atomic(bh->b_page, KM_USER0); 528 kaddr = kmap_atomic(bh->b_page);
529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); 529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
530 WARN_ON(nilfs_segment_usage_error(su)); 530 WARN_ON(nilfs_segment_usage_error(su));
531 if (modtime) 531 if (modtime)
532 su->su_lastmod = cpu_to_le64(modtime); 532 su->su_lastmod = cpu_to_le64(modtime);
533 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
534 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr);
535 535
536 mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
537 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
@@ -572,7 +572,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
572 if (ret < 0) 572 if (ret < 0)
573 goto out_sem; 573 goto out_sem;
574 574
575 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 575 kaddr = kmap_atomic(header_bh->b_page);
576 header = kaddr + bh_offset(header_bh); 576 header = kaddr + bh_offset(header_bh);
577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); 577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); 578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
@@ -582,7 +582,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
582 spin_lock(&nilfs->ns_last_segment_lock); 582 spin_lock(&nilfs->ns_last_segment_lock);
583 sustat->ss_prot_seq = nilfs->ns_prot_seq; 583 sustat->ss_prot_seq = nilfs->ns_prot_seq;
584 spin_unlock(&nilfs->ns_last_segment_lock); 584 spin_unlock(&nilfs->ns_last_segment_lock);
585 kunmap_atomic(kaddr, KM_USER0); 585 kunmap_atomic(kaddr);
586 brelse(header_bh); 586 brelse(header_bh);
587 587
588 out_sem: 588 out_sem:
@@ -598,15 +598,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
598 void *kaddr; 598 void *kaddr;
599 int suclean; 599 int suclean;
600 600
601 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 601 kaddr = kmap_atomic(su_bh->b_page);
602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
603 if (nilfs_segment_usage_error(su)) { 603 if (nilfs_segment_usage_error(su)) {
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 return; 605 return;
606 } 606 }
607 suclean = nilfs_segment_usage_clean(su); 607 suclean = nilfs_segment_usage_clean(su);
608 nilfs_segment_usage_set_error(su); 608 nilfs_segment_usage_set_error(su);
609 kunmap_atomic(kaddr, KM_USER0); 609 kunmap_atomic(kaddr);
610 610
611 if (suclean) { 611 if (suclean) {
612 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -675,7 +675,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
675 /* hole */ 675 /* hole */
676 continue; 676 continue;
677 } 677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 678 kaddr = kmap_atomic(su_bh->b_page);
679 su = nilfs_sufile_block_get_segment_usage( 679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr); 680 sufile, segnum, su_bh, kaddr);
681 su2 = su; 681 su2 = su;
@@ -684,7 +684,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || 684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) { 685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY; 686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0); 687 kunmap_atomic(kaddr);
688 brelse(su_bh); 688 brelse(su_bh);
689 goto out_header; 689 goto out_header;
690 } 690 }
@@ -696,7 +696,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
696 nc++; 696 nc++;
697 } 697 }
698 } 698 }
699 kunmap_atomic(kaddr, KM_USER0); 699 kunmap_atomic(kaddr);
700 if (nc > 0) { 700 if (nc > 0) {
701 mark_buffer_dirty(su_bh); 701 mark_buffer_dirty(su_bh);
702 ncleaned += nc; 702 ncleaned += nc;
@@ -772,10 +772,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
772 sui->ncleansegs -= nsegs - newnsegs; 772 sui->ncleansegs -= nsegs - newnsegs;
773 } 773 }
774 774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 775 kaddr = kmap_atomic(header_bh->b_page);
776 header = kaddr + bh_offset(header_bh); 776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); 777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0); 778 kunmap_atomic(kaddr);
779 779
780 mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile); 781 nilfs_mdt_mark_dirty(sufile);
@@ -840,7 +840,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
840 continue; 840 continue;
841 } 841 }
842 842
843 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 843 kaddr = kmap_atomic(su_bh->b_page);
844 su = nilfs_sufile_block_get_segment_usage( 844 su = nilfs_sufile_block_get_segment_usage(
845 sufile, segnum, su_bh, kaddr); 845 sufile, segnum, su_bh, kaddr);
846 for (j = 0; j < n; 846 for (j = 0; j < n;
@@ -853,7 +853,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
853 si->sui_flags |= 853 si->sui_flags |=
854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
855 } 855 }
856 kunmap_atomic(kaddr, KM_USER0); 856 kunmap_atomic(kaddr);
857 brelse(su_bh); 857 brelse(su_bh);
858 } 858 }
859 ret = nsegs; 859 ret = nsegs;
@@ -902,10 +902,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
902 goto failed; 902 goto failed;
903 903
904 sui = NILFS_SUI(sufile); 904 sui = NILFS_SUI(sufile);
905 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 905 kaddr = kmap_atomic(header_bh->b_page);
906 header = kaddr + bh_offset(header_bh); 906 header = kaddr + bh_offset(header_bh);
907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); 907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
908 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr);
909 brelse(header_bh); 909 brelse(header_bh);
910 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; 911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
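A note on the sufile conversions above: every hunk follows the same mechanical rule from the kernel-wide removal of the KM_* argument. Atomic kmaps became stack-based, so kmap_atomic() takes only the page and kunmap_atomic() takes only the returned address, with unmaps required in LIFO order. A minimal sketch of the before/after idiom, with a hypothetical helper name (assumes <linux/highmem.h>):

	/* Sketch only: old vs. new atomic-kmap idiom. */
	static void copy_page_prefix(struct page *page, void *dst, size_t len)
	{
		void *kaddr;

		/* Old style (pre-3.4):
		 *   kaddr = kmap_atomic(page, KM_USER0);
		 *   ...
		 *   kunmap_atomic(kaddr, KM_USER0);
		 */
		kaddr = kmap_atomic(page);	/* new style: no KM_* slot */
		memcpy(dst, kaddr, len);
		kunmap_atomic(kaddr);		/* unmap in LIFO order */
	}
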
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 08e3d4f9df1..1099a76cee5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -917,9 +917,8 @@ static int nilfs_get_root_dentry(struct super_block *sb,
917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) { 917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
918 dentry = d_find_alias(inode); 918 dentry = d_find_alias(inode);
919 if (!dentry) { 919 if (!dentry) {
920 dentry = d_alloc_root(inode); 920 dentry = d_make_root(inode);
921 if (!dentry) { 921 if (!dentry) {
922 iput(inode);
923 ret = -ENOMEM; 922 ret = -ENOMEM;
924 goto failed_dentry; 923 goto failed_dentry;
925 } 924 }
@@ -1059,6 +1058,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1059 sb->s_export_op = &nilfs_export_ops; 1058 sb->s_export_op = &nilfs_export_ops;
1060 sb->s_root = NULL; 1059 sb->s_root = NULL;
1061 sb->s_time_gran = 1; 1060 sb->s_time_gran = 1;
1061 sb->s_max_links = NILFS_LINK_MAX;
1062 1062
1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
1064 sb->s_bdi = bdi ? : &default_backing_dev_info; 1064 sb->s_bdi = bdi ? : &default_backing_dev_info;
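Two properties of d_make_root() explain the shape of this hunk: unlike d_alloc_root(), it consumes the inode reference even when dentry allocation fails, and it tolerates a NULL inode by returning NULL. That is why the explicit iput() disappears here. The added sb->s_max_links assignment hands the EMLINK check to the VFS, which now rejects link() and mkdir() generically once i_nlink reaches that value. A sketch of the conversion pattern:

	/* Before: the caller still owned the inode on failure. */
	dentry = d_alloc_root(inode);
	if (!dentry) {
		iput(inode);		/* caller had to drop it */
		return -ENOMEM;
	}

	/* After: d_make_root() drops the inode itself on failure. */
	dentry = d_make_root(inode);
	if (!dentry)
		return -ENOMEM;		/* no iput() needed */
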
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index ee188158a22..c887b1378f7 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -447,7 +447,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
447 return event; 447 return event;
448} 448}
449 449
450__init int fsnotify_notification_init(void) 450static __init int fsnotify_notification_init(void)
451{ 451{
452 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); 452 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
453 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); 453 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
@@ -461,4 +461,3 @@ __init int fsnotify_notification_init(void)
461 return 0; 461 return 0;
462} 462}
463subsys_initcall(fsnotify_notification_init); 463subsys_initcall(fsnotify_notification_init);
464
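Making the initcall static is safe because subsys_initcall() only records the function's address in the initcall table; no other translation unit references the symbol by name. The idiom, with a hypothetical function:

	static int __init example_init(void)
	{
		/* one-time setup, run at the subsys initcall level */
		return 0;
	}
	subsys_initcall(example_init);
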
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 0b1e885b8cf..fa9c05f97af 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -94,11 +94,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
94 if (file_ofs < init_size) 94 if (file_ofs < init_size)
95 ofs = init_size - file_ofs; 95 ofs = init_size - file_ofs;
96 local_irq_save(flags); 96 local_irq_save(flags);
97 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 97 kaddr = kmap_atomic(page);
98 memset(kaddr + bh_offset(bh) + ofs, 0, 98 memset(kaddr + bh_offset(bh) + ofs, 0,
99 bh->b_size - ofs); 99 bh->b_size - ofs);
100 flush_dcache_page(page); 100 flush_dcache_page(page);
101 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 101 kunmap_atomic(kaddr);
102 local_irq_restore(flags); 102 local_irq_restore(flags);
103 } 103 }
104 } else { 104 } else {
@@ -147,11 +147,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
147 /* Should have been verified before we got here... */ 147 /* Should have been verified before we got here... */
148 BUG_ON(!recs); 148 BUG_ON(!recs);
149 local_irq_save(flags); 149 local_irq_save(flags);
150 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 150 kaddr = kmap_atomic(page);
151 for (i = 0; i < recs; i++) 151 for (i = 0; i < recs; i++)
152 post_read_mst_fixup((NTFS_RECORD*)(kaddr + 152 post_read_mst_fixup((NTFS_RECORD*)(kaddr +
153 i * rec_size), rec_size); 153 i * rec_size), rec_size);
154 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 154 kunmap_atomic(kaddr);
155 local_irq_restore(flags); 155 local_irq_restore(flags);
156 flush_dcache_page(page); 156 flush_dcache_page(page);
157 if (likely(page_uptodate && !PageError(page))) 157 if (likely(page_uptodate && !PageError(page)))
@@ -504,7 +504,7 @@ retry_readpage:
504 /* Race with shrinking truncate. */ 504 /* Race with shrinking truncate. */
505 attr_len = i_size; 505 attr_len = i_size;
506 } 506 }
507 addr = kmap_atomic(page, KM_USER0); 507 addr = kmap_atomic(page);
508 /* Copy the data to the page. */ 508 /* Copy the data to the page. */
509 memcpy(addr, (u8*)ctx->attr + 509 memcpy(addr, (u8*)ctx->attr +
510 le16_to_cpu(ctx->attr->data.resident.value_offset), 510 le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -512,7 +512,7 @@ retry_readpage:
512 /* Zero the remainder of the page. */ 512 /* Zero the remainder of the page. */
513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
514 flush_dcache_page(page); 514 flush_dcache_page(page);
515 kunmap_atomic(addr, KM_USER0); 515 kunmap_atomic(addr);
516put_unm_err_out: 516put_unm_err_out:
517 ntfs_attr_put_search_ctx(ctx); 517 ntfs_attr_put_search_ctx(ctx);
518unm_err_out: 518unm_err_out:
@@ -746,14 +746,14 @@ lock_retry_remap:
746 unsigned long *bpos, *bend; 746 unsigned long *bpos, *bend;
747 747
748 /* Check if the buffer is zero. */ 748 /* Check if the buffer is zero. */
749 kaddr = kmap_atomic(page, KM_USER0); 749 kaddr = kmap_atomic(page);
750 bpos = (unsigned long *)(kaddr + bh_offset(bh)); 750 bpos = (unsigned long *)(kaddr + bh_offset(bh));
751 bend = (unsigned long *)((u8*)bpos + blocksize); 751 bend = (unsigned long *)((u8*)bpos + blocksize);
752 do { 752 do {
753 if (unlikely(*bpos)) 753 if (unlikely(*bpos))
754 break; 754 break;
755 } while (likely(++bpos < bend)); 755 } while (likely(++bpos < bend));
756 kunmap_atomic(kaddr, KM_USER0); 756 kunmap_atomic(kaddr);
757 if (bpos == bend) { 757 if (bpos == bend) {
758 /* 758 /*
759 * Buffer is zero and sparse, no need to write 759 * Buffer is zero and sparse, no need to write
@@ -1495,14 +1495,14 @@ retry_writepage:
1495 /* Shrinking cannot fail. */ 1495 /* Shrinking cannot fail. */
1496 BUG_ON(err); 1496 BUG_ON(err);
1497 } 1497 }
1498 addr = kmap_atomic(page, KM_USER0); 1498 addr = kmap_atomic(page);
1499 /* Copy the data from the page to the mft record. */ 1499 /* Copy the data from the page to the mft record. */
1500 memcpy((u8*)ctx->attr + 1500 memcpy((u8*)ctx->attr +
1501 le16_to_cpu(ctx->attr->data.resident.value_offset), 1501 le16_to_cpu(ctx->attr->data.resident.value_offset),
1502 addr, attr_len); 1502 addr, attr_len);
1503 /* Zero out of bounds area in the page cache page. */ 1503 /* Zero out of bounds area in the page cache page. */
1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1505 kunmap_atomic(addr, KM_USER0); 1505 kunmap_atomic(addr);
1506 flush_dcache_page(page); 1506 flush_dcache_page(page);
1507 flush_dcache_mft_record_page(ctx->ntfs_ino); 1507 flush_dcache_mft_record_page(ctx->ntfs_ino);
1508 /* We are done with the page. */ 1508 /* We are done with the page. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index e0281992ddc..a27e3fecefa 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1656,12 +1656,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1656 attr_size = le32_to_cpu(a->data.resident.value_length); 1656 attr_size = le32_to_cpu(a->data.resident.value_length);
1657 BUG_ON(attr_size != data_size); 1657 BUG_ON(attr_size != data_size);
1658 if (page && !PageUptodate(page)) { 1658 if (page && !PageUptodate(page)) {
1659 kaddr = kmap_atomic(page, KM_USER0); 1659 kaddr = kmap_atomic(page);
1660 memcpy(kaddr, (u8*)a + 1660 memcpy(kaddr, (u8*)a +
1661 le16_to_cpu(a->data.resident.value_offset), 1661 le16_to_cpu(a->data.resident.value_offset),
1662 attr_size); 1662 attr_size);
1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); 1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
1664 kunmap_atomic(kaddr, KM_USER0); 1664 kunmap_atomic(kaddr);
1665 flush_dcache_page(page); 1665 flush_dcache_page(page);
1666 SetPageUptodate(page); 1666 SetPageUptodate(page);
1667 } 1667 }
@@ -1806,9 +1806,9 @@ undo_err_out:
1806 sizeof(a->data.resident.reserved)); 1806 sizeof(a->data.resident.reserved));
1807 /* Copy the data from the page back to the attribute value. */ 1807 /* Copy the data from the page back to the attribute value. */
1808 if (page) { 1808 if (page) {
1809 kaddr = kmap_atomic(page, KM_USER0); 1809 kaddr = kmap_atomic(page);
1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size); 1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1811 kunmap_atomic(kaddr, KM_USER0); 1811 kunmap_atomic(kaddr);
1812 } 1812 }
1813 /* Setup the allocated size in the ntfs inode in case it changed. */ 1813 /* Setup the allocated size in the ntfs inode in case it changed. */
1814 write_lock_irqsave(&ni->size_lock, flags); 1814 write_lock_irqsave(&ni->size_lock, flags);
@@ -2540,10 +2540,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2540 size = PAGE_CACHE_SIZE; 2540 size = PAGE_CACHE_SIZE;
2541 if (idx == end) 2541 if (idx == end)
2542 size = end_ofs; 2542 size = end_ofs;
2543 kaddr = kmap_atomic(page, KM_USER0); 2543 kaddr = kmap_atomic(page);
2544 memset(kaddr + start_ofs, val, size - start_ofs); 2544 memset(kaddr + start_ofs, val, size - start_ofs);
2545 flush_dcache_page(page); 2545 flush_dcache_page(page);
2546 kunmap_atomic(kaddr, KM_USER0); 2546 kunmap_atomic(kaddr);
2547 set_page_dirty(page); 2547 set_page_dirty(page);
2548 page_cache_release(page); 2548 page_cache_release(page);
2549 balance_dirty_pages_ratelimited(mapping); 2549 balance_dirty_pages_ratelimited(mapping);
@@ -2561,10 +2561,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2561 "page (index 0x%lx).", idx); 2561 "page (index 0x%lx).", idx);
2562 return -ENOMEM; 2562 return -ENOMEM;
2563 } 2563 }
2564 kaddr = kmap_atomic(page, KM_USER0); 2564 kaddr = kmap_atomic(page);
2565 memset(kaddr, val, PAGE_CACHE_SIZE); 2565 memset(kaddr, val, PAGE_CACHE_SIZE);
2566 flush_dcache_page(page); 2566 flush_dcache_page(page);
2567 kunmap_atomic(kaddr, KM_USER0); 2567 kunmap_atomic(kaddr);
2568 /* 2568 /*
2569 * If the page has buffers, mark them uptodate since buffer 2569 * If the page has buffers, mark them uptodate since buffer
2570 * state and not page state is definitive in 2.6 kernels. 2570 * state and not page state is definitive in 2.6 kernels.
@@ -2598,10 +2598,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2598 "(error, index 0x%lx).", idx); 2598 "(error, index 0x%lx).", idx);
2599 return PTR_ERR(page); 2599 return PTR_ERR(page);
2600 } 2600 }
2601 kaddr = kmap_atomic(page, KM_USER0); 2601 kaddr = kmap_atomic(page);
2602 memset(kaddr, val, end_ofs); 2602 memset(kaddr, val, end_ofs);
2603 flush_dcache_page(page); 2603 flush_dcache_page(page);
2604 kunmap_atomic(kaddr, KM_USER0); 2604 kunmap_atomic(kaddr);
2605 set_page_dirty(page); 2605 set_page_dirty(page);
2606 page_cache_release(page); 2606 page_cache_release(page);
2607 balance_dirty_pages_ratelimited(mapping); 2607 balance_dirty_pages_ratelimited(mapping);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c587e2d2718..8639169221c 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -704,7 +704,7 @@ map_buffer_cached:
704 u8 *kaddr; 704 u8 *kaddr;
705 unsigned pofs; 705 unsigned pofs;
706 706
707 kaddr = kmap_atomic(page, KM_USER0); 707 kaddr = kmap_atomic(page);
708 if (bh_pos < pos) { 708 if (bh_pos < pos) {
709 pofs = bh_pos & ~PAGE_CACHE_MASK; 709 pofs = bh_pos & ~PAGE_CACHE_MASK;
710 memset(kaddr + pofs, 0, pos - bh_pos); 710 memset(kaddr + pofs, 0, pos - bh_pos);
@@ -713,7 +713,7 @@ map_buffer_cached:
713 pofs = end & ~PAGE_CACHE_MASK; 713 pofs = end & ~PAGE_CACHE_MASK;
714 memset(kaddr + pofs, 0, bh_end - end); 714 memset(kaddr + pofs, 0, bh_end - end);
715 } 715 }
716 kunmap_atomic(kaddr, KM_USER0); 716 kunmap_atomic(kaddr);
717 flush_dcache_page(page); 717 flush_dcache_page(page);
718 } 718 }
719 continue; 719 continue;
@@ -1287,9 +1287,9 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
1287 len = PAGE_CACHE_SIZE - ofs; 1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes) 1288 if (len > bytes)
1289 len = bytes; 1289 len = bytes;
1290 addr = kmap_atomic(*pages, KM_USER0); 1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len); 1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr, KM_USER0); 1292 kunmap_atomic(addr);
1293 if (unlikely(left)) { 1293 if (unlikely(left)) {
1294 /* Do it the slow way. */ 1294 /* Do it the slow way. */
1295 addr = kmap(*pages); 1295 addr = kmap(*pages);
@@ -1401,10 +1401,10 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1401 len = PAGE_CACHE_SIZE - ofs; 1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes) 1402 if (len > bytes)
1403 len = bytes; 1403 len = bytes;
1404 addr = kmap_atomic(*pages, KM_USER0); 1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, 1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len); 1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr, KM_USER0); 1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) { 1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */ 1409 /* Do it the slow way. */
1410 addr = kmap(*pages); 1410 addr = kmap(*pages);
@@ -1691,7 +1691,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1691 BUG_ON(end > le32_to_cpu(a->length) - 1691 BUG_ON(end > le32_to_cpu(a->length) -
1692 le16_to_cpu(a->data.resident.value_offset)); 1692 le16_to_cpu(a->data.resident.value_offset));
1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
1694 kaddr = kmap_atomic(page, KM_USER0); 1694 kaddr = kmap_atomic(page);
1695 /* Copy the received data from the page to the mft record. */ 1695 /* Copy the received data from the page to the mft record. */
1696 memcpy(kattr + pos, kaddr + pos, bytes); 1696 memcpy(kattr + pos, kaddr + pos, bytes);
1697 /* Update the attribute length if necessary. */ 1697 /* Update the attribute length if necessary. */
@@ -1713,7 +1713,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1713 flush_dcache_page(page); 1713 flush_dcache_page(page);
1714 SetPageUptodate(page); 1714 SetPageUptodate(page);
1715 } 1715 }
1716 kunmap_atomic(kaddr, KM_USER0); 1716 kunmap_atomic(kaddr);
1717 /* Update initialized_size/i_size if necessary. */ 1717 /* Update initialized_size/i_size if necessary. */
1718 read_lock_irqsave(&ni->size_lock, flags); 1718 read_lock_irqsave(&ni->size_lock, flags);
1719 initialized_size = ni->initialized_size; 1719 initialized_size = ni->initialized_size;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index faece719086..809c0e6d8e0 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2008,14 +2008,14 @@ typedef struct {
2008 * 2008 *
2009 * When a directory is small enough to fit inside the index root then this 2009 * When a directory is small enough to fit inside the index root then this
2010 * is the only attribute describing the directory. When the directory is too 2010 * is the only attribute describing the directory. When the directory is too
2011 * large to fit in the index root, on the other hand, two aditional attributes 2011 * large to fit in the index root, on the other hand, two additional attributes
2012 * are present: an index allocation attribute, containing sub-nodes of the B+ 2012 * are present: an index allocation attribute, containing sub-nodes of the B+
2013 * directory tree (see below), and a bitmap attribute, describing which virtual 2013 * directory tree (see below), and a bitmap attribute, describing which virtual
2014 * cluster numbers (vcns) in the index allocation attribute are in use by an 2014 * cluster numbers (vcns) in the index allocation attribute are in use by an
2015 * index block. 2015 * index block.
2016 * 2016 *
2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other 2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other
2018 * dircetories do not contain entries for themselves, though. 2018 * directories do not contain entries for themselves, though.
2019 */ 2019 */
2020typedef struct { 2020typedef struct {
2021 ATTR_TYPE type; /* Type of the indexed attribute. Is 2021 ATTR_TYPE type; /* Type of the indexed attribute. Is
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index f907611cca7..b341492542c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2473,7 +2473,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2473 nr_free -= PAGE_CACHE_SIZE * 8; 2473 nr_free -= PAGE_CACHE_SIZE * 8;
2474 continue; 2474 continue;
2475 } 2475 }
2476 kaddr = kmap_atomic(page, KM_USER0); 2476 kaddr = kmap_atomic(page);
2477 /* 2477 /*
2478 * Subtract the number of set bits. If this 2478 * Subtract the number of set bits. If this
2479 * is the last page and it is partial we don't really care as 2479 * is the last page and it is partial we don't really care as
@@ -2483,7 +2483,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2483 */ 2483 */
2484 nr_free -= bitmap_weight(kaddr, 2484 nr_free -= bitmap_weight(kaddr,
2485 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2485 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2486 kunmap_atomic(kaddr, KM_USER0); 2486 kunmap_atomic(kaddr);
2487 page_cache_release(page); 2487 page_cache_release(page);
2488 } 2488 }
2489 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); 2489 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
@@ -2544,7 +2544,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2544 nr_free -= PAGE_CACHE_SIZE * 8; 2544 nr_free -= PAGE_CACHE_SIZE * 8;
2545 continue; 2545 continue;
2546 } 2546 }
2547 kaddr = kmap_atomic(page, KM_USER0); 2547 kaddr = kmap_atomic(page);
2548 /* 2548 /*
2549 * Subtract the number of set bits. If this 2549 * Subtract the number of set bits. If this
2550 * is the last page and it is partial we don't really care as 2550 * is the last page and it is partial we don't really care as
@@ -2554,7 +2554,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2554 */ 2554 */
2555 nr_free -= bitmap_weight(kaddr, 2555 nr_free -= bitmap_weight(kaddr,
2556 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2556 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2557 kunmap_atomic(kaddr, KM_USER0); 2557 kunmap_atomic(kaddr);
2558 page_cache_release(page); 2558 page_cache_release(page);
2559 } 2559 }
2560 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2560 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
@@ -2908,9 +2908,10 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2908 ntfs_error(sb, "Failed to load system files."); 2908 ntfs_error(sb, "Failed to load system files.");
2909 goto unl_upcase_iput_tmp_ino_err_out_now; 2909 goto unl_upcase_iput_tmp_ino_err_out_now;
2910 } 2910 }
2911 if ((sb->s_root = d_alloc_root(vol->root_ino))) { 2911
2912 /* We grab a reference, simulating an ntfs_iget(). */ 2912 /* We grab a reference, simulating an ntfs_iget(). */
2913 ihold(vol->root_ino); 2913 ihold(vol->root_ino);
2914 if ((sb->s_root = d_make_root(vol->root_ino))) {
2914 ntfs_debug("Exiting, status successful."); 2915 ntfs_debug("Exiting, status successful.");
2915 /* Release the default upcase if it has no users. */ 2916 /* Release the default upcase if it has no users. */
2916 mutex_lock(&ntfs_lock); 2917 mutex_lock(&ntfs_lock);
@@ -3158,6 +3159,8 @@ static int __init init_ntfs_fs(void)
3158 } 3159 }
3159 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3160 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
3160 3161
3162 /* Unregister the ntfs sysctls. */
3163 ntfs_sysctl(0);
3161sysctl_err_out: 3164sysctl_err_out:
3162 kmem_cache_destroy(ntfs_big_inode_cache); 3165 kmem_cache_destroy(ntfs_big_inode_cache);
3163big_inode_err_out: 3166big_inode_err_out:
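Two fixes meet in this file. The ihold() moves ahead of d_make_root() because d_make_root() consumes one inode reference even on failure; taking the extra reference afterwards would touch an inode that may already be gone. Separately, the init error path gains the previously missing ntfs_sysctl(0) so a failed register_filesystem() unwinds the sysctl registration made earlier. A condensed sketch of the reference ordering (hypothetical variable names, not copied from the patch):

	/* Take our own reference first ... */
	ihold(root_inode);
	/* ... because d_make_root() eats one reference even on failure. */
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root) {
		iput(root_inode);	/* drop only the reference we took */
		return -ENOMEM;
	}
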
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3165aebb43c..31b9463fba1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1134,7 +1134,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1134 } 1134 }
1135 1135
1136 el = path_leaf_el(path); 1136 el = path_leaf_el(path);
1137 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; 1137 rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
1138 1138
1139 ocfs2_adjust_rightmost_records(handle, et, path, rec); 1139 ocfs2_adjust_rightmost_records(handle, et, path, rec);
1140 1140
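This hunk, and the matching ones in refcounttree.c and suballoc.c below, fix a recurring class of bug: reading little-endian 16-bit on-disk fields (l_next_free_rec, rl_used, rl_count, e_leaf_clusters) through le32_to_cpu() or le64_to_cpu(). On little-endian hosts the conversion is a no-op, so the mismatch is invisible; only big-endian hosts byte-swap the promoted value and read garbage. A small illustration, assuming the usual endian helpers:

	__le16 raw = cpu_to_le16(5);
	u16 ok;
	u32 bad;

	ok  = le16_to_cpu(raw);			/* 5 on every host */
	bad = le32_to_cpu((__force __le32)raw);
	/*
	 * Little-endian: le32_to_cpu() is a no-op, "bad" still reads 5
	 * and the bug hides.  Big-endian: the 32-bit swap of 0x00000500
	 * yields 0x00050000, a wildly wrong count.
	 */
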
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 78b68af3b0e..657743254eb 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -102,7 +102,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
102 * copy, the data is still good. */ 102 * copy, the data is still good. */
103 if (buffer_jbd(buffer_cache_bh) 103 if (buffer_jbd(buffer_cache_bh)
104 && ocfs2_inode_is_new(inode)) { 104 && ocfs2_inode_is_new(inode)) {
105 kaddr = kmap_atomic(bh_result->b_page, KM_USER0); 105 kaddr = kmap_atomic(bh_result->b_page);
106 if (!kaddr) { 106 if (!kaddr) {
107 mlog(ML_ERROR, "couldn't kmap!\n"); 107 mlog(ML_ERROR, "couldn't kmap!\n");
108 goto bail; 108 goto bail;
@@ -110,7 +110,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
110 memcpy(kaddr + (bh_result->b_size * iblock), 110 memcpy(kaddr + (bh_result->b_size * iblock),
111 buffer_cache_bh->b_data, 111 buffer_cache_bh->b_data,
112 bh_result->b_size); 112 bh_result->b_size);
113 kunmap_atomic(kaddr, KM_USER0); 113 kunmap_atomic(kaddr);
114 set_buffer_uptodate(bh_result); 114 set_buffer_uptodate(bh_result);
115 } 115 }
116 brelse(buffer_cache_bh); 116 brelse(buffer_cache_bh);
@@ -236,13 +236,13 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
236 return -EROFS; 236 return -EROFS;
237 } 237 }
238 238
239 kaddr = kmap_atomic(page, KM_USER0); 239 kaddr = kmap_atomic(page);
240 if (size) 240 if (size)
241 memcpy(kaddr, di->id2.i_data.id_data, size); 241 memcpy(kaddr, di->id2.i_data.id_data, size);
242 /* Clear the remaining part of the page */ 242 /* Clear the remaining part of the page */
243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size); 243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
244 flush_dcache_page(page); 244 flush_dcache_page(page);
245 kunmap_atomic(kaddr, KM_USER0); 245 kunmap_atomic(kaddr);
246 246
247 SetPageUptodate(page); 247 SetPageUptodate(page);
248 248
@@ -689,7 +689,7 @@ static void ocfs2_clear_page_regions(struct page *page,
689 689
690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); 690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
691 691
692 kaddr = kmap_atomic(page, KM_USER0); 692 kaddr = kmap_atomic(page);
693 693
694 if (from || to) { 694 if (from || to) {
695 if (from > cluster_start) 695 if (from > cluster_start)
@@ -700,7 +700,7 @@ static void ocfs2_clear_page_regions(struct page *page,
700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start); 700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
701 } 701 }
702 702
703 kunmap_atomic(kaddr, KM_USER0); 703 kunmap_atomic(kaddr);
704} 704}
705 705
706/* 706/*
@@ -1981,9 +1981,9 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1981 } 1981 }
1982 } 1982 }
1983 1983
1984 kaddr = kmap_atomic(wc->w_target_page, KM_USER0); 1984 kaddr = kmap_atomic(wc->w_target_page);
1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1986 kunmap_atomic(kaddr, KM_USER0); 1986 kunmap_atomic(kaddr);
1987 1987
1988 trace_ocfs2_write_end_inline( 1988 trace_ocfs2_write_end_inline(
1989 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1989 (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index abfac0d7ae9..3b5825ef319 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -582,24 +582,14 @@ static int dlmfs_fill_super(struct super_block * sb,
582 void * data, 582 void * data,
583 int silent) 583 int silent)
584{ 584{
585 struct inode * inode;
586 struct dentry * root;
587
588 sb->s_maxbytes = MAX_LFS_FILESIZE; 585 sb->s_maxbytes = MAX_LFS_FILESIZE;
589 sb->s_blocksize = PAGE_CACHE_SIZE; 586 sb->s_blocksize = PAGE_CACHE_SIZE;
590 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 587 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
591 sb->s_magic = DLMFS_MAGIC; 588 sb->s_magic = DLMFS_MAGIC;
592 sb->s_op = &dlmfs_ops; 589 sb->s_op = &dlmfs_ops;
593 inode = dlmfs_get_root_inode(sb); 590 sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
594 if (!inode) 591 if (!sb->s_root)
595 return -ENOMEM;
596
597 root = d_alloc_root(inode);
598 if (!root) {
599 iput(inode);
600 return -ENOMEM; 592 return -ENOMEM;
601 }
602 sb->s_root = root;
603 return 0; 593 return 0;
604} 594}
605 595
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index a6fda3c188a..a1a1bfd652c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -28,8 +28,6 @@
28#include "suballoc.h" 28#include "suballoc.h"
29#include "move_extents.h" 29#include "move_extents.h"
30 30
31#include <linux/ext2_fs.h>
32
33#define o2info_from_user(a, b) \ 31#define o2info_from_user(a, b) \
34 copy_from_user(&(a), (b), sizeof(a)) 32 copy_from_user(&(a), (b), sizeof(a))
35#define o2info_to_user(a, b) \ 33#define o2info_to_user(a, b) \
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index cf782338266..9f32d7cbb7a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1036,14 +1036,14 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
1036 1036
1037 tmp_el = left_path->p_node[subtree_root].el; 1037 tmp_el = left_path->p_node[subtree_root].el;
1038 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; 1038 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
1039 for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { 1039 for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
1040 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { 1040 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
1041 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); 1041 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
1042 break; 1042 break;
1043 } 1043 }
1044 } 1044 }
1045 1045
1046 BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); 1046 BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));
1047 1047
1048out: 1048out:
1049 ocfs2_free_path(left_path); 1049 ocfs2_free_path(left_path);
@@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1468 1468
1469 trace_ocfs2_divide_leaf_refcount_block( 1469 trace_ocfs2_divide_leaf_refcount_block(
1470 (unsigned long long)ref_leaf_bh->b_blocknr, 1470 (unsigned long long)ref_leaf_bh->b_blocknr,
1471 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); 1471 le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
1472 1472
1473 /* 1473 /*
1474 * XXX: Improvement later. 1474 * XXX: Improvement later.
@@ -2411,7 +2411,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2411 rb = (struct ocfs2_refcount_block *) 2411 rb = (struct ocfs2_refcount_block *)
2412 prev_bh->b_data; 2412 prev_bh->b_data;
2413 2413
2414 if (le64_to_cpu(rb->rf_records.rl_used) + 2414 if (le16_to_cpu(rb->rf_records.rl_used) +
2415 recs_add > 2415 recs_add >
2416 le16_to_cpu(rb->rf_records.rl_count)) 2416 le16_to_cpu(rb->rf_records.rl_count))
2417 ref_blocks++; 2417 ref_blocks++;
@@ -2476,7 +2476,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2476 if (prev_bh) { 2476 if (prev_bh) {
2477 rb = (struct ocfs2_refcount_block *)prev_bh->b_data; 2477 rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
2478 2478
2479 if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > 2479 if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
2480 le16_to_cpu(rb->rf_records.rl_count)) 2480 le16_to_cpu(rb->rf_records.rl_count))
2481 ref_blocks++; 2481 ref_blocks++;
2482 2482
@@ -3629,7 +3629,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
3629 * one will split a refcount rec, so totally we need 3629 * one will split a refcount rec, so totally we need
3630 * clusters * 2 new refcount rec. 3630 * clusters * 2 new refcount rec.
3631 */ 3631 */
3632 if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 > 3632 if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
3633 le16_to_cpu(rb->rf_records.rl_count)) 3633 le16_to_cpu(rb->rf_records.rl_count))
3634 ref_blocks++; 3634 ref_blocks++;
3635 3635
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ba5d97e4a73..f169da4624f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -600,7 +600,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle,
600 ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode, 600 ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
601 cluster_ac->ac_bh, 601 cluster_ac->ac_bh,
602 le64_to_cpu(rec->e_blkno), 602 le64_to_cpu(rec->e_blkno),
603 le32_to_cpu(rec->e_leaf_clusters)); 603 le16_to_cpu(rec->e_leaf_clusters));
604 if (ret) 604 if (ret)
605 mlog_errno(ret); 605 mlog_errno(ret);
606 /* Try all the clusters to free */ 606 /* Try all the clusters to free */
@@ -1628,7 +1628,7 @@ static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1628{ 1628{
1629 unsigned int bpc = le16_to_cpu(cl->cl_bpc); 1629 unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1630 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc; 1630 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1631 unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc; 1631 unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
1632 1632
1633 if (res->sr_bit_offset < bitoff) 1633 if (res->sr_bit_offset < bitoff)
1634 return 0; 1634 return 0;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 604e12c4e97..68f4541c2db 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1154,19 +1154,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1154 } 1154 }
1155 1155
1156 status = ocfs2_mount_volume(sb); 1156 status = ocfs2_mount_volume(sb);
1157 if (osb->root_inode)
1158 inode = igrab(osb->root_inode);
1159
1160 if (status < 0) 1157 if (status < 0)
1161 goto read_super_error; 1158 goto read_super_error;
1162 1159
1160 if (osb->root_inode)
1161 inode = igrab(osb->root_inode);
1162
1163 if (!inode) { 1163 if (!inode) {
1164 status = -EIO; 1164 status = -EIO;
1165 mlog_errno(status); 1165 mlog_errno(status);
1166 goto read_super_error; 1166 goto read_super_error;
1167 } 1167 }
1168 1168
1169 root = d_alloc_root(inode); 1169 root = d_make_root(inode);
1170 if (!root) { 1170 if (!root) {
1171 status = -ENOMEM; 1171 status = -ENOMEM;
1172 mlog_errno(status); 1172 mlog_errno(status);
@@ -1220,9 +1220,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1220read_super_error: 1220read_super_error:
1221 brelse(bh); 1221 brelse(bh);
1222 1222
1223 if (inode)
1224 iput(inode);
1225
1226 if (osb) { 1223 if (osb) {
1227 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1224 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1228 wake_up(&osb->osb_mount_event); 1225 wake_up(&osb->osb_mount_event);
@@ -1627,21 +1624,17 @@ static int __init ocfs2_init(void)
1627 init_waitqueue_head(&ocfs2__ioend_wq[i]); 1624 init_waitqueue_head(&ocfs2__ioend_wq[i]);
1628 1625
1629 status = init_ocfs2_uptodate_cache(); 1626 status = init_ocfs2_uptodate_cache();
1630 if (status < 0) { 1627 if (status < 0)
1631 mlog_errno(status); 1628 goto out1;
1632 goto leave;
1633 }
1634 1629
1635 status = ocfs2_initialize_mem_caches(); 1630 status = ocfs2_initialize_mem_caches();
1636 if (status < 0) { 1631 if (status < 0)
1637 mlog_errno(status); 1632 goto out2;
1638 goto leave;
1639 }
1640 1633
1641 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); 1634 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
1642 if (!ocfs2_wq) { 1635 if (!ocfs2_wq) {
1643 status = -ENOMEM; 1636 status = -ENOMEM;
1644 goto leave; 1637 goto out3;
1645 } 1638 }
1646 1639
1647 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 1640 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
@@ -1653,17 +1646,23 @@ static int __init ocfs2_init(void)
1653 ocfs2_set_locking_protocol(); 1646 ocfs2_set_locking_protocol();
1654 1647
1655 status = register_quota_format(&ocfs2_quota_format); 1648 status = register_quota_format(&ocfs2_quota_format);
1656leave: 1649 if (status < 0)
1657 if (status < 0) { 1650 goto out4;
1658 ocfs2_free_mem_caches(); 1651 status = register_filesystem(&ocfs2_fs_type);
1659 exit_ocfs2_uptodate_cache(); 1652 if (!status)
1660 mlog_errno(status); 1653 return 0;
1661 }
1662 1654
1663 if (status >= 0) { 1655 unregister_quota_format(&ocfs2_quota_format);
1664 return register_filesystem(&ocfs2_fs_type); 1656out4:
1665 } else 1657 destroy_workqueue(ocfs2_wq);
1666 return -1; 1658 debugfs_remove(ocfs2_debugfs_root);
1659out3:
1660 ocfs2_free_mem_caches();
1661out2:
1662 exit_ocfs2_uptodate_cache();
1663out1:
1664 mlog_errno(status);
1665 return status;
1667} 1666}
1668 1667
1669static void __exit ocfs2_exit(void) 1668static void __exit ocfs2_exit(void)
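The rewritten ocfs2_init() replaces a single shared "leave" label, which tore down everything no matter how far setup had progressed, with the conventional goto ladder: each failure jumps to a label that unwinds exactly the steps already completed, in reverse order. A condensed sketch of the shape, with hypothetical setup/teardown helpers:

	static int __init example_init(void)
	{
		int err;

		err = setup_a();
		if (err)
			goto out;
		err = setup_b();
		if (err)
			goto undo_a;
		err = setup_c();
		if (err)
			goto undo_b;
		return 0;

	undo_b:
		teardown_b();		/* undo only what succeeded */
	undo_a:
		teardown_a();
	out:
		return err;
	}
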
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 6065bb0ba20..dbc84222258 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -539,11 +539,9 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
539 goto out_brelse_bh2; 539 goto out_brelse_bh2;
540 } 540 }
541 541
542 sb->s_root = d_alloc_root(root); 542 sb->s_root = d_make_root(root);
543 if (!sb->s_root) { 543 if (!sb->s_root)
544 iput(root);
545 goto out_brelse_bh2; 544 goto out_brelse_bh2;
546 }
547 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); 545 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
548 546
549 ret = 0; 547 ret = 0;
diff --git a/fs/open.c b/fs/open.c
index 77becc04114..5720854156d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -836,7 +836,7 @@ EXPORT_SYMBOL(dentry_open);
836static void __put_unused_fd(struct files_struct *files, unsigned int fd) 836static void __put_unused_fd(struct files_struct *files, unsigned int fd)
837{ 837{
838 struct fdtable *fdt = files_fdtable(files); 838 struct fdtable *fdt = files_fdtable(files);
839 __FD_CLR(fd, fdt->open_fds); 839 __clear_open_fd(fd, fdt);
840 if (fd < files->next_fd) 840 if (fd < files->next_fd)
841 files->next_fd = fd; 841 files->next_fd = fd;
842} 842}
@@ -1080,7 +1080,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
1080 if (!filp) 1080 if (!filp)
1081 goto out_unlock; 1081 goto out_unlock;
1082 rcu_assign_pointer(fdt->fd[fd], NULL); 1082 rcu_assign_pointer(fdt->fd[fd], NULL);
1083 FD_CLR(fd, fdt->close_on_exec); 1083 __clear_close_on_exec(fd, fdt);
1084 __put_unused_fd(files, fd); 1084 __put_unused_fd(files, fd);
1085 spin_unlock(&files->file_lock); 1085 spin_unlock(&files->file_lock);
1086 retval = filp_close(filp, files); 1086 retval = filp_close(filp, files);
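These open.c hunks track the fdtable's switch from fd_set to plain unsigned long bitmaps: the FD_* macros borrowed from the select() API give way to dedicated helpers built on the kernel bitops. A sketch of what such helpers look like, assuming the fdtable fields are unsigned long arrays (a hedged reconstruction, not copied from linux/fdtable.h):

	static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
	{
		__clear_bit(fd, fdt->open_fds);
	}

	static inline void __clear_close_on_exec(unsigned int fd,
						 struct fdtable *fdt)
	{
		__clear_bit(fd, fdt->close_on_exec);
	}
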
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a88c03bc749..bc49c975d50 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -408,13 +408,12 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
408 oi->type = op_inode_node; 408 oi->type = op_inode_node;
409 oi->u.node = of_find_node_by_path("/"); 409 oi->u.node = of_find_node_by_path("/");
410 410
411 s->s_root = d_alloc_root(root_inode); 411 s->s_root = d_make_root(root_inode);
412 if (!s->s_root) 412 if (!s->s_root)
413 goto out_no_root_dentry; 413 goto out_no_root_dentry;
414 return 0; 414 return 0;
415 415
416out_no_root_dentry: 416out_no_root_dentry:
417 iput(root_inode);
418 ret = -ENOMEM; 417 ret = -ENOMEM;
419out_no_root: 418out_no_root:
420 printk("openprom_fill_super: get root inode failed\n"); 419 printk("openprom_fill_super: get root inode failed\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index a932ced92a1..fec5e4ad071 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -13,6 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/log2.h> 14#include <linux/log2.h>
15#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/magic.h>
16#include <linux/pipe_fs_i.h> 17#include <linux/pipe_fs_i.h>
17#include <linux/uio.h> 18#include <linux/uio.h>
18#include <linux/highmem.h> 19#include <linux/highmem.h>
@@ -230,7 +231,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
230{ 231{
231 if (atomic) { 232 if (atomic) {
232 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 233 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
233 return kmap_atomic(buf->page, KM_USER0); 234 return kmap_atomic(buf->page);
234 } 235 }
235 236
236 return kmap(buf->page); 237 return kmap(buf->page);
@@ -251,7 +252,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
251{ 252{
252 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 253 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
253 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 254 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
254 kunmap_atomic(map_data, KM_USER0); 255 kunmap_atomic(map_data);
255 } else 256 } else
256 kunmap(buf->page); 257 kunmap(buf->page);
257} 258}
@@ -345,6 +346,16 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
345 .get = generic_pipe_buf_get, 346 .get = generic_pipe_buf_get,
346}; 347};
347 348
349static const struct pipe_buf_operations packet_pipe_buf_ops = {
350 .can_merge = 0,
351 .map = generic_pipe_buf_map,
352 .unmap = generic_pipe_buf_unmap,
353 .confirm = generic_pipe_buf_confirm,
354 .release = anon_pipe_buf_release,
355 .steal = generic_pipe_buf_steal,
356 .get = generic_pipe_buf_get,
357};
358
348static ssize_t 359static ssize_t
349pipe_read(struct kiocb *iocb, const struct iovec *_iov, 360pipe_read(struct kiocb *iocb, const struct iovec *_iov,
350 unsigned long nr_segs, loff_t pos) 361 unsigned long nr_segs, loff_t pos)
@@ -406,6 +417,13 @@ redo:
406 ret += chars; 417 ret += chars;
407 buf->offset += chars; 418 buf->offset += chars;
408 buf->len -= chars; 419 buf->len -= chars;
420
421 /* Was it a packet buffer? Clean up and exit */
422 if (buf->flags & PIPE_BUF_FLAG_PACKET) {
423 total_len = chars;
424 buf->len = 0;
425 }
426
409 if (!buf->len) { 427 if (!buf->len) {
410 buf->ops = NULL; 428 buf->ops = NULL;
411 ops->release(pipe, buf); 429 ops->release(pipe, buf);
@@ -458,6 +476,11 @@ redo:
458 return ret; 476 return ret;
459} 477}
460 478
479static inline int is_packetized(struct file *file)
480{
481 return (file->f_flags & O_DIRECT) != 0;
482}
483
461static ssize_t 484static ssize_t
462pipe_write(struct kiocb *iocb, const struct iovec *_iov, 485pipe_write(struct kiocb *iocb, const struct iovec *_iov,
463 unsigned long nr_segs, loff_t ppos) 486 unsigned long nr_segs, loff_t ppos)
@@ -565,14 +588,14 @@ redo1:
565 iov_fault_in_pages_read(iov, chars); 588 iov_fault_in_pages_read(iov, chars);
566redo2: 589redo2:
567 if (atomic) 590 if (atomic)
568 src = kmap_atomic(page, KM_USER0); 591 src = kmap_atomic(page);
569 else 592 else
570 src = kmap(page); 593 src = kmap(page);
571 594
572 error = pipe_iov_copy_from_user(src, iov, chars, 595 error = pipe_iov_copy_from_user(src, iov, chars,
573 atomic); 596 atomic);
574 if (atomic) 597 if (atomic)
575 kunmap_atomic(src, KM_USER0); 598 kunmap_atomic(src);
576 else 599 else
577 kunmap(page); 600 kunmap(page);
578 601
@@ -592,6 +615,11 @@ redo2:
592 buf->ops = &anon_pipe_buf_ops; 615 buf->ops = &anon_pipe_buf_ops;
593 buf->offset = 0; 616 buf->offset = 0;
594 buf->len = chars; 617 buf->len = chars;
618 buf->flags = 0;
619 if (is_packetized(filp)) {
620 buf->ops = &packet_pipe_buf_ops;
621 buf->flags = PIPE_BUF_FLAG_PACKET;
622 }
595 pipe->nrbufs = ++bufs; 623 pipe->nrbufs = ++bufs;
596 pipe->tmp_page = NULL; 624 pipe->tmp_page = NULL;
597 625
@@ -1012,7 +1040,7 @@ struct file *create_write_pipe(int flags)
1012 goto err_dentry; 1040 goto err_dentry;
1013 f->f_mapping = inode->i_mapping; 1041 f->f_mapping = inode->i_mapping;
1014 1042
1015 f->f_flags = O_WRONLY | (flags & O_NONBLOCK); 1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
1016 f->f_version = 0; 1044 f->f_version = 0;
1017 1045
1018 return f; 1046 return f;
@@ -1056,7 +1084,7 @@ int do_pipe_flags(int *fd, int flags)
1056 int error; 1084 int error;
1057 int fdw, fdr; 1085 int fdw, fdr;
1058 1086
1059 if (flags & ~(O_CLOEXEC | O_NONBLOCK)) 1087 if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
1060 return -EINVAL; 1088 return -EINVAL;
1061 1089
1062 fw = create_write_pipe(flags); 1090 fw = create_write_pipe(flags);
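The pipe changes implement O_DIRECT "packet mode": each write becomes one packet, and a read returns at most one packet, discarding any unread remainder (the buf->len = 0 above). From userspace the feature is reached through pipe2(). A small usage sketch, assuming a kernel carrying this patch:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fds[2];
		char buf[64];
		ssize_t n;

		if (pipe2(fds, O_DIRECT) < 0)	/* packetized pipe */
			return 1;

		write(fds[1], "hello", 5);	/* first packet */
		write(fds[1], "world", 5);	/* second packet */

		n = read(fds[0], buf, sizeof(buf));
		printf("%zd\n", n);		/* 5, not 10: one packet */
		return 0;
	}
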
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index cea4623f1ed..5e325a42e33 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -18,7 +18,7 @@
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/posix_acl.h> 20#include <linux/posix_acl.h>
21#include <linux/module.h> 21#include <linux/export.h>
22 22
23#include <linux/errno.h> 23#include <linux/errno.h>
24 24
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c602b8d20f0..f9bd395b347 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,59 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
462 /* convert nsec -> ticks */ 462 /* convert nsec -> ticks */
463 start_time = nsec_to_clock_t(start_time); 463 start_time = nsec_to_clock_t(start_time);
464 464
465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ 465 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 466 seq_put_decimal_ll(m, ' ', ppid);
467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", 467 seq_put_decimal_ll(m, ' ', pgid);
468 pid_nr_ns(pid, ns), 468 seq_put_decimal_ll(m, ' ', sid);
469 tcomm, 469 seq_put_decimal_ll(m, ' ', tty_nr);
470 state, 470 seq_put_decimal_ll(m, ' ', tty_pgrp);
471 ppid, 471 seq_put_decimal_ull(m, ' ', task->flags);
472 pgid, 472 seq_put_decimal_ull(m, ' ', min_flt);
473 sid, 473 seq_put_decimal_ull(m, ' ', cmin_flt);
474 tty_nr, 474 seq_put_decimal_ull(m, ' ', maj_flt);
475 tty_pgrp, 475 seq_put_decimal_ull(m, ' ', cmaj_flt);
476 task->flags, 476 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
477 min_flt, 477 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
478 cmin_flt, 478 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
479 maj_flt, 479 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
480 cmaj_flt, 480 seq_put_decimal_ll(m, ' ', priority);
481 cputime_to_clock_t(utime), 481 seq_put_decimal_ll(m, ' ', nice);
482 cputime_to_clock_t(stime), 482 seq_put_decimal_ll(m, ' ', num_threads);
483 cputime_to_clock_t(cutime), 483 seq_put_decimal_ull(m, ' ', 0);
484 cputime_to_clock_t(cstime), 484 seq_put_decimal_ull(m, ' ', start_time);
485 priority, 485 seq_put_decimal_ull(m, ' ', vsize);
486 nice, 486 seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0);
487 num_threads, 487 seq_put_decimal_ull(m, ' ', rsslim);
488 start_time, 488 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
489 vsize, 489 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
490 mm ? get_mm_rss(mm) : 0, 490 seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
491 rsslim, 491 seq_put_decimal_ull(m, ' ', esp);
492 mm ? (permitted ? mm->start_code : 1) : 0, 492 seq_put_decimal_ull(m, ' ', eip);
493 mm ? (permitted ? mm->end_code : 1) : 0, 493 /* The signal information here is obsolete.
494 (permitted && mm) ? mm->start_stack : 0, 494 * It must be decimal for Linux 2.0 compatibility.
495 esp, 495 * Use /proc/#/status for real-time signals.
496 eip, 496 */
497 /* The signal information here is obsolete. 497 seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
498 * It must be decimal for Linux 2.0 compatibility. 498 seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
499 * Use /proc/#/status for real-time signals. 499 seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
500 */ 500 seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
501 task->pending.signal.sig[0] & 0x7fffffffUL, 501 seq_put_decimal_ull(m, ' ', wchan);
502 task->blocked.sig[0] & 0x7fffffffUL, 502 seq_put_decimal_ull(m, ' ', 0);
503 sigign .sig[0] & 0x7fffffffUL, 503 seq_put_decimal_ull(m, ' ', 0);
504 sigcatch .sig[0] & 0x7fffffffUL, 504 seq_put_decimal_ll(m, ' ', task->exit_signal);
505 wchan, 505 seq_put_decimal_ll(m, ' ', task_cpu(task));
506 0UL, 506 seq_put_decimal_ull(m, ' ', task->rt_priority);
507 0UL, 507 seq_put_decimal_ull(m, ' ', task->policy);
508 task->exit_signal, 508 seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
509 task_cpu(task), 509 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
510 task->rt_priority, 510 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
511 task->policy, 511 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0);
512 (unsigned long long)delayacct_blkio_ticks(task), 512 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0);
513 cputime_to_clock_t(gtime), 513 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0);
514 cputime_to_clock_t(cgtime), 514 seq_putc(m, '\n');
515 (mm && permitted) ? mm->start_data : 0,
516 (mm && permitted) ? mm->end_data : 0,
517 (mm && permitted) ? mm->start_brk : 0);
518 if (mm) 515 if (mm)
519 mmput(mm); 516 mmput(mm);
520 return 0; 517 return 0;
@@ -542,8 +539,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
542 size = task_statm(mm, &shared, &text, &data, &resident); 539 size = task_statm(mm, &shared, &text, &data, &resident);
543 mmput(mm); 540 mmput(mm);
544 } 541 }
545 seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", 542 /*
546 size, resident, shared, text, data); 543 * For quick read, open code by putting numbers directly
544 * expected format is
545 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
546 * size, resident, shared, text, data);
547 */
548 seq_put_decimal_ull(m, 0, size);
549 seq_put_decimal_ull(m, ' ', resident);
550 seq_put_decimal_ull(m, ' ', shared);
551 seq_put_decimal_ull(m, ' ', text);
552 seq_put_decimal_ull(m, ' ', 0);
553 seq_put_decimal_ull(m, ' ', data);
554 seq_put_decimal_ull(m, ' ', 0);
555 seq_putc(m, '\n');
547 556
548 return 0; 557 return 0;
549} 558}
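Replacing the single giant seq_printf() with seq_put_decimal_ull()/seq_put_decimal_ll() skips format-string parsing for what is pure decimal output, a measurable win on /proc/<pid>/stat and statm, which top and ps re-read constantly. Each helper emits an optional one-character delimiter followed by the number. The pattern, in miniature:

	/* Emits "123 456\n" without any printf-style parsing. */
	seq_put_decimal_ull(m, 0, 123);		/* 0: no leading delimiter */
	seq_put_decimal_ull(m, ' ', 456);	/* ' ' then the value */
	seq_putc(m, '\n');
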
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b0..57b8159f26f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1310 if (!p) 1310 if (!p)
1311 return -ESRCH; 1311 return -ESRCH;
1312 1312
1313 err = nice; 1313 err = proc_sched_autogroup_set_nice(p, nice);
1314 err = proc_sched_autogroup_set_nice(p, &err);
1315 if (err) 1314 if (err)
1316 count = err; 1315 count = err;
1317 1316
@@ -1754,7 +1753,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1754 1753
1755 fdt = files_fdtable(files); 1754 fdt = files_fdtable(files);
1756 f_flags = file->f_flags & ~O_CLOEXEC; 1755 f_flags = file->f_flags & ~O_CLOEXEC;
1757 if (FD_ISSET(fd, fdt->close_on_exec)) 1756 if (close_on_exec(fd, fdt))
1758 f_flags |= O_CLOEXEC; 1757 f_flags |= O_CLOEXEC;
1759 1758
1760 if (path) { 1759 if (path) {
@@ -1800,10 +1799,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1800 if (task) { 1799 if (task) {
1801 files = get_files_struct(task); 1800 files = get_files_struct(task);
1802 if (files) { 1801 if (files) {
1802 struct file *file;
1803 rcu_read_lock(); 1803 rcu_read_lock();
1804 if (fcheck_files(files, fd)) { 1804 file = fcheck_files(files, fd);
1805 if (file) {
1806 unsigned i_mode, f_mode = file->f_mode;
1807
1805 rcu_read_unlock(); 1808 rcu_read_unlock();
1806 put_files_struct(files); 1809 put_files_struct(files);
1810
1807 if (task_dumpable(task)) { 1811 if (task_dumpable(task)) {
1808 rcu_read_lock(); 1812 rcu_read_lock();
1809 cred = __task_cred(task); 1813 cred = __task_cred(task);
@@ -1814,7 +1818,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1814 inode->i_uid = 0; 1818 inode->i_uid = 0;
1815 inode->i_gid = 0; 1819 inode->i_gid = 0;
1816 } 1820 }
1817 inode->i_mode &= ~(S_ISUID | S_ISGID); 1821
1822 i_mode = S_IFLNK;
1823 if (f_mode & FMODE_READ)
1824 i_mode |= S_IRUSR | S_IXUSR;
1825 if (f_mode & FMODE_WRITE)
1826 i_mode |= S_IWUSR | S_IXUSR;
1827 inode->i_mode = i_mode;
1828
1818 security_task_to_inode(task, inode); 1829 security_task_to_inode(task, inode);
1819 put_task_struct(task); 1830 put_task_struct(task);
1820 return 1; 1831 return 1;
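The revalidate hunk above now derives the /proc/<pid>/fd symlink mode from the open file's f_mode rather than merely stripping the setuid/setgid bits. The mapping, pulled out as a standalone (hypothetical) helper:

/* Translate a struct file's FMODE_* access bits into the i_mode of
 * the corresponding /proc/<pid>/fd/<n> symlink. */
static umode_t fd_link_mode(unsigned f_mode)
{
	umode_t i_mode = S_IFLNK;

	if (f_mode & FMODE_READ)
		i_mode |= S_IRUSR | S_IXUSR;
	if (f_mode & FMODE_WRITE)
		i_mode |= S_IWUSR | S_IXUSR;
	return i_mode;
}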
@@ -1838,8 +1849,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1838 struct dentry *dentry, struct task_struct *task, const void *ptr) 1849 struct dentry *dentry, struct task_struct *task, const void *ptr)
1839{ 1850{
1840 unsigned fd = *(const unsigned *)ptr; 1851 unsigned fd = *(const unsigned *)ptr;
1841 struct file *file;
1842 struct files_struct *files;
1843 struct inode *inode; 1852 struct inode *inode;
1844 struct proc_inode *ei; 1853 struct proc_inode *ei;
1845 struct dentry *error = ERR_PTR(-ENOENT); 1854 struct dentry *error = ERR_PTR(-ENOENT);
@@ -1849,25 +1858,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1849 goto out; 1858 goto out;
1850 ei = PROC_I(inode); 1859 ei = PROC_I(inode);
1851 ei->fd = fd; 1860 ei->fd = fd;
1852 files = get_files_struct(task);
1853 if (!files)
1854 goto out_iput;
1855 inode->i_mode = S_IFLNK;
1856
1857 /*
1858 * We are not taking a ref to the file structure, so we must
1859 * hold ->file_lock.
1860 */
1861 spin_lock(&files->file_lock);
1862 file = fcheck_files(files, fd);
1863 if (!file)
1864 goto out_unlock;
1865 if (file->f_mode & FMODE_READ)
1866 inode->i_mode |= S_IRUSR | S_IXUSR;
1867 if (file->f_mode & FMODE_WRITE)
1868 inode->i_mode |= S_IWUSR | S_IXUSR;
1869 spin_unlock(&files->file_lock);
1870 put_files_struct(files);
1871 1861
1872 inode->i_op = &proc_pid_link_inode_operations; 1862 inode->i_op = &proc_pid_link_inode_operations;
1873 inode->i_size = 64; 1863 inode->i_size = 64;
@@ -1880,12 +1870,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1880 1870
1881 out: 1871 out:
1882 return error; 1872 return error;
1883out_unlock:
1884 spin_unlock(&files->file_lock);
1885 put_files_struct(files);
1886out_iput:
1887 iput(inode);
1888 goto out;
1889} 1873}
1890 1874
1891static struct dentry *proc_lookupfd_common(struct inode *dir, 1875static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -2178,16 +2162,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
2178 goto out; 2162 goto out;
2179 2163
2180 result = ERR_PTR(-EACCES); 2164 result = ERR_PTR(-EACCES);
2181 if (lock_trace(task)) 2165 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2182 goto out_put_task; 2166 goto out_put_task;
2183 2167
2184 result = ERR_PTR(-ENOENT); 2168 result = ERR_PTR(-ENOENT);
2185 if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) 2169 if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2186 goto out_unlock; 2170 goto out_put_task;
2187 2171
2188 mm = get_task_mm(task); 2172 mm = get_task_mm(task);
2189 if (!mm) 2173 if (!mm)
2190 goto out_unlock; 2174 goto out_put_task;
2191 2175
2192 down_read(&mm->mmap_sem); 2176 down_read(&mm->mmap_sem);
2193 vma = find_exact_vma(mm, vm_start, vm_end); 2177 vma = find_exact_vma(mm, vm_start, vm_end);
@@ -2199,8 +2183,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
2199out_no_vma: 2183out_no_vma:
2200 up_read(&mm->mmap_sem); 2184 up_read(&mm->mmap_sem);
2201 mmput(mm); 2185 mmput(mm);
2202out_unlock:
2203 unlock_trace(task);
2204out_put_task: 2186out_put_task:
2205 put_task_struct(task); 2187 put_task_struct(task);
2206out: 2188out:
@@ -2234,7 +2216,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2234 goto out; 2216 goto out;
2235 2217
2236 ret = -EACCES; 2218 ret = -EACCES;
2237 if (lock_trace(task)) 2219 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2238 goto out_put_task; 2220 goto out_put_task;
2239 2221
2240 ret = 0; 2222 ret = 0;
@@ -2242,12 +2224,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2242 case 0: 2224 case 0:
2243 ino = inode->i_ino; 2225 ino = inode->i_ino;
2244 if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) 2226 if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
2245 goto out_unlock; 2227 goto out_put_task;
2246 filp->f_pos++; 2228 filp->f_pos++;
2247 case 1: 2229 case 1:
2248 ino = parent_ino(dentry); 2230 ino = parent_ino(dentry);
2249 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2231 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2250 goto out_unlock; 2232 goto out_put_task;
2251 filp->f_pos++; 2233 filp->f_pos++;
2252 default: 2234 default:
2253 { 2235 {
@@ -2258,7 +2240,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2258 2240
2259 mm = get_task_mm(task); 2241 mm = get_task_mm(task);
2260 if (!mm) 2242 if (!mm)
2261 goto out_unlock; 2243 goto out_put_task;
2262 down_read(&mm->mmap_sem); 2244 down_read(&mm->mmap_sem);
2263 2245
2264 nr_files = 0; 2246 nr_files = 0;
@@ -2288,7 +2270,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2288 flex_array_free(fa); 2270 flex_array_free(fa);
2289 up_read(&mm->mmap_sem); 2271 up_read(&mm->mmap_sem);
2290 mmput(mm); 2272 mmput(mm);
2291 goto out_unlock; 2273 goto out_put_task;
2292 } 2274 }
2293 for (i = 0, vma = mm->mmap, pos = 2; vma; 2275 for (i = 0, vma = mm->mmap, pos = 2; vma;
2294 vma = vma->vm_next) { 2276 vma = vma->vm_next) {
@@ -2333,8 +2315,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2333 } 2315 }
2334 } 2316 }
2335 2317
2336out_unlock:
2337 unlock_trace(task);
2338out_put_task: 2318out_put_task:
2339 put_task_struct(task); 2319 put_task_struct(task);
2340out: 2320out:
@@ -2990,9 +2970,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2990 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2970 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2991 ONE("stat", S_IRUGO, proc_tgid_stat), 2971 ONE("stat", S_IRUGO, proc_tgid_stat),
2992 ONE("statm", S_IRUGO, proc_pid_statm), 2972 ONE("statm", S_IRUGO, proc_pid_statm),
2993 REG("maps", S_IRUGO, proc_maps_operations), 2973 REG("maps", S_IRUGO, proc_pid_maps_operations),
2994#ifdef CONFIG_NUMA 2974#ifdef CONFIG_NUMA
2995 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2975 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
2996#endif 2976#endif
2997 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2977 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2998 LNK("cwd", proc_cwd_link), 2978 LNK("cwd", proc_cwd_link),
@@ -3003,7 +2983,7 @@ static const struct pid_entry tgid_base_stuff[] = {
3003 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2983 REG("mountstats", S_IRUSR, proc_mountstats_operations),
3004#ifdef CONFIG_PROC_PAGE_MONITOR 2984#ifdef CONFIG_PROC_PAGE_MONITOR
3005 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2985 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3006 REG("smaps", S_IRUGO, proc_smaps_operations), 2986 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
3007 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2987 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3008#endif 2988#endif
3009#ifdef CONFIG_SECURITY 2989#ifdef CONFIG_SECURITY
@@ -3349,9 +3329,9 @@ static const struct pid_entry tid_base_stuff[] = {
3349 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3329 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3350 ONE("stat", S_IRUGO, proc_tid_stat), 3330 ONE("stat", S_IRUGO, proc_tid_stat),
3351 ONE("statm", S_IRUGO, proc_pid_statm), 3331 ONE("statm", S_IRUGO, proc_pid_statm),
3352 REG("maps", S_IRUGO, proc_maps_operations), 3332 REG("maps", S_IRUGO, proc_tid_maps_operations),
3353#ifdef CONFIG_NUMA 3333#ifdef CONFIG_NUMA
3354 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 3334 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
3355#endif 3335#endif
3356 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3336 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
3357 LNK("cwd", proc_cwd_link), 3337 LNK("cwd", proc_cwd_link),
@@ -3361,7 +3341,7 @@ static const struct pid_entry tid_base_stuff[] = {
3361 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3341 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3362#ifdef CONFIG_PROC_PAGE_MONITOR 3342#ifdef CONFIG_PROC_PAGE_MONITOR
3363 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3343 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3364 REG("smaps", S_IRUGO, proc_smaps_operations), 3344 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
3365 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3345 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3366#endif 3346#endif
3367#ifdef CONFIG_SECURITY 3347#ifdef CONFIG_SECURITY
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 84fd3235a59..205c9228083 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -22,7 +22,6 @@
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mount.h> 23#include <linux/mount.h>
24 24
25#include <asm/system.h>
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
27 26
28#include "internal.h" 27#include "internal.h"
@@ -486,8 +485,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
486 485
487int proc_fill_super(struct super_block *s) 486int proc_fill_super(struct super_block *s)
488{ 487{
489 struct inode * root_inode;
490
491 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 488 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
492 s->s_blocksize = 1024; 489 s->s_blocksize = 1024;
493 s->s_blocksize_bits = 10; 490 s->s_blocksize_bits = 10;
@@ -496,19 +493,11 @@ int proc_fill_super(struct super_block *s)
496 s->s_time_gran = 1; 493 s->s_time_gran = 1;
497 494
498 pde_get(&proc_root); 495 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, &proc_root); 496 s->s_root = d_make_root(proc_get_inode(s, &proc_root));
500 if (!root_inode) 497 if (s->s_root)
501 goto out_no_root; 498 return 0;
502 root_inode->i_uid = 0;
503 root_inode->i_gid = 0;
504 s->s_root = d_alloc_root(root_inode);
505 if (!s->s_root)
506 goto out_no_root;
507 return 0;
508 499
509out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 500 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode);
512 pde_put(&proc_root); 501 pde_put(&proc_root);
513 return -ENOMEM; 502 return -ENOMEM;
514} 503}
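The proc_fill_super() simplification leans on d_make_root() consuming its inode argument: on failure (including a NULL inode) it drops the reference itself and returns NULL, so the explicit out_no_root/iput() unwinding disappears. The resulting shape, as a hedged sketch with a hypothetical filesystem:

static int example_fill_super(struct super_block *s, struct inode *root_inode)
{
	/* d_make_root() iput()s the inode on failure and accepts NULL,
	 * so a single NULL check replaces the old two-step error path. */
	s->s_root = d_make_root(root_inode);
	if (!s->s_root)
		return -ENOMEM;
	return 0;
}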
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 292577531ad..5f79bb8b4c6 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -10,12 +10,15 @@
10 */ 10 */
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13struct ctl_table_header;
13 14
14extern struct proc_dir_entry proc_root; 15extern struct proc_dir_entry proc_root;
15#ifdef CONFIG_PROC_SYSCTL 16#ifdef CONFIG_PROC_SYSCTL
16extern int proc_sys_init(void); 17extern int proc_sys_init(void);
18extern void sysctl_head_put(struct ctl_table_header *head);
17#else 19#else
18static inline void proc_sys_init(void) { } 20static inline void proc_sys_init(void) { }
21static inline void sysctl_head_put(struct ctl_table_header *head) { }
19#endif 22#endif
20#ifdef CONFIG_NET 23#ifdef CONFIG_NET
21extern int proc_net_init(void); 24extern int proc_net_init(void);
@@ -53,9 +56,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 56 struct pid *pid, struct task_struct *task);
54extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 57extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
55 58
56extern const struct file_operations proc_maps_operations; 59extern const struct file_operations proc_pid_maps_operations;
57extern const struct file_operations proc_numa_maps_operations; 60extern const struct file_operations proc_tid_maps_operations;
58extern const struct file_operations proc_smaps_operations; 61extern const struct file_operations proc_pid_numa_maps_operations;
62extern const struct file_operations proc_tid_numa_maps_operations;
63extern const struct file_operations proc_pid_smaps_operations;
64extern const struct file_operations proc_tid_smaps_operations;
59extern const struct file_operations proc_clear_refs_operations; 65extern const struct file_operations proc_clear_refs_operations;
60extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 67extern const struct file_operations proc_net_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d245cb23dd7..86c67eee439 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -157,7 +157,8 @@ static int kcore_update_ram(void)
157 157
158#ifdef CONFIG_SPARSEMEM_VMEMMAP 158#ifdef CONFIG_SPARSEMEM_VMEMMAP
159/* calculate vmemmap's address from given system ram pfn and register it */ 159/* calculate vmemmap's address from given system ram pfn and register it */
160int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 160static int
161get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
161{ 162{
162 unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; 163 unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
163 unsigned long nr_pages = ent->size >> PAGE_SHIFT; 164 unsigned long nr_pages = ent->size >> PAGE_SHIFT;
@@ -189,7 +190,8 @@ int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
189 190
190} 191}
191#else 192#else
192int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 193static int
194get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
193{ 195{
194 return 1; 196 return 1;
195} 197}
@@ -513,7 +515,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
513 515
514 n = copy_to_user(buffer, (char *)start, tsz); 516 n = copy_to_user(buffer, (char *)start, tsz);
515 /* 517 /*
516 * We cannot distingush between fault on source 518 * We cannot distinguish between fault on source
517 * and fault on destination. When this happens 519 * and fault on destination. When this happens
518 * we clear too and hope it will trigger the 520 * we clear too and hope it will trigger the
519 * EFAULT again. 521 * EFAULT again.
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 27da860115c..0d9e23a39e4 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -53,7 +53,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
53 ei->ns_ops = ns_ops; 53 ei->ns_ops = ns_ops;
54 ei->ns = ns; 54 ei->ns = ns;
55 55
56 dentry->d_op = &pid_dentry_operations; 56 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 57 d_add(dentry, inode);
58 /* Close the race of the process dying before we return the dentry */ 58 /* Close the race of the process dying before we return the dentry */
59 if (pid_revalidate(dentry, NULL)) 59 if (pid_revalidate(dentry, NULL))
@@ -156,15 +156,15 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
156 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out; 157 goto out;
158 158
159 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; 159 last = &ns_entries[ARRAY_SIZE(ns_entries)];
160 for (entry = ns_entries; entry <= last; entry++) { 160 for (entry = ns_entries; entry < last; entry++) {
161 if (strlen((*entry)->name) != len) 161 if (strlen((*entry)->name) != len)
162 continue; 162 continue;
163 if (!memcmp(dentry->d_name.name, (*entry)->name, len)) 163 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
164 break; 164 break;
165 } 165 }
166 error = ERR_PTR(-ENOENT); 166 error = ERR_PTR(-ENOENT);
167 if (entry > last) 167 if (entry == last)
168 goto out; 168 goto out;
169 169
170 error = proc_ns_instantiate(dir, dentry, task, *entry); 170 error = proc_ns_instantiate(dir, dentry, task, *entry);
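The lookup fix above replaces an inclusive `last` pointer (compared with `<=` and tested via `entry > last`) with the conventional one-past-the-end sentinel, which removes the off-by-one-prone boundary tests. The idiom in isolation, with hypothetical data:

/* Past-the-end sentinel iteration over a fixed array. */
static const char *const names[] = { "net", "uts", "ipc" };

static int find_name(const char *want)
{
	const char *const *entry;
	const char *const *last = &names[ARRAY_SIZE(names)]; /* one past the end */

	for (entry = names; entry < last; entry++)
		if (strcmp(*entry, want) == 0)
			return entry - names;
	return -1;	/* entry == last: not found */
}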
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 6d8e6a9e93a..7fcd0d60a96 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page)
115 u |= 1 << KPF_COMPOUND_TAIL; 115 u |= 1 << KPF_COMPOUND_TAIL;
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 else if (PageTransCompound(page))
119 u |= 1 << KPF_THP;
118 120
119 /* 121 /*
120 * Caveats on high order pages: page->_count will only be set 122 * Caveats on high order pages: page->_count will only be set
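With KPF_THP exported above, a /proc/kpageflags reader can tell transparent huge pages apart from hugetlbfs pages (KPF_HUGE). A hedged userspace sketch — the file is an array of u64 flag words, one per pfn, and the bit numbers below follow include/linux/kernel-page-flags.h:

#include <stdint.h>

#define KPF_HUGE	17
#define KPF_THP		22

static int page_is_thp(uint64_t kpageflags_word)
{
	/* Set for compound pages that are not hugetlbfs pages. */
	return (kpageflags_word >> KPF_THP) & 1;
}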
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a6b62173d4c..21d836f4029 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -6,7 +6,10 @@
6#include <linux/poll.h> 6#include <linux/poll.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/sched.h>
9#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/mm.h>
12#include <linux/module.h>
10#include "internal.h" 13#include "internal.h"
11 14
12static const struct dentry_operations proc_sys_dentry_operations; 15static const struct dentry_operations proc_sys_dentry_operations;
@@ -24,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll)
24 wake_up_interruptible(&poll->wait); 27 wake_up_interruptible(&poll->wait);
25} 28}
26 29
30static struct ctl_table root_table[] = {
31 {
32 .procname = "",
33 .mode = S_IFDIR|S_IRUGO|S_IXUGO,
34 },
35 { }
36};
37static struct ctl_table_root sysctl_table_root = {
38 .default_set.dir.header = {
39 {{.count = 1,
40 .nreg = 1,
41 .ctl_table = root_table }},
42 .ctl_table_arg = root_table,
43 .root = &sysctl_table_root,
44 .set = &sysctl_table_root.default_set,
45 },
46};
47
48static DEFINE_SPINLOCK(sysctl_lock);
49
50static void drop_sysctl_table(struct ctl_table_header *header);
51static int sysctl_follow_link(struct ctl_table_header **phead,
52 struct ctl_table **pentry, struct nsproxy *namespaces);
53static int insert_links(struct ctl_table_header *head);
54static void put_links(struct ctl_table_header *header);
55
56static void sysctl_print_dir(struct ctl_dir *dir)
57{
58 if (dir->header.parent)
59 sysctl_print_dir(dir->header.parent);
60 printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname);
61}
62
63static int namecmp(const char *name1, int len1, const char *name2, int len2)
64{
65 int minlen;
66 int cmp;
67
68 minlen = len1;
69 if (minlen > len2)
70 minlen = len2;
71
72 cmp = memcmp(name1, name2, minlen);
73 if (cmp == 0)
74 cmp = len1 - len2;
75 return cmp;
76}
77
78/* Called under sysctl_lock */
79static struct ctl_table *find_entry(struct ctl_table_header **phead,
80 struct ctl_dir *dir, const char *name, int namelen)
81{
82 struct ctl_table_header *head;
83 struct ctl_table *entry;
84 struct rb_node *node = dir->root.rb_node;
85
86 while (node)
87 {
88 struct ctl_node *ctl_node;
89 const char *procname;
90 int cmp;
91
92 ctl_node = rb_entry(node, struct ctl_node, node);
93 head = ctl_node->header;
94 entry = &head->ctl_table[ctl_node - head->node];
95 procname = entry->procname;
96
97 cmp = namecmp(name, namelen, procname, strlen(procname));
98 if (cmp < 0)
99 node = node->rb_left;
100 else if (cmp > 0)
101 node = node->rb_right;
102 else {
103 *phead = head;
104 return entry;
105 }
106 }
107 return NULL;
108}
109
110static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
111{
112 struct rb_node *node = &head->node[entry - head->ctl_table].node;
113 struct rb_node **p = &head->parent->root.rb_node;
114 struct rb_node *parent = NULL;
115 const char *name = entry->procname;
116 int namelen = strlen(name);
117
118 while (*p) {
119 struct ctl_table_header *parent_head;
120 struct ctl_table *parent_entry;
121 struct ctl_node *parent_node;
122 const char *parent_name;
123 int cmp;
124
125 parent = *p;
126 parent_node = rb_entry(parent, struct ctl_node, node);
127 parent_head = parent_node->header;
128 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
129 parent_name = parent_entry->procname;
130
131 cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
132 if (cmp < 0)
133 p = &(*p)->rb_left;
134 else if (cmp > 0)
135 p = &(*p)->rb_right;
136 else {
137 printk(KERN_ERR "sysctl duplicate entry: ");
138 sysctl_print_dir(head->parent);
139 printk(KERN_CONT "/%s\n", entry->procname);
140 return -EEXIST;
141 }
142 }
143
144 rb_link_node(node, parent, p);
145 return 0;
146}
147
148static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
149{
150 struct rb_node *node = &head->node[entry - head->ctl_table].node;
151
152 rb_erase(node, &head->parent->root);
153}
154
155static void init_header(struct ctl_table_header *head,
156 struct ctl_table_root *root, struct ctl_table_set *set,
157 struct ctl_node *node, struct ctl_table *table)
158{
159 head->ctl_table = table;
160 head->ctl_table_arg = table;
161 head->used = 0;
162 head->count = 1;
163 head->nreg = 1;
164 head->unregistering = NULL;
165 head->root = root;
166 head->set = set;
167 head->parent = NULL;
168 head->node = node;
169 if (node) {
170 struct ctl_table *entry;
171 for (entry = table; entry->procname; entry++, node++) {
172 rb_init_node(&node->node);
173 node->header = head;
174 }
175 }
176}
177
178static void erase_header(struct ctl_table_header *head)
179{
180 struct ctl_table *entry;
181 for (entry = head->ctl_table; entry->procname; entry++)
182 erase_entry(head, entry);
183}
184
185static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
186{
187 struct ctl_table *entry;
188 int err;
189
190 dir->header.nreg++;
191 header->parent = dir;
192 err = insert_links(header);
193 if (err)
194 goto fail_links;
195 for (entry = header->ctl_table; entry->procname; entry++) {
196 err = insert_entry(header, entry);
197 if (err)
198 goto fail;
199 }
200 return 0;
201fail:
202 erase_header(header);
203 put_links(header);
204fail_links:
205 header->parent = NULL;
206 drop_sysctl_table(&dir->header);
207 return err;
208}
209
210/* called under sysctl_lock */
211static int use_table(struct ctl_table_header *p)
212{
213 if (unlikely(p->unregistering))
214 return 0;
215 p->used++;
216 return 1;
217}
218
219/* called under sysctl_lock */
220static void unuse_table(struct ctl_table_header *p)
221{
222 if (!--p->used)
223 if (unlikely(p->unregistering))
224 complete(p->unregistering);
225}
226
227/* called under sysctl_lock, will reacquire if has to wait */
228static void start_unregistering(struct ctl_table_header *p)
229{
230 /*
231 * if p->used is 0, nobody will ever touch that entry again;
232 * we'll eliminate all paths to it before dropping sysctl_lock
233 */
234 if (unlikely(p->used)) {
235 struct completion wait;
236 init_completion(&wait);
237 p->unregistering = &wait;
238 spin_unlock(&sysctl_lock);
239 wait_for_completion(&wait);
240 spin_lock(&sysctl_lock);
241 } else {
242 /* anything non-NULL; we'll never dereference it */
243 p->unregistering = ERR_PTR(-EINVAL);
244 }
245 /*
246 * do not remove from the list until nobody holds it; walking the
247 * list in do_sysctl() relies on that.
248 */
249 erase_header(p);
250}
251
252static void sysctl_head_get(struct ctl_table_header *head)
253{
254 spin_lock(&sysctl_lock);
255 head->count++;
256 spin_unlock(&sysctl_lock);
257}
258
259void sysctl_head_put(struct ctl_table_header *head)
260{
261 spin_lock(&sysctl_lock);
262 if (!--head->count)
263 kfree_rcu(head, rcu);
264 spin_unlock(&sysctl_lock);
265}
266
267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
268{
269 if (!head)
270 BUG();
271 spin_lock(&sysctl_lock);
272 if (!use_table(head))
273 head = ERR_PTR(-ENOENT);
274 spin_unlock(&sysctl_lock);
275 return head;
276}
277
278static void sysctl_head_finish(struct ctl_table_header *head)
279{
280 if (!head)
281 return;
282 spin_lock(&sysctl_lock);
283 unuse_table(head);
284 spin_unlock(&sysctl_lock);
285}
286
287static struct ctl_table_set *
288lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
289{
290 struct ctl_table_set *set = &root->default_set;
291 if (root->lookup)
292 set = root->lookup(root, namespaces);
293 return set;
294}
295
296static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
297 struct ctl_dir *dir,
298 const char *name, int namelen)
299{
300 struct ctl_table_header *head;
301 struct ctl_table *entry;
302
303 spin_lock(&sysctl_lock);
304 entry = find_entry(&head, dir, name, namelen);
305 if (entry && use_table(head))
306 *phead = head;
307 else
308 entry = NULL;
309 spin_unlock(&sysctl_lock);
310 return entry;
311}
312
313static struct ctl_node *first_usable_entry(struct rb_node *node)
314{
315 struct ctl_node *ctl_node;
316
317 for (;node; node = rb_next(node)) {
318 ctl_node = rb_entry(node, struct ctl_node, node);
319 if (use_table(ctl_node->header))
320 return ctl_node;
321 }
322 return NULL;
323}
324
325static void first_entry(struct ctl_dir *dir,
326 struct ctl_table_header **phead, struct ctl_table **pentry)
327{
328 struct ctl_table_header *head = NULL;
329 struct ctl_table *entry = NULL;
330 struct ctl_node *ctl_node;
331
332 spin_lock(&sysctl_lock);
333 ctl_node = first_usable_entry(rb_first(&dir->root));
334 spin_unlock(&sysctl_lock);
335 if (ctl_node) {
336 head = ctl_node->header;
337 entry = &head->ctl_table[ctl_node - head->node];
338 }
339 *phead = head;
340 *pentry = entry;
341}
342
343static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
344{
345 struct ctl_table_header *head = *phead;
346 struct ctl_table *entry = *pentry;
347 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
348
349 spin_lock(&sysctl_lock);
350 unuse_table(head);
351
352 ctl_node = first_usable_entry(rb_next(&ctl_node->node));
353 spin_unlock(&sysctl_lock);
354 head = NULL;
355 if (ctl_node) {
356 head = ctl_node->header;
357 entry = &head->ctl_table[ctl_node - head->node];
358 }
359 *phead = head;
360 *pentry = entry;
361}
362
363void register_sysctl_root(struct ctl_table_root *root)
364{
365}
366
367/*
368 * sysctl_perm does NOT grant the superuser all rights automatically, because
369 * some sysctl variables are readonly even to root.
370 */
371
372static int test_perm(int mode, int op)
373{
374 if (!current_euid())
375 mode >>= 6;
376 else if (in_egroup_p(0))
377 mode >>= 3;
378 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
379 return 0;
380 return -EACCES;
381}
382
383static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
384{
385 int mode;
386
387 if (root->permissions)
388 mode = root->permissions(root, current->nsproxy, table);
389 else
390 mode = table->mode;
391
392 return test_perm(mode, op);
393}
394
27static struct inode *proc_sys_make_inode(struct super_block *sb, 395static struct inode *proc_sys_make_inode(struct super_block *sb,
28 struct ctl_table_header *head, struct ctl_table *table) 396 struct ctl_table_header *head, struct ctl_table *table)
29{ 397{
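The red-black tree added above is keyed by namecmp(), which compares the shared prefix with memcmp() and breaks ties by length, so shorter names sort first. A standalone (hypothetical) copy with the resulting ordering spelled out:

#include <string.h>

static int namecmp(const char *name1, int len1, const char *name2, int len2)
{
	int minlen = len1 < len2 ? len1 : len2;
	int cmp = memcmp(name1, name2, minlen);

	return cmp ? cmp : len1 - len2;
}

/* namecmp("ip", 2, "ipc", 3) < 0   -- prefix match, shorter first
 * namecmp("ipc", 3, "net", 3) < 0  -- 'i' < 'n' decides
 * namecmp("net", 3, "net", 3) == 0 -- equal names */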
@@ -43,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
43 411
44 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 412 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
45 inode->i_mode = table->mode; 413 inode->i_mode = table->mode;
46 if (!table->child) { 414 if (!S_ISDIR(table->mode)) {
47 inode->i_mode |= S_IFREG; 415 inode->i_mode |= S_IFREG;
48 inode->i_op = &proc_sys_inode_operations; 416 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations; 417 inode->i_fop = &proc_sys_file_operations;
50 } else { 418 } else {
51 inode->i_mode |= S_IFDIR; 419 inode->i_mode |= S_IFDIR;
52 clear_nlink(inode);
53 inode->i_op = &proc_sys_dir_operations; 420 inode->i_op = &proc_sys_dir_operations;
54 inode->i_fop = &proc_sys_dir_file_operations; 421 inode->i_fop = &proc_sys_dir_file_operations;
55 } 422 }
@@ -57,70 +424,42 @@ out:
57 return inode; 424 return inode;
58} 425}
59 426
60static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
61{
62 int len;
63 for ( ; p->procname; p++) {
64
65 if (!p->procname)
66 continue;
67
68 len = strlen(p->procname);
69 if (len != name->len)
70 continue;
71
72 if (memcmp(p->procname, name->name, len) != 0)
73 continue;
74
75 /* I have a match */
76 return p;
77 }
78 return NULL;
79}
80
81static struct ctl_table_header *grab_header(struct inode *inode) 427static struct ctl_table_header *grab_header(struct inode *inode)
82{ 428{
83 if (PROC_I(inode)->sysctl) 429 struct ctl_table_header *head = PROC_I(inode)->sysctl;
84 return sysctl_head_grab(PROC_I(inode)->sysctl); 430 if (!head)
85 else 431 head = &sysctl_table_root.default_set.dir.header;
86 return sysctl_head_next(NULL); 432 return sysctl_head_grab(head);
87} 433}
88 434
89static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
90 struct nameidata *nd) 436 struct nameidata *nd)
91{ 437{
92 struct ctl_table_header *head = grab_header(dir); 438 struct ctl_table_header *head = grab_header(dir);
93 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
94 struct ctl_table_header *h = NULL; 439 struct ctl_table_header *h = NULL;
95 struct qstr *name = &dentry->d_name; 440 struct qstr *name = &dentry->d_name;
96 struct ctl_table *p; 441 struct ctl_table *p;
97 struct inode *inode; 442 struct inode *inode;
98 struct dentry *err = ERR_PTR(-ENOENT); 443 struct dentry *err = ERR_PTR(-ENOENT);
444 struct ctl_dir *ctl_dir;
445 int ret;
99 446
100 if (IS_ERR(head)) 447 if (IS_ERR(head))
101 return ERR_CAST(head); 448 return ERR_CAST(head);
102 449
103 if (table && !table->child) { 450 ctl_dir = container_of(head, struct ctl_dir, header);
104 WARN_ON(1);
105 goto out;
106 }
107
108 table = table ? table->child : head->ctl_table;
109
110 p = find_in_table(table, name);
111 if (!p) {
112 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
113 if (h->attached_to != table)
114 continue;
115 p = find_in_table(h->attached_by, name);
116 if (p)
117 break;
118 }
119 }
120 451
452 p = lookup_entry(&h, ctl_dir, name->name, name->len);
121 if (!p) 453 if (!p)
122 goto out; 454 goto out;
123 455
456 if (S_ISLNK(p->mode)) {
457 ret = sysctl_follow_link(&h, &p, current->nsproxy);
458 err = ERR_PTR(ret);
459 if (ret)
460 goto out;
461 }
462
124 err = ERR_PTR(-ENOMEM); 463 err = ERR_PTR(-ENOMEM);
125 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
126 if (h) 465 if (h)
@@ -188,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
188 527
189static int proc_sys_open(struct inode *inode, struct file *filp) 528static int proc_sys_open(struct inode *inode, struct file *filp)
190{ 529{
530 struct ctl_table_header *head = grab_header(inode);
191 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 531 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
192 532
533 /* sysctl was unregistered */
534 if (IS_ERR(head))
535 return PTR_ERR(head);
536
193 if (table->poll) 537 if (table->poll)
194 filp->private_data = proc_sys_poll_event(table->poll); 538 filp->private_data = proc_sys_poll_event(table->poll);
195 539
540 sysctl_head_finish(head);
541
196 return 0; 542 return 0;
197} 543}
198 544
199static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 545static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
200{ 546{
201 struct inode *inode = filp->f_path.dentry->d_inode; 547 struct inode *inode = filp->f_path.dentry->d_inode;
548 struct ctl_table_header *head = grab_header(inode);
202 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 549 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
203 unsigned long event = (unsigned long)filp->private_data;
204 unsigned int ret = DEFAULT_POLLMASK; 550 unsigned int ret = DEFAULT_POLLMASK;
551 unsigned long event;
552
553 /* sysctl was unregistered */
554 if (IS_ERR(head))
555 return POLLERR | POLLHUP;
205 556
206 if (!table->proc_handler) 557 if (!table->proc_handler)
207 goto out; 558 goto out;
@@ -209,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
209 if (!table->poll) 560 if (!table->poll)
210 goto out; 561 goto out;
211 562
563 event = (unsigned long)filp->private_data;
212 poll_wait(filp, &table->poll->wait, wait); 564 poll_wait(filp, &table->poll->wait, wait);
213 565
214 if (event != atomic_read(&table->poll->event)) { 566 if (event != atomic_read(&table->poll->event)) {
@@ -217,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
217 } 569 }
218 570
219out: 571out:
572 sysctl_head_finish(head);
573
220 return ret; 574 return ret;
221} 575}
222 576
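Both proc_sys_open() and proc_sys_poll() now bracket their work with grab_header()/sysctl_head_finish(), so an in-flight operation holds the table's use count and a concurrent unregister waits for it to drain. The bracket as a hedged template, with a hypothetical body:

static int example_sysctl_op(struct inode *inode)
{
	struct ctl_table_header *head = grab_header(inode);
	int ret = 0;

	if (IS_ERR(head))		/* table already unregistered */
		return PTR_ERR(head);

	/* ... operate on PROC_I(inode)->sysctl_entry here ... */

	sysctl_head_finish(head);	/* allows unregister to proceed */
	return ret;
}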
@@ -258,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
258 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 612 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
259} 613}
260 614
615static int proc_sys_link_fill_cache(struct file *filp, void *dirent,
616 filldir_t filldir,
617 struct ctl_table_header *head,
618 struct ctl_table *table)
619{
620 int err, ret = 0;
621 head = sysctl_head_grab(head);
622
623 if (S_ISLNK(table->mode)) {
 624 /* It is not an error if we cannot follow the link; ignore it */
625 err = sysctl_follow_link(&head, &table, current->nsproxy);
626 if (err)
627 goto out;
628 }
629
630 ret = proc_sys_fill_cache(filp, dirent, filldir, head, table);
631out:
632 sysctl_head_finish(head);
633 return ret;
634}
635
261static int scan(struct ctl_table_header *head, ctl_table *table, 636static int scan(struct ctl_table_header *head, ctl_table *table,
262 unsigned long *pos, struct file *file, 637 unsigned long *pos, struct file *file,
263 void *dirent, filldir_t filldir) 638 void *dirent, filldir_t filldir)
264{ 639{
640 int res;
265 641
266 for (; table->procname; table++, (*pos)++) { 642 if ((*pos)++ < file->f_pos)
267 int res; 643 return 0;
268
269 /* Can't do anything without a proc name */
270 if (!table->procname)
271 continue;
272
273 if (*pos < file->f_pos)
274 continue;
275 644
645 if (unlikely(S_ISLNK(table->mode)))
646 res = proc_sys_link_fill_cache(file, dirent, filldir, head, table);
647 else
276 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 648 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
277 if (res)
278 return res;
279 649
280 file->f_pos = *pos + 1; 650 if (res == 0)
281 } 651 file->f_pos = *pos;
282 return 0; 652
653 return res;
283} 654}
284 655
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 656static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -287,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
287 struct dentry *dentry = filp->f_path.dentry; 658 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 659 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = grab_header(inode); 660 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
291 struct ctl_table_header *h = NULL; 661 struct ctl_table_header *h = NULL;
662 struct ctl_table *entry;
663 struct ctl_dir *ctl_dir;
292 unsigned long pos; 664 unsigned long pos;
293 int ret = -EINVAL; 665 int ret = -EINVAL;
294 666
295 if (IS_ERR(head)) 667 if (IS_ERR(head))
296 return PTR_ERR(head); 668 return PTR_ERR(head);
297 669
298 if (table && !table->child) { 670 ctl_dir = container_of(head, struct ctl_dir, header);
299 WARN_ON(1);
300 goto out;
301 }
302
303 table = table ? table->child : head->ctl_table;
304 671
305 ret = 0; 672 ret = 0;
306 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 673 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -318,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
318 } 685 }
319 pos = 2; 686 pos = 2;
320 687
321 ret = scan(head, table, &pos, filp, dirent, filldir); 688 for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
322 if (ret) 689 ret = scan(h, entry, &pos, filp, dirent, filldir);
323 goto out;
324
325 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
326 if (h->attached_to != table)
327 continue;
328 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
329 if (ret) { 690 if (ret) {
330 sysctl_head_finish(h); 691 sysctl_head_finish(h);
331 break; 692 break;
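proc_sys_readdir() above walks the directory through a grab-as-you-go iterator: first_entry()/next_entry() take the use count on the header they return and drop the previous one, so only the entry currently being emitted is pinned. The loop shape, with a hypothetical per-entry callback:

struct ctl_table_header *h;
struct ctl_table *entry;

for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
	if (process(h, entry)) {	/* hypothetical: e.g. a filldir call */
		sysctl_head_finish(h);	/* early exit: drop the live ref */
		break;
	}
}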
@@ -445,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry)
445 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 806 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
446} 807}
447 808
809static int sysctl_is_seen(struct ctl_table_header *p)
810{
811 struct ctl_table_set *set = p->set;
812 int res;
813 spin_lock(&sysctl_lock);
814 if (p->unregistering)
815 res = 0;
816 else if (!set->is_seen)
817 res = 1;
818 else
819 res = set->is_seen(set);
820 spin_unlock(&sysctl_lock);
821 return res;
822}
823
448static int proc_sys_compare(const struct dentry *parent, 824static int proc_sys_compare(const struct dentry *parent,
449 const struct inode *pinode, 825 const struct inode *pinode,
450 const struct dentry *dentry, const struct inode *inode, 826 const struct dentry *dentry, const struct inode *inode,
@@ -470,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = {
470 .d_compare = proc_sys_compare, 846 .d_compare = proc_sys_compare,
471}; 847};
472 848
849static struct ctl_dir *find_subdir(struct ctl_dir *dir,
850 const char *name, int namelen)
851{
852 struct ctl_table_header *head;
853 struct ctl_table *entry;
854
855 entry = find_entry(&head, dir, name, namelen);
856 if (!entry)
857 return ERR_PTR(-ENOENT);
858 if (!S_ISDIR(entry->mode))
859 return ERR_PTR(-ENOTDIR);
860 return container_of(head, struct ctl_dir, header);
861}
862
863static struct ctl_dir *new_dir(struct ctl_table_set *set,
864 const char *name, int namelen)
865{
866 struct ctl_table *table;
867 struct ctl_dir *new;
868 struct ctl_node *node;
869 char *new_name;
870
871 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
872 sizeof(struct ctl_table)*2 + namelen + 1,
873 GFP_KERNEL);
874 if (!new)
875 return NULL;
876
877 node = (struct ctl_node *)(new + 1);
878 table = (struct ctl_table *)(node + 1);
879 new_name = (char *)(table + 2);
880 memcpy(new_name, name, namelen);
881 new_name[namelen] = '\0';
882 table[0].procname = new_name;
883 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
884 init_header(&new->header, set->dir.header.root, set, node, table);
885
886 return new;
887}
888
889/**
890 * get_subdir - find or create a subdir with the specified name.
891 * @dir: Directory to create the subdirectory in
892 * @name: The name of the subdirectory to find or create
893 * @namelen: The length of name
894 *
895 * Takes a directory with an elevated reference count so we know that
896 * if we drop the lock the directory will not go away. Upon success
897 * the reference is moved from @dir to the returned subdirectory.
898 * Upon error an error code is returned and the reference on @dir is
899 * simply dropped.
900 */
901static struct ctl_dir *get_subdir(struct ctl_dir *dir,
902 const char *name, int namelen)
903{
904 struct ctl_table_set *set = dir->header.set;
905 struct ctl_dir *subdir, *new = NULL;
906 int err;
907
908 spin_lock(&sysctl_lock);
909 subdir = find_subdir(dir, name, namelen);
910 if (!IS_ERR(subdir))
911 goto found;
912 if (PTR_ERR(subdir) != -ENOENT)
913 goto failed;
914
915 spin_unlock(&sysctl_lock);
916 new = new_dir(set, name, namelen);
917 spin_lock(&sysctl_lock);
918 subdir = ERR_PTR(-ENOMEM);
919 if (!new)
920 goto failed;
921
922 /* Was the subdir added while we dropped the lock? */
923 subdir = find_subdir(dir, name, namelen);
924 if (!IS_ERR(subdir))
925 goto found;
926 if (PTR_ERR(subdir) != -ENOENT)
927 goto failed;
928
 929 /* Nope. Use our freshly made directory entry. */
930 err = insert_header(dir, &new->header);
931 subdir = ERR_PTR(err);
932 if (err)
933 goto failed;
934 subdir = new;
935found:
936 subdir->header.nreg++;
937failed:
938 if (unlikely(IS_ERR(subdir))) {
939 printk(KERN_ERR "sysctl could not get directory: ");
940 sysctl_print_dir(dir);
941 printk(KERN_CONT "/%*.*s %ld\n",
942 namelen, namelen, name, PTR_ERR(subdir));
943 }
944 drop_sysctl_table(&dir->header);
945 if (new)
946 drop_sysctl_table(&new->header);
947 spin_unlock(&sysctl_lock);
948 return subdir;
949}
950
951static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
952{
953 struct ctl_dir *parent;
954 const char *procname;
955 if (!dir->header.parent)
956 return &set->dir;
957 parent = xlate_dir(set, dir->header.parent);
958 if (IS_ERR(parent))
959 return parent;
960 procname = dir->header.ctl_table[0].procname;
961 return find_subdir(parent, procname, strlen(procname));
962}
963
964static int sysctl_follow_link(struct ctl_table_header **phead,
965 struct ctl_table **pentry, struct nsproxy *namespaces)
966{
967 struct ctl_table_header *head;
968 struct ctl_table_root *root;
969 struct ctl_table_set *set;
970 struct ctl_table *entry;
971 struct ctl_dir *dir;
972 int ret;
973
974 ret = 0;
975 spin_lock(&sysctl_lock);
976 root = (*pentry)->data;
977 set = lookup_header_set(root, namespaces);
978 dir = xlate_dir(set, (*phead)->parent);
979 if (IS_ERR(dir))
980 ret = PTR_ERR(dir);
981 else {
982 const char *procname = (*pentry)->procname;
983 head = NULL;
984 entry = find_entry(&head, dir, procname, strlen(procname));
985 ret = -ENOENT;
986 if (entry && use_table(head)) {
987 unuse_table(*phead);
988 *phead = head;
989 *pentry = entry;
990 ret = 0;
991 }
992 }
993
994 spin_unlock(&sysctl_lock);
995 return ret;
996}
997
998static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
999{
1000 struct va_format vaf;
1001 va_list args;
1002
1003 va_start(args, fmt);
1004 vaf.fmt = fmt;
1005 vaf.va = &args;
1006
1007 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
1008 path, table->procname, &vaf);
1009
1010 va_end(args);
1011 return -EINVAL;
1012}
1013
1014static int sysctl_check_table(const char *path, struct ctl_table *table)
1015{
1016 int err = 0;
1017 for (; table->procname; table++) {
1018 if (table->child)
1019 err = sysctl_err(path, table, "Not a file");
1020
1021 if ((table->proc_handler == proc_dostring) ||
1022 (table->proc_handler == proc_dointvec) ||
1023 (table->proc_handler == proc_dointvec_minmax) ||
1024 (table->proc_handler == proc_dointvec_jiffies) ||
1025 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1026 (table->proc_handler == proc_dointvec_ms_jiffies) ||
1027 (table->proc_handler == proc_doulongvec_minmax) ||
1028 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1029 if (!table->data)
1030 err = sysctl_err(path, table, "No data");
1031 if (!table->maxlen)
1032 err = sysctl_err(path, table, "No maxlen");
1033 }
1034 if (!table->proc_handler)
1035 err = sysctl_err(path, table, "No proc_handler");
1036
1037 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1038 err = sysctl_err(path, table, "bogus .mode 0%o",
1039 table->mode);
1040 }
1041 return err;
1042}
1043
1044static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1045 struct ctl_table_root *link_root)
1046{
1047 struct ctl_table *link_table, *entry, *link;
1048 struct ctl_table_header *links;
1049 struct ctl_node *node;
1050 char *link_name;
1051 int nr_entries, name_bytes;
1052
1053 name_bytes = 0;
1054 nr_entries = 0;
1055 for (entry = table; entry->procname; entry++) {
1056 nr_entries++;
1057 name_bytes += strlen(entry->procname) + 1;
1058 }
1059
1060 links = kzalloc(sizeof(struct ctl_table_header) +
1061 sizeof(struct ctl_node)*nr_entries +
1062 sizeof(struct ctl_table)*(nr_entries + 1) +
1063 name_bytes,
1064 GFP_KERNEL);
1065
1066 if (!links)
1067 return NULL;
1068
1069 node = (struct ctl_node *)(links + 1);
1070 link_table = (struct ctl_table *)(node + nr_entries);
1071 link_name = (char *)&link_table[nr_entries + 1];
1072
1073 for (link = link_table, entry = table; entry->procname; link++, entry++) {
1074 int len = strlen(entry->procname) + 1;
1075 memcpy(link_name, entry->procname, len);
1076 link->procname = link_name;
1077 link->mode = S_IFLNK|S_IRWXUGO;
1078 link->data = link_root;
1079 link_name += len;
1080 }
1081 init_header(links, dir->header.root, dir->header.set, node, link_table);
1082 links->nreg = nr_entries;
1083
1084 return links;
1085}
1086
1087static bool get_links(struct ctl_dir *dir,
1088 struct ctl_table *table, struct ctl_table_root *link_root)
1089{
1090 struct ctl_table_header *head;
1091 struct ctl_table *entry, *link;
1092
1093 /* Are there links available for every entry in table? */
1094 for (entry = table; entry->procname; entry++) {
1095 const char *procname = entry->procname;
1096 link = find_entry(&head, dir, procname, strlen(procname));
1097 if (!link)
1098 return false;
1099 if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1100 continue;
1101 if (S_ISLNK(link->mode) && (link->data == link_root))
1102 continue;
1103 return false;
1104 }
1105
1106 /* The checks passed. Increase the registration count on the links */
1107 for (entry = table; entry->procname; entry++) {
1108 const char *procname = entry->procname;
1109 link = find_entry(&head, dir, procname, strlen(procname));
1110 head->nreg++;
1111 }
1112 return true;
1113}
1114
1115static int insert_links(struct ctl_table_header *head)
1116{
1117 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1118 struct ctl_dir *core_parent = NULL;
1119 struct ctl_table_header *links;
1120 int err;
1121
1122 if (head->set == root_set)
1123 return 0;
1124
1125 core_parent = xlate_dir(root_set, head->parent);
1126 if (IS_ERR(core_parent))
1127 return 0;
1128
1129 if (get_links(core_parent, head->ctl_table, head->root))
1130 return 0;
1131
1132 core_parent->header.nreg++;
1133 spin_unlock(&sysctl_lock);
1134
1135 links = new_links(core_parent, head->ctl_table, head->root);
1136
1137 spin_lock(&sysctl_lock);
1138 err = -ENOMEM;
1139 if (!links)
1140 goto out;
1141
1142 err = 0;
1143 if (get_links(core_parent, head->ctl_table, head->root)) {
1144 kfree(links);
1145 goto out;
1146 }
1147
1148 err = insert_header(core_parent, links);
1149 if (err)
1150 kfree(links);
1151out:
1152 drop_sysctl_table(&core_parent->header);
1153 return err;
1154}
1155
1156/**
1157 * __register_sysctl_table - register a leaf sysctl table
1158 * @set: Sysctl tree to register on
1159 * @path: The path to the directory the sysctl table is in.
1160 * @table: the top-level table structure
1161 *
 1162 * Register a leaf sysctl table. @table should be a filled in ctl_table
1163 * array. A completely 0 filled entry terminates the table.
1164 *
1165 * The members of the &struct ctl_table structure are used as follows:
1166 *
1167 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1168 * enter a sysctl file
1169 *
1170 * data - a pointer to data for use by proc_handler
1171 *
1172 * maxlen - the maximum size in bytes of the data
1173 *
1174 * mode - the file permissions for the /proc/sys file
1175 *
1176 * child - must be %NULL.
1177 *
1178 * proc_handler - the text handler routine (described below)
1179 *
1180 * extra1, extra2 - extra pointers usable by the proc handler routines
1181 *
1182 * Leaf nodes in the sysctl tree will be represented by a single file
1183 * under /proc; non-leaf nodes will be represented by directories.
1184 *
1185 * There must be a proc_handler routine for any terminal nodes.
1186 * Several default handlers are available to cover common cases -
1187 *
1188 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1189 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1190 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1191 *
1192 * It is the handler's job to read the input buffer from user memory
1193 * and process it. The handler should return 0 on success.
1194 *
1195 * This routine returns %NULL on a failure to register, and a pointer
1196 * to the table header on success.
1197 */
1198struct ctl_table_header *__register_sysctl_table(
1199 struct ctl_table_set *set,
1200 const char *path, struct ctl_table *table)
1201{
1202 struct ctl_table_root *root = set->dir.header.root;
1203 struct ctl_table_header *header;
1204 const char *name, *nextname;
1205 struct ctl_dir *dir;
1206 struct ctl_table *entry;
1207 struct ctl_node *node;
1208 int nr_entries = 0;
1209
1210 for (entry = table; entry->procname; entry++)
1211 nr_entries++;
1212
1213 header = kzalloc(sizeof(struct ctl_table_header) +
1214 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1215 if (!header)
1216 return NULL;
1217
1218 node = (struct ctl_node *)(header + 1);
1219 init_header(header, root, set, node, table);
1220 if (sysctl_check_table(path, table))
1221 goto fail;
1222
1223 spin_lock(&sysctl_lock);
1224 dir = &set->dir;
 1225 /* Reference moved down the directory tree by get_subdir */
1226 dir->header.nreg++;
1227 spin_unlock(&sysctl_lock);
1228
1229 /* Find the directory for the ctl_table */
1230 for (name = path; name; name = nextname) {
1231 int namelen;
1232 nextname = strchr(name, '/');
1233 if (nextname) {
1234 namelen = nextname - name;
1235 nextname++;
1236 } else {
1237 namelen = strlen(name);
1238 }
1239 if (namelen == 0)
1240 continue;
1241
1242 dir = get_subdir(dir, name, namelen);
1243 if (IS_ERR(dir))
1244 goto fail;
1245 }
1246
1247 spin_lock(&sysctl_lock);
1248 if (insert_header(dir, header))
1249 goto fail_put_dir_locked;
1250
1251 drop_sysctl_table(&dir->header);
1252 spin_unlock(&sysctl_lock);
1253
1254 return header;
1255
1256fail_put_dir_locked:
1257 drop_sysctl_table(&dir->header);
1258 spin_unlock(&sysctl_lock);
1259fail:
1260 kfree(header);
1261 dump_stack();
1262 return NULL;
1263}
1264
1265/**
1266 * register_sysctl - register a sysctl table
1267 * @path: The path to the directory the sysctl table is in.
1268 * @table: the table structure
1269 *
1270 * Register a sysctl table. @table should be a filled in ctl_table
1271 * array. A completely 0 filled entry terminates the table.
1272 *
1273 * See __register_sysctl_table for more details.
1274 */
1275struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1276{
1277 return __register_sysctl_table(&sysctl_table_root.default_set,
1278 path, table);
1279}
1280EXPORT_SYMBOL(register_sysctl);
1281
1282static char *append_path(const char *path, char *pos, const char *name)
1283{
1284 int namelen;
1285 namelen = strlen(name);
1286 if (((pos - path) + namelen + 2) >= PATH_MAX)
1287 return NULL;
1288 memcpy(pos, name, namelen);
1289 pos[namelen] = '/';
1290 pos[namelen + 1] = '\0';
1291 pos += namelen + 1;
1292 return pos;
1293}
1294
1295static int count_subheaders(struct ctl_table *table)
1296{
1297 int has_files = 0;
1298 int nr_subheaders = 0;
1299 struct ctl_table *entry;
1300
1301 /* special case: no directory and empty directory */
1302 if (!table || !table->procname)
1303 return 1;
1304
1305 for (entry = table; entry->procname; entry++) {
1306 if (entry->child)
1307 nr_subheaders += count_subheaders(entry->child);
1308 else
1309 has_files = 1;
1310 }
1311 return nr_subheaders + has_files;
1312}
1313
1314static int register_leaf_sysctl_tables(const char *path, char *pos,
1315 struct ctl_table_header ***subheader, struct ctl_table_set *set,
1316 struct ctl_table *table)
1317{
1318 struct ctl_table *ctl_table_arg = NULL;
1319 struct ctl_table *entry, *files;
1320 int nr_files = 0;
1321 int nr_dirs = 0;
1322 int err = -ENOMEM;
1323
1324 for (entry = table; entry->procname; entry++) {
1325 if (entry->child)
1326 nr_dirs++;
1327 else
1328 nr_files++;
1329 }
1330
1331 files = table;
1332 /* If there are mixed files and directories we need a new table */
1333 if (nr_dirs && nr_files) {
1334 struct ctl_table *new;
1335 files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1336 GFP_KERNEL);
1337 if (!files)
1338 goto out;
1339
1340 ctl_table_arg = files;
1341 for (new = files, entry = table; entry->procname; entry++) {
1342 if (entry->child)
1343 continue;
1344 *new = *entry;
1345 new++;
1346 }
1347 }
1348
1349 /* Register everything except a directory full of subdirectories */
1350 if (nr_files || !nr_dirs) {
1351 struct ctl_table_header *header;
1352 header = __register_sysctl_table(set, path, files);
1353 if (!header) {
1354 kfree(ctl_table_arg);
1355 goto out;
1356 }
1357
1358 /* Remember if we need to free the file table */
1359 header->ctl_table_arg = ctl_table_arg;
1360 **subheader = header;
1361 (*subheader)++;
1362 }
1363
1364 /* Recurse into the subdirectories. */
1365 for (entry = table; entry->procname; entry++) {
1366 char *child_pos;
1367
1368 if (!entry->child)
1369 continue;
1370
1371 err = -ENAMETOOLONG;
1372 child_pos = append_path(path, pos, entry->procname);
1373 if (!child_pos)
1374 goto out;
1375
1376 err = register_leaf_sysctl_tables(path, child_pos, subheader,
1377 set, entry->child);
1378 pos[0] = '\0';
1379 if (err)
1380 goto out;
1381 }
1382 err = 0;
1383out:
1384 /* On failure our caller will unregister all registered subheaders */
1385 return err;
1386}
1387
1388/**
1389 * __register_sysctl_paths - register a sysctl table hierarchy
1390 * @set: Sysctl tree to register on
1391 * @path: The path to the directory the sysctl table is in.
1392 * @table: the top-level table structure
1393 *
1394 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1395 * array. A completely 0 filled entry terminates the table.
1396 *
1397 * See __register_sysctl_table for more details.
1398 */
1399struct ctl_table_header *__register_sysctl_paths(
1400 struct ctl_table_set *set,
1401 const struct ctl_path *path, struct ctl_table *table)
1402{
1403 struct ctl_table *ctl_table_arg = table;
1404 int nr_subheaders = count_subheaders(table);
1405 struct ctl_table_header *header = NULL, **subheaders, **subheader;
1406 const struct ctl_path *component;
1407 char *new_path, *pos;
1408
1409 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1410 if (!new_path)
1411 return NULL;
1412
1413 pos[0] = '\0';
1414 for (component = path; component->procname; component++) {
1415 pos = append_path(new_path, pos, component->procname);
1416 if (!pos)
1417 goto out;
1418 }
1419 while (table->procname && table->child && !table[1].procname) {
1420 pos = append_path(new_path, pos, table->procname);
1421 if (!pos)
1422 goto out;
1423 table = table->child;
1424 }
1425 if (nr_subheaders == 1) {
1426 header = __register_sysctl_table(set, new_path, table);
1427 if (header)
1428 header->ctl_table_arg = ctl_table_arg;
1429 } else {
1430 header = kzalloc(sizeof(*header) +
1431 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1432 if (!header)
1433 goto out;
1434
1435 subheaders = (struct ctl_table_header **) (header + 1);
1436 subheader = subheaders;
1437 header->ctl_table_arg = ctl_table_arg;
1438
1439 if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1440 set, table))
1441 goto err_register_leaves;
1442 }
1443
1444out:
1445 kfree(new_path);
1446 return header;
1447
1448err_register_leaves:
1449 while (subheader > subheaders) {
1450 struct ctl_table_header *subh = *(--subheader);
1451 struct ctl_table *table = subh->ctl_table_arg;
1452 unregister_sysctl_table(subh);
1453 kfree(table);
1454 }
1455 kfree(header);
1456 header = NULL;
1457 goto out;
1458}
1459
1460/**
 1461 * register_sysctl_paths - register a sysctl table hierarchy
1462 * @path: The path to the directory the sysctl table is in.
1463 * @table: the top-level table structure
1464 *
1465 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1466 * array. A completely 0 filled entry terminates the table.
1467 *
1468 * See __register_sysctl_paths for more details.
1469 */
1470struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1471 struct ctl_table *table)
1472{
1473 return __register_sysctl_paths(&sysctl_table_root.default_set,
1474 path, table);
1475}
1476EXPORT_SYMBOL(register_sysctl_paths);
1477
1478/**
1479 * register_sysctl_table - register a sysctl table hierarchy
1480 * @table: the top-level table structure
1481 *
1482 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1483 * array. A completely 0 filled entry terminates the table.
1484 *
1485 * See register_sysctl_paths for more details.
1486 */
1487struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1488{
1489 static const struct ctl_path null_path[] = { {} };
1490
1491 return register_sysctl_paths(null_path, table);
1492}
1493EXPORT_SYMBOL(register_sysctl_table);
1494
1495static void put_links(struct ctl_table_header *header)
1496{
1497 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1498 struct ctl_table_root *root = header->root;
1499 struct ctl_dir *parent = header->parent;
1500 struct ctl_dir *core_parent;
1501 struct ctl_table *entry;
1502
1503 if (header->set == root_set)
1504 return;
1505
1506 core_parent = xlate_dir(root_set, parent);
1507 if (IS_ERR(core_parent))
1508 return;
1509
1510 for (entry = header->ctl_table; entry->procname; entry++) {
1511 struct ctl_table_header *link_head;
1512 struct ctl_table *link;
1513 const char *name = entry->procname;
1514
1515 link = find_entry(&link_head, core_parent, name, strlen(name));
1516 if (link &&
1517 ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1518 (S_ISLNK(link->mode) && (link->data == root)))) {
1519 drop_sysctl_table(link_head);
1520 }
1521 else {
1522 printk(KERN_ERR "sysctl link missing during unregister: ");
1523 sysctl_print_dir(parent);
1524 printk(KERN_CONT "/%s\n", name);
1525 }
1526 }
1527}
1528
1529static void drop_sysctl_table(struct ctl_table_header *header)
1530{
1531 struct ctl_dir *parent = header->parent;
1532
1533 if (--header->nreg)
1534 return;
1535
1536 put_links(header);
1537 start_unregistering(header);
1538 if (!--header->count)
1539 kfree_rcu(header, rcu);
1540
1541 if (parent)
1542 drop_sysctl_table(&parent->header);
1543}
1544
1545/**
1546 * unregister_sysctl_table - unregister a sysctl table hierarchy
1547 * @header: the header returned from register_sysctl_table
1548 *
1549 * Unregisters the sysctl table and all children. The proc entries may not
1550 * actually be removed until they are no longer used by anyone.
1551 */
1552void unregister_sysctl_table(struct ctl_table_header * header)
1553{
1554 int nr_subheaders;
1555 might_sleep();
1556
1557 if (header == NULL)
1558 return;
1559
1560 nr_subheaders = count_subheaders(header->ctl_table_arg);
1561 if (unlikely(nr_subheaders > 1)) {
1562 struct ctl_table_header **subheaders;
1563 int i;
1564
1565 subheaders = (struct ctl_table_header **)(header + 1);
1566		for (i = nr_subheaders - 1; i >= 0; i--) {
1567 struct ctl_table_header *subh = subheaders[i];
1568 struct ctl_table *table = subh->ctl_table_arg;
1569 unregister_sysctl_table(subh);
1570 kfree(table);
1571 }
1572 kfree(header);
1573 return;
1574 }
1575
1576 spin_lock(&sysctl_lock);
1577 drop_sysctl_table(header);
1578 spin_unlock(&sysctl_lock);
1579}
1580EXPORT_SYMBOL(unregister_sysctl_table);
1581
1582void setup_sysctl_set(struct ctl_table_set *set,
1583 struct ctl_table_root *root,
1584 int (*is_seen)(struct ctl_table_set *))
1585{
1586 memset(set, 0, sizeof(*set));
1587 set->is_seen = is_seen;
1588 init_header(&set->dir.header, root, set, NULL, root_table);
1589}
1590
1591void retire_sysctl_set(struct ctl_table_set *set)
1592{
1593 WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1594}
1595
473int __init proc_sys_init(void) 1596int __init proc_sys_init(void)
474{ 1597{
475 struct proc_dir_entry *proc_sys_root; 1598 struct proc_dir_entry *proc_sys_root;
@@ -478,5 +1601,6 @@ int __init proc_sys_init(void)
478 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1601 proc_sys_root->proc_iops = &proc_sys_dir_operations;
479 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1602 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
480 proc_sys_root->nlink = 0; 1603 proc_sys_root->nlink = 0;
481 return 0; 1604
1605 return sysctl_init();
482} 1606}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 46a15d8a29c..eed44bfc85d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -115,12 +115,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
115 if (IS_ERR(sb)) 115 if (IS_ERR(sb))
116 return ERR_CAST(sb); 116 return ERR_CAST(sb);
117 117
118 if (!proc_parse_options(options, ns)) {
119 deactivate_locked_super(sb);
120 return ERR_PTR(-EINVAL);
121 }
122
118 if (!sb->s_root) { 123 if (!sb->s_root) {
119 sb->s_flags = flags; 124 sb->s_flags = flags;
120 if (!proc_parse_options(options, ns)) {
121 deactivate_locked_super(sb);
122 return ERR_PTR(-EINVAL);
123 }
124 err = proc_fill_super(sb); 125 err = proc_fill_super(sb);
125 if (err) { 126 if (err) {
126 deactivate_locked_super(sb); 127 deactivate_locked_super(sb);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 121f77cfef7..64c3b317236 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,19 +18,39 @@
18#ifndef arch_irq_stat 18#ifndef arch_irq_stat
19#define arch_irq_stat() 0 19#define arch_irq_stat() 0
20#endif 20#endif
21#ifndef arch_idle_time 21
22#define arch_idle_time(cpu) 0 22#ifdef arch_idle_time
23#endif 23
24static cputime64_t get_idle_time(int cpu)
25{
26 cputime64_t idle;
27
28 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
29 if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
30 idle += arch_idle_time(cpu);
31 return idle;
32}
33
34static cputime64_t get_iowait_time(int cpu)
35{
36 cputime64_t iowait;
37
38 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
39 if (cpu_online(cpu) && nr_iowait_cpu(cpu))
40 iowait += arch_idle_time(cpu);
41 return iowait;
42}
43
44#else
24 45
25static u64 get_idle_time(int cpu) 46static u64 get_idle_time(int cpu)
26{ 47{
27 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); 48 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
28 49
29 if (idle_time == -1ULL) { 50 if (idle_time == -1ULL)
30 /* !NO_HZ so we can rely on cpustat.idle */ 51 /* !NO_HZ so we can rely on cpustat.idle */
31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; 52 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
32 idle += arch_idle_time(cpu); 53 else
33 } else
34 idle = usecs_to_cputime64(idle_time); 54 idle = usecs_to_cputime64(idle_time);
35 55
36 return idle; 56 return idle;
@@ -49,6 +69,8 @@ static u64 get_iowait_time(int cpu)
49 return iowait; 69 return iowait;
50} 70}
51 71
72#endif
73
52static int show_stat(struct seq_file *p, void *v) 74static int show_stat(struct seq_file *p, void *v)
53{ 75{
54 int i, j; 76 int i, j;
@@ -89,18 +111,19 @@ static int show_stat(struct seq_file *p, void *v)
89 } 111 }
90 sum += arch_irq_stat(); 112 sum += arch_irq_stat();
91 113
92 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " 114 seq_puts(p, "cpu ");
93 "%llu\n", 115 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
94 (unsigned long long)cputime64_to_clock_t(user), 116 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
95 (unsigned long long)cputime64_to_clock_t(nice), 117 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
96 (unsigned long long)cputime64_to_clock_t(system), 118 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
97 (unsigned long long)cputime64_to_clock_t(idle), 119 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
98 (unsigned long long)cputime64_to_clock_t(iowait), 120 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
99 (unsigned long long)cputime64_to_clock_t(irq), 121 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
100 (unsigned long long)cputime64_to_clock_t(softirq), 122 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
101 (unsigned long long)cputime64_to_clock_t(steal), 123 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
102 (unsigned long long)cputime64_to_clock_t(guest), 124 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
103 (unsigned long long)cputime64_to_clock_t(guest_nice)); 125 seq_putc(p, '\n');
126
104 for_each_online_cpu(i) { 127 for_each_online_cpu(i) {
105 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 128 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
106 user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; 129 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
@@ -113,26 +136,24 @@ static int show_stat(struct seq_file *p, void *v)
113 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; 136 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
114 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; 137 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
115 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; 138 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
116 seq_printf(p, 139 seq_printf(p, "cpu%d", i);
117 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " 140 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
118 "%llu\n", 141 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
119 i, 142 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
120 (unsigned long long)cputime64_to_clock_t(user), 143 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
121 (unsigned long long)cputime64_to_clock_t(nice), 144 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
122 (unsigned long long)cputime64_to_clock_t(system), 145 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
123 (unsigned long long)cputime64_to_clock_t(idle), 146 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
124 (unsigned long long)cputime64_to_clock_t(iowait), 147 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
125 (unsigned long long)cputime64_to_clock_t(irq), 148 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
126 (unsigned long long)cputime64_to_clock_t(softirq), 149 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
127 (unsigned long long)cputime64_to_clock_t(steal), 150 seq_putc(p, '\n');
128 (unsigned long long)cputime64_to_clock_t(guest),
129 (unsigned long long)cputime64_to_clock_t(guest_nice));
130 } 151 }
131 seq_printf(p, "intr %llu", (unsigned long long)sum); 152 seq_printf(p, "intr %llu", (unsigned long long)sum);
132 153
133 /* sum again ? it could be updated? */ 154 /* sum again ? it could be updated? */
134 for_each_irq_nr(j) 155 for_each_irq_nr(j)
135 seq_printf(p, " %u", kstat_irqs(j)); 156 seq_put_decimal_ull(p, ' ', kstat_irqs(j));
136 157
137 seq_printf(p, 158 seq_printf(p,
138 "\nctxt %llu\n" 159 "\nctxt %llu\n"
@@ -149,7 +170,7 @@ static int show_stat(struct seq_file *p, void *v)
149 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); 170 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
150 171
151 for (i = 0; i < NR_SOFTIRQS; i++) 172 for (i = 0; i < NR_SOFTIRQS; i++)
152 seq_printf(p, " %u", per_softirq_sums[i]); 173 seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
153 seq_putc(p, '\n'); 174 seq_putc(p, '\n');
154 175
155 return 0; 176 return 0;
@@ -157,11 +178,14 @@ static int show_stat(struct seq_file *p, void *v)
157 178
158static int stat_open(struct inode *inode, struct file *file) 179static int stat_open(struct inode *inode, struct file *file)
159{ 180{
160 unsigned size = 4096 * (1 + num_possible_cpus() / 32); 181 unsigned size = 1024 + 128 * num_possible_cpus();
161 char *buf; 182 char *buf;
162 struct seq_file *m; 183 struct seq_file *m;
163 int res; 184 int res;
164 185
 186	/* minimum size to display an interrupt count: 2 bytes */
187 size += 2 * nr_irqs;
188
165 /* don't ask for more than the kmalloc() max size */ 189 /* don't ask for more than the kmalloc() max size */
166 if (size > KMALLOC_MAX_SIZE) 190 if (size > KMALLOC_MAX_SIZE)
167 size = KMALLOC_MAX_SIZE; 191 size = KMALLOC_MAX_SIZE;
@@ -173,7 +197,7 @@ static int stat_open(struct inode *inode, struct file *file)
173 if (!res) { 197 if (!res) {
174 m = file->private_data; 198 m = file->private_data;
175 m->buf = buf; 199 m->buf = buf;
176 m->size = size; 200 m->size = ksize(buf);
177 } else 201 } else
178 kfree(buf); 202 kfree(buf);
179 return res; 203 return res;
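
The conversion above replaces one large seq_printf() format string with per-field seq_put_decimal_ull() calls, skipping printf format parsing on this hot path. A hedged sketch of the same pattern for a hypothetical counters file (NR_EXAMPLE and example_counters[] are invented for illustration):

	static int example_show(struct seq_file *m, void *v)
	{
		int i;

		seq_puts(m, "counters");
		for (i = 0; i < NR_EXAMPLE; i++)
			seq_put_decimal_ull(m, ' ', example_counters[i]);
		seq_putc(m, '\n');
		return 0;
	}
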
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7dcd2a25049..1030a716d15 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,
209 return ret; 209 return ret;
210} 210}
211 211
212static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) 212static void
213show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
213{ 214{
214 struct mm_struct *mm = vma->vm_mm; 215 struct mm_struct *mm = vma->vm_mm;
215 struct file *file = vma->vm_file; 216 struct file *file = vma->vm_file;
217 struct proc_maps_private *priv = m->private;
218 struct task_struct *task = priv->task;
216 vm_flags_t flags = vma->vm_flags; 219 vm_flags_t flags = vma->vm_flags;
217 unsigned long ino = 0; 220 unsigned long ino = 0;
218 unsigned long long pgoff = 0; 221 unsigned long long pgoff = 0;
219 unsigned long start, end; 222 unsigned long start, end;
220 dev_t dev = 0; 223 dev_t dev = 0;
221 int len; 224 int len;
225 const char *name = NULL;
222 226
223 if (file) { 227 if (file) {
224 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 228 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
252 if (file) { 256 if (file) {
253 pad_len_spaces(m, len); 257 pad_len_spaces(m, len);
254 seq_path(m, &file->f_path, "\n"); 258 seq_path(m, &file->f_path, "\n");
255 } else { 259 goto done;
256 const char *name = arch_vma_name(vma); 260 }
257 if (!name) { 261
258 if (mm) { 262 name = arch_vma_name(vma);
259 if (vma->vm_start <= mm->brk && 263 if (!name) {
260 vma->vm_end >= mm->start_brk) { 264 pid_t tid;
261 name = "[heap]"; 265
262 } else if (vma->vm_start <= mm->start_stack && 266 if (!mm) {
263 vma->vm_end >= mm->start_stack) { 267 name = "[vdso]";
264 name = "[stack]"; 268 goto done;
265 } 269 }
270
271 if (vma->vm_start <= mm->brk &&
272 vma->vm_end >= mm->start_brk) {
273 name = "[heap]";
274 goto done;
275 }
276
277 tid = vm_is_stack(task, vma, is_pid);
278
279 if (tid != 0) {
280 /*
281 * Thread stack in /proc/PID/task/TID/maps or
282 * the main process stack.
283 */
284 if (!is_pid || (vma->vm_start <= mm->start_stack &&
285 vma->vm_end >= mm->start_stack)) {
286 name = "[stack]";
266 } else { 287 } else {
267 name = "[vdso]"; 288 /* Thread stack in /proc/PID/maps */
289 pad_len_spaces(m, len);
290 seq_printf(m, "[stack:%d]", tid);
268 } 291 }
269 } 292 }
270 if (name) { 293 }
271 pad_len_spaces(m, len); 294
272 seq_puts(m, name); 295done:
273 } 296 if (name) {
297 pad_len_spaces(m, len);
298 seq_puts(m, name);
274 } 299 }
275 seq_putc(m, '\n'); 300 seq_putc(m, '\n');
276} 301}
277 302
278static int show_map(struct seq_file *m, void *v) 303static int show_map(struct seq_file *m, void *v, int is_pid)
279{ 304{
280 struct vm_area_struct *vma = v; 305 struct vm_area_struct *vma = v;
281 struct proc_maps_private *priv = m->private; 306 struct proc_maps_private *priv = m->private;
282 struct task_struct *task = priv->task; 307 struct task_struct *task = priv->task;
283 308
284 show_map_vma(m, vma); 309 show_map_vma(m, vma, is_pid);
285 310
286 if (m->count < m->size) /* vma is copied successfully */ 311 if (m->count < m->size) /* vma is copied successfully */
287 m->version = (vma != get_gate_vma(task->mm)) 312 m->version = (vma != get_gate_vma(task->mm))
@@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)
289 return 0; 314 return 0;
290} 315}
291 316
317static int show_pid_map(struct seq_file *m, void *v)
318{
319 return show_map(m, v, 1);
320}
321
322static int show_tid_map(struct seq_file *m, void *v)
323{
324 return show_map(m, v, 0);
325}
326
292static const struct seq_operations proc_pid_maps_op = { 327static const struct seq_operations proc_pid_maps_op = {
293 .start = m_start, 328 .start = m_start,
294 .next = m_next, 329 .next = m_next,
295 .stop = m_stop, 330 .stop = m_stop,
296 .show = show_map 331 .show = show_pid_map
297}; 332};
298 333
299static int maps_open(struct inode *inode, struct file *file) 334static const struct seq_operations proc_tid_maps_op = {
335 .start = m_start,
336 .next = m_next,
337 .stop = m_stop,
338 .show = show_tid_map
339};
340
341static int pid_maps_open(struct inode *inode, struct file *file)
300{ 342{
301 return do_maps_open(inode, file, &proc_pid_maps_op); 343 return do_maps_open(inode, file, &proc_pid_maps_op);
302} 344}
303 345
304const struct file_operations proc_maps_operations = { 346static int tid_maps_open(struct inode *inode, struct file *file)
305 .open = maps_open, 347{
348 return do_maps_open(inode, file, &proc_tid_maps_op);
349}
350
351const struct file_operations proc_pid_maps_operations = {
352 .open = pid_maps_open,
353 .read = seq_read,
354 .llseek = seq_lseek,
355 .release = seq_release_private,
356};
357
358const struct file_operations proc_tid_maps_operations = {
359 .open = tid_maps_open,
306 .read = seq_read, 360 .read = seq_read,
307 .llseek = seq_lseek, 361 .llseek = seq_lseek,
308 .release = seq_release_private, 362 .release = seq_release_private,
@@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
394 pte_t *pte; 448 pte_t *pte;
395 spinlock_t *ptl; 449 spinlock_t *ptl;
396 450
397 spin_lock(&walk->mm->page_table_lock); 451 if (pmd_trans_huge_lock(pmd, vma) == 1) {
398 if (pmd_trans_huge(*pmd)) { 452 smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
399 if (pmd_trans_splitting(*pmd)) {
400 spin_unlock(&walk->mm->page_table_lock);
401 wait_split_huge_page(vma->anon_vma, pmd);
402 } else {
403 smaps_pte_entry(*(pte_t *)pmd, addr,
404 HPAGE_PMD_SIZE, walk);
405 spin_unlock(&walk->mm->page_table_lock);
406 mss->anonymous_thp += HPAGE_PMD_SIZE;
407 return 0;
408 }
409 } else {
410 spin_unlock(&walk->mm->page_table_lock); 453 spin_unlock(&walk->mm->page_table_lock);
454 mss->anonymous_thp += HPAGE_PMD_SIZE;
455 return 0;
411 } 456 }
457
458 if (pmd_trans_unstable(pmd))
459 return 0;
412 /* 460 /*
413 * The mmap_sem held all the way back in m_start() is what 461 * The mmap_sem held all the way back in m_start() is what
414 * keeps khugepaged out of here and from collapsing things 462 * keeps khugepaged out of here and from collapsing things
@@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
422 return 0; 470 return 0;
423} 471}
424 472
425static int show_smap(struct seq_file *m, void *v) 473static int show_smap(struct seq_file *m, void *v, int is_pid)
426{ 474{
427 struct proc_maps_private *priv = m->private; 475 struct proc_maps_private *priv = m->private;
428 struct task_struct *task = priv->task; 476 struct task_struct *task = priv->task;
@@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)
440 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 488 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
441 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 489 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
442 490
443 show_map_vma(m, vma); 491 show_map_vma(m, vma, is_pid);
444 492
445 seq_printf(m, 493 seq_printf(m,
446 "Size: %8lu kB\n" 494 "Size: %8lu kB\n"
@@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)
479 return 0; 527 return 0;
480} 528}
481 529
530static int show_pid_smap(struct seq_file *m, void *v)
531{
532 return show_smap(m, v, 1);
533}
534
535static int show_tid_smap(struct seq_file *m, void *v)
536{
537 return show_smap(m, v, 0);
538}
539
482static const struct seq_operations proc_pid_smaps_op = { 540static const struct seq_operations proc_pid_smaps_op = {
483 .start = m_start, 541 .start = m_start,
484 .next = m_next, 542 .next = m_next,
485 .stop = m_stop, 543 .stop = m_stop,
486 .show = show_smap 544 .show = show_pid_smap
545};
546
547static const struct seq_operations proc_tid_smaps_op = {
548 .start = m_start,
549 .next = m_next,
550 .stop = m_stop,
551 .show = show_tid_smap
487}; 552};
488 553
489static int smaps_open(struct inode *inode, struct file *file) 554static int pid_smaps_open(struct inode *inode, struct file *file)
490{ 555{
491 return do_maps_open(inode, file, &proc_pid_smaps_op); 556 return do_maps_open(inode, file, &proc_pid_smaps_op);
492} 557}
493 558
494const struct file_operations proc_smaps_operations = { 559static int tid_smaps_open(struct inode *inode, struct file *file)
495 .open = smaps_open, 560{
561 return do_maps_open(inode, file, &proc_tid_smaps_op);
562}
563
564const struct file_operations proc_pid_smaps_operations = {
565 .open = pid_smaps_open,
566 .read = seq_read,
567 .llseek = seq_lseek,
568 .release = seq_release_private,
569};
570
571const struct file_operations proc_tid_smaps_operations = {
572 .open = tid_smaps_open,
496 .read = seq_read, 573 .read = seq_read,
497 .llseek = seq_lseek, 574 .llseek = seq_lseek,
498 .release = seq_release_private, 575 .release = seq_release_private,
@@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
507 struct page *page; 584 struct page *page;
508 585
509 split_huge_page_pmd(walk->mm, pmd); 586 split_huge_page_pmd(walk->mm, pmd);
587 if (pmd_trans_unstable(pmd))
588 return 0;
510 589
511 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 590 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
512 for (; addr != end; pte++, addr += PAGE_SIZE) { 591 for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -518,9 +597,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
518 if (!page) 597 if (!page)
519 continue; 598 continue;
520 599
521 if (PageReserved(page))
522 continue;
523
524 /* Clear accessed and referenced bits. */ 600 /* Clear accessed and referenced bits. */
525 ptep_test_and_clear_young(vma, addr, pte); 601 ptep_test_and_clear_young(vma, addr, pte);
526 ClearPageReferenced(page); 602 ClearPageReferenced(page);
@@ -598,11 +674,18 @@ const struct file_operations proc_clear_refs_operations = {
598 .llseek = noop_llseek, 674 .llseek = noop_llseek,
599}; 675};
600 676
677typedef struct {
678 u64 pme;
679} pagemap_entry_t;
680
601struct pagemapread { 681struct pagemapread {
602 int pos, len; 682 int pos, len;
603 u64 *buffer; 683 pagemap_entry_t *buffer;
604}; 684};
605 685
686#define PAGEMAP_WALK_SIZE (PMD_SIZE)
687#define PAGEMAP_WALK_MASK (PMD_MASK)
688
606#define PM_ENTRY_BYTES sizeof(u64) 689#define PM_ENTRY_BYTES sizeof(u64)
607#define PM_STATUS_BITS 3 690#define PM_STATUS_BITS 3
608#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 691#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
@@ -620,10 +703,15 @@ struct pagemapread {
620#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 703#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
621#define PM_END_OF_BUFFER 1 704#define PM_END_OF_BUFFER 1
622 705
623static int add_to_pagemap(unsigned long addr, u64 pfn, 706static inline pagemap_entry_t make_pme(u64 val)
707{
708 return (pagemap_entry_t) { .pme = val };
709}
710
711static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
624 struct pagemapread *pm) 712 struct pagemapread *pm)
625{ 713{
626 pm->buffer[pm->pos++] = pfn; 714 pm->buffer[pm->pos++] = *pme;
627 if (pm->pos >= pm->len) 715 if (pm->pos >= pm->len)
628 return PM_END_OF_BUFFER; 716 return PM_END_OF_BUFFER;
629 return 0; 717 return 0;
@@ -635,8 +723,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
635 struct pagemapread *pm = walk->private; 723 struct pagemapread *pm = walk->private;
636 unsigned long addr; 724 unsigned long addr;
637 int err = 0; 725 int err = 0;
726 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
727
638 for (addr = start; addr < end; addr += PAGE_SIZE) { 728 for (addr = start; addr < end; addr += PAGE_SIZE) {
639 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); 729 err = add_to_pagemap(addr, &pme, pm);
640 if (err) 730 if (err)
641 break; 731 break;
642 } 732 }
@@ -649,18 +739,40 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
649 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 739 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
650} 740}
651 741
652static u64 pte_to_pagemap_entry(pte_t pte) 742static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
653{ 743{
654 u64 pme = 0;
655 if (is_swap_pte(pte)) 744 if (is_swap_pte(pte))
656 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 745 *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
657 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 746 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
658 else if (pte_present(pte)) 747 else if (pte_present(pte))
659 pme = PM_PFRAME(pte_pfn(pte)) 748 *pme = make_pme(PM_PFRAME(pte_pfn(pte))
660 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 749 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
661 return pme; 750 else
751 *pme = make_pme(PM_NOT_PRESENT);
662} 752}
663 753
754#ifdef CONFIG_TRANSPARENT_HUGEPAGE
755static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
756 pmd_t pmd, int offset)
757{
758 /*
759 * Currently pmd for thp is always present because thp can not be
760 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
761 * This if-check is just to prepare for future implementation.
762 */
763 if (pmd_present(pmd))
764 *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
765 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
766 else
767 *pme = make_pme(PM_NOT_PRESENT);
768}
769#else
770static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
771 pmd_t pmd, int offset)
772{
773}
774#endif
775
664static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 776static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
665 struct mm_walk *walk) 777 struct mm_walk *walk)
666{ 778{
@@ -668,29 +780,46 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
668 struct pagemapread *pm = walk->private; 780 struct pagemapread *pm = walk->private;
669 pte_t *pte; 781 pte_t *pte;
670 int err = 0; 782 int err = 0;
671 783 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
672 split_huge_page_pmd(walk->mm, pmd);
673 784
674 /* find the first VMA at or above 'addr' */ 785 /* find the first VMA at or above 'addr' */
675 vma = find_vma(walk->mm, addr); 786 vma = find_vma(walk->mm, addr);
787 if (pmd_trans_huge_lock(pmd, vma) == 1) {
788 for (; addr != end; addr += PAGE_SIZE) {
789 unsigned long offset;
790
791 offset = (addr & ~PAGEMAP_WALK_MASK) >>
792 PAGE_SHIFT;
793 thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
794 err = add_to_pagemap(addr, &pme, pm);
795 if (err)
796 break;
797 }
798 spin_unlock(&walk->mm->page_table_lock);
799 return err;
800 }
801
802 if (pmd_trans_unstable(pmd))
803 return 0;
676 for (; addr != end; addr += PAGE_SIZE) { 804 for (; addr != end; addr += PAGE_SIZE) {
677 u64 pfn = PM_NOT_PRESENT;
678 805
679 /* check to see if we've left 'vma' behind 806 /* check to see if we've left 'vma' behind
680 * and need a new, higher one */ 807 * and need a new, higher one */
681 if (vma && (addr >= vma->vm_end)) 808 if (vma && (addr >= vma->vm_end)) {
682 vma = find_vma(walk->mm, addr); 809 vma = find_vma(walk->mm, addr);
810 pme = make_pme(PM_NOT_PRESENT);
811 }
683 812
684 /* check that 'vma' actually covers this address, 813 /* check that 'vma' actually covers this address,
685 * and that it isn't a huge page vma */ 814 * and that it isn't a huge page vma */
686 if (vma && (vma->vm_start <= addr) && 815 if (vma && (vma->vm_start <= addr) &&
687 !is_vm_hugetlb_page(vma)) { 816 !is_vm_hugetlb_page(vma)) {
688 pte = pte_offset_map(pmd, addr); 817 pte = pte_offset_map(pmd, addr);
689 pfn = pte_to_pagemap_entry(*pte); 818 pte_to_pagemap_entry(&pme, *pte);
690 /* unmap before userspace copy */ 819 /* unmap before userspace copy */
691 pte_unmap(pte); 820 pte_unmap(pte);
692 } 821 }
693 err = add_to_pagemap(addr, pfn, pm); 822 err = add_to_pagemap(addr, &pme, pm);
694 if (err) 823 if (err)
695 return err; 824 return err;
696 } 825 }
@@ -701,13 +830,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
701} 830}
702 831
703#ifdef CONFIG_HUGETLB_PAGE 832#ifdef CONFIG_HUGETLB_PAGE
704static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) 833static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
834 pte_t pte, int offset)
705{ 835{
706 u64 pme = 0;
707 if (pte_present(pte)) 836 if (pte_present(pte))
708 pme = PM_PFRAME(pte_pfn(pte) + offset) 837 *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
709 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 838 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
710 return pme; 839 else
840 *pme = make_pme(PM_NOT_PRESENT);
711} 841}
712 842
713/* This function walks within one hugetlb entry in the single call */ 843/* This function walks within one hugetlb entry in the single call */
@@ -717,12 +847,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
717{ 847{
718 struct pagemapread *pm = walk->private; 848 struct pagemapread *pm = walk->private;
719 int err = 0; 849 int err = 0;
720 u64 pfn; 850 pagemap_entry_t pme;
721 851
722 for (; addr != end; addr += PAGE_SIZE) { 852 for (; addr != end; addr += PAGE_SIZE) {
723 int offset = (addr & ~hmask) >> PAGE_SHIFT; 853 int offset = (addr & ~hmask) >> PAGE_SHIFT;
724 pfn = huge_pte_to_pagemap_entry(*pte, offset); 854 huge_pte_to_pagemap_entry(&pme, *pte, offset);
725 err = add_to_pagemap(addr, pfn, pm); 855 err = add_to_pagemap(addr, &pme, pm);
726 if (err) 856 if (err)
727 return err; 857 return err;
728 } 858 }
@@ -757,8 +887,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
757 * determine which areas of memory are actually mapped and llseek to 887 * determine which areas of memory are actually mapped and llseek to
758 * skip over unmapped regions. 888 * skip over unmapped regions.
759 */ 889 */
760#define PAGEMAP_WALK_SIZE (PMD_SIZE)
761#define PAGEMAP_WALK_MASK (PMD_MASK)
762static ssize_t pagemap_read(struct file *file, char __user *buf, 890static ssize_t pagemap_read(struct file *file, char __user *buf,
763 size_t count, loff_t *ppos) 891 size_t count, loff_t *ppos)
764{ 892{
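
The context above is pagemap_read(), the read side of /proc/pid/pagemap. A hedged userspace sketch of the contract described in the comment (one u64 per page; this assumes the documented layout where the present flag is the top status bit, bit 63):

	/* userspace, not kernel code */
	#include <fcntl.h>
	#include <stdint.h>
	#include <unistd.h>

	static int page_present(const void *addr)
	{
		uint64_t ent;
		off_t off = ((uintptr_t)addr / getpagesize()) * sizeof(uint64_t);
		int fd = open("/proc/self/pagemap", O_RDONLY);

		if (fd < 0)
			return -1;
		if (pread(fd, &ent, sizeof(ent), off) != sizeof(ent)) {
			close(fd);
			return -1;
		}
		close(fd);
		return (int)(ent >> 63);	/* present flag */
	}
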
@@ -941,26 +1069,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
941 pte_t *pte; 1069 pte_t *pte;
942 1070
943 md = walk->private; 1071 md = walk->private;
944 spin_lock(&walk->mm->page_table_lock); 1072
945 if (pmd_trans_huge(*pmd)) { 1073 if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
946 if (pmd_trans_splitting(*pmd)) { 1074 pte_t huge_pte = *(pte_t *)pmd;
947 spin_unlock(&walk->mm->page_table_lock); 1075 struct page *page;
948 wait_split_huge_page(md->vma->anon_vma, pmd); 1076
949 } else { 1077 page = can_gather_numa_stats(huge_pte, md->vma, addr);
950 pte_t huge_pte = *(pte_t *)pmd; 1078 if (page)
951 struct page *page; 1079 gather_stats(page, md, pte_dirty(huge_pte),
952 1080 HPAGE_PMD_SIZE/PAGE_SIZE);
953 page = can_gather_numa_stats(huge_pte, md->vma, addr);
954 if (page)
955 gather_stats(page, md, pte_dirty(huge_pte),
956 HPAGE_PMD_SIZE/PAGE_SIZE);
957 spin_unlock(&walk->mm->page_table_lock);
958 return 0;
959 }
960 } else {
961 spin_unlock(&walk->mm->page_table_lock); 1081 spin_unlock(&walk->mm->page_table_lock);
1082 return 0;
962 } 1083 }
963 1084
1085 if (pmd_trans_unstable(pmd))
1086 return 0;
964 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 1087 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
965 do { 1088 do {
966 struct page *page = can_gather_numa_stats(*pte, md->vma, addr); 1089 struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
@@ -1002,7 +1125,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
1002/* 1125/*
1003 * Display pages allocated per node and memory policy via /proc. 1126 * Display pages allocated per node and memory policy via /proc.
1004 */ 1127 */
1005static int show_numa_map(struct seq_file *m, void *v) 1128static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1006{ 1129{
1007 struct numa_maps_private *numa_priv = m->private; 1130 struct numa_maps_private *numa_priv = m->private;
1008 struct proc_maps_private *proc_priv = &numa_priv->proc_maps; 1131 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1039,9 +1162,19 @@ static int show_numa_map(struct seq_file *m, void *v)
1039 seq_path(m, &file->f_path, "\n\t= "); 1162 seq_path(m, &file->f_path, "\n\t= ");
1040 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1163 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1041 seq_printf(m, " heap"); 1164 seq_printf(m, " heap");
1042 } else if (vma->vm_start <= mm->start_stack && 1165 } else {
1043 vma->vm_end >= mm->start_stack) { 1166 pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
1044 seq_printf(m, " stack"); 1167 if (tid != 0) {
1168 /*
1169 * Thread stack in /proc/PID/task/TID/maps or
1170 * the main process stack.
1171 */
1172 if (!is_pid || (vma->vm_start <= mm->start_stack &&
1173 vma->vm_end >= mm->start_stack))
1174 seq_printf(m, " stack");
1175 else
1176 seq_printf(m, " stack:%d", tid);
1177 }
1045 } 1178 }
1046 1179
1047 if (is_vm_hugetlb_page(vma)) 1180 if (is_vm_hugetlb_page(vma))
@@ -1084,21 +1217,39 @@ out:
1084 return 0; 1217 return 0;
1085} 1218}
1086 1219
1220static int show_pid_numa_map(struct seq_file *m, void *v)
1221{
1222 return show_numa_map(m, v, 1);
1223}
1224
1225static int show_tid_numa_map(struct seq_file *m, void *v)
1226{
1227 return show_numa_map(m, v, 0);
1228}
1229
1087static const struct seq_operations proc_pid_numa_maps_op = { 1230static const struct seq_operations proc_pid_numa_maps_op = {
1088 .start = m_start, 1231 .start = m_start,
1089 .next = m_next, 1232 .next = m_next,
1090 .stop = m_stop, 1233 .stop = m_stop,
1091 .show = show_numa_map, 1234 .show = show_pid_numa_map,
1235};
1236
1237static const struct seq_operations proc_tid_numa_maps_op = {
1238 .start = m_start,
1239 .next = m_next,
1240 .stop = m_stop,
1241 .show = show_tid_numa_map,
1092}; 1242};
1093 1243
1094static int numa_maps_open(struct inode *inode, struct file *file) 1244static int numa_maps_open(struct inode *inode, struct file *file,
1245 const struct seq_operations *ops)
1095{ 1246{
1096 struct numa_maps_private *priv; 1247 struct numa_maps_private *priv;
1097 int ret = -ENOMEM; 1248 int ret = -ENOMEM;
1098 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 1249 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1099 if (priv) { 1250 if (priv) {
1100 priv->proc_maps.pid = proc_pid(inode); 1251 priv->proc_maps.pid = proc_pid(inode);
1101 ret = seq_open(file, &proc_pid_numa_maps_op); 1252 ret = seq_open(file, ops);
1102 if (!ret) { 1253 if (!ret) {
1103 struct seq_file *m = file->private_data; 1254 struct seq_file *m = file->private_data;
1104 m->private = priv; 1255 m->private = priv;
@@ -1109,8 +1260,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)
1109 return ret; 1260 return ret;
1110} 1261}
1111 1262
1112const struct file_operations proc_numa_maps_operations = { 1263static int pid_numa_maps_open(struct inode *inode, struct file *file)
1113 .open = numa_maps_open, 1264{
1265 return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
1266}
1267
1268static int tid_numa_maps_open(struct inode *inode, struct file *file)
1269{
1270 return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
1271}
1272
1273const struct file_operations proc_pid_numa_maps_operations = {
1274 .open = pid_numa_maps_open,
1275 .read = seq_read,
1276 .llseek = seq_lseek,
1277 .release = seq_release_private,
1278};
1279
1280const struct file_operations proc_tid_numa_maps_operations = {
1281 .open = tid_numa_maps_open,
1114 .read = seq_read, 1282 .read = seq_read,
1115 .llseek = seq_lseek, 1283 .llseek = seq_lseek,
1116 .release = seq_release_private, 1284 .release = seq_release_private,
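
Net effect of the is_pid split, shown as a hypothetical excerpt (addresses and tid invented): the same thread-stack VMA renders as [stack:TID] in /proc/PID/maps but as plain [stack] in /proc/PID/task/TID/maps.

	7f2b4a000000-7f2b4a800000 rw-p 00000000 00:00 0          [stack:1234]   (/proc/PID/maps)
	7f2b4a000000-7f2b4a800000 rw-p 00000000 00:00 0          [stack]        (/proc/PID/task/1234/maps)
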
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 980de547c07..74fe164d1b2 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len)
134/* 134/*
135 * display a single VMA to a sequenced file 135 * display a single VMA to a sequenced file
136 */ 136 */
137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
138 int is_pid)
138{ 139{
139 struct mm_struct *mm = vma->vm_mm; 140 struct mm_struct *mm = vma->vm_mm;
141 struct proc_maps_private *priv = m->private;
140 unsigned long ino = 0; 142 unsigned long ino = 0;
141 struct file *file; 143 struct file *file;
142 dev_t dev = 0; 144 dev_t dev = 0;
@@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
168 pad_len_spaces(m, len); 170 pad_len_spaces(m, len);
169 seq_path(m, &file->f_path, ""); 171 seq_path(m, &file->f_path, "");
170 } else if (mm) { 172 } else if (mm) {
171 if (vma->vm_start <= mm->start_stack && 173 pid_t tid = vm_is_stack(priv->task, vma, is_pid);
172 vma->vm_end >= mm->start_stack) { 174
175 if (tid != 0) {
173 pad_len_spaces(m, len); 176 pad_len_spaces(m, len);
174 seq_puts(m, "[stack]"); 177 /*
178 * Thread stack in /proc/PID/task/TID/maps or
179 * the main process stack.
180 */
181 if (!is_pid || (vma->vm_start <= mm->start_stack &&
182 vma->vm_end >= mm->start_stack))
183 seq_printf(m, "[stack]");
184 else
185 seq_printf(m, "[stack:%d]", tid);
175 } 186 }
176 } 187 }
177 188
@@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
182/* 193/*
183 * display mapping lines for a particular process's /proc/pid/maps 194 * display mapping lines for a particular process's /proc/pid/maps
184 */ 195 */
185static int show_map(struct seq_file *m, void *_p) 196static int show_map(struct seq_file *m, void *_p, int is_pid)
186{ 197{
187 struct rb_node *p = _p; 198 struct rb_node *p = _p;
188 199
189 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); 200 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
201 is_pid);
202}
203
204static int show_pid_map(struct seq_file *m, void *_p)
205{
206 return show_map(m, _p, 1);
207}
208
209static int show_tid_map(struct seq_file *m, void *_p)
210{
211 return show_map(m, _p, 0);
190} 212}
191 213
192static void *m_start(struct seq_file *m, loff_t *pos) 214static void *m_start(struct seq_file *m, loff_t *pos)
@@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = {
240 .start = m_start, 262 .start = m_start,
241 .next = m_next, 263 .next = m_next,
242 .stop = m_stop, 264 .stop = m_stop,
243 .show = show_map 265 .show = show_pid_map
266};
267
268static const struct seq_operations proc_tid_maps_ops = {
269 .start = m_start,
270 .next = m_next,
271 .stop = m_stop,
272 .show = show_tid_map
244}; 273};
245 274
246static int maps_open(struct inode *inode, struct file *file) 275static int maps_open(struct inode *inode, struct file *file,
276 const struct seq_operations *ops)
247{ 277{
248 struct proc_maps_private *priv; 278 struct proc_maps_private *priv;
249 int ret = -ENOMEM; 279 int ret = -ENOMEM;
@@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file)
251 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 281 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
252 if (priv) { 282 if (priv) {
253 priv->pid = proc_pid(inode); 283 priv->pid = proc_pid(inode);
254 ret = seq_open(file, &proc_pid_maps_ops); 284 ret = seq_open(file, ops);
255 if (!ret) { 285 if (!ret) {
256 struct seq_file *m = file->private_data; 286 struct seq_file *m = file->private_data;
257 m->private = priv; 287 m->private = priv;
@@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file)
262 return ret; 292 return ret;
263} 293}
264 294
265const struct file_operations proc_maps_operations = { 295static int pid_maps_open(struct inode *inode, struct file *file)
266 .open = maps_open, 296{
297 return maps_open(inode, file, &proc_pid_maps_ops);
298}
299
300static int tid_maps_open(struct inode *inode, struct file *file)
301{
302 return maps_open(inode, file, &proc_tid_maps_ops);
303}
304
305const struct file_operations proc_pid_maps_operations = {
306 .open = pid_maps_open,
307 .read = seq_read,
308 .llseek = seq_lseek,
309 .release = seq_release_private,
310};
311
312const struct file_operations proc_tid_maps_operations = {
313 .open = tid_maps_open,
267 .read = seq_read, 314 .read = seq_read,
268 .llseek = seq_lseek, 315 .llseek = seq_lseek,
269 .release = seq_release_private, 316 .release = seq_release_private,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b0f450a2bb7..0d5071d2998 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -700,3 +700,26 @@ static int __init vmcore_init(void)
700 return 0; 700 return 0;
701} 701}
702module_init(vmcore_init) 702module_init(vmcore_init)
703
704/* Cleanup function for vmcore module. */
705void vmcore_cleanup(void)
706{
707 struct list_head *pos, *next;
708
709 if (proc_vmcore) {
710 remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
711 proc_vmcore = NULL;
712 }
713
714 /* clear the vmcore list. */
715 list_for_each_safe(pos, next, &vmcore_list) {
716 struct vmcore *m;
717
718 m = list_entry(pos, struct vmcore, list);
719 list_del(&m->list);
720 kfree(m);
721 }
722 kfree(elfcorebuf);
723 elfcorebuf = NULL;
724}
725EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b3b426edb2f..19507889bb7 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -52,12 +52,6 @@ struct pstore_private {
52 char data[]; 52 char data[];
53}; 53};
54 54
55static int pstore_file_open(struct inode *inode, struct file *file)
56{
57 file->private_data = inode->i_private;
58 return 0;
59}
60
61static ssize_t pstore_file_read(struct file *file, char __user *userbuf, 55static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
62 size_t count, loff_t *ppos) 56 size_t count, loff_t *ppos)
63{ 57{
@@ -67,7 +61,7 @@ static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
67} 61}
68 62
69static const struct file_operations pstore_file_operations = { 63static const struct file_operations pstore_file_operations = {
70 .open = pstore_file_open, 64 .open = simple_open,
71 .read = pstore_file_read, 65 .read = pstore_file_read,
72 .llseek = default_llseek, 66 .llseek = default_llseek,
73}; 67};
@@ -105,26 +99,12 @@ static const struct inode_operations pstore_dir_inode_operations = {
105 .unlink = pstore_unlink, 99 .unlink = pstore_unlink,
106}; 100};
107 101
108static struct inode *pstore_get_inode(struct super_block *sb, 102static struct inode *pstore_get_inode(struct super_block *sb)
109 const struct inode *dir, int mode, dev_t dev)
110{ 103{
111 struct inode *inode = new_inode(sb); 104 struct inode *inode = new_inode(sb);
112
113 if (inode) { 105 if (inode) {
114 inode->i_ino = get_next_ino(); 106 inode->i_ino = get_next_ino();
115 inode->i_uid = inode->i_gid = 0;
116 inode->i_mode = mode;
117 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 107 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
118 switch (mode & S_IFMT) {
119 case S_IFREG:
120 inode->i_fop = &pstore_file_operations;
121 break;
122 case S_IFDIR:
123 inode->i_op = &pstore_dir_inode_operations;
124 inode->i_fop = &simple_dir_operations;
125 inc_nlink(inode);
126 break;
127 }
128 } 108 }
129 return inode; 109 return inode;
130} 110}
@@ -216,9 +196,11 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
216 return rc; 196 return rc;
217 197
218 rc = -ENOMEM; 198 rc = -ENOMEM;
219 inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); 199 inode = pstore_get_inode(pstore_sb);
220 if (!inode) 200 if (!inode)
221 goto fail; 201 goto fail;
202 inode->i_mode = S_IFREG | 0444;
203 inode->i_fop = &pstore_file_operations;
222 private = kmalloc(sizeof *private + size, GFP_KERNEL); 204 private = kmalloc(sizeof *private + size, GFP_KERNEL);
223 if (!private) 205 if (!private)
224 goto fail_alloc; 206 goto fail_alloc;
@@ -278,9 +260,7 @@ fail:
278 260
279int pstore_fill_super(struct super_block *sb, void *data, int silent) 261int pstore_fill_super(struct super_block *sb, void *data, int silent)
280{ 262{
281 struct inode *inode = NULL; 263 struct inode *inode;
282 struct dentry *root;
283 int err;
284 264
285 save_mount_options(sb, data); 265 save_mount_options(sb, data);
286 266
@@ -295,27 +275,20 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
295 275
296 parse_options(data); 276 parse_options(data);
297 277
298 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); 278 inode = pstore_get_inode(sb);
299 if (!inode) { 279 if (inode) {
300 err = -ENOMEM; 280 inode->i_mode = S_IFDIR | 0755;
301 goto fail; 281 inode->i_op = &pstore_dir_inode_operations;
302 } 282 inode->i_fop = &simple_dir_operations;
303 /* override ramfs "dir" options so we catch unlink(2) */ 283 inc_nlink(inode);
304 inode->i_op = &pstore_dir_inode_operations;
305
306 root = d_alloc_root(inode);
307 sb->s_root = root;
308 if (!root) {
309 err = -ENOMEM;
310 goto fail;
311 } 284 }
285 sb->s_root = d_make_root(inode);
286 if (!sb->s_root)
287 return -ENOMEM;
312 288
313 pstore_get_records(0); 289 pstore_get_records(0);
314 290
315 return 0; 291 return 0;
316fail:
317 iput(inode);
318 return err;
319} 292}
320 293
321static struct dentry *pstore_mount(struct file_system_type *fs_type, 294static struct dentry *pstore_mount(struct file_system_type *fs_type,
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 9ec22d3b429..82c585f715e 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -68,9 +68,25 @@ void pstore_set_kmsg_bytes(int bytes)
68/* Tag each group of saved records with a sequence number */ 68/* Tag each group of saved records with a sequence number */
69static int oopscount; 69static int oopscount;
70 70
71static char *reason_str[] = { 71static const char *get_reason_str(enum kmsg_dump_reason reason)
72 "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" 72{
73}; 73 switch (reason) {
74 case KMSG_DUMP_PANIC:
75 return "Panic";
76 case KMSG_DUMP_OOPS:
77 return "Oops";
78 case KMSG_DUMP_EMERG:
79 return "Emergency";
80 case KMSG_DUMP_RESTART:
81 return "Restart";
82 case KMSG_DUMP_HALT:
83 return "Halt";
84 case KMSG_DUMP_POWEROFF:
85 return "Poweroff";
86 default:
87 return "Unknown";
88 }
89}
74 90
75/* 91/*
76 * callback from kmsg_dump. (s2,l2) has the most recently 92 * callback from kmsg_dump. (s2,l2) has the most recently
@@ -85,17 +101,15 @@ static void pstore_dump(struct kmsg_dumper *dumper,
85 unsigned long s1_start, s2_start; 101 unsigned long s1_start, s2_start;
86 unsigned long l1_cpy, l2_cpy; 102 unsigned long l1_cpy, l2_cpy;
87 unsigned long size, total = 0; 103 unsigned long size, total = 0;
88 char *dst, *why; 104 char *dst;
105 const char *why;
89 u64 id; 106 u64 id;
90 int hsize, ret; 107 int hsize, ret;
91 unsigned int part = 1; 108 unsigned int part = 1;
92 unsigned long flags = 0; 109 unsigned long flags = 0;
93 int is_locked = 0; 110 int is_locked = 0;
94 111
95 if (reason < ARRAY_SIZE(reason_str)) 112 why = get_reason_str(reason);
96 why = reason_str[reason];
97 else
98 why = "Unknown";
99 113
100 if (in_nmi()) { 114 if (in_nmi()) {
101 is_locked = spin_trylock(&psinfo->buf_lock); 115 is_locked = spin_trylock(&psinfo->buf_lock);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 6b009548d2e..552e994e3aa 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -52,38 +52,6 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
52 return 0; 52 return 0;
53} 53}
54 54
55static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
56 int create)
57{
58 struct buffer_head *result = NULL;
59
60 if ( nr >= 0 )
61 nr = qnx4_block_map( inode, nr );
62 if (nr) {
63 result = sb_getblk(inode->i_sb, nr);
64 return result;
65 }
66 return NULL;
67}
68
69struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
70{
71 struct buffer_head *bh;
72
73 bh = qnx4_getblk(inode, block, create);
74 if (!bh || buffer_uptodate(bh)) {
75 return bh;
76 }
77 ll_rw_block(READ, 1, &bh);
78 wait_on_buffer(bh);
79 if (buffer_uptodate(bh)) {
80 return bh;
81 }
82 brelse(bh);
83
84 return NULL;
85}
86
87static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) 55static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create )
88{ 56{
89 unsigned long phys; 57 unsigned long phys;
@@ -98,23 +66,31 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
98 return 0; 66 return 0;
99} 67}
100 68
69static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset)
70{
71 u32 size = le32_to_cpu(extent->xtnt_size);
72 if (*offset < size)
73 return le32_to_cpu(extent->xtnt_blk) + *offset - 1;
74 *offset -= size;
75 return 0;
76}
77
101unsigned long qnx4_block_map( struct inode *inode, long iblock ) 78unsigned long qnx4_block_map( struct inode *inode, long iblock )
102{ 79{
103 int ix; 80 int ix;
104 long offset, i_xblk; 81 long i_xblk;
105 unsigned long block = 0;
106 struct buffer_head *bh = NULL; 82 struct buffer_head *bh = NULL;
107 struct qnx4_xblk *xblk = NULL; 83 struct qnx4_xblk *xblk = NULL;
108 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); 84 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
109 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); 85 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
86 u32 offset = iblock;
87 u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset);
110 88
111 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { 89 if (block) {
112 // iblock is in the first extent. This is easy. 90 // iblock is in the first extent. This is easy.
113 block = le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_blk) + iblock - 1;
114 } else { 91 } else {
115 // iblock is beyond first extent. We have to follow the extent chain. 92 // iblock is beyond first extent. We have to follow the extent chain.
116 i_xblk = le32_to_cpu(qnx4_inode->di_xblk); 93 i_xblk = le32_to_cpu(qnx4_inode->di_xblk);
117 offset = iblock - le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size);
118 ix = 0; 94 ix = 0;
119 while ( --nxtnt > 0 ) { 95 while ( --nxtnt > 0 ) {
120 if ( ix == 0 ) { 96 if ( ix == 0 ) {
@@ -130,12 +106,11 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
130 return -EIO; 106 return -EIO;
131 } 107 }
132 } 108 }
133 if ( offset < le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size) ) { 109 block = try_extent(&xblk->xblk_xtnts[ix], &offset);
110 if (block) {
134 // got it! 111 // got it!
135 block = le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_blk) + offset - 1;
136 break; 112 break;
137 } 113 }
138 offset -= le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size);
139 if ( ++ix >= xblk->xblk_num_xtnts ) { 114 if ( ++ix >= xblk->xblk_num_xtnts ) {
140 i_xblk = le32_to_cpu(xblk->xblk_next_xblk); 115 i_xblk = le32_to_cpu(xblk->xblk_next_xblk);
141 ix = 0; 116 ix = 0;
@@ -260,15 +235,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
260 } 235 }
261 236
262 ret = -ENOMEM; 237 ret = -ENOMEM;
263 s->s_root = d_alloc_root(root); 238 s->s_root = d_make_root(root);
264 if (s->s_root == NULL) 239 if (s->s_root == NULL)
265 goto outi; 240 goto outb;
266 241
267 brelse(bh); 242 brelse(bh);
268 return 0; 243 return 0;
269 244
270 outi:
271 iput(root);
272 outb: 245 outb:
273 kfree(qs->BitMap); 246 kfree(qs->BitMap);
274 out: 247 out:
@@ -288,44 +261,17 @@ static void qnx4_put_super(struct super_block *sb)
288 return; 261 return;
289} 262}
290 263
291static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
292{
293 return block_write_full_page(page,qnx4_get_block, wbc);
294}
295
296static int qnx4_readpage(struct file *file, struct page *page) 264static int qnx4_readpage(struct file *file, struct page *page)
297{ 265{
298 return block_read_full_page(page,qnx4_get_block); 266 return block_read_full_page(page,qnx4_get_block);
299} 267}
300 268
301static int qnx4_write_begin(struct file *file, struct address_space *mapping,
302 loff_t pos, unsigned len, unsigned flags,
303 struct page **pagep, void **fsdata)
304{
305 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
306 int ret;
307
308 *pagep = NULL;
309 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
310 qnx4_get_block,
311 &qnx4_inode->mmu_private);
312 if (unlikely(ret)) {
313 loff_t isize = mapping->host->i_size;
314 if (pos + len > isize)
315 vmtruncate(mapping->host, isize);
316 }
317
318 return ret;
319}
320static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 269static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
321{ 270{
322 return generic_block_bmap(mapping,block,qnx4_get_block); 271 return generic_block_bmap(mapping,block,qnx4_get_block);
323} 272}
324static const struct address_space_operations qnx4_aops = { 273static const struct address_space_operations qnx4_aops = {
325 .readpage = qnx4_readpage, 274 .readpage = qnx4_readpage,
326 .writepage = qnx4_writepage,
327 .write_begin = qnx4_write_begin,
328 .write_end = generic_write_end,
329 .bmap = qnx4_bmap 275 .bmap = qnx4_bmap
330}; 276};
331 277
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 275327b5615..a512c0b30e8 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -39,10 +39,6 @@ static int qnx4_match(int len, const char *name,
39 } else { 39 } else {
40 namelen = QNX4_SHORT_NAME_MAX; 40 namelen = QNX4_SHORT_NAME_MAX;
41 } 41 }
42 /* "" means "." ---> so paths like "/usr/lib//libc.a" work */
43 if (!len && (de->di_fname[0] == '.') && (de->di_fname[1] == '\0')) {
44 return 1;
45 }
46 thislen = strlen( de->di_fname ); 42 thislen = strlen( de->di_fname );
47 if ( thislen > namelen ) 43 if ( thislen > namelen )
48 thislen = namelen; 44 thislen = namelen;
@@ -72,7 +68,9 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
72 block = offset = blkofs = 0; 68 block = offset = blkofs = 0;
73 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { 69 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
74 if (!bh) { 70 if (!bh) {
75 bh = qnx4_bread(dir, blkofs, 0); 71 block = qnx4_block_map(dir, blkofs);
72 if (block)
73 bh = sb_bread(dir->i_sb, block);
76 if (!bh) { 74 if (!bh) {
77 blkofs++; 75 blkofs++;
78 continue; 76 continue;
@@ -80,7 +78,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
80 } 78 }
81 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset); 79 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset);
82 if (qnx4_match(len, name, bh, &offset)) { 80 if (qnx4_match(len, name, bh, &offset)) {
83 block = qnx4_block_map( dir, blkofs );
84 *ino = block * QNX4_INODES_PER_BLOCK + 81 *ino = block * QNX4_INODES_PER_BLOCK +
85 (offset / QNX4_DIR_ENTRY_SIZE) - 1; 82 (offset / QNX4_DIR_ENTRY_SIZE) - 1;
86 return bh; 83 return bh;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 33a60858203..244d4620189 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -27,8 +27,6 @@ extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, stru
27extern unsigned long qnx4_count_free_blocks(struct super_block *sb); 27extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
28extern unsigned long qnx4_block_map(struct inode *inode, long iblock); 28extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
29 29
30extern struct buffer_head *qnx4_bread(struct inode *, int, int);
31
32extern const struct inode_operations qnx4_dir_inode_operations; 30extern const struct inode_operations qnx4_dir_inode_operations;
33extern const struct file_operations qnx4_dir_operations; 31extern const struct file_operations qnx4_dir_operations;
34extern int qnx4_is_free(struct super_block *sb, long block); 32extern int qnx4_is_free(struct super_block *sb, long block);
diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig
new file mode 100644
index 00000000000..edbba5c17cc
--- /dev/null
+++ b/fs/qnx6/Kconfig
@@ -0,0 +1,26 @@
1config QNX6FS_FS
2 tristate "QNX6 file system support (read only)"
3 depends on BLOCK && CRC32
4 help
5 This is the file system used by the real-time operating systems
6 QNX 6 (also called QNX RTP).
7 Further information is available at <http://www.qnx.com/>.
8 Say Y if you intend to mount QNX hard disks or floppies formatted
 9	  with mkqnx6fs.
10	  However, keep in mind that this is currently a read-only driver!
11
12 To compile this file system support as a module, choose M here: the
13 module will be called qnx6.
14
15 If you don't know whether you need it, then you don't need it:
16 answer N.
17
18config QNX6FS_DEBUG
19 bool "QNX6 debugging information"
20 depends on QNX6FS_FS
21 help
22 Turns on extended debugging output.
23
24 If you are not a developer working on the QNX6FS, you probably don't
25 want this:
26 answer N.
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
new file mode 100644
index 00000000000..9dd06199afc
--- /dev/null
+++ b/fs/qnx6/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux qnx6-filesystem routines.
3#
4
5obj-$(CONFIG_QNX6FS_FS) += qnx6.o
6
7qnx6-objs := inode.o dir.o namei.o super_mmi.o
diff --git a/fs/qnx6/README b/fs/qnx6/README
new file mode 100644
index 00000000000..116d622026c
--- /dev/null
+++ b/fs/qnx6/README
@@ -0,0 +1,8 @@
1
2 This is a snapshot of the QNX6 filesystem for Linux.
 3 Please send diffs and remarks to <chaosman@ontika.net>.
4
5Credits:
6
7Al Viro <viro@ZenIV.linux.org.uk> (endless patience with me & support ;))
8Kai Bankett <chaosman@ontika.net> (Maintainer)
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
new file mode 100644
index 00000000000..dc597353db3
--- /dev/null
+++ b/fs/qnx6/dir.c
@@ -0,0 +1,291 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
15static unsigned qnx6_lfile_checksum(char *name, unsigned size)
16{
17 unsigned crc = 0;
18 char *end = name + size;
19 while (name < end) {
20 crc = ((crc >> 1) + *(name++)) ^
21 ((crc & 0x00000001) ? 0x80000000 : 0);
22 }
23 return crc;
24}
25
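
The long-filename checksum above is not a table-driven CRC32 (that is only used for the superblock); it is a one-bit-feedback shift-and-add over the raw name bytes. A minimal userspace sketch of the same recurrence follows, as a hypothetical test harness rather than anything in this patch; like the kernel version it accumulates plain char, so bytes >= 0x80 contribute sign-extended values on ABIs where char is signed:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Same recurrence as qnx6_lfile_checksum(): shift right by one, add
 * the next name byte, then XOR in 0x80000000 iff the bit shifted out
 * of the previous value was set. */
static uint32_t qnx6_name_checksum(const char *name, size_t size)
{
	uint32_t crc = 0;
	const char *end = name + size;

	while (name < end) {
		crc = ((crc >> 1) + *name++) ^
		      ((crc & 1) ? 0x80000000u : 0);
	}
	return crc;
}

int main(void)
{
	const char *name = "a_rather_long_example_filename.txt";

	printf("checksum: %08x\n",
	       (unsigned)qnx6_name_checksum(name, strlen(name)));
	return 0;
}
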
26static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
27{
28 struct address_space *mapping = dir->i_mapping;
29 struct page *page = read_mapping_page(mapping, n, NULL);
30 if (!IS_ERR(page))
31 kmap(page);
32 return page;
33}
34
35static inline unsigned long dir_pages(struct inode *inode)
36{
37 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
38}
39
40static unsigned last_entry(struct inode *inode, unsigned long page_nr)
41{
42 unsigned long last_byte = inode->i_size;
43 last_byte -= page_nr << PAGE_CACHE_SHIFT;
44 if (last_byte > PAGE_CACHE_SIZE)
45 last_byte = PAGE_CACHE_SIZE;
46 return last_byte / QNX6_DIR_ENTRY_SIZE;
47}
48
49static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
50 struct qnx6_long_dir_entry *de,
51 struct page **p)
52{
53 struct qnx6_sb_info *sbi = QNX6_SB(sb);
54 u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
55 u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
56 /* within page */
57 u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
58 struct address_space *mapping = sbi->longfile->i_mapping;
59 struct page *page = read_mapping_page(mapping, n, NULL);
60 if (IS_ERR(page))
61 return ERR_CAST(page);
62 kmap(*p = page);
63 return (struct qnx6_long_filename *)(page_address(page) + offs);
64}
65
66static int qnx6_dir_longfilename(struct inode *inode,
67 struct qnx6_long_dir_entry *de,
68 void *dirent, loff_t pos,
69 unsigned de_inode, filldir_t filldir)
70{
71 struct qnx6_long_filename *lf;
72 struct super_block *s = inode->i_sb;
73 struct qnx6_sb_info *sbi = QNX6_SB(s);
74 struct page *page;
75 int lf_size;
76
77 if (de->de_size != 0xff) {
78 /* error - long filename entries always have size 0xff
79 in direntry */
80 printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
81 de->de_size);
82 return 0;
83 }
84 lf = qnx6_longname(s, de, &page);
85 if (IS_ERR(lf)) {
86 printk(KERN_ERR "qnx6: Error reading longname\n");
87 return 0;
88 }
89
90 lf_size = fs16_to_cpu(sbi, lf->lf_size);
91
92 if (lf_size > QNX6_LONG_NAME_MAX) {
93 QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
94 printk(KERN_ERR "qnx6: Filename too long (%i)\n", lf_size);
95 qnx6_put_page(page);
96 return 0;
97 }
98
99 /* calc & validate longfilename checksum
100 mmi 3g filesystem does not have that checksum */
101 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
102 qnx6_lfile_checksum(lf->lf_fname, lf_size))
103 printk(KERN_INFO "qnx6: long filename checksum error.\n");
104
105 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
106 lf_size, lf->lf_fname, de_inode));
107 if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode,
108 DT_UNKNOWN) < 0) {
109 qnx6_put_page(page);
110 return 0;
111 }
112
113 qnx6_put_page(page);
114 /* success */
115 return 1;
116}
117
118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
119{
120 struct inode *inode = filp->f_path.dentry->d_inode;
121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
124 unsigned long npages = dir_pages(inode);
125 unsigned long n = pos >> PAGE_CACHE_SHIFT;
126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
127 bool done = false;
128
129 if (filp->f_pos >= inode->i_size)
130 return 0;
131
132 for ( ; !done && n < npages; n++, start = 0) {
133 struct page *page = qnx6_get_page(inode, n);
134 int limit = last_entry(inode, n);
135 struct qnx6_dir_entry *de;
136 int i = start;
137
138 if (IS_ERR(page)) {
139 printk(KERN_ERR "qnx6_readdir: read failed\n");
140 filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT;
141 return PTR_ERR(page);
142 }
143 de = ((struct qnx6_dir_entry *)page_address(page)) + start;
144 for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) {
145 int size = de->de_size;
146 u32 no_inode = fs32_to_cpu(sbi, de->de_inode);
147
148 if (!no_inode || !size)
149 continue;
150
151 if (size > QNX6_SHORT_NAME_MAX) {
152 /* long filename detected
153 get the filename from long filename
154 structure / block */
155 if (!qnx6_dir_longfilename(inode,
156 (struct qnx6_long_dir_entry *)de,
157 dirent, pos, no_inode,
158 filldir)) {
159 done = true;
160 break;
161 }
162 } else {
163 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
164 " inode:%u\n", size, de->de_fname,
165 no_inode));
166 if (filldir(dirent, de->de_fname, size,
167 pos, no_inode, DT_UNKNOWN)
168 < 0) {
169 done = true;
170 break;
171 }
172 }
173 }
174 qnx6_put_page(page);
175 }
176 filp->f_pos = pos;
177 return 0;
178}
179
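
qnx6_readdir() above treats f_pos as a plain byte offset into the directory's page-cache mapping: after aligning it down to an entry boundary, the page index and the slot within the page fall out of shifts and masks. The arithmetic in isolation, with illustrative constants (4 KiB pages assumed, and a 32-byte on-disk directory entry, which matches the short-name layout used here):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12	/* assuming 4 KiB pages */
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define PAGE_MASK		(~(PAGE_SIZE - 1))
#define QNX6_DIR_ENTRY_SIZE	32	/* assumed on-disk entry size */

int main(void)
{
	/* an arbitrary f_pos, aligned down to an entry boundary first,
	 * exactly as qnx6_readdir() does */
	unsigned long long f_pos = 4096 + 3 * QNX6_DIR_ENTRY_SIZE + 7;
	unsigned long long pos =
		f_pos & ~(unsigned long long)(QNX6_DIR_ENTRY_SIZE - 1);
	unsigned long page_nr = (unsigned long)(pos >> PAGE_SHIFT);
	unsigned slot = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE;

	printf("pos %llu -> page %lu, entry slot %u\n", pos, page_nr, slot);
	return 0;
}
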
180/*
181 * check if the long filename is correct.
182 */
183static unsigned qnx6_long_match(int len, const char *name,
184 struct qnx6_long_dir_entry *de, struct inode *dir)
185{
186 struct super_block *s = dir->i_sb;
187 struct qnx6_sb_info *sbi = QNX6_SB(s);
188 struct page *page;
189 int thislen;
190 struct qnx6_long_filename *lf = qnx6_longname(s, de, &page);
191
192 if (IS_ERR(lf))
193 return 0;
194
195 thislen = fs16_to_cpu(sbi, lf->lf_size);
196 if (len != thislen) {
197 qnx6_put_page(page);
198 return 0;
199 }
200 if (memcmp(name, lf->lf_fname, len) == 0) {
201 qnx6_put_page(page);
202 return fs32_to_cpu(sbi, de->de_inode);
203 }
204 qnx6_put_page(page);
205 return 0;
206}
207
208/*
209 * check if the filename is correct.
210 */
211static unsigned qnx6_match(struct super_block *s, int len, const char *name,
212 struct qnx6_dir_entry *de)
213{
214 struct qnx6_sb_info *sbi = QNX6_SB(s);
215 if (memcmp(name, de->de_fname, len) == 0)
216 return fs32_to_cpu(sbi, de->de_inode);
217 return 0;
218}
219
220
221unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
222 struct page **res_page)
223{
224 struct super_block *s = dir->i_sb;
225 struct qnx6_inode_info *ei = QNX6_I(dir);
226 struct page *page = NULL;
227 unsigned long start, n;
228 unsigned long npages = dir_pages(dir);
229 unsigned ino;
230 struct qnx6_dir_entry *de;
231 struct qnx6_long_dir_entry *lde;
232
233 *res_page = NULL;
234
235 if (npages == 0)
236 return 0;
237 start = ei->i_dir_start_lookup;
238 if (start >= npages)
239 start = 0;
240 n = start;
241
242 do {
243 page = qnx6_get_page(dir, n);
244 if (!IS_ERR(page)) {
245 int limit = last_entry(dir, n);
246 int i;
247
248 de = (struct qnx6_dir_entry *)page_address(page);
249 for (i = 0; i < limit; i++, de++) {
250 if (len <= QNX6_SHORT_NAME_MAX) {
251 /* short filename */
252 if (len != de->de_size)
253 continue;
254 ino = qnx6_match(s, len, name, de);
255 if (ino)
256 goto found;
257 } else if (de->de_size == 0xff) {
258 /* deal with long filename */
259 lde = (struct qnx6_long_dir_entry *)de;
260 ino = qnx6_long_match(len,
261 name, lde, dir);
262 if (ino)
263 goto found;
264 } else
265 printk(KERN_ERR "qnx6: undefined "
266 "filename size in inode.\n");
267 }
268 qnx6_put_page(page);
269 }
270
271 if (++n >= npages)
272 n = 0;
273 } while (n != start);
274 return 0;
275
276found:
277 *res_page = page;
278 ei->i_dir_start_lookup = n;
279 return ino;
280}
281
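
qnx6_find_entry() starts its scan at the page recorded by the last successful lookup (i_dir_start_lookup) and wraps around to cover the rest, so repeated lookups in a large directory tend to hit near where the previous one left off instead of always rescanning from page 0. The loop shape, reduced to a standalone sketch with a hypothetical match() callback (not a kernel interface):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Scan pages [start..npages) then [0..start), remembering where the
 * hit happened so the next call starts there. Mirrors the do/while
 * wrap-around in qnx6_find_entry(). */
static long scan_round_robin(unsigned long npages, unsigned long *start_hint,
			     bool (*match)(unsigned long page, void *arg),
			     void *arg)
{
	unsigned long start, n;

	if (npages == 0)
		return -1;
	start = *start_hint;
	if (start >= npages)
		start = 0;
	n = start;
	do {
		if (match(n, arg)) {
			*start_hint = n;	/* cache for next lookup */
			return (long)n;
		}
		if (++n >= npages)
			n = 0;
	} while (n != start);
	return -1;
}

static bool match_page7(unsigned long page, void *arg)
{
	(void)arg;
	return page == 7;
}

int main(void)
{
	unsigned long hint = 5;

	printf("found at page %ld, hint now %lu\n",
	       scan_round_robin(16, &hint, match_page7, NULL), hint);
	return 0;
}
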
282const struct file_operations qnx6_dir_operations = {
283 .llseek = generic_file_llseek,
284 .read = generic_read_dir,
285 .readdir = qnx6_readdir,
286 .fsync = generic_file_fsync,
287};
288
289const struct inode_operations qnx6_dir_inode_operations = {
290 .lookup = qnx6_lookup,
291};
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
new file mode 100644
index 00000000000..e44012dc564
--- /dev/null
+++ b/fs/qnx6/inode.c
@@ -0,0 +1,698 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/highuid.h>
17#include <linux/pagemap.h>
18#include <linux/buffer_head.h>
19#include <linux/writeback.h>
20#include <linux/statfs.h>
21#include <linux/parser.h>
22#include <linux/seq_file.h>
23#include <linux/mount.h>
24#include <linux/crc32.h>
25#include <linux/mpage.h>
26#include "qnx6.h"
27
28static const struct super_operations qnx6_sops;
29
30static void qnx6_put_super(struct super_block *sb);
31static struct inode *qnx6_alloc_inode(struct super_block *sb);
32static void qnx6_destroy_inode(struct inode *inode);
33static int qnx6_remount(struct super_block *sb, int *flags, char *data);
34static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf);
35static int qnx6_show_options(struct seq_file *seq, struct dentry *root);
36
37static const struct super_operations qnx6_sops = {
38 .alloc_inode = qnx6_alloc_inode,
39 .destroy_inode = qnx6_destroy_inode,
40 .put_super = qnx6_put_super,
41 .statfs = qnx6_statfs,
42 .remount_fs = qnx6_remount,
43 .show_options = qnx6_show_options,
44};
45
46static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
47{
48 struct super_block *sb = root->d_sb;
49 struct qnx6_sb_info *sbi = QNX6_SB(sb);
50
51 if (sbi->s_mount_opt & QNX6_MOUNT_MMI_FS)
52 seq_puts(seq, ",mmi_fs");
53 return 0;
54}
55
56static int qnx6_remount(struct super_block *sb, int *flags, char *data)
57{
58 *flags |= MS_RDONLY;
59 return 0;
60}
61
62static unsigned qnx6_get_devblock(struct super_block *sb, __fs32 block)
63{
64 struct qnx6_sb_info *sbi = QNX6_SB(sb);
65 return fs32_to_cpu(sbi, block) + sbi->s_blks_off;
66}
67
68static unsigned qnx6_block_map(struct inode *inode, unsigned iblock);
69
70static int qnx6_get_block(struct inode *inode, sector_t iblock,
71 struct buffer_head *bh, int create)
72{
73 unsigned phys;
74
75 QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
76 inode->i_ino, (unsigned long)iblock));
77
78 phys = qnx6_block_map(inode, iblock);
79 if (phys) {
80 /* logical block is before EOF */
81 map_bh(bh, inode->i_sb, phys);
82 }
83 return 0;
84}
85
86static int qnx6_check_blockptr(__fs32 ptr)
87{
88 if (ptr == ~(__fs32)0) {
89 printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
90 return 0;
91 }
92 return 1;
93}
94
95static int qnx6_readpage(struct file *file, struct page *page)
96{
97 return mpage_readpage(page, qnx6_get_block);
98}
99
100static int qnx6_readpages(struct file *file, struct address_space *mapping,
101 struct list_head *pages, unsigned nr_pages)
102{
103 return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block);
104}
105
106/*
107 * returns the block number for the no-th element in the tree
108 * inodebits required as there are multiple inodes in one inode block
109 */
110static unsigned qnx6_block_map(struct inode *inode, unsigned no)
111{
112 struct super_block *s = inode->i_sb;
113 struct qnx6_sb_info *sbi = QNX6_SB(s);
114 struct qnx6_inode_info *ei = QNX6_I(inode);
115 unsigned block = 0;
116 struct buffer_head *bh;
117 __fs32 ptr;
118 int levelptr;
119 int ptrbits = sbi->s_ptrbits;
120 int bitdelta;
121 u32 mask = (1 << ptrbits) - 1;
122 int depth = ei->di_filelevels;
123 int i;
124
125 bitdelta = ptrbits * depth;
126 levelptr = no >> bitdelta;
127
128 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
129 printk(KERN_ERR "qnx6: Requested file block number (%u) too big.\n",
130 no);
131 return 0;
132 }
133
134 block = qnx6_get_devblock(s, ei->di_block_ptr[levelptr]);
135
136 for (i = 0; i < depth; i++) {
137 bh = sb_bread(s, block);
138 if (!bh) {
139 printk(KERN_ERR "qnx6: Error reading block (%u)\n",
140 block);
141 return 0;
142 }
143 bitdelta -= ptrbits;
144 levelptr = (no >> bitdelta) & mask;
145 ptr = ((__fs32 *)bh->b_data)[levelptr];
146
147 if (!qnx6_check_blockptr(ptr))
148 return 0;
149
150 block = qnx6_get_devblock(s, ptr);
151 brelse(bh);
152 }
153 return block;
154}
155
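
The tree walk above is pure index arithmetic: a block of 2^b bytes holds blocksize/4 pointers, so s_ptrbits = ilog2(blocksize / 4), and the slot to follow at each level is a ptrbits-wide bit-field of the logical block number, consumed top-down via bitdelta. A userspace sketch of the slot derivation only (no device I/O; the 1 KiB block size, depth and block number are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned blocksize = 1024;	/* example fs block size */
	unsigned ptrs = blocksize / 4;	/* pointers per indirect block */
	unsigned ptrbits = 0;		/* ilog2(blocksize / 4) */
	unsigned no = 70000;		/* logical file block */
	int depth = 2;			/* di_filelevels */
	unsigned mask, bitdelta;
	int i;

	while (ptrs >>= 1)
		ptrbits++;		/* ptrbits = 8 for 1 KiB blocks */
	mask = (1u << ptrbits) - 1;
	bitdelta = ptrbits * depth;

	/* the top-level slot comes straight out of di_block_ptr[] */
	printf("direct slot: %u\n", no >> bitdelta);
	for (i = 0; i < depth; i++) {
		bitdelta -= ptrbits;
		printf("level %d slot: %u\n", i, (no >> bitdelta) & mask);
	}
	return 0;
}
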
156static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
157{
158 struct super_block *sb = dentry->d_sb;
159 struct qnx6_sb_info *sbi = QNX6_SB(sb);
160 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
161
162 buf->f_type = sb->s_magic;
163 buf->f_bsize = sb->s_blocksize;
164 buf->f_blocks = fs32_to_cpu(sbi, sbi->sb->sb_num_blocks);
165 buf->f_bfree = fs32_to_cpu(sbi, sbi->sb->sb_free_blocks);
166 buf->f_files = fs32_to_cpu(sbi, sbi->sb->sb_num_inodes);
167 buf->f_ffree = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes);
168 buf->f_bavail = buf->f_bfree;
169 buf->f_namelen = QNX6_LONG_NAME_MAX;
170 buf->f_fsid.val[0] = (u32)id;
171 buf->f_fsid.val[1] = (u32)(id >> 32);
172
173 return 0;
174}
175
176/*
177 * Check the root directory of the filesystem to make sure
178 * it really _is_ a qnx6 filesystem, and to check the size
179 * of the directory entry.
180 */
181static const char *qnx6_checkroot(struct super_block *s)
182{
183 static char match_root[2][3] = {".\0\0", "..\0"};
184 int i, error = 0;
185 struct qnx6_dir_entry *dir_entry;
186 struct inode *root = s->s_root->d_inode;
187 struct address_space *mapping = root->i_mapping;
188 struct page *page = read_mapping_page(mapping, 0, NULL);
189 if (IS_ERR(page))
190 return "error reading root directory";
191 kmap(page);
192 dir_entry = page_address(page);
193 for (i = 0; i < 2; i++) {
194 /* maximum 3 bytes - due to match_root limitation */
195 if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
196 error = 1;
197 }
198 qnx6_put_page(page);
199 if (error)
200 return "error validating root directory.";
201 return NULL;
202}
203
204#ifdef CONFIG_QNX6FS_DEBUG
205void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
206{
207 struct qnx6_sb_info *sbi = QNX6_SB(s);
208
209 QNX6DEBUG((KERN_INFO "magic: %08x\n",
210 fs32_to_cpu(sbi, sb->sb_magic)));
211 QNX6DEBUG((KERN_INFO "checksum: %08x\n",
212 fs32_to_cpu(sbi, sb->sb_checksum)));
213 QNX6DEBUG((KERN_INFO "serial: %llx\n",
214 fs64_to_cpu(sbi, sb->sb_serial)));
215 QNX6DEBUG((KERN_INFO "flags: %08x\n",
216 fs32_to_cpu(sbi, sb->sb_flags)));
217 QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
218 fs32_to_cpu(sbi, sb->sb_blocksize)));
219 QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
220 fs32_to_cpu(sbi, sb->sb_num_inodes)));
221 QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
222 fs32_to_cpu(sbi, sb->sb_free_inodes)));
223 QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
224 fs32_to_cpu(sbi, sb->sb_num_blocks)));
225 QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
226 fs32_to_cpu(sbi, sb->sb_free_blocks)));
227 QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
228 sb->Inode.levels));
229}
230#endif
231
232enum {
233 Opt_mmifs,
234 Opt_err
235};
236
237static const match_table_t tokens = {
238 {Opt_mmifs, "mmi_fs"},
239 {Opt_err, NULL}
240};
241
242static int qnx6_parse_options(char *options, struct super_block *sb)
243{
244 char *p;
245 struct qnx6_sb_info *sbi = QNX6_SB(sb);
246 substring_t args[MAX_OPT_ARGS];
247
248 if (!options)
249 return 1;
250
251 while ((p = strsep(&options, ",")) != NULL) {
252 int token;
253 if (!*p)
254 continue;
255
256 token = match_token(p, tokens, args);
257 switch (token) {
258 case Opt_mmifs:
259 set_opt(sbi->s_mount_opt, MMI_FS);
260 break;
261 default:
262 return 0;
263 }
264 }
265 return 1;
266}
267
268static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
269 int offset, int silent)
270{
271 struct qnx6_sb_info *sbi = QNX6_SB(s);
272 struct buffer_head *bh;
273 struct qnx6_super_block *sb;
274
275 /* Check the superblock signatures
276 start with the first superblock */
277 bh = sb_bread(s, offset);
278 if (!bh) {
279 printk(KERN_ERR "qnx6: unable to read the first superblock\n");
280 return NULL;
281 }
282 sb = (struct qnx6_super_block *)bh->b_data;
283 if (fs32_to_cpu(sbi, sb->sb_magic) != QNX6_SUPER_MAGIC) {
284 sbi->s_bytesex = BYTESEX_BE;
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n"));
289 return bh;
290 } else
291 sbi->s_bytesex = BYTESEX_LE;
292 if (!silent) {
293 if (offset == 0) {
294 printk(KERN_ERR "qnx6: wrong signature (magic)"
295 " in superblock #1.\n");
296 } else {
297 printk(KERN_INFO "qnx6: wrong signature (magic)"
298 " at position (0x%lx) - will try"
299 " alternative position (0x0000).\n",
300 offset * s->s_blocksize);
301 }
302 }
303 brelse(bh);
304 return NULL;
305 }
306 return bh;
307}
308
309static struct inode *qnx6_private_inode(struct super_block *s,
310 struct qnx6_root_node *p);
311
312static int qnx6_fill_super(struct super_block *s, void *data, int silent)
313{
314 struct buffer_head *bh1 = NULL, *bh2 = NULL;
315 struct qnx6_super_block *sb1 = NULL, *sb2 = NULL;
316 struct qnx6_sb_info *sbi;
317 struct inode *root;
318 const char *errmsg;
319 struct qnx6_sb_info *qs;
320 int ret = -EINVAL;
321 u64 offset;
322 int bootblock_offset = QNX6_BOOTBLOCK_SIZE;
323
324 qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL);
325 if (!qs)
326 return -ENOMEM;
327 s->s_fs_info = qs;
328
329 /* Superblock always is 512 Byte long */
330 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
331 printk(KERN_ERR "qnx6: unable to set blocksize\n");
332 goto outnobh;
333 }
334
335 /* parse the mount-options */
336 if (!qnx6_parse_options((char *) data, s)) {
337 printk(KERN_ERR "qnx6: invalid mount options.\n");
338 goto outnobh;
339 }
340 if (test_opt(s, MMI_FS)) {
341 sb1 = qnx6_mmi_fill_super(s, silent);
342 if (sb1)
343 goto mmi_success;
344 else
345 goto outnobh;
346 }
347 sbi = QNX6_SB(s);
348 sbi->s_bytesex = BYTESEX_LE;
349 /* Check the superblock signatures
350 start with the first superblock */
351 bh1 = qnx6_check_first_superblock(s,
352 bootblock_offset / QNX6_SUPERBLOCK_SIZE, silent);
353 if (!bh1) {
354 /* try again without bootblock offset */
355 bh1 = qnx6_check_first_superblock(s, 0, silent);
356 if (!bh1) {
357 printk(KERN_ERR "qnx6: unable to read the first superblock\n");
358 goto outnobh;
359 }
360 /* seems there is no bootblock at the partition start */
361 bootblock_offset = 0;
362 }
363 sb1 = (struct qnx6_super_block *)bh1->b_data;
364
365#ifdef CONFIG_QNX6FS_DEBUG
366 qnx6_superblock_debug(sb1, s);
367#endif
368
369 /* checksum check - start at byte 8 and end at byte 512 */
370 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
371 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
372 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
373 goto out;
374 }
375
376 /* set new blocksize */
377 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
378 printk(KERN_ERR "qnx6: unable to set blocksize\n");
379 goto out;
380 }
381 /* blocksize invalidates bh - pull it back in */
382 brelse(bh1);
383 bh1 = sb_bread(s, bootblock_offset >> s->s_blocksize_bits);
384 if (!bh1)
385 goto outnobh;
386 sb1 = (struct qnx6_super_block *)bh1->b_data;
387
388 /* calculate second superblock blocknumber */
389 offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) +
390 (bootblock_offset >> s->s_blocksize_bits) +
391 (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
392
393 /* set bootblock offset */
394 sbi->s_blks_off = (bootblock_offset >> s->s_blocksize_bits) +
395 (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
396
397 /* next the second superblock */
398 bh2 = sb_bread(s, offset);
399 if (!bh2) {
400 printk(KERN_ERR "qnx6: unable to read the second superblock\n");
401 goto out;
402 }
403 sb2 = (struct qnx6_super_block *)bh2->b_data;
404 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
405 if (!silent)
406 printk(KERN_ERR "qnx6: wrong signature (magic)"
407 " in superblock #2.\n");
408 goto out;
409 }
410
411 /* checksum check - start at byte 8 and end at byte 512 */
412 if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
413 crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
414 printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
415 goto out;
416 }
417
418 if (fs64_to_cpu(sbi, sb1->sb_serial) >=
419 fs64_to_cpu(sbi, sb2->sb_serial)) {
420 /* superblock #1 active */
421 sbi->sb_buf = bh1;
422 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
423 brelse(bh2);
424 printk(KERN_INFO "qnx6: superblock #1 active\n");
425 } else {
426 /* superblock #2 active */
427 sbi->sb_buf = bh2;
428 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
429 brelse(bh1);
430 printk(KERN_INFO "qnx6: superblock #2 active\n");
431 }
432mmi_success:
433 /* sanity check - limit maximum indirect pointer levels */
434 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
435 printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
436 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
437 goto out;
438 }
439 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
440 printk(KERN_ERR "qnx6: too many longfilename levels"
441 " (max %i, sb %i)\n",
442 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
443 goto out;
444 }
445 s->s_op = &qnx6_sops;
446 s->s_magic = QNX6_SUPER_MAGIC;
447 s->s_flags |= MS_RDONLY; /* Yup, read-only yet */
448
449 /* ease the later tree level calculations */
450 sbi = QNX6_SB(s);
451 sbi->s_ptrbits = ilog2(s->s_blocksize / 4);
452 sbi->inodes = qnx6_private_inode(s, &sb1->Inode);
453 if (!sbi->inodes)
454 goto out;
455 sbi->longfile = qnx6_private_inode(s, &sb1->Longfile);
456 if (!sbi->longfile)
457 goto out1;
458
459 /* prefetch root inode */
460 root = qnx6_iget(s, QNX6_ROOT_INO);
461 if (IS_ERR(root)) {
462 printk(KERN_ERR "qnx6: get inode failed\n");
463 ret = PTR_ERR(root);
464 goto out2;
465 }
466
467 ret = -ENOMEM;
468 s->s_root = d_make_root(root);
469 if (!s->s_root)
470 goto out2;
471
472 ret = -EINVAL;
473 errmsg = qnx6_checkroot(s);
474 if (errmsg != NULL) {
475 if (!silent)
476 printk(KERN_ERR "qnx6: %s\n", errmsg);
477 goto out3;
478 }
479 return 0;
480
481out3:
482 dput(s->s_root);
483 s->s_root = NULL;
484out2:
485 iput(sbi->longfile);
486out1:
487 iput(sbi->inodes);
488out:
489 if (bh1)
490 brelse(bh1);
491 if (bh2)
492 brelse(bh2);
493outnobh:
494 kfree(qs);
495 s->s_fs_info = NULL;
496 return ret;
497}
498
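
qnx6 keeps two superblock copies and, once both have passed the CRC32 over bytes 8..511, mounts whichever carries the higher serial number, on the assumption that the copy written last describes the current tree. The selection in miniature (a sketch, not the kernel code; failed checksums abort the mount earlier, via the goto out paths above):

#include <stdint.h>
#include <stdio.h>

/* Both copies have already passed the checksum test at this point;
 * ties go to copy #1, matching the >= comparison in qnx6_fill_super(). */
static int pick_active_sb(uint64_t serial1, uint64_t serial2)
{
	return serial1 >= serial2 ? 1 : 2;
}

int main(void)
{
	/* copy #2 was written last, so it wins */
	printf("superblock #%d active\n", pick_active_sb(41, 42));
	return 0;
}
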
499static void qnx6_put_super(struct super_block *sb)
500{
501 struct qnx6_sb_info *qs = QNX6_SB(sb);
502 brelse(qs->sb_buf);
503 iput(qs->longfile);
504 iput(qs->inodes);
505 kfree(qs);
506 sb->s_fs_info = NULL;
507 return;
508}
509
510static sector_t qnx6_bmap(struct address_space *mapping, sector_t block)
511{
512 return generic_block_bmap(mapping, block, qnx6_get_block);
513}
514static const struct address_space_operations qnx6_aops = {
515 .readpage = qnx6_readpage,
516 .readpages = qnx6_readpages,
517 .bmap = qnx6_bmap
518};
519
520static struct inode *qnx6_private_inode(struct super_block *s,
521 struct qnx6_root_node *p)
522{
523 struct inode *inode = new_inode(s);
524 if (inode) {
525 struct qnx6_inode_info *ei = QNX6_I(inode);
526 struct qnx6_sb_info *sbi = QNX6_SB(s);
527 inode->i_size = fs64_to_cpu(sbi, p->size);
528 memcpy(ei->di_block_ptr, p->ptr, sizeof(p->ptr));
529 ei->di_filelevels = p->levels;
530 inode->i_mode = S_IFREG | S_IRUSR; /* probably wrong */
531 inode->i_mapping->a_ops = &qnx6_aops;
532 }
533 return inode;
534}
535
536struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
537{
538 struct qnx6_sb_info *sbi = QNX6_SB(sb);
539 struct qnx6_inode_entry *raw_inode;
540 struct inode *inode;
541 struct qnx6_inode_info *ei;
542 struct address_space *mapping;
543 struct page *page;
544 u32 n, offs;
545
546 inode = iget_locked(sb, ino);
547 if (!inode)
548 return ERR_PTR(-ENOMEM);
549 if (!(inode->i_state & I_NEW))
550 return inode;
551
552 ei = QNX6_I(inode);
553
554 inode->i_mode = 0;
555
556 if (ino == 0) {
557 printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
558 "out of range\n",
559 sb->s_id, ino);
560 iget_failed(inode);
561 return ERR_PTR(-EIO);
562 }
563 n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
564 offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
565 mapping = sbi->inodes->i_mapping;
566 page = read_mapping_page(mapping, n, NULL);
567 if (IS_ERR(page)) {
568 printk(KERN_ERR "qnx6: major problem: unable to read inode from "
569 "dev %s\n", sb->s_id);
570 iget_failed(inode);
571 return ERR_CAST(page);
572 }
573 kmap(page);
574 raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
575
576 inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode);
577 inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid);
578 inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid);
579 inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size);
580 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime);
581 inode->i_mtime.tv_nsec = 0;
582 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_atime);
583 inode->i_atime.tv_nsec = 0;
584 inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_ctime);
585 inode->i_ctime.tv_nsec = 0;
586
587 /* calc blocks based on 512 byte blocksize */
588 inode->i_blocks = (inode->i_size + 511) >> 9;
589
590 memcpy(&ei->di_block_ptr, &raw_inode->di_block_ptr,
591 sizeof(raw_inode->di_block_ptr));
592 ei->di_filelevels = raw_inode->di_filelevels;
593
594 if (S_ISREG(inode->i_mode)) {
595 inode->i_fop = &generic_ro_fops;
596 inode->i_mapping->a_ops = &qnx6_aops;
597 } else if (S_ISDIR(inode->i_mode)) {
598 inode->i_op = &qnx6_dir_inode_operations;
599 inode->i_fop = &qnx6_dir_operations;
600 inode->i_mapping->a_ops = &qnx6_aops;
601 } else if (S_ISLNK(inode->i_mode)) {
602 inode->i_op = &page_symlink_inode_operations;
603 inode->i_mapping->a_ops = &qnx6_aops;
604 } else
605 init_special_inode(inode, inode->i_mode, 0);
606 qnx6_put_page(page);
607 unlock_new_inode(inode);
608 return inode;
609}
610
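
Inodes are read through the hidden sbi->inodes file, packed 2^(PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS) per page, and inode numbering starts at 1, which is why both calculations above subtract one first. The same mapping as a standalone sketch (4 KiB pages and 128-byte on-disk inodes assumed, i.e. QNX6_INODE_SIZE_BITS == 7):

#include <stdio.h>

#define PAGE_SHIFT		12	/* assuming 4 KiB pages */
#define QNX6_INODE_SIZE_BITS	7	/* assuming 128-byte inodes */

int main(void)
{
	unsigned ino = 100;	/* any valid inode number (>= 1) */
	unsigned n    = (ino - 1) >> (PAGE_SHIFT - QNX6_INODE_SIZE_BITS);
	unsigned offs = (ino - 1) &
			((1u << (PAGE_SHIFT - QNX6_INODE_SIZE_BITS)) - 1);

	/* 32 inodes per 4 KiB page: ino 100 is slot 3 of page 3 */
	printf("ino %u -> page %u, slot %u\n", ino, n, offs);
	return 0;
}
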
611static struct kmem_cache *qnx6_inode_cachep;
612
613static struct inode *qnx6_alloc_inode(struct super_block *sb)
614{
615 struct qnx6_inode_info *ei;
616 ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL);
617 if (!ei)
618 return NULL;
619 return &ei->vfs_inode;
620}
621
622static void qnx6_i_callback(struct rcu_head *head)
623{
624 struct inode *inode = container_of(head, struct inode, i_rcu);
625 INIT_LIST_HEAD(&inode->i_dentry);
626 kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
627}
628
629static void qnx6_destroy_inode(struct inode *inode)
630{
631 call_rcu(&inode->i_rcu, qnx6_i_callback);
632}
633
634static void init_once(void *foo)
635{
636 struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo;
637
638 inode_init_once(&ei->vfs_inode);
639}
640
641static int init_inodecache(void)
642{
643 qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
644 sizeof(struct qnx6_inode_info),
645 0, (SLAB_RECLAIM_ACCOUNT|
646 SLAB_MEM_SPREAD),
647 init_once);
648 if (!qnx6_inode_cachep)
649 return -ENOMEM;
650 return 0;
651}
652
653static void destroy_inodecache(void)
654{
655 kmem_cache_destroy(qnx6_inode_cachep);
656}
657
658static struct dentry *qnx6_mount(struct file_system_type *fs_type,
659 int flags, const char *dev_name, void *data)
660{
661 return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super);
662}
663
664static struct file_system_type qnx6_fs_type = {
665 .owner = THIS_MODULE,
666 .name = "qnx6",
667 .mount = qnx6_mount,
668 .kill_sb = kill_block_super,
669 .fs_flags = FS_REQUIRES_DEV,
670};
671
672static int __init init_qnx6_fs(void)
673{
674 int err;
675
676 err = init_inodecache();
677 if (err)
678 return err;
679
680 err = register_filesystem(&qnx6_fs_type);
681 if (err) {
682 destroy_inodecache();
683 return err;
684 }
685
686 printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
687 return 0;
688}
689
690static void __exit exit_qnx6_fs(void)
691{
692 unregister_filesystem(&qnx6_fs_type);
693 destroy_inodecache();
694}
695
696module_init(init_qnx6_fs)
697module_exit(exit_qnx6_fs)
698MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
new file mode 100644
index 00000000000..8a97289e04a
--- /dev/null
+++ b/fs/qnx6/namei.c
@@ -0,0 +1,42 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
15struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
16 struct nameidata *nd)
17{
18 unsigned ino;
19 struct page *page;
20 struct inode *foundinode = NULL;
21 const char *name = dentry->d_name.name;
22 int len = dentry->d_name.len;
23
24 if (len > QNX6_LONG_NAME_MAX)
25 return ERR_PTR(-ENAMETOOLONG);
26
27 ino = qnx6_find_entry(len, dir, name, &page);
28 if (ino) {
29 foundinode = qnx6_iget(dir->i_sb, ino);
30 qnx6_put_page(page);
31 if (IS_ERR(foundinode)) {
32 QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
33 " error %ld\n", PTR_ERR(foundinode)));
34 return ERR_CAST(foundinode);
35 }
36 } else {
37 QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
38 return NULL;
39 }
40 d_add(dentry, foundinode);
41 return NULL;
42}
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
new file mode 100644
index 00000000000..6c5e02a0b6a
--- /dev/null
+++ b/fs/qnx6/qnx6.h
@@ -0,0 +1,135 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 page map extension by Al Viro
10 *
11 */
12
13#include <linux/fs.h>
14#include <linux/pagemap.h>
15
16typedef __u16 __bitwise __fs16;
17typedef __u32 __bitwise __fs32;
18typedef __u64 __bitwise __fs64;
19
20#include <linux/qnx6_fs.h>
21
22#ifdef CONFIG_QNX6FS_DEBUG
23#define QNX6DEBUG(X) printk X
24#else
25#define QNX6DEBUG(X) (void) 0
26#endif
27
28struct qnx6_sb_info {
29 struct buffer_head *sb_buf; /* superblock buffer */
30 struct qnx6_super_block *sb; /* our superblock */
31 int s_blks_off; /* blkoffset fs-startpoint */
32 int s_ptrbits; /* indirect pointer bitfield */
33 unsigned long s_mount_opt; /* all mount options */
34 int s_bytesex; /* holds endianness info */
35 struct inode *inodes;
36 struct inode *longfile;
37};
38
39struct qnx6_inode_info {
40 __fs32 di_block_ptr[QNX6_NO_DIRECT_POINTERS];
41 __u8 di_filelevels;
42 __u32 i_dir_start_lookup;
43 struct inode vfs_inode;
44};
45
46extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
47extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
48 struct nameidata *nd);
49
50#ifdef CONFIG_QNX6FS_DEBUG
51extern void qnx6_superblock_debug(struct qnx6_super_block *,
52 struct super_block *);
53#endif
54
55extern const struct inode_operations qnx6_dir_inode_operations;
56extern const struct file_operations qnx6_dir_operations;
57
58static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb)
59{
60 return sb->s_fs_info;
61}
62
63static inline struct qnx6_inode_info *QNX6_I(struct inode *inode)
64{
65 return container_of(inode, struct qnx6_inode_info, vfs_inode);
66}
67
68#define clear_opt(o, opt) (o &= ~(QNX6_MOUNT_##opt))
69#define set_opt(o, opt) (o |= (QNX6_MOUNT_##opt))
70#define test_opt(sb, opt) (QNX6_SB(sb)->s_mount_opt & \
71 QNX6_MOUNT_##opt)
72enum {
73 BYTESEX_LE,
74 BYTESEX_BE,
75};
76
77static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n)
78{
79 if (sbi->s_bytesex == BYTESEX_LE)
80 return le64_to_cpu((__force __le64)n);
81 else
82 return be64_to_cpu((__force __be64)n);
83}
84
85static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n)
86{
87 if (sbi->s_bytesex == BYTESEX_LE)
88 return (__force __fs64)cpu_to_le64(n);
89 else
90 return (__force __fs64)cpu_to_be64(n);
91}
92
93static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n)
94{
95 if (sbi->s_bytesex == BYTESEX_LE)
96 return le32_to_cpu((__force __le32)n);
97 else
98 return be32_to_cpu((__force __be32)n);
99}
100
101static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n)
102{
103 if (sbi->s_bytesex == BYTESEX_LE)
104 return (__force __fs32)cpu_to_le32(n);
105 else
106 return (__force __fs32)cpu_to_be32(n);
107}
108
109static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n)
110{
111 if (sbi->s_bytesex == BYTESEX_LE)
112 return le16_to_cpu((__force __le16)n);
113 else
114 return be16_to_cpu((__force __be16)n);
115}
116
117static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n)
118{
119 if (sbi->s_bytesex == BYTESEX_LE)
120 return (__force __fs16)cpu_to_le16(n);
121 else
122 return (__force __fs16)cpu_to_be16(n);
123}
124
125extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
126 int silent);
127
128static inline void qnx6_put_page(struct page *page)
129{
130 kunmap(page);
131 page_cache_release(page);
132}
133
134extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
135 struct page **res_page);
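
The fs16/fs32/fs64 helpers above push the byte-order decision to mount time (s_bytesex) rather than baking it into the on-disk types, which is what lets the same driver mount both little- and big-endian images after the magic-number probe flips s_bytesex. The pattern in miniature, as a userspace sketch that swaps by hand instead of using the kernel's le/be helpers:

#include <stdint.h>
#include <stdio.h>

enum { BYTESEX_LE, BYTESEX_BE };

static uint32_t swab32(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0xff00) |
	       ((x << 8) & 0xff0000) | (x << 24);
}

/* Decode a 32-bit on-disk value according to the byte order detected
 * when the superblock magic was probed - the same decision
 * qnx6_check_first_superblock() makes by flipping s_bytesex. */
static uint32_t fs32_to_cpu(int bytesex, uint32_t raw)
{
	static const union { uint16_t u16; uint8_t u8[2]; } probe = { 1 };
	int host_is_le = probe.u8[0] == 1;

	if ((bytesex == BYTESEX_LE) == host_is_le)
		return raw;
	return swab32(raw);
}

int main(void)
{
	uint32_t on_disk = 0x12345678;	/* arbitrary on-disk value */

	printf("LE fs: %08x, BE fs: %08x\n",
	       fs32_to_cpu(BYTESEX_LE, on_disk),
	       fs32_to_cpu(BYTESEX_BE, on_disk));
	return 0;
}
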
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
new file mode 100644
index 00000000000..29c32cba62d
--- /dev/null
+++ b/fs/qnx6/super_mmi.c
@@ -0,0 +1,150 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 *
10 */
11
12#include <linux/buffer_head.h>
13#include <linux/slab.h>
14#include <linux/crc32.h>
15#include "qnx6.h"
16
17static void qnx6_mmi_copy_sb(struct qnx6_super_block *qsb,
18 struct qnx6_mmi_super_block *sb)
19{
20 qsb->sb_magic = sb->sb_magic;
21 qsb->sb_checksum = sb->sb_checksum;
22 qsb->sb_serial = sb->sb_serial;
23 qsb->sb_blocksize = sb->sb_blocksize;
24 qsb->sb_num_inodes = sb->sb_num_inodes;
25 qsb->sb_free_inodes = sb->sb_free_inodes;
26 qsb->sb_num_blocks = sb->sb_num_blocks;
27 qsb->sb_free_blocks = sb->sb_free_blocks;
28
29 /* the rest of the superblock is the same */
30 memcpy(&qsb->Inode, &sb->Inode, sizeof(sb->Inode));
31 memcpy(&qsb->Bitmap, &sb->Bitmap, sizeof(sb->Bitmap));
32 memcpy(&qsb->Longfile, &sb->Longfile, sizeof(sb->Longfile));
33}
34
35struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
36{
37 struct buffer_head *bh1, *bh2 = NULL;
38 struct qnx6_mmi_super_block *sb1, *sb2;
39 struct qnx6_super_block *qsb = NULL;
40 struct qnx6_sb_info *sbi;
41 __u64 offset;
42
43 /* Check the superblock signatures
44 start with the first superblock */
45 bh1 = sb_bread(s, 0);
46 if (!bh1) {
47 printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
48 return NULL;
49 }
50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
51 sbi = QNX6_SB(s);
52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
53 if (!silent) {
54 printk(KERN_ERR "qnx6: wrong signature (magic) in"
55 " superblock #1.\n");
56 goto out;
57 }
58 }
59
60 /* checksum check - start at byte 8 and end at byte 512 */
61 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
62 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
63 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
64 goto out;
65 }
66
67 /* calculate second superblock blocknumber */
68 offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + QNX6_SUPERBLOCK_AREA /
69 fs32_to_cpu(sbi, sb1->sb_blocksize);
70
71 /* set new blocksize */
72 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
73 printk(KERN_ERR "qnx6: unable to set blocksize\n");
74 goto out;
75 }
76 /* blocksize invalidates bh - pull it back in */
77 brelse(bh1);
78 bh1 = sb_bread(s, 0);
79 if (!bh1)
80 goto out;
81 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
82
83 /* read second superblock */
84 bh2 = sb_bread(s, offset);
85 if (!bh2) {
86 printk(KERN_ERR "qnx6: unable to read the second superblock\n");
87 goto out;
88 }
89 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
90 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
91 if (!silent)
92 printk(KERN_ERR "qnx6: wrong signature (magic) in"
93 " superblock #2.\n");
94 goto out;
95 }
96
97 /* checksum check - start at byte 8 and end at byte 512 */
98 if (fs32_to_cpu(sbi, sb2->sb_checksum)
99 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
100 printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
101 goto out;
102 }
103
104 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
105 if (!qsb) {
106 printk(KERN_ERR "qnx6: unable to allocate memory.\n");
107 goto out;
108 }
109
110 if (fs64_to_cpu(sbi, sb1->sb_serial) >
111 fs64_to_cpu(sbi, sb2->sb_serial)) {
112 /* superblock #1 active */
113 qnx6_mmi_copy_sb(qsb, sb1);
114#ifdef CONFIG_QNX6FS_DEBUG
115 qnx6_superblock_debug(qsb, s);
116#endif
117 memcpy(bh1->b_data, qsb, sizeof(struct qnx6_super_block));
118
119 sbi->sb_buf = bh1;
120 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
121 brelse(bh2);
122 printk(KERN_INFO "qnx6: superblock #1 active\n");
123 } else {
124 /* superblock #2 active */
125 qnx6_mmi_copy_sb(qsb, sb2);
126#ifdef CONFIG_QNX6FS_DEBUG
127 qnx6_superblock_debug(qsb, s);
128#endif
129 memcpy(bh2->b_data, qsb, sizeof(struct qnx6_super_block));
130
131 sbi->sb_buf = bh2;
132 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
133 brelse(bh1);
134 printk(KERN_INFO "qnx6: superblock #2 active\n");
135 }
136 kfree(qsb);
137
138 /* offset for mmi_fs is just SUPERBLOCK_AREA bytes */
139 sbi->s_blks_off = QNX6_SUPERBLOCK_AREA / s->s_blocksize;
140
141 /* success */
142 return sbi->sb;
143
144out:
145 if (bh1 != NULL)
146 brelse(bh1);
147 if (bh2 != NULL)
148 brelse(bh2);
149 return NULL;
150}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 46741970371..d69a1d1d7e1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -71,6 +71,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
+#include <linux/sched.h>
 #include <linux/kmod.h>
 #include <linux/namei.h>
 #include <linux/capability.h>
@@ -1109,6 +1110,13 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number)
 		clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 }
 
+struct dquot_warn {
+	struct super_block *w_sb;
+	qid_t w_dq_id;
+	short w_dq_type;
+	short w_type;
+};
+
 static int warning_issued(struct dquot *dquot, const int warntype)
 {
 	int flag = (warntype == QUOTA_NL_BHARDWARN ||
@@ -1124,41 +1132,42 @@ static int warning_issued(struct dquot *dquot, const int warntype)
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 static int flag_print_warnings = 1;
 
-static int need_print_warning(struct dquot *dquot)
+static int need_print_warning(struct dquot_warn *warn)
 {
 	if (!flag_print_warnings)
 		return 0;
 
-	switch (dquot->dq_type) {
+	switch (warn->w_dq_type) {
 		case USRQUOTA:
-			return current_fsuid() == dquot->dq_id;
+			return current_fsuid() == warn->w_dq_id;
 		case GRPQUOTA:
-			return in_group_p(dquot->dq_id);
+			return in_group_p(warn->w_dq_id);
 	}
 	return 0;
 }
 
 /* Print warning to user which exceeded quota */
-static void print_warning(struct dquot *dquot, const int warntype)
+static void print_warning(struct dquot_warn *warn)
 {
 	char *msg = NULL;
 	struct tty_struct *tty;
+	int warntype = warn->w_type;
 
 	if (warntype == QUOTA_NL_IHARDBELOW ||
 	    warntype == QUOTA_NL_ISOFTBELOW ||
 	    warntype == QUOTA_NL_BHARDBELOW ||
-	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
+	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn))
 		return;
 
 	tty = get_current_tty();
 	if (!tty)
 		return;
-	tty_write_message(tty, dquot->dq_sb->s_id);
+	tty_write_message(tty, warn->w_sb->s_id);
 	if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN)
 		tty_write_message(tty, ": warning, ");
 	else
 		tty_write_message(tty, ": write failed, ");
-	tty_write_message(tty, quotatypes[dquot->dq_type]);
+	tty_write_message(tty, quotatypes[warn->w_dq_type]);
 	switch (warntype) {
 		case QUOTA_NL_IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -1184,26 +1193,34 @@ static void print_warning(struct dquot *dquot, const int warntype)
 }
 #endif
 
+static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
+			    int warntype)
+{
+	if (warning_issued(dquot, warntype))
+		return;
+	warn->w_type = warntype;
+	warn->w_sb = dquot->dq_sb;
+	warn->w_dq_id = dquot->dq_id;
+	warn->w_dq_type = dquot->dq_type;
+}
+
 /*
  * Write warnings to the console and send warning messages over netlink.
  *
- * Note that this function can sleep.
+ * Note that this function can call into tty and networking code.
  */
-static void flush_warnings(struct dquot *const *dquots, char *warntype)
+static void flush_warnings(struct dquot_warn *warn)
 {
-	struct dquot *dq;
 	int i;
 
 	for (i = 0; i < MAXQUOTAS; i++) {
-		dq = dquots[i];
-		if (dq && warntype[i] != QUOTA_NL_NOWARN &&
-		    !warning_issued(dq, warntype[i])) {
+		if (warn[i].w_type == QUOTA_NL_NOWARN)
+			continue;
 #ifdef CONFIG_PRINT_QUOTA_WARNING
-			print_warning(dq, warntype[i]);
+		print_warning(&warn[i]);
 #endif
-			quota_send_warning(dq->dq_type, dq->dq_id,
-					   dq->dq_sb->s_dev, warntype[i]);
-		}
+		quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id,
+				   warn[i].w_sb->s_dev, warn[i].w_type);
 	}
 }
 
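The point of the dquot_warn conversion in this hunk is ordering: warnings are only recorded via prepare_warning() while dq_data_lock and dqptr_sem are held, and flush_warnings(), which may block in the tty layer or netlink, now runs only after the locks are released, with everything it needs copied into an on-stack array. The collect-then-flush shape as a standalone sketch (illustrative types and a stand-in emit(); not the kernel API):

#include <pthread.h>
#include <stdio.h>

#define MAXQUOTAS	3
#define NOWARN		0

struct warn {
	int type;	/* NOWARN or a warning code */
	int id;		/* copied out while locked */
};

static pthread_mutex_t data_lock = PTHREAD_MUTEX_INITIALIZER;

static void emit(const struct warn *w)	/* may block; stands in for tty/netlink */
{
	printf("warning %d for id %d\n", w->type, w->id);
}

int main(void)
{
	struct warn warn[MAXQUOTAS];
	int i;

	pthread_mutex_lock(&data_lock);
	for (i = 0; i < MAXQUOTAS; i++) {
		warn[i].type = NOWARN;
		if (i == 1) {		/* pretend quota 1 went over limit */
			warn[i].type = 42;
			warn[i].id = 1000;
		}
	}
	pthread_mutex_unlock(&data_lock);

	/* flush outside the lock, like flush_warnings() after up_read() */
	for (i = 0; i < MAXQUOTAS; i++)
		if (warn[i].type != NOWARN)
			emit(&warn[i]);
	return 0;
}
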
@@ -1217,11 +1234,11 @@ static int ignore_hardlimit(struct dquot *dquot)
 }
 
 /* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes,
+		     struct dquot_warn *warn)
 {
 	qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
 
-	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return 0;
@@ -1229,7 +1246,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 	if (dquot->dq_dqb.dqb_ihardlimit &&
 	    newinodes > dquot->dq_dqb.dqb_ihardlimit &&
 	    !ignore_hardlimit(dquot)) {
-		*warntype = QUOTA_NL_IHARDWARN;
+		prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
 		return -EDQUOT;
 	}
 
@@ -1238,14 +1255,14 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 	    dquot->dq_dqb.dqb_itime &&
 	    get_seconds() >= dquot->dq_dqb.dqb_itime &&
 	    !ignore_hardlimit(dquot)) {
-		*warntype = QUOTA_NL_ISOFTLONGWARN;
+		prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
 		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
 	    newinodes > dquot->dq_dqb.dqb_isoftlimit &&
 	    dquot->dq_dqb.dqb_itime == 0) {
-		*warntype = QUOTA_NL_ISOFTWARN;
+		prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN);
 		dquot->dq_dqb.dqb_itime = get_seconds() +
 		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
 	}
@@ -1254,12 +1271,12 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 }
 
 /* needs dq_data_lock */
-static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
+static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
+		     struct dquot_warn *warn)
 {
 	qsize_t tspace;
 	struct super_block *sb = dquot->dq_sb;
 
-	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return 0;
@@ -1271,7 +1288,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	    tspace > dquot->dq_dqb.dqb_bhardlimit &&
 	    !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			*warntype = QUOTA_NL_BHARDWARN;
+			prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
 		return -EDQUOT;
 	}
 
@@ -1281,7 +1298,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	    get_seconds() >= dquot->dq_dqb.dqb_btime &&
 	    !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			*warntype = QUOTA_NL_BSOFTLONGWARN;
+			prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
 		return -EDQUOT;
 	}
 
@@ -1289,7 +1306,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	    tspace > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime == 0) {
 		if (!prealloc) {
-			*warntype = QUOTA_NL_BSOFTWARN;
+			prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
 			dquot->dq_dqb.dqb_btime = get_seconds() +
 			    sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace;
 		}
@@ -1542,10 +1559,9 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
 	int cnt, ret = 0;
-	char warntype[MAXQUOTAS];
-	int warn = flags & DQUOT_SPACE_WARN;
+	struct dquot_warn warn[MAXQUOTAS];
+	struct dquot **dquots = inode->i_dquot;
 	int reserve = flags & DQUOT_SPACE_RESERVE;
-	int nofail = flags & DQUOT_SPACE_NOFAIL;
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
@@ -1558,36 +1574,36 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		warntype[cnt] = QUOTA_NL_NOWARN;
+		warn[cnt].w_type = QUOTA_NL_NOWARN;
 
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquots[cnt])
 			continue;
-		ret = check_bdq(inode->i_dquot[cnt], number, !warn,
-				warntype+cnt);
-		if (ret && !nofail) {
+		ret = check_bdq(dquots[cnt], number,
+				!(flags & DQUOT_SPACE_WARN), &warn[cnt]);
+		if (ret && !(flags & DQUOT_SPACE_NOFAIL)) {
 			spin_unlock(&dq_data_lock);
 			goto out_flush_warn;
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquots[cnt])
 			continue;
 		if (reserve)
-			dquot_resv_space(inode->i_dquot[cnt], number);
+			dquot_resv_space(dquots[cnt], number);
 		else
-			dquot_incr_space(inode->i_dquot[cnt], number);
+			dquot_incr_space(dquots[cnt], number);
 	}
 	inode_incr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_flush_warn;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquots);
 out_flush_warn:
-	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	flush_warnings(warn);
 out:
 	return ret;
 }
@@ -1599,36 +1615,37 @@ EXPORT_SYMBOL(__dquot_alloc_space);
 int dquot_alloc_inode(const struct inode *inode)
 {
 	int cnt, ret = 0;
-	char warntype[MAXQUOTAS];
+	struct dquot_warn warn[MAXQUOTAS];
+	struct dquot * const *dquots = inode->i_dquot;
 
 	/* First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode))
 		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		warntype[cnt] = QUOTA_NL_NOWARN;
+		warn[cnt].w_type = QUOTA_NL_NOWARN;
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquots[cnt])
 			continue;
-		ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+		ret = check_idq(dquots[cnt], 1, &warn[cnt]);
 		if (ret)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquots[cnt])
 			continue;
-		dquot_incr_inodes(inode->i_dquot[cnt], 1);
+		dquot_incr_inodes(dquots[cnt], 1);
 	}
 
 warn_put_all:
 	spin_unlock(&dq_data_lock);
 	if (ret == 0)
-		mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+		mark_all_dquot_dirty(dquots);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	flush_warnings(warn);
 	return ret;
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
@@ -1668,7 +1685,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
 void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 {
 	unsigned int cnt;
-	char warntype[MAXQUOTAS];
+	struct dquot_warn warn[MAXQUOTAS];
+	struct dquot **dquots = inode->i_dquot;
 	int reserve = flags & DQUOT_SPACE_RESERVE;
 
 	/* First test before acquiring mutex - solves deadlocks when we
@@ -1681,23 +1699,28 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		int wtype;
+
+		warn[cnt].w_type = QUOTA_NL_NOWARN;
+		if (!dquots[cnt])
 			continue;
-		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+		wtype = info_bdq_free(dquots[cnt], number);
+		if (wtype != QUOTA_NL_NOWARN)
+			prepare_warning(&warn[cnt], dquots[cnt], wtype);
 		if (reserve)
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+			dquot_free_reserved_space(dquots[cnt], number);
 		else
-			dquot_decr_space(inode->i_dquot[cnt], number);
+			dquot_decr_space(dquots[cnt], number);
 	}
 	inode_decr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_unlock;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquots);
 out_unlock:
-	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	flush_warnings(warn);
 }
 EXPORT_SYMBOL(__dquot_free_space);
 
@@ -1707,7 +1730,8 @@ EXPORT_SYMBOL(__dquot_free_space);
 void dquot_free_inode(const struct inode *inode)
 {
 	unsigned int cnt;
-	char warntype[MAXQUOTAS];
+	struct dquot_warn warn[MAXQUOTAS];
+	struct dquot * const *dquots = inode->i_dquot;
 
 	/* First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex */
@@ -1717,15 +1741,20 @@ void dquot_free_inode(const struct inode *inode)
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		int wtype;
+
+		warn[cnt].w_type = QUOTA_NL_NOWARN;
+		if (!dquots[cnt])
 			continue;
-		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
-		dquot_decr_inodes(inode->i_dquot[cnt], 1);
+		wtype = info_idq_free(dquots[cnt], 1);
+		if (wtype != QUOTA_NL_NOWARN)
+			prepare_warning(&warn[cnt], dquots[cnt], wtype);
+		dquot_decr_inodes(dquots[cnt], 1);
 	}
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+	mark_all_dquot_dirty(dquots);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	flush_warnings(warn);
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
@@ -1746,16 +1775,20 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	struct dquot *transfer_from[MAXQUOTAS] = {};
 	int cnt, ret = 0;
 	char is_valid[MAXQUOTAS] = {};
-	char warntype_to[MAXQUOTAS];
-	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
+	struct dquot_warn warn_to[MAXQUOTAS];
+	struct dquot_warn warn_from_inodes[MAXQUOTAS];
+	struct dquot_warn warn_from_space[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return 0;
 	/* Initialize the arrays */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		warntype_to[cnt] = QUOTA_NL_NOWARN;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		warn_to[cnt].w_type = QUOTA_NL_NOWARN;
+		warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
+		warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
+	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
 		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1777,10 +1810,10 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1777 continue; 1810 continue;
1778 is_valid[cnt] = 1; 1811 is_valid[cnt] = 1;
1779 transfer_from[cnt] = inode->i_dquot[cnt]; 1812 transfer_from[cnt] = inode->i_dquot[cnt];
1780 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); 1813 ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
1781 if (ret) 1814 if (ret)
1782 goto over_quota; 1815 goto over_quota;
1783 ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt); 1816 ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
1784 if (ret) 1817 if (ret)
1785 goto over_quota; 1818 goto over_quota;
1786 } 1819 }
@@ -1793,10 +1826,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1793 continue; 1826 continue;
1794 /* Due to IO error we might not have transfer_from[] structure */ 1827 /* Due to IO error we might not have transfer_from[] structure */
1795 if (transfer_from[cnt]) { 1828 if (transfer_from[cnt]) {
1796 warntype_from_inodes[cnt] = 1829 int wtype;
1797 info_idq_free(transfer_from[cnt], 1); 1830 wtype = info_idq_free(transfer_from[cnt], 1);
1798 warntype_from_space[cnt] = 1831 if (wtype != QUOTA_NL_NOWARN)
1799 info_bdq_free(transfer_from[cnt], space); 1832 prepare_warning(&warn_from_inodes[cnt],
1833 transfer_from[cnt], wtype);
1834 wtype = info_bdq_free(transfer_from[cnt], space);
1835 if (wtype != QUOTA_NL_NOWARN)
1836 prepare_warning(&warn_from_space[cnt],
1837 transfer_from[cnt], wtype);
1800 dquot_decr_inodes(transfer_from[cnt], 1); 1838 dquot_decr_inodes(transfer_from[cnt], 1);
1801 dquot_decr_space(transfer_from[cnt], cur_space); 1839 dquot_decr_space(transfer_from[cnt], cur_space);
1802 dquot_free_reserved_space(transfer_from[cnt], 1840 dquot_free_reserved_space(transfer_from[cnt],
@@ -1814,9 +1852,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1814 1852
1815 mark_all_dquot_dirty(transfer_from); 1853 mark_all_dquot_dirty(transfer_from);
1816 mark_all_dquot_dirty(transfer_to); 1854 mark_all_dquot_dirty(transfer_to);
1817 flush_warnings(transfer_to, warntype_to); 1855 flush_warnings(warn_to);
1818 flush_warnings(transfer_from, warntype_from_inodes); 1856 flush_warnings(warn_from_inodes);
1819 flush_warnings(transfer_from, warntype_from_space); 1857 flush_warnings(warn_from_space);
1820 /* Pass back references to put */ 1858 /* Pass back references to put */
1821 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1859 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1822 if (is_valid[cnt]) 1860 if (is_valid[cnt])
@@ -1825,7 +1863,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1825over_quota: 1863over_quota:
1826 spin_unlock(&dq_data_lock); 1864 spin_unlock(&dq_data_lock);
1827 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1865 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1828 flush_warnings(transfer_to, warntype_to); 1866 flush_warnings(warn_to);
1829 return ret; 1867 return ret;
1830} 1868}
1831EXPORT_SYMBOL(__dquot_transfer); 1869EXPORT_SYMBOL(__dquot_transfer);
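All three hunks above converge on the same shape: while dq_data_lock (and dqptr_sem) are held, a warning is only recorded into a struct dquot_warn via prepare_warning(); flush_warnings() then emits the recorded warnings after the locks are dropped, so the warning path never runs under quota locks. A self-contained userspace sketch of that record-then-flush shape (names mirror the diff, but this is illustrative, not the kernel code):

#include <stdio.h>

#define MAXQUOTAS	3
#define QUOTA_NL_NOWARN	0

struct dquot { int id; };

struct dquot_warn {
	int w_type;			/* QUOTA_NL_NOWARN means nothing to report */
	struct dquot *w_dquot;		/* target captured while locks are held */
};

static void prepare_warning(struct dquot_warn *warn, struct dquot *dq, int wtype)
{
	warn->w_type = wtype;		/* record only; no printing under the lock */
	warn->w_dquot = dq;
}

static void flush_warnings(struct dquot_warn *warn)
{
	int i;

	for (i = 0; i < MAXQUOTAS; i++)
		if (warn[i].w_type != QUOTA_NL_NOWARN)
			printf("warning %d for dquot %d\n",
			       warn[i].w_type, warn[i].w_dquot->id);
}

int main(void)
{
	struct dquot dq = { 7 };
	struct dquot_warn warn[MAXQUOTAS];
	int i;

	for (i = 0; i < MAXQUOTAS; i++)
		warn[i].w_type = QUOTA_NL_NOWARN;
	/* inside the locked section only prepare_warning() runs ... */
	prepare_warning(&warn[0], &dq, 2);
	/* ... and only after the locks are dropped do we flush */
	flush_warnings(warn);
	return 0;
}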
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fc2c4388d12..9a391204ca2 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
282 case Q_XGETQUOTA: 282 case Q_XGETQUOTA:
283 return quota_getxquota(sb, type, id, addr); 283 return quota_getxquota(sb, type, id, addr);
284 case Q_XQUOTASYNC: 284 case Q_XQUOTASYNC:
285 /* caller already holds s_umount */
286 if (sb->s_flags & MS_RDONLY) 285 if (sb->s_flags & MS_RDONLY)
287 return -EROFS; 286 return -EROFS;
288 writeback_inodes_sb(sb, WB_REASON_SYNC); 287 /* XFS quotas are fully coherent now, making this call a noop */
289 return 0; 288 return 0;
290 default: 289 default:
291 return -EINVAL; 290 return -EINVAL;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index aec766abe3a..a1fdabe21de 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -209,22 +209,19 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
209int ramfs_fill_super(struct super_block *sb, void *data, int silent) 209int ramfs_fill_super(struct super_block *sb, void *data, int silent)
210{ 210{
211 struct ramfs_fs_info *fsi; 211 struct ramfs_fs_info *fsi;
212 struct inode *inode = NULL; 212 struct inode *inode;
213 struct dentry *root;
214 int err; 213 int err;
215 214
216 save_mount_options(sb, data); 215 save_mount_options(sb, data);
217 216
218 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); 217 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
219 sb->s_fs_info = fsi; 218 sb->s_fs_info = fsi;
220 if (!fsi) { 219 if (!fsi)
221 err = -ENOMEM; 220 return -ENOMEM;
222 goto fail;
223 }
224 221
225 err = ramfs_parse_options(data, &fsi->mount_opts); 222 err = ramfs_parse_options(data, &fsi->mount_opts);
226 if (err) 223 if (err)
227 goto fail; 224 return err;
228 225
229 sb->s_maxbytes = MAX_LFS_FILESIZE; 226 sb->s_maxbytes = MAX_LFS_FILESIZE;
230 sb->s_blocksize = PAGE_CACHE_SIZE; 227 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -234,24 +231,11 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
234 sb->s_time_gran = 1; 231 sb->s_time_gran = 1;
235 232
236 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); 233 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
237 if (!inode) { 234 sb->s_root = d_make_root(inode);
238 err = -ENOMEM; 235 if (!sb->s_root)
239 goto fail; 236 return -ENOMEM;
240 }
241
242 root = d_alloc_root(inode);
243 sb->s_root = root;
244 if (!root) {
245 err = -ENOMEM;
246 goto fail;
247 }
248 237
249 return 0; 238 return 0;
250fail:
251 kfree(fsi);
252 sb->s_fs_info = NULL;
253 iput(inode);
254 return err;
255} 239}
256 240
257struct dentry *ramfs_mount(struct file_system_type *fs_type, 241struct dentry *ramfs_mount(struct file_system_type *fs_type,
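The simplification works because d_make_root() tolerates a NULL inode and consumes the inode reference on every path, so ramfs_fill_super() is left with a single NULL check and no unwind code (freeing s_fs_info on a failed mount is left to the kill_sb path). A userspace sketch of the contract the new code assumes, not the dcache implementation:

#include <stdio.h>
#include <stdlib.h>

struct inode { int refcount; };
struct dentry { struct inode *d_inode; };

static void iput(struct inode *inode)
{
	if (inode && --inode->refcount == 0)
		free(inode);
}

/* Sketch of the contract: NULL-tolerant, and the inode reference is
 * consumed on both the success and the failure path. */
static struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		res = malloc(sizeof(*res));
		if (res)
			res->d_inode = root_inode;	/* reference moves into the dentry */
		else
			iput(root_inode);		/* failure still drops the reference */
	}
	return res;
}

int main(void)
{
	struct inode *inode = malloc(sizeof(*inode));
	struct dentry *root;

	if (!inode)
		return 1;
	inode->refcount = 1;
	root = d_make_root(inode);	/* one NULL check replaces two error paths */
	if (!root)
		return 1;		/* nothing left to unwind */
	printf("root ok\n");
	iput(root->d_inode);
	free(root);
	return 0;
}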
diff --git a/fs/read_write.c b/fs/read_write.c
index 5ad4248b0cd..ffc99d22e0a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -11,7 +11,7 @@
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/module.h> 14#include <linux/export.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/splice.h> 17#include <linux/splice.h>
diff --git a/fs/readdir.c b/fs/readdir.c
index 356f71528ad..cc0a8227cdd 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -6,7 +6,7 @@
6 6
7#include <linux/stddef.h> 7#include <linux/stddef.h>
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/errno.h> 12#include <linux/errno.h>
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
new file mode 100644
index 00000000000..f096b80e73d
--- /dev/null
+++ b/fs/reiserfs/acl.h
@@ -0,0 +1,76 @@
1#include <linux/init.h>
2#include <linux/posix_acl.h>
3
4#define REISERFS_ACL_VERSION 0x0001
5
6typedef struct {
7 __le16 e_tag;
8 __le16 e_perm;
9 __le32 e_id;
10} reiserfs_acl_entry;
11
12typedef struct {
13 __le16 e_tag;
14 __le16 e_perm;
15} reiserfs_acl_entry_short;
16
17typedef struct {
18 __le32 a_version;
19} reiserfs_acl_header;
20
21static inline size_t reiserfs_acl_size(int count)
22{
23 if (count <= 4) {
24 return sizeof(reiserfs_acl_header) +
25 count * sizeof(reiserfs_acl_entry_short);
26 } else {
27 return sizeof(reiserfs_acl_header) +
28 4 * sizeof(reiserfs_acl_entry_short) +
29 (count - 4) * sizeof(reiserfs_acl_entry);
30 }
31}
32
33static inline int reiserfs_acl_count(size_t size)
34{
35 ssize_t s;
36 size -= sizeof(reiserfs_acl_header);
37 s = size - 4 * sizeof(reiserfs_acl_entry_short);
38 if (s < 0) {
39 if (size % sizeof(reiserfs_acl_entry_short))
40 return -1;
41 return size / sizeof(reiserfs_acl_entry_short);
42 } else {
43 if (s % sizeof(reiserfs_acl_entry))
44 return -1;
45 return s / sizeof(reiserfs_acl_entry) + 4;
46 }
47}
48
49#ifdef CONFIG_REISERFS_FS_POSIX_ACL
50struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
51int reiserfs_acl_chmod(struct inode *inode);
52int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
53 struct inode *dir, struct dentry *dentry,
54 struct inode *inode);
55int reiserfs_cache_default_acl(struct inode *dir);
56extern const struct xattr_handler reiserfs_posix_acl_default_handler;
57extern const struct xattr_handler reiserfs_posix_acl_access_handler;
58
59#else
60
61#define reiserfs_cache_default_acl(inode) 0
62#define reiserfs_get_acl NULL
63
64static inline int reiserfs_acl_chmod(struct inode *inode)
65{
66 return 0;
67}
68
69static inline int
70reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
71 const struct inode *dir, struct dentry *dentry,
72 struct inode *inode)
73{
74 return 0;
75}
76#endif
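reiserfs_acl_size() and reiserfs_acl_count() invert each other: the first four entries use the short form, later ones the full form. With these layouts the header is 4 bytes, a short entry 4 and a full entry 8, so a 6-entry ACL takes 4 + 4*4 + 2*8 = 36 bytes and reiserfs_acl_count(36) returns 6. A standalone round-trip check, with __le16/__le32 swapped for plain fixed-width types:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>		/* ssize_t */

/* Layouts copied from acl.h above, with __le16/__le32 replaced by
 * fixed-width types of the same size. */
typedef struct { uint16_t e_tag; uint16_t e_perm; uint32_t e_id; } acl_entry;
typedef struct { uint16_t e_tag; uint16_t e_perm; } acl_entry_short;
typedef struct { uint32_t a_version; } acl_header;

static size_t acl_size(int count)
{
	if (count <= 4)
		return sizeof(acl_header) + count * sizeof(acl_entry_short);
	return sizeof(acl_header) + 4 * sizeof(acl_entry_short) +
	       (count - 4) * sizeof(acl_entry);
}

static int acl_count(size_t size)
{
	ssize_t s;

	size -= sizeof(acl_header);
	s = size - 4 * sizeof(acl_entry_short);
	if (s < 0) {
		if (size % sizeof(acl_entry_short))
			return -1;
		return size / sizeof(acl_entry_short);
	}
	if (s % sizeof(acl_entry))
		return -1;
	return s / sizeof(acl_entry) + 4;
}

int main(void)
{
	int n;

	for (n = 0; n <= 32; n++)	/* the two helpers invert each other */
		assert(acl_count(acl_size(n)) == n);
	printf("6 entries -> %zu bytes\n", acl_size(6));	/* 36 here */
	return 0;
}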
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 70de42f09f1..4c0c7d163d1 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -4,14 +4,12 @@
4/* Reiserfs block (de)allocator, bitmap-based. */ 4/* Reiserfs block (de)allocator, bitmap-based. */
5 5
6#include <linux/time.h> 6#include <linux/time.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/vmalloc.h> 12#include <linux/vmalloc.h>
13#include <linux/reiserfs_fs_sb.h>
14#include <linux/reiserfs_fs_i.h>
15#include <linux/quotaops.h> 13#include <linux/quotaops.h>
16#include <linux/seq_file.h> 14#include <linux/seq_file.h>
17 15
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 133e9355dc6..66c53b642a8 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -5,7 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c08044066..2b7882b508d 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/reiserfs_fs.h> 20#include "reiserfs.h"
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23 23
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ace635053a3..8375c922c0d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -3,9 +3,9 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7#include <linux/reiserfs_acl.h> 7#include "acl.h"
8#include <linux/reiserfs_xattr.h> 8#include "xattr.h"
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/swap.h> 11#include <linux/swap.h>
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 1e4250bc3a6..430e0658704 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/buffer_head.h> 41#include <linux/buffer_head.h>
42 42
43/* To make any changes in the tree we find a node, that contains item 43/* To make any changes in the tree we find a node, that contains item
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 6471c670743..91b0cc1242a 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -19,7 +19,7 @@
19// 19//
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/reiserfs_fs.h> 22#include "reiserfs.h"
23#include <asm/types.h> 23#include <asm/types.h>
24 24
25#define DELTA 0x9E3779B9 25#define DELTA 0x9E3779B9
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 2074fd95046..e1978fd895f 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* this is one and only function that is used outside (do_balance.c) */ 11/* this is one and only function that is used outside (do_balance.c) */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9e8cd5acd79..494c315c741 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -4,9 +4,9 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/reiserfs_acl.h> 8#include "acl.h"
9#include <linux/reiserfs_xattr.h> 9#include "xattr.h"
10#include <linux/exportfs.h> 10#include <linux/exportfs.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/highmem.h> 12#include <linux/highmem.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 950e3d1b5c9..0c2185042d5 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -5,7 +5,7 @@
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 72cb1cc51b8..ee382ef3d30 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -3,7 +3,7 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7 7
8// this contains item handlers for old item types: sd, direct, 8// this contains item handlers for old item types: sd, direct,
9// indirect, directory 9// indirect, directory
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c3cf54fd4de..b1a08573fe1 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/semaphore.h> 38#include <linux/semaphore.h>
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/fcntl.h> 43#include <linux/fcntl.h>
@@ -51,7 +51,6 @@
51#include <linux/uaccess.h> 51#include <linux/uaccess.h>
52#include <linux/slab.h> 52#include <linux/slab.h>
53 53
54#include <asm/system.h>
55 54
56/* gets a struct reiserfs_journal_list * from a list head */ 55/* gets a struct reiserfs_journal_list * from a list head */
57#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 56#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 03d85cbf90b..79e5a8b4c22 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* these are used in do_balance.c */ 11/* these are used in do_balance.c */
@@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
975 remove */ 975 remove */
976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); 976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count, 977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
978 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", 978 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
979 I_ENTRY_COUNT(ih), from, del_count); 979 I_ENTRY_COUNT(ih), from, del_count);
980 980
981 if (del_count == 0) 981 if (del_count == 0)
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index 7df1ce48203..d735bc8470e 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -1,4 +1,4 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/mutex.h> 2#include <linux/mutex.h>
3 3
4/* 4/*
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 14637886523..84e8a69cee9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -14,9 +14,9 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/reiserfs_fs.h> 17#include "reiserfs.h"
18#include <linux/reiserfs_acl.h> 18#include "acl.h"
19#include <linux/reiserfs_xattr.h> 19#include "xattr.h"
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 3a6de810bd6..f732d6a5251 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -5,8 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/random.h> 6#include <linux/random.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/reiserfs_fs_sb.h>
10 9
11// find where objectid map starts 10// find where objectid map starts
12#define objectid_map(s,rs) (old_format_only (s) ? \ 11#define objectid_map(s,rs) (old_format_only (s) ? \
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 45de98b5946..c0b1112ab7e 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -4,7 +4,7 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
@@ -329,7 +329,7 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it 329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it
330 pointless complexity): 330 pointless complexity):
331 331
332 panics in reiserfs_fs.h have numbers from 1000 to 1999 332 panics in reiserfs.h have numbers from 1000 to 1999
333 super.c 2000 to 2999 333 super.c 2000 to 2999
334 preserve.c (unused) 3000 to 3999 334 preserve.c (unused) 3000 to 3999
335 bitmap.c 4000 to 4999 335 bitmap.c 4000 to 4999
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 7a9981196c1..2c1ade692cc 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -12,8 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <linux/reiserfs_fs.h> 15#include "reiserfs.h"
16#include <linux/reiserfs_fs_sb.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
19 18
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
new file mode 100644
index 00000000000..a59d2712633
--- /dev/null
+++ b/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2923 @@
1/*
2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
3 */
4
5#include <linux/reiserfs_fs.h>
6
7#include <linux/slab.h>
8#include <linux/interrupt.h>
9#include <linux/sched.h>
10#include <linux/bug.h>
11#include <linux/workqueue.h>
12#include <asm/unaligned.h>
13#include <linux/bitops.h>
14#include <linux/proc_fs.h>
15#include <linux/buffer_head.h>
16
17/* the 32 bit compat definitions with int argument */
18#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
19#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
20#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
21#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
22#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
23
24struct reiserfs_journal_list;
25
26/** bitmasks for i_flags field in reiserfs-specific part of inode */
27typedef enum {
28 /** this says what format of key all items (except stat data) of
29 an object have. If this is set, the format is 3.6; otherwise
30 it is 3.5 */
31 i_item_key_version_mask = 0x0001,
32 /** If this is unset, object has 3.5 stat data, otherwise, it has
33 3.6 stat data with 64bit size, 32bit nlink etc. */
34 i_stat_data_version_mask = 0x0002,
35 /** file might need tail packing on close */
36 i_pack_on_close_mask = 0x0004,
37 /** don't pack tail of file */
38 i_nopack_mask = 0x0008,
39 /** If this is set, "safe link" was created for this file during
40 truncate or unlink. Safe link is used to avoid leakage of disk
41 space on crash with some files open, but unlinked. */
42 i_link_saved_unlink_mask = 0x0010,
43 i_link_saved_truncate_mask = 0x0020,
44 i_has_xattr_dir = 0x0040,
45 i_data_log = 0x0080,
46} reiserfs_inode_flags;
47
48struct reiserfs_inode_info {
49 __u32 i_key[4]; /* key is still 4 32 bit integers */
50 /** transient inode flags that are never stored on disk. Bitmasks
51 for this field are defined above. */
52 __u32 i_flags;
53
54 __u32 i_first_direct_byte; // offset of first byte stored in direct item.
55
56 /* copy of persistent inode flags read from sd_attrs. */
57 __u32 i_attrs;
58
59 int i_prealloc_block; /* first unused block of a sequence of unused blocks */
60 int i_prealloc_count; /* length of that sequence */
61 struct list_head i_prealloc_list; /* per-transaction list of inodes which
62 * have preallocated blocks */
63
64 unsigned new_packing_locality:1; /* new_packing_locality is created; new blocks
65 * for the contents of this directory should be
66 * displaced */
67
68 /* we use these for fsync or O_SYNC to decide which transaction
69 ** needs to be committed in order for this inode to be properly
70 ** flushed */
71 unsigned int i_trans_id;
72 struct reiserfs_journal_list *i_jl;
73 atomic_t openers;
74 struct mutex tailpack;
75#ifdef CONFIG_REISERFS_FS_XATTR
76 struct rw_semaphore i_xattr_sem;
77#endif
78 struct inode vfs_inode;
79};
80
81typedef enum {
82 reiserfs_attrs_cleared = 0x00000001,
83} reiserfs_super_block_flags;
84
85/* struct reiserfs_super_block accessors/mutators
86 * since this is a disk structure, it will always be in
87 * little endian format. */
88#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
89#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
90#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
91#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
92#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block))
93#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
94
95#define sb_jp_journal_1st_block(sbp) \
96 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
97#define set_sb_jp_journal_1st_block(sbp,v) \
98 ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
99#define sb_jp_journal_dev(sbp) \
100 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
101#define set_sb_jp_journal_dev(sbp,v) \
102 ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
103#define sb_jp_journal_size(sbp) \
104 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
105#define set_sb_jp_journal_size(sbp,v) \
106 ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
107#define sb_jp_journal_trans_max(sbp) \
108 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
109#define set_sb_jp_journal_trans_max(sbp,v) \
110 ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
111#define sb_jp_journal_magic(sbp) \
112 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
113#define set_sb_jp_journal_magic(sbp,v) \
114 ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
115#define sb_jp_journal_max_batch(sbp) \
116 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
117#define set_sb_jp_journal_max_batch(sbp,v) \
118 ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
119#define sb_jp_jourmal_max_commit_age(sbp) \
120 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
121#define set_sb_jp_journal_max_commit_age(sbp,v) \
122 ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
123
124#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize))
125#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
126#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
127#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
128#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
129#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
130#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state))
131#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
132#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state))
133#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
134#define sb_hash_function_code(sbp) \
135 (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
136#define set_sb_hash_function_code(sbp,v) \
137 ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
138#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height))
139#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
140#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
141#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
142#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version))
143#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v))
144
145#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count))
146#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v))
147
148#define sb_reserved_for_journal(sbp) \
149 (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
150#define set_sb_reserved_for_journal(sbp,v) \
151 ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
152
153/* LOGGING -- */
154
155/* These all interrelate for performance.
156**
157** If the journal block count is smaller than n transactions, you lose speed.
158** I don't know what n is yet, I'm guessing 8-16.
159**
160** typical transaction size depends on the application, how often fsync is
161** called, and how many metadata blocks you dirty in a 30 second period.
162** The more small files (<16k) you use, the larger your transactions will
163** be.
164**
165** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
166** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough
167** to prevent wrapping before dirty meta blocks get to disk.
168**
169** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
170** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
171**
172** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
173**
174*/
175
176/* don't mess with these for a while */
177 /* we have a node size define somewhere in reiserfs_fs.h. -Hans */
178#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
179#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
180#define JOURNAL_HASH_SIZE 8192
181#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */
182
183/* One of these for every block in every transaction
184** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a
185** hash of all the in memory transactions.
186** next and prev are used by the current transaction (journal_hash).
187** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash
188** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging
189** to a given transaction.
190*/
191struct reiserfs_journal_cnode {
192 struct buffer_head *bh; /* real buffer head */
193 struct super_block *sb; /* dev of real buffer head */
194 __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */
195 unsigned long state;
196 struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */
197 struct reiserfs_journal_cnode *next; /* next in transaction list */
198 struct reiserfs_journal_cnode *prev; /* prev in transaction list */
199 struct reiserfs_journal_cnode *hprev; /* prev in hash list */
200 struct reiserfs_journal_cnode *hnext; /* next in hash list */
201};
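Because a block that belongs to several transactions gets one cnode per transaction, and all of them are linked into the same journal_list_hash chain through hnext/hprev, flush_journal_list can unlink exactly one transaction's cnode while leaving the others in place. A toy model of that selective unlink (illustrative types, not the kernel's):

#include <stdio.h>

struct jl { int unused; };		/* stands in for reiserfs_journal_list */

struct cnode {
	unsigned blocknr;
	struct jl *jlist;
	struct cnode *hnext;		/* journal_list_hash chain */
	struct cnode *hprev;
};

/* Unlink the cnode of one specific transaction; cnodes for the same
 * block owned by other transactions stay on the chain. */
static void unlink_from_hash(struct cnode **bucket, struct jl *jlist,
			     unsigned blocknr)
{
	struct cnode *cn;

	for (cn = *bucket; cn; cn = cn->hnext) {
		if (cn->blocknr != blocknr || cn->jlist != jlist)
			continue;	/* same block, different transaction */
		if (cn->hprev)
			cn->hprev->hnext = cn->hnext;
		else
			*bucket = cn->hnext;
		if (cn->hnext)
			cn->hnext->hprev = cn->hprev;
		return;
	}
}

int main(void)
{
	struct jl a, b;
	struct cnode c1 = { 42, &a, NULL, NULL };
	struct cnode c2 = { 42, &b, NULL, NULL };
	struct cnode *bucket = &c1;

	c1.hnext = &c2;
	c2.hprev = &c1;
	unlink_from_hash(&bucket, &a, 42);	/* drop only transaction a's cnode */
	printf("%s\n", bucket == &c2 ? "ok" : "bug");
	return 0;
}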
202
203struct reiserfs_bitmap_node {
204 int id;
205 char *data;
206 struct list_head list;
207};
208
209struct reiserfs_list_bitmap {
210 struct reiserfs_journal_list *journal_list;
211 struct reiserfs_bitmap_node **bitmaps;
212};
213
214/*
215** one of these for each transaction. The most important part here is the j_realblock.
216** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
217** real buffer heads dirty once all the commits hit the disk,
218** and to make sure every real block in a transaction is on disk before allowing the log area
219** to be overwritten */
220struct reiserfs_journal_list {
221 unsigned long j_start;
222 unsigned long j_state;
223 unsigned long j_len;
224 atomic_t j_nonzerolen;
225 atomic_t j_commit_left;
226 atomic_t j_older_commits_done; /* all commits older than this on disk */
227 struct mutex j_commit_mutex;
228 unsigned int j_trans_id;
229 time_t j_timestamp;
230 struct reiserfs_list_bitmap *j_list_bitmap;
231 struct buffer_head *j_commit_bh; /* commit buffer head */
232 struct reiserfs_journal_cnode *j_realblock;
233 struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */
234 /* time ordered list of all active transactions */
235 struct list_head j_list;
236
237 /* time ordered list of all transactions we haven't tried to flush yet */
238 struct list_head j_working_list;
239
240 /* list of tail conversion targets in need of flush before commit */
241 struct list_head j_tail_bh_list;
242 /* list of data=ordered buffers in need of flush before commit */
243 struct list_head j_bh_list;
244 int j_refcount;
245};
246
247struct reiserfs_journal {
248 struct buffer_head **j_ap_blocks; /* journal blocks on disk */
249 struct reiserfs_journal_cnode *j_last; /* newest journal block */
250 struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */
251
252 struct block_device *j_dev_bd;
253 fmode_t j_dev_mode;
254 int j_1st_reserved_block; /* first block on s_dev of reserved area journal */
255
256 unsigned long j_state;
257 unsigned int j_trans_id;
258 unsigned long j_mount_id;
259 unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */
260 unsigned long j_len; /* length of current waiting commit */
261 unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */
262 atomic_t j_wcount; /* count of writers for current commit */
263 unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */
264 unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */
265 unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */
266 struct buffer_head *j_header_bh;
267
268 time_t j_trans_start_time; /* time this transaction started */
269 struct mutex j_mutex;
270 struct mutex j_flush_mutex;
271 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
272 atomic_t j_jlock; /* lock for j_join_wait */
273 int j_list_bitmap_index; /* number of next list bitmap to use */
274 int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */
275 int j_next_full_flush; /* next journal_end will flush all journal list */
276 int j_next_async_flush; /* next journal_end will flush all async commits */
277
278 int j_cnode_used; /* number of cnodes on the used list */
279 int j_cnode_free; /* number of cnodes on the free list */
280
281 unsigned int j_trans_max; /* max number of blocks in a transaction. */
282 unsigned int j_max_batch; /* max number of blocks to batch into a trans */
283 unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */
284 unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */
285 unsigned int j_default_max_commit_age; /* the default for the max commit age */
286
287 struct reiserfs_journal_cnode *j_cnode_free_list;
288 struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */
289
290 struct reiserfs_journal_list *j_current_jl;
291 int j_free_bitmap_nodes;
292 int j_used_bitmap_nodes;
293
294 int j_num_lists; /* total number of active transactions */
295 int j_num_work_lists; /* number that need attention from kreiserfsd */
296
297 /* debugging to make sure things are flushed in order */
298 unsigned int j_last_flush_id;
299
300 /* debugging to make sure things are committed in order */
301 unsigned int j_last_commit_id;
302
303 struct list_head j_bitmap_nodes;
304 struct list_head j_dirty_buffers;
305 spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */
306
307 /* list of all active transactions */
308 struct list_head j_journal_list;
309 /* lists that haven't been touched by writeback attempts */
310 struct list_head j_working_list;
311
312 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */
313 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */
314 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all
315 the transactions */
316 struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */
317 int j_persistent_trans;
318 unsigned long j_max_trans_size;
319 unsigned long j_max_batch_size;
320
321 int j_errno;
322
323 /* when flushing ordered buffers, throttle new ordered writers */
324 struct delayed_work j_work;
325 struct super_block *j_work_sb;
326 atomic_t j_async_throttle;
327};
328
329enum journal_state_bits {
330 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */
331 J_WRITERS_QUEUED, /* set when log is full due to too many writers */
332 J_ABORTED, /* set when log is aborted */
333};
334
335#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */
336
337typedef __u32(*hashf_t) (const signed char *, int);
338
339struct reiserfs_bitmap_info {
340 __u32 free_count;
341};
342
343struct proc_dir_entry;
344
345#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
346typedef unsigned long int stat_cnt_t;
347typedef struct reiserfs_proc_info_data {
348 spinlock_t lock;
349 int exiting;
350 int max_hash_collisions;
351
352 stat_cnt_t breads;
353 stat_cnt_t bread_miss;
354 stat_cnt_t search_by_key;
355 stat_cnt_t search_by_key_fs_changed;
356 stat_cnt_t search_by_key_restarted;
357
358 stat_cnt_t insert_item_restarted;
359 stat_cnt_t paste_into_item_restarted;
360 stat_cnt_t cut_from_item_restarted;
361 stat_cnt_t delete_solid_item_restarted;
362 stat_cnt_t delete_item_restarted;
363
364 stat_cnt_t leaked_oid;
365 stat_cnt_t leaves_removable;
366
367 /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
368 stat_cnt_t balance_at[5]; /* XXX */
369 /* sbk == search_by_key */
370 stat_cnt_t sbk_read_at[5]; /* XXX */
371 stat_cnt_t sbk_fs_changed[5];
372 stat_cnt_t sbk_restarted[5];
373 stat_cnt_t items_at[5]; /* XXX */
374 stat_cnt_t free_at[5]; /* XXX */
375 stat_cnt_t can_node_be_removed[5]; /* XXX */
376 long int lnum[5]; /* XXX */
377 long int rnum[5]; /* XXX */
378 long int lbytes[5]; /* XXX */
379 long int rbytes[5]; /* XXX */
380 stat_cnt_t get_neighbors[5];
381 stat_cnt_t get_neighbors_restart[5];
382 stat_cnt_t need_l_neighbor[5];
383 stat_cnt_t need_r_neighbor[5];
384
385 stat_cnt_t free_block;
386 struct __scan_bitmap_stats {
387 stat_cnt_t call;
388 stat_cnt_t wait;
389 stat_cnt_t bmap;
390 stat_cnt_t retry;
391 stat_cnt_t in_journal_hint;
392 stat_cnt_t in_journal_nohint;
393 stat_cnt_t stolen;
394 } scan_bitmap;
395 struct __journal_stats {
396 stat_cnt_t in_journal;
397 stat_cnt_t in_journal_bitmap;
398 stat_cnt_t in_journal_reusable;
399 stat_cnt_t lock_journal;
400 stat_cnt_t lock_journal_wait;
401 stat_cnt_t journal_being;
402 stat_cnt_t journal_relock_writers;
403 stat_cnt_t journal_relock_wcount;
404 stat_cnt_t mark_dirty;
405 stat_cnt_t mark_dirty_already;
406 stat_cnt_t mark_dirty_notjournal;
407 stat_cnt_t restore_prepared;
408 stat_cnt_t prepare;
409 stat_cnt_t prepare_retry;
410 } journal;
411} reiserfs_proc_info_data_t;
412#else
413typedef struct reiserfs_proc_info_data {
414} reiserfs_proc_info_data_t;
415#endif
416
417/* reiserfs union of in-core super block data */
418struct reiserfs_sb_info {
419 struct buffer_head *s_sbh; /* Buffer containing the super block */
420 /* both the comment and the choice of
421 name are unclear for s_rs -Hans */
422 struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */
423 struct reiserfs_bitmap_info *s_ap_bitmap;
424 struct reiserfs_journal *s_journal; /* pointer to journal information */
425 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
426
427 /* Serialize writers access, replace the old bkl */
428 struct mutex lock;
429 /* Owner of the lock (can be recursive) */
430 struct task_struct *lock_owner;
431 /* Depth of the lock, start from -1 like the bkl */
432 int lock_depth;
433
434 /* Comment? -Hans */
435 void (*end_io_handler) (struct buffer_head *, int);
436 hashf_t s_hash_function; /* pointer to function which is used
437 to sort names in directory. Set on
438 mount */
439 unsigned long s_mount_opt; /* reiserfs's mount options are set
440 here (currently - NOTAIL, NOLOG,
441 REPLAYONLY) */
442
443 struct { /* This is a structure that describes block allocator options */
444 unsigned long bits; /* Bitfield for enable/disable kind of options */
445 unsigned long large_file_size; /* size starting from which we consider a file to be large (in blocks) */
446 int border; /* percentage of disk, border takes */
447 int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */
448 int preallocsize; /* Number of blocks we try to prealloc when file
449 reaches preallocmin size (in blocks) or
450 prealloc_list is empty. */
451 } s_alloc_options;
452
453 /* Comment? -Hans */
454 wait_queue_head_t s_wait;
455 /* To be obsoleted soon by per buffer seals.. -Hans */
456 atomic_t s_generation_counter; // increased by one every time the
457 // tree gets re-balanced
458 unsigned long s_properties; /* File system properties. Currently holds
459 on-disk FS format */
460
461 /* session statistics */
462 int s_disk_reads;
463 int s_disk_writes;
464 int s_fix_nodes;
465 int s_do_balance;
466 int s_unneeded_left_neighbor;
467 int s_good_search_by_key_reada;
468 int s_bmaps;
469 int s_bmaps_without_search;
470 int s_direct2indirect;
471 int s_indirect2direct;
472 /* set up when it's ok for reiserfs_read_inode2() to read from
473 disk inode with nlink==0. Currently this is only used during
474 finish_unfinished() processing at mount time */
475 int s_is_unlinked_ok;
476 reiserfs_proc_info_data_t s_proc_info_data;
477 struct proc_dir_entry *procdir;
478 int reserved_blocks; /* number of blocks reserved for further allocations */
479 spinlock_t bitmap_lock; /* this lock is now only used to protect the reserved_blocks variable */
480 struct dentry *priv_root; /* root of /.reiserfs_priv */
481 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */
482 int j_errno;
483#ifdef CONFIG_QUOTA
484 char *s_qf_names[MAXQUOTAS];
485 int s_jquota_fmt;
486#endif
487 char *s_jdev; /* Stored jdev for mount option showing */
488#ifdef CONFIG_REISERFS_CHECK
489
490 struct tree_balance *cur_tb; /*
491 * Detects whether more than one
492 * copy of tb exists per superblock
493 * as a means of checking whether
494 * do_balance is executing concurrently
495 * against another tree reader/writer
496 * on a same mount point.
497 */
498#endif
499};
500
501/* Definitions of reiserfs on-disk properties: */
502#define REISERFS_3_5 0
503#define REISERFS_3_6 1
504#define REISERFS_OLD_FORMAT 2
505
506enum reiserfs_mount_options {
507/* Mount options */
508 REISERFS_LARGETAIL, /* large tails will be created in a session */
509 REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */
510 REPLAYONLY, /* replay journal and return 0. Used by fsck */
511 REISERFS_CONVERT, /* -o conv: causes conversion of old
512 format super block to the new
513 format. If not specified - old
514 partition will be dealt with in a
515 manner of 3.5.x */
516
517/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
518** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option
519** is not required. If the normal autodetection code can't determine which
520** hash to use (because both hashes had the same value for a file)
521** use this option to force a specific hash. It won't allow you to override
522** the existing hash on the FS, so if you have a tea hash disk, and mount
523** with -o hash=rupasov, the mount will fail.
524*/
525 FORCE_TEA_HASH, /* try to force tea hash on mount */
526 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
527 FORCE_R5_HASH, /* try to force r5 hash on mount */
528 FORCE_HASH_DETECT, /* try to detect hash function on mount */
529
530 REISERFS_DATA_LOG,
531 REISERFS_DATA_ORDERED,
532 REISERFS_DATA_WRITEBACK,
533
534/* used for testing experimental features, makes benchmarking new
535 features with and without more convenient, should never be used by
536 users in any code shipped to users (ideally) */
537
538 REISERFS_NO_BORDER,
539 REISERFS_NO_UNHASHED_RELOCATION,
540 REISERFS_HASHED_RELOCATION,
541 REISERFS_ATTRS,
542 REISERFS_XATTRS_USER,
543 REISERFS_POSIXACL,
544 REISERFS_EXPOSE_PRIVROOT,
545 REISERFS_BARRIER_NONE,
546 REISERFS_BARRIER_FLUSH,
547
548 /* Actions on error */
549 REISERFS_ERROR_PANIC,
550 REISERFS_ERROR_RO,
551 REISERFS_ERROR_CONTINUE,
552
553 REISERFS_USRQUOTA, /* User quota option specified */
554 REISERFS_GRPQUOTA, /* Group quota option specified */
555
556 REISERFS_TEST1,
557 REISERFS_TEST2,
558 REISERFS_TEST3,
559 REISERFS_TEST4,
560 REISERFS_UNSUPPORTED_OPT,
561};
562
563#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
564#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
565#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
566#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
567#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
568#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
569#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
570#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
571
572#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
573#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
574#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
575#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
576#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
577#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
578#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
579#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
580#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
581#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
582#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
583#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
584#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
585#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
586#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
587
588#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
589#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))
590
591void reiserfs_file_buffer(struct buffer_head *bh, int list);
592extern struct file_system_type reiserfs_fs_type;
593int reiserfs_resize(struct super_block *, unsigned long);
594
595#define CARRY_ON 0
596#define SCHEDULE_OCCURRED 1
597
598#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
599#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
600#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
601#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
602#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)
603
604#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->)
605
606/* A safe version of the "bdevname", which returns the "s_id" field of
607 * a superblock or else "Null superblock" if the super block is NULL.
608 */
609static inline char *reiserfs_bdevname(struct super_block *s)
610{
611 return (s == NULL) ? "Null superblock" : s->s_id;
612}
613
614#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
615static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
616 *journal)
617{
618 return test_bit(J_ABORTED, &journal->j_state);
619}
620
621/*
622 * Locking primitives. The write lock is a per superblock
623 * special mutex that has properties close to the Big Kernel Lock
624 * which was used in the previous locking scheme.
625 */
626void reiserfs_write_lock(struct super_block *s);
627void reiserfs_write_unlock(struct super_block *s);
628int reiserfs_write_lock_once(struct super_block *s);
629void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
630
631#ifdef CONFIG_REISERFS_CHECK
632void reiserfs_lock_check_recursive(struct super_block *s);
633#else
634static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
635#endif
636
637/*
638 * Several mutexes depend on the write lock.
639 * However sometimes we want to relax the write lock while we hold
640 * these mutexes, according to the release/reacquire on schedule()
641 * properties of the Bkl that were used.
642 * Reiserfs performance and locking were based on this scheme.
643 * Now that the write lock is a mutex and not the bkl anymore, doing so
644 * may result in a deadlock:
645 *
646 * A acquire write_lock
647 * A acquire j_commit_mutex
648 * A release write_lock and wait for something
649 * B acquire write_lock
650 * B can't acquire j_commit_mutex and sleep
651 * A can't acquire write lock anymore
652 * deadlock
653 *
654 * What we do here is avoid such deadlocks by playing the same game
655 * as the Bkl: if we can't acquire a mutex that depends on the write lock,
656 * we release the write lock, wait a bit and then retry.
657 *
658 * The mutexes concerned by this hack are:
659 * - The commit mutex of a journal list
660 * - The flush mutex
661 * - The journal lock
662 * - The inode mutex
663 */
664static inline void reiserfs_mutex_lock_safe(struct mutex *m,
665 struct super_block *s)
666{
667 reiserfs_lock_check_recursive(s);
668 reiserfs_write_unlock(s);
669 mutex_lock(m);
670 reiserfs_write_lock(s);
671}
672
673static inline void
674reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
675 struct super_block *s)
676{
677 reiserfs_lock_check_recursive(s);
678 reiserfs_write_unlock(s);
679 mutex_lock_nested(m, subclass);
680 reiserfs_write_lock(s);
681}
682
683static inline void
684reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
685{
686 reiserfs_lock_check_recursive(s);
687 reiserfs_write_unlock(s);
688 down_read(sem);
689 reiserfs_write_lock(s);
690}
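All three wrappers share one shape: drop the write lock, take the dependent lock (possibly sleeping), then retake the write lock, so no thread ever waits on a dependent lock while holding the write lock. A userspace rendering of that shape with plain pthreads (illustrative; the kernel versions additionally check for recursive acquisition under CONFIG_REISERFS_CHECK):

#include <pthread.h>
#include <stdio.h>

/* write_lock stands in for the per-super write lock, dep for one of the
 * dependent mutexes (e.g. a journal list's commit mutex). */
static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dep = PTHREAD_MUTEX_INITIALIZER;

static void mutex_lock_safe(pthread_mutex_t *m)
{
	pthread_mutex_unlock(&write_lock);	/* let other writers progress */
	pthread_mutex_lock(m);			/* may sleep without blocking them */
	pthread_mutex_lock(&write_lock);	/* retake the big lock afterwards */
}

int main(void)
{
	pthread_mutex_lock(&write_lock);
	mutex_lock_safe(&dep);			/* never waits on dep under write_lock */
	printf("both locks held\n");
	pthread_mutex_unlock(&dep);
	pthread_mutex_unlock(&write_lock);
	return 0;
}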
691
692/*
693 * When we schedule, we usually want to also release the write lock,
694 * according to the previous bkl based locking scheme of reiserfs.
695 */
696static inline void reiserfs_cond_resched(struct super_block *s)
697{
698 if (need_resched()) {
699 reiserfs_write_unlock(s);
700 schedule();
701 reiserfs_write_lock(s);
702 }
703}
704
705struct fid;
706
707/* in reading the #defines, it may help to understand that they employ
708 the following abbreviations:
709
710 B = Buffer
711 I = Item header
712 H = Height within the tree (should be changed to LEV)
713 N = Number of the item in the node
714 STAT = stat data
715 DEH = Directory Entry Header
716 EC = Entry Count
717 E = Entry number
718 UL = Unsigned Long
719 BLKH = BLocK Header
720 UNFM = UNForMatted node
721 DC = Disk Child
722 P = Path
723
724 These #defines are named by concatenating these abbreviations,
725 where first comes the arguments, and last comes the return value,
726 of the macro.
727
728*/
729
730#define USE_INODE_GENERATION_COUNTER
731
732#define REISERFS_PREALLOCATE
733#define DISPLACE_NEW_PACKING_LOCALITIES
734#define PREALLOCATION_SIZE 9
735
736/* n must be power of 2 */
737#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
738
739// to be ok for alpha and others we have to align structures to 8 byte
740// boundary.
741// FIXME: do not change 4 by anything else: there is code which relies on that
742#define ROUND_UP(x) _ROUND_UP(x,8LL)
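Worked example: ROUND_UP(13) expands to (13 + 8 - 1) & ~7; 13 + 7 = 20, and masking off the low three bits gives 16. A quick standalone check of the macros:

#include <assert.h>
#include <stdio.h>

#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
#define ROUND_UP(x) _ROUND_UP(x,8LL)

int main(void)
{
	assert(ROUND_UP(13) == 16);		/* (13 + 7) & ~7 */
	assert(ROUND_UP(16) == 16);		/* aligned values pass through */
	assert(_ROUND_UP(1, 4) == 4);		/* any power-of-two n works */
	printf("ok\n");
	return 0;
}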
743
744/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
745** messages.
746*/
747#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */
748
749void __reiserfs_warning(struct super_block *s, const char *id,
750 const char *func, const char *fmt, ...);
751#define reiserfs_warning(s, id, fmt, args...) \
752 __reiserfs_warning(s, id, __func__, fmt, ##args)
753/* assertions handling */
754
755/** always check a condition and panic if it's false. */
756#define __RASSERT(cond, scond, format, args...) \
757do { \
758 if (!(cond)) \
759 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
760 __FILE__ ":%i:%s: " format "\n", \
761 in_interrupt() ? -1 : task_pid_nr(current), \
762 __LINE__, __func__ , ##args); \
763} while (0)
764
765#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
766
767#if defined( CONFIG_REISERFS_CHECK )
768#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
769#else
770#define RFALSE( cond, format, args... ) do {;} while( 0 )
771#endif
772
773#define CONSTF __attribute_const__
774/*
775 * Disk Data Structures
776 */
777
778/***************************************************************************/
779/* SUPER BLOCK */
780/***************************************************************************/
781
782/*
783 * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
784 * the version in RAM is part of a larger structure containing fields never written to disk.
785 */
786#define UNSET_HASH 0 // read_super will guess what hash names
787 // in directories were sorted with
788#define TEA_HASH 1
789#define YURA_HASH 2
790#define R5_HASH 3
791#define DEFAULT_HASH R5_HASH
792
793struct journal_params {
794 __le32 jp_journal_1st_block; /* where does journal start from on its
795 * device */
796 __le32 jp_journal_dev; /* journal device st_rdev */
797 __le32 jp_journal_size; /* size of the journal */
798 __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */
799 __le32 jp_journal_magic; /* random value made on fs creation (this
800 * was sb_journal_block_count) */
801 __le32 jp_journal_max_batch; /* max number of blocks to batch into a
802 * trans */
803 __le32 jp_journal_max_commit_age; /* in seconds, how old can an async
804 * commit be */
805 __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction
806 * be */
807};
808
809/* this is the super from 3.5.X, where X >= 10 */
810struct reiserfs_super_block_v1 {
811 __le32 s_block_count; /* blocks count */
812 __le32 s_free_blocks; /* free blocks count */
813 __le32 s_root_block; /* root block number */
814 struct journal_params s_journal;
815 __le16 s_blocksize; /* block size */
816 __le16 s_oid_maxsize; /* max size of object id array, see
817 * get_objectid() commentary */
818 __le16 s_oid_cursize; /* current size of object id array */
819 __le16 s_umount_state; /* this is set to 1 when filesystem was
820 * umounted, to 2 - when not */
821 char s_magic[10]; /* reiserfs magic string indicates that
822 * file system is reiserfs:
823 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
824 __le16 s_fs_state; /* it is used by fsck to mark which
825 * phase of rebuilding is done */
826 __le32 s_hash_function_code; /* indicates what hash function is being used
827 * to sort names in a directory */
828 __le16 s_tree_height; /* height of disk tree */
829 __le16 s_bmap_nr; /* number of bitmap blocks needed to address
830 * each block of file system */
831 __le16 s_version; /* this field is only reliable on filesystem
832 * with non-standard journal */
833 __le16 s_reserved_for_journal; /* size in blocks of journal area on main
834 * device, we need to keep after
835 * making fs with non-standard journal */
836} __attribute__ ((__packed__));
837
838#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
839
840/* this is the on disk super block */
841struct reiserfs_super_block {
842 struct reiserfs_super_block_v1 s_v1;
843 __le32 s_inode_generation;
844 __le32 s_flags; /* Right now used only by inode-attributes, if enabled */
845 unsigned char s_uuid[16]; /* filesystem unique identifier */
846 unsigned char s_label[16]; /* filesystem volume label */
847 __le16 s_mnt_count; /* Count of mounts since last fsck */
848 __le16 s_max_mnt_count; /* Maximum mounts before check */
849 __le32 s_lastcheck; /* Timestamp of last fsck */
850 __le32 s_check_interval; /* Interval between checks */
851 char s_unused[76]; /* zero filled by mkreiserfs and
852 * reiserfs_convert_objectid_map_v1()
853 * so any additions must be updated
854 * there as well. */
855} __attribute__ ((__packed__));
856
857#define SB_SIZE (sizeof(struct reiserfs_super_block))
858
859#define REISERFS_VERSION_1 0
860#define REISERFS_VERSION_2 2
861
862// on-disk super block fields converted to cpu form
863#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
864#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
865#define SB_BLOCKSIZE(s) \
866 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
867#define SB_BLOCK_COUNT(s) \
868 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
869#define SB_FREE_BLOCKS(s) \
870 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
871#define SB_REISERFS_MAGIC(s) \
872 (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
873#define SB_ROOT_BLOCK(s) \
874 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
875#define SB_TREE_HEIGHT(s) \
876 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
877#define SB_REISERFS_STATE(s) \
878 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
879#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
880#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))
881
882#define PUT_SB_BLOCK_COUNT(s, val) \
883 do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
884#define PUT_SB_FREE_BLOCKS(s, val) \
885 do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
886#define PUT_SB_ROOT_BLOCK(s, val) \
887 do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
888#define PUT_SB_TREE_HEIGHT(s, val) \
889 do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
890#define PUT_SB_REISERFS_STATE(s, val) \
891 do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
892#define PUT_SB_VERSION(s, val) \
893 do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
894#define PUT_SB_BMAP_NR(s, val) \
895 do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
896
897#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
898#define SB_ONDISK_JOURNAL_SIZE(s) \
899 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
900#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
901 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
902#define SB_ONDISK_JOURNAL_DEVICE(s) \
903 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
904#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
905 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
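/*
 * Editorial sketch (not in the original header): typical use of the
 * accessors above; "s" is assumed to be a mounted reiserfs super_block.
 *
 *	u32 total = SB_BLOCK_COUNT(s);
 *	u32 free = SB_FREE_BLOCKS(s);
 *	int pct_free = total ? (int)(100ULL * free / total) : 0;
 *	PUT_SB_FREE_BLOCKS(s, free - 1);	-- after allocating a block
 */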
906
907#define is_block_in_log_or_reserved_area(s, block) \
908 ((block) >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
909 && (block) < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \
910 (!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \
911 SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s)))
912
913int is_reiserfs_3_5(struct reiserfs_super_block *rs);
914int is_reiserfs_3_6(struct reiserfs_super_block *rs);
915int is_reiserfs_jr(struct reiserfs_super_block *rs);
916
917/* ReiserFS leaves the first 64k unused, so that partition labels have
918 enough space. If someone wants to write a fancy bootloader that
919 needs more than 64k, let us know, and this will be increased in size.
920 This number must be larger than the largest block size on any
921 platform, or code will break. -Hans */
922#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
923#define REISERFS_FIRST_BLOCK unused_define
924#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
925
926/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
927#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
928
929/* reiserfs internal error codes (used by search_by_key and fix_nodes) */
930#define CARRY_ON 0
931#define REPEAT_SEARCH -1
932#define IO_ERROR -2
933#define NO_DISK_SPACE -3
934#define NO_BALANCING_NEEDED (-4)
935#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
936#define QUOTA_EXCEEDED -6
937
938typedef __u32 b_blocknr_t;
939typedef __le32 unp_t;
940
941struct unfm_nodeinfo {
942 unp_t unfm_nodenum;
943 unsigned short unfm_freespace;
944};
945
946/* there are two formats of keys: 3.5 and 3.6 */
947
948#define KEY_FORMAT_3_5 0
949#define KEY_FORMAT_3_6 1
950
951/* there are two stat data formats */
952#define STAT_DATA_V1 0
953#define STAT_DATA_V2 1
954
955static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode)
956{
957 return container_of(inode, struct reiserfs_inode_info, vfs_inode);
958}
959
960static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
961{
962 return sb->s_fs_info;
963}
964
965/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
966 * which overflows on large file systems. */
967static inline __u32 reiserfs_bmap_count(struct super_block *sb)
968{
969 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
970}
971
972static inline int bmap_would_wrap(unsigned bmap_nr)
973{
974 return bmap_nr > ((1LL << 16) - 1);
975}
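/*
 * Worked example: with 4KiB blocks one bitmap block addresses
 * 4096 * 8 = 32768 blocks, so a filesystem of 65536 * 32768 blocks
 * (8TiB) already needs 65536 bitmap blocks -- one more than a u16 can
 * hold. reiserfs_bmap_count() therefore recomputes the count from
 * SB_BLOCK_COUNT() instead of trusting the on-disk 16-bit field.
 */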
976
977/* this gives the key format version of all items (except the stat
978 data) that the object consists of */
979#define get_inode_item_key_version( inode ) \
980 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
981
982#define set_inode_item_key_version( inode, version ) \
983 ({ if((version)==KEY_FORMAT_3_6) \
984 REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \
985 else \
986 REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; })
987
988#define get_inode_sd_version(inode) \
989 ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1)
990
991#define set_inode_sd_version(inode, version) \
992 ({ if((version)==STAT_DATA_V2) \
993 REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \
994 else \
995 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
996
997/* This is an aggressive tail suppression policy, I am hoping it
998 improves our benchmarks. The principle behind it is that percentage
999 space saving is what matters, not absolute space saving. This is
1000 non-intuitive, but it helps to understand it if you consider that the
1001 cost to access 4 blocks is not much more than the cost to access 1
1002 block, if you have to do a seek and rotate. A tail risks a
1003 non-linear disk access that is significant as a percentage of total
1004 time cost for a 4 block file and saves an amount of space that is
1005 less significant as a percentage of space, or so goes the hypothesis.
1006 -Hans */
1007#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
1008(\
1009 (!(n_tail_size)) || \
1010 (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
1011 ( (n_file_size) >= (n_block_size) * 4 ) || \
1012 ( ( (n_file_size) >= (n_block_size) * 3 ) && \
1013 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \
1014 ( ( (n_file_size) >= (n_block_size) * 2 ) && \
1015 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \
1016 ( ( (n_file_size) >= (n_block_size) ) && \
1017 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
1018)
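/*
 * Worked example (4096-byte blocks, so MAX_DIRECT_ITEM_LEN(4096) is
 * 3976 -- see the MISC section below): a 10200-byte file has two full
 * blocks and a 2008-byte tail; 2008 >= 3976/2, so the S1 policy stores
 * the tail in an unformatted node. A 10000-byte file with its
 * 1808-byte tail fails every clause above and keeps the tail as a
 * direct item.
 */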
1019
1020/* Another strategy for tails: create a tail only if the whole file
1021 would fit into one DIRECT item.
1022 The primary intention is to increase performance by decreasing
1023 seeking.
1024*/
1025#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
1026(\
1027 (!(n_tail_size)) || \
1028 (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \
1029)
1030
1031/*
1032 * values for s_umount_state field
1033 */
1034#define REISERFS_VALID_FS 1
1035#define REISERFS_ERROR_FS 2
1036
1037//
1038// there are 5 item types currently
1039//
1040#define TYPE_STAT_DATA 0
1041#define TYPE_INDIRECT 1
1042#define TYPE_DIRECT 2
1043#define TYPE_DIRENTRY 3
1044#define TYPE_MAXTYPE 3
1045#define TYPE_ANY 15 // FIXME: comment is required
1046
1047/***************************************************************************/
1048/* KEY & ITEM HEAD */
1049/***************************************************************************/
1050
1051//
1052// directories use this key format, as do old (3.5 format) files
1053//
1054struct offset_v1 {
1055 __le32 k_offset;
1056 __le32 k_uniqueness;
1057} __attribute__ ((__packed__));
1058
1059struct offset_v2 {
1060 __le64 v;
1061} __attribute__ ((__packed__));
1062
1063static inline __u16 offset_v2_k_type(const struct offset_v2 *v2)
1064{
1065 __u8 type = le64_to_cpu(v2->v) >> 60;
1066 return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY;
1067}
1068
1069static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type)
1070{
1071 v2->v =
1072 (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60);
1073}
1074
1075static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2)
1076{
1077 return le64_to_cpu(v2->v) & (~0ULL >> 4);
1078}
1079
1080static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
1081{
1082 offset &= (~0ULL >> 4);
1083 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
1084}
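/*
 * Editorial sketch (offset_v2_pack_example is an illustrative name, not
 * part of the original header): shows how the helpers above pack a
 * 60-bit offset and a 4-bit type into the single little-endian word of
 * struct offset_v2.
 */
static inline int offset_v2_pack_example(void)
{
	struct offset_v2 o = { .v = cpu_to_le64(0) };

	set_offset_v2_k_offset(&o, 4097);	/* low 60 bits */
	set_offset_v2_k_type(&o, TYPE_INDIRECT);	/* high 4 bits */

	/* both values survive a round trip through the packed word */
	return offset_v2_k_offset(&o) == 4097 &&
	       offset_v2_k_type(&o) == TYPE_INDIRECT;
}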
1085
1086/* Key of an item determines its location in the S+tree, and
1087 is composed of 4 components */
1088struct reiserfs_key {
1089 __le32 k_dir_id; /* packing locality: by default parent
1090 directory object id */
1091 __le32 k_objectid; /* object identifier */
1092 union {
1093 struct offset_v1 k_offset_v1;
1094 struct offset_v2 k_offset_v2;
1095 } __attribute__ ((__packed__)) u;
1096} __attribute__ ((__packed__));
1097
1098struct in_core_key {
1099 __u32 k_dir_id; /* packing locality: by default parent
1100 directory object id */
1101 __u32 k_objectid; /* object identifier */
1102 __u64 k_offset;
1103 __u8 k_type;
1104};
1105
1106struct cpu_key {
1107 struct in_core_key on_disk_key;
1108 int version;
1109 int key_length; /* 3 in all cases but direct2indirect and
1110 indirect2direct conversion */
1111};
1112
1113/* Our function for comparing keys can compare keys of different
1114 lengths. It takes as a parameter the length of the keys it is to
1115 compare. These defines are used in determining what is to be passed
1116 to it as that parameter. */
1117#define REISERFS_FULL_KEY_LEN 4
1118#define REISERFS_SHORT_KEY_LEN 2
1119
1120/* The result of the key compare */
1121#define FIRST_GREATER 1
1122#define SECOND_GREATER -1
1123#define KEYS_IDENTICAL 0
1124#define KEY_FOUND 1
1125#define KEY_NOT_FOUND 0
1126
1127#define KEY_SIZE (sizeof(struct reiserfs_key))
1128#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))
1129
1130/* return values for search_by_key and clones */
1131#define ITEM_FOUND 1
1132#define ITEM_NOT_FOUND 0
1133#define ENTRY_FOUND 1
1134#define ENTRY_NOT_FOUND 0
1135#define DIRECTORY_NOT_FOUND -1
1136#define REGULAR_FILE_FOUND -2
1137#define DIRECTORY_FOUND -3
1138#define BYTE_FOUND 1
1139#define BYTE_NOT_FOUND 0
1140#define FILE_NOT_FOUND -1
1141
1142#define POSITION_FOUND 1
1143#define POSITION_NOT_FOUND 0
1144
1145// return values for reiserfs_find_entry and search_by_entry_key
1146#define NAME_FOUND 1
1147#define NAME_NOT_FOUND 0
1148#define GOTO_PREVIOUS_ITEM 2
1149#define NAME_FOUND_INVISIBLE 3
1150
1151/* Everything in the filesystem is stored as a set of items. The
1152 item head contains the key of the item, its free space (for
1153 indirect items) and specifies the location of the item itself
1154 within the block. */
1155
1156struct item_head {
1157 /* Everything in the tree is found by searching for it based on
1158 * its key.*/
1159 struct reiserfs_key ih_key;
1160 union {
1161 /* The free space in the last unformatted node of an
1162 indirect item if this is an indirect item. This
1163 equals 0xFFFF iff this is a direct item or stat data
1164 item. Note that the key, not this field, is used to
1165 determine the item type, and thus which field this
1166 union contains. */
1167 __le16 ih_free_space_reserved;
1168 /* Iff this is a directory item, this field equals the
1169 number of directory entries in the directory item. */
1170 __le16 ih_entry_count;
1171 } __attribute__ ((__packed__)) u;
1172 __le16 ih_item_len; /* total size of the item body */
1173 __le16 ih_item_location; /* an offset to the item body
1174 * within the block */
1175 __le16 ih_version; /* 0 for all old items, 2 for new
1176 ones. The highest bit is set
1177 temporarily by fsck and cleared
1178 when it is done */
1179} __attribute__ ((__packed__));
1180/* size of item header */
1181#define IH_SIZE (sizeof(struct item_head))
1182
1183#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved)
1184#define ih_version(ih) le16_to_cpu((ih)->ih_version)
1185#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count)
1186#define ih_location(ih) le16_to_cpu((ih)->ih_item_location)
1187#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len)
1188
1189#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0)
1190#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0)
1191#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0)
1192#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0)
1193#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0)
1194
1195#define unreachable_item(ih) (ih_version(ih) & (1 << 15))
1196
1197#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
1198#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
1199
1200/* these operate on indirect items, where you've got an array of ints
1201** at a possibly unaligned location. These are a no-op on ia32
1202**
1203** p is the array of __u32, i is the index into the array, v is the value
1204** to store there.
1205*/
1206#define get_block_num(p, i) get_unaligned_le32((p) + (i))
1207#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
1208
1209//
1210// in the old (v1) key format the uniqueness field encodes the key type
1211//
1212#define V1_SD_UNIQUENESS 0
1213#define V1_INDIRECT_UNIQUENESS 0xfffffffe
1214#define V1_DIRECT_UNIQUENESS 0xffffffff
1215#define V1_DIRENTRY_UNIQUENESS 500
1216#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required
1217
1218//
1219// here are conversion routines
1220//
1221static inline int uniqueness2type(__u32 uniqueness) CONSTF;
1222static inline int uniqueness2type(__u32 uniqueness)
1223{
1224 switch ((int)uniqueness) {
1225 case V1_SD_UNIQUENESS:
1226 return TYPE_STAT_DATA;
1227 case V1_INDIRECT_UNIQUENESS:
1228 return TYPE_INDIRECT;
1229 case V1_DIRECT_UNIQUENESS:
1230 return TYPE_DIRECT;
1231 case V1_DIRENTRY_UNIQUENESS:
1232 return TYPE_DIRENTRY;
1233 case V1_ANY_UNIQUENESS:
1234 default:
1235 return TYPE_ANY;
1236 }
1237}
1238
1239static inline __u32 type2uniqueness(int type) CONSTF;
1240static inline __u32 type2uniqueness(int type)
1241{
1242 switch (type) {
1243 case TYPE_STAT_DATA:
1244 return V1_SD_UNIQUENESS;
1245 case TYPE_INDIRECT:
1246 return V1_INDIRECT_UNIQUENESS;
1247 case TYPE_DIRECT:
1248 return V1_DIRECT_UNIQUENESS;
1249 case TYPE_DIRENTRY:
1250 return V1_DIRENTRY_UNIQUENESS;
1251 case TYPE_ANY:
1252 default:
1253 return V1_ANY_UNIQUENESS;
1254 }
1255}
1256
1257//
1258// key is a pointer to an on-disk key, stored in le form; the result is
1259// in cpu order. The key does not encode the object's format version,
1260// so the version must be passed to these helpers
1261//
1262static inline loff_t le_key_k_offset(int version,
1263 const struct reiserfs_key *key)
1264{
1265 return (version == KEY_FORMAT_3_5) ?
1266 le32_to_cpu(key->u.k_offset_v1.k_offset) :
1267 offset_v2_k_offset(&(key->u.k_offset_v2));
1268}
1269
1270static inline loff_t le_ih_k_offset(const struct item_head *ih)
1271{
1272 return le_key_k_offset(ih_version(ih), &(ih->ih_key));
1273}
1274
1275static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
1276{
1277 return (version == KEY_FORMAT_3_5) ?
1278 uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
1279 offset_v2_k_type(&(key->u.k_offset_v2));
1280}
1281
1282static inline loff_t le_ih_k_type(const struct item_head *ih)
1283{
1284 return le_key_k_type(ih_version(ih), &(ih->ih_key));
1285}
1286
1287static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
1288 loff_t offset)
1289{
1290 (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */
1291 (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset));
1292}
1293
1294static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
1295{
1296 set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
1297}
1298
1299static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
1300 int type)
1301{
1302 (version == KEY_FORMAT_3_5) ?
1303 (void)(key->u.k_offset_v1.k_uniqueness =
1304 cpu_to_le32(type2uniqueness(type)))
1305 : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
1306}
1307
1308static inline void set_le_ih_k_type(struct item_head *ih, int type)
1309{
1310 set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
1311}
1312
1313static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
1314{
1315 return le_key_k_type(version, key) == TYPE_DIRENTRY;
1316}
1317
1318static inline int is_direct_le_key(int version, struct reiserfs_key *key)
1319{
1320 return le_key_k_type(version, key) == TYPE_DIRECT;
1321}
1322
1323static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
1324{
1325 return le_key_k_type(version, key) == TYPE_INDIRECT;
1326}
1327
1328static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
1329{
1330 return le_key_k_type(version, key) == TYPE_STAT_DATA;
1331}
1332
1333//
1334// item header has version.
1335//
1336static inline int is_direntry_le_ih(struct item_head *ih)
1337{
1338 return is_direntry_le_key(ih_version(ih), &ih->ih_key);
1339}
1340
1341static inline int is_direct_le_ih(struct item_head *ih)
1342{
1343 return is_direct_le_key(ih_version(ih), &ih->ih_key);
1344}
1345
1346static inline int is_indirect_le_ih(struct item_head *ih)
1347{
1348 return is_indirect_le_key(ih_version(ih), &ih->ih_key);
1349}
1350
1351static inline int is_statdata_le_ih(struct item_head *ih)
1352{
1353 return is_statdata_le_key(ih_version(ih), &ih->ih_key);
1354}
1355
1356//
1357// key is a pointer to a cpu key; the result is in cpu order
1358//
1359static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
1360{
1361 return key->on_disk_key.k_offset;
1362}
1363
1364static inline loff_t cpu_key_k_type(const struct cpu_key *key)
1365{
1366 return key->on_disk_key.k_type;
1367}
1368
1369static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset)
1370{
1371 key->on_disk_key.k_offset = offset;
1372}
1373
1374static inline void set_cpu_key_k_type(struct cpu_key *key, int type)
1375{
1376 key->on_disk_key.k_type = type;
1377}
1378
1379static inline void cpu_key_k_offset_dec(struct cpu_key *key)
1380{
1381 key->on_disk_key.k_offset--;
1382}
1383
1384#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
1385#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
1386#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
1387#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA)
1388
1389/* are these used ? */
1390#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key)))
1391#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key)))
1392#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key)))
1393#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key)))
1394
1395#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \
1396 (!COMP_SHORT_KEYS(ih, key) && \
1397 I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize))
1398
1399/* maximal length of item */
1400#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE)
1401#define MIN_ITEM_LEN 1
1402
1403/* object identifier for root dir */
1404#define REISERFS_ROOT_OBJECTID 2
1405#define REISERFS_ROOT_PARENT_OBJECTID 1
1406
1407extern struct reiserfs_key root_key;
1408
1409/*
1410 * Picture represents a leaf of the S+tree
1411 * ______________________________________________________
1412 * | | Array of | | |
1413 * |Block | Object-Item | F r e e | Objects- |
1414 * | head | Headers | S p a c e | Items |
1415 * |______|_______________|___________________|___________|
1416 */
1417
1418/* Header of a disk block. More precisely, header of a formatted leaf
1419 or internal node, and not the header of an unformatted node. */
1420struct block_head {
1421 __le16 blk_level; /* Level of a block in the tree. */
1422 __le16 blk_nr_item; /* Number of keys/items in a block. */
1423 __le16 blk_free_space; /* Block free space in bytes. */
1424 __le16 blk_reserved;
1425 /* dump this in v4/planA */
1426 struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */
1427};
1428
1429#define BLKH_SIZE (sizeof(struct block_head))
1430#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level))
1431#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item))
1432#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space))
1433#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved))
1434#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val))
1435#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val))
1436#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val))
1437#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val))
1438#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key)
1439#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val)
1440
1441/*
1442 * values for blk_level field of the struct block_head
1443 */
1444
1445#define FREE_LEVEL 0 /* when a node is removed from the tree its
1446 blk_level is set to FREE_LEVEL. It is then
1447 used to check whether the node is still in
1448 the tree */
1449
1450#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
1451
1452/* Given the buffer head of a formatted node, resolve to the block head of that node. */
1453#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data))
1454/* Number of items that are in buffer. */
1455#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh)))
1456#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh)))
1457#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh)))
1458
1459#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0)
1460#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0)
1461#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0)
1462
1463/* Get right delimiting key. -- little endian */
1464#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh))))
1465
1466/* Does the buffer contain a disk leaf? */
1467#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL)
1468
1469/* Does the buffer contain a disk internal node? */
1470#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
1471 && B_LEVEL(bh) <= MAX_HEIGHT)
1472
1473/***************************************************************************/
1474/* STAT DATA */
1475/***************************************************************************/
1476
1477//
1478// the old stat data is 32 bytes long. We distinguish the new one by
1479// its different size
1480//
1481struct stat_data_v1 {
1482 __le16 sd_mode; /* file type, permissions */
1483 __le16 sd_nlink; /* number of hard links */
1484 __le16 sd_uid; /* owner */
1485 __le16 sd_gid; /* group */
1486 __le32 sd_size; /* file size */
1487 __le32 sd_atime; /* time of last access */
1488 __le32 sd_mtime; /* time file was last modified */
1489 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
1490 union {
1491 __le32 sd_rdev;
1492 __le32 sd_blocks; /* number of blocks file uses */
1493 } __attribute__ ((__packed__)) u;
1494 __le32 sd_first_direct_byte; /* first byte of file which is stored
1495 in a direct item: except that if it
1496 equals 1 it is a symlink and if it
1497 equals ~(__u32)0 there is no
1498 direct item. The existence of this
1499 field really grates on me. Let's
1500 replace it with a macro based on
1501 sd_size and our tail suppression
1502 policy. Someday. -Hans */
1503} __attribute__ ((__packed__));
1504
1505#define SD_V1_SIZE (sizeof(struct stat_data_v1))
1506#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5)
1507#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
1508#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
1509#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink))
1510#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v))
1511#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid))
1512#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v))
1513#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid))
1514#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v))
1515#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size))
1516#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v))
1517#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
1518#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
1519#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
1520#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
1521#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
1522#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
1523#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
1524#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
1525#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks))
1526#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v))
1527#define sd_v1_first_direct_byte(sdp) \
1528 (le32_to_cpu((sdp)->sd_first_direct_byte))
1529#define set_sd_v1_first_direct_byte(sdp,v) \
1530 ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
1531
1532/* inode flags stored in sd_attrs (nee sd_reserved) */
1533
1534/* we want common flags to have the same values as in ext2,
1535 so chattr(1) will work without problems */
1536#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
1537#define REISERFS_APPEND_FL FS_APPEND_FL
1538#define REISERFS_SYNC_FL FS_SYNC_FL
1539#define REISERFS_NOATIME_FL FS_NOATIME_FL
1540#define REISERFS_NODUMP_FL FS_NODUMP_FL
1541#define REISERFS_SECRM_FL FS_SECRM_FL
1542#define REISERFS_UNRM_FL FS_UNRM_FL
1543#define REISERFS_COMPR_FL FS_COMPR_FL
1544#define REISERFS_NOTAIL_FL FS_NOTAIL_FL
1545
1546/* persistent flags that file inherits from the parent directory */
1547#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
1548 REISERFS_SYNC_FL | \
1549 REISERFS_NOATIME_FL | \
1550 REISERFS_NODUMP_FL | \
1551 REISERFS_SECRM_FL | \
1552 REISERFS_COMPR_FL | \
1553 REISERFS_NOTAIL_FL )
1554
1555/* Stat Data on disk (reiserfs version of UFS disk inode minus the
1556 address blocks) */
1557struct stat_data {
1558 __le16 sd_mode; /* file type, permissions */
1559 __le16 sd_attrs; /* persistent inode flags */
1560 __le32 sd_nlink; /* number of hard links */
1561 __le64 sd_size; /* file size */
1562 __le32 sd_uid; /* owner */
1563 __le32 sd_gid; /* group */
1564 __le32 sd_atime; /* time of last access */
1565 __le32 sd_mtime; /* time file was last modified */
1566 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
1567 __le32 sd_blocks;
1568 union {
1569 __le32 sd_rdev;
1570 __le32 sd_generation;
1571 //__le32 sd_first_direct_byte;
1572 /* first byte of file which is stored in a
1573 direct item: except that if it equals 1
1574 it is a symlink and if it equals
1575 ~(__u32)0 there is no direct item. The
1576 existence of this field really grates
1577 on me. Let's replace it with a macro
1578 based on sd_size and our tail
1579 suppression policy? */
1580 } __attribute__ ((__packed__)) u;
1581} __attribute__ ((__packed__));
1582//
1583// this is 44 bytes long
1584//
1585#define SD_SIZE (sizeof(struct stat_data))
1586#define SD_V2_SIZE SD_SIZE
1587#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6)
1588#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
1589#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
1590/* sd_reserved */
1591/* set_sd_reserved */
1592#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink))
1593#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v))
1594#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size))
1595#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v))
1596#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid))
1597#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v))
1598#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid))
1599#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v))
1600#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
1601#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
1602#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
1603#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
1604#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
1605#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
1606#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks))
1607#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
1608#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
1609#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
1610#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation))
1611#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
1612#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs))
1613#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v))
1614
1615/***************************************************************************/
1616/* DIRECTORY STRUCTURE */
1617/***************************************************************************/
1618/*
1619 Picture represents the structure of directory items
1620 ________________________________________________
1621 | Array of | | | | | |
1622 | directory |N-1| N-2 | .... | 1st |0th|
1623 | entry headers | | | | | |
1624 |_______________|___|_____|________|_______|___|
1625 <---- directory entries ------>
1626
1627 The first directory item has k_offset component 1. We always store
1628 "." and ".." in one item and never split them into different items.
1629 This makes, among other things, the code for removing directories
1630 simpler. */
1631#define SD_OFFSET 0
1632#define SD_UNIQUENESS 0
1633#define DOT_OFFSET 1
1634#define DOT_DOT_OFFSET 2
1635#define DIRENTRY_UNIQUENESS 500
1636
1637/* */
1638#define FIRST_ITEM_OFFSET 1
1639
1640/*
1641 Q: How do we get the key of the object an entry points to?
1642
1643 A: Each directory entry has a header. Its deh_dir_id and deh_objectid
1644 fields form the key of the object the entry points to. */
1645
1646/* NOT IMPLEMENTED:
1647 Directory will someday contain stat data of object */
1648
1649struct reiserfs_de_head {
1650 __le32 deh_offset; /* third component of the directory entry key */
1651 __le32 deh_dir_id; /* objectid of the parent directory of the
1652 object referenced by this entry */
1653 __le32 deh_objectid; /* objectid of the object referenced by this entry */
1654 __le16 deh_location; /* offset of the name within the whole item */
1655 __le16 deh_state; /* whether 1) the entry contains stat data (for the
1656 future), and 2) whether the entry is hidden (unlinked) */
1657} __attribute__ ((__packed__));
1658#define DEH_SIZE sizeof(struct reiserfs_de_head)
1659#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset))
1660#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id))
1661#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid))
1662#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location))
1663#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state))
1664
1665#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v)))
1666#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v)))
1667#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
1668#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
1669#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v)))
1670
1671/* empty directory contains two entries "." and ".." and their headers */
1672#define EMPTY_DIR_SIZE \
1673(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))
1674
1675/* old format directories have this size when empty */
1676#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)
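/*
 * Worked arithmetic: DEH_SIZE is 16 bytes (three __le32 plus two
 * __le16), and assuming ROUND_UP() (defined elsewhere in this header)
 * pads names to 8-byte boundaries, EMPTY_DIR_SIZE is
 * 2 * 16 + 8 + 8 = 48 bytes, while the v1 format stored "." and ".."
 * unpadded: 2 * 16 + 1 + 2 = 35 bytes.
 */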
1677
1678#define DEH_Statdata 0 /* not used now */
1679#define DEH_Visible 2
1680
1681/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
1682#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
1683# define ADDR_UNALIGNED_BITS (3)
1684#endif
1685
1686/* These are only used to manipulate deh_state.
1687 * Because of this, we use the little-endian bit routines,
1688 * since deh_state is little endian on disk */
1689#ifdef ADDR_UNALIGNED_BITS
1690
1691# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
1692# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3)
1693
1694# define set_bit_unaligned(nr, addr) \
1695 __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1696# define clear_bit_unaligned(nr, addr) \
1697 __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1698# define test_bit_unaligned(nr, addr) \
1699 test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1700
1701#else
1702
1703# define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr)
1704# define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr)
1705# define test_bit_unaligned(nr, addr) test_bit_le(nr, addr)
1706
1707#endif
1708
1709#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1710#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1711#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1712#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1713
1714#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1715#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1716#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1717
1718extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
1719 __le32 par_dirid, __le32 par_objid);
1720extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
1721 __le32 par_dirid, __le32 par_objid);
1722
1723/* array of the entry headers */
1724 /* get item body */
1725#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
1726#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
1727
1728/* length of a directory entry in a directory item. This computes the
1729 length of the i-th directory entry using the entry locations from
1730 the directory entry heads. When computing the length of the 0th
1731 entry, it uses the length of the whole item in place of the entry
1732 location of the non-existent following entry.
1733 See picture above. */
1734/*
1735#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
1736((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
1737*/
1738static inline int entry_length(const struct buffer_head *bh,
1739 const struct item_head *ih, int pos_in_item)
1740{
1741 struct reiserfs_de_head *deh;
1742
1743 deh = B_I_DEH(bh, ih) + pos_in_item;
1744 if (pos_in_item)
1745 return deh_location(deh - 1) - deh_location(deh);
1746
1747 return ih_item_len(ih) - deh_location(deh);
1748}
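/*
 * Illustrative example (hypothetical numbers): for a directory item
 * with ih_item_len == 72 holding two entries, deh_location(deh + 0) ==
 * 64 and deh_location(deh + 1) == 40, entry_length() yields 72 - 64 =
 * 8 bytes for entry 0 and 64 - 40 = 24 bytes for entry 1: the names
 * are laid out back to front, each ending where the previously
 * numbered entry's name begins.
 */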
1749
1750/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
1751#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
1752
1753/* name by bh, ih and entry_num */
1754#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
1755
1756// two entries per block (at least)
1757#define REISERFS_MAX_NAME(block_size) 255
1758
1759/* this structure is used for operations on directory entries. It is
1760 not a disk structure. */
1761/* When reiserfs_find_entry or search_by_entry_key find a directory
1762 entry, they return a filled reiserfs_dir_entry structure */
1763struct reiserfs_dir_entry {
1764 struct buffer_head *de_bh;
1765 int de_item_num;
1766 struct item_head *de_ih;
1767 int de_entry_num;
1768 struct reiserfs_de_head *de_deh;
1769 int de_entrylen;
1770 int de_namelen;
1771 char *de_name;
1772 unsigned long *de_gen_number_bit_string;
1773
1774 __u32 de_dir_id;
1775 __u32 de_objectid;
1776
1777 struct cpu_key de_entry_key;
1778};
1779
1780/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */
1781
1782/* pointer to file name, stored in entry */
1783#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))
1784
1785/* length of name */
1786#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
1787(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0))
1788
1789/* hash value occupies bits from 7 up to 30 */
1790#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL)
1791/* generation number occupies 7 bits starting from 0 up to 6 */
1792#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL)
1793#define MAX_GENERATION_NUMBER 127
1794
1795#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number))
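/*
 * Worked example: a directory offset of 0x00345683 splits into
 * GET_HASH_VALUE() == 0x00345680 (bits 7..30) and
 * GET_GENERATION_NUMBER() == 3 (bits 0..6); names that hash to the
 * same value are disambiguated by bumping the generation, e.g.
 * SET_GENERATION_NUMBER(0x00345683, 5) == 0x00345685.
 */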
1796
1797/*
1798 * Picture represents an internal node of the reiserfs tree
1799 * ______________________________________________________
1800 * | | Array of | Array of | Free |
1801 * |block | keys | pointers | space |
1802 * | head | N | N+1 | |
1803 * |______|_______________|___________________|___________|
1804 */
1805
1806/***************************************************************************/
1807/* DISK CHILD */
1808/***************************************************************************/
1809/* Disk child pointer: The pointer from an internal node of the tree
1810 to a node that is on disk. */
1811struct disk_child {
1812 __le32 dc_block_number; /* Disk child's block number. */
1813 __le16 dc_size; /* Disk child's used space. */
1814 __le16 dc_reserved;
1815};
1816
1817#define DC_SIZE (sizeof(struct disk_child))
1818#define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number))
1819#define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size))
1820#define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0)
1821#define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0)
1822
1823/* Get disk child by buffer header and position in the tree node. */
1824#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\
1825((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos)))
1826
1827/* Get disk child number by buffer header and position in the tree node. */
1828#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos)))
1829#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \
1830 (put_dc_block_number(B_N_CHILD(bh, n_pos), val))
1831
1832 /* maximal value of field child_size in structure disk_child */
1833 /* child size is the combined size of all items and their headers */
1834#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE ))
1835
1836/* amount of used space in buffer (not including block head) */
1837#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur)))
1838
1839/* max and min number of keys in internal node */
1840#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
1841#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2)
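/*
 * Worked example for a 4096-byte block: BLKH_SIZE is 24 (8 bytes of
 * counters plus the 16-byte delimiting key), so MAX_CHILD_SIZE is
 * 4072, MAX_NR_KEY is (4072 - 8) / (16 + 8) == 169 keys (and thus 170
 * child pointers), and MIN_NR_KEY is 84.
 */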
1842
1843/***************************************************************************/
1844/* PATH STRUCTURES AND DEFINES */
1845/***************************************************************************/
1846
1847/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the
1848 key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it
1849 does not find them in the cache it reads them from disk. For each node search_by_key finds using
1850 reiserfs_bread it then uses bin_search to look through that node. bin_search will find the
1851 position of the block_number of the next node if it is looking through an internal node. If it
1852 is looking through a leaf node, bin_search will find the position of the item whose key is either
1853 equal to the given key or is the maximal key less than the given key. */
1854
1855struct path_element {
1856 struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */
1857 int pe_position; /* Position in the tree node which is placed in the */
1858 /* buffer above. */
1859};
1860
1861#define MAX_HEIGHT 5 /* maximal height of the tree. Don't change this without changing JOURNAL_PER_BALANCE_CNT */
1862#define EXTENDED_MAX_HEIGHT 7 /* Must equal MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
1863#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */
1864
1865#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
1866#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */
1867
1868/* We need to keep track of who the ancestors of nodes are. When we
1869 perform a search we record which nodes were visited while
1870 descending the tree looking for the node we searched for. This list
1871 of nodes is called the path. This information is used while
1872 performing balancing. Note that this path information may become
1873 invalid, and this means we must check it when using it to see if it
1874 is still valid. You'll need to read search_by_key and the comments
1875 in it, especially about decrement_counters_in_path(), to understand
1876 this structure.
1877
1878Paths make the code so much harder to work with and debug.... An
1879enormous number of bugs are due to them, and trying to write or modify
1880code that uses them just makes my head hurt. They are based on an
1881excessive effort to avoid disturbing the precious VFS code.:-( The
1882gods only know how we are going to SMP the code that uses them.
1883znodes are the way! */
1884
1885#define PATH_READA 0x1 /* do read ahead */
1886#define PATH_READA_BACK 0x2 /* read backwards */
1887
1888struct treepath {
1889 int path_length; /* Length of the path_elements array below. */
1890 int reada;
1891 struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */
1892 int pos_in_item;
1893};
1894
1895#define pos_in_item(path) ((path)->pos_in_item)
1896
1897#define INITIALIZE_PATH(var) \
1898struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
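/*
 * Typical usage (an editorial sketch; search_by_key() and pathrelse()
 * are declared with the other function prototypes further down and
 * defined in stree.c):
 *
 *	INITIALIZE_PATH(path);
 *
 *	if (search_by_key(sb, &key, &path, DISK_LEAF_NODE_LEVEL) ==
 *	    ITEM_FOUND) {
 *		struct item_head *ih = get_ih(&path);
 *		-- inspect or modify the item here
 *	}
 *	pathrelse(&path);	-- always release the held buffers
 */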
1899
1900/* Get path element by path and path position. */
1901#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset))
1902
1903/* Get buffer header at the path by path and path position. */
1904#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)
1905
1906/* Get position in the element at the path by path and path position. */
1907#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
1908
1909#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
1910 /* you know, to the person who didn't
1911 write this the macro name does not
1912 at first suggest what it does.
1913 Maybe POSITION_FROM_PATH_END? Or
1914 maybe we should just focus on
1915 dumping paths... -Hans */
1916#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
1917
1918#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))
1919
1920/* in do_balance leaf has h == 0 in contrast with path structure,
1921 where root has level == 0. That is why we need these defines */
1922#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */
1923#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */
1924#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
1925#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */
1926
1927#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
1928
1929#define get_last_bh(path) PATH_PLAST_BUFFER(path)
1930#define get_ih(path) PATH_PITEM_HEAD(path)
1931#define get_item_pos(path) PATH_LAST_POSITION(path)
1932#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
1933#define item_moved(ih,path) comp_items(ih, path)
1934#define path_changed(ih,path) comp_items (ih, path)
1935
1936/***************************************************************************/
1937/* MISC */
1938/***************************************************************************/
1939
1940/* Size of pointer to the unformatted node. */
1941#define UNFM_P_SIZE (sizeof(unp_t))
1942#define UNFM_P_SHIFT 2
1943
1944// in the in-core inode the key is stored in le form
1945#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
1946
1947#define MAX_UL_INT 0xffffffff
1948#define MAX_INT 0x7fffffff
1949#define MAX_US_INT 0xffff
1950
1951// reiserfs version 2 has a max offset of 60 bits; version 1 has a 32-bit offset
1952#define U32_MAX (~(__u32)0)
1953
1954static inline loff_t max_reiserfs_offset(struct inode *inode)
1955{
1956 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
1957 return (loff_t) U32_MAX;
1958
1959 return (loff_t) ((~(__u64) 0) >> 4);
1960}
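/*
 * I.e. a KEY_FORMAT_3_5 object can address byte offsets up to
 * 2^32 - 1, while the 60-bit KEY_FORMAT_3_6 offsets extend to
 * 2^60 - 1.
 */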
1961
1962/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/
1963#define MAX_KEY_OBJECTID MAX_UL_INT
1964
1965#define MAX_B_NUM MAX_UL_INT
1966#define MAX_FC_NUM MAX_US_INT
1967
1968/* the purpose is to detect overflow of an unsigned short */
1969#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
1970
1971/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */
1972#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */
1973#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */
1974
1975#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
1976#define get_generation(s) atomic_read (&fs_generation(s))
1977#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
1978#define __fs_changed(gen,s) (gen != get_generation (s))
1979#define fs_changed(gen,s) \
1980({ \
1981 reiserfs_cond_resched(s); \
1982 __fs_changed(gen, s); \
1983})
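/*
 * Typical pattern (an editorial sketch): the generation counter is
 * bumped whenever the tree is rebalanced, so code that may have
 * blocked revalidates its position before reusing it:
 *
 *	int gen = get_generation(s);
 *	-- a lock or I/O wait that may schedule goes here
 *	if (fs_changed(gen, s)) {
 *		-- the saved path/position may be stale; redo the search
 *	}
 */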
1984
1985/***************************************************************************/
1986/* FIXATE NODES */
1987/***************************************************************************/
1988
1989#define VI_TYPE_LEFT_MERGEABLE 1
1990#define VI_TYPE_RIGHT_MERGEABLE 2
1991
1992/* To make any change in the tree we always first find the node that
1993 contains the item to be changed/deleted, or the place to insert a
1994 new item. We call this node S. To do balancing we need to decide
1995 what we will shift to the left/right neighbor, or to a new node,
1996 where the new item will go, etc. To make this analysis simpler we
1997 build a virtual node. A virtual node is an array of items that will
1998 replace the items of node S. (For instance, if we are going to
1999 delete an item, the virtual node does not contain it.) The virtual
2000 node keeps information about item sizes and types, mergeability of
2001 the first and last items, and the sizes of all entries in a directory
2002 item. We use this array of items when calculating what we can shift
2003 to neighbors and how many nodes we need if we do no shifting, if we
2004 shift to the left/right neighbor, or to both. */
2005struct virtual_item {
2006 int vi_index; // index in the array of item operations
2007 unsigned short vi_type; // left/right mergeability
2008 unsigned short vi_item_len; /* length of item that it will have after balancing */
2009 struct item_head *vi_ih;
2010 const char *vi_item; // body of item (old or new)
2011 const void *vi_new_data; // 0 except in paste mode
2012 void *vi_uarea; // item specific area
2013};
2014
2015struct virtual_node {
2016 char *vn_free_ptr; /* this is a pointer to the free space in the buffer */
2017 unsigned short vn_nr_item; /* number of items in virtual node */
2018 short vn_size; /* size the node would have if it had unlimited size and no balancing were performed */
2019 short vn_mode; /* mode of balancing (paste, insert, delete, cut) */
2020 short vn_affected_item_num;
2021 short vn_pos_in_item;
2022 struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */
2023 const void *vn_data;
2024 struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */
2025};
2026
2027/* used by directory items when creating virtual nodes */
2028struct direntry_uarea {
2029 int flags;
2030 __u16 entry_count;
2031 __u16 entry_sizes[1];
2032} __attribute__ ((__packed__));
2033
2034/***************************************************************************/
2035/* TREE BALANCE */
2036/***************************************************************************/
2037
2038/* This temporary structure is used in tree balance algorithms, and
2039 constructed as we go to the extent that its various parts are
2040 needed. It contains arrays of nodes that can potentially be
2041 involved in the balancing of node S, and parameters that define how
2042 each of the nodes must be balanced. Note that in these algorithms
2043 for balancing the worst case is to need to balance the current node
2044 S and the left and right neighbors and all of their parents plus
2045 create a new node. We implement S1 balancing for the leaf nodes
2046 and S0 balancing for the internal nodes (S1 and S0 are defined in
2047 our papers.)*/
2048
2049#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */
2050
2051/* maximum number of FEB blocknrs on a single level */
2052#define MAX_AMOUNT_NEEDED 2
2053
2054/* someday somebody will prefix every field in this struct with tb_ */
2055struct tree_balance {
2056 int tb_mode;
2057 int need_balance_dirty;
2058 struct super_block *tb_sb;
2059 struct reiserfs_transaction_handle *transaction_handle;
2060 struct treepath *tb_path;
2061 struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */
2062 struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */
2063 struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */
2064 struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */
2065 struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */
2066 struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */
2067
2068 struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals
2069 cur_blknum. */
2070 struct buffer_head *used[MAX_FEB_SIZE];
2071 struct buffer_head *thrown[MAX_FEB_SIZE];
2072 int lnum[MAX_HEIGHT]; /* array of number of items which must be
2073 shifted to the left in order to balance the
2074 current node; for leaves includes item that
2075 will be partially shifted; for internal
2076 nodes, it is the number of child pointers
2077 rather than items. It includes the new item
2078 being created. The code sometimes subtracts
2079 one to get the number of wholly shifted
2080 items for other purposes. */
2081 int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */
2082 int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and
2083 S[h] to its item number within the node CFL[h] */
2084 int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */
2085 int insert_size[MAX_HEIGHT]; /* the number of bytes by which we are trying to add to
2086 or remove from S[h]. A negative value means removing. */
2087 int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after
2088 balancing on level h of the tree. If 0 then S is
2089 being deleted, if 1 then S remains and no new nodes
2090 are created, if 2 or 3 then 1 or 2 new nodes are
2091 being created */
2092
2093 /* fields that are used only for balancing leaves of the tree */
2094 int cur_blknum; /* number of empty blocks having been already allocated */
2095 int s0num; /* number of items that fall into left most node when S[0] splits */
2096 int s1num; /* number of items that fall into first new node when S[0] splits */
2097 int s2num; /* number of items that fall into second new node when S[0] splits */
2098 int lbytes; /* number of bytes which can flow to the left neighbor from the left */
2099 /* most liquid item that cannot be shifted from S[0] entirely */
2100 /* if -1 then nothing will be partially shifted */
2101 int rbytes; /* number of bytes which will flow to the right neighbor from the right */
2102 /* most liquid item that cannot be shifted from S[0] entirely */
2103 /* if -1 then nothing will be partially shifted */
2104 int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */
2105 /* note: if S[0] splits into 3 nodes, then items do not need to be cut */
2106 int s2bytes;
2107 struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */
2108 char *vn_buf; /* kmalloced memory. Used to create
2109 virtual node and keep map of
2110 dirtied bitmap blocks */
2111 int vn_buf_size; /* size of the vn_buf */
2112 struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */
2113
2114 int fs_gen; /* saved value of `reiserfs_generation' counter
2115 see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
2116#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2117 struct in_core_key key; /* key pointer, to pass to block allocator or
2118 another low-level subsystem */
2119#endif
2120};
2121
2122/* These are modes of balancing */
2123
2124/* When inserting an item. */
2125#define M_INSERT 'i'
2126/* When inserting into (directories only) or appending onto an already
2127 existent item. */
2128#define M_PASTE 'p'
2129/* When deleting an item. */
2130#define M_DELETE 'd'
2131/* When truncating an item or removing an entry from a (directory) item. */
2132#define M_CUT 'c'
2133
2134/* used when balancing on leaf level skipped (in reiserfsck) */
2135#define M_INTERNAL 'n'
2136
2137/* When further balancing is not needed, then do_balance does not need
2138 to be called. */
2139#define M_SKIP_BALANCING 's'
2140#define M_CONVERT 'v'
2141
2142/* modes of leaf_move_items */
2143#define LEAF_FROM_S_TO_L 0
2144#define LEAF_FROM_S_TO_R 1
2145#define LEAF_FROM_R_TO_L 2
2146#define LEAF_FROM_L_TO_R 3
2147#define LEAF_FROM_S_TO_SNEW 4
2148
2149#define FIRST_TO_LAST 0
2150#define LAST_TO_FIRST 1
2151
2152/* used in do_balance to pass a node's parent information obtained
2153 from the tb struct */
2154struct buffer_info {
2155 struct tree_balance *tb;
2156 struct buffer_head *bi_bh;
2157 struct buffer_head *bi_parent;
2158 int bi_position;
2159};
2160
2161static inline struct super_block *sb_from_tb(struct tree_balance *tb)
2162{
2163 return tb ? tb->tb_sb : NULL;
2164}
2165
2166static inline struct super_block *sb_from_bi(struct buffer_info *bi)
2167{
2168 return bi ? sb_from_tb(bi->tb) : NULL;
2169}
2170
2171/* there are 4 types of items: stat data, directory item, indirect, direct.
2172+-------------------+------------+--------------+------------+
2173| | k_offset | k_uniqueness | mergeable? |
2174+-------------------+------------+--------------+------------+
2175| stat data | 0 | 0 | no |
2176+-------------------+------------+--------------+------------+
2177| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no |
2178| non 1st directory | hash value | | yes |
2179| item | | | |
2180+-------------------+------------+--------------+------------+
2181| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object
2182+-------------------+------------+--------------+------------+
2183| direct item | offset + 1 |TYPE_DIRECT | if this is not the first direct item of the object
2184+-------------------+------------+--------------+------------+
2185*/
2186
2187struct item_operations {
2188 int (*bytes_number) (struct item_head * ih, int block_size);
2189 void (*decrement_key) (struct cpu_key *);
2190 int (*is_left_mergeable) (struct reiserfs_key * ih,
2191 unsigned long bsize);
2192 void (*print_item) (struct item_head *, char *item);
2193 void (*check_item) (struct item_head *, char *item);
2194
2195 int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
2196 int is_affected, int insert_size);
2197 int (*check_left) (struct virtual_item * vi, int free,
2198 int start_skip, int end_skip);
2199 int (*check_right) (struct virtual_item * vi, int free);
2200 int (*part_size) (struct virtual_item * vi, int from, int to);
2201 int (*unit_num) (struct virtual_item * vi);
2202 void (*print_vi) (struct virtual_item * vi);
2203};
2204
2205extern struct item_operations *item_ops[TYPE_ANY + 1];
2206
2207#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
2208#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
2209#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item)
2210#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item)
2211#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size)
2212#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip)
2213#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free)
2214#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to)
2215#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi)
2216#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi)
2217
2218#define COMP_SHORT_KEYS comp_short_keys
2219
2220/* number of blocks pointed to by the indirect item */
2221#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE)
2222
2223/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
2224#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
2225
2226/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
2227
2228/* get the item header */
2229#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2230
2231/* get key */
2232#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2233
2234/* get the key */
2235#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
2236
2237/* get item body */
2238#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
2239
2240/* get the stat data by the buffer header and the item order */
2241#define B_N_STAT_DATA(bh,nr) \
2242( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
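
/*
 * Minimal usage sketch (hypothetical helper, not part of the original
 * header): walk every item in a leaf buffer and dispatch on its type
 * through item_ops[] via the op_* wrappers above.  B_NR_ITEMS() is the
 * item count taken from the block head, defined earlier in this header.
 */
static inline void example_print_leaf_items(struct buffer_head *bh)
{
	int i;

	for (i = 0; i < B_NR_ITEMS(bh); i++) {
		struct item_head *ih = B_N_PITEM_HEAD(bh, i);

		/* expands to item_ops[le_ih_k_type(ih)]->print_item(ih, body) */
		op_print_item(ih, B_N_PITEM(bh, i));
	}
}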
2243
2244 /* following defines use reiserfs buffer header and item header */
2245
2246/* get stat-data */
2247#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
2248
2249// this is 3976 for size==4096
2250#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
2251
2252/* indirect items consist of entries which contain blocknrs, pos
2253 indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
2254 blocknr contained by the entry pos points to */
2255#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
2256#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
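
/*
 * Sketch (hypothetical helper): an indirect item is an array of
 * little-endian block numbers; I_UNFM_NUM() above gives the entry count
 * and B_I_POS_UNFM_POINTER() decodes one entry.  Returns 0 for an
 * empty item.
 */
static inline b_blocknr_t example_last_unfm_block(struct buffer_head *bh,
						  struct item_head *ih)
{
	int entries = I_UNFM_NUM(ih);

	return entries ? B_I_POS_UNFM_POINTER(bh, ih, entries - 1) : 0;
}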
2257
2258struct reiserfs_iget_args {
2259 __u32 objectid;
2260 __u32 dirid;
2261};
2262
2263/***************************************************************************/
2264/* FUNCTION DECLARATIONS */
2265/***************************************************************************/
2266
2267#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
2268
2269#define journal_trans_half(blocksize) \
2270 ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
2271
2272/* journal.c: see journal.c for the full comments on these structures */
2273
2274/* first block written in a commit. */
2275struct reiserfs_journal_desc {
2276 __le32 j_trans_id; /* id of commit */
2277 __le32 j_len; /* length of commit. len +1 is the commit block */
2278 __le32 j_mount_id; /* mount id of this trans */
2279 __le32 j_realblock[1]; /* real locations for each block */
2280};
2281
2282#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id)
2283#define get_desc_trans_len(d) le32_to_cpu((d)->j_len)
2284#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id)
2285
2286#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
2287#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0)
2288#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)
2289
2290/* last block written in a commit */
2291struct reiserfs_journal_commit {
2292 __le32 j_trans_id; /* must match j_trans_id from the desc block */
2293 __le32 j_len; /* ditto */
2294 __le32 j_realblock[1]; /* real locations for each block */
2295};
2296
2297#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
2298#define get_commit_trans_len(c) le32_to_cpu((c)->j_len)
2299#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) /* unused: struct reiserfs_journal_commit has no j_mount_id member */
2300
2301#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
2302#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0)
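
/*
 * Sketch (hypothetical helper): during replay a transaction is only
 * valid when the commit block at its end agrees with the desc block at
 * its start, which is what journal.c verifies with the accessors above.
 */
static inline int example_trans_matches(struct reiserfs_journal_desc *d,
					struct reiserfs_journal_commit *c)
{
	return get_commit_trans_id(c) == get_desc_trans_id(d) &&
	       get_commit_trans_len(c) == get_desc_trans_len(d);
}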
2303
2304/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
2305** last fully flushed transaction. Fully flushed means all the log blocks and all the real blocks are on disk,
2306** and this transaction does not need to be replayed.
2307*/
2308struct reiserfs_journal_header {
2309 __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */
2310 __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */
2311 __le32 j_mount_id;
2312 /* 12 */ struct journal_params jh_journal;
2313};
2314
2315/* biggest tunable defines are right here */
2316#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */
2317#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */
2318#define JOURNAL_TRANS_MIN_DEFAULT 256
2319#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */
2320#define JOURNAL_MIN_RATIO 2
2321#define JOURNAL_MAX_COMMIT_AGE 30
2322#define JOURNAL_MAX_TRANS_AGE 30
2323#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
2324#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \
2325 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
2326 REISERFS_QUOTA_TRANS_BLOCKS(sb)))
2327
2328#ifdef CONFIG_QUOTA
2329#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
2330/* We need to update data and inode (atime) */
2331#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
2332/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
2333#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2334(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
2335/* same as with INIT */
2336#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2337(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
2338#else
2339#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
2340#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
2341#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
2342#endif
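
/*
 * Sketch (hypothetical caller; journal_begin() is declared further down
 * in this header): a worst-case reservation for creating one object is
 * made up front, with the quota terms above collapsing to zero when
 * quota support is compiled out.
 */
static inline int example_reserve_for_new_object(struct reiserfs_transaction_handle *th,
						 struct super_block *sb)
{
	return journal_begin(th, sb, JOURNAL_BLOCKS_PER_OBJECT(sb));
}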
2343
2344/* both of these can be as low as 1, or as high as you want. The min is the
2345** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
2346** as needed, and released when transactions are committed. On release, if
2347** the current number of nodes is > max, the node is freed, otherwise,
2348** it is put on a free list for faster use later.
2349*/
2350#define REISERFS_MIN_BITMAP_NODES 10
2351#define REISERFS_MAX_BITMAP_NODES 100
2352
2353#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */
2354#define JBH_HASH_MASK 8191
2355
2356#define _jhashfn(sb,block) \
2357 (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \
2358 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
2359#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
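
/*
 * Sketch of how journal.c consumes this hash (simplified from its
 * get_journal_hash_dev(); struct reiserfs_journal_cnode comes from the
 * journal code): pick the chain head with journal_hash(), then walk
 * ->hnext comparing block number and super block.
 */
static inline struct reiserfs_journal_cnode *
example_journal_hash_lookup(struct reiserfs_journal_cnode **table,
			    struct super_block *sb, b_blocknr_t bl)
{
	struct reiserfs_journal_cnode *cn = journal_hash(table, sb, bl);

	while (cn && (cn->blocknr != bl || cn->sb != sb))
		cn = cn->hnext;
	return cn;
}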
2360
2361// We need these to make journal.c code more readable
2362#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2363#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2364#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2365
2366enum reiserfs_bh_state_bits {
2367 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */
2368 BH_JDirty_wait,
2369 BH_JNew, /* disk block was taken off the free list before
2370 * being part of a finished transaction, or
2371 * written to disk; can be reused immediately */
2372 BH_JPrepared,
2373 BH_JRestore_dirty,
2374 BH_JTest, // debugging only, will go away
2375};
2376
2377BUFFER_FNS(JDirty, journaled);
2378TAS_BUFFER_FNS(JDirty, journaled);
2379BUFFER_FNS(JDirty_wait, journal_dirty);
2380TAS_BUFFER_FNS(JDirty_wait, journal_dirty);
2381BUFFER_FNS(JNew, journal_new);
2382TAS_BUFFER_FNS(JNew, journal_new);
2383BUFFER_FNS(JPrepared, journal_prepared);
2384TAS_BUFFER_FNS(JPrepared, journal_prepared);
2385BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2386TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2387BUFFER_FNS(JTest, journal_test);
2388TAS_BUFFER_FNS(JTest, journal_test);
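
/*
 * BUFFER_FNS(JDirty, journaled) expands to buffer_journaled(),
 * set_buffer_journaled() and clear_buffer_journaled();
 * TAS_BUFFER_FNS() adds the atomic test_set_/test_clear_ variants.
 * A minimal (hypothetical) use of the generated accessors:
 */
static inline int example_mark_journaled(struct buffer_head *bh)
{
	/* atomically set the bit; returns nonzero if it was already set */
	return test_set_buffer_journaled(bh);
}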
2389
2390/*
2391** transaction handle which is passed around for all journal calls
2392*/
2393struct reiserfs_transaction_handle {
2394 struct super_block *t_super; /* super for this FS when journal_begin was
2395 called; saves calls to reiserfs_get_super.
2396 Also used by nested transactions to make
2397 sure they are nesting on the right FS.
2398 _must_ be first in the handle.
2399 */
2400 int t_refcount;
2401 int t_blocks_logged; /* number of blocks this writer has logged */
2402 int t_blocks_allocated; /* number of blocks this writer allocated */
2403 unsigned int t_trans_id; /* sanity check, equals the current trans id */
2404 void *t_handle_save; /* save existing current->journal_info */
2405 unsigned displace_new_blocks:1; /* if new block allocation occurs, that block
2406 should be displaced from others */
2407 struct list_head t_list;
2408};
2409
2410/* used to keep track of ordered and tail writes, attached to the buffer
2411 * head through b_journal_head.
2412 */
2413struct reiserfs_jh {
2414 struct reiserfs_journal_list *jl;
2415 struct buffer_head *bh;
2416 struct list_head list;
2417};
2418
2419void reiserfs_free_jh(struct buffer_head *bh);
2420int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
2421int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
2422int journal_mark_dirty(struct reiserfs_transaction_handle *,
2423 struct super_block *, struct buffer_head *bh);
2424
2425static inline int reiserfs_file_data_log(struct inode *inode)
2426{
2427 if (reiserfs_data_log(inode->i_sb) ||
2428 (REISERFS_I(inode)->i_flags & i_data_log))
2429 return 1;
2430 return 0;
2431}
2432
2433static inline int reiserfs_transaction_running(struct super_block *s)
2434{
2435 struct reiserfs_transaction_handle *th = current->journal_info;
2436 if (th && th->t_super == s)
2437 return 1;
2438 if (th && th->t_super == NULL)
2439 BUG();
2440 return 0;
2441}
2442
2443static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th)
2444{
2445 return th->t_blocks_allocated - th->t_blocks_logged;
2446}
2447
2448struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
2449 super_block
2450 *,
2451 int count);
2452int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
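
/*
 * Sketch (hypothetical caller, error handling simplified): the
 * persistent pair above either joins a transaction that is already
 * running or starts a new one sized for @count blocks, and the end
 * call drops the reference again.
 */
static inline int example_with_transaction(struct super_block *s, int count)
{
	struct reiserfs_transaction_handle *th;

	th = reiserfs_persistent_transaction(s, count);
	if (!th)
		return -ENOMEM;
	/* ... journaled work goes here ... */
	return reiserfs_end_persistent_transaction(th);
}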
2453int reiserfs_commit_page(struct inode *inode, struct page *page,
2454 unsigned from, unsigned to);
2455int reiserfs_flush_old_commits(struct super_block *);
2456int reiserfs_commit_for_inode(struct inode *);
2457int reiserfs_inode_needs_commit(struct inode *);
2458void reiserfs_update_inode_transaction(struct inode *);
2459void reiserfs_wait_on_write_block(struct super_block *s);
2460void reiserfs_block_writes(struct reiserfs_transaction_handle *th);
2461void reiserfs_allow_writes(struct super_block *s);
2462void reiserfs_check_lock_depth(struct super_block *s, char *caller);
2463int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh,
2464 int wait);
2465void reiserfs_restore_prepared_buffer(struct super_block *,
2466 struct buffer_head *bh);
2467int journal_init(struct super_block *, const char *j_dev_name, int old_format,
2468 unsigned int);
2469int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
2470int journal_release_error(struct reiserfs_transaction_handle *,
2471 struct super_block *);
2472int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
2473 unsigned long);
2474int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
2475 unsigned long);
2476int journal_mark_freed(struct reiserfs_transaction_handle *,
2477 struct super_block *, b_blocknr_t blocknr);
2478int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
2479int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
2480 int bit_nr, int searchall, b_blocknr_t *next);
2481int journal_begin(struct reiserfs_transaction_handle *,
2482 struct super_block *sb, unsigned long);
2483int journal_join_abort(struct reiserfs_transaction_handle *,
2484 struct super_block *sb, unsigned long);
2485void reiserfs_abort_journal(struct super_block *sb, int errno);
2486void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
2487int reiserfs_allocate_list_bitmaps(struct super_block *s,
2488 struct reiserfs_list_bitmap *, unsigned int);
2489
2490void add_save_link(struct reiserfs_transaction_handle *th,
2491 struct inode *inode, int truncate);
2492int remove_save_link(struct inode *inode, int truncate);
2493
2494/* objectid.c */
2495__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th);
2496void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
2497 __u32 objectid_to_release);
2498int reiserfs_convert_objectid_map_v1(struct super_block *);
2499
2500/* stree.c */
2501int B_IS_IN_TREE(const struct buffer_head *);
2502extern void copy_item_head(struct item_head *to,
2503 const struct item_head *from);
2504
2505// the first key is in cpu form, the second in little-endian form
2506extern int comp_short_keys(const struct reiserfs_key *le_key,
2507 const struct cpu_key *cpu_key);
2508extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
2509
2510// both keys are in little-endian form
2511extern int comp_le_keys(const struct reiserfs_key *,
2512 const struct reiserfs_key *);
2513extern int comp_short_le_keys(const struct reiserfs_key *,
2514 const struct reiserfs_key *);
2515
2516//
2517// get the key version from an on-disk key - kludge
2518//
2519static inline int le_key_version(const struct reiserfs_key *key)
2520{
2521 int type;
2522
2523 type = offset_v2_k_type(&(key->u.k_offset_v2));
2524 if (type != TYPE_DIRECT && type != TYPE_INDIRECT
2525 && type != TYPE_DIRENTRY)
2526 return KEY_FORMAT_3_5;
2527
2528 return KEY_FORMAT_3_6;
2529
2530}
2531
2532static inline void copy_key(struct reiserfs_key *to,
2533 const struct reiserfs_key *from)
2534{
2535 memcpy(to, from, KEY_SIZE);
2536}
2537
2538int comp_items(const struct item_head *stored_ih, const struct treepath *path);
2539const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
2540 const struct super_block *sb);
2541int search_by_key(struct super_block *, const struct cpu_key *,
2542 struct treepath *, int);
2543#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL)
2544int search_for_position_by_key(struct super_block *sb,
2545 const struct cpu_key *cpu_key,
2546 struct treepath *search_path);
2547extern void decrement_bcount(struct buffer_head *bh);
2548void decrement_counters_in_path(struct treepath *search_path);
2549void pathrelse(struct treepath *search_path);
2550int reiserfs_check_path(struct treepath *p);
2551void pathrelse_and_restore(struct super_block *s, struct treepath *search_path);
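
/*
 * Sketch (hypothetical helper) of the standard lookup pattern: search,
 * inspect the path, then always release the buffers it pinned.
 * INITIALIZE_PATH() and DISK_LEAF_NODE_LEVEL are defined earlier in
 * this header.
 */
static inline int example_lookup(struct super_block *s,
				 const struct cpu_key *key)
{
	INITIALIZE_PATH(path);
	int retval = search_item(s, key, &path);

	/* on ITEM_FOUND the item is in PATH_PLAST_BUFFER(&path) */
	pathrelse(&path);
	return retval;
}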
2552
2553int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2554 struct treepath *path,
2555 const struct cpu_key *key,
2556 struct item_head *ih,
2557 struct inode *inode, const char *body);
2558
2559int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2560 struct treepath *path,
2561 const struct cpu_key *key,
2562 struct inode *inode,
2563 const char *body, int paste_size);
2564
2565int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
2566 struct treepath *path,
2567 struct cpu_key *key,
2568 struct inode *inode,
2569 struct page *page, loff_t new_file_size);
2570
2571int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
2572 struct treepath *path,
2573 const struct cpu_key *key,
2574 struct inode *inode, struct buffer_head *un_bh);
2575
2576void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
2577 struct inode *inode, struct reiserfs_key *key);
2578int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
2579 struct inode *inode);
2580int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
2581 struct inode *inode, struct page *,
2582 int update_timestamps);
2583
2584#define i_block_size(inode) ((inode)->i_sb->s_blocksize)
2585#define file_size(inode) ((inode)->i_size)
2586#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
2587
2588#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\
2589!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 )
2590
2591void padd_item(char *item, int total_length, int length);
2592
2593/* inode.c */
2594/* args for the create parameter of reiserfs_get_block */
2595#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
2596#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
2597#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
2598#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
2599#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */
2600#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
2601
2602void reiserfs_read_locked_inode(struct inode *inode,
2603 struct reiserfs_iget_args *args);
2604int reiserfs_find_actor(struct inode *inode, void *p);
2605int reiserfs_init_locked_inode(struct inode *inode, void *p);
2606void reiserfs_evict_inode(struct inode *inode);
2607int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2608int reiserfs_get_block(struct inode *inode, sector_t block,
2609 struct buffer_head *bh_result, int create);
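
/*
 * Sketch (hypothetical helper): the GET_BLOCK_* flags above are passed
 * through the @create argument.  GET_BLOCK_NO_CREATE is 0, so omitting
 * GET_BLOCK_CREATE makes this a read-only lookup, and GET_BLOCK_NO_HOLE
 * turns holes into -ENOENT instead of an unmapped result.
 */
static inline int example_map_existing_block(struct inode *inode, sector_t block,
					     struct buffer_head *bh_result)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}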
2610struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
2611 int fh_len, int fh_type);
2612struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
2613 int fh_len, int fh_type);
2614int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
2615 int connectable);
2616
2617int reiserfs_truncate_file(struct inode *, int update_timestamps);
2618void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
2619 int type, int key_length);
2620void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
2621 int version,
2622 loff_t offset, int type, int length, int entry_count);
2623struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
2624
2625struct reiserfs_security_handle;
2626int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
2627 struct inode *dir, umode_t mode,
2628 const char *symname, loff_t i_size,
2629 struct dentry *dentry, struct inode *inode,
2630 struct reiserfs_security_handle *security);
2631
2632void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
2633 struct inode *inode, loff_t size);
2634
2635static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
2636 struct inode *inode)
2637{
2638 reiserfs_update_sd_size(th, inode, inode->i_size);
2639}
2640
2641void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
2642void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
2643int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
2644
2645int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
2646
2647/* namei.c */
2648void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
2649int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
2650 struct treepath *path, struct reiserfs_dir_entry *de);
2651struct dentry *reiserfs_get_parent(struct dentry *);
2652
2653#ifdef CONFIG_REISERFS_PROC_INFO
2654int reiserfs_proc_info_init(struct super_block *sb);
2655int reiserfs_proc_info_done(struct super_block *sb);
2656int reiserfs_proc_info_global_init(void);
2657int reiserfs_proc_info_global_done(void);
2658
2659#define PROC_EXP( e ) e
2660
2661#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
2662#define PROC_INFO_MAX( sb, field, value ) \
2663 __PINFO( sb ).field = \
2664 max( REISERFS_SB( sb ) -> s_proc_info_data.field, value )
2665#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) )
2666#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) )
2667#define PROC_INFO_BH_STAT( sb, bh, level ) \
2668 PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \
2669 PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \
2670 PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
2671#else
2672static inline int reiserfs_proc_info_init(struct super_block *sb)
2673{
2674 return 0;
2675}
2676
2677static inline int reiserfs_proc_info_done(struct super_block *sb)
2678{
2679 return 0;
2680}
2681
2682static inline int reiserfs_proc_info_global_init(void)
2683{
2684 return 0;
2685}
2686
2687static inline int reiserfs_proc_info_global_done(void)
2688{
2689 return 0;
2690}
2691
2692#define PROC_EXP( e )
2693#define VOID_V ( ( void ) 0 )
2694#define PROC_INFO_MAX( sb, field, value ) VOID_V
2695#define PROC_INFO_INC( sb, field ) VOID_V
2696#define PROC_INFO_ADD( sb, field, val ) VOID_V
2697#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V
2698#endif
2699
2700/* dir.c */
2701extern const struct inode_operations reiserfs_dir_inode_operations;
2702extern const struct inode_operations reiserfs_symlink_inode_operations;
2703extern const struct inode_operations reiserfs_special_inode_operations;
2704extern const struct file_operations reiserfs_dir_operations;
2705int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *);
2706
2707/* tail_conversion.c */
2708int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
2709 struct treepath *, struct buffer_head *, loff_t);
2710int indirect2direct(struct reiserfs_transaction_handle *, struct inode *,
2711 struct page *, struct treepath *, const struct cpu_key *,
2712 loff_t, char *);
2713void reiserfs_unmap_buffer(struct buffer_head *);
2714
2715/* file.c */
2716extern const struct inode_operations reiserfs_file_inode_operations;
2717extern const struct file_operations reiserfs_file_operations;
2718extern const struct address_space_operations reiserfs_address_space_operations;
2719
2720/* fix_nodes.c */
2721
2722int fix_nodes(int n_op_mode, struct tree_balance *tb,
2723 struct item_head *ins_ih, const void *);
2724void unfix_nodes(struct tree_balance *);
2725
2726/* prints.c */
2727void __reiserfs_panic(struct super_block *s, const char *id,
2728 const char *function, const char *fmt, ...)
2729 __attribute__ ((noreturn));
2730#define reiserfs_panic(s, id, fmt, args...) \
2731 __reiserfs_panic(s, id, __func__, fmt, ##args)
2732void __reiserfs_error(struct super_block *s, const char *id,
2733 const char *function, const char *fmt, ...);
2734#define reiserfs_error(s, id, fmt, args...) \
2735 __reiserfs_error(s, id, __func__, fmt, ##args)
2736void reiserfs_info(struct super_block *s, const char *fmt, ...);
2737void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...);
2738void print_indirect_item(struct buffer_head *bh, int item_num);
2739void store_print_tb(struct tree_balance *tb);
2740void print_cur_tb(char *mes);
2741void print_de(struct reiserfs_dir_entry *de);
2742void print_bi(struct buffer_info *bi, char *mes);
2743#define PRINT_LEAF_ITEMS 1 /* print all items */
2744#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */
2745#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */
2746void print_block(struct buffer_head *bh, ...);
2747void print_bmap(struct super_block *s, int silent);
2748void print_bmap_block(int i, char *data, int size, int silent);
2749/*void print_super_block (struct super_block * s, char * mes);*/
2750void print_objectid_map(struct super_block *s);
2751void print_block_head(struct buffer_head *bh, char *mes);
2752void check_leaf(struct buffer_head *bh);
2753void check_internal(struct buffer_head *bh);
2754void print_statistics(struct super_block *s);
2755char *reiserfs_hashname(int code);
2756
2757/* lbalance.c */
2758int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
2759 int mov_bytes, struct buffer_head *Snew);
2760int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes);
2761int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
2762void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
2763 int del_num, int del_bytes);
2764void leaf_insert_into_buf(struct buffer_info *bi, int before,
2765 struct item_head *inserted_item_ih,
2766 const char *inserted_item_body, int zeros_number);
2767void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
2768 int pos_in_item, int paste_size, const char *body,
2769 int zeros_number);
2770void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
2771 int pos_in_item, int cut_size);
2772void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
2773 int new_entry_count, struct reiserfs_de_head *new_dehs,
2774 const char *records, int paste_size);
2775/* ibalance.c */
2776int balance_internal(struct tree_balance *, int, int, struct item_head *,
2777 struct buffer_head **);
2778
2779/* do_balance.c */
2780void do_balance_mark_leaf_dirty(struct tree_balance *tb,
2781 struct buffer_head *bh, int flag);
2782#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
2783#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
2784
2785void do_balance(struct tree_balance *tb, struct item_head *ih,
2786 const char *body, int flag);
2787void reiserfs_invalidate_buffer(struct tree_balance *tb,
2788 struct buffer_head *bh);
2789
2790int get_left_neighbor_position(struct tree_balance *tb, int h);
2791int get_right_neighbor_position(struct tree_balance *tb, int h);
2792void replace_key(struct tree_balance *tb, struct buffer_head *, int,
2793 struct buffer_head *, int);
2794void make_empty_node(struct buffer_info *);
2795struct buffer_head *get_FEB(struct tree_balance *);
2796
2797/* bitmap.c */
2798
2799/* this structure contains hints for the block allocator; it is a container for
2800 * arguments such as the inode, search path, transaction handle, etc. */
2801struct __reiserfs_blocknr_hint {
2802 struct inode *inode; /* inode passed to the allocator, if we allocate unformatted nodes */
2803 sector_t block; /* file offset, in blocks */
2804 struct in_core_key key;
2805 struct treepath *path; /* search path, used by the allocator to determine search_start in
2806 * various ways */
2807 struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and
2808 * bitmap blocks changes */
2809 b_blocknr_t beg, end;
2810 b_blocknr_t search_start; /* a field used to transfer search start value (block number)
2811 * between different block allocator procedures
2812 * (determine_search_start() and others) */
2813 int prealloc_size; /* set in the determine_prealloc_size() function; used by the underlying
2814 * functions that do the actual allocation */
2815
2816 unsigned formatted_node:1; /* the allocator uses different policies for getting disk space for
2817 * formatted/unformatted blocks with/without preallocation */
2818 unsigned preallocate:1;
2819};
2820
2821typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
2822
2823int reiserfs_parse_alloc_options(struct super_block *, char *);
2824void reiserfs_init_alloc_options(struct super_block *s);
2825
2826/*
2827 * given a directory, this will tell you what packing locality
2828 * to use for a new object underneath it. The locality is returned
2829 * in disk byte order (le).
2830 */
2831__le32 reiserfs_choose_packing(struct inode *dir);
2832
2833int reiserfs_init_bitmap_cache(struct super_block *sb);
2834void reiserfs_free_bitmap_cache(struct super_block *sb);
2835void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info);
2836struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap);
2837int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
2838void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *,
2839 b_blocknr_t, int for_unformatted);
2840int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int,
2841 int);
2842static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
2843 b_blocknr_t * new_blocknrs,
2844 int amount_needed)
2845{
2846 reiserfs_blocknr_hint_t hint = {
2847 .th = tb->transaction_handle,
2848 .path = tb->tb_path,
2849 .inode = NULL,
2850 .key = tb->key,
2851 .block = 0,
2852 .formatted_node = 1
2853 };
2854 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed,
2855 0);
2856}
2857
2858static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
2859 *th, struct inode *inode,
2860 b_blocknr_t * new_blocknrs,
2861 struct treepath *path,
2862 sector_t block)
2863{
2864 reiserfs_blocknr_hint_t hint = {
2865 .th = th,
2866 .path = path,
2867 .inode = inode,
2868 .block = block,
2869 .formatted_node = 0,
2870 .preallocate = 0
2871 };
2872 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2873}
2874
2875#ifdef REISERFS_PREALLOCATE
2876static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
2877 *th, struct inode *inode,
2878 b_blocknr_t * new_blocknrs,
2879 struct treepath *path,
2880 sector_t block)
2881{
2882 reiserfs_blocknr_hint_t hint = {
2883 .th = th,
2884 .path = path,
2885 .inode = inode,
2886 .block = block,
2887 .formatted_node = 0,
2888 .preallocate = 1
2889 };
2890 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2891}
2892
2893void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
2894 struct inode *inode);
2895void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th);
2896#endif
2897
2898/* hashes.c */
2899__u32 keyed_hash(const signed char *msg, int len);
2900__u32 yura_hash(const signed char *msg, int len);
2901__u32 r5_hash(const signed char *msg, int len);
2902
2903#define reiserfs_set_le_bit __set_bit_le
2904#define reiserfs_test_and_set_le_bit __test_and_set_bit_le
2905#define reiserfs_clear_le_bit __clear_bit_le
2906#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le
2907#define reiserfs_test_le_bit test_bit_le
2908#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le
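
/*
 * Sketch (hypothetical helper): these wrappers operate on little-endian
 * bitmaps, i.e. raw bitmap block data, so the on-disk layout is the
 * same on all architectures.
 */
static inline int example_claim_bitmap_bit(struct buffer_head *bh, int nr)
{
	/* non-atomically set bit @nr; returns its previous value */
	return reiserfs_test_and_set_le_bit(nr, bh->b_data);
}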
2909
2910/* sometimes reiserfs_truncate may need to allocate a few new blocks
2911 to perform an indirect2direct conversion. People probably expect
2912 truncate to work without problems on a filesystem with no free
2913 disk space, and may complain when they cannot truncate for lack
2914 of free disk space. This spare space allows us not to worry
2915 about it. 500 is probably too much, but it should be
2916 absolutely safe */
2917#define SPARE_SPACE 500
2918
2919/* prototypes from ioctl.c */
2920long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
2921long reiserfs_compat_ioctl(struct file *filp,
2922 unsigned int cmd, unsigned long arg);
2923int reiserfs_unpack(struct inode *inode, struct file *filp);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 7483279b482..9a17f63c3fd 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -13,8 +13,7 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/reiserfs_fs.h> 16#include "reiserfs.h"
17#include <linux/reiserfs_fs_sb.h>
18#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
19 18
20int reiserfs_resize(struct super_block *s, unsigned long block_count_new) 19int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 313d39d639e..f8afa4b162b 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -51,7 +51,7 @@
51#include <linux/time.h> 51#include <linux/time.h>
52#include <linux/string.h> 52#include <linux/string.h>
53#include <linux/pagemap.h> 53#include <linux/pagemap.h>
54#include <linux/reiserfs_fs.h> 54#include "reiserfs.h"
55#include <linux/buffer_head.h> 55#include <linux/buffer_head.h>
56#include <linux/quotaops.h> 56#include <linux/quotaops.h>
57 57
@@ -1284,12 +1284,12 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1284 ** -clm 1284 ** -clm
1285 */ 1285 */
1286 1286
1287 data = kmap_atomic(un_bh->b_page, KM_USER0); 1287 data = kmap_atomic(un_bh->b_page);
1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1289 memcpy(data + off, 1289 memcpy(data + off,
1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), 1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1291 ret_value); 1291 ret_value);
1292 kunmap_atomic(data, KM_USER0); 1292 kunmap_atomic(data);
1293 } 1293 }
1294 /* Perform balancing after all resources have been collected at once. */ 1294 /* Perform balancing after all resources have been collected at once. */
1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e12d8b97cd4..8b7616ef06d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -16,9 +16,9 @@
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/time.h> 17#include <linux/time.h>
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/reiserfs_fs.h> 19#include "reiserfs.h"
20#include <linux/reiserfs_acl.h> 20#include "acl.h"
21#include <linux/reiserfs_xattr.h> 21#include "xattr.h"
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
@@ -1874,11 +1874,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1874 unlock_new_inode(root_inode); 1874 unlock_new_inode(root_inode);
1875 } 1875 }
1876 1876
1877 s->s_root = d_alloc_root(root_inode); 1877 s->s_root = d_make_root(root_inode);
1878 if (!s->s_root) { 1878 if (!s->s_root)
1879 iput(root_inode);
1880 goto error; 1879 goto error;
1881 }
1882 // define and initialize hash function 1880 // define and initialize hash function
1883 sbi->s_hash_function = hash_function(s); 1881 sbi->s_hash_function = hash_function(s);
1884 if (sbi->s_hash_function == NULL) { 1882 if (sbi->s_hash_function == NULL) {
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index d7f6e51bef2..5e2624d12f7 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -5,7 +5,7 @@
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9 9
10/* access to tail : when one is going to read tail it must make sure, that is not running. 10/* access to tail : when one is going to read tail it must make sure, that is not running.
11 direct2indirect and indirect2direct can not run concurrently */ 11 direct2indirect and indirect2direct can not run concurrently */
@@ -128,9 +128,9 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
128 if (up_to_date_bh) { 128 if (up_to_date_bh) {
129 unsigned pgoff = 129 unsigned pgoff =
130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
131 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); 131 char *kaddr = kmap_atomic(up_to_date_bh->b_page);
132 memset(kaddr + pgoff, 0, blk_size - total_tail); 132 memset(kaddr + pgoff, 0, blk_size - total_tail);
133 kunmap_atomic(kaddr, KM_USER0); 133 kunmap_atomic(kaddr);
134 } 134 }
135 135
136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c24deda8a8b..46fc1c20a6b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -33,7 +33,7 @@
33 * The xattrs themselves are protected by the xattr_sem. 33 * The xattrs themselves are protected by the xattr_sem.
34 */ 34 */
35 35
36#include <linux/reiserfs_fs.h> 36#include "reiserfs.h"
37#include <linux/capability.h> 37#include <linux/capability.h>
38#include <linux/dcache.h> 38#include <linux/dcache.h>
39#include <linux/namei.h> 39#include <linux/namei.h>
@@ -43,8 +43,8 @@
43#include <linux/file.h> 43#include <linux/file.h>
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/xattr.h> 45#include <linux/xattr.h>
46#include <linux/reiserfs_xattr.h> 46#include "xattr.h"
47#include <linux/reiserfs_acl.h> 47#include "acl.h"
48#include <asm/uaccess.h> 48#include <asm/uaccess.h>
49#include <net/checksum.h> 49#include <net/checksum.h>
50#include <linux/stat.h> 50#include <linux/stat.h>
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
new file mode 100644
index 00000000000..f59626c5d33
--- /dev/null
+++ b/fs/reiserfs/xattr.h
@@ -0,0 +1,122 @@
1#include <linux/reiserfs_xattr.h>
2#include <linux/init.h>
3#include <linux/list.h>
4#include <linux/rwsem.h>
5
6struct inode;
7struct dentry;
8struct iattr;
9struct super_block;
10struct nameidata;
11
12int reiserfs_xattr_register_handlers(void) __init;
13void reiserfs_xattr_unregister_handlers(void);
14int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
15int reiserfs_lookup_privroot(struct super_block *sb);
16int reiserfs_delete_xattrs(struct inode *inode);
17int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
18int reiserfs_permission(struct inode *inode, int mask);
19
20#ifdef CONFIG_REISERFS_FS_XATTR
21#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
22ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
23 void *buffer, size_t size);
24int reiserfs_setxattr(struct dentry *dentry, const char *name,
25 const void *value, size_t size, int flags);
26ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
27int reiserfs_removexattr(struct dentry *dentry, const char *name);
28
29int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
30int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
31int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,
32 struct inode *, const char *, const void *,
33 size_t, int);
34
35extern const struct xattr_handler reiserfs_xattr_user_handler;
36extern const struct xattr_handler reiserfs_xattr_trusted_handler;
37extern const struct xattr_handler reiserfs_xattr_security_handler;
38#ifdef CONFIG_REISERFS_FS_SECURITY
39int reiserfs_security_init(struct inode *dir, struct inode *inode,
40 const struct qstr *qstr,
41 struct reiserfs_security_handle *sec);
42int reiserfs_security_write(struct reiserfs_transaction_handle *th,
43 struct inode *inode,
44 struct reiserfs_security_handle *sec);
45void reiserfs_security_free(struct reiserfs_security_handle *sec);
46#endif
47
48static inline int reiserfs_xattrs_initialized(struct super_block *sb)
49{
50 return REISERFS_SB(sb)->priv_root != NULL;
51}
52
53#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
54static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
55{
56 loff_t ret = 0;
57 if (reiserfs_file_data_log(inode)) {
58 ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize);
59 ret >>= inode->i_sb->s_blocksize_bits;
60 }
61 return ret;
62}
63
64/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
65 * Let's try to be smart about it.
66 * xattr root: We cache it. If it's not cached, we may need to create it.
67 * xattr dir: If anything has been loaded for this inode, we can set a flag
68 * saying so.
69 * xattr file: Since we don't cache xattrs, we can't tell. We always include
70 * blocks for it.
71 *
72 * However, since root and dir can be created between calls - YOU MUST SAVE
73 * THIS VALUE.
74 */
75static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
76{
77 size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
78
79 if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
80 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
81 if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
82 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
83 }
84
85 return nblocks;
86}
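
/*
 * Sketch (hypothetical caller, not part of this patch): compute the
 * value once and hand the same number to journal_begin(), since the
 * xattr root/dir may be created between the two calls.
 */
static inline int example_xattr_begin(struct reiserfs_transaction_handle *th,
				      struct inode *inode)
{
	size_t jbegin_count = reiserfs_xattr_jcreate_nblocks(inode);

	return journal_begin(th, inode->i_sb, jbegin_count);
}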
87
88static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
89{
90 init_rwsem(&REISERFS_I(inode)->i_xattr_sem);
91}
92
93#else
94
95#define reiserfs_getxattr NULL
96#define reiserfs_setxattr NULL
97#define reiserfs_listxattr NULL
98#define reiserfs_removexattr NULL
99
100static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
101{
102}
103#endif /* CONFIG_REISERFS_FS_XATTR */
104
105#ifndef CONFIG_REISERFS_FS_SECURITY
106static inline int reiserfs_security_init(struct inode *dir,
107 struct inode *inode,
108 const struct qstr *qstr,
109 struct reiserfs_security_handle *sec)
110{
111 return 0;
112}
113static inline int
114reiserfs_security_write(struct reiserfs_transaction_handle *th,
115 struct inode *inode,
116 struct reiserfs_security_handle *sec)
117{
118 return 0;
119}
120static inline void reiserfs_security_free(struct reiserfs_security_handle *sec)
121{}
122#endif
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6da0396e505..44474f9b990 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,14 +1,14 @@
1#include <linux/capability.h> 1#include <linux/capability.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/posix_acl.h> 3#include <linux/posix_acl.h>
4#include <linux/reiserfs_fs.h> 4#include "reiserfs.h"
5#include <linux/errno.h> 5#include <linux/errno.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/xattr.h> 7#include <linux/xattr.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/posix_acl_xattr.h> 9#include <linux/posix_acl_xattr.h>
10#include <linux/reiserfs_xattr.h> 10#include "xattr.h"
11#include <linux/reiserfs_acl.h> 11#include "acl.h"
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13 13
14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, 14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 534668fa41b..800a3cef6f6 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <linux/security.h> 8#include <linux/security.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 9883736ce3e..a0035719f66 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/capability.h> 2#include <linux/capability.h>
3#include <linux/errno.h> 3#include <linux/errno.h>
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 45ae1a00013..8667491ae7c 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -1,9 +1,9 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/reiserfs_xattr.h> 6#include "xattr.h"
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 71e2b4d50a0..f86f51f99ac 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -19,7 +19,7 @@
19#endif 19#endif
20 20
21#ifdef CONFIG_ROMFS_ON_MTD 21#ifdef CONFIG_ROMFS_ON_MTD
22#define ROMFS_MTD_READ(sb, ...) ((sb)->s_mtd->read((sb)->s_mtd, ##__VA_ARGS__)) 22#define ROMFS_MTD_READ(sb, ...) mtd_read((sb)->s_mtd, ##__VA_ARGS__)
23 23
24/* 24/*
25 * read data from an romfs image on an MTD device 25 * read data from an romfs image on an MTD device
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index bb36ab74eb4..e64f6b5f7ae 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -538,14 +538,12 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
538 if (IS_ERR(root)) 538 if (IS_ERR(root))
539 goto error; 539 goto error;
540 540
541 sb->s_root = d_alloc_root(root); 541 sb->s_root = d_make_root(root);
542 if (!sb->s_root) 542 if (!sb->s_root)
543 goto error_i; 543 goto error;
544 544
545 return 0; 545 return 0;
546 546
547error_i:
548 iput(root);
549error: 547error:
550 return -EINVAL; 548 return -EINVAL;
551error_rsb_inval: 549error_rsb_inval:
diff --git a/fs/select.c b/fs/select.c
index e782258d0de..17d33d09fc1 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -17,7 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/syscalls.h> 19#include <linux/syscalls.h>
20#include <linux/module.h> 20#include <linux/export.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/poll.h> 22#include <linux/poll.h>
23#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 23#include <linux/personality.h> /* for STICKY_TIMEOUTS */
@@ -223,7 +223,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
223 get_file(filp); 223 get_file(filp);
224 entry->filp = filp; 224 entry->filp = filp;
225 entry->wait_address = wait_address; 225 entry->wait_address = wait_address;
226 entry->key = p->key; 226 entry->key = p->_key;
227 init_waitqueue_func_entry(&entry->wait, pollwake); 227 init_waitqueue_func_entry(&entry->wait, pollwake);
228 entry->wait.private = pwq; 228 entry->wait.private = pwq;
229 add_wait_queue(wait_address, &entry->wait); 229 add_wait_queue(wait_address, &entry->wait);
@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
348 set = ~(~0UL << (n & (__NFDBITS-1))); 348 set = ~(~0UL << (n & (__NFDBITS-1)));
349 n /= __NFDBITS; 349 n /= __NFDBITS;
350 fdt = files_fdtable(current->files); 350 fdt = files_fdtable(current->files);
351 open_fds = fdt->open_fds->fds_bits+n; 351 open_fds = fdt->open_fds + n;
352 max = 0; 352 max = 0;
353 if (set) { 353 if (set) {
354 set &= BITS(fds, n); 354 set &= BITS(fds, n);
@@ -386,13 +386,11 @@ get_max:
386static inline void wait_key_set(poll_table *wait, unsigned long in, 386static inline void wait_key_set(poll_table *wait, unsigned long in,
387 unsigned long out, unsigned long bit) 387 unsigned long out, unsigned long bit)
388{ 388{
389 if (wait) { 389 wait->_key = POLLEX_SET;
390 wait->key = POLLEX_SET; 390 if (in & bit)
391 if (in & bit) 391 wait->_key |= POLLIN_SET;
392 wait->key |= POLLIN_SET; 392 if (out & bit)
393 if (out & bit) 393 wait->_key |= POLLOUT_SET;
394 wait->key |= POLLOUT_SET;
395 }
396} 394}
397 395
398int do_select(int n, fd_set_bits *fds, struct timespec *end_time) 396int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
@@ -414,7 +412,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
414 poll_initwait(&table); 412 poll_initwait(&table);
415 wait = &table.pt; 413 wait = &table.pt;
416 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 414 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
417 wait = NULL; 415 wait->_qproc = NULL;
418 timed_out = 1; 416 timed_out = 1;
419 } 417 }
420 418
@@ -459,17 +457,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
459 if ((mask & POLLIN_SET) && (in & bit)) { 457 if ((mask & POLLIN_SET) && (in & bit)) {
460 res_in |= bit; 458 res_in |= bit;
461 retval++; 459 retval++;
462 wait = NULL; 460 wait->_qproc = NULL;
463 } 461 }
464 if ((mask & POLLOUT_SET) && (out & bit)) { 462 if ((mask & POLLOUT_SET) && (out & bit)) {
465 res_out |= bit; 463 res_out |= bit;
466 retval++; 464 retval++;
467 wait = NULL; 465 wait->_qproc = NULL;
468 } 466 }
469 if ((mask & POLLEX_SET) && (ex & bit)) { 467 if ((mask & POLLEX_SET) && (ex & bit)) {
470 res_ex |= bit; 468 res_ex |= bit;
471 retval++; 469 retval++;
472 wait = NULL; 470 wait->_qproc = NULL;
473 } 471 }
474 } 472 }
475 } 473 }
@@ -481,7 +479,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
481 *rexp = res_ex; 479 *rexp = res_ex;
482 cond_resched(); 480 cond_resched();
483 } 481 }
484 wait = NULL; 482 wait->_qproc = NULL;
485 if (retval || timed_out || signal_pending(current)) 483 if (retval || timed_out || signal_pending(current))
486 break; 484 break;
487 if (table.error) { 485 if (table.error) {
@@ -720,7 +718,7 @@ struct poll_list {
720 * interested in events matching the pollfd->events mask, and the result 718 * interested in events matching the pollfd->events mask, and the result
721 * matching that mask is both recorded in pollfd->revents and returned. The 719 * matching that mask is both recorded in pollfd->revents and returned. The
722 * pwait poll_table will be used by the fd-provided poll handler for waiting, 720 * pwait poll_table will be used by the fd-provided poll handler for waiting,
723 * if non-NULL. 721 * if pwait->_qproc is non-NULL.
724 */ 722 */
725static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) 723static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
726{ 724{
@@ -738,9 +736,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
738 if (file != NULL) { 736 if (file != NULL) {
739 mask = DEFAULT_POLLMASK; 737 mask = DEFAULT_POLLMASK;
740 if (file->f_op && file->f_op->poll) { 738 if (file->f_op && file->f_op->poll) {
741 if (pwait) 739 pwait->_key = pollfd->events|POLLERR|POLLHUP;
742 pwait->key = pollfd->events |
743 POLLERR | POLLHUP;
744 mask = file->f_op->poll(file, pwait); 740 mask = file->f_op->poll(file, pwait);
745 } 741 }
746 /* Mask out unneeded events. */ 742 /* Mask out unneeded events. */
@@ -763,7 +759,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
763 759
764 /* Optimise the no-wait case */ 760 /* Optimise the no-wait case */
765 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 761 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
766 pt = NULL; 762 pt->_qproc = NULL;
767 timed_out = 1; 763 timed_out = 1;
768 } 764 }
769 765
@@ -781,22 +777,22 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
781 for (; pfd != pfd_end; pfd++) { 777 for (; pfd != pfd_end; pfd++) {
782 /* 778 /*
783 * Fish for events. If we found one, record it 779 * Fish for events. If we found one, record it
784 * and kill the poll_table, so we don't 780 * and kill poll_table->_qproc, so we don't
785 * needlessly register any other waiters after 781 * needlessly register any other waiters after
786 * this. They'll get immediately deregistered 782 * this. They'll get immediately deregistered
787 * when we break out and return. 783 * when we break out and return.
788 */ 784 */
789 if (do_pollfd(pfd, pt)) { 785 if (do_pollfd(pfd, pt)) {
790 count++; 786 count++;
791 pt = NULL; 787 pt->_qproc = NULL;
792 } 788 }
793 } 789 }
794 } 790 }
795 /* 791 /*
796 * All waiters have already been registered, so don't provide 792 * All waiters have already been registered, so don't provide
797 * a poll_table to them on the next loop iteration. 793 * a poll_table->_qproc to them on the next loop iteration.
798 */ 794 */
799 pt = NULL; 795 pt->_qproc = NULL;
800 if (!count) { 796 if (!count) {
801 count = wait->error; 797 count = wait->error;
802 if (signal_pending(current)) 798 if (signal_pending(current))
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4023d6be939..0cbd0494b79 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -6,13 +6,29 @@
6 */ 6 */
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12 12
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/page.h> 14#include <asm/page.h>
15 15
16
17/*
 18 * seq_files have a buffer which may overflow. When this happens a larger
19 * buffer is reallocated and all the data will be printed again.
20 * The overflow state is true when m->count == m->size.
21 */
22static bool seq_overflow(struct seq_file *m)
23{
24 return m->count == m->size;
25}
26
27static void seq_set_overflow(struct seq_file *m)
28{
29 m->count = m->size;
30}
31
16/** 32/**
17 * seq_open - initialize sequential file 33 * seq_open - initialize sequential file
18 * @file: file we initialize 34 * @file: file we initialize
@@ -92,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
92 error = 0; 108 error = 0;
93 m->count = 0; 109 m->count = 0;
94 } 110 }
95 if (m->count == m->size) 111 if (seq_overflow(m))
96 goto Eoverflow; 112 goto Eoverflow;
97 if (pos + m->count > offset) { 113 if (pos + m->count > offset) {
98 m->from = offset - pos; 114 m->from = offset - pos;
@@ -140,9 +156,21 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
140 156
141 mutex_lock(&m->lock); 157 mutex_lock(&m->lock);
142 158
159 /*
160 * seq_file->op->..m_start/m_stop/m_next may do special actions
161 * or optimisations based on the file->f_version, so we want to
162 * pass the file->f_version to those methods.
163 *
164 * seq_file->version is just copy of f_version, and seq_file
165 * methods can treat it simply as file version.
166 * It is copied in first and copied out after all operations.
167 * It is convenient to have it as part of structure to avoid the
168 * need of passing another argument to all the seq_file methods.
169 */
170 m->version = file->f_version;
171
143 /* Don't assume *ppos is where we left it */ 172 /* Don't assume *ppos is where we left it */
144 if (unlikely(*ppos != m->read_pos)) { 173 if (unlikely(*ppos != m->read_pos)) {
145 m->read_pos = *ppos;
146 while ((err = traverse(m, *ppos)) == -EAGAIN) 174 while ((err = traverse(m, *ppos)) == -EAGAIN)
147 ; 175 ;
148 if (err) { 176 if (err) {
@@ -152,21 +180,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
152 m->index = 0; 180 m->index = 0;
153 m->count = 0; 181 m->count = 0;
154 goto Done; 182 goto Done;
183 } else {
184 m->read_pos = *ppos;
155 } 185 }
156 } 186 }
157 187
158 /*
159 * seq_file->op->..m_start/m_stop/m_next may do special actions
160 * or optimisations based on the file->f_version, so we want to
161 * pass the file->f_version to those methods.
162 *
163 * seq_file->version is just copy of f_version, and seq_file
164 * methods can treat it simply as file version.
165 * It is copied in first and copied out after all operations.
166 * It is convenient to have it as part of structure to avoid the
167 * need of passing another argument to all the seq_file methods.
168 */
169 m->version = file->f_version;
170 /* grab buffer if we didn't have one */ 188 /* grab buffer if we didn't have one */
171 if (!m->buf) { 189 if (!m->buf) {
172 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 190 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
@@ -232,7 +250,7 @@ Fill:
232 break; 250 break;
233 } 251 }
234 err = m->op->show(m, p); 252 err = m->op->show(m, p);
235 if (m->count == m->size || err) { 253 if (seq_overflow(m) || err) {
236 m->count = offs; 254 m->count = offs;
237 if (likely(err <= 0)) 255 if (likely(err <= 0))
238 break; 256 break;
@@ -359,7 +377,7 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
359 *p++ = '0' + (c & 07); 377 *p++ = '0' + (c & 07);
360 continue; 378 continue;
361 } 379 }
362 m->count = m->size; 380 seq_set_overflow(m);
363 return -1; 381 return -1;
364 } 382 }
365 m->count = p - m->buf; 383 m->count = p - m->buf;
@@ -381,7 +399,7 @@ int seq_printf(struct seq_file *m, const char *f, ...)
381 return 0; 399 return 0;
382 } 400 }
383 } 401 }
384 m->count = m->size; 402 seq_set_overflow(m);
385 return -1; 403 return -1;
386} 404}
387EXPORT_SYMBOL(seq_printf); 405EXPORT_SYMBOL(seq_printf);
@@ -510,7 +528,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
510 return 0; 528 return 0;
511 } 529 }
512 } 530 }
513 m->count = m->size; 531 seq_set_overflow(m);
514 return -1; 532 return -1;
515} 533}
516EXPORT_SYMBOL(seq_bitmap); 534EXPORT_SYMBOL(seq_bitmap);
@@ -526,7 +544,7 @@ int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
526 return 0; 544 return 0;
527 } 545 }
528 } 546 }
529 m->count = m->size; 547 seq_set_overflow(m);
530 return -1; 548 return -1;
531} 549}
532EXPORT_SYMBOL(seq_bitmap_list); 550EXPORT_SYMBOL(seq_bitmap_list);
@@ -637,11 +655,63 @@ int seq_puts(struct seq_file *m, const char *s)
637 m->count += len; 655 m->count += len;
638 return 0; 656 return 0;
639 } 657 }
640 m->count = m->size; 658 seq_set_overflow(m);
641 return -1; 659 return -1;
642} 660}
643EXPORT_SYMBOL(seq_puts); 661EXPORT_SYMBOL(seq_puts);
644 662
663/*
664 * A helper routine for putting decimal numbers without the rich formatting
665 * of printf(); only 'unsigned long long' is supported.
666 * This routine puts one byte of delimiter plus the number into the seq_file.
667 * It is very quick when you show lots of numbers.
668 * In the usual case, seq_printf() is still preferable: it is easier to read.
669 */
670int seq_put_decimal_ull(struct seq_file *m, char delimiter,
671 unsigned long long num)
672{
673 int len;
674
675 if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
676 goto overflow;
677
678 if (delimiter)
679 m->buf[m->count++] = delimiter;
680
681 if (num < 10) {
682 m->buf[m->count++] = num + '0';
683 return 0;
684 }
685
686 len = num_to_str(m->buf + m->count, m->size - m->count, num);
687 if (!len)
688 goto overflow;
689 m->count += len;
690 return 0;
691overflow:
692 seq_set_overflow(m);
693 return -1;
694}
695EXPORT_SYMBOL(seq_put_decimal_ull);
696
697int seq_put_decimal_ll(struct seq_file *m, char delimiter,
698 long long num)
699{
700 if (num < 0) {
701 if (m->count + 3 >= m->size) {
702 seq_set_overflow(m);
703 return -1;
704 }
705 if (delimiter)
706 m->buf[m->count++] = delimiter;
707 num = -num;
708 delimiter = '-';
709 }
710 return seq_put_decimal_ull(m, delimiter, num);
711
712}
713EXPORT_SYMBOL(seq_put_decimal_ll);
714
645/** 715/**
646 * seq_write - write arbitrary data to buffer 716 * seq_write - write arbitrary data to buffer
647 * @seq: seq_file identifying the buffer to which data should be written 717 * @seq: seq_file identifying the buffer to which data should be written
@@ -657,7 +727,7 @@ int seq_write(struct seq_file *seq, const void *data, size_t len)
657 seq->count += len; 727 seq->count += len;
658 return 0; 728 return 0;
659 } 729 }
660 seq->count = seq->size; 730 seq_set_overflow(seq);
661 return -1; 731 return -1;
662} 732}
663EXPORT_SYMBOL(seq_write); 733EXPORT_SYMBOL(seq_write);
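The seq_put_decimal helpers above exist to speed up files such as /proc/stat that print long runs of numbers. A minimal usage sketch, not part of this diff: demo_show() is a hypothetical ->show() callback, while seq_puts(), seq_putc() and seq_put_decimal_ull() are the real seq_file helpers.

#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
        unsigned long long stats[] = { 12, 345, 6789 };
        int i;

        seq_puts(m, "demo");
        for (i = 0; i < 3; i++)
                /* emits one delimiter byte, then the decimal number */
                seq_put_decimal_ull(m, ' ', stats[i]);
        seq_putc(m, '\n');
        return 0;
}

On overflow the helper marks the buffer full via seq_set_overflow(), and seq_read() retries with a doubled buffer, exactly as for the printf-style helpers.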
diff --git a/fs/splice.c b/fs/splice.c
index 1ec0493266b..f8476841eb0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,11 +25,12 @@
25#include <linux/mm_inline.h> 25#include <linux/mm_inline.h>
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/module.h> 28#include <linux/export.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h>
33 34
34/* 35/*
35 * Attempt to steal a page from a pipe buffer. This should perhaps go into 36 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -690,7 +691,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
690 if (!likely(file->f_op && file->f_op->sendpage)) 691 if (!likely(file->f_op && file->f_op->sendpage))
691 return -EINVAL; 692 return -EINVAL;
692 693
693 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 694 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
695 if (sd->len < sd->total_len)
696 more |= MSG_SENDPAGE_NOTLAST;
694 return file->f_op->sendpage(file, buf->page, buf->offset, 697 return file->f_op->sendpage(file, buf->page, buf->offset,
695 sd->len, &pos, more); 698 sd->len, &pos, more);
696} 699}
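The boolean "more" becomes a flags word here: MSG_MORE is set only when the splice caller requested it via SPLICE_F_MORE, while MSG_SENDPAGE_NOTLAST marks every page except the last of the current splice (the new linux/socket.h include at the top of the file makes the flag visible). A hedged sketch of the selection logic, with sendpage_flags() a hypothetical stand-in for the open-coded computation and the parameters mirroring struct splice_desc fields:

#include <linux/socket.h>
#include <linux/splice.h>

static int sendpage_flags(unsigned int flags, size_t len, size_t total_len)
{
        int more = (flags & SPLICE_F_MORE) ? MSG_MORE : 0;

        if (len < total_len)    /* more pages of this splice follow */
                more |= MSG_SENDPAGE_NOTLAST;
        return more;
}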
@@ -737,15 +740,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
737 goto out; 740 goto out;
738 741
739 if (buf->page != page) { 742 if (buf->page != page) {
740 /*
741 * Careful, ->map() uses KM_USER0!
742 */
743 char *src = buf->ops->map(pipe, buf, 1); 743 char *src = buf->ops->map(pipe, buf, 1);
744 char *dst = kmap_atomic(page, KM_USER1); 744 char *dst = kmap_atomic(page);
745 745
746 memcpy(dst + offset, src + buf->offset, this_len); 746 memcpy(dst + offset, src + buf->offset, this_len);
747 flush_dcache_page(page); 747 flush_dcache_page(page);
748 kunmap_atomic(dst, KM_USER1); 748 kunmap_atomic(dst);
749 buf->ops->unmap(pipe, buf, src); 749 buf->ops->unmap(pipe, buf, src);
750 } 750 }
751 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, 751 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
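This hunk (and several squashfs hunks below) assumes the reworked stack-based kmap_atomic() API: the KM_USER0/KM_USER1 slot arguments are gone, and nested mappings are simply released in reverse order. A self-contained sketch of the new idiom; copy_highpage_demo() is a hypothetical illustration, not kernel code:

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_highpage_demo(struct page *dst_page, struct page *src_page)
{
        char *src = kmap_atomic(src_page);
        char *dst = kmap_atomic(dst_page);

        memcpy(dst, src, PAGE_SIZE);
        kunmap_atomic(dst);     /* unmap the innermost mapping first */
        kunmap_atomic(src);
}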
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index ed0eb2a921f..fb50652e4e1 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -83,7 +83,8 @@ static struct buffer_head *get_block_length(struct super_block *sb,
83 * filesystem), otherwise the length is obtained from the first two bytes of 83 * filesystem), otherwise the length is obtained from the first two bytes of
84 * the metadata block. A bit in the length field indicates if the block 84 * the metadata block. A bit in the length field indicates if the block
85 * is stored uncompressed in the filesystem (usually because compression 85 * is stored uncompressed in the filesystem (usually because compression
86 * generated a larger block - this does occasionally happen with zlib). 86 * generated a larger block - this does occasionally happen with compression
87 * algorithms).
87 */ 88 */
88int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, 89int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
89 int length, u64 *next_index, int srclength, int pages) 90 int length, u64 *next_index, int srclength, int pages)
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 9dfe2ce0fb7..b381305c9a4 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -64,7 +64,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
64 * is offset by 3 because we invent "." and ".." entries which are 64 * is offset by 3 because we invent "." and ".." entries which are
65 * not actually stored in the directory. 65 * not actually stored in the directory.
66 */ 66 */
67 if (f_pos < 3) 67 if (f_pos <= 3)
68 return f_pos; 68 return f_pos;
69 f_pos -= 3; 69 f_pos -= 3;
70 70
@@ -105,7 +105,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
105 struct inode *inode = file->f_dentry->d_inode; 105 struct inode *inode = file->f_dentry->d_inode;
106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
107 u64 block = squashfs_i(inode)->start + msblk->directory_table; 107 u64 block = squashfs_i(inode)->start + msblk->directory_table;
108 int offset = squashfs_i(inode)->offset, length = 0, dir_count, size, 108 int offset = squashfs_i(inode)->offset, length, dir_count, size,
109 type, err; 109 type, err;
110 unsigned int inode_number; 110 unsigned int inode_number;
111 struct squashfs_dir_header dirh; 111 struct squashfs_dir_header dirh;
@@ -173,8 +173,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
173 173
174 dir_count = le32_to_cpu(dirh.count) + 1; 174 dir_count = le32_to_cpu(dirh.count) + 1;
175 175
176 /* dir_count should never be larger than 256 */ 176 if (dir_count > SQUASHFS_DIR_COUNT)
177 if (dir_count > 256)
178 goto failed_read; 177 goto failed_read;
179 178
180 while (dir_count--) { 179 while (dir_count--) {
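The open-coded 256 becomes SQUASHFS_DIR_COUNT, defined in the squashfs_fs.h hunk further down, and the same bound is applied in the namei.c lookup path below. A hedged sketch of the rule both sites now share; squashfs_dir_count_ok() is a hypothetical wrapper, and the "stored as entries - 1" reading follows from the +1 adjustment above:

#include <linux/types.h>

static bool squashfs_dir_count_ok(unsigned int hdr_count)
{
        /* header stores (entries - 1), so entries = hdr_count + 1 */
        return hdr_count < SQUASHFS_DIR_COUNT;  /* i.e. entries <= 256 */
}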
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 38bb1c64055..8ca62c28fe1 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -464,10 +464,10 @@ static int squashfs_readpage(struct file *file, struct page *page)
464 if (PageUptodate(push_page)) 464 if (PageUptodate(push_page))
465 goto skip_page; 465 goto skip_page;
466 466
467 pageaddr = kmap_atomic(push_page, KM_USER0); 467 pageaddr = kmap_atomic(push_page);
468 squashfs_copy_data(pageaddr, buffer, offset, avail); 468 squashfs_copy_data(pageaddr, buffer, offset, avail);
469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); 469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
470 kunmap_atomic(pageaddr, KM_USER0); 470 kunmap_atomic(pageaddr);
471 flush_dcache_page(push_page); 471 flush_dcache_page(push_page);
472 SetPageUptodate(push_page); 472 SetPageUptodate(push_page);
473skip_page: 473skip_page:
@@ -484,9 +484,9 @@ skip_page:
484error_out: 484error_out:
485 SetPageError(page); 485 SetPageError(page);
486out: 486out:
487 pageaddr = kmap_atomic(page, KM_USER0); 487 pageaddr = kmap_atomic(page);
488 memset(pageaddr, 0, PAGE_CACHE_SIZE); 488 memset(pageaddr, 0, PAGE_CACHE_SIZE);
489 kunmap_atomic(pageaddr, KM_USER0); 489 kunmap_atomic(pageaddr);
490 flush_dcache_page(page); 490 flush_dcache_page(page);
491 if (!PageError(page)) 491 if (!PageError(page))
492 SetPageUptodate(page); 492 SetPageUptodate(page);
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 0682b38d7e3..abcc58f3c15 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -144,7 +144,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
144 struct squashfs_dir_entry *dire; 144 struct squashfs_dir_entry *dire;
145 u64 block = squashfs_i(dir)->start + msblk->directory_table; 145 u64 block = squashfs_i(dir)->start + msblk->directory_table;
146 int offset = squashfs_i(dir)->offset; 146 int offset = squashfs_i(dir)->offset;
147 int err, length = 0, dir_count, size; 147 int err, length, dir_count, size;
148 148
149 TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); 149 TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
150 150
@@ -177,8 +177,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
177 177
178 dir_count = le32_to_cpu(dirh.count) + 1; 178 dir_count = le32_to_cpu(dirh.count) + 1;
179 179
180 /* dir_count should never be larger than 256 */ 180 if (dir_count > SQUASHFS_DIR_COUNT)
181 if (dir_count > 256)
182 goto data_error; 181 goto data_error;
183 182
184 while (dir_count--) { 183 while (dir_count--) {
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index e8e14645de9..9e2349d07cb 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -30,11 +30,6 @@
30 30
31/* size of metadata (inode and directory) blocks */ 31/* size of metadata (inode and directory) blocks */
32#define SQUASHFS_METADATA_SIZE 8192 32#define SQUASHFS_METADATA_SIZE 8192
33#define SQUASHFS_METADATA_LOG 13
34
35/* default size of data blocks */
36#define SQUASHFS_FILE_SIZE 131072
37#define SQUASHFS_FILE_LOG 17
38 33
39/* default size of block device I/O */ 34/* default size of block device I/O */
40#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE 35#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
@@ -46,12 +41,12 @@
46#define SQUASHFS_FILE_MAX_SIZE 1048576 41#define SQUASHFS_FILE_MAX_SIZE 1048576
47#define SQUASHFS_FILE_MAX_LOG 20 42#define SQUASHFS_FILE_MAX_LOG 20
48 43
49/* Max number of uids and gids */
50#define SQUASHFS_IDS 65536
51
52/* Max length of filename (not 255) */ 44/* Max length of filename (not 255) */
53#define SQUASHFS_NAME_LEN 256 45#define SQUASHFS_NAME_LEN 256
54 46
47/* Max value for directory header count*/
48#define SQUASHFS_DIR_COUNT 256
49
55#define SQUASHFS_INVALID_FRAG (0xffffffffU) 50#define SQUASHFS_INVALID_FRAG (0xffffffffU)
56#define SQUASHFS_INVALID_XATTR (0xffffffffU) 51#define SQUASHFS_INVALID_XATTR (0xffffffffU)
57#define SQUASHFS_INVALID_BLK (-1LL) 52#define SQUASHFS_INVALID_BLK (-1LL)
@@ -142,9 +137,6 @@
142#define SQUASHFS_MKINODE(A, B) ((long long)(((long long) (A)\ 137#define SQUASHFS_MKINODE(A, B) ((long long)(((long long) (A)\
143 << 16) + (B))) 138 << 16) + (B)))
144 139
145/* Translate between VFS mode and squashfs mode */
146#define SQUASHFS_MODE(A) ((A) & 0xfff)
147
148/* fragment and fragment table defines */ 140/* fragment and fragment table defines */
149#define SQUASHFS_FRAGMENT_BYTES(A) \ 141#define SQUASHFS_FRAGMENT_BYTES(A) \
150 ((A) * sizeof(struct squashfs_fragment_entry)) 142 ((A) * sizeof(struct squashfs_fragment_entry))
@@ -215,11 +207,6 @@
215/* cached data constants for filesystem */ 207/* cached data constants for filesystem */
216#define SQUASHFS_CACHED_BLKS 8 208#define SQUASHFS_CACHED_BLKS 8
217 209
218#define SQUASHFS_MAX_FILE_SIZE_LOG 64
219
220#define SQUASHFS_MAX_FILE_SIZE (1LL << \
221 (SQUASHFS_MAX_FILE_SIZE_LOG - 2))
222
223/* meta index cache */ 210/* meta index cache */
224#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) 211#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
225#define SQUASHFS_META_ENTRIES 127 212#define SQUASHFS_META_ENTRIES 127
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index ecaa2f7bdb8..29cd014ed3a 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -158,10 +158,15 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
158 goto failed_mount; 158 goto failed_mount;
159 } 159 }
160 160
161 /* Check block log for sanity */
161 msblk->block_log = le16_to_cpu(sblk->block_log); 162 msblk->block_log = le16_to_cpu(sblk->block_log);
162 if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) 163 if (msblk->block_log > SQUASHFS_FILE_MAX_LOG)
163 goto failed_mount; 164 goto failed_mount;
164 165
166 /* Check that block_size and block_log match */
167 if (msblk->block_size != (1 << msblk->block_log))
168 goto failed_mount;
169
165 /* Check the root inode for sanity */ 170 /* Check the root inode for sanity */
166 root_inode = le64_to_cpu(sblk->root_inode); 171 root_inode = le64_to_cpu(sblk->root_inode);
167 if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE) 172 if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE)
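A corrupted or crafted superblock can carry a block_size that disagrees with block_log, values that later code trusts when sizing reads, so the new check rejects such an image up front. A sketch of the invariant, with block_geometry_ok() a hypothetical helper:

#include <linux/types.h>

static bool block_geometry_ok(unsigned int block_size, unsigned short block_log)
{
        /* both fields describe the same quantity, so they must agree */
        return block_log <= SQUASHFS_FILE_MAX_LOG &&
               block_size == (1U << block_log);
}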
@@ -316,11 +321,10 @@ check_directory_table:
316 } 321 }
317 insert_inode_hash(root); 322 insert_inode_hash(root);
318 323
319 sb->s_root = d_alloc_root(root); 324 sb->s_root = d_make_root(root);
320 if (sb->s_root == NULL) { 325 if (sb->s_root == NULL) {
321 ERROR("Root inode create failed\n"); 326 ERROR("Root inode create failed\n");
322 err = -ENOMEM; 327 err = -ENOMEM;
323 iput(root);
324 goto failed_mount; 328 goto failed_mount;
325 } 329 }
326 330
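Unlike d_alloc_root(), d_make_root() consumes the inode reference even when it fails, which is why the iput() disappears from the error path; the sysfs and sysv hunks below make the same conversion. A minimal sketch of the new idiom, assuming a fill_super-style context (demo_set_root() is hypothetical):

#include <linux/dcache.h>
#include <linux/fs.h>

static int demo_set_root(struct super_block *sb, struct inode *root_inode)
{
        sb->s_root = d_make_root(root_inode);
        if (!sb->s_root)
                return -ENOMEM; /* root_inode was already released for us */
        return 0;
}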
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 1191817264c..12806dffb34 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -90,14 +90,14 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
90 goto error_out; 90 goto error_out;
91 } 91 }
92 92
93 pageaddr = kmap_atomic(page, KM_USER0); 93 pageaddr = kmap_atomic(page);
94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset, 94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
95 length - bytes); 95 length - bytes);
96 if (copied == length - bytes) 96 if (copied == length - bytes)
97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); 97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
98 else 98 else
99 block = entry->next_index; 99 block = entry->next_index;
100 kunmap_atomic(pageaddr, KM_USER0); 100 kunmap_atomic(pageaddr);
101 squashfs_cache_put(entry); 101 squashfs_cache_put(entry);
102 } 102 }
103 103
diff --git a/fs/stack.c b/fs/stack.c
index 9c11519245a..5b5388250e2 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -1,4 +1,4 @@
1#include <linux/module.h> 1#include <linux/export.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/fs_stack.h> 3#include <linux/fs_stack.h>
4 4
diff --git a/fs/stat.c b/fs/stat.c
index 8806b8997d2..c733dc5753a 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -4,7 +4,7 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/module.h> 7#include <linux/export.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/file.h> 10#include <linux/file.h>
@@ -307,7 +307,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
307 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
308 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
309 if (!error) { 309 if (!error) {
310 touch_atime(path.mnt, path.dentry); 310 touch_atime(&path);
311 error = inode->i_op->readlink(path.dentry, 311 error = inode->i_op->readlink(path.dentry,
312 buf, bufsiz); 312 buf, bufsiz);
313 } 313 }
diff --git a/fs/statfs.c b/fs/statfs.c
index 2aa6a22e0be..43e6b6fe4e8 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -1,5 +1,5 @@
1#include <linux/syscalls.h> 1#include <linux/syscalls.h>
2#include <linux/module.h> 2#include <linux/export.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/mount.h> 5#include <linux/mount.h>
diff --git a/fs/super.c b/fs/super.c
index 6277ec6cb60..cf001775617 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -20,7 +20,7 @@
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
21 */ 21 */
22 22
23#include <linux/module.h> 23#include <linux/export.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/acct.h> 25#include <linux/acct.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -32,6 +32,7 @@
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h>
35#include "internal.h" 36#include "internal.h"
36 37
37 38
@@ -250,7 +251,7 @@ void deactivate_locked_super(struct super_block *s)
250{ 251{
251 struct file_system_type *fs = s->s_type; 252 struct file_system_type *fs = s->s_type;
252 if (atomic_dec_and_test(&s->s_active)) { 253 if (atomic_dec_and_test(&s->s_active)) {
253 cleancache_flush_fs(s); 254 cleancache_invalidate_fs(s);
254 fs->kill_sb(s); 255 fs->kill_sb(s);
255 256
256 /* caches are now gone, we can safely kill the shrinker now */ 257 /* caches are now gone, we can safely kill the shrinker now */
diff --git a/fs/sync.c b/fs/sync.c
index f3501ef3923..0e8db939d96 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -6,7 +6,7 @@
6#include <linux/file.h> 6#include <linux/file.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/writeback.h> 12#include <linux/writeback.h>
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7fdf6a7b743..35a36d39fa2 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -22,76 +22,103 @@
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/security.h> 24#include <linux/security.h>
25#include <linux/hash.h>
25#include "sysfs.h" 26#include "sysfs.h"
26 27
27DEFINE_MUTEX(sysfs_mutex); 28DEFINE_MUTEX(sysfs_mutex);
28DEFINE_SPINLOCK(sysfs_assoc_lock); 29DEFINE_SPINLOCK(sysfs_assoc_lock);
29 30
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
32
30static DEFINE_SPINLOCK(sysfs_ino_lock); 33static DEFINE_SPINLOCK(sysfs_ino_lock);
31static DEFINE_IDA(sysfs_ino_ida); 34static DEFINE_IDA(sysfs_ino_ida);
32 35
33/** 36/**
34 * sysfs_link_sibling - link sysfs_dirent into sibling list 37 * sysfs_name_hash
38 * @ns: Namespace tag to hash
39 * @name: Null terminated string to hash
40 *
41 * Returns a 31-bit hash of ns + name (so it fits in an off_t)
42 */
43static unsigned int sysfs_name_hash(const void *ns, const char *name)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const void *ns,
60 const char *name, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
73 right);
74}
75
76/**
77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
35 * @sd: sysfs_dirent of interest 78 * @sd: sysfs_dirent of interest
36 * 79 *
37 * Link @sd into its sibling list which starts from 80 * Link @sd into its sibling rbtree which starts from
38 * sd->s_parent->s_dir.children. 81 * sd->s_parent->s_dir.children.
39 * 82 *
40 * Locking: 83 * Locking:
41 * mutex_lock(sysfs_mutex) 84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on success, -EEXIST on failure.
42 */ 88 */
43static void sysfs_link_sibling(struct sysfs_dirent *sd) 89static int sysfs_link_sibling(struct sysfs_dirent *sd)
44{ 90{
45 struct sysfs_dirent *parent_sd = sd->s_parent; 91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
46 92 struct rb_node *parent = NULL;
47 struct rb_node **p;
48 struct rb_node *parent;
49 93
50 if (sysfs_type(sd) == SYSFS_DIR) 94 if (sysfs_type(sd) == SYSFS_DIR)
51 parent_sd->s_dir.subdirs++; 95 sd->s_parent->s_dir.subdirs++;
52 96
53 p = &parent_sd->s_dir.inode_tree.rb_node; 97 while (*node) {
54 parent = NULL; 98 struct sysfs_dirent *pos;
55 while (*p) { 99 int result;
56 parent = *p; 100
57#define node rb_entry(parent, struct sysfs_dirent, inode_node) 101 pos = to_sysfs_dirent(*node);
58 if (sd->s_ino < node->s_ino) { 102 parent = *node;
59 p = &node->inode_node.rb_left; 103 result = sysfs_sd_compare(sd, pos);
60 } else if (sd->s_ino > node->s_ino) { 104 if (result < 0)
61 p = &node->inode_node.rb_right; 105 node = &pos->s_rb.rb_left;
62 } else { 106 else if (result > 0)
63 printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", 107 node = &pos->s_rb.rb_right;
64 (unsigned long) sd->s_ino); 108 else
65 BUG(); 109 return -EEXIST;
66 }
67#undef node
68 }
69 rb_link_node(&sd->inode_node, parent, p);
70 rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
71
72 p = &parent_sd->s_dir.name_tree.rb_node;
73 parent = NULL;
74 while (*p) {
75 int c;
76 parent = *p;
77#define node rb_entry(parent, struct sysfs_dirent, name_node)
78 c = strcmp(sd->s_name, node->s_name);
79 if (c < 0) {
80 p = &node->name_node.rb_left;
81 } else {
82 p = &node->name_node.rb_right;
83 }
84#undef node
85 } 110 }
86 rb_link_node(&sd->name_node, parent, p); 111 /* add new node and rebalance the tree */
87 rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); 112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
88} 115}
89 116
90/** 117/**
91 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list 118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
92 * @sd: sysfs_dirent of interest 119 * @sd: sysfs_dirent of interest
93 * 120 *
94 * Unlink @sd from its sibling list which starts from 121 * Unlink @sd from its sibling rbtree which starts from
95 * sd->s_parent->s_dir.children. 122 * sd->s_parent->s_dir.children.
96 * 123 *
97 * Locking: 124 * Locking:
@@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
102 if (sysfs_type(sd) == SYSFS_DIR) 129 if (sysfs_type(sd) == SYSFS_DIR)
103 sd->s_parent->s_dir.subdirs--; 130 sd->s_parent->s_dir.subdirs--;
104 131
105 rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); 132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
106 rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
107} 133}
108 134
109/** 135/**
@@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
198 rwsem_release(&sd->dep_map, 1, _RET_IP_); 224 rwsem_release(&sd->dep_map, 1, _RET_IP_);
199} 225}
200 226
201static int sysfs_alloc_ino(ino_t *pino) 227static int sysfs_alloc_ino(unsigned int *pino)
202{ 228{
203 int ino, rc; 229 int ino, rc;
204 230
@@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino)
217 return rc; 243 return rc;
218} 244}
219 245
220static void sysfs_free_ino(ino_t ino) 246static void sysfs_free_ino(unsigned int ino)
221{ 247{
222 spin_lock(&sysfs_ino_lock); 248 spin_lock(&sysfs_ino_lock);
223 ida_remove(&sysfs_ino_ida, ino); 249 ida_remove(&sysfs_ino_ida, ino);
@@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
402int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 428int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
403{ 429{
404 struct sysfs_inode_attrs *ps_iattr; 430 struct sysfs_inode_attrs *ps_iattr;
431 int ret;
405 432
406 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { 433 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
407 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 434 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
410 return -EINVAL; 437 return -EINVAL;
411 } 438 }
412 439
413 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) 440 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
414 return -EEXIST;
415
416 sd->s_parent = sysfs_get(acxt->parent_sd); 441 sd->s_parent = sysfs_get(acxt->parent_sd);
417 442
418 sysfs_link_sibling(sd); 443 ret = sysfs_link_sibling(sd);
444 if (ret)
445 return ret;
419 446
420 /* Update timestamps on the parent */ 447 /* Update timestamps on the parent */
421 ps_iattr = acxt->parent_sd->s_iattr; 448 ps_iattr = acxt->parent_sd->s_iattr;
@@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
565 const void *ns, 592 const void *ns,
566 const unsigned char *name) 593 const unsigned char *name)
567{ 594{
568 struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; 595 struct rb_node *node = parent_sd->s_dir.children.rb_node;
569 struct sysfs_dirent *found = NULL; 596 unsigned int hash;
570 597
571 if (!!sysfs_ns_type(parent_sd) != !!ns) { 598 if (!!sysfs_ns_type(parent_sd) != !!ns) {
572 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 599 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
575 return NULL; 602 return NULL;
576 } 603 }
577 604
578 while (p) { 605 hash = sysfs_name_hash(ns, name);
579 int c; 606 while (node) {
580#define node rb_entry(p, struct sysfs_dirent, name_node) 607 struct sysfs_dirent *sd;
581 c = strcmp(name, node->s_name); 608 int result;
582 if (c < 0) { 609
583 p = node->name_node.rb_left; 610 sd = to_sysfs_dirent(node);
584 } else if (c > 0) { 611 result = sysfs_name_compare(hash, ns, name, sd);
585 p = node->name_node.rb_right; 612 if (result < 0)
586 } else { 613 node = node->rb_left;
587 found = node; 614 else if (result > 0)
588 p = node->name_node.rb_left; 615 node = node->rb_right;
589 } 616 else
590#undef node 617 return sd;
591 }
592
593 if (found) {
594 while (found->s_ns != ns) {
595 p = rb_next(&found->name_node);
596 if (!p)
597 return NULL;
598 found = rb_entry(p, struct sysfs_dirent, name_node);
599 if (strcmp(name, found->s_name))
600 return NULL;
601 }
602 } 618 }
603 619 return NULL;
604 return found;
605} 620}
606 621
607/** 622/**
@@ -714,6 +729,9 @@ int sysfs_create_dir(struct kobject * kobj)
714 else 729 else
715 parent_sd = &sysfs_root; 730 parent_sd = &sysfs_root;
716 731
732 if (!parent_sd)
733 return -ENOENT;
734
717 if (sysfs_ns_type(parent_sd)) 735 if (sysfs_ns_type(parent_sd))
718 ns = kobj->ktype->namespace(kobj); 736 ns = kobj->ktype->namespace(kobj);
719 type = sysfs_read_ns_type(kobj); 737 type = sysfs_read_ns_type(kobj);
@@ -804,9 +822,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
804 822
805 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); 823 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
806 sysfs_addrm_start(&acxt, dir_sd); 824 sysfs_addrm_start(&acxt, dir_sd);
807 pos = rb_first(&dir_sd->s_dir.inode_tree); 825 pos = rb_first(&dir_sd->s_dir.children);
808 while (pos) { 826 while (pos) {
809 struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); 827 struct sysfs_dirent *sd = to_sysfs_dirent(pos);
810 pos = rb_next(pos); 828 pos = rb_next(pos);
811 if (sysfs_type(sd) != SYSFS_DIR) 829 if (sysfs_type(sd) != SYSFS_DIR)
812 sysfs_remove_one(&acxt, sd); 830 sysfs_remove_one(&acxt, sd);
@@ -870,6 +888,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
870 sysfs_get(new_parent_sd); 888 sysfs_get(new_parent_sd);
871 sysfs_put(sd->s_parent); 889 sysfs_put(sd->s_parent);
872 sd->s_ns = new_ns; 890 sd->s_ns = new_ns;
891 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
873 sd->s_parent = new_parent_sd; 892 sd->s_parent = new_parent_sd;
874 sysfs_link_sibling(sd); 893 sysfs_link_sibling(sd);
875 894
@@ -919,38 +938,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
919} 938}
920 939
921static struct sysfs_dirent *sysfs_dir_pos(const void *ns, 940static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
922 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 941 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
923{ 942{
924 if (pos) { 943 if (pos) {
925 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 944 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
926 pos->s_parent == parent_sd && 945 pos->s_parent == parent_sd &&
927 ino == pos->s_ino; 946 hash == pos->s_hash;
928 sysfs_put(pos); 947 sysfs_put(pos);
929 if (!valid) 948 if (!valid)
930 pos = NULL; 949 pos = NULL;
931 } 950 }
932 if (!pos && (ino > 1) && (ino < INT_MAX)) { 951 if (!pos && (hash > 1) && (hash < INT_MAX)) {
933 struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; 952 struct rb_node *node = parent_sd->s_dir.children.rb_node;
934 while (p) { 953 while (node) {
935#define node rb_entry(p, struct sysfs_dirent, inode_node) 954 pos = to_sysfs_dirent(node);
936 if (ino < node->s_ino) { 955
937 pos = node; 956 if (hash < pos->s_hash)
938 p = node->inode_node.rb_left; 957 node = node->rb_left;
939 } else if (ino > node->s_ino) { 958 else if (hash > pos->s_hash)
940 p = node->inode_node.rb_right; 959 node = node->rb_right;
941 } else { 960 else
942 pos = node;
943 break; 961 break;
944 }
945#undef node
946 } 962 }
947 } 963 }
964 /* Skip over entries in the wrong namespace */
948 while (pos && pos->s_ns != ns) { 965 while (pos && pos->s_ns != ns) {
949 struct rb_node *p = rb_next(&pos->inode_node); 966 struct rb_node *node = rb_next(&pos->s_rb);
950 if (!p) 967 if (!node)
951 pos = NULL; 968 pos = NULL;
952 else 969 else
953 pos = rb_entry(p, struct sysfs_dirent, inode_node); 970 pos = to_sysfs_dirent(node);
954 } 971 }
955 return pos; 972 return pos;
956} 973}
@@ -960,11 +977,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
960{ 977{
961 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 978 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
962 if (pos) do { 979 if (pos) do {
963 struct rb_node *p = rb_next(&pos->inode_node); 980 struct rb_node *node = rb_next(&pos->s_rb);
964 if (!p) 981 if (!node)
965 pos = NULL; 982 pos = NULL;
966 else 983 else
967 pos = rb_entry(p, struct sysfs_dirent, inode_node); 984 pos = to_sysfs_dirent(node);
968 } while (pos && pos->s_ns != ns); 985 } while (pos && pos->s_ns != ns);
969 return pos; 986 return pos;
970} 987}
@@ -1006,7 +1023,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1006 len = strlen(name); 1023 len = strlen(name);
1007 ino = pos->s_ino; 1024 ino = pos->s_ino;
1008 type = dt_type(pos); 1025 type = dt_type(pos);
1009 filp->f_pos = ino; 1026 filp->f_pos = pos->s_hash;
1010 filp->private_data = sysfs_get(pos); 1027 filp->private_data = sysfs_get(pos);
1011 1028
1012 mutex_unlock(&sysfs_mutex); 1029 mutex_unlock(&sysfs_mutex);
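The separate inode- and name-keyed rbtrees collapse into one tree ordered by a 31-bit hash of (ns, name), with the exact key breaking ties; the same hash doubles as the directory f_pos, which is why sysfs_name_hash() keeps the value away from 0, 1 and INT_MAX. A hedged sketch of the lookup discipline; struct demo_node and demo_find() are simplified stand-ins for sysfs_dirent and sysfs_find_dirent():

#include <linux/rbtree.h>
#include <linux/string.h>

struct demo_node {
        struct rb_node  rb;
        unsigned int    hash;   /* 31-bit key hash */
        const char      *name;
};

static struct demo_node *demo_find(struct rb_root *root,
                                   unsigned int hash, const char *name)
{
        struct rb_node *node = root->rb_node;

        while (node) {
                struct demo_node *pos = rb_entry(node, struct demo_node, rb);
                /* order by hash first, compare names only on collision */
                int result = (hash != pos->hash) ? (int)(hash - pos->hash) :
                                                   strcmp(name, pos->name);

                if (result < 0)
                        node = node->rb_left;
                else if (result > 0)
                        node = node->rb_right;
                else
                        return pos;
        }
        return NULL;
}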
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index dd1701caecc..2df555c66d5 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -67,7 +67,11 @@ static int internal_create_group(struct kobject *kobj, int update,
67 /* Updates may happen before the object has been instantiated */ 67 /* Updates may happen before the object has been instantiated */
68 if (unlikely(update && !kobj->sd)) 68 if (unlikely(update && !kobj->sd))
69 return -EINVAL; 69 return -EINVAL;
70 70 if (!grp->attrs) {
71 WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n",
72 kobj->name, grp->name ? grp->name : "");
73 return -EINVAL;
74 }
71 if (grp->name) { 75 if (grp->name) {
72 error = sysfs_create_subdir(kobj, grp->name, &sd); 76 error = sysfs_create_subdir(kobj, grp->name, &sd);
73 if (error) 77 if (error)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 85eb81683a2..feb2d69396c 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
136 void *old_secdata; 136 void *old_secdata;
137 size_t old_secdata_len; 137 size_t old_secdata_len;
138 138
139 iattrs = sd->s_iattr; 139 if (!sd->s_iattr) {
140 if (!iattrs) 140 sd->s_iattr = sysfs_init_inode_attrs(sd);
141 iattrs = sysfs_init_inode_attrs(sd); 141 if (!sd->s_iattr)
142 if (!iattrs) 142 return -ENOMEM;
143 return -ENOMEM; 143 }
144 144
145 iattrs = sd->s_iattr;
145 old_secdata = iattrs->ia_secdata; 146 old_secdata = iattrs->ia_secdata;
146 old_secdata_len = iattrs->ia_secdata_len; 147 old_secdata_len = iattrs->ia_secdata_len;
147 148
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e34f0d99ea4..52c3bdb66a8 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = {
36 .s_name = "", 36 .s_name = "",
37 .s_count = ATOMIC_INIT(1), 37 .s_count = ATOMIC_INIT(1),
38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), 38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 39 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
40 .s_ino = 1, 40 .s_ino = 1,
41}; 41};
42 42
@@ -61,10 +61,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 } 61 }
62 62
63 /* instantiate and link root dentry */ 63 /* instantiate and link root dentry */
64 root = d_alloc_root(inode); 64 root = d_make_root(inode);
65 if (!root) { 65 if (!root) {
66 pr_debug("%s: could not get root dentry!\n",__func__); 66 pr_debug("%s: could not get root dentry!\n",__func__);
67 iput(inode);
68 return -ENOMEM; 67 return -ENOMEM;
69 } 68 }
70 root->d_fsdata = &sysfs_root; 69 root->d_fsdata = &sysfs_root;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7484a36ee67..661a9639570 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -20,9 +20,8 @@ struct sysfs_elem_dir {
20 struct kobject *kobj; 20 struct kobject *kobj;
21 21
22 unsigned long subdirs; 22 unsigned long subdirs;
23 23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root inode_tree; 24 struct rb_root children;
25 struct rb_root name_tree;
26}; 25};
27 26
28struct sysfs_elem_symlink { 27struct sysfs_elem_symlink {
@@ -62,8 +61,7 @@ struct sysfs_dirent {
62 struct sysfs_dirent *s_parent; 61 struct sysfs_dirent *s_parent;
63 const char *s_name; 62 const char *s_name;
64 63
65 struct rb_node inode_node; 64 struct rb_node s_rb;
66 struct rb_node name_node;
67 65
68 union { 66 union {
69 struct completion *completion; 67 struct completion *completion;
@@ -71,6 +69,7 @@ struct sysfs_dirent {
71 } u; 69 } u;
72 70
73 const void *s_ns; /* namespace tag */ 71 const void *s_ns; /* namespace tag */
72 unsigned int s_hash; /* ns + name hash */
74 union { 73 union {
75 struct sysfs_elem_dir s_dir; 74 struct sysfs_elem_dir s_dir;
76 struct sysfs_elem_symlink s_symlink; 75 struct sysfs_elem_symlink s_symlink;
@@ -78,9 +77,9 @@ struct sysfs_dirent {
78 struct sysfs_elem_bin_attr s_bin_attr; 77 struct sysfs_elem_bin_attr s_bin_attr;
79 }; 78 };
80 79
81 unsigned int s_flags; 80 unsigned short s_flags;
82 umode_t s_mode; 81 umode_t s_mode;
83 ino_t s_ino; 82 unsigned int s_ino;
84 struct sysfs_inode_attrs *s_iattr; 83 struct sysfs_inode_attrs *s_iattr;
85}; 84};
86 85
@@ -95,11 +94,11 @@ struct sysfs_dirent {
95#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) 94#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
96 95
97/* identify any namespace tag on sysfs_dirents */ 96/* identify any namespace tag on sysfs_dirents */
98#define SYSFS_NS_TYPE_MASK 0xff00 97#define SYSFS_NS_TYPE_MASK 0xf00
99#define SYSFS_NS_TYPE_SHIFT 8 98#define SYSFS_NS_TYPE_SHIFT 8
100 99
101#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) 100#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
102#define SYSFS_FLAG_REMOVED 0x020000 101#define SYSFS_FLAG_REMOVED 0x02000
103 102
104static inline unsigned int sysfs_type(struct sysfs_dirent *sd) 103static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
105{ 104{
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b217797e621..d7466e29361 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -121,9 +121,6 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
121{ 121{
122 struct inode *inode = old_dentry->d_inode; 122 struct inode *inode = old_dentry->d_inode;
123 123
124 if (inode->i_nlink >= SYSV_SB(inode->i_sb)->s_link_max)
125 return -EMLINK;
126
127 inode->i_ctime = CURRENT_TIME_SEC; 124 inode->i_ctime = CURRENT_TIME_SEC;
128 inode_inc_link_count(inode); 125 inode_inc_link_count(inode);
129 ihold(inode); 126 ihold(inode);
@@ -134,10 +131,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 131static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
135{ 132{
136 struct inode * inode; 133 struct inode * inode;
137 int err = -EMLINK; 134 int err;
138 135
139 if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max)
140 goto out;
141 inode_inc_link_count(dir); 136 inode_inc_link_count(dir);
142 137
143 inode = sysv_new_inode(dir, S_IFDIR|mode); 138 inode = sysv_new_inode(dir, S_IFDIR|mode);
@@ -251,11 +246,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
251 drop_nlink(new_inode); 246 drop_nlink(new_inode);
252 inode_dec_link_count(new_inode); 247 inode_dec_link_count(new_inode);
253 } else { 248 } else {
254 if (dir_de) {
255 err = -EMLINK;
256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
257 goto out_dir;
258 }
259 err = sysv_add_link(new_dentry, old_inode); 249 err = sysv_add_link(new_dentry, old_inode);
260 if (err) 250 if (err)
261 goto out_dir; 251 goto out_dir;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index f60c196913e..7491c33b646 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -44,7 +44,7 @@ enum {
44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60 44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60
45}; 45};
46 46
47static void detected_xenix(struct sysv_sb_info *sbi) 47static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links)
48{ 48{
49 struct buffer_head *bh1 = sbi->s_bh1; 49 struct buffer_head *bh1 = sbi->s_bh1;
50 struct buffer_head *bh2 = sbi->s_bh2; 50 struct buffer_head *bh2 = sbi->s_bh2;
@@ -59,7 +59,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512); 59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512);
60 } 60 }
61 61
62 sbi->s_link_max = XENIX_LINK_MAX; 62 *max_links = XENIX_LINK_MAX;
63 sbi->s_fic_size = XENIX_NICINOD; 63 sbi->s_fic_size = XENIX_NICINOD;
64 sbi->s_flc_size = XENIX_NICFREE; 64 sbi->s_flc_size = XENIX_NICFREE;
65 sbi->s_sbd1 = (char *)sbd1; 65 sbi->s_sbd1 = (char *)sbd1;
@@ -75,7 +75,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize); 75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize);
76} 76}
77 77
78static void detected_sysv4(struct sysv_sb_info *sbi) 78static void detected_sysv4(struct sysv_sb_info *sbi, unsigned *max_links)
79{ 79{
80 struct sysv4_super_block * sbd; 80 struct sysv4_super_block * sbd;
81 struct buffer_head *bh1 = sbi->s_bh1; 81 struct buffer_head *bh1 = sbi->s_bh1;
@@ -86,7 +86,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
86 else 86 else
87 sbd = (struct sysv4_super_block *) bh2->b_data; 87 sbd = (struct sysv4_super_block *) bh2->b_data;
88 88
89 sbi->s_link_max = SYSV_LINK_MAX; 89 *max_links = SYSV_LINK_MAX;
90 sbi->s_fic_size = SYSV_NICINOD; 90 sbi->s_fic_size = SYSV_NICINOD;
91 sbi->s_flc_size = SYSV_NICFREE; 91 sbi->s_flc_size = SYSV_NICFREE;
92 sbi->s_sbd1 = (char *)sbd; 92 sbi->s_sbd1 = (char *)sbd;
@@ -103,7 +103,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
104} 104}
105 105
106static void detected_sysv2(struct sysv_sb_info *sbi) 106static void detected_sysv2(struct sysv_sb_info *sbi, unsigned *max_links)
107{ 107{
108 struct sysv2_super_block *sbd; 108 struct sysv2_super_block *sbd;
109 struct buffer_head *bh1 = sbi->s_bh1; 109 struct buffer_head *bh1 = sbi->s_bh1;
@@ -114,7 +114,7 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
114 else 114 else
115 sbd = (struct sysv2_super_block *) bh2->b_data; 115 sbd = (struct sysv2_super_block *) bh2->b_data;
116 116
117 sbi->s_link_max = SYSV_LINK_MAX; 117 *max_links = SYSV_LINK_MAX;
118 sbi->s_fic_size = SYSV_NICINOD; 118 sbi->s_fic_size = SYSV_NICINOD;
119 sbi->s_flc_size = SYSV_NICFREE; 119 sbi->s_flc_size = SYSV_NICFREE;
120 sbi->s_sbd1 = (char *)sbd; 120 sbi->s_sbd1 = (char *)sbd;
@@ -131,14 +131,14 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
132} 132}
133 133
134static void detected_coherent(struct sysv_sb_info *sbi) 134static void detected_coherent(struct sysv_sb_info *sbi, unsigned *max_links)
135{ 135{
136 struct coh_super_block * sbd; 136 struct coh_super_block * sbd;
137 struct buffer_head *bh1 = sbi->s_bh1; 137 struct buffer_head *bh1 = sbi->s_bh1;
138 138
139 sbd = (struct coh_super_block *) bh1->b_data; 139 sbd = (struct coh_super_block *) bh1->b_data;
140 140
141 sbi->s_link_max = COH_LINK_MAX; 141 *max_links = COH_LINK_MAX;
142 sbi->s_fic_size = COH_NICINOD; 142 sbi->s_fic_size = COH_NICINOD;
143 sbi->s_flc_size = COH_NICFREE; 143 sbi->s_flc_size = COH_NICFREE;
144 sbi->s_sbd1 = (char *)sbd; 144 sbi->s_sbd1 = (char *)sbd;
@@ -154,12 +154,12 @@ static void detected_coherent(struct sysv_sb_info *sbi)
154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
155} 155}
156 156
157static void detected_v7(struct sysv_sb_info *sbi) 157static void detected_v7(struct sysv_sb_info *sbi, unsigned *max_links)
158{ 158{
159 struct buffer_head *bh2 = sbi->s_bh2; 159 struct buffer_head *bh2 = sbi->s_bh2;
160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data; 160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data;
161 161
162 sbi->s_link_max = V7_LINK_MAX; 162 *max_links = V7_LINK_MAX;
163 sbi->s_fic_size = V7_NICINOD; 163 sbi->s_fic_size = V7_NICINOD;
164 sbi->s_flc_size = V7_NICFREE; 164 sbi->s_flc_size = V7_NICFREE;
165 sbi->s_sbd1 = (char *)sbd; 165 sbi->s_sbd1 = (char *)sbd;
@@ -290,7 +290,7 @@ static char *flavour_names[] = {
290 [FSTYPE_AFS] = "AFS", 290 [FSTYPE_AFS] = "AFS",
291}; 291};
292 292
293static void (*flavour_setup[])(struct sysv_sb_info *) = { 293static void (*flavour_setup[])(struct sysv_sb_info *, unsigned *) = {
294 [FSTYPE_XENIX] = detected_xenix, 294 [FSTYPE_XENIX] = detected_xenix,
295 [FSTYPE_SYSV4] = detected_sysv4, 295 [FSTYPE_SYSV4] = detected_sysv4,
296 [FSTYPE_SYSV2] = detected_sysv2, 296 [FSTYPE_SYSV2] = detected_sysv2,
@@ -310,7 +310,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
310 310
311 sbi->s_firstinodezone = 2; 311 sbi->s_firstinodezone = 2;
312 312
313 flavour_setup[sbi->s_type](sbi); 313 flavour_setup[sbi->s_type](sbi, &sb->s_max_links);
314 314
315 sbi->s_truncate = 1; 315 sbi->s_truncate = 1;
316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone; 316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone;
@@ -341,9 +341,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
341 printk("SysV FS: get root inode failed\n"); 341 printk("SysV FS: get root inode failed\n");
342 return 0; 342 return 0;
343 } 343 }
344 sb->s_root = d_alloc_root(root_inode); 344 sb->s_root = d_make_root(root_inode);
345 if (!sb->s_root) { 345 if (!sb->s_root) {
346 iput(root_inode);
347 printk("SysV FS: get root dentry failed\n"); 346 printk("SysV FS: get root dentry failed\n");
348 return 0; 347 return 0;
349 } 348 }
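With the flavour-specific s_link_max gone, each detected_*() variant reports its limit through the generic sb->s_max_links, and the EMLINK checks deleted from sysv/namei.c above move into the VFS. A sketch of the centralized test, assuming the 0-means-unlimited convention; may_add_link() is hypothetical, and the real checks sit in the VFS link/mkdir/rename paths:

#include <linux/errno.h>
#include <linux/fs.h>

static int may_add_link(struct inode *inode)
{
        unsigned int max = inode->i_sb->s_max_links;

        if (max && inode->i_nlink >= max)
                return -EMLINK;
        return 0;
}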
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 0e4b821c569..11b07672f6c 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -24,7 +24,6 @@ struct sysv_sb_info {
24 char s_bytesex; /* bytesex (le/be/pdp) */ 24 char s_bytesex; /* bytesex (le/be/pdp) */
25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */ 25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */
26 /* if 0: they are disallowed (ENAMETOOLONG) */ 26 /* if 0: they are disallowed (ENAMETOOLONG) */
27 nlink_t s_link_max; /* max number of hard links to a file */
28 unsigned int s_inodes_per_block; /* number of inodes per block */ 27 unsigned int s_inodes_per_block; /* number of inodes per block */
29 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */ 28 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */
30 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */ 29 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index f922cbacdb9..1934084e208 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -36,7 +36,7 @@
36 36
37#ifdef CONFIG_UBIFS_FS_DEBUG 37#ifdef CONFIG_UBIFS_FS_DEBUG
38 38
39DEFINE_SPINLOCK(dbg_lock); 39static DEFINE_SPINLOCK(dbg_lock);
40 40
41static const char *get_key_fmt(int fmt) 41static const char *get_key_fmt(int fmt)
42{ 42{
@@ -221,15 +221,15 @@ const char *dbg_jhead(int jhead)
221 221
222static void dump_ch(const struct ubifs_ch *ch) 222static void dump_ch(const struct ubifs_ch *ch)
223{ 223{
224 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); 224 printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic));
225 printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); 225 printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc));
226 printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, 226 printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type,
227 dbg_ntype(ch->node_type)); 227 dbg_ntype(ch->node_type));
228 printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, 228 printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type,
229 dbg_gtype(ch->group_type)); 229 dbg_gtype(ch->group_type));
230 printk(KERN_DEBUG "\tsqnum %llu\n", 230 printk(KERN_ERR "\tsqnum %llu\n",
231 (unsigned long long)le64_to_cpu(ch->sqnum)); 231 (unsigned long long)le64_to_cpu(ch->sqnum));
232 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); 232 printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len));
233} 233}
234 234
235void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) 235void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
@@ -240,43 +240,43 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
240 struct ubifs_dent_node *dent, *pdent = NULL; 240 struct ubifs_dent_node *dent, *pdent = NULL;
241 int count = 2; 241 int count = 2;
242 242
243 printk(KERN_DEBUG "Dump in-memory inode:"); 243 printk(KERN_ERR "Dump in-memory inode:");
244 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); 244 printk(KERN_ERR "\tinode %lu\n", inode->i_ino);
245 printk(KERN_DEBUG "\tsize %llu\n", 245 printk(KERN_ERR "\tsize %llu\n",
246 (unsigned long long)i_size_read(inode)); 246 (unsigned long long)i_size_read(inode));
247 printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); 247 printk(KERN_ERR "\tnlink %u\n", inode->i_nlink);
248 printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); 248 printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid);
249 printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); 249 printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid);
250 printk(KERN_DEBUG "\tatime %u.%u\n", 250 printk(KERN_ERR "\tatime %u.%u\n",
251 (unsigned int)inode->i_atime.tv_sec, 251 (unsigned int)inode->i_atime.tv_sec,
252 (unsigned int)inode->i_atime.tv_nsec); 252 (unsigned int)inode->i_atime.tv_nsec);
253 printk(KERN_DEBUG "\tmtime %u.%u\n", 253 printk(KERN_ERR "\tmtime %u.%u\n",
254 (unsigned int)inode->i_mtime.tv_sec, 254 (unsigned int)inode->i_mtime.tv_sec,
255 (unsigned int)inode->i_mtime.tv_nsec); 255 (unsigned int)inode->i_mtime.tv_nsec);
256 printk(KERN_DEBUG "\tctime %u.%u\n", 256 printk(KERN_ERR "\tctime %u.%u\n",
257 (unsigned int)inode->i_ctime.tv_sec, 257 (unsigned int)inode->i_ctime.tv_sec,
258 (unsigned int)inode->i_ctime.tv_nsec); 258 (unsigned int)inode->i_ctime.tv_nsec);
259 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); 259 printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum);
260 printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); 260 printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size);
261 printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); 261 printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt);
262 printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); 262 printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names);
263 printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); 263 printk(KERN_ERR "\tdirty %u\n", ui->dirty);
264 printk(KERN_DEBUG "\txattr %u\n", ui->xattr); 264 printk(KERN_ERR "\txattr %u\n", ui->xattr);
265 printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); 265 printk(KERN_ERR "\tbulk_read %u\n", ui->xattr);
266 printk(KERN_DEBUG "\tsynced_i_size %llu\n", 266 printk(KERN_ERR "\tsynced_i_size %llu\n",
267 (unsigned long long)ui->synced_i_size); 267 (unsigned long long)ui->synced_i_size);
268 printk(KERN_DEBUG "\tui_size %llu\n", 268 printk(KERN_ERR "\tui_size %llu\n",
269 (unsigned long long)ui->ui_size); 269 (unsigned long long)ui->ui_size);
270 printk(KERN_DEBUG "\tflags %d\n", ui->flags); 270 printk(KERN_ERR "\tflags %d\n", ui->flags);
271 printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); 271 printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type);
272 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); 272 printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read);
273 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); 273 printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row);
274 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); 274 printk(KERN_ERR "\tdata_len %d\n", ui->data_len);
275 275
276 if (!S_ISDIR(inode->i_mode)) 276 if (!S_ISDIR(inode->i_mode))
277 return; 277 return;
278 278
279 printk(KERN_DEBUG "List of directory entries:\n"); 279 printk(KERN_ERR "List of directory entries:\n");
280 ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); 280 ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
281 281
282 lowest_dent_key(c, &key, inode->i_ino); 282 lowest_dent_key(c, &key, inode->i_ino);
@@ -284,11 +284,11 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
284 dent = ubifs_tnc_next_ent(c, &key, &nm); 284 dent = ubifs_tnc_next_ent(c, &key, &nm);
285 if (IS_ERR(dent)) { 285 if (IS_ERR(dent)) {
286 if (PTR_ERR(dent) != -ENOENT) 286 if (PTR_ERR(dent) != -ENOENT)
287 printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); 287 printk(KERN_ERR "error %ld\n", PTR_ERR(dent));
288 break; 288 break;
289 } 289 }
290 290
291 printk(KERN_DEBUG "\t%d: %s (%s)\n", 291 printk(KERN_ERR "\t%d: %s (%s)\n",
292 count++, dent->name, get_dent_type(dent->type)); 292 count++, dent->name, get_dent_type(dent->type));
293 293
294 nm.name = dent->name; 294 nm.name = dent->name;
@@ -312,8 +312,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
312 312
313 /* If the magic is incorrect, just hexdump the first bytes */ 313 /* If the magic is incorrect, just hexdump the first bytes */
314 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { 314 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
315 printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); 315 printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ);
316 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, 316 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
317 (void *)node, UBIFS_CH_SZ, 1); 317 (void *)node, UBIFS_CH_SZ, 1);
318 return; 318 return;
319 } 319 }
@@ -326,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
326 { 326 {
327 const struct ubifs_pad_node *pad = node; 327 const struct ubifs_pad_node *pad = node;
328 328
329 printk(KERN_DEBUG "\tpad_len %u\n", 329 printk(KERN_ERR "\tpad_len %u\n",
330 le32_to_cpu(pad->pad_len)); 330 le32_to_cpu(pad->pad_len));
331 break; 331 break;
332 } 332 }
@@ -335,50 +335,50 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
335 const struct ubifs_sb_node *sup = node; 335 const struct ubifs_sb_node *sup = node;
336 unsigned int sup_flags = le32_to_cpu(sup->flags); 336 unsigned int sup_flags = le32_to_cpu(sup->flags);
337 337
338 printk(KERN_DEBUG "\tkey_hash %d (%s)\n", 338 printk(KERN_ERR "\tkey_hash %d (%s)\n",
339 (int)sup->key_hash, get_key_hash(sup->key_hash)); 339 (int)sup->key_hash, get_key_hash(sup->key_hash));
340 printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", 340 printk(KERN_ERR "\tkey_fmt %d (%s)\n",
341 (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); 341 (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
342 printk(KERN_DEBUG "\tflags %#x\n", sup_flags); 342 printk(KERN_ERR "\tflags %#x\n", sup_flags);
343 printk(KERN_DEBUG "\t big_lpt %u\n", 343 printk(KERN_ERR "\t big_lpt %u\n",
344 !!(sup_flags & UBIFS_FLG_BIGLPT)); 344 !!(sup_flags & UBIFS_FLG_BIGLPT));
345 printk(KERN_DEBUG "\t space_fixup %u\n", 345 printk(KERN_ERR "\t space_fixup %u\n",
346 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); 346 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
347 printk(KERN_DEBUG "\tmin_io_size %u\n", 347 printk(KERN_ERR "\tmin_io_size %u\n",
348 le32_to_cpu(sup->min_io_size)); 348 le32_to_cpu(sup->min_io_size));
349 printk(KERN_DEBUG "\tleb_size %u\n", 349 printk(KERN_ERR "\tleb_size %u\n",
350 le32_to_cpu(sup->leb_size)); 350 le32_to_cpu(sup->leb_size));
351 printk(KERN_DEBUG "\tleb_cnt %u\n", 351 printk(KERN_ERR "\tleb_cnt %u\n",
352 le32_to_cpu(sup->leb_cnt)); 352 le32_to_cpu(sup->leb_cnt));
353 printk(KERN_DEBUG "\tmax_leb_cnt %u\n", 353 printk(KERN_ERR "\tmax_leb_cnt %u\n",
354 le32_to_cpu(sup->max_leb_cnt)); 354 le32_to_cpu(sup->max_leb_cnt));
355 printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", 355 printk(KERN_ERR "\tmax_bud_bytes %llu\n",
356 (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); 356 (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
357 printk(KERN_DEBUG "\tlog_lebs %u\n", 357 printk(KERN_ERR "\tlog_lebs %u\n",
358 le32_to_cpu(sup->log_lebs)); 358 le32_to_cpu(sup->log_lebs));
359 printk(KERN_DEBUG "\tlpt_lebs %u\n", 359 printk(KERN_ERR "\tlpt_lebs %u\n",
360 le32_to_cpu(sup->lpt_lebs)); 360 le32_to_cpu(sup->lpt_lebs));
361 printk(KERN_DEBUG "\torph_lebs %u\n", 361 printk(KERN_ERR "\torph_lebs %u\n",
362 le32_to_cpu(sup->orph_lebs)); 362 le32_to_cpu(sup->orph_lebs));
363 printk(KERN_DEBUG "\tjhead_cnt %u\n", 363 printk(KERN_ERR "\tjhead_cnt %u\n",
364 le32_to_cpu(sup->jhead_cnt)); 364 le32_to_cpu(sup->jhead_cnt));
365 printk(KERN_DEBUG "\tfanout %u\n", 365 printk(KERN_ERR "\tfanout %u\n",
366 le32_to_cpu(sup->fanout)); 366 le32_to_cpu(sup->fanout));
367 printk(KERN_DEBUG "\tlsave_cnt %u\n", 367 printk(KERN_ERR "\tlsave_cnt %u\n",
368 le32_to_cpu(sup->lsave_cnt)); 368 le32_to_cpu(sup->lsave_cnt));
369 printk(KERN_DEBUG "\tdefault_compr %u\n", 369 printk(KERN_ERR "\tdefault_compr %u\n",
370 (int)le16_to_cpu(sup->default_compr)); 370 (int)le16_to_cpu(sup->default_compr));
371 printk(KERN_DEBUG "\trp_size %llu\n", 371 printk(KERN_ERR "\trp_size %llu\n",
372 (unsigned long long)le64_to_cpu(sup->rp_size)); 372 (unsigned long long)le64_to_cpu(sup->rp_size));
373 printk(KERN_DEBUG "\trp_uid %u\n", 373 printk(KERN_ERR "\trp_uid %u\n",
374 le32_to_cpu(sup->rp_uid)); 374 le32_to_cpu(sup->rp_uid));
375 printk(KERN_DEBUG "\trp_gid %u\n", 375 printk(KERN_ERR "\trp_gid %u\n",
376 le32_to_cpu(sup->rp_gid)); 376 le32_to_cpu(sup->rp_gid));
377 printk(KERN_DEBUG "\tfmt_version %u\n", 377 printk(KERN_ERR "\tfmt_version %u\n",
378 le32_to_cpu(sup->fmt_version)); 378 le32_to_cpu(sup->fmt_version));
379 printk(KERN_DEBUG "\ttime_gran %u\n", 379 printk(KERN_ERR "\ttime_gran %u\n",
380 le32_to_cpu(sup->time_gran)); 380 le32_to_cpu(sup->time_gran));
381 printk(KERN_DEBUG "\tUUID %pUB\n", 381 printk(KERN_ERR "\tUUID %pUB\n",
382 sup->uuid); 382 sup->uuid);
383 break; 383 break;
384 } 384 }
@@ -386,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
386 { 386 {
387 const struct ubifs_mst_node *mst = node; 387 const struct ubifs_mst_node *mst = node;
388 388
389 printk(KERN_DEBUG "\thighest_inum %llu\n", 389 printk(KERN_ERR "\thighest_inum %llu\n",
390 (unsigned long long)le64_to_cpu(mst->highest_inum)); 390 (unsigned long long)le64_to_cpu(mst->highest_inum));
391 printk(KERN_DEBUG "\tcommit number %llu\n", 391 printk(KERN_ERR "\tcommit number %llu\n",
392 (unsigned long long)le64_to_cpu(mst->cmt_no)); 392 (unsigned long long)le64_to_cpu(mst->cmt_no));
393 printk(KERN_DEBUG "\tflags %#x\n", 393 printk(KERN_ERR "\tflags %#x\n",
394 le32_to_cpu(mst->flags)); 394 le32_to_cpu(mst->flags));
395 printk(KERN_DEBUG "\tlog_lnum %u\n", 395 printk(KERN_ERR "\tlog_lnum %u\n",
396 le32_to_cpu(mst->log_lnum)); 396 le32_to_cpu(mst->log_lnum));
397 printk(KERN_DEBUG "\troot_lnum %u\n", 397 printk(KERN_ERR "\troot_lnum %u\n",
398 le32_to_cpu(mst->root_lnum)); 398 le32_to_cpu(mst->root_lnum));
399 printk(KERN_DEBUG "\troot_offs %u\n", 399 printk(KERN_ERR "\troot_offs %u\n",
400 le32_to_cpu(mst->root_offs)); 400 le32_to_cpu(mst->root_offs));
401 printk(KERN_DEBUG "\troot_len %u\n", 401 printk(KERN_ERR "\troot_len %u\n",
402 le32_to_cpu(mst->root_len)); 402 le32_to_cpu(mst->root_len));
403 printk(KERN_DEBUG "\tgc_lnum %u\n", 403 printk(KERN_ERR "\tgc_lnum %u\n",
404 le32_to_cpu(mst->gc_lnum)); 404 le32_to_cpu(mst->gc_lnum));
405 printk(KERN_DEBUG "\tihead_lnum %u\n", 405 printk(KERN_ERR "\tihead_lnum %u\n",
406 le32_to_cpu(mst->ihead_lnum)); 406 le32_to_cpu(mst->ihead_lnum));
407 printk(KERN_DEBUG "\tihead_offs %u\n", 407 printk(KERN_ERR "\tihead_offs %u\n",
408 le32_to_cpu(mst->ihead_offs)); 408 le32_to_cpu(mst->ihead_offs));
409 printk(KERN_DEBUG "\tindex_size %llu\n", 409 printk(KERN_ERR "\tindex_size %llu\n",
410 (unsigned long long)le64_to_cpu(mst->index_size)); 410 (unsigned long long)le64_to_cpu(mst->index_size));
411 printk(KERN_DEBUG "\tlpt_lnum %u\n", 411 printk(KERN_ERR "\tlpt_lnum %u\n",
412 le32_to_cpu(mst->lpt_lnum)); 412 le32_to_cpu(mst->lpt_lnum));
413 printk(KERN_DEBUG "\tlpt_offs %u\n", 413 printk(KERN_ERR "\tlpt_offs %u\n",
414 le32_to_cpu(mst->lpt_offs)); 414 le32_to_cpu(mst->lpt_offs));
415 printk(KERN_DEBUG "\tnhead_lnum %u\n", 415 printk(KERN_ERR "\tnhead_lnum %u\n",
416 le32_to_cpu(mst->nhead_lnum)); 416 le32_to_cpu(mst->nhead_lnum));
417 printk(KERN_DEBUG "\tnhead_offs %u\n", 417 printk(KERN_ERR "\tnhead_offs %u\n",
418 le32_to_cpu(mst->nhead_offs)); 418 le32_to_cpu(mst->nhead_offs));
419 printk(KERN_DEBUG "\tltab_lnum %u\n", 419 printk(KERN_ERR "\tltab_lnum %u\n",
420 le32_to_cpu(mst->ltab_lnum)); 420 le32_to_cpu(mst->ltab_lnum));
421 printk(KERN_DEBUG "\tltab_offs %u\n", 421 printk(KERN_ERR "\tltab_offs %u\n",
422 le32_to_cpu(mst->ltab_offs)); 422 le32_to_cpu(mst->ltab_offs));
423 printk(KERN_DEBUG "\tlsave_lnum %u\n", 423 printk(KERN_ERR "\tlsave_lnum %u\n",
424 le32_to_cpu(mst->lsave_lnum)); 424 le32_to_cpu(mst->lsave_lnum));
425 printk(KERN_DEBUG "\tlsave_offs %u\n", 425 printk(KERN_ERR "\tlsave_offs %u\n",
426 le32_to_cpu(mst->lsave_offs)); 426 le32_to_cpu(mst->lsave_offs));
427 printk(KERN_DEBUG "\tlscan_lnum %u\n", 427 printk(KERN_ERR "\tlscan_lnum %u\n",
428 le32_to_cpu(mst->lscan_lnum)); 428 le32_to_cpu(mst->lscan_lnum));
429 printk(KERN_DEBUG "\tleb_cnt %u\n", 429 printk(KERN_ERR "\tleb_cnt %u\n",
430 le32_to_cpu(mst->leb_cnt)); 430 le32_to_cpu(mst->leb_cnt));
431 printk(KERN_DEBUG "\tempty_lebs %u\n", 431 printk(KERN_ERR "\tempty_lebs %u\n",
432 le32_to_cpu(mst->empty_lebs)); 432 le32_to_cpu(mst->empty_lebs));
433 printk(KERN_DEBUG "\tidx_lebs %u\n", 433 printk(KERN_ERR "\tidx_lebs %u\n",
434 le32_to_cpu(mst->idx_lebs)); 434 le32_to_cpu(mst->idx_lebs));
435 printk(KERN_DEBUG "\ttotal_free %llu\n", 435 printk(KERN_ERR "\ttotal_free %llu\n",
436 (unsigned long long)le64_to_cpu(mst->total_free)); 436 (unsigned long long)le64_to_cpu(mst->total_free));
437 printk(KERN_DEBUG "\ttotal_dirty %llu\n", 437 printk(KERN_ERR "\ttotal_dirty %llu\n",
438 (unsigned long long)le64_to_cpu(mst->total_dirty)); 438 (unsigned long long)le64_to_cpu(mst->total_dirty));
439 printk(KERN_DEBUG "\ttotal_used %llu\n", 439 printk(KERN_ERR "\ttotal_used %llu\n",
440 (unsigned long long)le64_to_cpu(mst->total_used)); 440 (unsigned long long)le64_to_cpu(mst->total_used));
441 printk(KERN_DEBUG "\ttotal_dead %llu\n", 441 printk(KERN_ERR "\ttotal_dead %llu\n",
442 (unsigned long long)le64_to_cpu(mst->total_dead)); 442 (unsigned long long)le64_to_cpu(mst->total_dead));
443 printk(KERN_DEBUG "\ttotal_dark %llu\n", 443 printk(KERN_ERR "\ttotal_dark %llu\n",
444 (unsigned long long)le64_to_cpu(mst->total_dark)); 444 (unsigned long long)le64_to_cpu(mst->total_dark));
445 break; 445 break;
446 } 446 }
@@ -448,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
448 { 448 {
449 const struct ubifs_ref_node *ref = node; 449 const struct ubifs_ref_node *ref = node;
450 450
451 printk(KERN_DEBUG "\tlnum %u\n", 451 printk(KERN_ERR "\tlnum %u\n",
452 le32_to_cpu(ref->lnum)); 452 le32_to_cpu(ref->lnum));
453 printk(KERN_DEBUG "\toffs %u\n", 453 printk(KERN_ERR "\toffs %u\n",
454 le32_to_cpu(ref->offs)); 454 le32_to_cpu(ref->offs));
455 printk(KERN_DEBUG "\tjhead %u\n", 455 printk(KERN_ERR "\tjhead %u\n",
456 le32_to_cpu(ref->jhead)); 456 le32_to_cpu(ref->jhead));
457 break; 457 break;
458 } 458 }
@@ -461,40 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
461 const struct ubifs_ino_node *ino = node; 461 const struct ubifs_ino_node *ino = node;
462 462
463 key_read(c, &ino->key, &key); 463 key_read(c, &ino->key, &key);
464 printk(KERN_DEBUG "\tkey %s\n", 464 printk(KERN_ERR "\tkey %s\n",
465 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 465 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
466 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", 466 printk(KERN_ERR "\tcreat_sqnum %llu\n",
467 (unsigned long long)le64_to_cpu(ino->creat_sqnum)); 467 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
468 printk(KERN_DEBUG "\tsize %llu\n", 468 printk(KERN_ERR "\tsize %llu\n",
469 (unsigned long long)le64_to_cpu(ino->size)); 469 (unsigned long long)le64_to_cpu(ino->size));
470 printk(KERN_DEBUG "\tnlink %u\n", 470 printk(KERN_ERR "\tnlink %u\n",
471 le32_to_cpu(ino->nlink)); 471 le32_to_cpu(ino->nlink));
472 printk(KERN_DEBUG "\tatime %lld.%u\n", 472 printk(KERN_ERR "\tatime %lld.%u\n",
473 (long long)le64_to_cpu(ino->atime_sec), 473 (long long)le64_to_cpu(ino->atime_sec),
474 le32_to_cpu(ino->atime_nsec)); 474 le32_to_cpu(ino->atime_nsec));
475 printk(KERN_DEBUG "\tmtime %lld.%u\n", 475 printk(KERN_ERR "\tmtime %lld.%u\n",
476 (long long)le64_to_cpu(ino->mtime_sec), 476 (long long)le64_to_cpu(ino->mtime_sec),
477 le32_to_cpu(ino->mtime_nsec)); 477 le32_to_cpu(ino->mtime_nsec));
478 printk(KERN_DEBUG "\tctime %lld.%u\n", 478 printk(KERN_ERR "\tctime %lld.%u\n",
479 (long long)le64_to_cpu(ino->ctime_sec), 479 (long long)le64_to_cpu(ino->ctime_sec),
480 le32_to_cpu(ino->ctime_nsec)); 480 le32_to_cpu(ino->ctime_nsec));
481 printk(KERN_DEBUG "\tuid %u\n", 481 printk(KERN_ERR "\tuid %u\n",
482 le32_to_cpu(ino->uid)); 482 le32_to_cpu(ino->uid));
483 printk(KERN_DEBUG "\tgid %u\n", 483 printk(KERN_ERR "\tgid %u\n",
484 le32_to_cpu(ino->gid)); 484 le32_to_cpu(ino->gid));
485 printk(KERN_DEBUG "\tmode %u\n", 485 printk(KERN_ERR "\tmode %u\n",
486 le32_to_cpu(ino->mode)); 486 le32_to_cpu(ino->mode));
487 printk(KERN_DEBUG "\tflags %#x\n", 487 printk(KERN_ERR "\tflags %#x\n",
488 le32_to_cpu(ino->flags)); 488 le32_to_cpu(ino->flags));
489 printk(KERN_DEBUG "\txattr_cnt %u\n", 489 printk(KERN_ERR "\txattr_cnt %u\n",
490 le32_to_cpu(ino->xattr_cnt)); 490 le32_to_cpu(ino->xattr_cnt));
491 printk(KERN_DEBUG "\txattr_size %u\n", 491 printk(KERN_ERR "\txattr_size %u\n",
492 le32_to_cpu(ino->xattr_size)); 492 le32_to_cpu(ino->xattr_size));
493 printk(KERN_DEBUG "\txattr_names %u\n", 493 printk(KERN_ERR "\txattr_names %u\n",
494 le32_to_cpu(ino->xattr_names)); 494 le32_to_cpu(ino->xattr_names));
495 printk(KERN_DEBUG "\tcompr_type %#x\n", 495 printk(KERN_ERR "\tcompr_type %#x\n",
496 (int)le16_to_cpu(ino->compr_type)); 496 (int)le16_to_cpu(ino->compr_type));
497 printk(KERN_DEBUG "\tdata len %u\n", 497 printk(KERN_ERR "\tdata len %u\n",
498 le32_to_cpu(ino->data_len)); 498 le32_to_cpu(ino->data_len));
499 break; 499 break;
500 } 500 }
@@ -505,16 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
505 int nlen = le16_to_cpu(dent->nlen); 505 int nlen = le16_to_cpu(dent->nlen);
506 506
507 key_read(c, &dent->key, &key); 507 key_read(c, &dent->key, &key);
508 printk(KERN_DEBUG "\tkey %s\n", 508 printk(KERN_ERR "\tkey %s\n",
509 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 509 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
510 printk(KERN_DEBUG "\tinum %llu\n", 510 printk(KERN_ERR "\tinum %llu\n",
511 (unsigned long long)le64_to_cpu(dent->inum)); 511 (unsigned long long)le64_to_cpu(dent->inum));
512 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); 512 printk(KERN_ERR "\ttype %d\n", (int)dent->type);
513 printk(KERN_DEBUG "\tnlen %d\n", nlen); 513 printk(KERN_ERR "\tnlen %d\n", nlen);
514 printk(KERN_DEBUG "\tname "); 514 printk(KERN_ERR "\tname ");
515 515
516 if (nlen > UBIFS_MAX_NLEN) 516 if (nlen > UBIFS_MAX_NLEN)
517 printk(KERN_DEBUG "(bad name length, not printing, " 517 printk(KERN_ERR "(bad name length, not printing, "
518 "bad or corrupted node)"); 518 "bad or corrupted node)");
519 else { 519 else {
520 for (i = 0; i < nlen && dent->name[i]; i++) 520 for (i = 0; i < nlen && dent->name[i]; i++)
@@ -530,16 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
530 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; 530 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
531 531
532 key_read(c, &dn->key, &key); 532 key_read(c, &dn->key, &key);
533 printk(KERN_DEBUG "\tkey %s\n", 533 printk(KERN_ERR "\tkey %s\n",
534 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 534 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
535 printk(KERN_DEBUG "\tsize %u\n", 535 printk(KERN_ERR "\tsize %u\n",
536 le32_to_cpu(dn->size)); 536 le32_to_cpu(dn->size));
537 printk(KERN_DEBUG "\tcompr_typ %d\n", 537 printk(KERN_ERR "\tcompr_typ %d\n",
538 (int)le16_to_cpu(dn->compr_type)); 538 (int)le16_to_cpu(dn->compr_type));
539 printk(KERN_DEBUG "\tdata size %d\n", 539 printk(KERN_ERR "\tdata size %d\n",
540 dlen); 540 dlen);
541 printk(KERN_DEBUG "\tdata:\n"); 541 printk(KERN_ERR "\tdata:\n");
542 print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, 542 print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
543 (void *)&dn->data, dlen, 0); 543 (void *)&dn->data, dlen, 0);
544 break; 544 break;
545 } 545 }
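
For reference, both hex dumps in this function share the same layout parameters; only the level changes. Annotating the data-node call against the kernel's print_hex_dump() signature (a reading aid, not part of the patch):

    print_hex_dump(KERN_ERR,            /* level: error instead of debug */
                   "\t",                /* prefix printed on every row */
                   DUMP_PREFIX_OFFSET,  /* label rows with buffer offsets */
                   32,                  /* rowsize: 32 bytes per line */
                   1,                   /* groupsize: single bytes */
                   (void *)&dn->data,   /* buffer to dump */
                   dlen,                /* length in bytes */
                   0);                  /* ascii: no trailing text column */
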
@@ -547,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
547 { 547 {
548 const struct ubifs_trun_node *trun = node; 548 const struct ubifs_trun_node *trun = node;
549 549
550 printk(KERN_DEBUG "\tinum %u\n", 550 printk(KERN_ERR "\tinum %u\n",
551 le32_to_cpu(trun->inum)); 551 le32_to_cpu(trun->inum));
552 printk(KERN_DEBUG "\told_size %llu\n", 552 printk(KERN_ERR "\told_size %llu\n",
553 (unsigned long long)le64_to_cpu(trun->old_size)); 553 (unsigned long long)le64_to_cpu(trun->old_size));
554 printk(KERN_DEBUG "\tnew_size %llu\n", 554 printk(KERN_ERR "\tnew_size %llu\n",
555 (unsigned long long)le64_to_cpu(trun->new_size)); 555 (unsigned long long)le64_to_cpu(trun->new_size));
556 break; 556 break;
557 } 557 }
@@ -560,17 +560,17 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
560 const struct ubifs_idx_node *idx = node; 560 const struct ubifs_idx_node *idx = node;
561 561
562 n = le16_to_cpu(idx->child_cnt); 562 n = le16_to_cpu(idx->child_cnt);
563 printk(KERN_DEBUG "\tchild_cnt %d\n", n); 563 printk(KERN_ERR "\tchild_cnt %d\n", n);
564 printk(KERN_DEBUG "\tlevel %d\n", 564 printk(KERN_ERR "\tlevel %d\n",
565 (int)le16_to_cpu(idx->level)); 565 (int)le16_to_cpu(idx->level));
566 printk(KERN_DEBUG "\tBranches:\n"); 566 printk(KERN_ERR "\tBranches:\n");
567 567
568 for (i = 0; i < n && i < c->fanout - 1; i++) { 568 for (i = 0; i < n && i < c->fanout - 1; i++) {
569 const struct ubifs_branch *br; 569 const struct ubifs_branch *br;
570 570
571 br = ubifs_idx_branch(c, idx, i); 571 br = ubifs_idx_branch(c, idx, i);
572 key_read(c, &br->key, &key); 572 key_read(c, &br->key, &key);
573 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", 573 printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n",
574 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), 574 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
575 le32_to_cpu(br->len), 575 le32_to_cpu(br->len),
576 dbg_snprintf_key(c, &key, key_buf, 576 dbg_snprintf_key(c, &key, key_buf,
@@ -584,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
584 { 584 {
585 const struct ubifs_orph_node *orph = node; 585 const struct ubifs_orph_node *orph = node;
586 586
587 printk(KERN_DEBUG "\tcommit number %llu\n", 587 printk(KERN_ERR "\tcommit number %llu\n",
588 (unsigned long long) 588 (unsigned long long)
589 le64_to_cpu(orph->cmt_no) & LLONG_MAX); 589 le64_to_cpu(orph->cmt_no) & LLONG_MAX);
590 printk(KERN_DEBUG "\tlast node flag %llu\n", 590 printk(KERN_ERR "\tlast node flag %llu\n",
591 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); 591 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
592 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; 592 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
593 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); 593 printk(KERN_ERR "\t%d orphan inode numbers:\n", n);
594 for (i = 0; i < n; i++) 594 for (i = 0; i < n; i++)
595 printk(KERN_DEBUG "\t ino %llu\n", 595 printk(KERN_ERR "\t ino %llu\n",
596 (unsigned long long)le64_to_cpu(orph->inos[i])); 596 (unsigned long long)le64_to_cpu(orph->inos[i]));
597 break; 597 break;
598 } 598 }
599 default: 599 default:
600 printk(KERN_DEBUG "node type %d was not recognized\n", 600 printk(KERN_ERR "node type %d was not recognized\n",
601 (int)ch->node_type); 601 (int)ch->node_type);
602 } 602 }
603 spin_unlock(&dbg_lock); 603 spin_unlock(&dbg_lock);
@@ -606,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
606void dbg_dump_budget_req(const struct ubifs_budget_req *req) 606void dbg_dump_budget_req(const struct ubifs_budget_req *req)
607{ 607{
608 spin_lock(&dbg_lock); 608 spin_lock(&dbg_lock);
609 printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", 609 printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n",
610 req->new_ino, req->dirtied_ino); 610 req->new_ino, req->dirtied_ino);
611 printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", 611 printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n",
612 req->new_ino_d, req->dirtied_ino_d); 612 req->new_ino_d, req->dirtied_ino_d);
613 printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", 613 printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n",
614 req->new_page, req->dirtied_page); 614 req->new_page, req->dirtied_page);
615 printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", 615 printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n",
616 req->new_dent, req->mod_dent); 616 req->new_dent, req->mod_dent);
617 printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); 617 printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth);
618 printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", 618 printk(KERN_ERR "\tdata_growth %d dd_growth %d\n",
619 req->data_growth, req->dd_growth); 619 req->data_growth, req->dd_growth);
620 spin_unlock(&dbg_lock); 620 spin_unlock(&dbg_lock);
621} 621}
@@ -623,12 +623,12 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
623void dbg_dump_lstats(const struct ubifs_lp_stats *lst) 623void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
624{ 624{
625 spin_lock(&dbg_lock); 625 spin_lock(&dbg_lock);
626 printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " 626 printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, "
627 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); 627 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
628 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " 628 printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, "
629 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, 629 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
630 lst->total_dirty); 630 lst->total_dirty);
631 printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " 631 printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, "
632 "total_dead %lld\n", lst->total_used, lst->total_dark, 632 "total_dead %lld\n", lst->total_used, lst->total_dark,
633 lst->total_dead); 633 lst->total_dead);
634 spin_unlock(&dbg_lock); 634 spin_unlock(&dbg_lock);
@@ -644,21 +644,21 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
644 644
645 spin_lock(&c->space_lock); 645 spin_lock(&c->space_lock);
646 spin_lock(&dbg_lock); 646 spin_lock(&dbg_lock);
647 printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " 647 printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, "
648 "total budget sum %lld\n", current->pid, 648 "total budget sum %lld\n", current->pid,
649 bi->data_growth + bi->dd_growth, 649 bi->data_growth + bi->dd_growth,
650 bi->data_growth + bi->dd_growth + bi->idx_growth); 650 bi->data_growth + bi->dd_growth + bi->idx_growth);
651 printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " 651 printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, "
652 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, 652 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
653 bi->idx_growth); 653 bi->idx_growth);
654 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " 654 printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, "
655 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, 655 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
656 bi->uncommitted_idx); 656 bi->uncommitted_idx);
657 printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", 657 printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
658 bi->page_budget, bi->inode_budget, bi->dent_budget); 658 bi->page_budget, bi->inode_budget, bi->dent_budget);
659 printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", 659 printk(KERN_ERR "\tnospace %u, nospace_rp %u\n",
660 bi->nospace, bi->nospace_rp); 660 bi->nospace, bi->nospace_rp);
661 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", 661 printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
662 c->dark_wm, c->dead_wm, c->max_idx_node_sz); 662 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
663 663
664 if (bi != &c->bi) 664 if (bi != &c->bi)
@@ -669,38 +669,38 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
669 */ 669 */
670 goto out_unlock; 670 goto out_unlock;
671 671
672 printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", 672 printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
673 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); 673 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
674 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 674 printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
675 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 675 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
676 atomic_long_read(&c->dirty_zn_cnt), 676 atomic_long_read(&c->dirty_zn_cnt),
677 atomic_long_read(&c->clean_zn_cnt)); 677 atomic_long_read(&c->clean_zn_cnt));
678 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 678 printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n",
679 c->gc_lnum, c->ihead_lnum); 679 c->gc_lnum, c->ihead_lnum);
680 680
681 /* If we are in R/O mode, journal heads do not exist */ 681 /* If we are in R/O mode, journal heads do not exist */
682 if (c->jheads) 682 if (c->jheads)
683 for (i = 0; i < c->jhead_cnt; i++) 683 for (i = 0; i < c->jhead_cnt; i++)
684 printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", 684 printk(KERN_ERR "\tjhead %s\t LEB %d\n",
685 dbg_jhead(c->jheads[i].wbuf.jhead), 685 dbg_jhead(c->jheads[i].wbuf.jhead),
686 c->jheads[i].wbuf.lnum); 686 c->jheads[i].wbuf.lnum);
687 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { 687 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
688 bud = rb_entry(rb, struct ubifs_bud, rb); 688 bud = rb_entry(rb, struct ubifs_bud, rb);
689 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); 689 printk(KERN_ERR "\tbud LEB %d\n", bud->lnum);
690 } 690 }
691 list_for_each_entry(bud, &c->old_buds, list) 691 list_for_each_entry(bud, &c->old_buds, list)
692 printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); 692 printk(KERN_ERR "\told bud LEB %d\n", bud->lnum);
693 list_for_each_entry(idx_gc, &c->idx_gc, list) 693 list_for_each_entry(idx_gc, &c->idx_gc, list)
694 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", 694 printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n",
695 idx_gc->lnum, idx_gc->unmap); 695 idx_gc->lnum, idx_gc->unmap);
696 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 696 printk(KERN_ERR "\tcommit state %d\n", c->cmt_state);
697 697
698 /* Print budgeting predictions */ 698 /* Print budgeting predictions */
699 available = ubifs_calc_available(c, c->bi.min_idx_lebs); 699 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
700 outstanding = c->bi.data_growth + c->bi.dd_growth; 700 outstanding = c->bi.data_growth + c->bi.dd_growth;
701 free = ubifs_get_free_space_nolock(c); 701 free = ubifs_get_free_space_nolock(c);
702 printk(KERN_DEBUG "Budgeting predictions:\n"); 702 printk(KERN_ERR "Budgeting predictions:\n");
703 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 703 printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n",
704 available, outstanding, free); 704 available, outstanding, free);
705out_unlock: 705out_unlock:
706 spin_unlock(&dbg_lock); 706 spin_unlock(&dbg_lock);
@@ -720,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
720 dark = ubifs_calc_dark(c, spc); 720 dark = ubifs_calc_dark(c, spc);
721 721
722 if (lp->flags & LPROPS_INDEX) 722 if (lp->flags & LPROPS_INDEX)
723 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " 723 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
724 "free + dirty %-8d flags %#x (", lp->lnum, lp->free, 724 "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
725 lp->dirty, c->leb_size - spc, spc, lp->flags); 725 lp->dirty, c->leb_size - spc, spc, lp->flags);
726 else 726 else
727 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " 727 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
728 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " 728 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
729 "flags %#-4x (", lp->lnum, lp->free, lp->dirty, 729 "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
730 c->leb_size - spc, spc, dark, dead, 730 c->leb_size - spc, spc, dark, dead,
@@ -807,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
807 struct ubifs_lprops lp; 807 struct ubifs_lprops lp;
808 struct ubifs_lp_stats lst; 808 struct ubifs_lp_stats lst;
809 809
810 printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", 810 printk(KERN_ERR "(pid %d) start dumping LEB properties\n",
811 current->pid); 811 current->pid);
812 ubifs_get_lp_stats(c, &lst); 812 ubifs_get_lp_stats(c, &lst);
813 dbg_dump_lstats(&lst); 813 dbg_dump_lstats(&lst);
@@ -819,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
819 819
820 dbg_dump_lprop(c, &lp); 820 dbg_dump_lprop(c, &lp);
821 } 821 }
822 printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", 822 printk(KERN_ERR "(pid %d) finish dumping LEB properties\n",
823 current->pid); 823 current->pid);
824} 824}
825 825
@@ -828,35 +828,35 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
828 int i; 828 int i;
829 829
830 spin_lock(&dbg_lock); 830 spin_lock(&dbg_lock);
831 printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); 831 printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid);
832 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); 832 printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz);
833 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); 833 printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz);
834 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); 834 printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz);
835 printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); 835 printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz);
836 printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); 836 printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz);
837 printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); 837 printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt);
838 printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); 838 printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght);
839 printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); 839 printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt);
840 printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); 840 printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt);
841 printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); 841 printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
842 printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); 842 printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
843 printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); 843 printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt);
844 printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); 844 printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits);
845 printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); 845 printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
846 printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); 846 printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
847 printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); 847 printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
848 printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); 848 printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits);
849 printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); 849 printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits);
850 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); 850 printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
851 printk(KERN_DEBUG "\tLPT head is at %d:%d\n", 851 printk(KERN_ERR "\tLPT head is at %d:%d\n",
852 c->nhead_lnum, c->nhead_offs); 852 c->nhead_lnum, c->nhead_offs);
853 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", 853 printk(KERN_ERR "\tLPT ltab is at %d:%d\n",
854 c->ltab_lnum, c->ltab_offs); 854 c->ltab_lnum, c->ltab_offs);
855 if (c->big_lpt) 855 if (c->big_lpt)
856 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", 856 printk(KERN_ERR "\tLPT lsave is at %d:%d\n",
857 c->lsave_lnum, c->lsave_offs); 857 c->lsave_lnum, c->lsave_offs);
858 for (i = 0; i < c->lpt_lebs; i++) 858 for (i = 0; i < c->lpt_lebs; i++)
859 printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " 859 printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d "
860 "cmt %d\n", i + c->lpt_first, c->ltab[i].free, 860 "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
861 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); 861 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
862 spin_unlock(&dbg_lock); 862 spin_unlock(&dbg_lock);
@@ -867,12 +867,12 @@ void dbg_dump_sleb(const struct ubifs_info *c,
867{ 867{
868 struct ubifs_scan_node *snod; 868 struct ubifs_scan_node *snod;
869 869
870 printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", 870 printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n",
871 current->pid, sleb->lnum, offs); 871 current->pid, sleb->lnum, offs);
872 872
873 list_for_each_entry(snod, &sleb->nodes, list) { 873 list_for_each_entry(snod, &sleb->nodes, list) {
874 cond_resched(); 874 cond_resched();
875 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, 875 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum,
876 snod->offs, snod->len); 876 snod->offs, snod->len);
877 dbg_dump_node(c, snod->node); 877 dbg_dump_node(c, snod->node);
878 } 878 }
@@ -887,7 +887,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
887 if (dbg_is_tst_rcvry(c)) 887 if (dbg_is_tst_rcvry(c))
888 return; 888 return;
889 889
890 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 890 printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
891 current->pid, lnum); 891 current->pid, lnum);
892 892
893 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 893 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -902,17 +902,17 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
902 goto out; 902 goto out;
903 } 903 }
904 904
905 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, 905 printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum,
906 sleb->nodes_cnt, sleb->endpt); 906 sleb->nodes_cnt, sleb->endpt);
907 907
908 list_for_each_entry(snod, &sleb->nodes, list) { 908 list_for_each_entry(snod, &sleb->nodes, list) {
909 cond_resched(); 909 cond_resched();
910 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, 910 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum,
911 snod->offs, snod->len); 911 snod->offs, snod->len);
912 dbg_dump_node(c, snod->node); 912 dbg_dump_node(c, snod->node);
913 } 913 }
914 914
915 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 915 printk(KERN_ERR "(pid %d) finish dumping LEB %d\n",
916 current->pid, lnum); 916 current->pid, lnum);
917 ubifs_scan_destroy(sleb); 917 ubifs_scan_destroy(sleb);
918 918
@@ -934,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c,
934 else 934 else
935 zbr = &c->zroot; 935 zbr = &c->zroot;
936 936
937 printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" 937 printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
938 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, 938 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
939 zbr->len, znode->parent, znode->iip, znode->level, 939 zbr->len, znode->parent, znode->iip, znode->level,
940 znode->child_cnt, znode->flags); 940 znode->child_cnt, znode->flags);
@@ -944,18 +944,18 @@ void dbg_dump_znode(const struct ubifs_info *c,
944 return; 944 return;
945 } 945 }
946 946
947 printk(KERN_DEBUG "zbranches:\n"); 947 printk(KERN_ERR "zbranches:\n");
948 for (n = 0; n < znode->child_cnt; n++) { 948 for (n = 0; n < znode->child_cnt; n++) {
949 zbr = &znode->zbranch[n]; 949 zbr = &znode->zbranch[n];
950 if (znode->level > 0) 950 if (znode->level > 0)
951 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " 951 printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key "
952 "%s\n", n, zbr->znode, zbr->lnum, 952 "%s\n", n, zbr->znode, zbr->lnum,
953 zbr->offs, zbr->len, 953 zbr->offs, zbr->len,
954 dbg_snprintf_key(c, &zbr->key, 954 dbg_snprintf_key(c, &zbr->key,
955 key_buf, 955 key_buf,
956 DBG_KEY_BUF_LEN)); 956 DBG_KEY_BUF_LEN));
957 else 957 else
958 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " 958 printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key "
959 "%s\n", n, zbr->znode, zbr->lnum, 959 "%s\n", n, zbr->znode, zbr->lnum,
960 zbr->offs, zbr->len, 960 zbr->offs, zbr->len,
961 dbg_snprintf_key(c, &zbr->key, 961 dbg_snprintf_key(c, &zbr->key,
@@ -969,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
969{ 969{
970 int i; 970 int i;
971 971
972 printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", 972 printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n",
973 current->pid, cat, heap->cnt); 973 current->pid, cat, heap->cnt);
974 for (i = 0; i < heap->cnt; i++) { 974 for (i = 0; i < heap->cnt; i++) {
975 struct ubifs_lprops *lprops = heap->arr[i]; 975 struct ubifs_lprops *lprops = heap->arr[i];
976 976
977 printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " 977 printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d "
978 "flags %d\n", i, lprops->lnum, lprops->hpos, 978 "flags %d\n", i, lprops->lnum, lprops->hpos,
979 lprops->free, lprops->dirty, lprops->flags); 979 lprops->free, lprops->dirty, lprops->flags);
980 } 980 }
981 printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); 981 printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid);
982} 982}
983 983
984void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 984void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -986,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
986{ 986{
987 int i; 987 int i;
988 988
989 printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); 989 printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid);
990 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 990 printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n",
991 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 991 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
992 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 992 printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n",
993 pnode->flags, iip, pnode->level, pnode->num); 993 pnode->flags, iip, pnode->level, pnode->num);
994 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 994 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
995 struct ubifs_lprops *lp = &pnode->lprops[i]; 995 struct ubifs_lprops *lp = &pnode->lprops[i];
996 996
997 printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", 997 printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n",
998 i, lp->free, lp->dirty, lp->flags, lp->lnum); 998 i, lp->free, lp->dirty, lp->flags, lp->lnum);
999 } 999 }
1000} 1000}
@@ -1004,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c)
1004 struct ubifs_znode *znode; 1004 struct ubifs_znode *znode;
1005 int level; 1005 int level;
1006 1006
1007 printk(KERN_DEBUG "\n"); 1007 printk(KERN_ERR "\n");
1008 printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); 1008 printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid);
1009 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 1009 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
1010 level = znode->level; 1010 level = znode->level;
1011 printk(KERN_DEBUG "== Level %d ==\n", level); 1011 printk(KERN_ERR "== Level %d ==\n", level);
1012 while (znode) { 1012 while (znode) {
1013 if (level != znode->level) { 1013 if (level != znode->level) {
1014 level = znode->level; 1014 level = znode->level;
1015 printk(KERN_DEBUG "== Level %d ==\n", level); 1015 printk(KERN_ERR "== Level %d ==\n", level);
1016 } 1016 }
1017 dbg_dump_znode(c, znode); 1017 dbg_dump_znode(c, znode);
1018 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 1018 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
1019 } 1019 }
1020 printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); 1020 printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid);
1021} 1021}
1022 1022
1023static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, 1023static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
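
The debug.c part of this patch is one mechanical substitution: every dump printk moves from KERN_DEBUG to KERN_ERR, so the dumps that accompany a corruption report show up at the default console loglevel instead of being filtered out. A minimal sketch of how the level could be kept in a single place; the ubifs_dump_pr() wrapper is hypothetical, not something the patch adds:

    /* Hypothetical wrapper: one point of control for the dump loglevel. */
    #define ubifs_dump_pr(fmt, ...) printk(KERN_ERR fmt, ##__VA_ARGS__)

    static void dump_sb_excerpt(const struct ubifs_sb_node *sup)
    {
            /* Same output as the patched dbg_dump_node() lines above. */
            ubifs_dump_pr("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size));
            ubifs_dump_pr("\tleb_size    %u\n", le32_to_cpu(sup->leb_size));
    }
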
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index ad1a6fee601..9f717655df1 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -164,9 +164,7 @@ struct ubifs_global_debug_info {
164#define dbg_dump_stack() dump_stack() 164#define dbg_dump_stack() dump_stack()
165 165
166#define dbg_err(fmt, ...) do { \ 166#define dbg_err(fmt, ...) do { \
167 spin_lock(&dbg_lock); \
168 ubifs_err(fmt, ##__VA_ARGS__); \ 167 ubifs_err(fmt, ##__VA_ARGS__); \
169 spin_unlock(&dbg_lock); \
170} while (0) 168} while (0)
171 169
172#define ubifs_dbg_msg(type, fmt, ...) \ 170#define ubifs_dbg_msg(type, fmt, ...) \
@@ -217,7 +215,6 @@ struct ubifs_global_debug_info {
217/* Additional recovery messages */ 215/* Additional recovery messages */
218#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 216#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
219 217
220extern spinlock_t dbg_lock;
221extern struct ubifs_global_debug_info ubifs_dbg; 218extern struct ubifs_global_debug_info ubifs_dbg;
222 219
223static inline int dbg_is_chk_gen(const struct ubifs_info *c) 220static inline int dbg_is_chk_gen(const struct ubifs_info *c)
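
With the spin_lock()/spin_unlock() pair dropped, dbg_err() reduces to a plain ubifs_err() call, and dbg_lock no longer needs to be exported from debug.c, hence the deleted extern. The do-while (0) shell survives only so the macro still behaves as a single statement; serializing one one-line message under a spinlock bought little, while the lock remains useful inside debug.c to keep multi-line dumps from interleaving. The macro after the change:

    #define dbg_err(fmt, ...) do {                  \
            ubifs_err(fmt, ##__VA_ARGS__);          \
    } while (0)
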
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index d6fe1c79f18..ec9f1870ab7 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -566,6 +566,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
566 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 566 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
567 int err, budgeted = 1; 567 int err, budgeted = 1;
568 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; 568 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
569 unsigned int saved_nlink = inode->i_nlink;
569 570
570 /* 571 /*
571 * Budget request settings: deletion direntry, deletion inode (+1 for 572 * Budget request settings: deletion direntry, deletion inode (+1 for
@@ -613,7 +614,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
613out_cancel: 614out_cancel:
614 dir->i_size += sz_change; 615 dir->i_size += sz_change;
615 dir_ui->ui_size = dir->i_size; 616 dir_ui->ui_size = dir->i_size;
616 inc_nlink(inode); 617 set_nlink(inode, saved_nlink);
617 unlock_2_inodes(dir, inode); 618 unlock_2_inodes(dir, inode);
618 if (budgeted) 619 if (budgeted)
619 ubifs_release_budget(c, &req); 620 ubifs_release_budget(c, &req);
@@ -704,8 +705,7 @@ out_cancel:
704 dir->i_size += sz_change; 705 dir->i_size += sz_change;
705 dir_ui->ui_size = dir->i_size; 706 dir_ui->ui_size = dir->i_size;
706 inc_nlink(dir); 707 inc_nlink(dir);
707 inc_nlink(inode); 708 set_nlink(inode, 2);
708 inc_nlink(inode);
709 unlock_2_inodes(dir, inode); 709 unlock_2_inodes(dir, inode);
710 if (budgeted) 710 if (budgeted)
711 ubifs_release_budget(c, &req); 711 ubifs_release_budget(c, &req);
@@ -977,6 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
977 struct ubifs_budget_req ino_req = { .dirtied_ino = 1, 977 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
978 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; 978 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
979 struct timespec time; 979 struct timespec time;
980 unsigned int saved_nlink;
980 981
981 /* 982 /*
982 * Budget request settings: deletion direntry, new direntry, removing 983 * Budget request settings: deletion direntry, new direntry, removing
@@ -1059,13 +1060,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1059 if (unlink) { 1060 if (unlink) {
1060 /* 1061 /*
1061 * Directories cannot have hard-links, so if this is a 1062 * Directories cannot have hard-links, so if this is a
1062 * directory, decrement its @i_nlink twice because an empty 1063 * directory, just clear @i_nlink.
1063 * directory has @i_nlink 2.
1064 */ 1064 */
1065 saved_nlink = new_inode->i_nlink;
1065 if (is_dir) 1066 if (is_dir)
1067 clear_nlink(new_inode);
1068 else
1066 drop_nlink(new_inode); 1069 drop_nlink(new_inode);
1067 new_inode->i_ctime = time; 1070 new_inode->i_ctime = time;
1068 drop_nlink(new_inode);
1069 } else { 1071 } else {
1070 new_dir->i_size += new_sz; 1072 new_dir->i_size += new_sz;
1071 ubifs_inode(new_dir)->ui_size = new_dir->i_size; 1073 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1102,9 +1104,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1102 1104
1103out_cancel: 1105out_cancel:
1104 if (unlink) { 1106 if (unlink) {
1105 if (is_dir) 1107 set_nlink(new_inode, saved_nlink);
1106 inc_nlink(new_inode);
1107 inc_nlink(new_inode);
1108 } else { 1108 } else {
1109 new_dir->i_size -= new_sz; 1109 new_dir->i_size -= new_sz;
1110 ubifs_inode(new_dir)->ui_size = new_dir->i_size; 1110 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
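
All three error paths in dir.c switch from replaying inc_nlink() calls to snapshotting i_nlink up front and restoring it wholesale with set_nlink(); that also covers directories, where the rename path now uses clear_nlink() instead of two drops. The pattern in miniature, with a hypothetical journal helper standing in for the real commit step:

    static int journal_commit(void);        /* hypothetical stand-in */

    static int unlink_like_op(struct inode *inode)
    {
            unsigned int saved_nlink = inode->i_nlink;
            int err;

            drop_nlink(inode);              /* speculative update */
            err = journal_commit();
            if (err) {
                    /* One restore covers any mix of drops and clears. */
                    set_nlink(inode, saved_nlink);
                    return err;
            }
            return 0;
    }
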
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f9c234bf33d..5c8f6dc1d28 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1042,10 +1042,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1042 * the page size, the remaining memory is zeroed when mapped, and 1042 * the page size, the remaining memory is zeroed when mapped, and
1043 * writes to that region are not written out to the file." 1043 * writes to that region are not written out to the file."
1044 */ 1044 */
1045 kaddr = kmap_atomic(page, KM_USER0); 1045 kaddr = kmap_atomic(page);
1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); 1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
1047 flush_dcache_page(page); 1047 flush_dcache_page(page);
1048 kunmap_atomic(kaddr, KM_USER0); 1048 kunmap_atomic(kaddr);
1049 1049
1050 if (i_size > synced_i_size) { 1050 if (i_size > synced_i_size) {
1051 err = inode->i_sb->s_op->write_inode(inode, NULL); 1051 err = inode->i_sb->s_op->write_inode(inode, NULL);
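
This hunk tracks the tree-wide kmap_atomic() interface change: the caller no longer names a KM_USER0 slot, and kunmap_atomic() takes the mapped address rather than a slot constant. Old versus new for the tail-zeroing code above:

    /* Old interface: caller manages the atomic-kmap slot. */
    kaddr = kmap_atomic(page, KM_USER0);
    memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
    kunmap_atomic(kaddr, KM_USER0);

    /* New interface: the core picks the slot; hand the address back. */
    kaddr = kmap_atomic(page);
    memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
    kunmap_atomic(kaddr);
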
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index ee4f43f4bb9..2a935b31723 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -679,7 +679,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
679 ret == SCANNED_GARBAGE || 679 ret == SCANNED_GARBAGE ||
680 ret == SCANNED_A_BAD_PAD_NODE || 680 ret == SCANNED_A_BAD_PAD_NODE ||
681 ret == SCANNED_A_CORRUPT_NODE) { 681 ret == SCANNED_A_CORRUPT_NODE) {
682 dbg_rcvry("found corruption - %d", ret); 682 dbg_rcvry("found corruption (%d) at %d:%d",
683 ret, lnum, offs);
683 break; 684 break;
684 } else { 685 } else {
685 dbg_err("unexpected return value %d", ret); 686 dbg_err("unexpected return value %d", ret);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 6094c5a5d7a..771f7fb6ce9 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
410 } 410 }
411 411
412 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { 412 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
413 err = 7; 413 ubifs_err("too few main LEBs (%d), must be at least %d",
414 c->main_lebs, UBIFS_MIN_MAIN_LEBS);
414 goto failed; 415 goto failed;
415 } 416 }
416 417
417 if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || 418 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
418 c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { 419 if (c->max_bud_bytes < max_bytes) {
419 err = 8; 420 ubifs_err("too small journal (%lld bytes), must be at least "
421 "%lld bytes", c->max_bud_bytes, max_bytes);
422 goto failed;
423 }
424
425 max_bytes = (long long)c->leb_size * c->main_lebs;
426 if (c->max_bud_bytes > max_bytes) {
427 ubifs_err("too large journal size (%lld bytes), only %lld bytes"
428 "available in the main area",
429 c->max_bud_bytes, max_bytes);
420 goto failed; 430 goto failed;
421 } 431 }
422 432
@@ -450,7 +460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
450 goto failed; 460 goto failed;
451 } 461 }
452 462
453 max_bytes = c->main_lebs * (long long)c->leb_size;
454 if (c->rp_size < 0 || max_bytes < c->rp_size) { 463 if (c->rp_size < 0 || max_bytes < c->rp_size) {
455 err = 14; 464 err = 14;
456 goto failed; 465 goto failed;
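
validate_sb() stops reporting failures as bare error numbers (err = 7, err = 8, ...) that had to be looked up in the source and instead prints one self-describing message per rule; splitting the combined journal-size test in two is what gives each bound its own message. Removing the later max_bytes assignment is safe because max_bytes still holds c->leb_size * c->main_lebs from the second journal check. The shape of the pattern as a sketch; check_range() is hypothetical:

    /* Hypothetical helper: one test, one self-describing message, per rule. */
    static int check_range(long long val, long long lo, long long hi,
                           const char *what)
    {
            if (val < lo || val > hi) {
                    printk(KERN_ERR "UBIFS error: %s is %lld, expected %lld..%lld\n",
                           what, val, lo, hi);
                    return -EINVAL;
            }
            return 0;
    }
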
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 63765d58445..76e4e0566ad 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2076,15 +2076,13 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2076 goto out_umount; 2076 goto out_umount;
2077 } 2077 }
2078 2078
2079 sb->s_root = d_alloc_root(root); 2079 sb->s_root = d_make_root(root);
2080 if (!sb->s_root) 2080 if (!sb->s_root)
2081 goto out_iput; 2081 goto out_umount;
2082 2082
2083 mutex_unlock(&c->umount_mutex); 2083 mutex_unlock(&c->umount_mutex);
2084 return 0; 2084 return 0;
2085 2085
2086out_iput:
2087 iput(root);
2088out_umount: 2086out_umount:
2089 ubifs_umount(c); 2087 ubifs_umount(c);
2090out_unlock: 2088out_unlock:
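
d_make_root() differs from d_alloc_root() in reference ownership: it consumes the inode reference it is handed and performs the iput() itself when dentry allocation fails, which is why the out_iput label and its iput(root) can go and the failure path jumps straight to out_umount. In outline (a sketch, not the full fill_super):

    root = ubifs_iget(sb, UBIFS_ROOT_INO);  /* we hold one inode reference */
    if (IS_ERR(root))
            goto out_umount;

    sb->s_root = d_make_root(root);         /* consumes the ref either way */
    if (!sb->s_root)
            goto out_umount;                /* no iput(): d_make_root() did it */
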
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 12e94774aa8..93d59aceaae 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -84,9 +84,6 @@
84#define INUM_WARN_WATERMARK 0xFFF00000 84#define INUM_WARN_WATERMARK 0xFFF00000
85#define INUM_WATERMARK 0xFFFFFF00 85#define INUM_WATERMARK 0xFFFFFF00
86 86
87/* Largest key size supported in this implementation */
88#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
89
90/* Maximum number of entries in each LPT (LEB category) heap */ 87/* Maximum number of entries in each LPT (LEB category) heap */
91#define LPT_HEAP_SZ 256 88#define LPT_HEAP_SZ 256
92 89
@@ -277,10 +274,10 @@ struct ubifs_old_idx {
277 274
278/* The below union makes it easier to deal with keys */ 275/* The below union makes it easier to deal with keys */
279union ubifs_key { 276union ubifs_key {
280 uint8_t u8[CUR_MAX_KEY_LEN]; 277 uint8_t u8[UBIFS_SK_LEN];
281 uint32_t u32[CUR_MAX_KEY_LEN/4]; 278 uint32_t u32[UBIFS_SK_LEN/4];
282 uint64_t u64[CUR_MAX_KEY_LEN/8]; 279 uint64_t u64[UBIFS_SK_LEN/8];
283 __le32 j32[CUR_MAX_KEY_LEN/4]; 280 __le32 j32[UBIFS_SK_LEN/4];
284}; 281};
285 282
286/** 283/**
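
Since CUR_MAX_KEY_LEN was a straight alias for UBIFS_SK_LEN, the union can be sized by the media constant directly; all four members remain views of the same UBIFS_SK_LEN key bytes. A compile-time guard along these lines (not in the patch; it would need to sit inside a function) would catch a future mismatch between the views:

    BUILD_BUG_ON(sizeof(union ubifs_key) != UBIFS_SK_LEN);
    BUILD_BUG_ON(sizeof(((union ubifs_key *)0)->u32) != UBIFS_SK_LEN);
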
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 987585bb0a1..1ba2baaf436 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -105,7 +105,6 @@ static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt)
105} 105}
106 106
107static void udf_bitmap_free_blocks(struct super_block *sb, 107static void udf_bitmap_free_blocks(struct super_block *sb,
108 struct inode *inode,
109 struct udf_bitmap *bitmap, 108 struct udf_bitmap *bitmap,
110 struct kernel_lb_addr *bloc, 109 struct kernel_lb_addr *bloc,
111 uint32_t offset, 110 uint32_t offset,
@@ -172,7 +171,6 @@ error_return:
172} 171}
173 172
174static int udf_bitmap_prealloc_blocks(struct super_block *sb, 173static int udf_bitmap_prealloc_blocks(struct super_block *sb,
175 struct inode *inode,
176 struct udf_bitmap *bitmap, 174 struct udf_bitmap *bitmap,
177 uint16_t partition, uint32_t first_block, 175 uint16_t partition, uint32_t first_block,
178 uint32_t block_count) 176 uint32_t block_count)
@@ -223,7 +221,6 @@ out:
223} 221}
224 222
225static int udf_bitmap_new_block(struct super_block *sb, 223static int udf_bitmap_new_block(struct super_block *sb,
226 struct inode *inode,
227 struct udf_bitmap *bitmap, uint16_t partition, 224 struct udf_bitmap *bitmap, uint16_t partition,
228 uint32_t goal, int *err) 225 uint32_t goal, int *err)
229{ 226{
@@ -349,7 +346,6 @@ error_return:
349} 346}
350 347
351static void udf_table_free_blocks(struct super_block *sb, 348static void udf_table_free_blocks(struct super_block *sb,
352 struct inode *inode,
353 struct inode *table, 349 struct inode *table,
354 struct kernel_lb_addr *bloc, 350 struct kernel_lb_addr *bloc,
355 uint32_t offset, 351 uint32_t offset,
@@ -581,7 +577,6 @@ error_return:
581} 577}
582 578
583static int udf_table_prealloc_blocks(struct super_block *sb, 579static int udf_table_prealloc_blocks(struct super_block *sb,
584 struct inode *inode,
585 struct inode *table, uint16_t partition, 580 struct inode *table, uint16_t partition,
586 uint32_t first_block, uint32_t block_count) 581 uint32_t first_block, uint32_t block_count)
587{ 582{
@@ -643,7 +638,6 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
643} 638}
644 639
645static int udf_table_new_block(struct super_block *sb, 640static int udf_table_new_block(struct super_block *sb,
646 struct inode *inode,
647 struct inode *table, uint16_t partition, 641 struct inode *table, uint16_t partition,
648 uint32_t goal, int *err) 642 uint32_t goal, int *err)
649{ 643{
@@ -743,18 +737,23 @@ void udf_free_blocks(struct super_block *sb, struct inode *inode,
743 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 737 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
744 738
745 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { 739 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
746 udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap, 740 udf_bitmap_free_blocks(sb, map->s_uspace.s_bitmap,
747 bloc, offset, count); 741 bloc, offset, count);
748 } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { 742 } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
749 udf_table_free_blocks(sb, inode, map->s_uspace.s_table, 743 udf_table_free_blocks(sb, map->s_uspace.s_table,
750 bloc, offset, count); 744 bloc, offset, count);
751 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { 745 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
752 udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap, 746 udf_bitmap_free_blocks(sb, map->s_fspace.s_bitmap,
753 bloc, offset, count); 747 bloc, offset, count);
754 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { 748 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
755 udf_table_free_blocks(sb, inode, map->s_fspace.s_table, 749 udf_table_free_blocks(sb, map->s_fspace.s_table,
756 bloc, offset, count); 750 bloc, offset, count);
757 } 751 }
752
753 if (inode) {
754 inode_sub_bytes(inode,
755 ((sector_t)count) << sb->s_blocksize_bits);
756 }
758} 757}
759 758
760inline int udf_prealloc_blocks(struct super_block *sb, 759inline int udf_prealloc_blocks(struct super_block *sb,
@@ -763,29 +762,34 @@ inline int udf_prealloc_blocks(struct super_block *sb,
763 uint32_t block_count) 762 uint32_t block_count)
764{ 763{
765 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 764 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
765 sector_t allocated;
766 766
767 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) 767 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
768 return udf_bitmap_prealloc_blocks(sb, inode, 768 allocated = udf_bitmap_prealloc_blocks(sb,
769 map->s_uspace.s_bitmap, 769 map->s_uspace.s_bitmap,
770 partition, first_block, 770 partition, first_block,
771 block_count); 771 block_count);
772 else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) 772 else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
773 return udf_table_prealloc_blocks(sb, inode, 773 allocated = udf_table_prealloc_blocks(sb,
774 map->s_uspace.s_table, 774 map->s_uspace.s_table,
775 partition, first_block, 775 partition, first_block,
776 block_count); 776 block_count);
777 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) 777 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
778 return udf_bitmap_prealloc_blocks(sb, inode, 778 allocated = udf_bitmap_prealloc_blocks(sb,
779 map->s_fspace.s_bitmap, 779 map->s_fspace.s_bitmap,
780 partition, first_block, 780 partition, first_block,
781 block_count); 781 block_count);
782 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) 782 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
783 return udf_table_prealloc_blocks(sb, inode, 783 allocated = udf_table_prealloc_blocks(sb,
784 map->s_fspace.s_table, 784 map->s_fspace.s_table,
785 partition, first_block, 785 partition, first_block,
786 block_count); 786 block_count);
787 else 787 else
788 return 0; 788 return 0;
789
790 if (inode && allocated > 0)
791 inode_add_bytes(inode, allocated << sb->s_blocksize_bits);
792 return allocated;
789} 793}
790 794
791inline int udf_new_block(struct super_block *sb, 795inline int udf_new_block(struct super_block *sb,
@@ -793,25 +797,29 @@ inline int udf_new_block(struct super_block *sb,
793 uint16_t partition, uint32_t goal, int *err) 797 uint16_t partition, uint32_t goal, int *err)
794{ 798{
795 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 799 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
800 int block;
796 801
797 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) 802 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
798 return udf_bitmap_new_block(sb, inode, 803 block = udf_bitmap_new_block(sb,
799 map->s_uspace.s_bitmap, 804 map->s_uspace.s_bitmap,
800 partition, goal, err); 805 partition, goal, err);
801 else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) 806 else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
802 return udf_table_new_block(sb, inode, 807 block = udf_table_new_block(sb,
803 map->s_uspace.s_table, 808 map->s_uspace.s_table,
804 partition, goal, err);
805 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
806 return udf_bitmap_new_block(sb, inode,
807 map->s_fspace.s_bitmap,
808 partition, goal, err); 809 partition, goal, err);
810 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
811 block = udf_bitmap_new_block(sb,
812 map->s_fspace.s_bitmap,
813 partition, goal, err);
809 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) 814 else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
810 return udf_table_new_block(sb, inode, 815 block = udf_table_new_block(sb,
811 map->s_fspace.s_table, 816 map->s_fspace.s_table,
812 partition, goal, err); 817 partition, goal, err);
813 else { 818 else {
814 *err = -EIO; 819 *err = -EIO;
815 return 0; 820 return 0;
816 } 821 }
822 if (inode && block)
823 inode_add_bytes(inode, sb->s_blocksize);
824 return block;
817} 825}
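
The balloc.c changes are a refactor: the bitmap/table backends lose their inode parameter, and the i_blocks accounting via inode_add_bytes()/inode_sub_bytes() moves into the public wrappers, so it runs exactly once per allocation or free no matter which backend serviced the request. The shape of the pattern, sketched with a hypothetical backend:

    static void backend_free_blocks(struct super_block *sb, uint32_t count);

    void free_blocks_wrapper(struct super_block *sb, struct inode *inode,
                             uint32_t count)
    {
            backend_free_blocks(sb, count); /* backend never sees the inode */
            if (inode)
                    inode_sub_bytes(inode,
                                    ((sector_t)count) << sb->s_blocksize_bits);
    }
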
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d567b8448df..7f3f7ba3df6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,10 +87,10 @@ static int udf_adinicb_write_end(struct file *file,
87 char *kaddr; 87 char *kaddr;
88 struct udf_inode_info *iinfo = UDF_I(inode); 88 struct udf_inode_info *iinfo = UDF_I(inode);
89 89
90 kaddr = kmap_atomic(page, KM_USER0); 90 kaddr = kmap_atomic(page);
91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, 91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
92 kaddr + offset, copied); 92 kaddr + offset, copied);
93 kunmap_atomic(kaddr, KM_USER0); 93 kunmap_atomic(kaddr);
94 94
95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata); 95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
96} 96}
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 05ab48195be..7e5aae4bf46 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -116,6 +116,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
116 iinfo->i_lenEAttr = 0; 116 iinfo->i_lenEAttr = 0;
117 iinfo->i_lenAlloc = 0; 117 iinfo->i_lenAlloc = 0;
118 iinfo->i_use = 0; 118 iinfo->i_use = 0;
119 iinfo->i_checkpoint = 1;
119 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) 120 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
120 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; 121 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
121 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 122 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
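
The checkpoint field of a UDF (extended) file entry is expected to be nonzero, with a freshly created file starting at checkpoint 1, but it was previously neither initialized nor preserved, so zero got written back to disk. This hunk seeds it at inode creation, and the inode.c hunks below round-trip it between the in-memory inode and the on-disk entry. The lifecycle in miniature:

    iinfo->i_checkpoint = 1;                           /* udf_new_inode()    */
    iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint); /* udf_fill_inode()   */
    fe->checkpoint = cpu_to_le32(iinfo->i_checkpoint); /* udf_update_inode() */
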
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7699df7b319..7d752800835 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1358,6 +1358,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1358 iinfo->i_unique = le64_to_cpu(fe->uniqueID); 1358 iinfo->i_unique = le64_to_cpu(fe->uniqueID);
1359 iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); 1359 iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
1360 iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); 1360 iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
1361 iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
1361 offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr; 1362 offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr;
1362 } else { 1363 } else {
1363 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << 1364 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
@@ -1379,6 +1380,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1379 iinfo->i_unique = le64_to_cpu(efe->uniqueID); 1380 iinfo->i_unique = le64_to_cpu(efe->uniqueID);
1380 iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); 1381 iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
1381 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); 1382 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
1383 iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
1382 offset = sizeof(struct extendedFileEntry) + 1384 offset = sizeof(struct extendedFileEntry) +
1383 iinfo->i_lenEAttr; 1385 iinfo->i_lenEAttr;
1384 } 1386 }
@@ -1495,6 +1497,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1495 struct buffer_head *bh = NULL; 1497 struct buffer_head *bh = NULL;
1496 struct fileEntry *fe; 1498 struct fileEntry *fe;
1497 struct extendedFileEntry *efe; 1499 struct extendedFileEntry *efe;
1500 uint64_t lb_recorded;
1498 uint32_t udfperms; 1501 uint32_t udfperms;
1499 uint16_t icbflags; 1502 uint16_t icbflags;
1500 uint16_t crclen; 1503 uint16_t crclen;
@@ -1589,13 +1592,18 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1589 dsea->minorDeviceIdent = cpu_to_le32(iminor(inode)); 1592 dsea->minorDeviceIdent = cpu_to_le32(iminor(inode));
1590 } 1593 }
1591 1594
1595 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
1596 lb_recorded = 0; /* No extents => no blocks! */
1597 else
1598 lb_recorded =
1599 (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
1600 (blocksize_bits - 9);
1601
1592 if (iinfo->i_efe == 0) { 1602 if (iinfo->i_efe == 0) {
1593 memcpy(bh->b_data + sizeof(struct fileEntry), 1603 memcpy(bh->b_data + sizeof(struct fileEntry),
1594 iinfo->i_ext.i_data, 1604 iinfo->i_ext.i_data,
1595 inode->i_sb->s_blocksize - sizeof(struct fileEntry)); 1605 inode->i_sb->s_blocksize - sizeof(struct fileEntry));
1596 fe->logicalBlocksRecorded = cpu_to_le64( 1606 fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
1597 (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
1598 (blocksize_bits - 9));
1599 1607
1600 udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); 1608 udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
1601 udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); 1609 udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
@@ -1607,6 +1615,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1607 fe->uniqueID = cpu_to_le64(iinfo->i_unique); 1615 fe->uniqueID = cpu_to_le64(iinfo->i_unique);
1608 fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); 1616 fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr);
1609 fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); 1617 fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc);
1618 fe->checkpoint = cpu_to_le32(iinfo->i_checkpoint);
1610 fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE); 1619 fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE);
1611 crclen = sizeof(struct fileEntry); 1620 crclen = sizeof(struct fileEntry);
1612 } else { 1621 } else {
@@ -1615,9 +1624,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1615 inode->i_sb->s_blocksize - 1624 inode->i_sb->s_blocksize -
1616 sizeof(struct extendedFileEntry)); 1625 sizeof(struct extendedFileEntry));
1617 efe->objectSize = cpu_to_le64(inode->i_size); 1626 efe->objectSize = cpu_to_le64(inode->i_size);
1618 efe->logicalBlocksRecorded = cpu_to_le64( 1627 efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
1619 (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
1620 (blocksize_bits - 9));
1621 1628
1622 if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec || 1629 if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec ||
1623 (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec && 1630 (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec &&
@@ -1646,6 +1653,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1646 efe->uniqueID = cpu_to_le64(iinfo->i_unique); 1653 efe->uniqueID = cpu_to_le64(iinfo->i_unique);
1647 efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); 1654 efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr);
1648 efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); 1655 efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc);
1656 efe->checkpoint = cpu_to_le32(iinfo->i_checkpoint);
1649 efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); 1657 efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE);
1650 crclen = sizeof(struct extendedFileEntry); 1658 crclen = sizeof(struct extendedFileEntry);
1651 } 1659 }
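Two independent fixes in this file: the on-disk checkpoint field is now carried in the new iinfo->i_checkpoint instead of being lost on rewrite, and logicalBlocksRecorded is computed once into lb_recorded for both the fileEntry and extendedFileEntry cases. The rounding works because inode->i_blocks counts 512-byte sectors, so converting to filesystem blocks shifts by blocksize_bits - 9; a worked instance (values are illustrative):

        /* e.g. 2048-byte blocks: blocksize_bits == 11, 4 sectors per block */
        unsigned int spb = 1 << (blocksize_bits - 9);           /* == 4 */
        uint64_t lb_recorded = (inode->i_blocks + spb - 1) >>
                               (blocksize_bits - 9);
        /* i_blocks == 5 sectors -> (5 + 3) >> 2 == 2 recorded blocks */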
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 08bf46edf9c..38de8f234b9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,8 +32,6 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
37static inline int udf_match(int len1, const unsigned char *name1, int len2, 35static inline int udf_match(int len1, const unsigned char *name1, int len2,
38 const unsigned char *name2) 36 const unsigned char *name2)
39{ 37{
@@ -649,10 +647,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
649 struct udf_inode_info *dinfo = UDF_I(dir); 647 struct udf_inode_info *dinfo = UDF_I(dir);
650 struct udf_inode_info *iinfo; 648 struct udf_inode_info *iinfo;
651 649
652 err = -EMLINK;
653 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out;
655
656 err = -EIO; 650 err = -EIO;
657 inode = udf_new_inode(dir, S_IFDIR | mode, &err); 651 inode = udf_new_inode(dir, S_IFDIR | mode, &err);
658 if (!inode) 652 if (!inode)
@@ -1032,9 +1026,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1032 struct fileIdentDesc cfi, *fi; 1026 struct fileIdentDesc cfi, *fi;
1033 int err; 1027 int err;
1034 1028
1035 if (inode->i_nlink >= UDF_MAX_LINKS)
1036 return -EMLINK;
1037
1038 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1029 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1039 if (!fi) { 1030 if (!fi) {
1040 return err; 1031 return err;
@@ -1126,10 +1117,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1126 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != 1117 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
1127 old_dir->i_ino) 1118 old_dir->i_ino)
1128 goto end_rename; 1119 goto end_rename;
1129
1130 retval = -EMLINK;
1131 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1132 goto end_rename;
1133 } 1120 }
1134 if (!nfi) { 1121 if (!nfi) {
1135 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, 1122 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi,
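All three open-coded -EMLINK checks can go because the limit moves behind sb->s_max_links (set in fs/udf/super.c below) and is enforced generically by the VFS for link, mkdir and rename. Roughly the test that now runs in generic code (a sketch only: may_add_link is a hypothetical name, the real checks are open-coded in vfs_link()/vfs_mkdir()/vfs_rename(), and a max_links of 0 means no limit):

static int may_add_link(struct inode *dir)
{
        unsigned int max_links = dir->i_sb->s_max_links;

        if (max_links && dir->i_nlink >= max_links)
                return -EMLINK;
        return 0;
}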
diff --git a/fs/udf/super.c b/fs/udf/super.c
index c09a84daaf5..ac8a348dcb6 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,6 +75,8 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78enum { UDF_MAX_LINKS = 0xffff };
79
78/* These are the "meat" - everything else is stuffing */ 80/* These are the "meat" - everything else is stuffing */
79static int udf_fill_super(struct super_block *, void *, int); 81static int udf_fill_super(struct super_block *, void *, int);
80static void udf_put_super(struct super_block *); 82static void udf_put_super(struct super_block *);
@@ -948,11 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
948 else 950 else
949 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ 951 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
950 952
951 if (bitmap == NULL) { 953 if (bitmap == NULL)
952 udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
953 nr_groups);
954 return NULL; 954 return NULL;
955 }
956 955
957 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); 956 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
958 bitmap->s_nr_groups = nr_groups; 957 bitmap->s_nr_groups = nr_groups;
@@ -2035,13 +2034,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2035 } 2034 }
2036 2035
2037 /* Allocate a dentry for the root inode */ 2036 /* Allocate a dentry for the root inode */
2038 sb->s_root = d_alloc_root(inode); 2037 sb->s_root = d_make_root(inode);
2039 if (!sb->s_root) { 2038 if (!sb->s_root) {
2040 udf_err(sb, "Couldn't allocate root dentry\n"); 2039 udf_err(sb, "Couldn't allocate root dentry\n");
2041 iput(inode);
2042 goto error_out; 2040 goto error_out;
2043 } 2041 }
2044 sb->s_maxbytes = MAX_LFS_FILESIZE; 2042 sb->s_maxbytes = MAX_LFS_FILESIZE;
2043 sb->s_max_links = UDF_MAX_LINKS;
2045 return 0; 2044 return 0;
2046 2045
2047error_out: 2046error_out:
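The dropped iput(inode) is not a leak: unlike d_alloc_root(), d_make_root() consumes the inode reference even when it fails, so the caller's error path must not put the inode again. Approximately (a sketch of the helper's contract, not the exact fs/dcache.c body):

struct dentry *d_make_root(struct inode *root_inode)
{
        struct dentry *res = NULL;

        if (root_inode) {
                res = d_alloc_root(root_inode);
                if (!res)
                        iput(root_inode);  /* ref consumed on failure too */
        }
        return res;
}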
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index d1bd31ea724..bb8309dcd5c 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -23,6 +23,7 @@ struct udf_inode_info {
23 __u64 i_lenExtents; 23 __u64 i_lenExtents;
24 __u32 i_next_alloc_block; 24 __u32 i_next_alloc_block;
25 __u32 i_next_alloc_goal; 25 __u32 i_next_alloc_goal;
26 __u32 i_checkpoint;
26 unsigned i_alloc_type : 3; 27 unsigned i_alloc_type : 3;
27 unsigned i_efe : 1; /* extendedFileEntry */ 28 unsigned i_efe : 1; /* extendedFileEntry */
28 unsigned i_use : 1; /* unallocSpaceEntry */ 29 unsigned i_use : 1; /* unallocSpaceEntry */
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 9094e1d917b..7cdd3953d67 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -26,7 +26,6 @@
26 */ 26 */
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/system.h>
30 29
31#include <linux/errno.h> 30#include <linux/errno.h>
32#include <linux/fs.h> 31#include <linux/fs.h>
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 38cac199edf..a2281cadefa 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -166,10 +166,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
166 int error; 166 int error;
167 167
168 lock_ufs(dir->i_sb); 168 lock_ufs(dir->i_sb);
169 if (inode->i_nlink >= UFS_LINK_MAX) {
170 unlock_ufs(dir->i_sb);
171 return -EMLINK;
172 }
173 169
174 inode->i_ctime = CURRENT_TIME_SEC; 170 inode->i_ctime = CURRENT_TIME_SEC;
175 inode_inc_link_count(inode); 171 inode_inc_link_count(inode);
@@ -183,10 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 179static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
184{ 180{
185 struct inode * inode; 181 struct inode * inode;
186 int err = -EMLINK; 182 int err;
187
188 if (dir->i_nlink >= UFS_LINK_MAX)
189 goto out;
190 183
191 lock_ufs(dir->i_sb); 184 lock_ufs(dir->i_sb);
192 inode_inc_link_count(dir); 185 inode_inc_link_count(dir);
@@ -305,11 +298,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
305 drop_nlink(new_inode); 298 drop_nlink(new_inode);
306 inode_dec_link_count(new_inode); 299 inode_dec_link_count(new_inode);
307 } else { 300 } else {
308 if (dir_de) {
309 err = -EMLINK;
310 if (new_dir->i_nlink >= UFS_LINK_MAX)
311 goto out_dir;
312 }
313 err = ufs_add_link(new_dentry, old_inode); 301 err = ufs_add_link(new_dentry, old_inode);
314 if (err) 302 if (err)
315 goto out_dir; 303 goto out_dir;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5246ee3e560..ac8e279eccc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -73,7 +73,6 @@
73#include <stdarg.h> 73#include <stdarg.h>
74 74
75#include <asm/uaccess.h> 75#include <asm/uaccess.h>
76#include <asm/system.h>
77 76
78#include <linux/errno.h> 77#include <linux/errno.h>
79#include <linux/fs.h> 78#include <linux/fs.h>
@@ -1157,16 +1156,17 @@ magic_found:
1157 "fast symlink size (%u)\n", uspi->s_maxsymlinklen); 1156 "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
1158 uspi->s_maxsymlinklen = maxsymlen; 1157 uspi->s_maxsymlinklen = maxsymlen;
1159 } 1158 }
1159 sb->s_max_links = UFS_LINK_MAX;
1160 1160
1161 inode = ufs_iget(sb, UFS_ROOTINO); 1161 inode = ufs_iget(sb, UFS_ROOTINO);
1162 if (IS_ERR(inode)) { 1162 if (IS_ERR(inode)) {
1163 ret = PTR_ERR(inode); 1163 ret = PTR_ERR(inode);
1164 goto failed; 1164 goto failed;
1165 } 1165 }
1166 sb->s_root = d_alloc_root(inode); 1166 sb->s_root = d_make_root(inode);
1167 if (!sb->s_root) { 1167 if (!sb->s_root) {
1168 ret = -ENOMEM; 1168 ret = -ENOMEM;
1169 goto dalloc_failed; 1169 goto failed;
1170 } 1170 }
1171 1171
1172 ufs_setup_cstotal(sb); 1172 ufs_setup_cstotal(sb);
@@ -1180,8 +1180,6 @@ magic_found:
1180 UFSD("EXIT\n"); 1180 UFSD("EXIT\n");
1181 return 0; 1181 return 0;
1182 1182
1183dalloc_failed:
1184 iput(inode);
1185failed: 1183failed:
1186 if (ubh) 1184 if (ubh)
1187 ubh_brelse_uspi (uspi); 1185 ubh_brelse_uspi (uspi);
diff --git a/fs/xattr.c b/fs/xattr.c
index 82f43376c7c..3c8c1cc333c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -16,11 +16,12 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/evm.h> 17#include <linux/evm.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/module.h> 19#include <linux/export.h>
20#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <asm/uaccess.h> 22#include <linux/vmalloc.h>
23 23
24#include <asm/uaccess.h>
24 25
25/* 26/*
26 * Check permissions for extended attribute access. This is a bit complicated 27 * Check permissions for extended attribute access. This is a bit complicated
@@ -320,6 +321,7 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
320{ 321{
321 int error; 322 int error;
322 void *kvalue = NULL; 323 void *kvalue = NULL;
324 void *vvalue = NULL; /* If non-NULL, we used vmalloc() */
323 char kname[XATTR_NAME_MAX + 1]; 325 char kname[XATTR_NAME_MAX + 1];
324 326
325 if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) 327 if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
@@ -334,13 +336,25 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
334 if (size) { 336 if (size) {
335 if (size > XATTR_SIZE_MAX) 337 if (size > XATTR_SIZE_MAX)
336 return -E2BIG; 338 return -E2BIG;
337 kvalue = memdup_user(value, size); 339 kvalue = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
338 if (IS_ERR(kvalue)) 340 if (!kvalue) {
339 return PTR_ERR(kvalue); 341 vvalue = vmalloc(size);
342 if (!vvalue)
343 return -ENOMEM;
344 kvalue = vvalue;
345 }
346 if (copy_from_user(kvalue, value, size)) {
347 error = -EFAULT;
348 goto out;
349 }
340 } 350 }
341 351
342 error = vfs_setxattr(d, kname, kvalue, size, flags); 352 error = vfs_setxattr(d, kname, kvalue, size, flags);
343 kfree(kvalue); 353out:
354 if (vvalue)
355 vfree(vvalue);
356 else
357 kfree(kvalue);
344 return error; 358 return error;
345} 359}
346 360
@@ -492,13 +506,18 @@ listxattr(struct dentry *d, char __user *list, size_t size)
492{ 506{
493 ssize_t error; 507 ssize_t error;
494 char *klist = NULL; 508 char *klist = NULL;
509 char *vlist = NULL; /* If non-NULL, we used vmalloc() */
495 510
496 if (size) { 511 if (size) {
497 if (size > XATTR_LIST_MAX) 512 if (size > XATTR_LIST_MAX)
498 size = XATTR_LIST_MAX; 513 size = XATTR_LIST_MAX;
499 klist = kmalloc(size, GFP_KERNEL); 514 klist = kmalloc(size, __GFP_NOWARN | GFP_KERNEL);
500 if (!klist) 515 if (!klist) {
501 return -ENOMEM; 516 vlist = vmalloc(size);
517 if (!vlist)
518 return -ENOMEM;
519 klist = vlist;
520 }
502 } 521 }
503 522
504 error = vfs_listxattr(d, klist, size); 523 error = vfs_listxattr(d, klist, size);
@@ -510,7 +529,10 @@ listxattr(struct dentry *d, char __user *list, size_t size)
510 than XATTR_LIST_MAX bytes. Not possible. */ 529 than XATTR_LIST_MAX bytes. Not possible. */
511 error = -E2BIG; 530 error = -E2BIG;
512 } 531 }
513 kfree(klist); 532 if (vlist)
533 vfree(vlist);
534 else
535 kfree(klist);
514 return error; 536 return error;
515} 537}
516 538
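Both setxattr() and listxattr() now use the same fallback: try kmalloc() with __GFP_NOWARN first (an xattr value can approach XATTR_SIZE_MAX, and a physically contiguous allocation that large may fail under fragmentation without deserving a warning splat), fall back to vmalloc(), and keep a side pointer (vvalue/vlist) so the matching free is used. The same idea as a reusable pair (xattr_alloc/xattr_free are hypothetical names; later kernels provide kvmalloc()/kvfree() for exactly this):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>           /* is_vmalloc_addr() */

static void *xattr_alloc(size_t size)
{
        void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

        return p ? p : vmalloc(size);
}

static void xattr_free(void *p)
{
        if (is_vmalloc_addr(p))
                vfree(p);
        else
                kfree(p);
}

With helpers like these the side pointer becomes unnecessary, since is_vmalloc_addr() already distinguishes the two cases at free time.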
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 8d5a506c82e..69d06b07b16 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -5,7 +5,7 @@
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/export.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/posix_acl_xattr.h> 10#include <linux/posix_acl_xattr.h>
11#include <linux/gfp.h> 11#include <linux/gfp.h>
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 427a4e82a58..0a9977983f9 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
96 xfs_qm_bhv.o \ 96 xfs_qm_bhv.o \
97 xfs_qm.o \ 97 xfs_qm.o \
98 xfs_quotaops.o 98 xfs_quotaops.o
99ifeq ($(CONFIG_XFS_QUOTA),y)
100xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
101endif
102xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 99xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
103xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 100xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
104xfs-$(CONFIG_PROC_FS) += xfs_stats.o 101xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index ce84ffd0264..0f0df2759b0 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -35,6 +35,7 @@
35#include "xfs_error.h" 35#include "xfs_error.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37 37
38struct workqueue_struct *xfs_alloc_wq;
38 39
39#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 40#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
40 41
@@ -68,7 +69,7 @@ xfs_alloc_lookup_eq(
68 * Lookup the first record greater than or equal to [bno, len] 69 * Lookup the first record greater than or equal to [bno, len]
69 * in the btree given by cur. 70 * in the btree given by cur.
70 */ 71 */
71STATIC int /* error */ 72int /* error */
72xfs_alloc_lookup_ge( 73xfs_alloc_lookup_ge(
73 struct xfs_btree_cur *cur, /* btree cursor */ 74 struct xfs_btree_cur *cur, /* btree cursor */
74 xfs_agblock_t bno, /* starting block of extent */ 75 xfs_agblock_t bno, /* starting block of extent */
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
2207 * group or loop over the allocation groups to find the result. 2208 * group or loop over the allocation groups to find the result.
2208 */ 2209 */
2209int /* error */ 2210int /* error */
2210xfs_alloc_vextent( 2211__xfs_alloc_vextent(
2211 xfs_alloc_arg_t *args) /* allocation argument structure */ 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212{ 2213{
2213 xfs_agblock_t agsize; /* allocation group size */ 2214 xfs_agblock_t agsize; /* allocation group size */
@@ -2417,6 +2418,37 @@ error0:
2417 return error; 2418 return error;
2418} 2419}
2419 2420
2421static void
2422xfs_alloc_vextent_worker(
2423 struct work_struct *work)
2424{
2425 struct xfs_alloc_arg *args = container_of(work,
2426 struct xfs_alloc_arg, work);
2427 unsigned long pflags;
2428
2429 /* we are in a transaction context here */
2430 current_set_flags_nested(&pflags, PF_FSTRANS);
2431
2432 args->result = __xfs_alloc_vextent(args);
2433 complete(args->done);
2434
2435 current_restore_flags_nested(&pflags, PF_FSTRANS);
2436}
2437
2438
2439int /* error */
2440xfs_alloc_vextent(
2441 xfs_alloc_arg_t *args) /* allocation argument structure */
2442{
2443 DECLARE_COMPLETION_ONSTACK(done);
2444
2445 args->done = &done;
2446 INIT_WORK(&args->work, xfs_alloc_vextent_worker);
2447 queue_work(xfs_alloc_wq, &args->work);
2448 wait_for_completion(&done);
2449 return args->result;
2450}
2451
2420/* 2452/*
2421 * Free an extent. 2453 * Free an extent.
2422 * Just break up the extent address and hand off to xfs_free_ag_extent 2454 * Just break up the extent address and hand off to xfs_free_ag_extent
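xfs_alloc_vextent() becomes a wrapper that hands the actual allocation (__xfs_alloc_vextent()) to a worker on xfs_alloc_wq and blocks on an on-stack completion: the allocator then runs on the worker thread's fresh stack rather than at the bottom of a deep writeback call chain, and the worker re-asserts PF_FSTRANS because it still executes in transaction context. The generic shape of this offload pattern (struct offload, do_the_real_work and some_wq are hypothetical placeholders):

struct offload {
        struct work_struct      work;
        struct completion       *done;
        int                     result;
        /* ... real arguments go here ... */
};

static void offload_worker(struct work_struct *work)
{
        struct offload *off = container_of(work, struct offload, work);

        off->result = do_the_real_work(off);    /* hypothetical */
        complete(off->done);                    /* wake the submitter */
}

static int run_on_fresh_stack(struct offload *off)
{
        DECLARE_COMPLETION_ONSTACK(done);

        off->done = &done;
        INIT_WORK(&off->work, offload_worker);
        queue_work(some_wq, &off->work);        /* hypothetical workqueue */
        wait_for_completion(&done);
        return off->result;
}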
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2f52b924be7..3a7e7d8f8de 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -25,6 +25,8 @@ struct xfs_perag;
25struct xfs_trans; 25struct xfs_trans;
26struct xfs_busy_extent; 26struct xfs_busy_extent;
27 27
28extern struct workqueue_struct *xfs_alloc_wq;
29
28/* 30/*
29 * Freespace allocation types. Argument to xfs_alloc_[v]extent. 31 * Freespace allocation types. Argument to xfs_alloc_[v]extent.
30 */ 32 */
@@ -119,6 +121,9 @@ typedef struct xfs_alloc_arg {
119 char isfl; /* set if is freelist blocks - !acctg */ 121 char isfl; /* set if is freelist blocks - !acctg */
120 char userdata; /* set if this is user data */ 122 char userdata; /* set if this is user data */
121 xfs_fsblock_t firstblock; /* io first block allocated */ 123 xfs_fsblock_t firstblock; /* io first block allocated */
124 struct completion *done;
125 struct work_struct work;
126 int result;
122} xfs_alloc_arg_t; 127} xfs_alloc_arg_t;
123 128
124/* 129/*
@@ -243,6 +248,13 @@ xfs_alloc_lookup_le(
243 xfs_extlen_t len, /* length of extent */ 248 xfs_extlen_t len, /* length of extent */
244 int *stat); /* success/failure */ 249 int *stat); /* success/failure */
245 250
251int /* error */
252xfs_alloc_lookup_ge(
253 struct xfs_btree_cur *cur, /* btree cursor */
254 xfs_agblock_t bno, /* starting block of extent */
255 xfs_extlen_t len, /* length of extent */
256 int *stat); /* success/failure */
257
246int /* error */ 258int /* error */
247xfs_alloc_get_rec( 259xfs_alloc_get_rec(
248 struct xfs_btree_cur *cur, /* btree cursor */ 260 struct xfs_btree_cur *cur, /* btree cursor */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 74b9baf36ac..0dbb9e70fe2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -26,6 +26,7 @@
26#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
27#include "xfs_dinode.h" 27#include "xfs_dinode.h"
28#include "xfs_inode.h" 28#include "xfs_inode.h"
29#include "xfs_inode_item.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
30#include "xfs_error.h" 31#include "xfs_error.h"
31#include "xfs_rw.h" 32#include "xfs_rw.h"
@@ -99,23 +100,6 @@ xfs_destroy_ioend(
99} 100}
100 101
101/* 102/*
102 * If the end of the current ioend is beyond the current EOF,
103 * return the new EOF value, otherwise zero.
104 */
105STATIC xfs_fsize_t
106xfs_ioend_new_eof(
107 xfs_ioend_t *ioend)
108{
109 xfs_inode_t *ip = XFS_I(ioend->io_inode);
110 xfs_fsize_t isize;
111 xfs_fsize_t bsize;
112
113 bsize = ioend->io_offset + ioend->io_size;
114 isize = MIN(i_size_read(VFS_I(ip)), bsize);
115 return isize > ip->i_d.di_size ? isize : 0;
116}
117
118/*
119 * Fast and loose check if this write could update the on-disk inode size. 103 * Fast and loose check if this write could update the on-disk inode size.
120 */ 104 */
121static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 105static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
124 XFS_I(ioend->io_inode)->i_d.di_size; 108 XFS_I(ioend->io_inode)->i_d.di_size;
125} 109}
126 110
111STATIC int
112xfs_setfilesize_trans_alloc(
113 struct xfs_ioend *ioend)
114{
115 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
116 struct xfs_trans *tp;
117 int error;
118
119 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
120
121 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
122 if (error) {
123 xfs_trans_cancel(tp, 0);
124 return error;
125 }
126
127 ioend->io_append_trans = tp;
128
129 /*
130 * We hand off the transaction to the completion thread now, so
131 * clear the flag here.
132 */
133 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
134 return 0;
135}
136
127/* 137/*
128 * Update on-disk file size now that data has been written to disk. 138 * Update on-disk file size now that data has been written to disk.
129 *
130 * This function does not block as blocking on the inode lock in IO completion
131 * can lead to IO completion order dependency deadlocks.. If it can't get the
132 * inode ilock it will return EAGAIN. Callers must handle this.
133 */ 139 */
134STATIC int 140STATIC int
135xfs_setfilesize( 141xfs_setfilesize(
136 xfs_ioend_t *ioend) 142 struct xfs_ioend *ioend)
137{ 143{
138 xfs_inode_t *ip = XFS_I(ioend->io_inode); 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
145 struct xfs_trans *tp = ioend->io_append_trans;
139 xfs_fsize_t isize; 146 xfs_fsize_t isize;
140 147
141 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 148 /*
142 return EAGAIN; 149 * The transaction was allocated in the I/O submission thread,
 150 * thus we need to mark ourselves as being in a transaction
151 * manually.
152 */
153 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
143 154
144 isize = xfs_ioend_new_eof(ioend); 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
145 if (isize) { 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
146 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 157 if (!isize) {
147 ip->i_d.di_size = isize; 158 xfs_iunlock(ip, XFS_ILOCK_EXCL);
148 xfs_mark_inode_dirty(ip); 159 xfs_trans_cancel(tp, 0);
160 return 0;
149 } 161 }
150 162
151 xfs_iunlock(ip, XFS_ILOCK_EXCL); 163 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
152 return 0; 164
165 ip->i_d.di_size = isize;
166 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
167 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
168
169 return xfs_trans_commit(tp, 0);
153} 170}
154 171
155/* 172/*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
163 struct xfs_ioend *ioend) 180 struct xfs_ioend *ioend)
164{ 181{
165 if (atomic_dec_and_test(&ioend->io_remaining)) { 182 if (atomic_dec_and_test(&ioend->io_remaining)) {
183 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
184
166 if (ioend->io_type == IO_UNWRITTEN) 185 if (ioend->io_type == IO_UNWRITTEN)
167 queue_work(xfsconvertd_workqueue, &ioend->io_work); 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
168 else if (xfs_ioend_is_append(ioend)) 187 else if (ioend->io_append_trans)
169 queue_work(xfsdatad_workqueue, &ioend->io_work); 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
170 else 189 else
171 xfs_destroy_ioend(ioend); 190 xfs_destroy_ioend(ioend);
172 } 191 }
@@ -195,35 +214,36 @@ xfs_end_io(
 195 * range to normal written extents after the data I/O has finished. 214 * range to normal written extents after the data I/O has finished.
196 */ 215 */
197 if (ioend->io_type == IO_UNWRITTEN) { 216 if (ioend->io_type == IO_UNWRITTEN) {
217 /*
218 * For buffered I/O we never preallocate a transaction when
219 * doing the unwritten extent conversion, but for direct I/O
220 * we do not know if we are converting an unwritten extent
221 * or not at the point where we preallocate the transaction.
222 */
223 if (ioend->io_append_trans) {
224 ASSERT(ioend->io_isdirect);
225
226 current_set_flags_nested(
227 &ioend->io_append_trans->t_pflags, PF_FSTRANS);
228 xfs_trans_cancel(ioend->io_append_trans, 0);
229 }
230
198 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
199 ioend->io_size); 232 ioend->io_size);
200 if (error) { 233 if (error) {
201 ioend->io_error = -error; 234 ioend->io_error = -error;
202 goto done; 235 goto done;
203 } 236 }
237 } else if (ioend->io_append_trans) {
238 error = xfs_setfilesize(ioend);
239 if (error)
240 ioend->io_error = -error;
241 } else {
242 ASSERT(!xfs_ioend_is_append(ioend));
204 } 243 }
205 244
206 /*
207 * We might have to update the on-disk file size after extending
208 * writes.
209 */
210 error = xfs_setfilesize(ioend);
211 ASSERT(!error || error == EAGAIN);
212
213done: 245done:
214 /* 246 xfs_destroy_ioend(ioend);
215 * If we didn't complete processing of the ioend, requeue it to the
216 * tail of the workqueue for another attempt later. Otherwise destroy
217 * it.
218 */
219 if (error == EAGAIN) {
220 atomic_inc(&ioend->io_remaining);
221 xfs_finish_ioend(ioend);
222 /* ensure we don't spin on blocked ioends */
223 delay(1);
224 } else {
225 xfs_destroy_ioend(ioend);
226 }
227} 247}
228 248
229/* 249/*
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
259 */ 279 */
260 atomic_set(&ioend->io_remaining, 1); 280 atomic_set(&ioend->io_remaining, 1);
261 ioend->io_isasync = 0; 281 ioend->io_isasync = 0;
282 ioend->io_isdirect = 0;
262 ioend->io_error = 0; 283 ioend->io_error = 0;
263 ioend->io_list = NULL; 284 ioend->io_list = NULL;
264 ioend->io_type = type; 285 ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
269 ioend->io_size = 0; 290 ioend->io_size = 0;
270 ioend->io_iocb = NULL; 291 ioend->io_iocb = NULL;
271 ioend->io_result = 0; 292 ioend->io_result = 0;
293 ioend->io_append_trans = NULL;
272 294
273 INIT_WORK(&ioend->io_work, xfs_end_io); 295 INIT_WORK(&ioend->io_work, xfs_end_io);
274 return ioend; 296 return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
379 atomic_inc(&ioend->io_remaining); 401 atomic_inc(&ioend->io_remaining);
380 bio->bi_private = ioend; 402 bio->bi_private = ioend;
381 bio->bi_end_io = xfs_end_bio; 403 bio->bi_end_io = xfs_end_bio;
382
383 /*
384 * If the I/O is beyond EOF we mark the inode dirty immediately
385 * but don't update the inode size until I/O completion.
386 */
387 if (xfs_ioend_new_eof(ioend))
388 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
389
390 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
391} 405}
392 406
@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
1033 wbc, end_index); 1047 wbc, end_index);
1034 } 1048 }
1035 1049
1036 if (iohead) 1050 if (iohead) {
1051 /*
1052 * Reserve log space if we might write beyond the on-disk
1053 * inode size.
1054 */
1055 if (ioend->io_type != IO_UNWRITTEN &&
1056 xfs_ioend_is_append(ioend)) {
1057 err = xfs_setfilesize_trans_alloc(ioend);
1058 if (err)
1059 goto error;
1060 }
1061
1037 xfs_submit_ioend(wbc, iohead); 1062 xfs_submit_ioend(wbc, iohead);
1063 }
1038 1064
1039 return 0; 1065 return 0;
1040 1066
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
1314{ 1340{
1315 struct inode *inode = iocb->ki_filp->f_mapping->host; 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1316 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1343 struct xfs_ioend *ioend = NULL;
1317 ssize_t ret; 1344 ssize_t ret;
1318 1345
1319 if (rw & WRITE) { 1346 if (rw & WRITE) {
1320 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); 1347 size_t size = iov_length(iov, nr_segs);
1348
1349 /*
1350 * We need to preallocate a transaction for a size update
1351 * here. In the case that this write both updates the size
1352 * and converts at least one unwritten extent we will cancel
1353 * the still clean transaction after the I/O has finished.
1354 */
1355 iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
1356 if (offset + size > XFS_I(inode)->i_d.di_size) {
1357 ret = xfs_setfilesize_trans_alloc(ioend);
1358 if (ret)
1359 goto out_destroy_ioend;
1360 ioend->io_isdirect = 1;
1361 }
1321 1362
1322 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1323 offset, nr_segs, 1364 offset, nr_segs,
1324 xfs_get_blocks_direct, 1365 xfs_get_blocks_direct,
1325 xfs_end_io_direct_write, NULL, 0); 1366 xfs_end_io_direct_write, NULL, 0);
1326 if (ret != -EIOCBQUEUED && iocb->private) 1367 if (ret != -EIOCBQUEUED && iocb->private)
1327 xfs_destroy_ioend(iocb->private); 1368 goto out_trans_cancel;
1328 } else { 1369 } else {
1329 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1330 offset, nr_segs, 1371 offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
1333 } 1374 }
1334 1375
1335 return ret; 1376 return ret;
1377
1378out_trans_cancel:
1379 if (ioend->io_append_trans) {
1380 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1381 PF_FSTRANS);
1382 xfs_trans_cancel(ioend->io_append_trans, 0);
1383 }
1384out_destroy_ioend:
1385 xfs_destroy_ioend(ioend);
1386 return ret;
1336} 1387}
1337 1388
1338STATIC void 1389STATIC void
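This rewrite removes the old trylock-and-requeue scheme (xfs_setfilesize() returning EAGAIN and the ioend being resubmitted with a delay): log space for the size update is now reserved up front in xfs_setfilesize_trans_alloc() at submission time, and completion either commits that transaction or cancels it. The deleted xfs_ioend_new_eof() logic survives as the xfs_new_eof() call above; its assumed shape (a sketch consistent with the removed code, not a quote of xfs_inode.h):

static inline xfs_fsize_t
xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
{
        xfs_fsize_t i_size = i_size_read(VFS_I(ip));

        if (new_size > i_size)
                new_size = i_size;      /* clamp to the in-core size */

        /* non-zero only if this I/O moves the on-disk size forward */
        return new_size > ip->i_d.di_size ? new_size : 0;
}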
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 116dd5c3703..84eafbcb0d9 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_AOPS_H__ 18#ifndef __XFS_AOPS_H__
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern struct workqueue_struct *xfsdatad_workqueue;
22extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 21extern mempool_t *xfs_ioend_pool;
24 22
25/* 23/*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
48 int io_error; /* I/O error code */ 46 int io_error; /* I/O error code */
49 atomic_t io_remaining; /* hold count */ 47 atomic_t io_remaining; /* hold count */
50 unsigned int io_isasync : 1; /* needs aio_complete */ 48 unsigned int io_isasync : 1; /* needs aio_complete */
49 unsigned int io_isdirect : 1;/* direct I/O */
51 struct inode *io_inode; /* file being written to */ 50 struct inode *io_inode; /* file being written to */
52 struct buffer_head *io_buffer_head;/* buffer linked list head */ 51 struct buffer_head *io_buffer_head;/* buffer linked list head */
53 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 52 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
54 size_t io_size; /* size of the extent */ 53 size_t io_size; /* size of the extent */
55 xfs_off_t io_offset; /* offset in the file */ 54 xfs_off_t io_offset; /* offset in the file */
56 struct work_struct io_work; /* xfsdatad work queue */ 55 struct work_struct io_work; /* xfsdatad work queue */
56 struct xfs_trans *io_append_trans;/* xact. for size update */
57 struct kiocb *io_iocb; 57 struct kiocb *io_iocb;
58 int io_result; 58 int io_result;
59} xfs_ioend_t; 59} xfs_ioend_t;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 08b9ac644c3..65d61b948ea 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -853,6 +853,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
853{ 853{
854 int newsize, forkoff, retval; 854 int newsize, forkoff, retval;
855 855
856 trace_xfs_attr_sf_addname(args);
857
856 retval = xfs_attr_shortform_lookup(args); 858 retval = xfs_attr_shortform_lookup(args);
857 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 859 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
858 return(retval); 860 return(retval);
@@ -896,6 +898,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
896 xfs_dabuf_t *bp; 898 xfs_dabuf_t *bp;
897 int retval, error, committed, forkoff; 899 int retval, error, committed, forkoff;
898 900
901 trace_xfs_attr_leaf_addname(args);
902
899 /* 903 /*
900 * Read the (only) block in the attribute list in. 904 * Read the (only) block in the attribute list in.
901 */ 905 */
@@ -920,6 +924,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
920 xfs_da_brelse(args->trans, bp); 924 xfs_da_brelse(args->trans, bp);
921 return(retval); 925 return(retval);
922 } 926 }
927
928 trace_xfs_attr_leaf_replace(args);
929
923 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ 930 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
924 args->blkno2 = args->blkno; /* set 2nd entry info*/ 931 args->blkno2 = args->blkno; /* set 2nd entry info*/
925 args->index2 = args->index; 932 args->index2 = args->index;
@@ -1090,6 +1097,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1090 xfs_dabuf_t *bp; 1097 xfs_dabuf_t *bp;
1091 int error, committed, forkoff; 1098 int error, committed, forkoff;
1092 1099
1100 trace_xfs_attr_leaf_removename(args);
1101
1093 /* 1102 /*
1094 * Remove the attribute. 1103 * Remove the attribute.
1095 */ 1104 */
@@ -1223,6 +1232,8 @@ xfs_attr_node_addname(xfs_da_args_t *args)
1223 xfs_mount_t *mp; 1232 xfs_mount_t *mp;
1224 int committed, retval, error; 1233 int committed, retval, error;
1225 1234
1235 trace_xfs_attr_node_addname(args);
1236
1226 /* 1237 /*
1227 * Fill in bucket of arguments/results/context to carry around. 1238 * Fill in bucket of arguments/results/context to carry around.
1228 */ 1239 */
@@ -1249,6 +1260,9 @@ restart:
1249 } else if (retval == EEXIST) { 1260 } else if (retval == EEXIST) {
1250 if (args->flags & ATTR_CREATE) 1261 if (args->flags & ATTR_CREATE)
1251 goto out; 1262 goto out;
1263
1264 trace_xfs_attr_node_replace(args);
1265
1252 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ 1266 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1253 args->blkno2 = args->blkno; /* set 2nd entry info*/ 1267 args->blkno2 = args->blkno; /* set 2nd entry info*/
1254 args->index2 = args->index; 1268 args->index2 = args->index;
@@ -1480,6 +1494,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1480 xfs_dabuf_t *bp; 1494 xfs_dabuf_t *bp;
1481 int retval, error, committed, forkoff; 1495 int retval, error, committed, forkoff;
1482 1496
1497 trace_xfs_attr_node_removename(args);
1498
1483 /* 1499 /*
1484 * Tie a string around our finger to remind us where we are. 1500 * Tie a string around our finger to remind us where we are.
1485 */ 1501 */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d25eafd4d28..76d93dc953e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -235,6 +235,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
235 xfs_inode_t *dp; 235 xfs_inode_t *dp;
236 xfs_ifork_t *ifp; 236 xfs_ifork_t *ifp;
237 237
238 trace_xfs_attr_sf_create(args);
239
238 dp = args->dp; 240 dp = args->dp;
239 ASSERT(dp != NULL); 241 ASSERT(dp != NULL);
240 ifp = dp->i_afp; 242 ifp = dp->i_afp;
@@ -268,6 +270,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
268 xfs_inode_t *dp; 270 xfs_inode_t *dp;
269 xfs_ifork_t *ifp; 271 xfs_ifork_t *ifp;
270 272
273 trace_xfs_attr_sf_add(args);
274
271 dp = args->dp; 275 dp = args->dp;
272 mp = dp->i_mount; 276 mp = dp->i_mount;
273 dp->i_d.di_forkoff = forkoff; 277 dp->i_d.di_forkoff = forkoff;
@@ -337,6 +341,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
337 xfs_mount_t *mp; 341 xfs_mount_t *mp;
338 xfs_inode_t *dp; 342 xfs_inode_t *dp;
339 343
344 trace_xfs_attr_sf_remove(args);
345
340 dp = args->dp; 346 dp = args->dp;
341 mp = dp->i_mount; 347 mp = dp->i_mount;
342 base = sizeof(xfs_attr_sf_hdr_t); 348 base = sizeof(xfs_attr_sf_hdr_t);
@@ -405,6 +411,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
405 int i; 411 int i;
406 xfs_ifork_t *ifp; 412 xfs_ifork_t *ifp;
407 413
414 trace_xfs_attr_sf_lookup(args);
415
408 ifp = args->dp->i_afp; 416 ifp = args->dp->i_afp;
409 ASSERT(ifp->if_flags & XFS_IFINLINE); 417 ASSERT(ifp->if_flags & XFS_IFINLINE);
410 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; 418 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -476,6 +484,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
476 xfs_dabuf_t *bp; 484 xfs_dabuf_t *bp;
477 xfs_ifork_t *ifp; 485 xfs_ifork_t *ifp;
478 486
487 trace_xfs_attr_sf_to_leaf(args);
488
479 dp = args->dp; 489 dp = args->dp;
480 ifp = dp->i_afp; 490 ifp = dp->i_afp;
481 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; 491 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -775,6 +785,8 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
775 char *tmpbuffer; 785 char *tmpbuffer;
776 int error, i; 786 int error, i;
777 787
788 trace_xfs_attr_leaf_to_sf(args);
789
778 dp = args->dp; 790 dp = args->dp;
779 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); 791 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
780 ASSERT(tmpbuffer != NULL); 792 ASSERT(tmpbuffer != NULL);
@@ -848,6 +860,8 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
848 xfs_dablk_t blkno; 860 xfs_dablk_t blkno;
849 int error; 861 int error;
850 862
863 trace_xfs_attr_leaf_to_node(args);
864
851 dp = args->dp; 865 dp = args->dp;
852 bp1 = bp2 = NULL; 866 bp1 = bp2 = NULL;
853 error = xfs_da_grow_inode(args, &blkno); 867 error = xfs_da_grow_inode(args, &blkno);
@@ -911,6 +925,8 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
911 xfs_dabuf_t *bp; 925 xfs_dabuf_t *bp;
912 int error; 926 int error;
913 927
928 trace_xfs_attr_leaf_create(args);
929
914 dp = args->dp; 930 dp = args->dp;
915 ASSERT(dp != NULL); 931 ASSERT(dp != NULL);
916 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, 932 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
@@ -948,6 +964,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
948 xfs_dablk_t blkno; 964 xfs_dablk_t blkno;
949 int error; 965 int error;
950 966
967 trace_xfs_attr_leaf_split(state->args);
968
951 /* 969 /*
952 * Allocate space for a new leaf node. 970 * Allocate space for a new leaf node.
953 */ 971 */
@@ -977,10 +995,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
977 * 995 *
978 * Insert the "new" entry in the correct block. 996 * Insert the "new" entry in the correct block.
979 */ 997 */
980 if (state->inleaf) 998 if (state->inleaf) {
999 trace_xfs_attr_leaf_add_old(state->args);
981 error = xfs_attr_leaf_add(oldblk->bp, state->args); 1000 error = xfs_attr_leaf_add(oldblk->bp, state->args);
982 else 1001 } else {
1002 trace_xfs_attr_leaf_add_new(state->args);
983 error = xfs_attr_leaf_add(newblk->bp, state->args); 1003 error = xfs_attr_leaf_add(newblk->bp, state->args);
1004 }
984 1005
985 /* 1006 /*
986 * Update last hashval in each block since we added the name. 1007 * Update last hashval in each block since we added the name.
@@ -1001,6 +1022,8 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
1001 xfs_attr_leaf_map_t *map; 1022 xfs_attr_leaf_map_t *map;
1002 int tablesize, entsize, sum, tmp, i; 1023 int tablesize, entsize, sum, tmp, i;
1003 1024
1025 trace_xfs_attr_leaf_add(args);
1026
1004 leaf = bp->data; 1027 leaf = bp->data;
1005 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1028 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1006 ASSERT((args->index >= 0) 1029 ASSERT((args->index >= 0)
@@ -1128,8 +1151,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1128 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); 1151 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
1129 1152
1130 /* 1153 /*
1131 * Copy the attribute name and value into the new space.
1132 *
1133 * For "remote" attribute values, simply note that we need to 1154 * For "remote" attribute values, simply note that we need to
1134 * allocate space for the "remote" value. We can't actually 1155 * allocate space for the "remote" value. We can't actually
1135 * allocate the extents in this transaction, and we can't decide 1156 * allocate the extents in this transaction, and we can't decide
@@ -1265,6 +1286,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1265 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1286 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1266 args = state->args; 1287 args = state->args;
1267 1288
1289 trace_xfs_attr_leaf_rebalance(args);
1290
1268 /* 1291 /*
1269 * Check ordering of blocks, reverse if it makes things simpler. 1292 * Check ordering of blocks, reverse if it makes things simpler.
1270 * 1293 *
@@ -1810,6 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1810 xfs_mount_t *mp; 1833 xfs_mount_t *mp;
1811 char *tmpbuffer; 1834 char *tmpbuffer;
1812 1835
1836 trace_xfs_attr_leaf_unbalance(state->args);
1837
1813 /* 1838 /*
1814 * Set up environment. 1839 * Set up environment.
1815 */ 1840 */
@@ -1919,6 +1944,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1919 int probe, span; 1944 int probe, span;
1920 xfs_dahash_t hashval; 1945 xfs_dahash_t hashval;
1921 1946
1947 trace_xfs_attr_leaf_lookup(args);
1948
1922 leaf = bp->data; 1949 leaf = bp->data;
1923 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1950 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1924 ASSERT(be16_to_cpu(leaf->hdr.count) 1951 ASSERT(be16_to_cpu(leaf->hdr.count)
@@ -2445,6 +2472,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2445 char *name; 2472 char *name;
2446#endif /* DEBUG */ 2473#endif /* DEBUG */
2447 2474
2475 trace_xfs_attr_leaf_clearflag(args);
2448 /* 2476 /*
2449 * Set up the operation. 2477 * Set up the operation.
2450 */ 2478 */
@@ -2509,6 +2537,8 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2509 xfs_dabuf_t *bp; 2537 xfs_dabuf_t *bp;
2510 int error; 2538 int error;
2511 2539
2540 trace_xfs_attr_leaf_setflag(args);
2541
2512 /* 2542 /*
2513 * Set up the operation. 2543 * Set up the operation.
2514 */ 2544 */
@@ -2565,6 +2595,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2565 char *name1, *name2; 2595 char *name1, *name2;
2566#endif /* DEBUG */ 2596#endif /* DEBUG */
2567 2597
2598 trace_xfs_attr_leaf_flipflags(args);
2599
2568 /* 2600 /*
2569 * Read the block containing the "old" attr 2601 * Read the block containing the "old" attr
2570 */ 2602 */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 188ef2fbd62..85e7e327bcd 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5124,6 +5124,15 @@ xfs_bunmapi(
5124 cur->bc_private.b.flags = 0; 5124 cur->bc_private.b.flags = 0;
5125 } else 5125 } else
5126 cur = NULL; 5126 cur = NULL;
5127
5128 if (isrt) {
5129 /*
5130 * Synchronize by locking the bitmap inode.
5131 */
5132 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
5133 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5134 }
5135
5127 extno = 0; 5136 extno = 0;
5128 while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && 5137 while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5129 (nexts == 0 || extno < nexts)) { 5138 (nexts == 0 || extno < nexts)) {
@@ -5536,8 +5545,12 @@ xfs_getbmap(
5536 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 5545 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5537 return XFS_ERROR(ENOMEM); 5546 return XFS_ERROR(ENOMEM);
5538 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); 5547 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5539 if (!out) 5548 if (!out) {
5540 return XFS_ERROR(ENOMEM); 5549 out = kmem_zalloc_large(bmv->bmv_count *
5550 sizeof(struct getbmapx));
5551 if (!out)
5552 return XFS_ERROR(ENOMEM);
5553 }
5541 5554
5542 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5555 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5543 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { 5556 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,7 +5674,10 @@ xfs_getbmap(
5661 break; 5674 break;
5662 } 5675 }
5663 5676
5664 kmem_free(out); 5677 if (is_vmalloc_addr(out))
5678 kmem_free_large(out);
5679 else
5680 kmem_free(out);
5665 return error; 5681 return error;
5666} 5682}
5667 5683
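The getbmap buffer gets the same large-allocation fallback as fs/xattr.c above, expressed through the XFS kmem wrappers: kmem_zalloc(..., KM_MAYFAIL) first, kmem_zalloc_large() if that fails, and is_vmalloc_addr() at free time to choose kmem_free_large() over kmem_free(). The assumed shape of the two large-buffer wrappers (a sketch; see fs/xfs/kmem.h):

static inline void *kmem_zalloc_large(size_t size)
{
        return vzalloc(size);   /* zeroed, virtually contiguous */
}

static inline void kmem_free_large(void *ptr)
{
        vfree(ptr);
}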
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4dff85c7d7e..6819b5163e3 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
45STATIC int xfsbufd(void *); 45STATIC int xfsbufd(void *);
46 46
47static struct workqueue_struct *xfslogd_workqueue; 47static struct workqueue_struct *xfslogd_workqueue;
48struct workqueue_struct *xfsdatad_workqueue;
49struct workqueue_struct *xfsconvertd_workqueue;
50 48
51#ifdef XFS_BUF_LOCK_TRACKING 49#ifdef XFS_BUF_LOCK_TRACKING
52# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 50# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
1793 if (!xfslogd_workqueue) 1791 if (!xfslogd_workqueue)
1794 goto out_free_buf_zone; 1792 goto out_free_buf_zone;
1795 1793
1796 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
1797 if (!xfsdatad_workqueue)
1798 goto out_destroy_xfslogd_workqueue;
1799
1800 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
1801 WQ_MEM_RECLAIM, 1);
1802 if (!xfsconvertd_workqueue)
1803 goto out_destroy_xfsdatad_workqueue;
1804
1805 return 0; 1794 return 0;
1806 1795
1807 out_destroy_xfsdatad_workqueue:
1808 destroy_workqueue(xfsdatad_workqueue);
1809 out_destroy_xfslogd_workqueue:
1810 destroy_workqueue(xfslogd_workqueue);
1811 out_free_buf_zone: 1796 out_free_buf_zone:
1812 kmem_zone_destroy(xfs_buf_zone); 1797 kmem_zone_destroy(xfs_buf_zone);
1813 out: 1798 out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
1817void 1802void
1818xfs_buf_terminate(void) 1803xfs_buf_terminate(void)
1819{ 1804{
1820 destroy_workqueue(xfsconvertd_workqueue);
1821 destroy_workqueue(xfsdatad_workqueue);
1822 destroy_workqueue(xfslogd_workqueue); 1805 destroy_workqueue(xfslogd_workqueue);
1823 kmem_zone_destroy(xfs_buf_zone); 1806 kmem_zone_destroy(xfs_buf_zone);
1824} 1807}
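With the global xfsdatad/xfsconvertd workqueues gone, data and unwritten-extent-conversion completions queue to per-mount workqueues (mp->m_data_workqueue and mp->m_unwritten_workqueue, as used in fs/xfs/xfs_aops.c above), isolating one filesystem's completion backlog from another's. Presumably they are created at mount time roughly like this (a sketch; the queue names, format strings and error handling here are assumptions):

int xfs_init_mount_workqueues(struct xfs_mount *mp)
{
        mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
                        WQ_MEM_RECLAIM, 0, mp->m_fsname);
        if (!mp->m_data_workqueue)
                return -ENOMEM;

        mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
                        WQ_MEM_RECLAIM, 0, mp->m_fsname);
        if (!mp->m_unwritten_workqueue) {
                destroy_workqueue(mp->m_data_workqueue);
                return -ENOMEM;
        }
        return 0;
}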
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index df7ffb0affe..5bf3be45f54 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,7 +21,6 @@
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/types.h> 22#include <linux/types.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <asm/system.h>
25#include <linux/mm.h> 24#include <linux/mm.h>
26#include <linux/fs.h> 25#include <linux/fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 77c74257c2a..7f1a6f5b05a 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -108,6 +108,8 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
108 int error; 108 int error;
109 xfs_trans_t *tp; 109 xfs_trans_t *tp;
110 110
111 trace_xfs_da_node_create(args);
112
111 tp = args->trans; 113 tp = args->trans;
112 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); 114 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
113 if (error) 115 if (error)
@@ -140,6 +142,8 @@ xfs_da_split(xfs_da_state_t *state)
140 xfs_dabuf_t *bp; 142 xfs_dabuf_t *bp;
141 int max, action, error, i; 143 int max, action, error, i;
142 144
145 trace_xfs_da_split(state->args);
146
143 /* 147 /*
144 * Walk back up the tree splitting/inserting/adjusting as necessary. 148 * Walk back up the tree splitting/inserting/adjusting as necessary.
145 * If we need to insert and there isn't room, split the node, then 149 * If we need to insert and there isn't room, split the node, then
@@ -178,10 +182,12 @@ xfs_da_split(xfs_da_state_t *state)
178 state->extravalid = 1; 182 state->extravalid = 1;
179 if (state->inleaf) { 183 if (state->inleaf) {
180 state->extraafter = 0; /* before newblk */ 184 state->extraafter = 0; /* before newblk */
185 trace_xfs_attr_leaf_split_before(state->args);
181 error = xfs_attr_leaf_split(state, oldblk, 186 error = xfs_attr_leaf_split(state, oldblk,
182 &state->extrablk); 187 &state->extrablk);
183 } else { 188 } else {
184 state->extraafter = 1; /* after newblk */ 189 state->extraafter = 1; /* after newblk */
190 trace_xfs_attr_leaf_split_after(state->args);
185 error = xfs_attr_leaf_split(state, newblk, 191 error = xfs_attr_leaf_split(state, newblk,
186 &state->extrablk); 192 &state->extrablk);
187 } 193 }
@@ -300,6 +306,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
300 xfs_mount_t *mp; 306 xfs_mount_t *mp;
301 xfs_dir2_leaf_t *leaf; 307 xfs_dir2_leaf_t *leaf;
302 308
309 trace_xfs_da_root_split(state->args);
310
303 /* 311 /*
304 * Copy the existing (incorrect) block from the root node position 312 * Copy the existing (incorrect) block from the root node position
305 * to a free space somewhere. 313 * to a free space somewhere.
@@ -380,6 +388,8 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
380 int newcount, error; 388 int newcount, error;
381 int useextra; 389 int useextra;
382 390
391 trace_xfs_da_node_split(state->args);
392
383 node = oldblk->bp->data; 393 node = oldblk->bp->data;
384 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 394 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
385 395
@@ -466,6 +476,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
466 int count, tmp; 476 int count, tmp;
467 xfs_trans_t *tp; 477 xfs_trans_t *tp;
468 478
479 trace_xfs_da_node_rebalance(state->args);
480
469 node1 = blk1->bp->data; 481 node1 = blk1->bp->data;
470 node2 = blk2->bp->data; 482 node2 = blk2->bp->data;
471 /* 483 /*
@@ -574,6 +586,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
574 xfs_da_node_entry_t *btree; 586 xfs_da_node_entry_t *btree;
575 int tmp; 587 int tmp;
576 588
589 trace_xfs_da_node_add(state->args);
590
577 node = oldblk->bp->data; 591 node = oldblk->bp->data;
578 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 592 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
579 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 593 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
@@ -619,6 +633,8 @@ xfs_da_join(xfs_da_state_t *state)
619 xfs_da_state_blk_t *drop_blk, *save_blk; 633 xfs_da_state_blk_t *drop_blk, *save_blk;
620 int action, error; 634 int action, error;
621 635
636 trace_xfs_da_join(state->args);
637
622 action = 0; 638 action = 0;
623 drop_blk = &state->path.blk[ state->path.active-1 ]; 639 drop_blk = &state->path.blk[ state->path.active-1 ];
624 save_blk = &state->altpath.blk[ state->path.active-1 ]; 640 save_blk = &state->altpath.blk[ state->path.active-1 ];
@@ -723,6 +739,8 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
723 xfs_dabuf_t *bp; 739 xfs_dabuf_t *bp;
724 int error; 740 int error;
725 741
742 trace_xfs_da_root_join(state->args);
743
726 args = state->args; 744 args = state->args;
727 ASSERT(args != NULL); 745 ASSERT(args != NULL);
728 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 746 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
@@ -941,6 +959,8 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
941 xfs_da_node_entry_t *btree; 959 xfs_da_node_entry_t *btree;
942 int tmp; 960 int tmp;
943 961
962 trace_xfs_da_node_remove(state->args);
963
944 node = drop_blk->bp->data; 964 node = drop_blk->bp->data;
945 ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); 965 ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
946 ASSERT(drop_blk->index >= 0); 966 ASSERT(drop_blk->index >= 0);
@@ -984,6 +1004,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
984 int tmp; 1004 int tmp;
985 xfs_trans_t *tp; 1005 xfs_trans_t *tp;
986 1006
1007 trace_xfs_da_node_unbalance(state->args);
1008
987 drop_node = drop_blk->bp->data; 1009 drop_node = drop_blk->bp->data;
988 save_node = save_blk->bp->data; 1010 save_node = save_blk->bp->data;
989 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1011 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
@@ -1230,6 +1252,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1230 /* 1252 /*
1231 * Link new block in before existing block. 1253 * Link new block in before existing block.
1232 */ 1254 */
1255 trace_xfs_da_link_before(args);
1233 new_info->forw = cpu_to_be32(old_blk->blkno); 1256 new_info->forw = cpu_to_be32(old_blk->blkno);
1234 new_info->back = old_info->back; 1257 new_info->back = old_info->back;
1235 if (old_info->back) { 1258 if (old_info->back) {
@@ -1251,6 +1274,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1251 /* 1274 /*
1252 * Link new block in after existing block. 1275 * Link new block in after existing block.
1253 */ 1276 */
1277 trace_xfs_da_link_after(args);
1254 new_info->forw = old_info->forw; 1278 new_info->forw = old_info->forw;
1255 new_info->back = cpu_to_be32(old_blk->blkno); 1279 new_info->back = cpu_to_be32(old_blk->blkno);
1256 if (old_info->forw) { 1280 if (old_info->forw) {
@@ -1348,6 +1372,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1348 * Unlink the leaf block from the doubly linked chain of leaves. 1372 * Unlink the leaf block from the doubly linked chain of leaves.
1349 */ 1373 */
1350 if (be32_to_cpu(save_info->back) == drop_blk->blkno) { 1374 if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1375 trace_xfs_da_unlink_back(args);
1351 save_info->back = drop_info->back; 1376 save_info->back = drop_info->back;
1352 if (drop_info->back) { 1377 if (drop_info->back) {
1353 error = xfs_da_read_buf(args->trans, args->dp, 1378 error = xfs_da_read_buf(args->trans, args->dp,
@@ -1365,6 +1390,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1365 xfs_da_buf_done(bp); 1390 xfs_da_buf_done(bp);
1366 } 1391 }
1367 } else { 1392 } else {
1393 trace_xfs_da_unlink_forward(args);
1368 save_info->forw = drop_info->forw; 1394 save_info->forw = drop_info->forw;
1369 if (drop_info->forw) { 1395 if (drop_info->forw) {
1370 error = xfs_da_read_buf(args->trans, args->dp, 1396 error = xfs_da_read_buf(args->trans, args->dp,
@@ -1652,6 +1678,8 @@ xfs_da_grow_inode(
1652 int count; 1678 int count;
1653 int error; 1679 int error;
1654 1680
1681 trace_xfs_da_grow_inode(args);
1682
1655 if (args->whichfork == XFS_DATA_FORK) { 1683 if (args->whichfork == XFS_DATA_FORK) {
1656 bno = args->dp->i_mount->m_dirleafblk; 1684 bno = args->dp->i_mount->m_dirleafblk;
1657 count = args->dp->i_mount->m_dirblkfsbs; 1685 count = args->dp->i_mount->m_dirblkfsbs;
@@ -1690,6 +1718,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1690 xfs_dir2_leaf_t *dead_leaf2; 1718 xfs_dir2_leaf_t *dead_leaf2;
1691 xfs_dahash_t dead_hash; 1719 xfs_dahash_t dead_hash;
1692 1720
1721 trace_xfs_da_swap_lastblock(args);
1722
1693 dead_buf = *dead_bufp; 1723 dead_buf = *dead_bufp;
1694 dead_blkno = *dead_blknop; 1724 dead_blkno = *dead_blknop;
1695 tp = args->trans; 1725 tp = args->trans;
@@ -1878,6 +1908,8 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1878 xfs_trans_t *tp; 1908 xfs_trans_t *tp;
1879 xfs_mount_t *mp; 1909 xfs_mount_t *mp;
1880 1910
1911 trace_xfs_da_shrink_inode(args);
1912
1881 dp = args->dp; 1913 dp = args->dp;
1882 w = args->whichfork; 1914 w = args->whichfork;
1883 tp = args->trans; 1915 tp = args->trans;
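
All of the new trace_xfs_da_* hooks take the common struct xfs_da_args, so on the xfs_trace.h side they can share a single event class. A minimal sketch of how such tracepoints are plausibly declared; the class name, field selection and the DEFINE_DA_EVENT helper below are illustrative, not necessarily what the tree uses:

DECLARE_EVENT_CLASS(xfs_da_class,
	TP_PROTO(struct xfs_da_args *args),
	TP_ARGS(args),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, namelen)
	),
	TP_fast_assign(
		/* args->dp is the inode the dir/attr operation works on */
		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
		__entry->ino = args->dp->i_ino;
		__entry->namelen = args->namelen;
	),
	TP_printk("dev %d:%d ino 0x%llx namelen %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino, __entry->namelen)
);

#define DEFINE_DA_EVENT(name) \
DEFINE_EVENT(xfs_da_class, name, \
	TP_PROTO(struct xfs_da_args *args), \
	TP_ARGS(args))
DEFINE_DA_EVENT(xfs_da_node_unbalance);
DEFINE_DA_EVENT(xfs_da_swap_lastblock);
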
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index dd974a55c77..1137bbc5ecc 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -215,7 +215,7 @@ xfs_swap_extents(
215 xfs_trans_t *tp; 215 xfs_trans_t *tp;
216 xfs_bstat_t *sbp = &sxp->sx_stat; 216 xfs_bstat_t *sbp = &sxp->sx_stat;
217 xfs_ifork_t *tempifp, *ifp, *tifp; 217 xfs_ifork_t *tempifp, *ifp, *tifp;
218 int ilf_fields, tilf_fields; 218 int src_log_flags, target_log_flags;
219 int error = 0; 219 int error = 0;
220 int aforkblks = 0; 220 int aforkblks = 0;
221 int taforkblks = 0; 221 int taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
385 tip->i_delayed_blks = ip->i_delayed_blks; 385 tip->i_delayed_blks = ip->i_delayed_blks;
386 ip->i_delayed_blks = 0; 386 ip->i_delayed_blks = 0;
387 387
388 ilf_fields = XFS_ILOG_CORE; 388 src_log_flags = XFS_ILOG_CORE;
389 389 switch (ip->i_d.di_format) {
390 switch(ip->i_d.di_format) {
391 case XFS_DINODE_FMT_EXTENTS: 390 case XFS_DINODE_FMT_EXTENTS:
392 /* If the extents fit in the inode, fix the 391 /* If the extents fit in the inode, fix the
393 * pointer. Otherwise it's already NULL or 392 * pointer. Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
397 ifp->if_u1.if_extents = 396 ifp->if_u1.if_extents =
398 ifp->if_u2.if_inline_ext; 397 ifp->if_u2.if_inline_ext;
399 } 398 }
400 ilf_fields |= XFS_ILOG_DEXT; 399 src_log_flags |= XFS_ILOG_DEXT;
401 break; 400 break;
402 case XFS_DINODE_FMT_BTREE: 401 case XFS_DINODE_FMT_BTREE:
403 ilf_fields |= XFS_ILOG_DBROOT; 402 src_log_flags |= XFS_ILOG_DBROOT;
404 break; 403 break;
405 } 404 }
406 405
407 tilf_fields = XFS_ILOG_CORE; 406 target_log_flags = XFS_ILOG_CORE;
408 407 switch (tip->i_d.di_format) {
409 switch(tip->i_d.di_format) {
410 case XFS_DINODE_FMT_EXTENTS: 408 case XFS_DINODE_FMT_EXTENTS:
411 /* If the extents fit in the inode, fix the 409 /* If the extents fit in the inode, fix the
412 * pointer. Otherwise it's already NULL or 410 * pointer. Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
416 tifp->if_u1.if_extents = 414 tifp->if_u1.if_extents =
417 tifp->if_u2.if_inline_ext; 415 tifp->if_u2.if_inline_ext;
418 } 416 }
419 tilf_fields |= XFS_ILOG_DEXT; 417 target_log_flags |= XFS_ILOG_DEXT;
420 break; 418 break;
421 case XFS_DINODE_FMT_BTREE: 419 case XFS_DINODE_FMT_BTREE:
422 tilf_fields |= XFS_ILOG_DBROOT; 420 target_log_flags |= XFS_ILOG_DBROOT;
423 break; 421 break;
424 } 422 }
425 423
@@ -427,8 +425,8 @@ xfs_swap_extents(
427 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 425 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
428 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 426 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 427
430 xfs_trans_log_inode(tp, ip, ilf_fields); 428 xfs_trans_log_inode(tp, ip, src_log_flags);
431 xfs_trans_log_inode(tp, tip, tilf_fields); 429 xfs_trans_log_inode(tp, tip, target_log_flags);
432 430
433 /* 431 /*
434 * If this is a synchronous mount, make sure that the 432 * If this is a synchronous mount, make sure that the
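
For reference, the rename makes the flag selection easier to follow; the mapping driven by the two switch statements above is (informal summary):

/*
 * data fork format          extra log flag   what gets logged
 * XFS_DINODE_FMT_EXTENTS    XFS_ILOG_DEXT    the in-core extent list
 * XFS_DINODE_FMT_BTREE      XFS_ILOG_DBROOT  the b-tree root block
 *
 * XFS_ILOG_CORE is always set so both swapped inode cores are logged.
 */
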
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 9245e029b8e..d3b63aefd01 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -29,6 +29,7 @@
29#include "xfs_dinode.h" 29#include "xfs_dinode.h"
30#include "xfs_inode.h" 30#include "xfs_inode.h"
31#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_dir2.h"
32#include "xfs_dir2_format.h" 33#include "xfs_dir2_format.h"
33#include "xfs_dir2_priv.h" 34#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 286a051f12c..1ad3a4b8ca4 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -37,9 +37,9 @@ STATIC int
37xfs_trim_extents( 37xfs_trim_extents(
38 struct xfs_mount *mp, 38 struct xfs_mount *mp,
39 xfs_agnumber_t agno, 39 xfs_agnumber_t agno,
40 xfs_fsblock_t start, 40 xfs_daddr_t start,
41 xfs_fsblock_t end, 41 xfs_daddr_t end,
42 xfs_fsblock_t minlen, 42 xfs_daddr_t minlen,
43 __uint64_t *blocks_trimmed) 43 __uint64_t *blocks_trimmed)
44{ 44{
45 struct block_device *bdev = mp->m_ddev_targp->bt_bdev; 45 struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
@@ -67,7 +67,7 @@ xfs_trim_extents(
67 /* 67 /*
68 * Look up the longest btree in the AGF and start with it. 68 * Look up the longest btree in the AGF and start with it.
69 */ 69 */
70 error = xfs_alloc_lookup_le(cur, 0, 70 error = xfs_alloc_lookup_ge(cur, 0,
71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); 71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
72 if (error) 72 if (error)
73 goto out_del_cursor; 73 goto out_del_cursor;
@@ -77,8 +77,10 @@ xfs_trim_extents(
77 * enough to be worth discarding. 77 * enough to be worth discarding.
78 */ 78 */
79 while (i) { 79 while (i) {
80 xfs_agblock_t fbno; 80 xfs_agblock_t fbno;
81 xfs_extlen_t flen; 81 xfs_extlen_t flen;
82 xfs_daddr_t dbno;
83 xfs_extlen_t dlen;
82 84
83 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); 85 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
84 if (error) 86 if (error)
@@ -87,9 +89,17 @@ xfs_trim_extents(
87 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); 89 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
88 90
89 /* 91 /*
92 * use daddr format for all range/len calculations as that is
93 * the format the range/len variables are supplied in by
94 * userspace.
95 */
96 dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
97 dlen = XFS_FSB_TO_BB(mp, flen);
98
99 /*
90 * Too small? Give up. 100 * Too small? Give up.
91 */ 101 */
92 if (flen < minlen) { 102 if (dlen < minlen) {
93 trace_xfs_discard_toosmall(mp, agno, fbno, flen); 103 trace_xfs_discard_toosmall(mp, agno, fbno, flen);
94 goto out_del_cursor; 104 goto out_del_cursor;
95 } 105 }
@@ -99,8 +109,7 @@ xfs_trim_extents(
99 * supposed to discard skip it. Do not bother to trim 109 * supposed to discard skip it. Do not bother to trim
100 * down partially overlapping ranges for now. 110 * down partially overlapping ranges for now.
101 */ 111 */
102 if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || 112 if (dbno + dlen < start || dbno > end) {
103 XFS_AGB_TO_FSB(mp, agno, fbno) > end) {
104 trace_xfs_discard_exclude(mp, agno, fbno, flen); 113 trace_xfs_discard_exclude(mp, agno, fbno, flen);
105 goto next_extent; 114 goto next_extent;
106 } 115 }
@@ -115,10 +124,7 @@ xfs_trim_extents(
115 } 124 }
116 125
117 trace_xfs_discard_extent(mp, agno, fbno, flen); 126 trace_xfs_discard_extent(mp, agno, fbno, flen);
118 error = -blkdev_issue_discard(bdev, 127 error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
119 XFS_AGB_TO_DADDR(mp, agno, fbno),
120 XFS_FSB_TO_BB(mp, flen),
121 GFP_NOFS, 0);
122 if (error) 128 if (error)
123 goto out_del_cursor; 129 goto out_del_cursor;
124 *blocks_trimmed += flen; 130 *blocks_trimmed += flen;
@@ -137,6 +143,15 @@ out_put_perag:
137 return error; 143 return error;
138} 144}
139 145
146/*
147 * trim a range of the filesystem.
148 *
149 * Note: the parameters passed from userspace are byte ranges into the
 150 * filesystem which do not match the format we use for filesystem block
151 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
152 * is a linear address range. Hence we need to use DADDR based conversions and
153 * comparisons for determining the correct offset and regions to trim.
154 */
140int 155int
141xfs_ioc_trim( 156xfs_ioc_trim(
142 struct xfs_mount *mp, 157 struct xfs_mount *mp,
@@ -145,7 +160,7 @@ xfs_ioc_trim(
145 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; 160 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
146 unsigned int granularity = q->limits.discard_granularity; 161 unsigned int granularity = q->limits.discard_granularity;
147 struct fstrim_range range; 162 struct fstrim_range range;
148 xfs_fsblock_t start, end, minlen; 163 xfs_daddr_t start, end, minlen;
149 xfs_agnumber_t start_agno, end_agno, agno; 164 xfs_agnumber_t start_agno, end_agno, agno;
150 __uint64_t blocks_trimmed = 0; 165 __uint64_t blocks_trimmed = 0;
151 int error, last_error = 0; 166 int error, last_error = 0;
@@ -159,22 +174,22 @@ xfs_ioc_trim(
159 174
160 /* 175 /*
161 * Truncating down the len isn't actually quite correct, but using 176 * Truncating down the len isn't actually quite correct, but using
162 * XFS_B_TO_FSB would mean we trivially get overflows for values 177 * BBTOB would mean we trivially get overflows for values
163 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default 178 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
164 * used by the fstrim application. In the end it really doesn't 179 * used by the fstrim application. In the end it really doesn't
165 * matter as trimming blocks is an advisory interface. 180 * matter as trimming blocks is an advisory interface.
166 */ 181 */
167 start = XFS_B_TO_FSBT(mp, range.start); 182 start = BTOBB(range.start);
168 end = start + XFS_B_TO_FSBT(mp, range.len) - 1; 183 end = start + BTOBBT(range.len) - 1;
169 minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); 184 minlen = BTOBB(max_t(u64, granularity, range.minlen));
170 185
171 if (start >= mp->m_sb.sb_dblocks) 186 if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
172 return -XFS_ERROR(EINVAL); 187 return -XFS_ERROR(EINVAL);
173 if (end > mp->m_sb.sb_dblocks - 1) 188 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
 174 end = mp->m_sb.sb_dblocks - 1; 189 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
175 190
176 start_agno = XFS_FSB_TO_AGNO(mp, start); 191 start_agno = xfs_daddr_to_agno(mp, start);
177 end_agno = XFS_FSB_TO_AGNO(mp, end); 192 end_agno = xfs_daddr_to_agno(mp, end);
178 193
179 for (agno = start_agno; agno <= end_agno; agno++) { 194 for (agno = start_agno; agno <= end_agno; agno++) {
180 error = -xfs_trim_extents(mp, agno, start, end, minlen, 195 error = -xfs_trim_extents(mp, agno, start, end, minlen,
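
The core of this change is converting everything to 512-byte basic blocks (daddr units) up front. A short sketch of the byte-to-BB helpers used above, with a worked example; the definitions shown mirror the usual XFS ones from xfs_linux.h but are reproduced here only for illustration:

#define BBSHIFT		9
#define BBSIZE		(1 << BBSHIFT)
#define BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)	/* round up */
#define BTOBBT(bytes)	((__u64)(bytes) >> BBSHIFT)			/* truncate */

/*
 * Worked example: fstrim_range{ start = 1 MiB, len = 1 GiB } becomes
 *
 *	start = BTOBB(1 << 20)             = 2048
 *	end   = 2048 + BTOBBT(1 << 30) - 1 = 2099199
 *
 * a linear sector range directly comparable with the dbno/dlen values
 * computed per free extent in xfs_trim_extents().
 */
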
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 53db20ee3e7..1155208fa83 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -43,11 +43,10 @@
43 * Lock order: 43 * Lock order:
44 * 44 *
45 * ip->i_lock 45 * ip->i_lock
46 * qh->qh_lock 46 * qi->qi_tree_lock
47 * qi->qi_dqlist_lock 47 * dquot->q_qlock (xfs_dqlock() and friends)
48 * dquot->q_qlock (xfs_dqlock() and friends) 48 * dquot->q_flush (xfs_dqflock() and friends)
49 * dquot->q_flush (xfs_dqflock() and friends) 49 * qi->qi_lru_lock
50 * xfs_Gqm->qm_dqfrlist_lock
51 * 50 *
52 * If two dquots need to be locked the order is user before group/project, 51 * If two dquots need to be locked the order is user before group/project,
53 * otherwise by the lowest id first, see xfs_dqlock2. 52 * otherwise by the lowest id first, see xfs_dqlock2.
@@ -60,6 +59,9 @@ int xfs_dqreq_num;
60int xfs_dqerror_mod = 33; 59int xfs_dqerror_mod = 33;
61#endif 60#endif
62 61
62struct kmem_zone *xfs_qm_dqtrxzone;
63static struct kmem_zone *xfs_qm_dqzone;
64
63static struct lock_class_key xfs_dquot_other_class; 65static struct lock_class_key xfs_dquot_other_class;
64 66
65/* 67/*
@@ -69,12 +71,12 @@ void
69xfs_qm_dqdestroy( 71xfs_qm_dqdestroy(
70 xfs_dquot_t *dqp) 72 xfs_dquot_t *dqp)
71{ 73{
72 ASSERT(list_empty(&dqp->q_freelist)); 74 ASSERT(list_empty(&dqp->q_lru));
73 75
74 mutex_destroy(&dqp->q_qlock); 76 mutex_destroy(&dqp->q_qlock);
75 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 77 kmem_zone_free(xfs_qm_dqzone, dqp);
76 78
77 atomic_dec(&xfs_Gqm->qm_totaldquots); 79 XFS_STATS_DEC(xs_qm_dquot);
78} 80}
79 81
80/* 82/*
@@ -282,7 +284,7 @@ xfs_qm_dqalloc(
282 * Return if this type of quotas is turned off while we didn't 284 * Return if this type of quotas is turned off while we didn't
283 * have an inode lock 285 * have an inode lock
284 */ 286 */
285 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 287 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
286 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 288 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
287 return (ESRCH); 289 return (ESRCH);
288 } 290 }
@@ -384,7 +386,7 @@ xfs_qm_dqtobp(
384 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; 386 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
385 387
386 xfs_ilock(quotip, XFS_ILOCK_SHARED); 388 xfs_ilock(quotip, XFS_ILOCK_SHARED);
387 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 389 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
388 /* 390 /*
389 * Return if this type of quotas is turned off while we 391 * Return if this type of quotas is turned off while we
390 * didn't have the quota inode lock. 392 * didn't have the quota inode lock.
@@ -492,12 +494,12 @@ xfs_qm_dqread(
492 int cancelflags = 0; 494 int cancelflags = 0;
493 495
494 496
495 dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); 497 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
496 498
497 dqp->dq_flags = type; 499 dqp->dq_flags = type;
498 dqp->q_core.d_id = cpu_to_be32(id); 500 dqp->q_core.d_id = cpu_to_be32(id);
499 dqp->q_mount = mp; 501 dqp->q_mount = mp;
500 INIT_LIST_HEAD(&dqp->q_freelist); 502 INIT_LIST_HEAD(&dqp->q_lru);
501 mutex_init(&dqp->q_qlock); 503 mutex_init(&dqp->q_qlock);
502 init_waitqueue_head(&dqp->q_pinwait); 504 init_waitqueue_head(&dqp->q_pinwait);
503 505
@@ -516,7 +518,7 @@ xfs_qm_dqread(
516 if (!(type & XFS_DQ_USER)) 518 if (!(type & XFS_DQ_USER))
517 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); 519 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
518 520
519 atomic_inc(&xfs_Gqm->qm_totaldquots); 521 XFS_STATS_INC(xs_qm_dquot);
520 522
521 trace_xfs_dqread(dqp); 523 trace_xfs_dqread(dqp);
522 524
@@ -602,60 +604,6 @@ error0:
602} 604}
603 605
604/* 606/*
605 * Lookup a dquot in the incore dquot hashtable. We keep two separate
606 * hashtables for user and group dquots; and, these are global tables
607 * inside the XQM, not per-filesystem tables.
608 * The hash chain must be locked by caller, and it is left locked
609 * on return. Returning dquot is locked.
610 */
611STATIC int
612xfs_qm_dqlookup(
613 xfs_mount_t *mp,
614 xfs_dqid_t id,
615 xfs_dqhash_t *qh,
616 xfs_dquot_t **O_dqpp)
617{
618 xfs_dquot_t *dqp;
619
620 ASSERT(mutex_is_locked(&qh->qh_lock));
621
622 /*
623 * Traverse the hashchain looking for a match
624 */
625 list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
626 /*
627 * We already have the hashlock. We don't need the
628 * dqlock to look at the id field of the dquot, since the
629 * id can't be modified without the hashlock anyway.
630 */
631 if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
632 continue;
633
634 trace_xfs_dqlookup_found(dqp);
635
636 xfs_dqlock(dqp);
637 if (dqp->dq_flags & XFS_DQ_FREEING) {
638 *O_dqpp = NULL;
639 xfs_dqunlock(dqp);
640 return -1;
641 }
642
643 dqp->q_nrefs++;
644
645 /*
646 * move the dquot to the front of the hashchain
647 */
648 list_move(&dqp->q_hashlist, &qh->qh_list);
649 trace_xfs_dqlookup_done(dqp);
650 *O_dqpp = dqp;
651 return 0;
652 }
653
654 *O_dqpp = NULL;
655 return 1;
656}
657
658/*
659 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a 607 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
660 * a locked dquot, doing an allocation (if requested) as needed. 608 * a locked dquot, doing an allocation (if requested) as needed.
661 * When both an inode and an id are given, the inode's id takes precedence. 609 * When both an inode and an id are given, the inode's id takes precedence.
@@ -672,10 +620,10 @@ xfs_qm_dqget(
672 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ 620 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
673 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ 621 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
674{ 622{
675 xfs_dquot_t *dqp; 623 struct xfs_quotainfo *qi = mp->m_quotainfo;
676 xfs_dqhash_t *h; 624 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
677 uint version; 625 struct xfs_dquot *dqp;
678 int error; 626 int error;
679 627
680 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 628 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
681 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || 629 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@@ -683,7 +631,6 @@ xfs_qm_dqget(
683 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { 631 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
684 return (ESRCH); 632 return (ESRCH);
685 } 633 }
686 h = XFS_DQ_HASH(mp, id, type);
687 634
688#ifdef DEBUG 635#ifdef DEBUG
689 if (xfs_do_dqerror) { 636 if (xfs_do_dqerror) {
@@ -699,42 +646,33 @@ xfs_qm_dqget(
699 type == XFS_DQ_GROUP); 646 type == XFS_DQ_GROUP);
700 if (ip) { 647 if (ip) {
701 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 648 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
702 if (type == XFS_DQ_USER) 649 ASSERT(xfs_inode_dquot(ip, type) == NULL);
703 ASSERT(ip->i_udquot == NULL);
704 else
705 ASSERT(ip->i_gdquot == NULL);
706 } 650 }
707#endif 651#endif
708 652
709restart: 653restart:
710 mutex_lock(&h->qh_lock); 654 mutex_lock(&qi->qi_tree_lock);
655 dqp = radix_tree_lookup(tree, id);
656 if (dqp) {
657 xfs_dqlock(dqp);
658 if (dqp->dq_flags & XFS_DQ_FREEING) {
659 xfs_dqunlock(dqp);
660 mutex_unlock(&qi->qi_tree_lock);
661 trace_xfs_dqget_freeing(dqp);
662 delay(1);
663 goto restart;
664 }
711 665
712 /* 666 dqp->q_nrefs++;
713 * Look in the cache (hashtable). 667 mutex_unlock(&qi->qi_tree_lock);
714 * The chain is kept locked during lookup. 668
715 */ 669 trace_xfs_dqget_hit(dqp);
716 switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { 670 XFS_STATS_INC(xs_qm_dqcachehits);
717 case -1: 671 *O_dqpp = dqp;
718 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 672 return 0;
719 mutex_unlock(&h->qh_lock);
720 delay(1);
721 goto restart;
722 case 0:
723 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
724 /*
725 * The dquot was found, moved to the front of the chain,
726 * taken off the freelist if it was on it, and locked
727 * at this point. Just unlock the hashchain and return.
728 */
729 ASSERT(*O_dqpp);
730 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
731 mutex_unlock(&h->qh_lock);
732 trace_xfs_dqget_hit(*O_dqpp);
733 return 0; /* success */
734 default:
735 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
736 break;
737 } 673 }
674 mutex_unlock(&qi->qi_tree_lock);
675 XFS_STATS_INC(xs_qm_dqcachemisses);
738 676
739 /* 677 /*
740 * Dquot cache miss. We don't want to keep the inode lock across 678 * Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +683,6 @@ restart:
745 */ 683 */
746 if (ip) 684 if (ip)
747 xfs_iunlock(ip, XFS_ILOCK_EXCL); 685 xfs_iunlock(ip, XFS_ILOCK_EXCL);
748 /*
749 * Save the hashchain version stamp, and unlock the chain, so that
750 * we don't keep the lock across a disk read
751 */
752 version = h->qh_version;
753 mutex_unlock(&h->qh_lock);
754 686
755 error = xfs_qm_dqread(mp, id, type, flags, &dqp); 687 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
756 688
@@ -760,97 +692,53 @@ restart:
760 if (error) 692 if (error)
761 return error; 693 return error;
762 694
763 /*
764 * Dquot lock comes after hashlock in the lock ordering
765 */
766 if (ip) { 695 if (ip) {
767 /* 696 /*
768 * A dquot could be attached to this inode by now, since 697 * A dquot could be attached to this inode by now, since
769 * we had dropped the ilock. 698 * we had dropped the ilock.
770 */ 699 */
771 if (type == XFS_DQ_USER) { 700 if (xfs_this_quota_on(mp, type)) {
772 if (!XFS_IS_UQUOTA_ON(mp)) { 701 struct xfs_dquot *dqp1;
773 /* inode stays locked on return */ 702
774 xfs_qm_dqdestroy(dqp); 703 dqp1 = xfs_inode_dquot(ip, type);
775 return XFS_ERROR(ESRCH); 704 if (dqp1) {
776 }
777 if (ip->i_udquot) {
778 xfs_qm_dqdestroy(dqp); 705 xfs_qm_dqdestroy(dqp);
779 dqp = ip->i_udquot; 706 dqp = dqp1;
780 xfs_dqlock(dqp); 707 xfs_dqlock(dqp);
781 goto dqret; 708 goto dqret;
782 } 709 }
783 } else { 710 } else {
784 if (!XFS_IS_OQUOTA_ON(mp)) { 711 /* inode stays locked on return */
785 /* inode stays locked on return */ 712 xfs_qm_dqdestroy(dqp);
786 xfs_qm_dqdestroy(dqp); 713 return XFS_ERROR(ESRCH);
787 return XFS_ERROR(ESRCH);
788 }
789 if (ip->i_gdquot) {
790 xfs_qm_dqdestroy(dqp);
791 dqp = ip->i_gdquot;
792 xfs_dqlock(dqp);
793 goto dqret;
794 }
795 } 714 }
796 } 715 }
797 716
798 /* 717 mutex_lock(&qi->qi_tree_lock);
799 * Hashlock comes after ilock in lock order 718 error = -radix_tree_insert(tree, id, dqp);
800 */ 719 if (unlikely(error)) {
801 mutex_lock(&h->qh_lock); 720 WARN_ON(error != EEXIST);
802 if (version != h->qh_version) { 721
803 xfs_dquot_t *tmpdqp;
804 /* 722 /*
805 * Now, see if somebody else put the dquot in the 723 * Duplicate found. Just throw away the new dquot and start
806 * hashtable before us. This can happen because we didn't 724 * over.
807 * keep the hashchain lock. We don't have to worry about
808 * lock order between the two dquots here since dqp isn't
809 * on any findable lists yet.
810 */ 725 */
811 switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { 726 mutex_unlock(&qi->qi_tree_lock);
812 case 0: 727 trace_xfs_dqget_dup(dqp);
813 case -1: 728 xfs_qm_dqdestroy(dqp);
814 /* 729 XFS_STATS_INC(xs_qm_dquot_dups);
815 * Duplicate found, either in cache or on its way out. 730 goto restart;
816 * Just throw away the new dquot and start over.
817 */
818 if (tmpdqp)
819 xfs_qm_dqput(tmpdqp);
820 mutex_unlock(&h->qh_lock);
821 xfs_qm_dqdestroy(dqp);
822 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
823 goto restart;
824 default:
825 break;
826 }
827 } 731 }
828 732
829 /* 733 /*
830 * Put the dquot at the beginning of the hash-chain and mp's list
831 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
832 */
833 ASSERT(mutex_is_locked(&h->qh_lock));
834 dqp->q_hash = h;
835 list_add(&dqp->q_hashlist, &h->qh_list);
836 h->qh_version++;
837
838 /*
839 * Attach this dquot to this filesystem's list of all dquots,
840 * kept inside the mount structure in m_quotainfo field
841 */
842 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
843
844 /*
845 * We return a locked dquot to the caller, with a reference taken 734 * We return a locked dquot to the caller, with a reference taken
846 */ 735 */
847 xfs_dqlock(dqp); 736 xfs_dqlock(dqp);
848 dqp->q_nrefs = 1; 737 dqp->q_nrefs = 1;
849 738
850 list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); 739 qi->qi_dquots++;
851 mp->m_quotainfo->qi_dquots++; 740 mutex_unlock(&qi->qi_tree_lock);
852 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 741
853 mutex_unlock(&h->qh_lock);
854 dqret: 742 dqret:
855 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 743 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
856 trace_xfs_dqget_miss(dqp); 744 trace_xfs_dqget_miss(dqp);
@@ -859,37 +747,22 @@ restart:
859} 747}
860 748
861 749
862/* 750STATIC void
863 * Release a reference to the dquot (decrement ref-count) 751xfs_qm_dqput_final(
864 * and unlock it. If there is a group quota attached to this
865 * dquot, carefully release that too without tripping over
866 * deadlocks'n'stuff.
867 */
868void
869xfs_qm_dqput(
870 struct xfs_dquot *dqp) 752 struct xfs_dquot *dqp)
871{ 753{
754 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
872 struct xfs_dquot *gdqp; 755 struct xfs_dquot *gdqp;
873 756
874 ASSERT(dqp->q_nrefs > 0);
875 ASSERT(XFS_DQ_IS_LOCKED(dqp));
876
877 trace_xfs_dqput(dqp);
878
879recurse:
880 if (--dqp->q_nrefs > 0) {
881 xfs_dqunlock(dqp);
882 return;
883 }
884
885 trace_xfs_dqput_free(dqp); 757 trace_xfs_dqput_free(dqp);
886 758
887 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 759 mutex_lock(&qi->qi_lru_lock);
888 if (list_empty(&dqp->q_freelist)) { 760 if (list_empty(&dqp->q_lru)) {
889 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 761 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
890 xfs_Gqm->qm_dqfrlist_cnt++; 762 qi->qi_lru_count++;
763 XFS_STATS_INC(xs_qm_dquot_unused);
891 } 764 }
892 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 765 mutex_unlock(&qi->qi_lru_lock);
893 766
894 /* 767 /*
895 * If we just added a udquot to the freelist, then we want to release 768 * If we just added a udquot to the freelist, then we want to release
@@ -906,10 +779,29 @@ recurse:
906 /* 779 /*
907 * If we had a group quota hint, release it now. 780 * If we had a group quota hint, release it now.
908 */ 781 */
909 if (gdqp) { 782 if (gdqp)
910 dqp = gdqp; 783 xfs_qm_dqput(gdqp);
911 goto recurse; 784}
912 } 785
786/*
787 * Release a reference to the dquot (decrement ref-count) and unlock it.
788 *
789 * If there is a group quota attached to this dquot, carefully release that
790 * too without tripping over deadlocks'n'stuff.
791 */
792void
793xfs_qm_dqput(
794 struct xfs_dquot *dqp)
795{
796 ASSERT(dqp->q_nrefs > 0);
797 ASSERT(XFS_DQ_IS_LOCKED(dqp));
798
799 trace_xfs_dqput(dqp);
800
801 if (--dqp->q_nrefs > 0)
802 xfs_dqunlock(dqp);
803 else
804 xfs_qm_dqput_final(dqp);
913} 805}
914 806
915/* 807/*
@@ -1091,17 +983,6 @@ xfs_qm_dqflush(
1091 983
1092} 984}
1093 985
1094void
1095xfs_dqunlock(
1096 xfs_dquot_t *dqp)
1097{
1098 xfs_dqunlock_nonotify(dqp);
1099 if (dqp->q_logitem.qli_dquot == dqp) {
1100 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1101 &dqp->q_logitem.qli_item);
1102 }
1103}
1104
1105/* 986/*
1106 * Lock two xfs_dquot structures. 987 * Lock two xfs_dquot structures.
1107 * 988 *
@@ -1131,85 +1012,6 @@ xfs_dqlock2(
1131} 1012}
1132 1013
1133/* 1014/*
1134 * Take a dquot out of the mount's dqlist as well as the hashlist. This is
1135 * called via unmount as well as quotaoff, and the purge will always succeed.
1136 */
1137void
1138xfs_qm_dqpurge(
1139 struct xfs_dquot *dqp)
1140{
1141 struct xfs_mount *mp = dqp->q_mount;
1142 struct xfs_dqhash *qh = dqp->q_hash;
1143
1144 xfs_dqlock(dqp);
1145
1146 /*
1147 * If we're turning off quotas, we have to make sure that, for
1148 * example, we don't delete quota disk blocks while dquots are
1149 * in the process of getting written to those disk blocks.
1150 * This dquot might well be on AIL, and we can't leave it there
1151 * if we're turning off quotas. Basically, we need this flush
1152 * lock, and are willing to block on it.
1153 */
1154 if (!xfs_dqflock_nowait(dqp)) {
1155 /*
1156 * Block on the flush lock after nudging dquot buffer,
1157 * if it is incore.
1158 */
1159 xfs_dqflock_pushbuf_wait(dqp);
1160 }
1161
1162 /*
1163 * If we are turning this type of quotas off, we don't care
1164 * about the dirty metadata sitting in this dquot. OTOH, if
1165 * we're unmounting, we do care, so we flush it and wait.
1166 */
1167 if (XFS_DQ_IS_DIRTY(dqp)) {
1168 int error;
1169
1170 /*
1171 * We don't care about getting disk errors here. We need
1172 * to purge this dquot anyway, so we go ahead regardless.
1173 */
1174 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1175 if (error)
1176 xfs_warn(mp, "%s: dquot %p flush failed",
1177 __func__, dqp);
1178 xfs_dqflock(dqp);
1179 }
1180
1181 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1182 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1183 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1184
1185 xfs_dqfunlock(dqp);
1186 xfs_dqunlock(dqp);
1187
1188 mutex_lock(&qh->qh_lock);
1189 list_del_init(&dqp->q_hashlist);
1190 qh->qh_version++;
1191 mutex_unlock(&qh->qh_lock);
1192
1193 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1194 list_del_init(&dqp->q_mplist);
1195 mp->m_quotainfo->qi_dqreclaims++;
1196 mp->m_quotainfo->qi_dquots--;
1197 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1198
1199 /*
1200 * We move dquots to the freelist as soon as their reference count
1201 * hits zero, so it really should be on the freelist here.
1202 */
1203 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1204 ASSERT(!list_empty(&dqp->q_freelist));
1205 list_del_init(&dqp->q_freelist);
1206 xfs_Gqm->qm_dqfrlist_cnt--;
1207 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1208
1209 xfs_qm_dqdestroy(dqp);
1210}
1211
1212/*
1213 * Give the buffer a little push if it is incore and 1015 * Give the buffer a little push if it is incore and
1214 * wait on the flush lock. 1016 * wait on the flush lock.
1215 */ 1017 */
@@ -1241,3 +1043,31 @@ xfs_dqflock_pushbuf_wait(
1241out_lock: 1043out_lock:
1242 xfs_dqflock(dqp); 1044 xfs_dqflock(dqp);
1243} 1045}
1046
1047int __init
1048xfs_qm_init(void)
1049{
1050 xfs_qm_dqzone =
1051 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
1052 if (!xfs_qm_dqzone)
1053 goto out;
1054
1055 xfs_qm_dqtrxzone =
1056 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
1057 if (!xfs_qm_dqtrxzone)
1058 goto out_free_dqzone;
1059
1060 return 0;
1061
1062out_free_dqzone:
1063 kmem_zone_destroy(xfs_qm_dqzone);
1064out:
1065 return -ENOMEM;
1066}
1067
1068void
1069xfs_qm_exit(void)
1070{
1071 kmem_zone_destroy(xfs_qm_dqtrxzone);
1072 kmem_zone_destroy(xfs_qm_dqzone);
1073}
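
With the hash chains gone, xfs_qm_dqget() follows the classic optimistic lookup/insert pattern: search the radix tree under qi_tree_lock, drop the lock for the disk read, retake it for the insert, and treat a duplicate insert as "lost the race, destroy our copy and retry". The per-type tree selector is presumably along these lines (illustrative sketch; the real macro lives in the quota headers):

/* Pick the user or group/project dquot radix tree (sketch). */
#define XFS_DQUOT_TREE(qi, type)		\
	((type) & XFS_DQ_USER ?			\
		&(qi)->qi_uquota_tree :		\
		&(qi)->qi_gquota_tree)

Note the sign flip in "error = -radix_tree_insert(tree, id, dqp)": radix_tree_insert() returns a negative errno (-EEXIST on a duplicate key), while XFS still uses positive error codes internally at this point, which is what makes the WARN_ON(error != EEXIST) check work.
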
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index a1d91d8f180..ef9190bd8b3 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -29,16 +29,6 @@
29 * when quotas are off. 29 * when quotas are off.
30 */ 30 */
31 31
32/*
33 * The hash chain headers (hash buckets)
34 */
35typedef struct xfs_dqhash {
36 struct list_head qh_list;
37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t;
41
42struct xfs_mount; 32struct xfs_mount;
43struct xfs_trans; 33struct xfs_trans;
44 34
@@ -47,10 +37,7 @@ struct xfs_trans;
47 */ 37 */
48typedef struct xfs_dquot { 38typedef struct xfs_dquot {
49 uint dq_flags; /* various flags (XFS_DQ_*) */ 39 uint dq_flags; /* various flags (XFS_DQ_*) */
50 struct list_head q_freelist; /* global free list of dquots */ 40 struct list_head q_lru; /* global free list of dquots */
51 struct list_head q_mplist; /* mount's list of dquots */
 52 struct list_head q_hashlist; /* global hash list of dquots */
53 xfs_dqhash_t *q_hash; /* the hashchain header */
54 struct xfs_mount*q_mount; /* filesystem this relates to */ 41 struct xfs_mount*q_mount; /* filesystem this relates to */
55 struct xfs_trans*q_transp; /* trans this belongs to currently */ 42 struct xfs_trans*q_transp; /* trans this belongs to currently */
56 uint q_nrefs; /* # active refs from inodes */ 43 uint q_nrefs; /* # active refs from inodes */
@@ -110,11 +97,37 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp)
110 mutex_lock(&dqp->q_qlock); 97 mutex_lock(&dqp->q_qlock);
111} 98}
112 99
113static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) 100static inline void xfs_dqunlock(struct xfs_dquot *dqp)
114{ 101{
115 mutex_unlock(&dqp->q_qlock); 102 mutex_unlock(&dqp->q_qlock);
116} 103}
117 104
105static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)
106{
107 switch (type & XFS_DQ_ALLTYPES) {
108 case XFS_DQ_USER:
109 return XFS_IS_UQUOTA_ON(mp);
110 case XFS_DQ_GROUP:
111 case XFS_DQ_PROJ:
112 return XFS_IS_OQUOTA_ON(mp);
113 default:
114 return 0;
115 }
116}
117
118static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
119{
120 switch (type & XFS_DQ_ALLTYPES) {
121 case XFS_DQ_USER:
122 return ip->i_udquot;
123 case XFS_DQ_GROUP:
124 case XFS_DQ_PROJ:
125 return ip->i_gdquot;
126 default:
127 return NULL;
128 }
129}
130
118#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 131#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
119#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 132#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
120#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 133#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -125,15 +138,10 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
125 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ 138 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
126 XFS_DQ_TO_QINF(dqp)->qi_gquotaip) 139 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
127 140
128#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
129 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
130 (XFS_IS_OQUOTA_ON((d)->q_mount))))
131
132extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, 141extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
133 uint, struct xfs_dquot **); 142 uint, struct xfs_dquot **);
134extern void xfs_qm_dqdestroy(xfs_dquot_t *); 143extern void xfs_qm_dqdestroy(xfs_dquot_t *);
135extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 144extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
136extern void xfs_qm_dqpurge(xfs_dquot_t *);
137extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 145extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
138extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 146extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
139 xfs_disk_dquot_t *); 147 xfs_disk_dquot_t *);
@@ -144,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
144extern void xfs_qm_dqput(xfs_dquot_t *); 152extern void xfs_qm_dqput(xfs_dquot_t *);
145 153
146extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); 154extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
147extern void xfs_dqunlock(struct xfs_dquot *);
148extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); 155extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
149 156
150static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) 157static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
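
The two new inline helpers absorb the per-type branching that the deleted XFS_IS_THIS_QUOTA_OFF macro and the open-coded i_udquot/i_gdquot accesses used to do, so call sites convert mechanically. A sketch of the pattern (variable names illustrative):

/* old */
if (XFS_IS_THIS_QUOTA_OFF(dqp))
	return ESRCH;

/* new */
if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags))
	return ESRCH;

/* and per-type dquot access no longer needs an if/else ladder: */
struct xfs_dquot *attached = xfs_inode_dquot(ip, type);	/* NULL if none */
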
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b..54a67dd9ac0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163 struct inode *inode = file->f_mapping->host; 163 struct inode *inode = file->f_mapping->host;
164 struct xfs_inode *ip = XFS_I(inode); 164 struct xfs_inode *ip = XFS_I(inode);
165 struct xfs_mount *mp = ip->i_mount; 165 struct xfs_mount *mp = ip->i_mount;
166 struct xfs_trans *tp;
167 int error = 0; 166 int error = 0;
168 int log_flushed = 0; 167 int log_flushed = 0;
169 xfs_lsn_t lsn = 0; 168 xfs_lsn_t lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
194 } 193 }
195 194
196 /* 195 /*
197 * We always need to make sure that the required inode state is safe on 196 * All metadata updates are logged, which means that we just have
198 * disk. The inode might be clean but we still might need to force the 197 * to flush the log up to the latest LSN that touched the inode.
199 * log because of committed transactions that haven't hit the disk yet.
200 * Likewise, there could be unflushed non-transactional changes to the
201 * inode core that have to go to disk and this requires us to issue
202 * a synchronous transaction to capture these changes correctly.
203 *
204 * This code relies on the assumption that if the i_update_core field
205 * of the inode is clear and the inode is unpinned then it is clean
206 * and no action is required.
207 */ 198 */
208 xfs_ilock(ip, XFS_ILOCK_SHARED); 199 xfs_ilock(ip, XFS_ILOCK_SHARED);
209 200 if (xfs_ipincount(ip)) {
210 /* 201 if (!datasync ||
211 * First check if the VFS inode is marked dirty. All the dirtying 202 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync.
215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster.
218 */
219 if (((inode->i_state & I_DIRTY_DATASYNC) ||
220 ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221 ip->i_update_core) {
222 /*
223 * Kick off a transaction to log the inode core to get the
224 * updates. The sync transaction will also force the log.
225 */
226 xfs_iunlock(ip, XFS_ILOCK_SHARED);
227 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228 error = xfs_trans_reserve(tp, 0,
229 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230 if (error) {
231 xfs_trans_cancel(tp, 0);
232 return -error;
233 }
234 xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236 /*
237 * Note - it's possible that we might have pushed ourselves out
238 * of the way during trans_reserve which would flush the inode.
239 * But there's no guarantee that the inode buffer has actually
240 * gone out yet (it's delwri). Plus the buffer could be pinned
241 * anyway if it's part of an inode in another recent
242 * transaction. So we play it safe and fire off the
243 * transaction anyway.
244 */
245 xfs_trans_ijoin(tp, ip, 0);
246 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247 error = xfs_trans_commit(tp, 0);
248
249 lsn = ip->i_itemp->ili_last_lsn;
250 xfs_iunlock(ip, XFS_ILOCK_EXCL);
251 } else {
252 /*
253 * Timestamps/size haven't changed since last inode flush or
254 * inode transaction commit. That means either nothing got
255 * written or a transaction committed which caught the updates.
256 * If the latter happened and the transaction hasn't hit the
257 * disk yet, the inode will be still be pinned. If it is,
258 * force the log.
259 */
260 if (xfs_ipincount(ip))
261 lsn = ip->i_itemp->ili_last_lsn; 203 lsn = ip->i_itemp->ili_last_lsn;
262 xfs_iunlock(ip, XFS_ILOCK_SHARED);
263 } 204 }
205 xfs_iunlock(ip, XFS_ILOCK_SHARED);
264 206
265 if (!error && lsn) 207 if (lsn)
266 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 208 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267 209
268 /* 210 /*
@@ -659,9 +601,6 @@ restart:
659 return error; 601 return error;
660 } 602 }
661 603
662 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663 file_update_time(file);
664
665 /* 604 /*
666 * If the offset is beyond the size of the file, we need to zero any 605 * If the offset is beyond the size of the file, we need to zero any
667 * blocks that fall between the existing EOF and the start of this 606 * blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
685 return error; 624 return error;
686 625
687 /* 626 /*
627 * Updating the timestamps will grab the ilock again from
628 * xfs_fs_dirty_inode, so we have to call it after dropping the
629 * lock above. Eventually we should look into a way to avoid
630 * the pointless lock roundtrip.
631 */
632 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
633 file_update_time(file);
634
635 /*
688 * If we're writing the file then make sure to clear the setuid and 636 * If we're writing the file then make sure to clear the setuid and
689 * setgid bits if the process is not being run by root. This keeps 637 * setgid bits if the process is not being run by root. This keeps
690 * people from modifying setuid and setgid binaries. 638 * people from modifying setuid and setgid binaries.
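
The net effect of the fsync rewrite: with every metadata change now going through the log, the only state that can require flushing is a pinned log item, and for fdatasync a pure timestamp update (nothing in ili_fields beyond XFS_ILOG_TIMESTAMP) can be skipped entirely. Condensed to its core (illustrative, equivalent to the hunk above):

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip) &&
	    (!datasync || (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)))
		lsn = ip->i_itemp->ili_last_lsn;  /* newest LSN touching this inode */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (lsn)	/* unpinned, or timestamp-only fdatasync: nothing to force */
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
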
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8c3e46394d4..bcc6c249b2c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,6 @@ xfs_inode_alloc(
91 ip->i_afp = NULL; 91 ip->i_afp = NULL;
92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
93 ip->i_flags = 0; 93 ip->i_flags = 0;
94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 94 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 95 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 96
@@ -290,7 +289,7 @@ xfs_iget_cache_hit(
290 if (lock_flags != 0) 289 if (lock_flags != 0)
291 xfs_ilock(ip, lock_flags); 290 xfs_ilock(ip, lock_flags);
292 291
293 xfs_iflags_clear(ip, XFS_ISTALE); 292 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
294 XFS_STATS_INC(xs_ig_found); 293 XFS_STATS_INC(xs_ig_found);
295 294
296 return 0; 295 return 0;
@@ -315,6 +314,7 @@ xfs_iget_cache_miss(
315 struct xfs_inode *ip; 314 struct xfs_inode *ip;
316 int error; 315 int error;
317 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 316 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
317 int iflags;
318 318
319 ip = xfs_inode_alloc(mp, ino); 319 ip = xfs_inode_alloc(mp, ino);
320 if (!ip) 320 if (!ip)
@@ -350,9 +350,23 @@ xfs_iget_cache_miss(
350 BUG(); 350 BUG();
351 } 351 }
352 352
353 spin_lock(&pag->pag_ici_lock); 353 /*
354 * These values must be set before inserting the inode into the radix
355 * tree as the moment it is inserted a concurrent lookup (allowed by the
356 * RCU locking mechanism) can find it and that lookup must see that this
357 * is an inode currently under construction (i.e. that XFS_INEW is set).
358 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
359 * memory barrier that ensures this detection works correctly at lookup
360 * time.
361 */
362 iflags = XFS_INEW;
363 if (flags & XFS_IGET_DONTCACHE)
364 iflags |= XFS_IDONTCACHE;
365 ip->i_udquot = ip->i_gdquot = NULL;
366 xfs_iflags_set(ip, iflags);
354 367
355 /* insert the new inode */ 368 /* insert the new inode */
369 spin_lock(&pag->pag_ici_lock);
356 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 370 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
357 if (unlikely(error)) { 371 if (unlikely(error)) {
358 WARN_ON(error != -EEXIST); 372 WARN_ON(error != -EEXIST);
@@ -360,11 +374,6 @@ xfs_iget_cache_miss(
360 error = EAGAIN; 374 error = EAGAIN;
361 goto out_preload_end; 375 goto out_preload_end;
362 } 376 }
363
364 /* These values _must_ be set before releasing the radix tree lock! */
365 ip->i_udquot = ip->i_gdquot = NULL;
366 xfs_iflags_set(ip, XFS_INEW);
367
368 spin_unlock(&pag->pag_ici_lock); 377 spin_unlock(&pag->pag_ici_lock);
369 radix_tree_preload_end(); 378 radix_tree_preload_end();
370 379
@@ -418,6 +427,15 @@ xfs_iget(
418 xfs_perag_t *pag; 427 xfs_perag_t *pag;
419 xfs_agino_t agino; 428 xfs_agino_t agino;
420 429
430 /*
431 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
432 * doesn't get freed while it's being referenced during a
433 * radix tree traversal here. It assumes this function
434 * aqcuires only the ILOCK (and therefore it has no need to
435 * involve the IOLOCK in this synchronization).
436 */
437 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
438
421 /* reject inode numbers outside existing AGs */ 439 /* reject inode numbers outside existing AGs */
422 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 440 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
423 return EINVAL; 441 return EINVAL;
@@ -642,8 +660,7 @@ xfs_iunlock(
642 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 660 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
643 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 661 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
644 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 662 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
645 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | 663 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
646 XFS_LOCK_DEP_MASK)) == 0);
647 ASSERT(lock_flags != 0); 664 ASSERT(lock_flags != 0);
648 665
649 if (lock_flags & XFS_IOLOCK_EXCL) 666 if (lock_flags & XFS_IOLOCK_EXCL)
@@ -656,16 +673,6 @@ xfs_iunlock(
656 else if (lock_flags & XFS_ILOCK_SHARED) 673 else if (lock_flags & XFS_ILOCK_SHARED)
657 mrunlock_shared(&ip->i_lock); 674 mrunlock_shared(&ip->i_lock);
658 675
659 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
660 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
661 /*
662 * Let the AIL know that this item has been unlocked in case
663 * it is in the AIL and anyone is waiting on it. Don't do
664 * this if the caller has asked us not to.
665 */
666 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
667 (xfs_log_item_t*)(ip->i_itemp));
668 }
669 trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 676 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
670} 677}
671 678
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b21022499c2..bc46c0a133d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1656,14 +1656,13 @@ retry:
1656 iip = ip->i_itemp; 1656 iip = ip->i_itemp;
1657 if (!iip || xfs_inode_clean(ip)) { 1657 if (!iip || xfs_inode_clean(ip)) {
1658 ASSERT(ip != free_ip); 1658 ASSERT(ip != free_ip);
1659 ip->i_update_core = 0;
1660 xfs_ifunlock(ip); 1659 xfs_ifunlock(ip);
1661 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1660 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1662 continue; 1661 continue;
1663 } 1662 }
1664 1663
1665 iip->ili_last_fields = iip->ili_format.ilf_fields; 1664 iip->ili_last_fields = iip->ili_fields;
1666 iip->ili_format.ilf_fields = 0; 1665 iip->ili_fields = 0;
1667 iip->ili_logged = 1; 1666 iip->ili_logged = 1;
1668 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 1667 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
1669 &iip->ili_item.li_lsn); 1668 &iip->ili_item.li_lsn);
@@ -2177,7 +2176,7 @@ xfs_iflush_fork(
2177 mp = ip->i_mount; 2176 mp = ip->i_mount;
2178 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 2177 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
2179 case XFS_DINODE_FMT_LOCAL: 2178 case XFS_DINODE_FMT_LOCAL:
2180 if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 2179 if ((iip->ili_fields & dataflag[whichfork]) &&
2181 (ifp->if_bytes > 0)) { 2180 (ifp->if_bytes > 0)) {
2182 ASSERT(ifp->if_u1.if_data != NULL); 2181 ASSERT(ifp->if_u1.if_data != NULL);
2183 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2182 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2187,8 +2186,8 @@ xfs_iflush_fork(
2187 2186
2188 case XFS_DINODE_FMT_EXTENTS: 2187 case XFS_DINODE_FMT_EXTENTS:
2189 ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2188 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2190 !(iip->ili_format.ilf_fields & extflag[whichfork])); 2189 !(iip->ili_fields & extflag[whichfork]));
2191 if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2190 if ((iip->ili_fields & extflag[whichfork]) &&
2192 (ifp->if_bytes > 0)) { 2191 (ifp->if_bytes > 0)) {
2193 ASSERT(xfs_iext_get_ext(ifp, 0)); 2192 ASSERT(xfs_iext_get_ext(ifp, 0));
2194 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2193 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2198,7 +2197,7 @@ xfs_iflush_fork(
2198 break; 2197 break;
2199 2198
2200 case XFS_DINODE_FMT_BTREE: 2199 case XFS_DINODE_FMT_BTREE:
2201 if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 2200 if ((iip->ili_fields & brootflag[whichfork]) &&
2202 (ifp->if_broot_bytes > 0)) { 2201 (ifp->if_broot_bytes > 0)) {
2203 ASSERT(ifp->if_broot != NULL); 2202 ASSERT(ifp->if_broot != NULL);
2204 ASSERT(ifp->if_broot_bytes <= 2203 ASSERT(ifp->if_broot_bytes <=
@@ -2211,14 +2210,14 @@ xfs_iflush_fork(
2211 break; 2210 break;
2212 2211
2213 case XFS_DINODE_FMT_DEV: 2212 case XFS_DINODE_FMT_DEV:
2214 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2213 if (iip->ili_fields & XFS_ILOG_DEV) {
2215 ASSERT(whichfork == XFS_DATA_FORK); 2214 ASSERT(whichfork == XFS_DATA_FORK);
2216 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 2215 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
2217 } 2216 }
2218 break; 2217 break;
2219 2218
2220 case XFS_DINODE_FMT_UUID: 2219 case XFS_DINODE_FMT_UUID:
2221 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2220 if (iip->ili_fields & XFS_ILOG_UUID) {
2222 ASSERT(whichfork == XFS_DATA_FORK); 2221 ASSERT(whichfork == XFS_DATA_FORK);
2223 memcpy(XFS_DFORK_DPTR(dip), 2222 memcpy(XFS_DFORK_DPTR(dip),
2224 &ip->i_df.if_u2.if_uuid, 2223 &ip->i_df.if_u2.if_uuid,
@@ -2451,9 +2450,8 @@ xfs_iflush(
2451 * to disk, because the log record didn't make it to disk! 2450 * to disk, because the log record didn't make it to disk!
2452 */ 2451 */
2453 if (XFS_FORCED_SHUTDOWN(mp)) { 2452 if (XFS_FORCED_SHUTDOWN(mp)) {
2454 ip->i_update_core = 0;
2455 if (iip) 2453 if (iip)
2456 iip->ili_format.ilf_fields = 0; 2454 iip->ili_fields = 0;
2457 xfs_ifunlock(ip); 2455 xfs_ifunlock(ip);
2458 return XFS_ERROR(EIO); 2456 return XFS_ERROR(EIO);
2459 } 2457 }
@@ -2533,26 +2531,6 @@ xfs_iflush_int(
2533 /* set *dip = inode's place in the buffer */ 2531 /* set *dip = inode's place in the buffer */
2534 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2532 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2535 2533
2536 /*
2537 * Clear i_update_core before copying out the data.
2538 * This is for coordination with our timestamp updates
2539 * that don't hold the inode lock. They will always
2540 * update the timestamps BEFORE setting i_update_core,
2541 * so if we clear i_update_core after they set it we
2542 * are guaranteed to see their updates to the timestamps.
2543 * I believe that this depends on strongly ordered memory
2544 * semantics, but we have that. We use the SYNCHRONIZE
2545 * macro to make sure that the compiler does not reorder
2546 * the i_update_core access below the data copy below.
2547 */
2548 ip->i_update_core = 0;
2549 SYNCHRONIZE();
2550
2551 /*
2552 * Make sure to get the latest timestamps from the Linux inode.
2553 */
2554 xfs_synchronize_times(ip);
2555
2556 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 2534 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2557 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2535 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2558 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2536 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2663,36 +2641,33 @@ xfs_iflush_int(
2663 xfs_inobp_check(mp, bp); 2641 xfs_inobp_check(mp, bp);
2664 2642
2665 /* 2643 /*
2666 * We've recorded everything logged in the inode, so we'd 2644 * We've recorded everything logged in the inode, so we'd like to clear
2667 * like to clear the ilf_fields bits so we don't log and 2645 * the ili_fields bits so we don't log and flush things unnecessarily.
2668 * flush things unnecessarily. However, we can't stop 2646 * However, we can't stop logging all this information until the data
2669 * logging all this information until the data we've copied 2647 * we've copied into the disk buffer is written to disk. If we did we
2670 * into the disk buffer is written to disk. If we did we might 2648 * might overwrite the copy of the inode in the log with all the data
2671 * overwrite the copy of the inode in the log with all the 2649 * after re-logging only part of it, and in the face of a crash we
2672 * data after re-logging only part of it, and in the face of 2650 * wouldn't have all the data we need to recover.
2673 * a crash we wouldn't have all the data we need to recover.
2674 * 2651 *
2675 * What we do is move the bits to the ili_last_fields field. 2652 * What we do is move the bits to the ili_last_fields field. When
2676 * When logging the inode, these bits are moved back to the 2653 * logging the inode, these bits are moved back to the ili_fields field.
2677 * ilf_fields field. In the xfs_iflush_done() routine we 2654 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
2678 * clear ili_last_fields, since we know that the information 2655 * know that the information those bits represent is permanently on
2679 * those bits represent is permanently on disk. As long as 2656 * disk. As long as the flush completes before the inode is logged
2680 * the flush completes before the inode is logged again, then 2657 * again, then both ili_fields and ili_last_fields will be cleared.
2681 * both ilf_fields and ili_last_fields will be cleared.
2682 * 2658 *
2683 * We can play with the ilf_fields bits here, because the inode 2659 * We can play with the ili_fields bits here, because the inode lock
2684 * lock must be held exclusively in order to set bits there 2660 * must be held exclusively in order to set bits there and the flush
2685 * and the flush lock protects the ili_last_fields bits. 2661 * lock protects the ili_last_fields bits. Set ili_logged so the flush
2686 * Set ili_logged so the flush done 2662 * done routine can tell whether or not to look in the AIL. Also, store
2687 * routine can tell whether or not to look in the AIL. 2663 * the current LSN of the inode so that we can tell whether the item has
2688 * Also, store the current LSN of the inode so that we can tell 2664 * moved in the AIL from xfs_iflush_done(). In order to read the lsn we
2689 * whether the item has moved in the AIL from xfs_iflush_done(). 2665 * need the AIL lock, because it is a 64 bit value that cannot be read
2690 * In order to read the lsn we need the AIL lock, because 2666 * atomically.
2691 * it is a 64 bit value that cannot be read atomically.
2692 */ 2667 */
2693 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 2668 if (iip != NULL && iip->ili_fields != 0) {
2694 iip->ili_last_fields = iip->ili_format.ilf_fields; 2669 iip->ili_last_fields = iip->ili_fields;
2695 iip->ili_format.ilf_fields = 0; 2670 iip->ili_fields = 0;
2696 iip->ili_logged = 1; 2671 iip->ili_logged = 1;
2697 2672
2698 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2673 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -2711,8 +2686,7 @@ xfs_iflush_int(
2711 } else { 2686 } else {
2712 /* 2687 /*
2713 * We're flushing an inode which is not in the AIL and has 2688 * We're flushing an inode which is not in the AIL and has
2714 * not been logged but has i_update_core set. For this 2689 * not been logged. For this case we can immediately drop
2715 * case we can use a B_DELWRI flush and immediately drop
2716 * the inode flush lock because we can avoid the whole 2690 * the inode flush lock because we can avoid the whole
2717 * AIL state thing. It's OK to drop the flush lock now, 2691 * AIL state thing. It's OK to drop the flush lock now,
2718 * because we've already locked the buffer and to do anything 2692 * because we've already locked the buffer and to do anything
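
The ili_fields handshake described in the rewritten comment above, laid out as a timeline (informal summary, no new semantics):

/*
 *   xfs_trans_log_inode():          ili_fields |= flags
 *   xfs_iflush_int() (flush start): ili_last_fields = ili_fields;
 *                                   ili_fields = 0;
 *   inode relogged mid-flush:       ili_last_fields folded back into
 *                                   ili_fields, so nothing is lost
 *   xfs_iflush_done():              ili_last_fields = 0  (data on disk)
 *
 * The ILOCK (exclusive) protects ili_fields; the flush lock protects
 * ili_last_fields, exactly as the comment states.
 */
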
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 2f27b745408..7fee3387e1c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 spinlock_t i_flags_lock; /* inode i_flags lock */ 241 spinlock_t i_flags_lock; /* inode i_flags lock */
242 /* Miscellaneous state. */ 242 /* Miscellaneous state. */
243 unsigned long i_flags; /* see defined flags below */ 243 unsigned long i_flags; /* see defined flags below */
244 unsigned char i_update_core; /* timestamps/size is dirty */
245 unsigned int i_delayed_blks; /* count of delay alloc blks */ 244 unsigned int i_delayed_blks; /* count of delay alloc blks */
246 245
247 xfs_icdinode_t i_d; /* most of ondisk inode */ 246 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -275,6 +274,20 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
275} 274}
276 275
277/* 276/*
277 * If this I/O goes past the on-disk inode size update it unless it would
278 * be past the current in-core inode size.
279 */
280static inline xfs_fsize_t
281xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
282{
283 xfs_fsize_t i_size = i_size_read(VFS_I(ip));
284
285 if (new_size > i_size)
286 new_size = i_size;
287 return new_size > ip->i_d.di_size ? new_size : 0;
288}
289
290/*
278 * i_flags helper functions 291 * i_flags helper functions
279 */ 292 */
280static inline void 293static inline void
@@ -374,10 +387,11 @@ xfs_set_projid(struct xfs_inode *ip,
374#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) 387#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
375#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ 388#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
376#define XFS_IPINNED (1 << __XFS_IPINNED_BIT) 389#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
390#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */
377 391
378/* 392/*
379 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 393 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
380 * inode lookup. Thi prevents unintended behaviour on the new inode from 394 * inode lookup. This prevents unintended behaviour on the new inode from
 381 * occurring. 395 * occurring.
382 */ 396 */
383#define XFS_IRECLAIM_RESET_FLAGS \ 397#define XFS_IRECLAIM_RESET_FLAGS \
@@ -422,7 +436,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
422#define XFS_IOLOCK_SHARED (1<<1) 436#define XFS_IOLOCK_SHARED (1<<1)
423#define XFS_ILOCK_EXCL (1<<2) 437#define XFS_ILOCK_EXCL (1<<2)
424#define XFS_ILOCK_SHARED (1<<3) 438#define XFS_ILOCK_SHARED (1<<3)
425#define XFS_IUNLOCK_NONOTIFY (1<<4)
426 439
427#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 440#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
428 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 441 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
@@ -431,8 +444,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
431 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 444 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
432 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 445 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
433 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 446 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
434 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ 447 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }
435 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
436 448
437 449
438/* 450/*
@@ -522,10 +534,6 @@ void xfs_promote_inode(struct xfs_inode *);
522void xfs_lock_inodes(xfs_inode_t **, int, uint); 534void xfs_lock_inodes(xfs_inode_t **, int, uint);
523void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 535void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
524 536
525void xfs_synchronize_times(xfs_inode_t *);
526void xfs_mark_inode_dirty(xfs_inode_t *);
527void xfs_mark_inode_dirty_sync(xfs_inode_t *);
528
529#define IHOLD(ip) \ 537#define IHOLD(ip) \
530do { \ 538do { \
531 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 539 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
@@ -546,6 +554,7 @@ do { \
546 */ 554 */
547#define XFS_IGET_CREATE 0x1 555#define XFS_IGET_CREATE 0x1
548#define XFS_IGET_UNTRUSTED 0x2 556#define XFS_IGET_UNTRUSTED 0x2
557#define XFS_IGET_DONTCACHE 0x4
549 558
550int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, 559int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
551 xfs_ino_t, struct xfs_dinode **, 560 xfs_ino_t, struct xfs_dinode **,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91d71dcd485..05d924efcea 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -57,77 +57,28 @@ xfs_inode_item_size(
57 struct xfs_inode *ip = iip->ili_inode; 57 struct xfs_inode *ip = iip->ili_inode;
58 uint nvecs = 2; 58 uint nvecs = 2;
59 59
60 /*
61 * Only log the data/extents/b-tree root if there is something
62 * left to log.
63 */
64 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
65
66 switch (ip->i_d.di_format) { 60 switch (ip->i_d.di_format) {
67 case XFS_DINODE_FMT_EXTENTS: 61 case XFS_DINODE_FMT_EXTENTS:
68 iip->ili_format.ilf_fields &= 62 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
69 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 63 ip->i_d.di_nextents > 0 &&
70 XFS_ILOG_DEV | XFS_ILOG_UUID); 64 ip->i_df.if_bytes > 0)
71 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
72 (ip->i_d.di_nextents > 0) &&
73 (ip->i_df.if_bytes > 0)) {
74 ASSERT(ip->i_df.if_u1.if_extents != NULL);
75 nvecs++; 65 nvecs++;
76 } else {
77 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
78 }
79 break; 66 break;
80 67
81 case XFS_DINODE_FMT_BTREE: 68 case XFS_DINODE_FMT_BTREE:
82 iip->ili_format.ilf_fields &= 69 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
83 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 70 ip->i_df.if_broot_bytes > 0)
84 XFS_ILOG_DEV | XFS_ILOG_UUID);
85 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
86 (ip->i_df.if_broot_bytes > 0)) {
87 ASSERT(ip->i_df.if_broot != NULL);
88 nvecs++; 71 nvecs++;
89 } else {
90 ASSERT(!(iip->ili_format.ilf_fields &
91 XFS_ILOG_DBROOT));
92#ifdef XFS_TRANS_DEBUG
93 if (iip->ili_root_size > 0) {
94 ASSERT(iip->ili_root_size ==
95 ip->i_df.if_broot_bytes);
96 ASSERT(memcmp(iip->ili_orig_root,
97 ip->i_df.if_broot,
98 iip->ili_root_size) == 0);
99 } else {
100 ASSERT(ip->i_df.if_broot_bytes == 0);
101 }
102#endif
103 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
104 }
105 break; 72 break;
106 73
107 case XFS_DINODE_FMT_LOCAL: 74 case XFS_DINODE_FMT_LOCAL:
108 iip->ili_format.ilf_fields &= 75 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
109 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 76 ip->i_df.if_bytes > 0)
110 XFS_ILOG_DEV | XFS_ILOG_UUID);
111 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
112 (ip->i_df.if_bytes > 0)) {
113 ASSERT(ip->i_df.if_u1.if_data != NULL);
114 ASSERT(ip->i_d.di_size > 0);
115 nvecs++; 77 nvecs++;
116 } else {
117 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
118 }
119 break; 78 break;
120 79
121 case XFS_DINODE_FMT_DEV: 80 case XFS_DINODE_FMT_DEV:
122 iip->ili_format.ilf_fields &=
123 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
124 XFS_ILOG_DEXT | XFS_ILOG_UUID);
125 break;
126
127 case XFS_DINODE_FMT_UUID: 81 case XFS_DINODE_FMT_UUID:
128 iip->ili_format.ilf_fields &=
129 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
130 XFS_ILOG_DEXT | XFS_ILOG_DEV);
131 break; 82 break;
132 83
133 default: 84 default:
@@ -135,56 +86,31 @@ xfs_inode_item_size(
135 break; 86 break;
136 } 87 }
137 88
138 /* 89 if (!XFS_IFORK_Q(ip))
139 * If there are no attributes associated with this file,
140 * then there cannot be anything more to log.
141 * Clear all attribute-related log flags.
142 */
143 if (!XFS_IFORK_Q(ip)) {
144 iip->ili_format.ilf_fields &=
145 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
146 return nvecs; 90 return nvecs;
147 } 91
148 92
149 /* 93 /*
150 * Log any necessary attribute data. 94 * Log any necessary attribute data.
151 */ 95 */
152 switch (ip->i_d.di_aformat) { 96 switch (ip->i_d.di_aformat) {
153 case XFS_DINODE_FMT_EXTENTS: 97 case XFS_DINODE_FMT_EXTENTS:
154 iip->ili_format.ilf_fields &= 98 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
155 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 99 ip->i_d.di_anextents > 0 &&
156 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && 100 ip->i_afp->if_bytes > 0)
157 (ip->i_d.di_anextents > 0) &&
158 (ip->i_afp->if_bytes > 0)) {
159 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
160 nvecs++; 101 nvecs++;
161 } else {
162 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
163 }
164 break; 102 break;
165 103
166 case XFS_DINODE_FMT_BTREE: 104 case XFS_DINODE_FMT_BTREE:
167 iip->ili_format.ilf_fields &= 105 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
168 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); 106 ip->i_afp->if_broot_bytes > 0)
169 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
170 (ip->i_afp->if_broot_bytes > 0)) {
171 ASSERT(ip->i_afp->if_broot != NULL);
172 nvecs++; 107 nvecs++;
173 } else {
174 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
175 }
176 break; 108 break;
177 109
178 case XFS_DINODE_FMT_LOCAL: 110 case XFS_DINODE_FMT_LOCAL:
179 iip->ili_format.ilf_fields &= 111 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
180 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); 112 ip->i_afp->if_bytes > 0)
181 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
182 (ip->i_afp->if_bytes > 0)) {
183 ASSERT(ip->i_afp->if_u1.if_data != NULL);
184 nvecs++; 113 nvecs++;
185 } else {
186 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
187 }
188 break; 114 break;
189 115
190 default: 116 default:
@@ -254,48 +180,11 @@ xfs_inode_item_format(
254 vecp++; 180 vecp++;
255 nvecs = 1; 181 nvecs = 1;
256 182
257 /*
258 * Clear i_update_core if the timestamps (or any other
259 * non-transactional modification) need flushing/logging
260 * and we're about to log them with the rest of the core.
261 *
262 * This is the same logic as xfs_iflush() but this code can't
263 * run at the same time as xfs_iflush because we're in commit
264 * processing here and so we have the inode lock held in
265 * exclusive mode. Although it doesn't really matter
266 * for the timestamps if both routines were to grab the
267 * timestamps or not. That would be ok.
268 *
269 * We clear i_update_core before copying out the data.
270 * This is for coordination with our timestamp updates
271 * that don't hold the inode lock. They will always
272 * update the timestamps BEFORE setting i_update_core,
273 * so if we clear i_update_core after they set it we
274 * are guaranteed to see their updates to the timestamps
275 * either here. Likewise, if they set it after we clear it
276 * here, we'll see it either on the next commit of this
277 * inode or the next time the inode gets flushed via
278 * xfs_iflush(). This depends on strongly ordered memory
279 * semantics, but we have that. We use the SYNCHRONIZE
280 * macro to make sure that the compiler does not reorder
281 * the i_update_core access below the data copy below.
282 */
283 if (ip->i_update_core) {
284 ip->i_update_core = 0;
285 SYNCHRONIZE();
286 }
287
288 /*
289 * Make sure to get the latest timestamps from the Linux inode.
290 */
291 xfs_synchronize_times(ip);
292
293 vecp->i_addr = &ip->i_d; 183 vecp->i_addr = &ip->i_d;
294 vecp->i_len = sizeof(struct xfs_icdinode); 184 vecp->i_len = sizeof(struct xfs_icdinode);
295 vecp->i_type = XLOG_REG_TYPE_ICORE; 185 vecp->i_type = XLOG_REG_TYPE_ICORE;
296 vecp++; 186 vecp++;
297 nvecs++; 187 nvecs++;
298 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
299 188
300 /* 189 /*
301 * If this is really an old format inode, then we need to 190 * If this is really an old format inode, then we need to
@@ -328,16 +217,17 @@ xfs_inode_item_format(
328 217
329 switch (ip->i_d.di_format) { 218 switch (ip->i_d.di_format) {
330 case XFS_DINODE_FMT_EXTENTS: 219 case XFS_DINODE_FMT_EXTENTS:
331 ASSERT(!(iip->ili_format.ilf_fields & 220 iip->ili_fields &=
332 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 221 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
333 XFS_ILOG_DEV | XFS_ILOG_UUID))); 222 XFS_ILOG_DEV | XFS_ILOG_UUID);
334 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { 223
335 ASSERT(ip->i_df.if_bytes > 0); 224 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
225 ip->i_d.di_nextents > 0 &&
226 ip->i_df.if_bytes > 0) {
336 ASSERT(ip->i_df.if_u1.if_extents != NULL); 227 ASSERT(ip->i_df.if_u1.if_extents != NULL);
337 ASSERT(ip->i_d.di_nextents > 0); 228 ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
338 ASSERT(iip->ili_extents_buf == NULL); 229 ASSERT(iip->ili_extents_buf == NULL);
339 ASSERT((ip->i_df.if_bytes / 230
340 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
341#ifdef XFS_NATIVE_HOST 231#ifdef XFS_NATIVE_HOST
342 if (ip->i_d.di_nextents == ip->i_df.if_bytes / 232 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
343 (uint)sizeof(xfs_bmbt_rec_t)) { 233 (uint)sizeof(xfs_bmbt_rec_t)) {
@@ -359,15 +249,18 @@ xfs_inode_item_format(
359 iip->ili_format.ilf_dsize = vecp->i_len; 249 iip->ili_format.ilf_dsize = vecp->i_len;
360 vecp++; 250 vecp++;
361 nvecs++; 251 nvecs++;
252 } else {
253 iip->ili_fields &= ~XFS_ILOG_DEXT;
362 } 254 }
363 break; 255 break;
364 256
365 case XFS_DINODE_FMT_BTREE: 257 case XFS_DINODE_FMT_BTREE:
366 ASSERT(!(iip->ili_format.ilf_fields & 258 iip->ili_fields &=
367 (XFS_ILOG_DDATA | XFS_ILOG_DEXT | 259 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
368 XFS_ILOG_DEV | XFS_ILOG_UUID))); 260 XFS_ILOG_DEV | XFS_ILOG_UUID);
369 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 261
370 ASSERT(ip->i_df.if_broot_bytes > 0); 262 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
263 ip->i_df.if_broot_bytes > 0) {
371 ASSERT(ip->i_df.if_broot != NULL); 264 ASSERT(ip->i_df.if_broot != NULL);
372 vecp->i_addr = ip->i_df.if_broot; 265 vecp->i_addr = ip->i_df.if_broot;
373 vecp->i_len = ip->i_df.if_broot_bytes; 266 vecp->i_len = ip->i_df.if_broot_bytes;
@@ -375,15 +268,30 @@ xfs_inode_item_format(
375 vecp++; 268 vecp++;
376 nvecs++; 269 nvecs++;
377 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 270 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
271 } else {
272 ASSERT(!(iip->ili_fields &
273 XFS_ILOG_DBROOT));
274#ifdef XFS_TRANS_DEBUG
275 if (iip->ili_root_size > 0) {
276 ASSERT(iip->ili_root_size ==
277 ip->i_df.if_broot_bytes);
278 ASSERT(memcmp(iip->ili_orig_root,
279 ip->i_df.if_broot,
280 iip->ili_root_size) == 0);
281 } else {
282 ASSERT(ip->i_df.if_broot_bytes == 0);
283 }
284#endif
285 iip->ili_fields &= ~XFS_ILOG_DBROOT;
378 } 286 }
379 break; 287 break;
380 288
381 case XFS_DINODE_FMT_LOCAL: 289 case XFS_DINODE_FMT_LOCAL:
382 ASSERT(!(iip->ili_format.ilf_fields & 290 iip->ili_fields &=
383 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 291 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
384 XFS_ILOG_DEV | XFS_ILOG_UUID))); 292 XFS_ILOG_DEV | XFS_ILOG_UUID);
385 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { 293 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
386 ASSERT(ip->i_df.if_bytes > 0); 294 ip->i_df.if_bytes > 0) {
387 ASSERT(ip->i_df.if_u1.if_data != NULL); 295 ASSERT(ip->i_df.if_u1.if_data != NULL);
388 ASSERT(ip->i_d.di_size > 0); 296 ASSERT(ip->i_d.di_size > 0);
389 297
@@ -401,24 +309,26 @@ xfs_inode_item_format(
401 vecp++; 309 vecp++;
402 nvecs++; 310 nvecs++;
403 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 311 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
312 } else {
313 iip->ili_fields &= ~XFS_ILOG_DDATA;
404 } 314 }
405 break; 315 break;
406 316
407 case XFS_DINODE_FMT_DEV: 317 case XFS_DINODE_FMT_DEV:
408 ASSERT(!(iip->ili_format.ilf_fields & 318 iip->ili_fields &=
409 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 319 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
410 XFS_ILOG_DDATA | XFS_ILOG_UUID))); 320 XFS_ILOG_DEXT | XFS_ILOG_UUID);
411 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 321 if (iip->ili_fields & XFS_ILOG_DEV) {
412 iip->ili_format.ilf_u.ilfu_rdev = 322 iip->ili_format.ilf_u.ilfu_rdev =
413 ip->i_df.if_u2.if_rdev; 323 ip->i_df.if_u2.if_rdev;
414 } 324 }
415 break; 325 break;
416 326
417 case XFS_DINODE_FMT_UUID: 327 case XFS_DINODE_FMT_UUID:
418 ASSERT(!(iip->ili_format.ilf_fields & 328 iip->ili_fields &=
419 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 329 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
420 XFS_ILOG_DDATA | XFS_ILOG_DEV))); 330 XFS_ILOG_DEXT | XFS_ILOG_DEV);
421 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 331 if (iip->ili_fields & XFS_ILOG_UUID) {
422 iip->ili_format.ilf_u.ilfu_uuid = 332 iip->ili_format.ilf_u.ilfu_uuid =
423 ip->i_df.if_u2.if_uuid; 333 ip->i_df.if_u2.if_uuid;
424 } 334 }
@@ -430,31 +340,25 @@ xfs_inode_item_format(
430 } 340 }
431 341
432 /* 342 /*
433 * If there are no attributes associated with the file, 343 * If there are no attributes associated with the file, then we're done.
434 * then we're done.
435 * Assert that no attribute-related log flags are set.
436 */ 344 */
437 if (!XFS_IFORK_Q(ip)) { 345 if (!XFS_IFORK_Q(ip)) {
438 iip->ili_format.ilf_size = nvecs; 346 iip->ili_fields &=
439 ASSERT(!(iip->ili_format.ilf_fields & 347 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
440 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 348 goto out;
441 return;
442 } 349 }
443 350
444 switch (ip->i_d.di_aformat) { 351 switch (ip->i_d.di_aformat) {
445 case XFS_DINODE_FMT_EXTENTS: 352 case XFS_DINODE_FMT_EXTENTS:
446 ASSERT(!(iip->ili_format.ilf_fields & 353 iip->ili_fields &=
447 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 354 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
448 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 355
449#ifdef DEBUG 356 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
450 int nrecs = ip->i_afp->if_bytes / 357 ip->i_d.di_anextents > 0 &&
451 (uint)sizeof(xfs_bmbt_rec_t); 358 ip->i_afp->if_bytes > 0) {
452 ASSERT(nrecs > 0); 359 ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
453 ASSERT(nrecs == ip->i_d.di_anextents); 360 ip->i_d.di_anextents);
454 ASSERT(ip->i_afp->if_bytes > 0);
455 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 361 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
456 ASSERT(ip->i_d.di_anextents > 0);
457#endif
458#ifdef XFS_NATIVE_HOST 362#ifdef XFS_NATIVE_HOST
459 /* 363 /*
460 * There are not delayed allocation extents 364 * There are not delayed allocation extents
@@ -471,29 +375,36 @@ xfs_inode_item_format(
471 iip->ili_format.ilf_asize = vecp->i_len; 375 iip->ili_format.ilf_asize = vecp->i_len;
472 vecp++; 376 vecp++;
473 nvecs++; 377 nvecs++;
378 } else {
379 iip->ili_fields &= ~XFS_ILOG_AEXT;
474 } 380 }
475 break; 381 break;
476 382
477 case XFS_DINODE_FMT_BTREE: 383 case XFS_DINODE_FMT_BTREE:
478 ASSERT(!(iip->ili_format.ilf_fields & 384 iip->ili_fields &=
479 (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); 385 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
480 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 386
481 ASSERT(ip->i_afp->if_broot_bytes > 0); 387 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
388 ip->i_afp->if_broot_bytes > 0) {
482 ASSERT(ip->i_afp->if_broot != NULL); 389 ASSERT(ip->i_afp->if_broot != NULL);
390
483 vecp->i_addr = ip->i_afp->if_broot; 391 vecp->i_addr = ip->i_afp->if_broot;
484 vecp->i_len = ip->i_afp->if_broot_bytes; 392 vecp->i_len = ip->i_afp->if_broot_bytes;
485 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 393 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
486 vecp++; 394 vecp++;
487 nvecs++; 395 nvecs++;
488 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 396 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
397 } else {
398 iip->ili_fields &= ~XFS_ILOG_ABROOT;
489 } 399 }
490 break; 400 break;
491 401
492 case XFS_DINODE_FMT_LOCAL: 402 case XFS_DINODE_FMT_LOCAL:
493 ASSERT(!(iip->ili_format.ilf_fields & 403 iip->ili_fields &=
494 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 404 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
495 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { 405
496 ASSERT(ip->i_afp->if_bytes > 0); 406 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
407 ip->i_afp->if_bytes > 0) {
497 ASSERT(ip->i_afp->if_u1.if_data != NULL); 408 ASSERT(ip->i_afp->if_u1.if_data != NULL);
498 409
499 vecp->i_addr = ip->i_afp->if_u1.if_data; 410 vecp->i_addr = ip->i_afp->if_u1.if_data;
@@ -510,6 +421,8 @@ xfs_inode_item_format(
510 vecp++; 421 vecp++;
511 nvecs++; 422 nvecs++;
512 iip->ili_format.ilf_asize = (unsigned)data_bytes; 423 iip->ili_format.ilf_asize = (unsigned)data_bytes;
424 } else {
425 iip->ili_fields &= ~XFS_ILOG_ADATA;
513 } 426 }
514 break; 427 break;
515 428
@@ -518,6 +431,15 @@ xfs_inode_item_format(
518 break; 431 break;
519 } 432 }
520 433
434out:
435 /*
436 * Now update the log format that goes out to disk from the in-core
437 * values. We always write the inode core to make the arithmetic
438 * games in recovery easier, which isn't a big deal as just about any
439 * transaction would dirty it anyway.
440 */
441 iip->ili_format.ilf_fields = XFS_ILOG_CORE |
442 (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
521 iip->ili_format.ilf_size = nvecs; 443 iip->ili_format.ilf_size = nvecs;
522} 444}
523 445
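
The single assignment in the new out: block replaces all the scattered
ilf_fields manipulation: the on-disk flag word is now derived from the
in-core ili_fields in one place. A worked example of the masking, assuming
an inode whose data extents and timestamps are both dirty:

	/* in-core state accumulated via xfs_trans_log_inode() et al.: */
	iip->ili_fields = XFS_ILOG_CORE | XFS_ILOG_DEXT | XFS_ILOG_TIMESTAMP;

	/* what format emits: core is forced on, and the memory-only
	 * TIMESTAMP bit never reaches the on-disk log format. */
	iip->ili_format.ilf_fields = XFS_ILOG_CORE |
			(iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
	/* result: XFS_ILOG_CORE | XFS_ILOG_DEXT */
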
@@ -596,17 +518,13 @@ xfs_inode_item_trylock(
596 /* Stale items should force out the iclog */ 518 /* Stale items should force out the iclog */
597 if (ip->i_flags & XFS_ISTALE) { 519 if (ip->i_flags & XFS_ISTALE) {
598 xfs_ifunlock(ip); 520 xfs_ifunlock(ip);
599 /* 521 xfs_iunlock(ip, XFS_ILOCK_SHARED);
600 * we hold the AIL lock - notify the unlock routine of this
601 * so it doesn't try to get the lock again.
602 */
603 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
604 return XFS_ITEM_PINNED; 522 return XFS_ITEM_PINNED;
605 } 523 }
606 524
607#ifdef DEBUG 525#ifdef DEBUG
608 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 526 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
609 ASSERT(iip->ili_format.ilf_fields != 0); 527 ASSERT(iip->ili_fields != 0);
610 ASSERT(iip->ili_logged == 0); 528 ASSERT(iip->ili_logged == 0);
611 ASSERT(lip->li_flags & XFS_LI_IN_AIL); 529 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
612 } 530 }
@@ -638,7 +556,7 @@ xfs_inode_item_unlock(
638 if (iip->ili_extents_buf != NULL) { 556 if (iip->ili_extents_buf != NULL) {
639 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); 557 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
640 ASSERT(ip->i_d.di_nextents > 0); 558 ASSERT(ip->i_d.di_nextents > 0);
641 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 559 ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
642 ASSERT(ip->i_df.if_bytes > 0); 560 ASSERT(ip->i_df.if_bytes > 0);
643 kmem_free(iip->ili_extents_buf); 561 kmem_free(iip->ili_extents_buf);
644 iip->ili_extents_buf = NULL; 562 iip->ili_extents_buf = NULL;
@@ -646,7 +564,7 @@ xfs_inode_item_unlock(
646 if (iip->ili_aextents_buf != NULL) { 564 if (iip->ili_aextents_buf != NULL) {
647 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); 565 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
648 ASSERT(ip->i_d.di_anextents > 0); 566 ASSERT(ip->i_d.di_anextents > 0);
649 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 567 ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
650 ASSERT(ip->i_afp->if_bytes > 0); 568 ASSERT(ip->i_afp->if_bytes > 0);
651 kmem_free(iip->ili_aextents_buf); 569 kmem_free(iip->ili_aextents_buf);
652 iip->ili_aextents_buf = NULL; 570 iip->ili_aextents_buf = NULL;
@@ -761,8 +679,7 @@ xfs_inode_item_push(
761 * lock without sleeping, then there must not have been 679 * lock without sleeping, then there must not have been
762 * anyone in the process of flushing the inode. 680 * anyone in the process of flushing the inode.
763 */ 681 */
764 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 682 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
765 iip->ili_format.ilf_fields != 0);
766 683
767 /* 684 /*
 768 * Push the inode to its backing buffer. This will not remove the 685
@@ -985,7 +902,7 @@ xfs_iflush_abort(
985 * Clear the inode logging fields so no more flushes are 902 * Clear the inode logging fields so no more flushes are
986 * attempted. 903 * attempted.
987 */ 904 */
988 iip->ili_format.ilf_fields = 0; 905 iip->ili_fields = 0;
989 } 906 }
990 /* 907 /*
991 * Release the inode's flush lock since we're done with it. 908 * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d3dee61e6d9..41d61c3b7a3 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 {
86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ 86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ 87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
88 88
89
90/*
91 * The timestamps are dirty, but not necessarily anything else in the inode
 92 * core. Unlike the other fields above, this one must never make it to disk
 93 * in the ilf_fields of the inode_log_format, but is purely stored in-memory in
94 * ili_fields in the inode_log_item.
95 */
96#define XFS_ILOG_TIMESTAMP 0x4000
97
89#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ 98#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
90 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ 99 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
91 XFS_ILOG_UUID | XFS_ILOG_ADATA | \ 100 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 {
101 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ 110 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
102 XFS_ILOG_DEV | XFS_ILOG_UUID | \ 111 XFS_ILOG_DEV | XFS_ILOG_UUID | \
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 112 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 113 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
105 114
106static inline int xfs_ilog_fbroot(int w) 115static inline int xfs_ilog_fbroot(int w)
107{ 116{
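
Because XFS_ILOG_TIMESTAMP is part of XFS_ILOG_ALL but not of
XFS_ILOG_NONCORE, a timestamp-only update keeps xfs_inode_clean() reporting
the inode as dirty without ever adding a log vector. A compile-time sanity
sketch of those two invariants (BUILD_BUG_ON() must sit inside a function):

	/* follows directly from the flag definitions above */
	BUILD_BUG_ON(!(XFS_ILOG_ALL & XFS_ILOG_TIMESTAMP)); /* counts as dirty */
	BUILD_BUG_ON(XFS_ILOG_NONCORE & XFS_ILOG_TIMESTAMP); /* adds no vector */
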
@@ -134,6 +143,7 @@ typedef struct xfs_inode_log_item {
134 unsigned short ili_lock_flags; /* lock flags */ 143 unsigned short ili_lock_flags; /* lock flags */
135 unsigned short ili_logged; /* flushed logged data */ 144 unsigned short ili_logged; /* flushed logged data */
136 unsigned int ili_last_fields; /* fields when flushed */ 145 unsigned int ili_last_fields; /* fields when flushed */
146 unsigned int ili_fields; /* fields to be logged */
137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 147 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
138 data exts */ 148 data exts */
139 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
@@ -148,9 +158,7 @@ typedef struct xfs_inode_log_item {
148 158
149static inline int xfs_inode_clean(xfs_inode_t *ip) 159static inline int xfs_inode_clean(xfs_inode_t *ip)
150{ 160{
151 return (!ip->i_itemp || 161 return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
152 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
153 !ip->i_update_core;
154} 162}
155 163
156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 164extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 76f3ca5cfc3..91f8ff547ab 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -209,6 +209,7 @@ xfs_open_by_handle(
209 struct file *filp; 209 struct file *filp;
210 struct inode *inode; 210 struct inode *inode;
211 struct dentry *dentry; 211 struct dentry *dentry;
212 fmode_t fmode;
212 213
213 if (!capable(CAP_SYS_ADMIN)) 214 if (!capable(CAP_SYS_ADMIN))
214 return -XFS_ERROR(EPERM); 215 return -XFS_ERROR(EPERM);
@@ -228,26 +229,21 @@ xfs_open_by_handle(
228 hreq->oflags |= O_LARGEFILE; 229 hreq->oflags |= O_LARGEFILE;
229#endif 230#endif
230 231
231 /* Put open permission in namei format. */
232 permflag = hreq->oflags; 232 permflag = hreq->oflags;
233 if ((permflag+1) & O_ACCMODE) 233 fmode = OPEN_FMODE(permflag);
234 permflag++;
235 if (permflag & O_TRUNC)
236 permflag |= 2;
237
238 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && 234 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
239 (permflag & FMODE_WRITE) && IS_APPEND(inode)) { 235 (fmode & FMODE_WRITE) && IS_APPEND(inode)) {
240 error = -XFS_ERROR(EPERM); 236 error = -XFS_ERROR(EPERM);
241 goto out_dput; 237 goto out_dput;
242 } 238 }
243 239
244 if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { 240 if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
245 error = -XFS_ERROR(EACCES); 241 error = -XFS_ERROR(EACCES);
246 goto out_dput; 242 goto out_dput;
247 } 243 }
248 244
249 /* Can't write directories. */ 245 /* Can't write directories. */
250 if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { 246 if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
251 error = -XFS_ERROR(EISDIR); 247 error = -XFS_ERROR(EISDIR);
252 goto out_dput; 248 goto out_dput;
253 } 249 }
@@ -450,9 +446,12 @@ xfs_attrmulti_attr_get(
450 446
451 if (*len > XATTR_SIZE_MAX) 447 if (*len > XATTR_SIZE_MAX)
452 return EINVAL; 448 return EINVAL;
453 kbuf = kmalloc(*len, GFP_KERNEL); 449 kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
454 if (!kbuf) 450 if (!kbuf) {
455 return ENOMEM; 451 kbuf = kmem_zalloc_large(*len);
452 if (!kbuf)
453 return ENOMEM;
454 }
456 455
457 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); 456 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
458 if (error) 457 if (error)
@@ -462,7 +461,10 @@ xfs_attrmulti_attr_get(
462 error = EFAULT; 461 error = EFAULT;
463 462
464 out_kfree: 463 out_kfree:
465 kfree(kbuf); 464 if (is_vmalloc_addr(kbuf))
465 kmem_free_large(kbuf);
466 else
467 kmem_free(kbuf);
466 return error; 468 return error;
467} 469}
468 470
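
An xattr value can be up to XATTR_SIZE_MAX (64k), which kmalloc() may fail
to provide on a fragmented heap, so the patch falls back to a vmalloc-backed
allocation (kmem_zalloc_large() in XFS) and frees through whichever routine
matches, probed with is_vmalloc_addr(). The same pattern sketched with the
core mm primitives:

	/* sketch: try the cheap contiguous allocation, fall back to vmalloc */
	void *buf = kmalloc(len, GFP_KERNEL | __GFP_NOWARN);
	if (!buf)
		buf = vzalloc(len);	/* may succeed where kmalloc fails */
	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	if (is_vmalloc_addr(buf))
		vfree(buf);
	else
		kfree(buf);
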
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index f9ccb7b7c04..a849a5473af 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat(
293 int res; 293 int res;
294 294
295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
296 sizeof(compat_xfs_bstat_t), 0, &res); 296 sizeof(compat_xfs_bstat_t), NULL, &res);
297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
298 error = xfs_bulkstat(mp, &inlast, &count, 298 error = xfs_bulkstat(mp, &inlast, &count,
299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), 299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 246c7d57c6f..71a464503c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -31,6 +31,7 @@
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dinode.h" 32#include "xfs_dinode.h"
33#include "xfs_inode.h" 33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
34#include "xfs_btree.h" 35#include "xfs_btree.h"
35#include "xfs_bmap.h" 36#include "xfs_bmap.h"
36#include "xfs_rtalloc.h" 37#include "xfs_rtalloc.h"
@@ -645,6 +646,7 @@ xfs_iomap_write_unwritten(
645 xfs_trans_t *tp; 646 xfs_trans_t *tp;
646 xfs_bmbt_irec_t imap; 647 xfs_bmbt_irec_t imap;
647 xfs_bmap_free_t free_list; 648 xfs_bmap_free_t free_list;
649 xfs_fsize_t i_size;
648 uint resblks; 650 uint resblks;
649 int committed; 651 int committed;
650 int error; 652 int error;
@@ -705,7 +707,22 @@ xfs_iomap_write_unwritten(
705 if (error) 707 if (error)
706 goto error_on_bmapi_transaction; 708 goto error_on_bmapi_transaction;
707 709
708 error = xfs_bmap_finish(&(tp), &(free_list), &committed); 710 /*
711 * Log the updated inode size as we go. We have to be careful
712 * to only log it up to the actual write offset if it is
713 * halfway into a block.
714 */
715 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
716 if (i_size > offset + count)
717 i_size = offset + count;
718
719 i_size = xfs_new_eof(ip, i_size);
720 if (i_size) {
721 ip->i_d.di_size = i_size;
722 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
723 }
724
725 error = xfs_bmap_finish(&tp, &free_list, &committed);
709 if (error) 726 if (error)
710 goto error_on_bmapi_transaction; 727 goto error_on_bmapi_transaction;
711 728
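
The clamp matters when the conversion range is block-aligned but the write
was not. A worked example, assuming 4096-byte blocks:

	/* converting blocks [0, 2) = bytes [0, 8192), but the write only
	 * reached byte 6000; logging 8192 would expose never-written space */
	i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);	/* 8192 */
	if (i_size > offset + count)				/* 8192 > 6000 */
		i_size = offset + count;			/* log 6000 */
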
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab302539e5b..3011b879f85 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -50,65 +50,15 @@
50#include <linux/fiemap.h> 50#include <linux/fiemap.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52 52
53/* 53static int
54 * Bring the timestamps in the XFS inode uptodate. 54xfs_initxattrs(
55 * 55 struct inode *inode,
56 * Used before writing the inode to disk. 56 const struct xattr *xattr_array,
57 */ 57 void *fs_info)
58void
59xfs_synchronize_times(
60 xfs_inode_t *ip)
61{
62 struct inode *inode = VFS_I(ip);
63
64 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
65 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
66 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
67 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
68 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
69 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
70}
71
72/*
73 * If the linux inode is valid, mark it dirty, else mark the dirty state
74 * in the XFS inode to make sure we pick it up when reclaiming the inode.
75 */
76void
77xfs_mark_inode_dirty_sync(
78 xfs_inode_t *ip)
79{
80 struct inode *inode = VFS_I(ip);
81
82 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
83 mark_inode_dirty_sync(inode);
84 else {
85 barrier();
86 ip->i_update_core = 1;
87 }
88}
89
90void
91xfs_mark_inode_dirty(
92 xfs_inode_t *ip)
93{
94 struct inode *inode = VFS_I(ip);
95
96 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
97 mark_inode_dirty(inode);
98 else {
99 barrier();
100 ip->i_update_core = 1;
101 }
102
103}
104
105
106int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
107 void *fs_info)
108{ 58{
109 const struct xattr *xattr; 59 const struct xattr *xattr;
110 struct xfs_inode *ip = XFS_I(inode); 60 struct xfs_inode *ip = XFS_I(inode);
111 int error = 0; 61 int error = 0;
112 62
113 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 63 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
114 error = xfs_attr_set(ip, xattr->name, xattr->value, 64 error = xfs_attr_set(ip, xattr->name, xattr->value,
@@ -678,19 +628,16 @@ xfs_setattr_nonsize(
678 inode->i_atime = iattr->ia_atime; 628 inode->i_atime = iattr->ia_atime;
679 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 629 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
680 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 630 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
681 ip->i_update_core = 1;
682 } 631 }
683 if (mask & ATTR_CTIME) { 632 if (mask & ATTR_CTIME) {
684 inode->i_ctime = iattr->ia_ctime; 633 inode->i_ctime = iattr->ia_ctime;
685 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 634 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
686 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 635 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
687 ip->i_update_core = 1;
688 } 636 }
689 if (mask & ATTR_MTIME) { 637 if (mask & ATTR_MTIME) {
690 inode->i_mtime = iattr->ia_mtime; 638 inode->i_mtime = iattr->ia_mtime;
691 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 639 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
692 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 640 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
693 ip->i_update_core = 1;
694 } 641 }
695 642
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 643 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -918,13 +865,11 @@ xfs_setattr_size(
918 inode->i_ctime = iattr->ia_ctime; 865 inode->i_ctime = iattr->ia_ctime;
919 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 866 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
920 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 867 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
921 ip->i_update_core = 1;
922 } 868 }
923 if (mask & ATTR_MTIME) { 869 if (mask & ATTR_MTIME) {
924 inode->i_mtime = iattr->ia_mtime; 870 inode->i_mtime = iattr->ia_mtime;
925 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 871 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
926 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 872 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
927 ip->i_update_core = 1;
928 } 873 }
929 874
930 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 875 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 751e94fe1f7..acc2bf264da 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int(
62{ 62{
63 struct xfs_icdinode *dic; /* dinode core info pointer */ 63 struct xfs_icdinode *dic; /* dinode core info pointer */
64 struct xfs_inode *ip; /* incore inode pointer */ 64 struct xfs_inode *ip; /* incore inode pointer */
65 struct inode *inode;
66 struct xfs_bstat *buf; /* return buffer */ 65 struct xfs_bstat *buf; /* return buffer */
67 int error = 0; /* error value */ 66 int error = 0; /* error value */
68 67
@@ -76,7 +75,8 @@ xfs_bulkstat_one_int(
76 return XFS_ERROR(ENOMEM); 75 return XFS_ERROR(ENOMEM);
77 76
78 error = xfs_iget(mp, NULL, ino, 77 error = xfs_iget(mp, NULL, ino,
79 XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip); 78 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
79 XFS_ILOCK_SHARED, &ip);
80 if (error) { 80 if (error) {
81 *stat = BULKSTAT_RV_NOTHING; 81 *stat = BULKSTAT_RV_NOTHING;
82 goto out_free; 82 goto out_free;
@@ -86,7 +86,6 @@ xfs_bulkstat_one_int(
86 ASSERT(ip->i_imap.im_blkno != 0); 86 ASSERT(ip->i_imap.im_blkno != 0);
87 87
88 dic = &ip->i_d; 88 dic = &ip->i_d;
89 inode = VFS_I(ip);
90 89
91 /* xfs_iget returns the following without needing 90 /* xfs_iget returns the following without needing
92 * further change. 91 * further change.
@@ -99,19 +98,12 @@ xfs_bulkstat_one_int(
99 buf->bs_uid = dic->di_uid; 98 buf->bs_uid = dic->di_uid;
100 buf->bs_gid = dic->di_gid; 99 buf->bs_gid = dic->di_gid;
101 buf->bs_size = dic->di_size; 100 buf->bs_size = dic->di_size;
102 101 buf->bs_atime.tv_sec = dic->di_atime.t_sec;
103 /* 102 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
104 * We need to read the timestamps from the Linux inode because 103 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
105 * the VFS keeps writing directly into the inode structure instead 104 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
106 * of telling us about the updates. 105 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
107 */ 106 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
108 buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
109 buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
110 buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
111 buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
112 buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
113 buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
114
115 buf->bs_xflags = xfs_ip2xflags(ip); 107 buf->bs_xflags = xfs_ip2xflags(ip);
116 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 108 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
117 buf->bs_extents = dic->di_nextents; 109 buf->bs_extents = dic->di_nextents;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e2cc3568c29..6db1fef38bf 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -67,15 +67,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log,
67 int eventual_size); 67 int eventual_size);
68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
69 69
70/* local functions to manipulate grant head */
71STATIC int xlog_grant_log_space(xlog_t *log,
72 xlog_ticket_t *xtic);
73STATIC void xlog_grant_push_ail(struct log *log, 70STATIC void xlog_grant_push_ail(struct log *log,
74 int need_bytes); 71 int need_bytes);
75STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 72STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
76 xlog_ticket_t *ticket); 73 xlog_ticket_t *ticket);
77STATIC int xlog_regrant_write_log_space(xlog_t *log,
78 xlog_ticket_t *ticket);
79STATIC void xlog_ungrant_log_space(xlog_t *log, 74STATIC void xlog_ungrant_log_space(xlog_t *log,
80 xlog_ticket_t *ticket); 75 xlog_ticket_t *ticket);
81 76
@@ -150,78 +145,93 @@ xlog_grant_add_space(
150 } while (head_val != old); 145 } while (head_val != old);
151} 146}
152 147
153STATIC bool 148STATIC void
154xlog_reserveq_wake( 149xlog_grant_head_init(
155 struct log *log, 150 struct xlog_grant_head *head)
156 int *free_bytes) 151{
152 xlog_assign_grant_head(&head->grant, 1, 0);
153 INIT_LIST_HEAD(&head->waiters);
154 spin_lock_init(&head->lock);
155}
156
157STATIC void
158xlog_grant_head_wake_all(
159 struct xlog_grant_head *head)
157{ 160{
158 struct xlog_ticket *tic; 161 struct xlog_ticket *tic;
159 int need_bytes;
160 162
161 list_for_each_entry(tic, &log->l_reserveq, t_queue) { 163 spin_lock(&head->lock);
164 list_for_each_entry(tic, &head->waiters, t_queue)
165 wake_up_process(tic->t_task);
166 spin_unlock(&head->lock);
167}
168
169static inline int
170xlog_ticket_reservation(
171 struct log *log,
172 struct xlog_grant_head *head,
173 struct xlog_ticket *tic)
174{
175 if (head == &log->l_write_head) {
176 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
177 return tic->t_unit_res;
178 } else {
162 if (tic->t_flags & XLOG_TIC_PERM_RESERV) 179 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
163 need_bytes = tic->t_unit_res * tic->t_cnt; 180 return tic->t_unit_res * tic->t_cnt;
164 else 181 else
165 need_bytes = tic->t_unit_res; 182 return tic->t_unit_res;
166
167 if (*free_bytes < need_bytes)
168 return false;
169 *free_bytes -= need_bytes;
170
171 trace_xfs_log_grant_wake_up(log, tic);
172 wake_up(&tic->t_wait);
173 } 183 }
174
175 return true;
176} 184}
177 185
178STATIC bool 186STATIC bool
179xlog_writeq_wake( 187xlog_grant_head_wake(
180 struct log *log, 188 struct log *log,
189 struct xlog_grant_head *head,
181 int *free_bytes) 190 int *free_bytes)
182{ 191{
183 struct xlog_ticket *tic; 192 struct xlog_ticket *tic;
184 int need_bytes; 193 int need_bytes;
185 194
186 list_for_each_entry(tic, &log->l_writeq, t_queue) { 195 list_for_each_entry(tic, &head->waiters, t_queue) {
187 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 196 need_bytes = xlog_ticket_reservation(log, head, tic);
188
189 need_bytes = tic->t_unit_res;
190
191 if (*free_bytes < need_bytes) 197 if (*free_bytes < need_bytes)
192 return false; 198 return false;
193 *free_bytes -= need_bytes;
194 199
195 trace_xfs_log_regrant_write_wake_up(log, tic); 200 *free_bytes -= need_bytes;
196 wake_up(&tic->t_wait); 201 trace_xfs_log_grant_wake_up(log, tic);
202 wake_up_process(tic->t_task);
197 } 203 }
198 204
199 return true; 205 return true;
200} 206}
201 207
202STATIC int 208STATIC int
203xlog_reserveq_wait( 209xlog_grant_head_wait(
204 struct log *log, 210 struct log *log,
211 struct xlog_grant_head *head,
205 struct xlog_ticket *tic, 212 struct xlog_ticket *tic,
206 int need_bytes) 213 int need_bytes)
207{ 214{
208 list_add_tail(&tic->t_queue, &log->l_reserveq); 215 list_add_tail(&tic->t_queue, &head->waiters);
209 216
210 do { 217 do {
211 if (XLOG_FORCED_SHUTDOWN(log)) 218 if (XLOG_FORCED_SHUTDOWN(log))
212 goto shutdown; 219 goto shutdown;
213 xlog_grant_push_ail(log, need_bytes); 220 xlog_grant_push_ail(log, need_bytes);
214 221
222 __set_current_state(TASK_UNINTERRUPTIBLE);
223 spin_unlock(&head->lock);
224
215 XFS_STATS_INC(xs_sleep_logspace); 225 XFS_STATS_INC(xs_sleep_logspace);
216 trace_xfs_log_grant_sleep(log, tic);
217 226
218 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); 227 trace_xfs_log_grant_sleep(log, tic);
228 schedule();
219 trace_xfs_log_grant_wake(log, tic); 229 trace_xfs_log_grant_wake(log, tic);
220 230
221 spin_lock(&log->l_grant_reserve_lock); 231 spin_lock(&head->lock);
222 if (XLOG_FORCED_SHUTDOWN(log)) 232 if (XLOG_FORCED_SHUTDOWN(log))
223 goto shutdown; 233 goto shutdown;
224 } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); 234 } while (xlog_space_left(log, &head->grant) < need_bytes);
225 235
226 list_del_init(&tic->t_queue); 236 list_del_init(&tic->t_queue);
227 return 0; 237 return 0;
@@ -230,35 +240,58 @@ shutdown:
230 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
231} 241}
232 242
243/*
244 * Atomically get the log space required for a log ticket.
245 *
246 * Once a ticket gets put onto head->waiters, it will only return after the
247 * needed reservation is satisfied.
248 *
249 * This function is structured so that it has a lock free fast path. This is
250 * necessary because every new transaction reservation will come through this
251 * path. Hence any lock will be globally hot if we take it unconditionally on
252 * every pass.
253 *
254 * As tickets are only ever moved on and off head->waiters under head->lock, we
255 * only need to take that lock if we are going to add the ticket to the queue
256 * and sleep. We can avoid taking the lock if the ticket was never added to
257 * head->waiters because the t_queue list head will be empty and we hold the
258 * only reference to it so it can safely be checked unlocked.
259 */
233STATIC int 260STATIC int
234xlog_writeq_wait( 261xlog_grant_head_check(
235 struct log *log, 262 struct log *log,
263 struct xlog_grant_head *head,
236 struct xlog_ticket *tic, 264 struct xlog_ticket *tic,
237 int need_bytes) 265 int *need_bytes)
238{ 266{
239 list_add_tail(&tic->t_queue, &log->l_writeq); 267 int free_bytes;
240 268 int error = 0;
241 do {
242 if (XLOG_FORCED_SHUTDOWN(log))
243 goto shutdown;
244 xlog_grant_push_ail(log, need_bytes);
245
246 XFS_STATS_INC(xs_sleep_logspace);
247 trace_xfs_log_regrant_write_sleep(log, tic);
248 269
249 xlog_wait(&tic->t_wait, &log->l_grant_write_lock); 270 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
250 trace_xfs_log_regrant_write_wake(log, tic);
251 271
252 spin_lock(&log->l_grant_write_lock); 272 /*
253 if (XLOG_FORCED_SHUTDOWN(log)) 273 * If there are other waiters on the queue then give them a chance at
254 goto shutdown; 274 * logspace before us. Wake up the first waiters, if we do not wake
255 } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); 275 * up all the waiters then go to sleep waiting for more free space,
276 * otherwise try to get some space for this transaction.
277 */
278 *need_bytes = xlog_ticket_reservation(log, head, tic);
279 free_bytes = xlog_space_left(log, &head->grant);
280 if (!list_empty_careful(&head->waiters)) {
281 spin_lock(&head->lock);
282 if (!xlog_grant_head_wake(log, head, &free_bytes) ||
283 free_bytes < *need_bytes) {
284 error = xlog_grant_head_wait(log, head, tic,
285 *need_bytes);
286 }
287 spin_unlock(&head->lock);
288 } else if (free_bytes < *need_bytes) {
289 spin_lock(&head->lock);
290 error = xlog_grant_head_wait(log, head, tic, *need_bytes);
291 spin_unlock(&head->lock);
292 }
256 293
257 list_del_init(&tic->t_queue); 294 return error;
258 return 0;
259shutdown:
260 list_del_init(&tic->t_queue);
261 return XFS_ERROR(EIO);
262} 295}
263 296
264static void 297static void
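
The duplicated reserve/write queues, locks, and grant heads are folded into
a common structure so that one set of helpers serves both. A sketch of its
assumed shape (the real definition is added to xfs_log_priv.h, which is not
among the hunks shown here):

	struct xlog_grant_head {
		spinlock_t		lock;	/* was l_grant_*_lock */
		struct list_head	waiters;/* was l_reserveq/l_writeq */
		atomic64_t		grant;	/* was l_grant_*_head */
	};
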
@@ -286,6 +319,128 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
286} 319}
287 320
288/* 321/*
322 * Replenish the byte reservation required by moving the grant write head.
323 */
324int
325xfs_log_regrant(
326 struct xfs_mount *mp,
327 struct xlog_ticket *tic)
328{
329 struct log *log = mp->m_log;
330 int need_bytes;
331 int error = 0;
332
333 if (XLOG_FORCED_SHUTDOWN(log))
334 return XFS_ERROR(EIO);
335
336 XFS_STATS_INC(xs_try_logspace);
337
338 /*
339 * This is a new transaction on the ticket, so we need to change the
340 * transaction ID so that the next transaction has a different TID in
341 * the log. Just add one to the existing tid so that we can see chains
342 * of rolling transactions in the log easily.
343 */
344 tic->t_tid++;
345
346 xlog_grant_push_ail(log, tic->t_unit_res);
347
348 tic->t_curr_res = tic->t_unit_res;
349 xlog_tic_reset_res(tic);
350
351 if (tic->t_cnt > 0)
352 return 0;
353
354 trace_xfs_log_regrant(log, tic);
355
356 error = xlog_grant_head_check(log, &log->l_write_head, tic,
357 &need_bytes);
358 if (error)
359 goto out_error;
360
361 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
362 trace_xfs_log_regrant_exit(log, tic);
363 xlog_verify_grant_tail(log);
364 return 0;
365
366out_error:
367 /*
368 * If we are failing, make sure the ticket doesn't have any current
369 * reservations. We don't want to add this back when the ticket/
370 * transaction gets cancelled.
371 */
372 tic->t_curr_res = 0;
373 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
374 return error;
375}
376
377/*
 378 * Reserve log space and return a ticket corresponding to the reservation.
379 *
380 * Each reservation is going to reserve extra space for a log record header.
381 * When writes happen to the on-disk log, we don't subtract the length of the
382 * log record header from any reservation. By wasting space in each
383 * reservation, we prevent over allocation problems.
384 */
385int
386xfs_log_reserve(
387 struct xfs_mount *mp,
388 int unit_bytes,
389 int cnt,
390 struct xlog_ticket **ticp,
391 __uint8_t client,
392 bool permanent,
393 uint t_type)
394{
395 struct log *log = mp->m_log;
396 struct xlog_ticket *tic;
397 int need_bytes;
398 int error = 0;
399
400 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
401
402 if (XLOG_FORCED_SHUTDOWN(log))
403 return XFS_ERROR(EIO);
404
405 XFS_STATS_INC(xs_try_logspace);
406
407 ASSERT(*ticp == NULL);
408 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
409 KM_SLEEP | KM_MAYFAIL);
410 if (!tic)
411 return XFS_ERROR(ENOMEM);
412
413 tic->t_trans_type = t_type;
414 *ticp = tic;
415
416 xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt);
417
418 trace_xfs_log_reserve(log, tic);
419
420 error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
421 &need_bytes);
422 if (error)
423 goto out_error;
424
425 xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
426 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
427 trace_xfs_log_reserve_exit(log, tic);
428 xlog_verify_grant_tail(log);
429 return 0;
430
431out_error:
432 /*
433 * If we are failing, make sure the ticket doesn't have any current
434 * reservations. We don't want to add this back when the ticket/
435 * transaction gets cancelled.
436 */
437 tic->t_curr_res = 0;
438 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
439 return error;
440}
441
442
443/*
289 * NOTES: 444 * NOTES:
290 * 445 *
291 * 1. currblock field gets updated at startup and after in-core logs 446 * 1. currblock field gets updated at startup and after in-core logs
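
With xlog_grant_log_space() and xlog_regrant_write_log_space() gone, the
distinction between a fresh reservation and a rolling permanent transaction
moves up to the caller. A hedged sketch of the intended split in
xfs_trans_reserve() (simplified; the local names are illustrative):

	/* a rolling transaction reuses its ticket, a new one allocates one */
	if (tp->t_ticket != NULL) {
		/* only permanent reservations are allowed to roll */
		error = xfs_log_regrant(mp, tp->t_ticket);
	} else {
		error = xfs_log_reserve(mp, logspace, logcount,
					&tp->t_ticket, XFS_TRANSACTION,
					permanent, tp->t_type);
	}
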
@@ -395,88 +550,6 @@ xfs_log_release_iclog(
395} 550}
396 551
397/* 552/*
398 * 1. Reserve an amount of on-disk log space and return a ticket corresponding
399 * to the reservation.
400 * 2. Potentially, push buffers at tail of log to disk.
401 *
402 * Each reservation is going to reserve extra space for a log record header.
403 * When writes happen to the on-disk log, we don't subtract the length of the
404 * log record header from any reservation. By wasting space in each
405 * reservation, we prevent over allocation problems.
406 */
407int
408xfs_log_reserve(
409 struct xfs_mount *mp,
410 int unit_bytes,
411 int cnt,
412 struct xlog_ticket **ticket,
413 __uint8_t client,
414 uint flags,
415 uint t_type)
416{
417 struct log *log = mp->m_log;
418 struct xlog_ticket *internal_ticket;
419 int retval = 0;
420
421 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
422
423 if (XLOG_FORCED_SHUTDOWN(log))
424 return XFS_ERROR(EIO);
425
426 XFS_STATS_INC(xs_try_logspace);
427
428
429 if (*ticket != NULL) {
430 ASSERT(flags & XFS_LOG_PERM_RESERV);
431 internal_ticket = *ticket;
432
433 /*
434 * this is a new transaction on the ticket, so we need to
435 * change the transaction ID so that the next transaction has a
436 * different TID in the log. Just add one to the existing tid
437 * so that we can see chains of rolling transactions in the log
438 * easily.
439 */
440 internal_ticket->t_tid++;
441
442 trace_xfs_log_reserve(log, internal_ticket);
443
444 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
445 retval = xlog_regrant_write_log_space(log, internal_ticket);
446 } else {
447 /* may sleep if need to allocate more tickets */
448 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
449 client, flags,
450 KM_SLEEP|KM_MAYFAIL);
451 if (!internal_ticket)
452 return XFS_ERROR(ENOMEM);
453 internal_ticket->t_trans_type = t_type;
454 *ticket = internal_ticket;
455
456 trace_xfs_log_reserve(log, internal_ticket);
457
458 xlog_grant_push_ail(log,
459 (internal_ticket->t_unit_res *
460 internal_ticket->t_cnt));
461 retval = xlog_grant_log_space(log, internal_ticket);
462 }
463
464 if (unlikely(retval)) {
465 /*
466 * If we are failing, make sure the ticket doesn't have any
467 * current reservations. We don't want to add this back
468 * when the ticket/ transaction gets cancelled.
469 */
470 internal_ticket->t_curr_res = 0;
471 /* ungrant will give back unit_res * t_cnt. */
472 internal_ticket->t_cnt = 0;
473 }
474
475 return retval;
476}
477
478
479/*
480 * Mount a log filesystem 553 * Mount a log filesystem
481 * 554 *
482 * mp - ubiquitous xfs mount point structure 555 * mp - ubiquitous xfs mount point structure
@@ -653,8 +726,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
653 .lv_iovecp = &reg, 726 .lv_iovecp = &reg,
654 }; 727 };
655 728
656 /* remove inited flag */ 729 /* remove inited flag, and account for space used */
657 tic->t_flags = 0; 730 tic->t_flags = 0;
731 tic->t_curr_res -= sizeof(magic);
658 error = xlog_write(log, &vec, tic, &lsn, 732 error = xlog_write(log, &vec, tic, &lsn,
659 NULL, XLOG_UNMOUNT_TRANS); 733 NULL, XLOG_UNMOUNT_TRANS);
660 /* 734 /*
@@ -760,64 +834,35 @@ xfs_log_item_init(
760 INIT_LIST_HEAD(&item->li_cil); 834 INIT_LIST_HEAD(&item->li_cil);
761} 835}
762 836
837/*
838 * Wake up processes waiting for log space after we have moved the log tail.
839 */
763void 840void
764xfs_log_move_tail(xfs_mount_t *mp, 841xfs_log_space_wake(
765 xfs_lsn_t tail_lsn) 842 struct xfs_mount *mp)
766{ 843{
767 xlog_ticket_t *tic; 844 struct log *log = mp->m_log;
768 xlog_t *log = mp->m_log; 845 int free_bytes;
769 int need_bytes, free_bytes;
770 846
771 if (XLOG_FORCED_SHUTDOWN(log)) 847 if (XLOG_FORCED_SHUTDOWN(log))
772 return; 848 return;
773 849
774 if (tail_lsn == 0) 850 if (!list_empty_careful(&log->l_write_head.waiters)) {
775 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 851 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
776
777 /* tail_lsn == 1 implies that we weren't passed a valid value. */
778 if (tail_lsn != 1)
779 atomic64_set(&log->l_tail_lsn, tail_lsn);
780
781 if (!list_empty_careful(&log->l_writeq)) {
782#ifdef DEBUG
783 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
784 panic("Recovery problem");
785#endif
786 spin_lock(&log->l_grant_write_lock);
787 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
788 list_for_each_entry(tic, &log->l_writeq, t_queue) {
789 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
790 852
791 if (free_bytes < tic->t_unit_res && tail_lsn != 1) 853 spin_lock(&log->l_write_head.lock);
792 break; 854 free_bytes = xlog_space_left(log, &log->l_write_head.grant);
793 tail_lsn = 0; 855 xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
794 free_bytes -= tic->t_unit_res; 856 spin_unlock(&log->l_write_head.lock);
795 trace_xfs_log_regrant_write_wake_up(log, tic);
796 wake_up(&tic->t_wait);
797 }
798 spin_unlock(&log->l_grant_write_lock);
799 } 857 }
800 858
801 if (!list_empty_careful(&log->l_reserveq)) { 859 if (!list_empty_careful(&log->l_reserve_head.waiters)) {
802#ifdef DEBUG 860 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
803 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 861
804 panic("Recovery problem"); 862 spin_lock(&log->l_reserve_head.lock);
805#endif 863 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
806 spin_lock(&log->l_grant_reserve_lock); 864 xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
807 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 865 spin_unlock(&log->l_reserve_head.lock);
808 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
809 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
810 need_bytes = tic->t_unit_res*tic->t_cnt;
811 else
812 need_bytes = tic->t_unit_res;
813 if (free_bytes < need_bytes && tail_lsn != 1)
814 break;
815 tail_lsn = 0;
816 free_bytes -= need_bytes;
817 trace_xfs_log_grant_wake_up(log, tic);
818 wake_up(&tic->t_wait);
819 }
820 spin_unlock(&log->l_grant_reserve_lock);
821 } 866 }
822} 867}
823 868
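
xfs_log_space_wake() no longer takes or validates a tail LSN: moving the
tail and waking space waiters are now two separate steps. The assumed caller
pattern after this change (e.g. from the AIL code) is simply:

	/* sketch: update the tail first, then let waiters recheck space */
	xlog_assign_tail_lsn(mp);
	xfs_log_space_wake(mp);
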
@@ -867,21 +912,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
867 return needed; 912 return needed;
868} 913}
869 914
870/****************************************************************************** 915/*
871 *
872 * local routines
873 *
874 ******************************************************************************
875 */
876
877/* xfs_trans_tail_ail returns 0 when there is nothing in the list.
878 * The log manager must keep track of the last LR which was committed
879 * to disk. The lsn of this LR will become the new tail_lsn whenever
880 * xfs_trans_tail_ail returns 0. If we don't do this, we run into
881 * the situation where stuff could be written into the log but nothing
882 * was ever in the AIL when asked. Eventually, we panic since the
883 * tail hits the head.
884 *
885 * We may be holding the log iclog lock upon entering this routine. 916 * We may be holding the log iclog lock upon entering this routine.
886 */ 917 */
887xfs_lsn_t 918xfs_lsn_t
@@ -891,10 +922,17 @@ xlog_assign_tail_lsn(
891 xfs_lsn_t tail_lsn; 922 xfs_lsn_t tail_lsn;
892 struct log *log = mp->m_log; 923 struct log *log = mp->m_log;
893 924
925 /*
926 * To make sure we always have a valid LSN for the log tail we keep
927 * track of the last LSN which was committed in log->l_last_sync_lsn,
928 * and use that when the AIL was empty and xfs_ail_min_lsn returns 0.
929 *
930 * If the AIL has been emptied we also need to wake any process
931 * waiting for this condition.
932 */
894 tail_lsn = xfs_ail_min_lsn(mp->m_ail); 933 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
895 if (!tail_lsn) 934 if (!tail_lsn)
896 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 935 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
897
898 atomic64_set(&log->l_tail_lsn, tail_lsn); 936 atomic64_set(&log->l_tail_lsn, tail_lsn);
899 return tail_lsn; 937 return tail_lsn;
900} 938}
@@ -1100,12 +1138,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1100 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); 1138 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1101 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); 1139 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
1102 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1140 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
1103 xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); 1141
1104 xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); 1142 xlog_grant_head_init(&log->l_reserve_head);
1105 INIT_LIST_HEAD(&log->l_reserveq); 1143 xlog_grant_head_init(&log->l_write_head);
1106 INIT_LIST_HEAD(&log->l_writeq);
1107 spin_lock_init(&log->l_grant_reserve_lock);
1108 spin_lock_init(&log->l_grant_write_lock);
1109 1144
1110 error = EFSCORRUPTED; 1145 error = EFSCORRUPTED;
1111 if (xfs_sb_version_hassector(&mp->m_sb)) { 1146 if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1280,7 +1315,7 @@ xlog_grant_push_ail(
1280 1315
1281 ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1316 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1282 1317
1283 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 1318 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
1284 free_blocks = BTOBBT(free_bytes); 1319 free_blocks = BTOBBT(free_bytes);
1285 1320
1286 /* 1321 /*
@@ -1412,8 +1447,8 @@ xlog_sync(xlog_t *log,
1412 roundoff < BBTOB(1))); 1447 roundoff < BBTOB(1)));
1413 1448
1414 /* move grant heads by roundoff in sync */ 1449 /* move grant heads by roundoff in sync */
1415 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); 1450 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
1416 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); 1451 xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
1417 1452
1418 /* put cycle number in every block */ 1453 /* put cycle number in every block */
1419 xlog_pack_data(log, iclog, roundoff); 1454 xlog_pack_data(log, iclog, roundoff);
@@ -2566,119 +2601,6 @@ restart:
2566 return 0; 2601 return 0;
2567} /* xlog_state_get_iclog_space */ 2602} /* xlog_state_get_iclog_space */
2568 2603
2569/*
2570 * Atomically get the log space required for a log ticket.
2571 *
2572 * Once a ticket gets put onto the reserveq, it will only return after the
2573 * needed reservation is satisfied.
2574 *
2575 * This function is structured so that it has a lock free fast path. This is
2576 * necessary because every new transaction reservation will come through this
2577 * path. Hence any lock will be globally hot if we take it unconditionally on
2578 * every pass.
2579 *
2580 * As tickets are only ever moved on and off the reserveq under the
2581 * l_grant_reserve_lock, we only need to take that lock if we are going to add
2582 * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
2583 * was never added to the reserveq because the t_queue list head will be empty
2584 * and we hold the only reference to it so it can safely be checked unlocked.
2585 */
2586STATIC int
2587xlog_grant_log_space(
2588 struct log *log,
2589 struct xlog_ticket *tic)
2590{
2591 int free_bytes, need_bytes;
2592 int error = 0;
2593
2594 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2595
2596 trace_xfs_log_grant_enter(log, tic);
2597
2598 /*
2599 * If there are other waiters on the queue then give them a chance at
2600 * logspace before us. Wake up the first waiters, if we do not wake
2601 * up all the waiters then go to sleep waiting for more free space,
2602 * otherwise try to get some space for this transaction.
2603 */
2604 need_bytes = tic->t_unit_res;
2605 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2606 need_bytes *= tic->t_ocnt;
2607 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2608 if (!list_empty_careful(&log->l_reserveq)) {
2609 spin_lock(&log->l_grant_reserve_lock);
2610 if (!xlog_reserveq_wake(log, &free_bytes) ||
2611 free_bytes < need_bytes)
2612 error = xlog_reserveq_wait(log, tic, need_bytes);
2613 spin_unlock(&log->l_grant_reserve_lock);
2614 } else if (free_bytes < need_bytes) {
2615 spin_lock(&log->l_grant_reserve_lock);
2616 error = xlog_reserveq_wait(log, tic, need_bytes);
2617 spin_unlock(&log->l_grant_reserve_lock);
2618 }
2619 if (error)
2620 return error;
2621
2622 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2623 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2624 trace_xfs_log_grant_exit(log, tic);
2625 xlog_verify_grant_tail(log);
2626 return 0;
2627}
2628
2629/*
2630 * Replenish the byte reservation required by moving the grant write head.
2631 *
2632 * Similar to xlog_grant_log_space, the function is structured to have a lock
2633 * free fast path.
2634 */
2635STATIC int
2636xlog_regrant_write_log_space(
2637 struct log *log,
2638 struct xlog_ticket *tic)
2639{
2640 int free_bytes, need_bytes;
2641 int error = 0;
2642
2643 tic->t_curr_res = tic->t_unit_res;
2644 xlog_tic_reset_res(tic);
2645
2646 if (tic->t_cnt > 0)
2647 return 0;
2648
2649 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2650
2651 trace_xfs_log_regrant_write_enter(log, tic);
2652
2653 /*
2654 * If there are other waiters on the queue then give them a chance at
2655 * logspace before us. Wake up the first waiters, if we do not wake
2656 * up all the waiters then go to sleep waiting for more free space,
2657 * otherwise try to get some space for this transaction.
2658 */
2659 need_bytes = tic->t_unit_res;
2660 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2661 if (!list_empty_careful(&log->l_writeq)) {
2662 spin_lock(&log->l_grant_write_lock);
2663 if (!xlog_writeq_wake(log, &free_bytes) ||
2664 free_bytes < need_bytes)
2665 error = xlog_writeq_wait(log, tic, need_bytes);
2666 spin_unlock(&log->l_grant_write_lock);
2667 } else if (free_bytes < need_bytes) {
2668 spin_lock(&log->l_grant_write_lock);
2669 error = xlog_writeq_wait(log, tic, need_bytes);
2670 spin_unlock(&log->l_grant_write_lock);
2671 }
2672
2673 if (error)
2674 return error;
2675
2676 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2677 trace_xfs_log_regrant_write_exit(log, tic);
2678 xlog_verify_grant_tail(log);
2679 return 0;
2680}
2681
2682/* The first cnt-1 times through here we don't need to 2604/* The first cnt-1 times through here we don't need to
2683 * move the grant write head because the permanent 2605 * move the grant write head because the permanent
2684 * reservation has reserved cnt times the unit amount. 2606 * reservation has reserved cnt times the unit amount.
@@ -2695,9 +2617,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2695 if (ticket->t_cnt > 0) 2617 if (ticket->t_cnt > 0)
2696 ticket->t_cnt--; 2618 ticket->t_cnt--;
2697 2619
2698 xlog_grant_sub_space(log, &log->l_grant_reserve_head, 2620 xlog_grant_sub_space(log, &log->l_reserve_head.grant,
2699 ticket->t_curr_res); 2621 ticket->t_curr_res);
2700 xlog_grant_sub_space(log, &log->l_grant_write_head, 2622 xlog_grant_sub_space(log, &log->l_write_head.grant,
2701 ticket->t_curr_res); 2623 ticket->t_curr_res);
2702 ticket->t_curr_res = ticket->t_unit_res; 2624 ticket->t_curr_res = ticket->t_unit_res;
2703 xlog_tic_reset_res(ticket); 2625 xlog_tic_reset_res(ticket);
@@ -2708,7 +2630,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2708 if (ticket->t_cnt > 0) 2630 if (ticket->t_cnt > 0)
2709 return; 2631 return;
2710 2632
2711 xlog_grant_add_space(log, &log->l_grant_reserve_head, 2633 xlog_grant_add_space(log, &log->l_reserve_head.grant,
2712 ticket->t_unit_res); 2634 ticket->t_unit_res);
2713 2635
2714 trace_xfs_log_regrant_reserve_exit(log, ticket); 2636 trace_xfs_log_regrant_reserve_exit(log, ticket);
@@ -2754,14 +2676,13 @@ xlog_ungrant_log_space(xlog_t *log,
2754 bytes += ticket->t_unit_res*ticket->t_cnt; 2676 bytes += ticket->t_unit_res*ticket->t_cnt;
2755 } 2677 }
2756 2678
2757 xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); 2679 xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
2758 xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); 2680 xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
2759 2681
2760 trace_xfs_log_ungrant_exit(log, ticket); 2682 trace_xfs_log_ungrant_exit(log, ticket);
2761 2683
2762 xfs_log_move_tail(log->l_mp, 1); 2684 xfs_log_space_wake(log->l_mp);
2763} /* xlog_ungrant_log_space */ 2685}
2764
2765 2686
2766/* 2687/*
2767 * Flush iclog to disk if this is the last reference to the given iclog and 2688 * Flush iclog to disk if this is the last reference to the given iclog and
@@ -3219,7 +3140,7 @@ xlog_ticket_alloc(
3219 int unit_bytes, 3140 int unit_bytes,
3220 int cnt, 3141 int cnt,
3221 char client, 3142 char client,
3222 uint xflags, 3143 bool permanent,
3223 int alloc_flags) 3144 int alloc_flags)
3224{ 3145{
3225 struct xlog_ticket *tic; 3146 struct xlog_ticket *tic;
@@ -3313,6 +3234,7 @@ xlog_ticket_alloc(
3313 } 3234 }
3314 3235
3315 atomic_set(&tic->t_ref, 1); 3236 atomic_set(&tic->t_ref, 1);
3237 tic->t_task = current;
3316 INIT_LIST_HEAD(&tic->t_queue); 3238 INIT_LIST_HEAD(&tic->t_queue);
3317 tic->t_unit_res = unit_bytes; 3239 tic->t_unit_res = unit_bytes;
3318 tic->t_curr_res = unit_bytes; 3240 tic->t_curr_res = unit_bytes;
@@ -3322,9 +3244,8 @@ xlog_ticket_alloc(
3322 tic->t_clientid = client; 3244 tic->t_clientid = client;
3323 tic->t_flags = XLOG_TIC_INITED; 3245 tic->t_flags = XLOG_TIC_INITED;
3324 tic->t_trans_type = 0; 3246 tic->t_trans_type = 0;
3325 if (xflags & XFS_LOG_PERM_RESERV) 3247 if (permanent)
3326 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3248 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3327 init_waitqueue_head(&tic->t_wait);
3328 3249
3329 xlog_tic_reset_res(tic); 3250 xlog_tic_reset_res(tic);
3330 3251
@@ -3380,7 +3301,7 @@ xlog_verify_grant_tail(
3380 int tail_cycle, tail_blocks; 3301 int tail_cycle, tail_blocks;
3381 int cycle, space; 3302 int cycle, space;
3382 3303
3383 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3304 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
3384 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3305 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3385 if (tail_cycle != cycle) { 3306 if (tail_cycle != cycle) {
3386 if (cycle - 1 != tail_cycle && 3307 if (cycle - 1 != tail_cycle &&
@@ -3582,7 +3503,6 @@ xfs_log_force_umount(
3582 struct xfs_mount *mp, 3503 struct xfs_mount *mp,
3583 int logerror) 3504 int logerror)
3584{ 3505{
3585 xlog_ticket_t *tic;
3586 xlog_t *log; 3506 xlog_t *log;
3587 int retval; 3507 int retval;
3588 3508
@@ -3650,15 +3570,8 @@ xfs_log_force_umount(
3650 * we don't enqueue anything once the SHUTDOWN flag is set, and this 3570 * we don't enqueue anything once the SHUTDOWN flag is set, and this
3651 * action is protected by the grant locks. 3571 * action is protected by the grant locks.
3652 */ 3572 */
3653 spin_lock(&log->l_grant_reserve_lock); 3573 xlog_grant_head_wake_all(&log->l_reserve_head);
3654 list_for_each_entry(tic, &log->l_reserveq, t_queue) 3574 xlog_grant_head_wake_all(&log->l_write_head);
3655 wake_up(&tic->t_wait);
3656 spin_unlock(&log->l_grant_reserve_lock);
3657
3658 spin_lock(&log->l_grant_write_lock);
3659 list_for_each_entry(tic, &log->l_writeq, t_queue)
3660 wake_up(&tic->t_wait);
3661 spin_unlock(&log->l_grant_write_lock);
3662 3575
3663 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3576 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3664 ASSERT(!logerror); 3577 ASSERT(!logerror);
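
The shutdown hunk above collapses two open-coded wake loops into xlog_grant_head_wake_all() calls. The helper's body is not part of this hunk, so what follows is only a plausible user-space sketch of the pattern it replaces, assuming it walks the waiters list under the head lock and wakes each ticket's new t_task; a pthread mutex and printf stand in for the spinlock and wake_up_process().

    #include <pthread.h>
    #include <stdio.h>

    struct ticket {
        struct ticket *next;
        const char *name;            /* stands in for tic->t_task */
    };

    struct grant_head {
        pthread_mutex_t lock;        /* stands in for head->lock */
        struct ticket *waiters;      /* stands in for the waiters list */
    };

    static void grant_head_wake_all(struct grant_head *head)
    {
        pthread_mutex_lock(&head->lock);
        for (struct ticket *tic = head->waiters; tic; tic = tic->next)
            printf("waking %s\n", tic->name);  /* wake_up_process(t_task) */
        pthread_mutex_unlock(&head->lock);
    }

    int main(void)
    {
        struct ticket t2 = { NULL, "tic2" };
        struct ticket t1 = { &t2, "tic1" };
        struct grant_head head = { PTHREAD_MUTEX_INITIALIZER, &t1 };

        grant_head_wake_all(&head);  /* one call per head replaces a loop */
        return 0;
    }
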
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2aee3b22d29..2c622bedb30 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -53,15 +53,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
53#define XFS_LOG_REL_PERM_RESERV 0x1 53#define XFS_LOG_REL_PERM_RESERV 0x1
54 54
55/* 55/*
56 * Flags to xfs_log_reserve()
57 *
58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
59 * performed against this type of reservation, the reservation
60 * is not decreased. Long running transactions should use this.
61 */
62#define XFS_LOG_PERM_RESERV 0x2
63
64/*
65 * Flags to xfs_log_force() 56 * Flags to xfs_log_force()
66 * 57 *
67 * XFS_LOG_SYNC: Synchronous force in-core log to disk 58 * XFS_LOG_SYNC: Synchronous force in-core log to disk
@@ -160,8 +151,8 @@ int xfs_log_mount(struct xfs_mount *mp,
160 xfs_daddr_t start_block, 151 xfs_daddr_t start_block,
161 int num_bblocks); 152 int num_bblocks);
162int xfs_log_mount_finish(struct xfs_mount *mp); 153int xfs_log_mount_finish(struct xfs_mount *mp);
163void xfs_log_move_tail(struct xfs_mount *mp, 154xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
164 xfs_lsn_t tail_lsn); 155void xfs_log_space_wake(struct xfs_mount *mp);
165int xfs_log_notify(struct xfs_mount *mp, 156int xfs_log_notify(struct xfs_mount *mp,
166 struct xlog_in_core *iclog, 157 struct xlog_in_core *iclog,
167 xfs_log_callback_t *callback_entry); 158 xfs_log_callback_t *callback_entry);
@@ -172,8 +163,9 @@ int xfs_log_reserve(struct xfs_mount *mp,
172 int count, 163 int count,
173 struct xlog_ticket **ticket, 164 struct xlog_ticket **ticket,
174 __uint8_t clientid, 165 __uint8_t clientid,
175 uint flags, 166 bool permanent,
176 uint t_type); 167 uint t_type);
168int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
177int xfs_log_unmount_write(struct xfs_mount *mp); 169int xfs_log_unmount_write(struct xfs_mount *mp);
178void xfs_log_unmount(struct xfs_mount *mp); 170void xfs_log_unmount(struct xfs_mount *mp);
179int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 171int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
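
With XFS_LOG_PERM_RESERV gone, permanence is an explicit bool argument to xfs_log_reserve() rather than a flag bit. A trivial standalone illustration of the calling-convention change; both reserve functions here are stubs, not the real API.

    #include <stdbool.h>
    #include <stdio.h>

    #define OLD_XFS_LOG_PERM_RESERV 0x2   /* the flag bit removed above */

    /* pre-patch style: permanence hidden in a flags word */
    static void reserve_old(unsigned int flags)
    {
        printf("permanent=%d\n", !!(flags & OLD_XFS_LOG_PERM_RESERV));
    }

    /* post-patch style: permanence is an explicit argument */
    static void reserve_new(bool permanent)
    {
        printf("permanent=%d\n", permanent);
    }

    int main(void)
    {
        reserve_old(OLD_XFS_LOG_PERM_RESERV);
        reserve_new(true);
        return 0;
    }
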
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 2d3b6a498d6..2152900b79d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -239,8 +239,8 @@ typedef struct xlog_res {
239} xlog_res_t; 239} xlog_res_t;
240 240
241typedef struct xlog_ticket { 241typedef struct xlog_ticket {
242 wait_queue_head_t t_wait; /* ticket wait queue */
243 struct list_head t_queue; /* reserve/write queue */ 242 struct list_head t_queue; /* reserve/write queue */
243 struct task_struct *t_task; /* task that owns this ticket */
244 xlog_tid_t t_tid; /* transaction identifier : 4 */ 244 xlog_tid_t t_tid; /* transaction identifier : 4 */
245 atomic_t t_ref; /* ticket reference count : 4 */ 245 atomic_t t_ref; /* ticket reference count : 4 */
246 int t_curr_res; /* current reservation in bytes : 4 */ 246 int t_curr_res; /* current reservation in bytes : 4 */
@@ -470,6 +470,16 @@ struct xfs_cil {
470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) 470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 471
472/* 472/*
 473	 * ticket grant locks, queues and accounting have their own cachelines
474 * as these are quite hot and can be operated on concurrently.
475 */
476struct xlog_grant_head {
477 spinlock_t lock ____cacheline_aligned_in_smp;
478 struct list_head waiters;
479 atomic64_t grant;
480};
481
482/*
473 * The reservation head lsn is not made up of a cycle number and block number. 483 * The reservation head lsn is not made up of a cycle number and block number.
474 * Instead, it uses a cycle number and byte number. Logs don't expect to 484 * Instead, it uses a cycle number and byte number. Logs don't expect to
475 * overflow 31 bits worth of byte offset, so using a byte number will mean 485 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -520,17 +530,8 @@ typedef struct log {
520 /* lsn of 1st LR with unflushed * buffers */ 530 /* lsn of 1st LR with unflushed * buffers */
521 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; 531 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
522 532
523 /* 533 struct xlog_grant_head l_reserve_head;
524 * ticket grant locks, queues and accounting have their own cachlines 534 struct xlog_grant_head l_write_head;
525 * as these are quite hot and can be operated on concurrently.
526 */
527 spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
528 struct list_head l_reserveq;
529 atomic64_t l_grant_reserve_head;
530
531 spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
532 struct list_head l_writeq;
533 atomic64_t l_grant_write_head;
534 535
535 /* The following field are used for debugging; need to hold icloglock */ 536 /* The following field are used for debugging; need to hold icloglock */
536#ifdef DEBUG 537#ifdef DEBUG
@@ -545,14 +546,13 @@ typedef struct log {
545#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 546#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
546 547
547/* common routines */ 548/* common routines */
548extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
549extern int xlog_recover(xlog_t *log); 549extern int xlog_recover(xlog_t *log);
550extern int xlog_recover_finish(xlog_t *log); 550extern int xlog_recover_finish(xlog_t *log);
551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
552 552
553extern kmem_zone_t *xfs_log_ticket_zone; 553extern kmem_zone_t *xfs_log_ticket_zone;
554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, 554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
555 int count, char client, uint xflags, 555 int count, char client, bool permanent,
556 int alloc_flags); 556 int alloc_flags);
557 557
558 558
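
The xlog_grant_head type introduced above folds each lock/waitlist/counter triple into one cacheline-aligned structure that the log instantiates twice, as l_reserve_head and l_write_head. A compilable sketch of the layout, with C11 alignas(64) approximating ____cacheline_aligned_in_smp and a plain int standing in for spinlock_t.

    #include <stdalign.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    struct grant_head {
        alignas(64) int lock;       /* spinlock_t stand-in, own cacheline */
        struct list_head waiters;   /* tickets sleeping on this head */
        _Atomic long long grant;    /* combined cycle/bytes grant value */
    };

    struct log_stub {
        struct grant_head l_reserve_head;
        struct grant_head l_write_head;
    };

    int main(void)
    {
        struct log_stub log;

        printf("reserve %p write %p sizeof=%zu align=%zu\n",
               (void *)&log.l_reserve_head, (void *)&log.l_write_head,
               sizeof(struct grant_head), alignof(struct grant_head));
        return 0;
    }
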
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0ed9ee77937..8ecad5bad66 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -965,9 +965,9 @@ xlog_find_tail(
965 log->l_curr_cycle++; 965 log->l_curr_cycle++;
966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
968 xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, 968 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
969 BBTOB(log->l_curr_block)); 969 BBTOB(log->l_curr_block));
970 xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, 970 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
971 BBTOB(log->l_curr_block)); 971 BBTOB(log->l_curr_block));
972 972
973 /* 973 /*
@@ -3161,37 +3161,26 @@ xlog_recover_process_iunlinks(
3161 */ 3161 */
3162 continue; 3162 continue;
3163 } 3163 }
3164 /*
3165 * Unlock the buffer so that it can be acquired in the normal
3166 * course of the transaction to truncate and free each inode.
3167 * Because we are not racing with anyone else here for the AGI
3168 * buffer, we don't even need to hold it locked to read the
3169 * initial unlinked bucket entries out of the buffer. We keep
3170 * buffer reference though, so that it stays pinned in memory
3171 * while we need the buffer.
3172 */
3164 agi = XFS_BUF_TO_AGI(agibp); 3173 agi = XFS_BUF_TO_AGI(agibp);
3174 xfs_buf_unlock(agibp);
3165 3175
3166 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3176 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3167 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3177 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3168 while (agino != NULLAGINO) { 3178 while (agino != NULLAGINO) {
3169 /*
3170 * Release the agi buffer so that it can
3171 * be acquired in the normal course of the
3172 * transaction to truncate and free the inode.
3173 */
3174 xfs_buf_relse(agibp);
3175
3176 agino = xlog_recover_process_one_iunlink(mp, 3179 agino = xlog_recover_process_one_iunlink(mp,
3177 agno, agino, bucket); 3180 agno, agino, bucket);
3178
3179 /*
3180 * Reacquire the agibuffer and continue around
3181 * the loop. This should never fail as we know
3182 * the buffer was good earlier on.
3183 */
3184 error = xfs_read_agi(mp, NULL, agno, &agibp);
3185 ASSERT(error == 0);
3186 agi = XFS_BUF_TO_AGI(agibp);
3187 } 3181 }
3188 } 3182 }
3189 3183 xfs_buf_rele(agibp);
3190 /*
3191 * Release the buffer for the current agi so we can
3192 * go on to the next one.
3193 */
3194 xfs_buf_relse(agibp);
3195 } 3184 }
3196 3185
3197 mp->m_dmevmask = mp_dmevmask; 3186 mp->m_dmevmask = mp_dmevmask;
@@ -3695,7 +3684,7 @@ xlog_do_recover(
3695 3684
3696 /* Convert superblock from on-disk format */ 3685 /* Convert superblock from on-disk format */
3697 sbp = &log->l_mp->m_sb; 3686 sbp = &log->l_mp->m_sb;
3698 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3687 xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp));
3699 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3688 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3700 ASSERT(xfs_sb_good_version(sbp)); 3689 ASSERT(xfs_sb_good_version(sbp));
3701 xfs_buf_relse(bp); 3690 xfs_buf_relse(bp);
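
The iunlink hunk above trades a release-and-reread of the AGI buffer for every unlinked inode for a single xfs_buf_unlock() up front and one xfs_buf_rele() after the whole walk; the held reference keeps the buffer pinned while its buckets are read unlocked. A toy user-space analogue of that lifetime change, with stub lock and reference counters.

    #include <assert.h>
    #include <stdio.h>

    struct buf { int ref; int locked; };

    static void buf_read(struct buf *bp)   { bp->ref++; bp->locked = 1; }
    static void buf_unlock(struct buf *bp) { bp->locked = 0; }
    static void buf_rele(struct buf *bp)   { bp->ref--; }

    int main(void)
    {
        struct buf agibp = { 0, 0 };

        buf_read(&agibp);    /* referenced and locked once, xfs_read_agi */
        buf_unlock(&agibp);  /* unlocked, but the reference pins it */

        for (int bucket = 0; bucket < 4; bucket++)
            printf("bucket %d processed, ref=%d locked=%d\n",
                   bucket, agibp.ref, agibp.locked);

        buf_rele(&agibp);    /* one reference drop after the whole walk */
        assert(agibp.ref == 0);
        return 0;
    }
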
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d06afbc3540..1ffead4b229 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -158,7 +158,7 @@ xfs_uuid_mount(
158 158
159 out_duplicate: 159 out_duplicate:
160 mutex_unlock(&xfs_uuid_table_mutex); 160 mutex_unlock(&xfs_uuid_table_mutex);
161 xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); 161 xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
162 return XFS_ERROR(EINVAL); 162 return XFS_ERROR(EINVAL);
163} 163}
164 164
@@ -553,9 +553,11 @@ out_unwind:
553 553
554void 554void
555xfs_sb_from_disk( 555xfs_sb_from_disk(
556 xfs_sb_t *to, 556 struct xfs_mount *mp,
557 xfs_dsb_t *from) 557 xfs_dsb_t *from)
558{ 558{
559 struct xfs_sb *to = &mp->m_sb;
560
559 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 561 to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
560 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 562 to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
561 to->sb_dblocks = be64_to_cpu(from->sb_dblocks); 563 to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
@@ -693,7 +695,7 @@ reread:
693 * Initialize the mount structure from the superblock. 695 * Initialize the mount structure from the superblock.
694 * But first do some basic consistency checking. 696 * But first do some basic consistency checking.
695 */ 697 */
696 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 698 xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp));
697 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 699 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
698 if (error) { 700 if (error) {
699 if (loud) 701 if (loud)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 19f69e23250..9eba7388782 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
212 int64_t m_low_space[XFS_LOWSP_MAX]; 212 int64_t m_low_space[XFS_LOWSP_MAX];
213 /* low free space thresholds */ 213 /* low free space thresholds */
214
215 struct workqueue_struct *m_data_workqueue;
216 struct workqueue_struct *m_unwritten_workqueue;
214} xfs_mount_t; 217} xfs_mount_t;
215 218
216/* 219/*
@@ -395,7 +398,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
395extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 398extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, 399extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
397 xfs_agnumber_t *); 400 xfs_agnumber_t *);
398extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 401extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *);
399extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 402extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
400 403
401#endif /* __XFS_MOUNT_H__ */ 404#endif /* __XFS_MOUNT_H__ */
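
After the signature change above, xfs_sb_from_disk() takes the mount and derives the incore superblock pointer itself, so callers pass mp instead of &mp->m_sb. A standalone sketch with a single field and stub types; ntohl() stands in for be32_to_cpu().

    #include <arpa/inet.h>   /* ntohl() as a be32_to_cpu() stand-in */
    #include <stdint.h>
    #include <stdio.h>

    struct xfs_dsb   { uint32_t sb_blocksize; };   /* on-disk, big-endian */
    struct xfs_sb    { uint32_t sb_blocksize; };   /* incore, host-endian */
    struct xfs_mount { struct xfs_sb m_sb; };

    static void sb_from_disk(struct xfs_mount *mp, const struct xfs_dsb *from)
    {
        struct xfs_sb *to = &mp->m_sb;  /* derived inside, no longer passed */

        to->sb_blocksize = ntohl(from->sb_blocksize);
    }

    int main(void)
    {
        struct xfs_dsb dsb = { htonl(4096) };
        struct xfs_mount mp;

        sb_from_disk(&mp, &dsb);        /* callers now pass mp, not &mp->m_sb */
        printf("blocksize=%u\n", mp.m_sb.sb_blocksize);
        return 0;
    }
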
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c436def733b..55c6afedc87 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,194 +48,189 @@
48 * quota functionality, including maintaining the freelist and hash 48 * quota functionality, including maintaining the freelist and hash
49 * tables of dquots. 49 * tables of dquots.
50 */ 50 */
51struct mutex xfs_Gqm_lock;
52struct xfs_qm *xfs_Gqm;
53
54kmem_zone_t *qm_dqzone;
55kmem_zone_t *qm_dqtrxzone;
56
57STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
58STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
59
60STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 51STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
61STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 52STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
62STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); 53STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
63 54
64static struct shrinker xfs_qm_shaker = {
65 .shrink = xfs_qm_shake,
66 .seeks = DEFAULT_SEEKS,
67};
68
69/* 55/*
70 * Initialize the XQM structure. 56 * We use the batch lookup interface to iterate over the dquots as it
71 * Note that there is not one quota manager per file system. 57 * currently is the only interface into the radix tree code that allows
58 * fuzzy lookups instead of exact matches. Holding the lock over multiple
 59	 * operations is fine as all callers run either during mount/umount
60 * or quotaoff.
72 */ 61 */
73STATIC struct xfs_qm * 62#define XFS_DQ_LOOKUP_BATCH 32
74xfs_Gqm_init(void) 63
64STATIC int
65xfs_qm_dquot_walk(
66 struct xfs_mount *mp,
67 int type,
68 int (*execute)(struct xfs_dquot *dqp))
75{ 69{
76 xfs_dqhash_t *udqhash, *gdqhash; 70 struct xfs_quotainfo *qi = mp->m_quotainfo;
77 xfs_qm_t *xqm; 71 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
78 size_t hsize; 72 uint32_t next_index;
79 uint i; 73 int last_error = 0;
74 int skipped;
75 int nr_found;
76
77restart:
78 skipped = 0;
79 next_index = 0;
80 nr_found = 0;
81
82 while (1) {
83 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
84 int error = 0;
85 int i;
86
87 mutex_lock(&qi->qi_tree_lock);
88 nr_found = radix_tree_gang_lookup(tree, (void **)batch,
89 next_index, XFS_DQ_LOOKUP_BATCH);
90 if (!nr_found) {
91 mutex_unlock(&qi->qi_tree_lock);
92 break;
93 }
80 94
81 /* 95 for (i = 0; i < nr_found; i++) {
82 * Initialize the dquot hash tables. 96 struct xfs_dquot *dqp = batch[i];
83 */
84 udqhash = kmem_zalloc_greedy(&hsize,
85 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
86 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
87 if (!udqhash)
88 goto out;
89 97
90 gdqhash = kmem_zalloc_large(hsize); 98 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
91 if (!gdqhash)
92 goto out_free_udqhash;
93 99
94 hsize /= sizeof(xfs_dqhash_t); 100 error = execute(batch[i]);
101 if (error == EAGAIN) {
102 skipped++;
103 continue;
104 }
105 if (error && last_error != EFSCORRUPTED)
106 last_error = error;
107 }
95 108
96 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); 109 mutex_unlock(&qi->qi_tree_lock);
97 xqm->qm_dqhashmask = hsize - 1;
98 xqm->qm_usr_dqhtable = udqhash;
99 xqm->qm_grp_dqhtable = gdqhash;
100 ASSERT(xqm->qm_usr_dqhtable != NULL);
101 ASSERT(xqm->qm_grp_dqhtable != NULL);
102 110
103 for (i = 0; i < hsize; i++) { 111 /* bail out if the filesystem is corrupted. */
104 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); 112 if (last_error == EFSCORRUPTED) {
105 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); 113 skipped = 0;
114 break;
115 }
106 } 116 }
107 117
108 /* 118 if (skipped) {
109 * Freelist of all dquots of all file systems 119 delay(1);
110 */ 120 goto restart;
111 INIT_LIST_HEAD(&xqm->qm_dqfrlist); 121 }
112 xqm->qm_dqfrlist_cnt = 0;
113 mutex_init(&xqm->qm_dqfrlist_lock);
114
115 /*
116 * dquot zone. we register our own low-memory callback.
117 */
118 if (!qm_dqzone) {
119 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
120 "xfs_dquots");
121 qm_dqzone = xqm->qm_dqzone;
122 } else
123 xqm->qm_dqzone = qm_dqzone;
124
125 register_shrinker(&xfs_qm_shaker);
126
127 /*
128 * The t_dqinfo portion of transactions.
129 */
130 if (!qm_dqtrxzone) {
131 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
132 "xfs_dqtrx");
133 qm_dqtrxzone = xqm->qm_dqtrxzone;
134 } else
135 xqm->qm_dqtrxzone = qm_dqtrxzone;
136
137 atomic_set(&xqm->qm_totaldquots, 0);
138 xqm->qm_nrefs = 0;
139 return xqm;
140 122
141 out_free_udqhash: 123 return last_error;
142 kmem_free_large(udqhash);
143 out:
144 return NULL;
145} 124}
146 125
126
147/* 127/*
148 * Destroy the global quota manager when its reference count goes to zero. 128 * Purge a dquot from all tracking data structures and free it.
149 */ 129 */
150STATIC void 130STATIC int
151xfs_qm_destroy( 131xfs_qm_dqpurge(
152 struct xfs_qm *xqm) 132 struct xfs_dquot *dqp)
153{ 133{
154 int hsize, i; 134 struct xfs_mount *mp = dqp->q_mount;
135 struct xfs_quotainfo *qi = mp->m_quotainfo;
136 struct xfs_dquot *gdqp = NULL;
155 137
156 ASSERT(xqm != NULL); 138 xfs_dqlock(dqp);
157 ASSERT(xqm->qm_nrefs == 0); 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
140 xfs_dqunlock(dqp);
141 return EAGAIN;
142 }
158 143
159 unregister_shrinker(&xfs_qm_shaker); 144 /*
145 * If this quota has a group hint attached, prepare for releasing it
146 * now.
147 */
148 gdqp = dqp->q_gdquot;
149 if (gdqp) {
150 xfs_dqlock(gdqp);
151 dqp->q_gdquot = NULL;
152 }
160 153
161 mutex_lock(&xqm->qm_dqfrlist_lock); 154 dqp->dq_flags |= XFS_DQ_FREEING;
162 ASSERT(list_empty(&xqm->qm_dqfrlist));
163 mutex_unlock(&xqm->qm_dqfrlist_lock);
164 155
165 hsize = xqm->qm_dqhashmask + 1; 156 /*
166 for (i = 0; i < hsize; i++) { 157 * If we're turning off quotas, we have to make sure that, for
167 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 158 * example, we don't delete quota disk blocks while dquots are
168 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 159 * in the process of getting written to those disk blocks.
 160	 * This dquot might well be on the AIL, and we can't leave it there
161 * if we're turning off quotas. Basically, we need this flush
162 * lock, and are willing to block on it.
163 */
164 if (!xfs_dqflock_nowait(dqp)) {
165 /*
166 * Block on the flush lock after nudging dquot buffer,
167 * if it is incore.
168 */
169 xfs_dqflock_pushbuf_wait(dqp);
169 } 170 }
170 kmem_free_large(xqm->qm_usr_dqhtable);
171 kmem_free_large(xqm->qm_grp_dqhtable);
172 xqm->qm_usr_dqhtable = NULL;
173 xqm->qm_grp_dqhtable = NULL;
174 xqm->qm_dqhashmask = 0;
175 171
176 kmem_free(xqm);
177}
178
179/*
180 * Called at mount time to let XQM know that another file system is
181 * starting quotas. This isn't crucial information as the individual mount
182 * structures are pretty independent, but it helps the XQM keep a
183 * global view of what's going on.
184 */
185/* ARGSUSED */
186STATIC int
187xfs_qm_hold_quotafs_ref(
188 struct xfs_mount *mp)
189{
190 /* 172 /*
191 * Need to lock the xfs_Gqm structure for things like this. For example, 173 * If we are turning this type of quotas off, we don't care
192 * the structure could disappear between the entry to this routine and 174 * about the dirty metadata sitting in this dquot. OTOH, if
193 * a HOLD operation if not locked. 175 * we're unmounting, we do care, so we flush it and wait.
194 */ 176 */
195 mutex_lock(&xfs_Gqm_lock); 177 if (XFS_DQ_IS_DIRTY(dqp)) {
178 int error;
196 179
197 if (!xfs_Gqm) { 180 /*
198 xfs_Gqm = xfs_Gqm_init(); 181 * We don't care about getting disk errors here. We need
199 if (!xfs_Gqm) { 182 * to purge this dquot anyway, so we go ahead regardless.
200 mutex_unlock(&xfs_Gqm_lock); 183 */
201 return ENOMEM; 184 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
202 } 185 if (error)
186 xfs_warn(mp, "%s: dquot %p flush failed",
187 __func__, dqp);
188 xfs_dqflock(dqp);
203 } 189 }
204 190
191 ASSERT(atomic_read(&dqp->q_pincount) == 0);
192 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
193 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
194
195 xfs_dqfunlock(dqp);
196 xfs_dqunlock(dqp);
197
198 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
199 be32_to_cpu(dqp->q_core.d_id));
200 qi->qi_dquots--;
201
205 /* 202 /*
206 * We can keep a list of all filesystems with quotas mounted for 203 * We move dquots to the freelist as soon as their reference count
207 * debugging and statistical purposes, but ... 204 * hits zero, so it really should be on the freelist here.
208 * Just take a reference and get out.
209 */ 205 */
210 xfs_Gqm->qm_nrefs++; 206 mutex_lock(&qi->qi_lru_lock);
211 mutex_unlock(&xfs_Gqm_lock); 207 ASSERT(!list_empty(&dqp->q_lru));
208 list_del_init(&dqp->q_lru);
209 qi->qi_lru_count--;
210 XFS_STATS_DEC(xs_qm_dquot_unused);
211 mutex_unlock(&qi->qi_lru_lock);
212 212
213 xfs_qm_dqdestroy(dqp);
214
215 if (gdqp)
216 xfs_qm_dqput(gdqp);
213 return 0; 217 return 0;
214} 218}
215 219
216
217/* 220/*
218 * Release the reference that a filesystem took at mount time, 221 * Purge the dquot cache.
219 * so that we know when we need to destroy the entire quota manager.
220 */ 222 */
221/* ARGSUSED */ 223void
222STATIC void 224xfs_qm_dqpurge_all(
223xfs_qm_rele_quotafs_ref( 225 struct xfs_mount *mp,
224 struct xfs_mount *mp) 226 uint flags)
225{ 227{
226 ASSERT(xfs_Gqm); 228 if (flags & XFS_QMOPT_UQUOTA)
227 ASSERT(xfs_Gqm->qm_nrefs > 0); 229 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
228 230 if (flags & XFS_QMOPT_GQUOTA)
229 /* 231 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
230 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 232 if (flags & XFS_QMOPT_PQUOTA)
231 * be restarted. 233 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
232 */
233 mutex_lock(&xfs_Gqm_lock);
234 if (--xfs_Gqm->qm_nrefs == 0) {
235 xfs_qm_destroy(xfs_Gqm);
236 xfs_Gqm = NULL;
237 }
238 mutex_unlock(&xfs_Gqm_lock);
239} 234}
240 235
241/* 236/*
@@ -376,175 +371,6 @@ xfs_qm_unmount_quotas(
376 } 371 }
377} 372}
378 373
379/*
380 * Flush all dquots of the given file system to disk. The dquots are
381 * _not_ purged from memory here, just their data written to disk.
382 */
383STATIC int
384xfs_qm_dqflush_all(
385 struct xfs_mount *mp)
386{
387 struct xfs_quotainfo *q = mp->m_quotainfo;
388 int recl;
389 struct xfs_dquot *dqp;
390 int error;
391
392 if (!q)
393 return 0;
394again:
395 mutex_lock(&q->qi_dqlist_lock);
396 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
397 xfs_dqlock(dqp);
398 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
399 !XFS_DQ_IS_DIRTY(dqp)) {
400 xfs_dqunlock(dqp);
401 continue;
402 }
403
404 /* XXX a sentinel would be better */
405 recl = q->qi_dqreclaims;
406 if (!xfs_dqflock_nowait(dqp)) {
407 /*
408 * If we can't grab the flush lock then check
409 * to see if the dquot has been flushed delayed
410 * write. If so, grab its buffer and send it
411 * out immediately. We'll be able to acquire
412 * the flush lock when the I/O completes.
413 */
414 xfs_dqflock_pushbuf_wait(dqp);
415 }
416 /*
417 * Let go of the mplist lock. We don't want to hold it
418 * across a disk write.
419 */
420 mutex_unlock(&q->qi_dqlist_lock);
421 error = xfs_qm_dqflush(dqp, 0);
422 xfs_dqunlock(dqp);
423 if (error)
424 return error;
425
426 mutex_lock(&q->qi_dqlist_lock);
427 if (recl != q->qi_dqreclaims) {
428 mutex_unlock(&q->qi_dqlist_lock);
429 /* XXX restart limit */
430 goto again;
431 }
432 }
433
434 mutex_unlock(&q->qi_dqlist_lock);
435 /* return ! busy */
436 return 0;
437}
438
439/*
440 * Release the group dquot pointers the user dquots may be
441 * carrying around as a hint. mplist is locked on entry and exit.
442 */
443STATIC void
444xfs_qm_detach_gdquots(
445 struct xfs_mount *mp)
446{
447 struct xfs_quotainfo *q = mp->m_quotainfo;
448 struct xfs_dquot *dqp, *gdqp;
449
450 again:
451 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
452 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
453 xfs_dqlock(dqp);
454 if (dqp->dq_flags & XFS_DQ_FREEING) {
455 xfs_dqunlock(dqp);
456 mutex_unlock(&q->qi_dqlist_lock);
457 delay(1);
458 mutex_lock(&q->qi_dqlist_lock);
459 goto again;
460 }
461
462 gdqp = dqp->q_gdquot;
463 if (gdqp)
464 dqp->q_gdquot = NULL;
465 xfs_dqunlock(dqp);
466
467 if (gdqp)
468 xfs_qm_dqrele(gdqp);
469 }
470}
471
472/*
473 * Go through all the incore dquots of this file system and take them
474 * off the mplist and hashlist, if the dquot type matches the dqtype
475 * parameter. This is used when turning off quota accounting for
476 * users and/or groups, as well as when the filesystem is unmounting.
477 */
478STATIC int
479xfs_qm_dqpurge_int(
480 struct xfs_mount *mp,
481 uint flags)
482{
483 struct xfs_quotainfo *q = mp->m_quotainfo;
484 struct xfs_dquot *dqp, *n;
485 uint dqtype;
486 int nmisses = 0;
487 LIST_HEAD (dispose_list);
488
489 if (!q)
490 return 0;
491
492 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
493 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
494 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
495
496 mutex_lock(&q->qi_dqlist_lock);
497
498 /*
499 * In the first pass through all incore dquots of this filesystem,
500 * we release the group dquot pointers the user dquots may be
501 * carrying around as a hint. We need to do this irrespective of
502 * what's being turned off.
503 */
504 xfs_qm_detach_gdquots(mp);
505
506 /*
507 * Try to get rid of all of the unwanted dquots.
508 */
509 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
510 xfs_dqlock(dqp);
511 if ((dqp->dq_flags & dqtype) != 0 &&
512 !(dqp->dq_flags & XFS_DQ_FREEING)) {
513 if (dqp->q_nrefs == 0) {
514 dqp->dq_flags |= XFS_DQ_FREEING;
515 list_move_tail(&dqp->q_mplist, &dispose_list);
516 } else
517 nmisses++;
518 }
519 xfs_dqunlock(dqp);
520 }
521 mutex_unlock(&q->qi_dqlist_lock);
522
523 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
524 xfs_qm_dqpurge(dqp);
525
526 return nmisses;
527}
528
529int
530xfs_qm_dqpurge_all(
531 xfs_mount_t *mp,
532 uint flags)
533{
534 int ndquots;
535
536 /*
537 * Purge the dquot cache.
538 * None of the dquots should really be busy at this point.
539 */
540 if (mp->m_quotainfo) {
541 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
542 delay(ndquots * 10);
543 }
544 }
545 return 0;
546}
547
548STATIC int 374STATIC int
549xfs_qm_dqattach_one( 375xfs_qm_dqattach_one(
550 xfs_inode_t *ip, 376 xfs_inode_t *ip,
@@ -783,14 +609,6 @@ xfs_qm_dqdetach(
783} 609}
784 610
785/* 611/*
786 * The hash chains and the mplist use the same xfs_dqhash structure as
787 * their list head, but we can take the mplist qh_lock and one of the
788 * hash qh_locks at the same time without any problem as they aren't
789 * related.
790 */
791static struct lock_class_key xfs_quota_mplist_class;
792
793/*
794 * This initializes all the quota information that's kept in the 612 * This initializes all the quota information that's kept in the
795 * mount structure 613 * mount structure
796 */ 614 */
@@ -804,13 +622,6 @@ xfs_qm_init_quotainfo(
804 622
805 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 623 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
806 624
807 /*
808 * Tell XQM that we exist as soon as possible.
809 */
810 if ((error = xfs_qm_hold_quotafs_ref(mp))) {
811 return error;
812 }
813
814 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 625 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
815 626
816 /* 627 /*
@@ -823,11 +634,13 @@ xfs_qm_init_quotainfo(
823 return error; 634 return error;
824 } 635 }
825 636
826 INIT_LIST_HEAD(&qinf->qi_dqlist); 637 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
827 mutex_init(&qinf->qi_dqlist_lock); 638 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
828 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); 639 mutex_init(&qinf->qi_tree_lock);
829 640
830 qinf->qi_dqreclaims = 0; 641 INIT_LIST_HEAD(&qinf->qi_lru_list);
642 qinf->qi_lru_count = 0;
643 mutex_init(&qinf->qi_lru_lock);
831 644
832 /* mutex used to serialize quotaoffs */ 645 /* mutex used to serialize quotaoffs */
833 mutex_init(&qinf->qi_quotaofflock); 646 mutex_init(&qinf->qi_quotaofflock);
@@ -894,6 +707,9 @@ xfs_qm_init_quotainfo(
894 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 707 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
895 } 708 }
896 709
710 qinf->qi_shrinker.shrink = xfs_qm_shake;
711 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
712 register_shrinker(&qinf->qi_shrinker);
897 return 0; 713 return 0;
898} 714}
899 715
@@ -911,17 +727,8 @@ xfs_qm_destroy_quotainfo(
911 727
912 qi = mp->m_quotainfo; 728 qi = mp->m_quotainfo;
913 ASSERT(qi != NULL); 729 ASSERT(qi != NULL);
914 ASSERT(xfs_Gqm != NULL);
915
916 /*
917 * Release the reference that XQM kept, so that we know
918 * when the XQM structure should be freed. We cannot assume
919 * that xfs_Gqm is non-null after this point.
920 */
921 xfs_qm_rele_quotafs_ref(mp);
922 730
923 ASSERT(list_empty(&qi->qi_dqlist)); 731 unregister_shrinker(&qi->qi_shrinker);
924 mutex_destroy(&qi->qi_dqlist_lock);
925 732
926 if (qi->qi_uquotaip) { 733 if (qi->qi_uquotaip) {
927 IRELE(qi->qi_uquotaip); 734 IRELE(qi->qi_uquotaip);
@@ -936,30 +743,6 @@ xfs_qm_destroy_quotainfo(
936 mp->m_quotainfo = NULL; 743 mp->m_quotainfo = NULL;
937} 744}
938 745
939
940
941/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
942
943/* ARGSUSED */
944STATIC void
945xfs_qm_list_init(
946 xfs_dqlist_t *list,
947 char *str,
948 int n)
949{
950 mutex_init(&list->qh_lock);
951 INIT_LIST_HEAD(&list->qh_list);
952 list->qh_version = 0;
953 list->qh_nelems = 0;
954}
955
956STATIC void
957xfs_qm_list_destroy(
958 xfs_dqlist_t *list)
959{
960 mutex_destroy(&(list->qh_lock));
961}
962
963/* 746/*
964 * Create an inode and return with a reference already taken, but unlocked 747 * Create an inode and return with a reference already taken, but unlocked
965 * This is how we create quota inodes 748 * This is how we create quota inodes
@@ -1397,6 +1180,28 @@ error0:
1397 return error; 1180 return error;
1398} 1181}
1399 1182
1183STATIC int
1184xfs_qm_flush_one(
1185 struct xfs_dquot *dqp)
1186{
1187 int error = 0;
1188
1189 xfs_dqlock(dqp);
1190 if (dqp->dq_flags & XFS_DQ_FREEING)
1191 goto out_unlock;
1192 if (!XFS_DQ_IS_DIRTY(dqp))
1193 goto out_unlock;
1194
1195 if (!xfs_dqflock_nowait(dqp))
1196 xfs_dqflock_pushbuf_wait(dqp);
1197
1198 error = xfs_qm_dqflush(dqp, 0);
1199
1200out_unlock:
1201 xfs_dqunlock(dqp);
1202 return error;
1203}
1204
1400/* 1205/*
1401 * Walk thru all the filesystem inodes and construct a consistent view 1206 * Walk thru all the filesystem inodes and construct a consistent view
1402 * of the disk quota world. If the quotacheck fails, disable quotas. 1207 * of the disk quota world. If the quotacheck fails, disable quotas.
@@ -1405,7 +1210,7 @@ int
1405xfs_qm_quotacheck( 1210xfs_qm_quotacheck(
1406 xfs_mount_t *mp) 1211 xfs_mount_t *mp)
1407{ 1212{
1408 int done, count, error; 1213 int done, count, error, error2;
1409 xfs_ino_t lastino; 1214 xfs_ino_t lastino;
1410 size_t structsz; 1215 size_t structsz;
1411 xfs_inode_t *uip, *gip; 1216 xfs_inode_t *uip, *gip;
@@ -1419,12 +1224,6 @@ xfs_qm_quotacheck(
1419 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); 1224 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1420 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1225 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1421 1226
1422 /*
1423 * There should be no cached dquots. The (simplistic) quotacheck
1424 * algorithm doesn't like that.
1425 */
1426 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1427
1428 xfs_notice(mp, "Quotacheck needed: Please wait."); 1227 xfs_notice(mp, "Quotacheck needed: Please wait.");
1429 1228
1430 /* 1229 /*
@@ -1463,12 +1262,21 @@ xfs_qm_quotacheck(
1463 } while (!done); 1262 } while (!done);
1464 1263
1465 /* 1264 /*
1466 * We've made all the changes that we need to make incore. 1265 * We've made all the changes that we need to make incore. Flush them
1467 * Flush them down to disk buffers if everything was updated 1266 * down to disk buffers if everything was updated successfully.
1468 * successfully.
1469 */ 1267 */
1470 if (!error) 1268 if (XFS_IS_UQUOTA_ON(mp))
1471 error = xfs_qm_dqflush_all(mp); 1269 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
1270 if (XFS_IS_GQUOTA_ON(mp)) {
1271 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
1272 if (!error)
1273 error = error2;
1274 }
1275 if (XFS_IS_PQUOTA_ON(mp)) {
1276 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
1277 if (!error)
1278 error = error2;
1279 }
1472 1280
1473 /* 1281 /*
1474 * We can get this error if we couldn't do a dquot allocation inside 1282 * We can get this error if we couldn't do a dquot allocation inside
@@ -1496,7 +1304,7 @@ xfs_qm_quotacheck(
1496 * quotachecked status, since we won't be doing accounting for 1304 * quotachecked status, since we won't be doing accounting for
1497 * that type anymore. 1305 * that type anymore.
1498 */ 1306 */
1499 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1307 mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1500 mp->m_qflags |= flags; 1308 mp->m_qflags |= flags;
1501 1309
1502 error_return: 1310 error_return:
@@ -1508,7 +1316,6 @@ xfs_qm_quotacheck(
1508 * We must turn off quotas. 1316 * We must turn off quotas.
1509 */ 1317 */
1510 ASSERT(mp->m_quotainfo != NULL); 1318 ASSERT(mp->m_quotainfo != NULL);
1511 ASSERT(xfs_Gqm != NULL);
1512 xfs_qm_destroy_quotainfo(mp); 1319 xfs_qm_destroy_quotainfo(mp);
1513 if (xfs_mount_reset_sbqflags(mp)) { 1320 if (xfs_mount_reset_sbqflags(mp)) {
1514 xfs_warn(mp, 1321 xfs_warn(mp,
@@ -1604,16 +1411,12 @@ xfs_qm_dqfree_one(
1604 struct xfs_mount *mp = dqp->q_mount; 1411 struct xfs_mount *mp = dqp->q_mount;
1605 struct xfs_quotainfo *qi = mp->m_quotainfo; 1412 struct xfs_quotainfo *qi = mp->m_quotainfo;
1606 1413
1607 mutex_lock(&dqp->q_hash->qh_lock); 1414 mutex_lock(&qi->qi_tree_lock);
1608 list_del_init(&dqp->q_hashlist); 1415 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
1609 dqp->q_hash->qh_version++; 1416 be32_to_cpu(dqp->q_core.d_id));
1610 mutex_unlock(&dqp->q_hash->qh_lock);
1611 1417
1612 mutex_lock(&qi->qi_dqlist_lock);
1613 list_del_init(&dqp->q_mplist);
1614 qi->qi_dquots--; 1418 qi->qi_dquots--;
1615 qi->qi_dqreclaims++; 1419 mutex_unlock(&qi->qi_tree_lock);
1616 mutex_unlock(&qi->qi_dqlist_lock);
1617 1420
1618 xfs_qm_dqdestroy(dqp); 1421 xfs_qm_dqdestroy(dqp);
1619} 1422}
@@ -1624,6 +1427,7 @@ xfs_qm_dqreclaim_one(
1624 struct list_head *dispose_list) 1427 struct list_head *dispose_list)
1625{ 1428{
1626 struct xfs_mount *mp = dqp->q_mount; 1429 struct xfs_mount *mp = dqp->q_mount;
1430 struct xfs_quotainfo *qi = mp->m_quotainfo;
1627 int error; 1431 int error;
1628 1432
1629 if (!xfs_dqlock_nowait(dqp)) 1433 if (!xfs_dqlock_nowait(dqp))
@@ -1637,16 +1441,14 @@ xfs_qm_dqreclaim_one(
1637 xfs_dqunlock(dqp); 1441 xfs_dqunlock(dqp);
1638 1442
1639 trace_xfs_dqreclaim_want(dqp); 1443 trace_xfs_dqreclaim_want(dqp);
1640 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1444 XFS_STATS_INC(xs_qm_dqwants);
1641 1445
1642 list_del_init(&dqp->q_freelist); 1446 list_del_init(&dqp->q_lru);
1643 xfs_Gqm->qm_dqfrlist_cnt--; 1447 qi->qi_lru_count--;
1448 XFS_STATS_DEC(xs_qm_dquot_unused);
1644 return; 1449 return;
1645 } 1450 }
1646 1451
1647 ASSERT(dqp->q_hash);
1648 ASSERT(!list_empty(&dqp->q_mplist));
1649
1650 /* 1452 /*
1651 * Try to grab the flush lock. If this dquot is in the process of 1453 * Try to grab the flush lock. If this dquot is in the process of
1652 * getting flushed to disk, we don't want to reclaim it. 1454 * getting flushed to disk, we don't want to reclaim it.
@@ -1688,11 +1490,12 @@ xfs_qm_dqreclaim_one(
1688 xfs_dqunlock(dqp); 1490 xfs_dqunlock(dqp);
1689 1491
1690 ASSERT(dqp->q_nrefs == 0); 1492 ASSERT(dqp->q_nrefs == 0);
1691 list_move_tail(&dqp->q_freelist, dispose_list); 1493 list_move_tail(&dqp->q_lru, dispose_list);
1692 xfs_Gqm->qm_dqfrlist_cnt--; 1494 qi->qi_lru_count--;
1495 XFS_STATS_DEC(xs_qm_dquot_unused);
1693 1496
1694 trace_xfs_dqreclaim_done(dqp); 1497 trace_xfs_dqreclaim_done(dqp);
1695 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); 1498 XFS_STATS_INC(xs_qm_dqreclaims);
1696 return; 1499 return;
1697 1500
1698out_busy: 1501out_busy:
@@ -1701,10 +1504,10 @@ out_busy:
1701 /* 1504 /*
1702 * Move the dquot to the tail of the list so that we don't spin on it. 1505 * Move the dquot to the tail of the list so that we don't spin on it.
1703 */ 1506 */
1704 list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 1507 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1705 1508
1706 trace_xfs_dqreclaim_busy(dqp); 1509 trace_xfs_dqreclaim_busy(dqp);
1707 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); 1510 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1708} 1511}
1709 1512
1710STATIC int 1513STATIC int
@@ -1712,6 +1515,8 @@ xfs_qm_shake(
1712 struct shrinker *shrink, 1515 struct shrinker *shrink,
1713 struct shrink_control *sc) 1516 struct shrink_control *sc)
1714{ 1517{
1518 struct xfs_quotainfo *qi =
1519 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1715 int nr_to_scan = sc->nr_to_scan; 1520 int nr_to_scan = sc->nr_to_scan;
1716 LIST_HEAD (dispose_list); 1521 LIST_HEAD (dispose_list);
1717 struct xfs_dquot *dqp; 1522 struct xfs_dquot *dqp;
@@ -1721,24 +1526,23 @@ xfs_qm_shake(
1721 if (!nr_to_scan) 1526 if (!nr_to_scan)
1722 goto out; 1527 goto out;
1723 1528
1724 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1529 mutex_lock(&qi->qi_lru_lock);
1725 while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { 1530 while (!list_empty(&qi->qi_lru_list)) {
1726 if (nr_to_scan-- <= 0) 1531 if (nr_to_scan-- <= 0)
1727 break; 1532 break;
1728 dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, 1533 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1729 q_freelist); 1534 q_lru);
1730 xfs_qm_dqreclaim_one(dqp, &dispose_list); 1535 xfs_qm_dqreclaim_one(dqp, &dispose_list);
1731 } 1536 }
1732 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1537 mutex_unlock(&qi->qi_lru_lock);
1733 1538
1734 while (!list_empty(&dispose_list)) { 1539 while (!list_empty(&dispose_list)) {
1735 dqp = list_first_entry(&dispose_list, struct xfs_dquot, 1540 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1736 q_freelist); 1541 list_del_init(&dqp->q_lru);
1737 list_del_init(&dqp->q_freelist);
1738 xfs_qm_dqfree_one(dqp); 1542 xfs_qm_dqfree_one(dqp);
1739 } 1543 }
1740out: 1544out:
1741 return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; 1545 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1742} 1546}
1743 1547
1744/* 1548/*
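
The new xfs_qm_dquot_walk() above batches radix-tree gang lookups, resumes each pass just past the last dquot id seen, counts EAGAIN results as skipped, and restarts the whole walk (after delay(1)) while anything was skipped. A compilable user-space sketch of that control flow; a sorted id array stands in for the radix tree and the delay is omitted.

    #include <errno.h>
    #include <stdio.h>

    #define LOOKUP_BATCH 4

    static const unsigned ids[] = { 1, 3, 7, 9, 12, 20 };
    #define NIDS (sizeof(ids) / sizeof(ids[0]))

    /* Gang-lookup stand-in: copy up to max ids >= start into batch. */
    static int gang_lookup(unsigned start, unsigned batch[], int max)
    {
        int n = 0;
        for (unsigned i = 0; i < NIDS && n < max; i++)
            if (ids[i] >= start)
                batch[n++] = ids[i];
        return n;
    }

    /* Per-dquot callback; a transient EAGAIN would be retried on restart. */
    static int execute(unsigned id)
    {
        printf("execute dquot id %u\n", id);
        return 0;
    }

    static int dquot_walk(void)
    {
        int skipped, last_error = 0;
        unsigned next_index;

    restart:
        skipped = 0;
        next_index = 0;

        for (;;) {
            unsigned batch[LOOKUP_BATCH];
            int nr_found = gang_lookup(next_index, batch, LOOKUP_BATCH);

            if (!nr_found)
                break;

            for (int i = 0; i < nr_found; i++) {
                next_index = batch[i] + 1;  /* resume past this entry */
                int error = execute(batch[i]);
                if (error == EAGAIN) {
                    skipped++;
                    continue;
                }
                if (error)
                    last_error = error;
            }
        }

        if (skipped)
            goto restart;   /* the kernel version calls delay(1) first */
        return last_error;
    }

    int main(void)
    {
        return dquot_walk();
    }
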
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9a9b997e1a0..44b858b79d7 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -21,21 +21,10 @@
21#include "xfs_dquot_item.h" 21#include "xfs_dquot_item.h"
22#include "xfs_dquot.h" 22#include "xfs_dquot.h"
23#include "xfs_quota_priv.h" 23#include "xfs_quota_priv.h"
24#include "xfs_qm_stats.h"
25 24
26struct xfs_qm;
27struct xfs_inode; 25struct xfs_inode;
28 26
29extern struct mutex xfs_Gqm_lock; 27extern struct kmem_zone *xfs_qm_dqtrxzone;
30extern struct xfs_qm *xfs_Gqm;
31extern kmem_zone_t *qm_dqzone;
32extern kmem_zone_t *qm_dqtrxzone;
33
34/*
35 * Dquot hashtable constants/threshold values.
36 */
37#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
38#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
39 28
40/* 29/*
41 * This defines the unit of allocation of dquots. 30 * This defines the unit of allocation of dquots.
@@ -48,36 +37,20 @@ extern kmem_zone_t *qm_dqtrxzone;
48 */ 37 */
49#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 38#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
50 39
51typedef xfs_dqhash_t xfs_dqlist_t;
52
53/*
54 * Quota Manager (global) structure. Lives only in core.
55 */
56typedef struct xfs_qm {
57 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
58 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
59 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
60 struct list_head qm_dqfrlist; /* freelist of dquots */
61 struct mutex qm_dqfrlist_lock;
62 int qm_dqfrlist_cnt;
63 atomic_t qm_totaldquots; /* total incore dquots */
64 uint qm_nrefs; /* file systems with quota on */
65 kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
66 kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
67} xfs_qm_t;
68
69/* 40/*
70 * Various quota information for individual filesystems. 41 * Various quota information for individual filesystems.
71 * The mount structure keeps a pointer to this. 42 * The mount structure keeps a pointer to this.
72 */ 43 */
73typedef struct xfs_quotainfo { 44typedef struct xfs_quotainfo {
45 struct radix_tree_root qi_uquota_tree;
46 struct radix_tree_root qi_gquota_tree;
47 struct mutex qi_tree_lock;
74 xfs_inode_t *qi_uquotaip; /* user quota inode */ 48 xfs_inode_t *qi_uquotaip; /* user quota inode */
75 xfs_inode_t *qi_gquotaip; /* group quota inode */ 49 xfs_inode_t *qi_gquotaip; /* group quota inode */
76 struct list_head qi_dqlist; /* all dquots in filesys */ 50 struct list_head qi_lru_list;
77 struct mutex qi_dqlist_lock; 51 struct mutex qi_lru_lock;
52 int qi_lru_count;
78 int qi_dquots; 53 int qi_dquots;
79 int qi_dqreclaims; /* a change here indicates
80 a removal in the dqlist */
81 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
82 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
83 time_t qi_rtbtimelimit;/* limit for rt blks timer */ 56 time_t qi_rtbtimelimit;/* limit for rt blks timer */
@@ -93,8 +66,14 @@ typedef struct xfs_quotainfo {
93 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ 66 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
94 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ 67 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
95 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ 68 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
69 struct shrinker qi_shrinker;
96} xfs_quotainfo_t; 70} xfs_quotainfo_t;
97 71
72#define XFS_DQUOT_TREE(qi, type) \
73 ((type & XFS_DQ_USER) ? \
74 &((qi)->qi_uquota_tree) : \
75 &((qi)->qi_gquota_tree))
76
98 77
99extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 78extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
100extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 79extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
@@ -130,7 +109,7 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
130extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 109extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
131 110
132/* dquot stuff */ 111/* dquot stuff */
133extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 112extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint);
134extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 113extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
135 114
136/* quota ops */ 115/* quota ops */
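
The XFS_DQUOT_TREE() selector added above routes user dquots to qi_uquota_tree and everything else, group and project alike, to the shared qi_gquota_tree. A standalone sketch of the selector; the flag values are local stand-ins for the real XFS_DQ_* bits.

    #include <stdio.h>

    #define DQ_USER  0x1
    #define DQ_PROJ  0x2
    #define DQ_GROUP 0x4

    struct radix_tree_stub { const char *name; };

    struct quotainfo {
        struct radix_tree_stub qi_uquota_tree;
        struct radix_tree_stub qi_gquota_tree;
    };

    #define DQUOT_TREE(qi, type) \
        (((type) & DQ_USER) ? &(qi)->qi_uquota_tree : &(qi)->qi_gquota_tree)

    int main(void)
    {
        struct quotainfo qi = { { "uquota" }, { "gquota" } };

        printf("user  -> %s\n", DQUOT_TREE(&qi, DQ_USER)->name);
        printf("group -> %s\n", DQUOT_TREE(&qi, DQ_GROUP)->name);
        printf("proj  -> %s\n", DQUOT_TREE(&qi, DQ_PROJ)->name); /* shared */
        return 0;
    }
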
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a0a829addca..e6986b5d80d 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -40,28 +40,28 @@
40STATIC void 40STATIC void
41xfs_fill_statvfs_from_dquot( 41xfs_fill_statvfs_from_dquot(
42 struct kstatfs *statp, 42 struct kstatfs *statp,
43 xfs_disk_dquot_t *dp) 43 struct xfs_dquot *dqp)
44{ 44{
45 __uint64_t limit; 45 __uint64_t limit;
46 46
47 limit = dp->d_blk_softlimit ? 47 limit = dqp->q_core.d_blk_softlimit ?
48 be64_to_cpu(dp->d_blk_softlimit) : 48 be64_to_cpu(dqp->q_core.d_blk_softlimit) :
49 be64_to_cpu(dp->d_blk_hardlimit); 49 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
50 if (limit && statp->f_blocks > limit) { 50 if (limit && statp->f_blocks > limit) {
51 statp->f_blocks = limit; 51 statp->f_blocks = limit;
52 statp->f_bfree = statp->f_bavail = 52 statp->f_bfree = statp->f_bavail =
53 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 53 (statp->f_blocks > dqp->q_res_bcount) ?
54 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 54 (statp->f_blocks - dqp->q_res_bcount) : 0;
55 } 55 }
56 56
57 limit = dp->d_ino_softlimit ? 57 limit = dqp->q_core.d_ino_softlimit ?
58 be64_to_cpu(dp->d_ino_softlimit) : 58 be64_to_cpu(dqp->q_core.d_ino_softlimit) :
59 be64_to_cpu(dp->d_ino_hardlimit); 59 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
60 if (limit && statp->f_files > limit) { 60 if (limit && statp->f_files > limit) {
61 statp->f_files = limit; 61 statp->f_files = limit;
62 statp->f_ffree = 62 statp->f_ffree =
63 (statp->f_files > be64_to_cpu(dp->d_icount)) ? 63 (statp->f_files > dqp->q_res_icount) ?
64 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; 64 (statp->f_ffree - dqp->q_res_icount) : 0;
65 } 65 }
66} 66}
67 67
@@ -82,7 +82,7 @@ xfs_qm_statvfs(
82 xfs_dquot_t *dqp; 82 xfs_dquot_t *dqp;
83 83
84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { 84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
85 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); 85 xfs_fill_statvfs_from_dquot(statp, dqp);
86 xfs_qm_dqput(dqp); 86 xfs_qm_dqput(dqp);
87 } 87 }
88} 88}
@@ -156,21 +156,3 @@ xfs_qm_newmount(
156 156
157 return 0; 157 return 0;
158} 158}
159
160void __init
161xfs_qm_init(void)
162{
163 printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
164 mutex_init(&xfs_Gqm_lock);
165 xfs_qm_init_procfs();
166}
167
168void __exit
169xfs_qm_exit(void)
170{
171 xfs_qm_cleanup_procfs();
172 if (qm_dqzone)
173 kmem_zone_destroy(qm_dqzone);
174 if (qm_dqtrxzone)
175 kmem_zone_destroy(qm_dqtrxzone);
176}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
deleted file mode 100644
index 5729ba57087..00000000000
--- a/fs/xfs/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_bit.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_alloc.h"
27#include "xfs_quota.h"
28#include "xfs_mount.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_inode.h"
31#include "xfs_itable.h"
32#include "xfs_bmap.h"
33#include "xfs_rtalloc.h"
34#include "xfs_error.h"
35#include "xfs_attr.h"
36#include "xfs_buf_item.h"
37#include "xfs_qm.h"
38
39struct xqmstats xqmstats;
40
41static int xqm_proc_show(struct seq_file *m, void *v)
42{
43 /* maximum; incore; ratio free to inuse; freelist */
44 seq_printf(m, "%d\t%d\t%d\t%u\n",
45 0,
46 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
47 0,
48 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
49 return 0;
50}
51
52static int xqm_proc_open(struct inode *inode, struct file *file)
53{
54 return single_open(file, xqm_proc_show, NULL);
55}
56
57static const struct file_operations xqm_proc_fops = {
58 .owner = THIS_MODULE,
59 .open = xqm_proc_open,
60 .read = seq_read,
61 .llseek = seq_lseek,
62 .release = single_release,
63};
64
65static int xqmstat_proc_show(struct seq_file *m, void *v)
66{
67 /* quota performance statistics */
68 seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
69 xqmstats.xs_qm_dqreclaims,
70 xqmstats.xs_qm_dqreclaim_misses,
71 xqmstats.xs_qm_dquot_dups,
72 xqmstats.xs_qm_dqcachemisses,
73 xqmstats.xs_qm_dqcachehits,
74 xqmstats.xs_qm_dqwants,
75 xqmstats.xs_qm_dqshake_reclaims,
76 xqmstats.xs_qm_dqinact_reclaims);
77 return 0;
78}
79
80static int xqmstat_proc_open(struct inode *inode, struct file *file)
81{
82 return single_open(file, xqmstat_proc_show, NULL);
83}
84
85static const struct file_operations xqmstat_proc_fops = {
86 .owner = THIS_MODULE,
87 .open = xqmstat_proc_open,
88 .read = seq_read,
89 .llseek = seq_lseek,
90 .release = single_release,
91};
92
93void
94xfs_qm_init_procfs(void)
95{
96 proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
97 proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
98}
99
100void
101xfs_qm_cleanup_procfs(void)
102{
103 remove_proc_entry("fs/xfs/xqm", NULL);
104 remove_proc_entry("fs/xfs/xqmstat", NULL);
105}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
deleted file mode 100644
index 5b964fc0dc0..00000000000
--- a/fs/xfs/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (c) 2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_QM_STATS_H__
19#define __XFS_QM_STATS_H__
20
21#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
22
23/*
24 * XQM global statistics
25 */
26struct xqmstats {
27 __uint32_t xs_qm_dqreclaims;
28 __uint32_t xs_qm_dqreclaim_misses;
29 __uint32_t xs_qm_dquot_dups;
30 __uint32_t xs_qm_dqcachemisses;
31 __uint32_t xs_qm_dqcachehits;
32 __uint32_t xs_qm_dqwants;
33 __uint32_t xs_qm_dqshake_reclaims;
34 __uint32_t xs_qm_dqinact_reclaims;
35};
36
37extern struct xqmstats xqmstats;
38
39# define XQM_STATS_INC(count) ( (count)++ )
40
41extern void xfs_qm_init_procfs(void);
42extern void xfs_qm_cleanup_procfs(void);
43
44#else
45
46# define XQM_STATS_INC(count) do { } while (0)
47
48static inline void xfs_qm_init_procfs(void) { };
49static inline void xfs_qm_cleanup_procfs(void) { };
50
51#endif
52
53#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 711a86e39ff..c4f396e437a 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -47,9 +47,6 @@ STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
47 uint); 47 uint);
48STATIC uint xfs_qm_export_flags(uint); 48STATIC uint xfs_qm_export_flags(uint);
49STATIC uint xfs_qm_export_qtype_flags(uint); 49STATIC uint xfs_qm_export_qtype_flags(uint);
50STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
51 fs_disk_quota_t *);
52
53 50
54/* 51/*
55 * Turn off quota accounting and/or enforcement for all udquots and/or 52 * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -69,7 +66,6 @@ xfs_qm_scall_quotaoff(
69 int error; 66 int error;
70 uint inactivate_flags; 67 uint inactivate_flags;
71 xfs_qoff_logitem_t *qoffstart; 68 xfs_qoff_logitem_t *qoffstart;
72 int nculprits;
73 69
74 /* 70 /*
75 * No file system can have quotas enabled on disk but not in core. 71 * No file system can have quotas enabled on disk but not in core.
@@ -175,18 +171,13 @@ xfs_qm_scall_quotaoff(
175 * This isn't protected by a particular lock directly, because we 171 * This isn't protected by a particular lock directly, because we
176 * don't want to take a mrlock every time we depend on quotas being on. 172 * don't want to take a mrlock every time we depend on quotas being on.
177 */ 173 */
178 mp->m_qflags &= ~(flags); 174 mp->m_qflags &= ~flags;
179 175
180 /* 176 /*
181 * Go through all the dquots of this file system and purge them, 177 * Go through all the dquots of this file system and purge them,
182 * according to what was turned off. We may not be able to get rid 178 * according to what was turned off.
183 * of all dquots, because dquots can have temporary references that
184 * are not attached to inodes. eg. xfs_setattr, xfs_create.
185 * So, if we couldn't purge all the dquots from the filesystem,
186 * we can't get rid of the incore data structures.
187 */ 179 */
188 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) 180 xfs_qm_dqpurge_all(mp, dqtype);
189 delay(10 * nculprits);
190 181
191 /* 182 /*
192 * Transactions that had started before ACTIVE state bit was cleared 183 * Transactions that had started before ACTIVE state bit was cleared
@@ -635,42 +626,6 @@ xfs_qm_scall_setqlim(
635 return error; 626 return error;
636} 627}
637 628
638int
639xfs_qm_scall_getquota(
640 xfs_mount_t *mp,
641 xfs_dqid_t id,
642 uint type,
643 fs_disk_quota_t *out)
644{
645 xfs_dquot_t *dqp;
646 int error;
647
648 /*
649 * Try to get the dquot. We don't want it allocated on disk, so
650 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
651 * exist, we'll get ENOENT back.
652 */
653 if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
654 return (error);
655 }
656
657 /*
658 * If everything's NULL, this dquot doesn't quite exist as far as
659 * our utility programs are concerned.
660 */
661 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
662 xfs_qm_dqput(dqp);
663 return XFS_ERROR(ENOENT);
664 }
665 /*
666 * Convert the disk dquot to the exportable format
667 */
668 xfs_qm_export_dquot(mp, &dqp->q_core, out);
669 xfs_qm_dqput(dqp);
670 return (error ? XFS_ERROR(EFAULT) : 0);
671}
672
673
674STATIC int 629STATIC int
675xfs_qm_log_quotaoff_end( 630xfs_qm_log_quotaoff_end(
676 xfs_mount_t *mp, 631 xfs_mount_t *mp,
@@ -759,50 +714,66 @@ error0:
759} 714}
760 715
761 716
762/* 717int
763 * Translate an internal style on-disk-dquot to the exportable format. 718xfs_qm_scall_getquota(
764 * The main differences are that the counters/limits are all in Basic 719 struct xfs_mount *mp,
765 * Blocks (BBs) instead of the internal FSBs, and all on-disk data has 720 xfs_dqid_t id,
766 * to be converted to the native endianness. 721 uint type,
767 */
768STATIC void
769xfs_qm_export_dquot(
770 xfs_mount_t *mp,
771 xfs_disk_dquot_t *src,
772 struct fs_disk_quota *dst) 722 struct fs_disk_quota *dst)
773{ 723{
724 struct xfs_dquot *dqp;
725 int error;
726
727 /*
728 * Try to get the dquot. We don't want it allocated on disk, so
729 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
730 * exist, we'll get ENOENT back.
731 */
732 error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
733 if (error)
734 return error;
735
736 /*
737 * If everything's NULL, this dquot doesn't quite exist as far as
738 * our utility programs are concerned.
739 */
740 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
741 error = XFS_ERROR(ENOENT);
742 goto out_put;
743 }
744
774 memset(dst, 0, sizeof(*dst)); 745 memset(dst, 0, sizeof(*dst));
775 dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ 746 dst->d_version = FS_DQUOT_VERSION;
776 dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); 747 dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags);
777 dst->d_id = be32_to_cpu(src->d_id); 748 dst->d_id = be32_to_cpu(dqp->q_core.d_id);
778 dst->d_blk_hardlimit = 749 dst->d_blk_hardlimit =
779 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); 750 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
780 dst->d_blk_softlimit = 751 dst->d_blk_softlimit =
781 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); 752 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
782 dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); 753 dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
783 dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); 754 dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
784 dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); 755 dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount);
785 dst->d_icount = be64_to_cpu(src->d_icount); 756 dst->d_icount = dqp->q_res_icount;
786 dst->d_btimer = be32_to_cpu(src->d_btimer); 757 dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer);
787 dst->d_itimer = be32_to_cpu(src->d_itimer); 758 dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer);
788 dst->d_iwarns = be16_to_cpu(src->d_iwarns); 759 dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns);
789 dst->d_bwarns = be16_to_cpu(src->d_bwarns); 760 dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns);
790 dst->d_rtb_hardlimit = 761 dst->d_rtb_hardlimit =
791 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); 762 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit));
792 dst->d_rtb_softlimit = 763 dst->d_rtb_softlimit =
793 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); 764 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit));
794 dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); 765 dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount);
795 dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); 766 dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer);
796 dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); 767 dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns);
797 768
798 /* 769 /*
799 * Internally, we don't reset all the timers when quota enforcement 770 * Internally, we don't reset all the timers when quota enforcement
800 * gets turned off. No need to confuse the user level code, 771 * gets turned off. No need to confuse the user level code,
801 * so return zeroes in that case. 772 * so return zeroes in that case.
802 */ 773 */
803 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || 774 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) ||
804 (!XFS_IS_OQUOTA_ENFORCED(mp) && 775 (!XFS_IS_OQUOTA_ENFORCED(mp) &&
805 (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { 776 (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
806 dst->d_btimer = 0; 777 dst->d_btimer = 0;
807 dst->d_itimer = 0; 778 dst->d_itimer = 0;
808 dst->d_rtbtimer = 0; 779 dst->d_rtbtimer = 0;
@@ -823,6 +794,9 @@ xfs_qm_export_dquot(
823 } 794 }
824 } 795 }
825#endif 796#endif
797out_put:
798 xfs_qm_dqput(dqp);
799 return error;
826} 800}
827 801
828STATIC uint 802STATIC uint
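The rewrite above folds the old xfs_qm_export_dquot() helper into xfs_qm_scall_getquota() and funnels every failure after the dqget through one release point. Reduced to its skeleton (a sketch, not the full function):

	error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
	if (error)
		return error;			/* no dquot to release yet */

	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
		error = XFS_ERROR(ENOENT);
		goto out_put;
	}
	/* ... convert dqp->q_core into the exportable *dst ... */
out_put:
	xfs_qm_dqput(dqp);			/* single release point */
	return error;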
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 8a0807e0f97..b50ec5b95d5 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -174,6 +174,8 @@ typedef struct xfs_qoff_logformat {
174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ 174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */
175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ 175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */
176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ 176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */
177#define XFS_ALL_QUOTA_ACTIVE \
178 (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
177 179
178/* 180/*
179 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees 181 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index 94a3d927d71..6d86219d93d 100644
--- a/fs/xfs/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
@@ -24,17 +24,6 @@
24 */ 24 */
25#define XFS_DQITER_MAP_SIZE 10 25#define XFS_DQITER_MAP_SIZE 10
26 26
27/*
28 * Hash into a bucket in the dquot hash table, based on <mp, id>.
29 */
30#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
31 (__psunsigned_t)(id)) & \
32 (xfs_Gqm->qm_dqhashmask - 1))
33#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
34 (xfs_Gqm->qm_usr_dqhtable + \
35 XFS_DQ_HASHVAL(mp, id)) : \
36 (xfs_Gqm->qm_grp_dqhtable + \
37 XFS_DQ_HASHVAL(mp, id)))
38#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ 27#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
39 !dqp->q_core.d_blk_hardlimit && \ 28 !dqp->q_core.d_blk_hardlimit && \
40 !dqp->q_core.d_blk_softlimit && \ 29 !dqp->q_core.d_blk_softlimit && \
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 866de277079..e44ef7ee8ce 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -118,17 +118,6 @@ xfs_rename(
118 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
120 120
121 if (src_is_directory) {
122 /*
123 * Check for link count overflow on target_dp
124 */
125 if (target_ip == NULL && new_parent &&
126 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
127 error = XFS_ERROR(EMLINK);
128 goto std_return;
129 }
130 }
131
132 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 121 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
133 inodes, &num_inodes); 122 inodes, &num_inodes);
134 123
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 87323f1ded6..ca4f31534a0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -183,6 +183,7 @@ error_cancel:
183 oblocks = map.br_startoff + map.br_blockcount; 183 oblocks = map.br_startoff + map.br_blockcount;
184 } 184 }
185 return 0; 185 return 0;
186
186error: 187error:
187 return error; 188 return error;
188} 189}
@@ -2139,11 +2140,9 @@ xfs_rtfree_extent(
2139 xfs_buf_t *sumbp; /* summary file block buffer */ 2140 xfs_buf_t *sumbp; /* summary file block buffer */
2140 2141
2141 mp = tp->t_mountp; 2142 mp = tp->t_mountp;
2142 /* 2143
2143 * Synchronize by locking the bitmap inode. 2144 ASSERT(mp->m_rbmip->i_itemp != NULL);
2144 */ 2145 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2145 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2146 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2147 2146
2148#if defined(__KERNEL__) && defined(DEBUG) 2147#if defined(__KERNEL__) && defined(DEBUG)
2149 /* 2148 /*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index cb6ae715814..f429d9d5d32 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -529,7 +529,6 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
529#define XFS_BB_TO_FSB(mp,bb) \ 529#define XFS_BB_TO_FSB(mp,bb) \
530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) 530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) 531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log)
532#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
533 532
534/* 533/*
535 * File system block to byte conversions. 534 * File system block to byte conversions.
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 76fdc586193..ce372b7d564 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -20,9 +20,18 @@
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21DEFINE_PER_CPU(struct xfsstats, xfsstats);
22 22
23static int counter_val(int idx)
24{
25 int val = 0, cpu;
26
27 for_each_possible_cpu(cpu)
28 val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx));
29 return val;
30}
31
23static int xfs_stat_proc_show(struct seq_file *m, void *v) 32static int xfs_stat_proc_show(struct seq_file *m, void *v)
24{ 33{
25 int c, i, j, val; 34 int i, j;
26 __uint64_t xs_xstrat_bytes = 0; 35 __uint64_t xs_xstrat_bytes = 0;
27 __uint64_t xs_write_bytes = 0; 36 __uint64_t xs_write_bytes = 0;
28 __uint64_t xs_read_bytes = 0; 37 __uint64_t xs_read_bytes = 0;
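counter_val() relies on struct xfsstats being, in effect, a flat array of __u32 event counters, so a single index identifies one counter across all per-CPU copies. A hedged usage sketch (the offsetof trick is illustrative, not taken from this patch):

	#include <linux/stddef.h>	/* offsetof */

	/* sum one named counter over all CPUs */
	int idx = offsetof(struct xfsstats, xs_qm_dqreclaims) / sizeof(__u32);
	int dqreclaims = counter_val(idx);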
@@ -50,20 +59,16 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
50 { "abtc2", XFSSTAT_END_ABTC_V2 }, 59 { "abtc2", XFSSTAT_END_ABTC_V2 },
51 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 60 { "bmbt2", XFSSTAT_END_BMBT_V2 },
52 { "ibt2", XFSSTAT_END_IBT_V2 }, 61 { "ibt2", XFSSTAT_END_IBT_V2 },
62 /* we print both series of quota information together */
63 { "qm", XFSSTAT_END_QM },
53 }; 64 };
54 65
55 /* Loop over all stats groups */ 66 /* Loop over all stats groups */
56 for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { 67 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
57 seq_printf(m, "%s", xstats[i].desc); 68 seq_printf(m, "%s", xstats[i].desc);
58 /* inner loop does each group */ 69 /* inner loop does each group */
59 while (j < xstats[i].endpoint) { 70 for (; j < xstats[i].endpoint; j++)
60 val = 0; 71 seq_printf(m, " %u", counter_val(j));
61 /* sum over all cpus */
62 for_each_possible_cpu(c)
63 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
64 seq_printf(m, " %u", val);
65 j++;
66 }
67 seq_putc(m, '\n'); 72 seq_putc(m, '\n');
68 } 73 }
69 /* extra precision counters */ 74 /* extra precision counters */
@@ -97,6 +102,58 @@ static const struct file_operations xfs_stat_proc_fops = {
97 .release = single_release, 102 .release = single_release,
98}; 103};
99 104
105/* legacy quota interfaces */
106#ifdef CONFIG_XFS_QUOTA
107static int xqm_proc_show(struct seq_file *m, void *v)
108{
109 /* maximum; incore; ratio free to inuse; freelist */
110 seq_printf(m, "%d\t%d\t%d\t%u\n",
111 0,
112 counter_val(XFSSTAT_END_XQMSTAT),
113 0,
114 counter_val(XFSSTAT_END_XQMSTAT + 1));
115 return 0;
116}
117
118static int xqm_proc_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, xqm_proc_show, NULL);
121}
122
123static const struct file_operations xqm_proc_fops = {
124 .owner = THIS_MODULE,
125 .open = xqm_proc_open,
126 .read = seq_read,
127 .llseek = seq_lseek,
128 .release = single_release,
129};
130
131/* legacy quota stats interface no 2 */
132static int xqmstat_proc_show(struct seq_file *m, void *v)
133{
134 int j;
135
136 seq_printf(m, "qm");
137 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
138 seq_printf(m, " %u", counter_val(j));
139 seq_putc(m, '\n');
140 return 0;
141}
142
143static int xqmstat_proc_open(struct inode *inode, struct file *file)
144{
145 return single_open(file, xqmstat_proc_show, NULL);
146}
147
148static const struct file_operations xqmstat_proc_fops = {
149 .owner = THIS_MODULE,
150 .open = xqmstat_proc_open,
151 .read = seq_read,
152 .llseek = seq_lseek,
153 .release = single_release,
154};
155#endif /* CONFIG_XFS_QUOTA */
156
100int 157int
101xfs_init_procfs(void) 158xfs_init_procfs(void)
102{ 159{
@@ -105,10 +162,24 @@ xfs_init_procfs(void)
105 162
106 if (!proc_create("fs/xfs/stat", 0, NULL, 163 if (!proc_create("fs/xfs/stat", 0, NULL,
107 &xfs_stat_proc_fops)) 164 &xfs_stat_proc_fops))
108 goto out_remove_entry; 165 goto out_remove_xfs_dir;
166#ifdef CONFIG_XFS_QUOTA
167 if (!proc_create("fs/xfs/xqmstat", 0, NULL,
168 &xqmstat_proc_fops))
169 goto out_remove_stat_file;
170 if (!proc_create("fs/xfs/xqm", 0, NULL,
171 &xqm_proc_fops))
172 goto out_remove_xqmstat_file;
173#endif
109 return 0; 174 return 0;
110 175
111 out_remove_entry: 176#ifdef CONFIG_XFS_QUOTA
177 out_remove_xqmstat_file:
178 remove_proc_entry("fs/xfs/xqmstat", NULL);
179 out_remove_stat_file:
180 remove_proc_entry("fs/xfs/stat", NULL);
181#endif
182 out_remove_xfs_dir:
112 remove_proc_entry("fs/xfs", NULL); 183 remove_proc_entry("fs/xfs", NULL);
113 out: 184 out:
114 return -ENOMEM; 185 return -ENOMEM;
@@ -117,6 +188,10 @@ xfs_init_procfs(void)
117void 188void
118xfs_cleanup_procfs(void) 189xfs_cleanup_procfs(void)
119{ 190{
191#ifdef CONFIG_XFS_QUOTA
192 remove_proc_entry("fs/xfs/xqm", NULL);
193 remove_proc_entry("fs/xfs/xqmstat", NULL);
194#endif
120 remove_proc_entry("fs/xfs/stat", NULL); 195 remove_proc_entry("fs/xfs/stat", NULL);
121 remove_proc_entry("fs/xfs", NULL); 196 remove_proc_entry("fs/xfs", NULL);
122} 197}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 736854b1ca1..c03ad38ceae 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,6 +183,16 @@ struct xfsstats {
183 __uint32_t xs_ibt_2_alloc; 183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free; 184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves; 185 __uint32_t xs_ibt_2_moves;
186#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6)
187 __uint32_t xs_qm_dqreclaims;
188 __uint32_t xs_qm_dqreclaim_misses;
189 __uint32_t xs_qm_dquot_dups;
190 __uint32_t xs_qm_dqcachemisses;
191 __uint32_t xs_qm_dqcachehits;
192 __uint32_t xs_qm_dqwants;
193#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
194 __uint32_t xs_qm_dquot;
195 __uint32_t xs_qm_dquot_unused;
186/* Extra precision counters */ 196/* Extra precision counters */
187 __uint64_t xs_xstrat_bytes; 197 __uint64_t xs_xstrat_bytes;
188 __uint64_t xs_write_bytes; 198 __uint64_t xs_write_bytes;
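To spell out the index arithmetic: XFSSTAT_END_XQMSTAT = XFSSTAT_END_IBT_V2 + 6 bounds the six xs_qm_dq* event counters, and XFSSTAT_END_QM = XFSSTAT_END_XQMSTAT + 2 bounds xs_qm_dquot and xs_qm_dquot_unused. That is what lets xqmstat_proc_show() above iterate j from XFSSTAT_END_IBT_V2 to XFSSTAT_END_XQMSTAT, and xqm_proc_show() read counter_val(XFSSTAT_END_XQMSTAT) for the incore dquot count and counter_val(XFSSTAT_END_XQMSTAT + 1) for the unused count.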
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee5b695c99a..dab9a5f6dfd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -324,10 +324,9 @@ xfs_parseargs(
324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
325 mp->m_flags |= XFS_MOUNT_FILESTREAMS; 325 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
327 mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | 327 mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
328 XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | 328 mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
329 XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | 329 mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
330 XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
331 } else if (!strcmp(this_char, MNTOPT_QUOTA) || 330 } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
332 !strcmp(this_char, MNTOPT_UQUOTA) || 331 !strcmp(this_char, MNTOPT_UQUOTA) ||
333 !strcmp(this_char, MNTOPT_USRQUOTA)) { 332 !strcmp(this_char, MNTOPT_USRQUOTA)) {
@@ -760,6 +759,36 @@ xfs_setup_devices(
760 return 0; 759 return 0;
761} 760}
762 761
762STATIC int
763xfs_init_mount_workqueues(
764 struct xfs_mount *mp)
765{
766 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
767 WQ_MEM_RECLAIM, 0, mp->m_fsname);
768 if (!mp->m_data_workqueue)
769 goto out;
770
771 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
772 WQ_MEM_RECLAIM, 0, mp->m_fsname);
773 if (!mp->m_unwritten_workqueue)
774 goto out_destroy_data_iodone_queue;
775
776 return 0;
777
778out_destroy_data_iodone_queue:
779 destroy_workqueue(mp->m_data_workqueue);
780out:
781 return -ENOMEM;
782}
783
784STATIC void
785xfs_destroy_mount_workqueues(
786 struct xfs_mount *mp)
787{
788 destroy_workqueue(mp->m_data_workqueue);
789 destroy_workqueue(mp->m_unwritten_workqueue);
790}
791
763/* Catch misguided souls that try to use this interface on XFS */ 792/* Catch misguided souls that try to use this interface on XFS */
764STATIC struct inode * 793STATIC struct inode *
765xfs_fs_alloc_inode( 794xfs_fs_alloc_inode(
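Neither hunk shows the producers for these per-mount queues; a hedged sketch of how a work item would typically be queued and drained (the xfs_end_io handler name is assumed for illustration):

	struct work_struct work;		/* e.g. embedded in an ioend */

	INIT_WORK(&work, xfs_end_io);		/* handler name assumed */
	queue_work(mp->m_data_workqueue, &work);
	/* ... */
	flush_workqueue(mp->m_data_workqueue);	/* drain before destroy */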
@@ -834,91 +863,58 @@ xfs_fs_inode_init_once(
834} 863}
835 864
836/* 865/*
837 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 866 * This is called by the VFS when dirtying inode metadata. This can happen
838 * we catch unlogged VFS level updates to the inode. 867 * for a few reasons, but we only care about timestamp updates, given that
868 * we handled the rest ourselves. In theory no other calls should happen,
869 * but for example generic_write_end() keeps dirtying the inode after
870 * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
871 * and skip this call otherwise.
839 * 872 *
840 * We need the barrier() to maintain correct ordering between unlogged 873 * We'll hopefully get a different method just for updating timestamps soon,
841 * updates and the transaction commit code that clears the i_update_core 874 * at which point this hack can go away, and maybe we'll also get real
842 * field. This requires all updates to be completed before marking the 875 * error handling here.
843 * inode dirty.
844 */ 876 */
845STATIC void 877STATIC void
846xfs_fs_dirty_inode( 878xfs_fs_dirty_inode(
847 struct inode *inode,
848 int flags)
849{
850 barrier();
851 XFS_I(inode)->i_update_core = 1;
852}
853
854STATIC int
855xfs_fs_write_inode(
856 struct inode *inode, 879 struct inode *inode,
857 struct writeback_control *wbc) 880 int flags)
858{ 881{
859 struct xfs_inode *ip = XFS_I(inode); 882 struct xfs_inode *ip = XFS_I(inode);
860 struct xfs_mount *mp = ip->i_mount; 883 struct xfs_mount *mp = ip->i_mount;
861 int error = EAGAIN; 884 struct xfs_trans *tp;
862 885 int error;
863 trace_xfs_write_inode(ip);
864
865 if (XFS_FORCED_SHUTDOWN(mp))
866 return -XFS_ERROR(EIO);
867
868 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
869 /*
870 * Make sure the inode has made it it into the log. Instead
871 * of forcing it all the way to stable storage using a
872 * synchronous transaction we let the log force inside the
873 * ->sync_fs call do that for thus, which reduces the number
874 * of synchronous log forces dramatically.
875 */
876 error = xfs_log_dirty_inode(ip, NULL, 0);
877 if (error)
878 goto out;
879 return 0;
880 } else {
881 if (!ip->i_update_core)
882 return 0;
883 886
884 /* 887 if (flags != I_DIRTY_SYNC)
885 * We make this non-blocking if the inode is contended, return 888 return;
886 * EAGAIN to indicate to the caller that they did not succeed.
887 * This prevents the flush path from blocking on inodes inside
888 * another operation right now, they get caught later by
889 * xfs_sync.
890 */
891 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
892 goto out;
893 889
894 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 890 trace_xfs_dirty_inode(ip);
895 goto out_unlock;
896 891
897 /* 892 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
898 * Now we have the flush lock and the inode is not pinned, we 893 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
899 * can check if the inode is really clean as we know that 894 if (error) {
900 * there are no pending transaction completions, it is not 895 xfs_trans_cancel(tp, 0);
901 * waiting on the delayed write queue and there is no IO in 896 goto trouble;
902 * progress.
903 */
904 if (xfs_inode_clean(ip)) {
905 xfs_ifunlock(ip);
906 error = 0;
907 goto out_unlock;
908 }
909 error = xfs_iflush(ip, SYNC_TRYLOCK);
910 } 897 }
911 898 xfs_ilock(ip, XFS_ILOCK_EXCL);
912 out_unlock:
913 xfs_iunlock(ip, XFS_ILOCK_SHARED);
914 out:
915 /* 899 /*
916 * if we failed to write out the inode then mark 900 * Grab all the latest timestamps from the Linux inode.
917 * it dirty again so we'll try again later.
918 */ 901 */
902 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
903 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
904 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
905 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
906 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
907 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
908
909 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
910 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
911 error = xfs_trans_commit(tp, 0);
919 if (error) 912 if (error)
920 xfs_mark_inode_dirty_sync(ip); 913 goto trouble;
921 return -error; 914 return;
915
916trouble:
917 xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
922} 918}
923 919
924STATIC void 920STATIC void
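For context on the flags check: the VFS reaches this method from __mark_inode_dirty(), roughly as sketched below (simplified; the real function handles more cases). mark_inode_dirty_sync(inode) is __mark_inode_dirty(inode, I_DIRTY_SYNC), which is the only caller this hook now acts on.

	/* fs/inode.c, simplified for illustration */
	void __mark_inode_dirty(struct inode *inode, int flags)
	{
		struct super_block *sb = inode->i_sb;

		if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
			if (sb->s_op->dirty_inode)
				sb->s_op->dirty_inode(inode, flags);
		}
		/* ... then queue the inode for writeback ... */
	}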
@@ -954,6 +950,22 @@ xfs_fs_evict_inode(
954 xfs_inactive(ip); 950 xfs_inactive(ip);
955} 951}
956 952
953/*
954 * We do an unlocked check for XFS_IDONTCACHE here because we are already
955 * serialised against cache hits here via the inode->i_lock and igrab() in
956 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
957 * racing with us, and it avoids needing to grab a spinlock here for every inode
958 * we drop the final reference on.
959 */
960STATIC int
961xfs_fs_drop_inode(
962 struct inode *inode)
963{
964 struct xfs_inode *ip = XFS_I(inode);
965
966 return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
967}
968
957STATIC void 969STATIC void
958xfs_free_fsname( 970xfs_free_fsname(
959 struct xfs_mount *mp) 971 struct xfs_mount *mp)
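How the new ->drop_inode hook is consulted, roughly (simplified sketch of fs/inode.c:iput_final()):

	static void iput_final(struct inode *inode)
	{
		const struct super_operations *op = inode->i_sb->s_op;
		int drop;

		if (op->drop_inode)
			drop = op->drop_inode(inode);
		else
			drop = generic_drop_inode(inode);
		/* nonzero: evict the inode now instead of caching it */
	}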
@@ -983,6 +995,7 @@ xfs_fs_put_super(
983 xfs_unmountfs(mp); 995 xfs_unmountfs(mp);
984 xfs_freesb(mp); 996 xfs_freesb(mp);
985 xfs_icsb_destroy_counters(mp); 997 xfs_icsb_destroy_counters(mp);
998 xfs_destroy_mount_workqueues(mp);
986 xfs_close_devices(mp); 999 xfs_close_devices(mp);
987 xfs_free_fsname(mp); 1000 xfs_free_fsname(mp);
988 kfree(mp); 1001 kfree(mp);
@@ -1309,10 +1322,14 @@ xfs_fs_fill_super(
1309 if (error) 1322 if (error)
1310 goto out_free_fsname; 1323 goto out_free_fsname;
1311 1324
1312 error = xfs_icsb_init_counters(mp); 1325 error = xfs_init_mount_workqueues(mp);
1313 if (error) 1326 if (error)
1314 goto out_close_devices; 1327 goto out_close_devices;
1315 1328
1329 error = xfs_icsb_init_counters(mp);
1330 if (error)
1331 goto out_destroy_workqueues;
1332
1316 error = xfs_readsb(mp, flags); 1333 error = xfs_readsb(mp, flags);
1317 if (error) 1334 if (error)
1318 goto out_destroy_counters; 1335 goto out_destroy_counters;
@@ -1341,6 +1358,7 @@ xfs_fs_fill_super(
1341 sb->s_blocksize = mp->m_sb.sb_blocksize; 1358 sb->s_blocksize = mp->m_sb.sb_blocksize;
1342 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1359 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1343 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1360 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1361 sb->s_max_links = XFS_MAXLINK;
1344 sb->s_time_gran = 1; 1362 sb->s_time_gran = 1;
1345 set_posix_acl_flag(sb); 1363 set_posix_acl_flag(sb);
1346 1364
@@ -1361,10 +1379,10 @@ xfs_fs_fill_super(
1361 error = EINVAL; 1379 error = EINVAL;
1362 goto out_syncd_stop; 1380 goto out_syncd_stop;
1363 } 1381 }
1364 sb->s_root = d_alloc_root(root); 1382 sb->s_root = d_make_root(root);
1365 if (!sb->s_root) { 1383 if (!sb->s_root) {
1366 error = ENOMEM; 1384 error = ENOMEM;
1367 goto out_iput; 1385 goto out_syncd_stop;
1368 } 1386 }
1369 1387
1370 return 0; 1388 return 0;
@@ -1375,6 +1393,8 @@ xfs_fs_fill_super(
1375 xfs_freesb(mp); 1393 xfs_freesb(mp);
1376 out_destroy_counters: 1394 out_destroy_counters:
1377 xfs_icsb_destroy_counters(mp); 1395 xfs_icsb_destroy_counters(mp);
1396out_destroy_workqueues:
1397 xfs_destroy_mount_workqueues(mp);
1378 out_close_devices: 1398 out_close_devices:
1379 xfs_close_devices(mp); 1399 xfs_close_devices(mp);
1380 out_free_fsname: 1400 out_free_fsname:
@@ -1383,8 +1403,6 @@ xfs_fs_fill_super(
1383 out: 1403 out:
1384 return -error; 1404 return -error;
1385 1405
1386 out_iput:
1387 iput(root);
1388 out_syncd_stop: 1406 out_syncd_stop:
1389 xfs_syncd_stop(mp); 1407 xfs_syncd_stop(mp);
1390 out_unmount: 1408 out_unmount:
@@ -1430,8 +1448,8 @@ static const struct super_operations xfs_super_operations = {
1430 .alloc_inode = xfs_fs_alloc_inode, 1448 .alloc_inode = xfs_fs_alloc_inode,
1431 .destroy_inode = xfs_fs_destroy_inode, 1449 .destroy_inode = xfs_fs_destroy_inode,
1432 .dirty_inode = xfs_fs_dirty_inode, 1450 .dirty_inode = xfs_fs_dirty_inode,
1433 .write_inode = xfs_fs_write_inode,
1434 .evict_inode = xfs_fs_evict_inode, 1451 .evict_inode = xfs_fs_evict_inode,
1452 .drop_inode = xfs_fs_drop_inode,
1435 .put_super = xfs_fs_put_super, 1453 .put_super = xfs_fs_put_super,
1436 .sync_fs = xfs_fs_sync_fs, 1454 .sync_fs = xfs_fs_sync_fs,
1437 .freeze_fs = xfs_fs_freeze, 1455 .freeze_fs = xfs_fs_freeze,
@@ -1605,12 +1623,28 @@ xfs_init_workqueues(void)
1605 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); 1623 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1606 if (!xfs_syncd_wq) 1624 if (!xfs_syncd_wq)
1607 return -ENOMEM; 1625 return -ENOMEM;
1626
1627 /*
1628 * The allocation workqueue can be used in memory reclaim situations
1629 * (writepage path), and parallelism is only limited by the number of
1630 * AGs in all the filesystems mounted. Hence use the default large
1631 * max_active value for this workqueue.
1632 */
1633 xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
1634 if (!xfs_alloc_wq)
1635 goto out_destroy_syncd;
1636
1608 return 0; 1637 return 0;
1638
1639out_destroy_syncd:
1640 destroy_workqueue(xfs_syncd_wq);
1641 return -ENOMEM;
1609} 1642}
1610 1643
1611STATIC void 1644STATIC void
1612xfs_destroy_workqueues(void) 1645xfs_destroy_workqueues(void)
1613{ 1646{
1647 destroy_workqueue(xfs_alloc_wq);
1614 destroy_workqueue(xfs_syncd_wq); 1648 destroy_workqueue(xfs_syncd_wq);
1615} 1649}
1616 1650
@@ -1652,13 +1686,17 @@ init_xfs_fs(void)
1652 if (error) 1686 if (error)
1653 goto out_cleanup_procfs; 1687 goto out_cleanup_procfs;
1654 1688
1655 vfs_initquota(); 1689 error = xfs_qm_init();
1690 if (error)
1691 goto out_sysctl_unregister;
1656 1692
1657 error = register_filesystem(&xfs_fs_type); 1693 error = register_filesystem(&xfs_fs_type);
1658 if (error) 1694 if (error)
1659 goto out_sysctl_unregister; 1695 goto out_qm_exit;
1660 return 0; 1696 return 0;
1661 1697
1698 out_qm_exit:
1699 xfs_qm_exit();
1662 out_sysctl_unregister: 1700 out_sysctl_unregister:
1663 xfs_sysctl_unregister(); 1701 xfs_sysctl_unregister();
1664 out_cleanup_procfs: 1702 out_cleanup_procfs:
@@ -1680,7 +1718,7 @@ init_xfs_fs(void)
1680STATIC void __exit 1718STATIC void __exit
1681exit_xfs_fs(void) 1719exit_xfs_fs(void)
1682{ 1720{
1683 vfs_exitquota(); 1721 xfs_qm_exit();
1684 unregister_filesystem(&xfs_fs_type); 1722 unregister_filesystem(&xfs_fs_type);
1685 xfs_sysctl_unregister(); 1723 xfs_sysctl_unregister();
1686 xfs_cleanup_procfs(); 1724 xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 50a3266c999..09b0c26b224 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -21,13 +21,11 @@
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22 22
23#ifdef CONFIG_XFS_QUOTA 23#ifdef CONFIG_XFS_QUOTA
24extern void xfs_qm_init(void); 24extern int xfs_qm_init(void);
25extern void xfs_qm_exit(void); 25extern void xfs_qm_exit(void);
26# define vfs_initquota() xfs_qm_init()
27# define vfs_exitquota() xfs_qm_exit()
28#else 26#else
29# define vfs_initquota() do { } while (0) 27# define xfs_qm_init() (0)
30# define vfs_exitquota() do { } while (0) 28# define xfs_qm_exit() do { } while (0)
31#endif 29#endif
32 30
33#ifdef CONFIG_XFS_POSIX_ACL 31#ifdef CONFIG_XFS_POSIX_ACL
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 40b75eecd2b..205ebcb34d9 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
365/* 339/*
366 * When remounting a filesystem read-only or freezing the filesystem, we have 340 * When remounting a filesystem read-only or freezing the filesystem, we have
367 * two phases to execute. This first phase is syncing the data before we 341 * two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
385{ 359{
386 int error, error2 = 0; 360 int error, error2 = 0;
387 361
388 /*
389 * Log all pending size and timestamp updates. The vfs writeback
390 * code is supposed to do this, but due to its overaggressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397
398 /* force out the log */ 362 /* force out the log */
399 xfs_log_force(mp, XFS_LOG_SYNC); 363 xfs_log_force(mp, XFS_LOG_SYNC);
400 364
@@ -913,17 +877,15 @@ reclaim:
913 * can reference the inodes in the cache without taking references. 877 * can reference the inodes in the cache without taking references.
914 * 878 *
915 * We make that OK here by ensuring that we wait until the inode is 879 * We make that OK here by ensuring that we wait until the inode is
916 * unlocked after the lookup before we go ahead and free it. We get 880 * unlocked after the lookup before we go ahead and free it.
917 * both the ilock and the iolock because the code may need to drop the
918 * ilock one but will still hold the iolock.
919 */ 881 */
920 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 882 xfs_ilock(ip, XFS_ILOCK_EXCL);
921 xfs_qm_dqdetach(ip); 883 xfs_qm_dqdetach(ip);
922 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 884 xfs_iunlock(ip, XFS_ILOCK_EXCL);
923 885
924 xfs_inode_free(ip); 886 xfs_inode_free(ip);
925 return error;
926 887
888 return error;
927} 889}
928 890
929/* 891/*
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index fa965479d78..941202e7ac6 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
40int xfs_reclaim_inodes_count(struct xfs_mount *mp); 38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bb134a81993..06838c42b2a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_dir_fsync); 580DEFINE_INODE_EVENT(xfs_dir_fsync);
581DEFINE_INODE_EVENT(xfs_file_fsync); 581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode); 582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode); 583DEFINE_INODE_EVENT(xfs_dirty_inode);
584DEFINE_INODE_EVENT(xfs_evict_inode); 584DEFINE_INODE_EVENT(xfs_evict_inode);
585 585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc); 586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@@ -627,16 +627,19 @@ DECLARE_EVENT_CLASS(xfs_namespace_class,
627 TP_STRUCT__entry( 627 TP_STRUCT__entry(
628 __field(dev_t, dev) 628 __field(dev_t, dev)
629 __field(xfs_ino_t, dp_ino) 629 __field(xfs_ino_t, dp_ino)
630 __field(int, namelen)
630 __dynamic_array(char, name, name->len) 631 __dynamic_array(char, name, name->len)
631 ), 632 ),
632 TP_fast_assign( 633 TP_fast_assign(
633 __entry->dev = VFS_I(dp)->i_sb->s_dev; 634 __entry->dev = VFS_I(dp)->i_sb->s_dev;
634 __entry->dp_ino = dp->i_ino; 635 __entry->dp_ino = dp->i_ino;
636 __entry->namelen = name->len;
635 memcpy(__get_str(name), name->name, name->len); 637 memcpy(__get_str(name), name->name, name->len);
636 ), 638 ),
637 TP_printk("dev %d:%d dp ino 0x%llx name %s", 639 TP_printk("dev %d:%d dp ino 0x%llx name %.*s",
638 MAJOR(__entry->dev), MINOR(__entry->dev), 640 MAJOR(__entry->dev), MINOR(__entry->dev),
639 __entry->dp_ino, 641 __entry->dp_ino,
642 __entry->namelen,
640 __get_str(name)) 643 __get_str(name))
641) 644)
642 645
@@ -658,6 +661,8 @@ TRACE_EVENT(xfs_rename,
658 __field(dev_t, dev) 661 __field(dev_t, dev)
659 __field(xfs_ino_t, src_dp_ino) 662 __field(xfs_ino_t, src_dp_ino)
660 __field(xfs_ino_t, target_dp_ino) 663 __field(xfs_ino_t, target_dp_ino)
664 __field(int, src_namelen)
665 __field(int, target_namelen)
661 __dynamic_array(char, src_name, src_name->len) 666 __dynamic_array(char, src_name, src_name->len)
662 __dynamic_array(char, target_name, target_name->len) 667 __dynamic_array(char, target_name, target_name->len)
663 ), 668 ),
@@ -665,15 +670,20 @@ TRACE_EVENT(xfs_rename,
665 __entry->dev = VFS_I(src_dp)->i_sb->s_dev; 670 __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
666 __entry->src_dp_ino = src_dp->i_ino; 671 __entry->src_dp_ino = src_dp->i_ino;
667 __entry->target_dp_ino = target_dp->i_ino; 672 __entry->target_dp_ino = target_dp->i_ino;
673 __entry->src_namelen = src_name->len;
674 __entry->target_namelen = target_name->len;
668 memcpy(__get_str(src_name), src_name->name, src_name->len); 675 memcpy(__get_str(src_name), src_name->name, src_name->len);
669 memcpy(__get_str(target_name), target_name->name, target_name->len); 676 memcpy(__get_str(target_name), target_name->name,
677 target_name->len);
670 ), 678 ),
671 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" 679 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
672 " src name %s target name %s", 680 " src name %.*s target name %.*s",
673 MAJOR(__entry->dev), MINOR(__entry->dev), 681 MAJOR(__entry->dev), MINOR(__entry->dev),
674 __entry->src_dp_ino, 682 __entry->src_dp_ino,
675 __entry->target_dp_ino, 683 __entry->target_dp_ino,
684 __entry->src_namelen,
676 __get_str(src_name), 685 __get_str(src_name),
686 __entry->target_namelen,
677 __get_str(target_name)) 687 __get_str(target_name))
678) 688)
679 689
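The new namelen fields exist because __dynamic_array(char, name, len) captures the bytes without a trailing NUL; "%.*s" bounds the read to exactly len bytes. A userspace illustration of the format specifier:

	#include <stdio.h>

	int main(void)
	{
		char name[4] = { 't', 'e', 's', 't' };	/* no trailing NUL */

		printf("%.*s\n", 4, name);	/* prints exactly 4 bytes */
		return 0;
	}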
@@ -741,10 +751,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
741DEFINE_DQUOT_EVENT(xfs_dqtobp_read); 751DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
742DEFINE_DQUOT_EVENT(xfs_dqread); 752DEFINE_DQUOT_EVENT(xfs_dqread);
743DEFINE_DQUOT_EVENT(xfs_dqread_fail); 753DEFINE_DQUOT_EVENT(xfs_dqread_fail);
744DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
746DEFINE_DQUOT_EVENT(xfs_dqget_hit); 754DEFINE_DQUOT_EVENT(xfs_dqget_hit);
747DEFINE_DQUOT_EVENT(xfs_dqget_miss); 755DEFINE_DQUOT_EVENT(xfs_dqget_miss);
756DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
757DEFINE_DQUOT_EVENT(xfs_dqget_dup);
748DEFINE_DQUOT_EVENT(xfs_dqput); 758DEFINE_DQUOT_EVENT(xfs_dqput);
749DEFINE_DQUOT_EVENT(xfs_dqput_wait); 759DEFINE_DQUOT_EVENT(xfs_dqput_wait);
750DEFINE_DQUOT_EVENT(xfs_dqput_free); 760DEFINE_DQUOT_EVENT(xfs_dqput_free);
@@ -782,12 +792,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
782 __entry->curr_res = tic->t_curr_res; 792 __entry->curr_res = tic->t_curr_res;
783 __entry->unit_res = tic->t_unit_res; 793 __entry->unit_res = tic->t_unit_res;
784 __entry->flags = tic->t_flags; 794 __entry->flags = tic->t_flags;
785 __entry->reserveq = list_empty(&log->l_reserveq); 795 __entry->reserveq = list_empty(&log->l_reserve_head.waiters);
786 __entry->writeq = list_empty(&log->l_writeq); 796 __entry->writeq = list_empty(&log->l_write_head.waiters);
787 xlog_crack_grant_head(&log->l_grant_reserve_head, 797 xlog_crack_grant_head(&log->l_reserve_head.grant,
788 &__entry->grant_reserve_cycle, 798 &__entry->grant_reserve_cycle,
789 &__entry->grant_reserve_bytes); 799 &__entry->grant_reserve_bytes);
790 xlog_crack_grant_head(&log->l_grant_write_head, 800 xlog_crack_grant_head(&log->l_write_head.grant,
791 &__entry->grant_write_cycle, 801 &__entry->grant_write_cycle,
792 &__entry->grant_write_bytes); 802 &__entry->grant_write_bytes);
793 __entry->curr_cycle = log->l_curr_cycle; 803 __entry->curr_cycle = log->l_curr_cycle;
@@ -826,20 +836,14 @@ DEFINE_EVENT(xfs_loggrant_class, name, \
826 TP_ARGS(log, tic)) 836 TP_ARGS(log, tic))
827DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 837DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
828DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 838DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
829DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
830DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); 839DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
831DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
832DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
833DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
834DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); 840DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); 841DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); 842DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
837DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 843DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 844DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 845DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); 846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 848DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); 849DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
@@ -1414,7 +1418,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1414DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); 1418DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1415DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); 1419DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1416 1420
1417DECLARE_EVENT_CLASS(xfs_dir2_class, 1421DECLARE_EVENT_CLASS(xfs_da_class,
1418 TP_PROTO(struct xfs_da_args *args), 1422 TP_PROTO(struct xfs_da_args *args),
1419 TP_ARGS(args), 1423 TP_ARGS(args),
1420 TP_STRUCT__entry( 1424 TP_STRUCT__entry(
@@ -1449,7 +1453,7 @@ DECLARE_EVENT_CLASS(xfs_dir2_class,
1449) 1453)
1450 1454
1451#define DEFINE_DIR2_EVENT(name) \ 1455#define DEFINE_DIR2_EVENT(name) \
1452DEFINE_EVENT(xfs_dir2_class, name, \ 1456DEFINE_EVENT(xfs_da_class, name, \
1453 TP_PROTO(struct xfs_da_args *args), \ 1457 TP_PROTO(struct xfs_da_args *args), \
1454 TP_ARGS(args)) 1458 TP_ARGS(args))
1455DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); 1459DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
@@ -1478,6 +1482,64 @@ DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1478DEFINE_DIR2_EVENT(xfs_dir2_node_removename); 1482DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1479DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); 1483DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1480 1484
1485#define DEFINE_ATTR_EVENT(name) \
1486DEFINE_EVENT(xfs_da_class, name, \
1487 TP_PROTO(struct xfs_da_args *args), \
1488 TP_ARGS(args))
1489DEFINE_ATTR_EVENT(xfs_attr_sf_add);
1490DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
1491DEFINE_ATTR_EVENT(xfs_attr_sf_create);
1492DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
1493DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
1494DEFINE_ATTR_EVENT(xfs_attr_sf_removename);
1495DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
1496
1497DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
1498DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old);
1499DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new);
1500DEFINE_ATTR_EVENT(xfs_attr_leaf_addname);
1501DEFINE_ATTR_EVENT(xfs_attr_leaf_create);
1502DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup);
1503DEFINE_ATTR_EVENT(xfs_attr_leaf_replace);
1504DEFINE_ATTR_EVENT(xfs_attr_leaf_removename);
1505DEFINE_ATTR_EVENT(xfs_attr_leaf_split);
1506DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before);
1507DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after);
1508DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag);
1509DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag);
1510DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags);
1511DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf);
1512DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node);
1513DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance);
1514DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance);
1515
1516DEFINE_ATTR_EVENT(xfs_attr_node_addname);
1517DEFINE_ATTR_EVENT(xfs_attr_node_lookup);
1518DEFINE_ATTR_EVENT(xfs_attr_node_replace);
1519DEFINE_ATTR_EVENT(xfs_attr_node_removename);
1520
1521#define DEFINE_DA_EVENT(name) \
1522DEFINE_EVENT(xfs_da_class, name, \
1523 TP_PROTO(struct xfs_da_args *args), \
1524 TP_ARGS(args))
1525DEFINE_DA_EVENT(xfs_da_split);
1526DEFINE_DA_EVENT(xfs_da_join);
1527DEFINE_DA_EVENT(xfs_da_link_before);
1528DEFINE_DA_EVENT(xfs_da_link_after);
1529DEFINE_DA_EVENT(xfs_da_unlink_back);
1530DEFINE_DA_EVENT(xfs_da_unlink_forward);
1531DEFINE_DA_EVENT(xfs_da_root_split);
1532DEFINE_DA_EVENT(xfs_da_root_join);
1533DEFINE_DA_EVENT(xfs_da_node_add);
1534DEFINE_DA_EVENT(xfs_da_node_create);
1535DEFINE_DA_EVENT(xfs_da_node_split);
1536DEFINE_DA_EVENT(xfs_da_node_remove);
1537DEFINE_DA_EVENT(xfs_da_node_rebalance);
1538DEFINE_DA_EVENT(xfs_da_node_unbalance);
1539DEFINE_DA_EVENT(xfs_da_swap_lastblock);
1540DEFINE_DA_EVENT(xfs_da_grow_inode);
1541DEFINE_DA_EVENT(xfs_da_shrink_inode);
1542
1481DECLARE_EVENT_CLASS(xfs_dir2_space_class, 1543DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1482 TP_PROTO(struct xfs_da_args *args, int idx), 1544 TP_PROTO(struct xfs_da_args *args, int idx),
1483 TP_ARGS(args, idx), 1545 TP_ARGS(args, idx),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7adcdf15ae0..103b00c9000 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -681,7 +681,6 @@ xfs_trans_reserve(
681 uint flags, 681 uint flags,
682 uint logcount) 682 uint logcount)
683{ 683{
684 int log_flags;
685 int error = 0; 684 int error = 0;
686 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 685 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
687 686
@@ -707,24 +706,32 @@ xfs_trans_reserve(
707 * Reserve the log space needed for this transaction. 706 * Reserve the log space needed for this transaction.
708 */ 707 */
709 if (logspace > 0) { 708 if (logspace > 0) {
710 ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); 709 bool permanent = false;
711 ASSERT((tp->t_log_count == 0) || 710
712 (tp->t_log_count == logcount)); 711 ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
712 ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
713
713 if (flags & XFS_TRANS_PERM_LOG_RES) { 714 if (flags & XFS_TRANS_PERM_LOG_RES) {
714 log_flags = XFS_LOG_PERM_RESERV;
715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES; 715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
716 permanent = true;
716 } else { 717 } else {
717 ASSERT(tp->t_ticket == NULL); 718 ASSERT(tp->t_ticket == NULL);
718 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); 719 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
719 log_flags = 0;
720 } 720 }
721 721
722 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 722 if (tp->t_ticket != NULL) {
723 &tp->t_ticket, 723 ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
724 XFS_TRANSACTION, log_flags, tp->t_type); 724 error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
725 if (error) { 725 } else {
726 goto undo_blocks; 726 error = xfs_log_reserve(tp->t_mountp, logspace,
727 logcount, &tp->t_ticket,
728 XFS_TRANSACTION, permanent,
729 tp->t_type);
727 } 730 }
731
732 if (error)
733 goto undo_blocks;
734
728 tp->t_log_res = logspace; 735 tp->t_log_res = logspace;
729 tp->t_log_count = logcount; 736 tp->t_log_count = logcount;
730 } 737 }
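The new tp->t_ticket branch serves transaction "rolling", where a permanent transaction is committed and immediately re-reserved on the same log ticket. A hedged sketch of the caller-side pattern (cf. xfs_trans_dup(); log_res/log_count stand in for the real reservation values):

	struct xfs_trans *ntp;
	int error;

	ntp = xfs_trans_dup(tp);	/* the ticket moves to ntp */
	error = xfs_trans_commit(tp, 0);
	if (error)
		return error;
	/* ntp->t_ticket != NULL, so this takes the xfs_log_regrant() path */
	error = xfs_trans_reserve(ntp, 0, log_res, 0,
				  XFS_TRANS_PERM_LOG_RES, log_count);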
@@ -752,6 +759,8 @@ xfs_trans_reserve(
752 */ 759 */
753undo_log: 760undo_log:
754 if (logspace > 0) { 761 if (logspace > 0) {
762 int log_flags;
763
755 if (flags & XFS_TRANS_PERM_LOG_RES) { 764 if (flags & XFS_TRANS_PERM_LOG_RES) {
756 log_flags = XFS_LOG_REL_PERM_RESERV; 765 log_flags = XFS_LOG_REL_PERM_RESERV;
757 } else { 766 } else {
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index ed9252bcdac..1dead07f092 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -611,50 +611,6 @@ xfs_ail_push_all(
611} 611}
612 612
613/* 613/*
614 * This is to be called when an item is unlocked that may have
615 * been in the AIL. It will wake up the first member of the AIL
616 * wait list if this item's unlocking might allow it to progress.
617 * If the item is in the AIL, then we need to get the AIL lock
618 * while doing our checking so we don't race with someone going
619 * to sleep waiting for this event in xfs_trans_push_ail().
620 */
621void
622xfs_trans_unlocked_item(
623 struct xfs_ail *ailp,
624 xfs_log_item_t *lip)
625{
626 xfs_log_item_t *min_lip;
627
628 /*
629 * If we're forcibly shutting down, we may have
630 * unlocked log items arbitrarily. The last thing
631 * we want to do is to move the tail of the log
632 * over some potentially valid data.
633 */
634 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
635 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
636 return;
637 }
638
639 /*
640 * This is the one case where we can call into xfs_ail_min()
641 * without holding the AIL lock because we only care about the
642 * case where we are at the tail of the AIL. If the object isn't
643 * at the tail, it doesn't matter what result we get back. This
644 * is slightly racy because since we were just unlocked, we could
645 * go to sleep between the call to xfs_ail_min and the call to
646 * xfs_log_move_tail, have someone else lock us, commit to us disk,
647 * move us out of the tail of the AIL, and then we wake up. However,
648 * the call to xfs_log_move_tail() doesn't do anything if there's
649 * not enough free space to wake people up so we're safe calling it.
650 */
651 min_lip = xfs_ail_min(ailp);
652
653 if (min_lip == lip)
654 xfs_log_move_tail(ailp->xa_mount, 1);
655} /* xfs_trans_unlocked_item */
656
657/*
658 * xfs_trans_ail_update - bulk AIL insertion operation. 614 * xfs_trans_ail_update - bulk AIL insertion operation.
659 * 615 *
660 * @xfs_trans_ail_update takes an array of log items that all need to be 616 * @xfs_trans_ail_update takes an array of log items that all need to be
@@ -685,7 +641,6 @@ xfs_trans_ail_update_bulk(
685 xfs_lsn_t lsn) __releases(ailp->xa_lock) 641 xfs_lsn_t lsn) __releases(ailp->xa_lock)
686{ 642{
687 xfs_log_item_t *mlip; 643 xfs_log_item_t *mlip;
688 xfs_lsn_t tail_lsn;
689 int mlip_changed = 0; 644 int mlip_changed = 0;
690 int i; 645 int i;
691 LIST_HEAD(tmp); 646 LIST_HEAD(tmp);
@@ -712,22 +667,12 @@ xfs_trans_ail_update_bulk(
712 667
713 if (!list_empty(&tmp)) 668 if (!list_empty(&tmp))
714 xfs_ail_splice(ailp, cur, &tmp, lsn); 669 xfs_ail_splice(ailp, cur, &tmp, lsn);
670 spin_unlock(&ailp->xa_lock);
715 671
716 if (!mlip_changed) { 672 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
717 spin_unlock(&ailp->xa_lock); 673 xlog_assign_tail_lsn(ailp->xa_mount);
718 return; 674 xfs_log_space_wake(ailp->xa_mount);
719 } 675 }
720
721 /*
722 * It is not safe to access mlip after the AIL lock is dropped, so we
723 * must get a copy of li_lsn before we do so. This is especially
724 * important on 32-bit platforms where accessing and updating 64-bit
725 * values like li_lsn is not atomic.
726 */
727 mlip = xfs_ail_min(ailp);
728 tail_lsn = mlip->li_lsn;
729 spin_unlock(&ailp->xa_lock);
730 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
731} 676}
732 677
733/* 678/*
@@ -758,7 +703,6 @@ xfs_trans_ail_delete_bulk(
758 int nr_items) __releases(ailp->xa_lock) 703 int nr_items) __releases(ailp->xa_lock)
759{ 704{
760 xfs_log_item_t *mlip; 705 xfs_log_item_t *mlip;
761 xfs_lsn_t tail_lsn;
762 int mlip_changed = 0; 706 int mlip_changed = 0;
763 int i; 707 int i;
764 708
@@ -785,23 +729,12 @@ xfs_trans_ail_delete_bulk(
785 if (mlip == lip) 729 if (mlip == lip)
786 mlip_changed = 1; 730 mlip_changed = 1;
787 } 731 }
732 spin_unlock(&ailp->xa_lock);
788 733
789 if (!mlip_changed) { 734 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
790 spin_unlock(&ailp->xa_lock); 735 xlog_assign_tail_lsn(ailp->xa_mount);
791 return; 736 xfs_log_space_wake(ailp->xa_mount);
792 } 737 }
793
794 /*
795 * It is not safe to access mlip after the AIL lock is dropped, so we
796 * must get a copy of li_lsn before we do so. This is especially
797 * important on 32-bit platforms where accessing and updating 64-bit
798 * values like li_lsn is not atomic. It is possible we've emptied the
799 * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
800 */
801 mlip = xfs_ail_min(ailp);
802 tail_lsn = mlip ? mlip->li_lsn : 0;
803 spin_unlock(&ailp->xa_lock);
804 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
805} 738}
806 739
807/* 740/*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 475a4ded4f4..1302d1d95a5 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -463,19 +463,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
463 * Default to a normal brelse() call if the tp is NULL. 463 * Default to a normal brelse() call if the tp is NULL.
464 */ 464 */
465 if (tp == NULL) { 465 if (tp == NULL) {
466 struct xfs_log_item *lip = bp->b_fspriv;
467
468 ASSERT(bp->b_transp == NULL); 466 ASSERT(bp->b_transp == NULL);
469
470 /*
471 * If there's a buf log item attached to the buffer,
472 * then let the AIL know that the buffer is being
473 * unlocked.
474 */
475 if (lip != NULL && lip->li_type == XFS_LI_BUF) {
476 bip = bp->b_fspriv;
477 xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
478 }
479 xfs_buf_relse(bp); 467 xfs_buf_relse(bp);
480 return; 468 return;
481 } 469 }
@@ -550,21 +538,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
 		ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
 		ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
 		xfs_buf_item_relse(bp);
-		bip = NULL;
-	}
-	bp->b_transp = NULL;
-
-	/*
-	 * If we've still got a buf log item on the buffer, then
-	 * tell the AIL that the buffer is being unlocked.
-	 */
-	if (bip != NULL) {
-		xfs_trans_unlocked_item(bip->bli_item.li_ailp,
-					(xfs_log_item_t*)bip);
 	}
 
+	bp->b_transp = NULL;
 	xfs_buf_relse(bp);
-	return;
 }
 
 /*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c4ba366d24e..279099717ed 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -605,7 +605,7 @@ xfs_trans_dqresv(
 	time_t		timer;
 	xfs_qwarncnt_t	warns;
 	xfs_qwarncnt_t	warnlimit;
-	xfs_qcnt_t	count;
+	xfs_qcnt_t	total_count;
 	xfs_qcnt_t	*resbcountp;
 	xfs_quotainfo_t	*q = mp->m_quotainfo;
 
@@ -648,13 +648,12 @@ xfs_trans_dqresv(
 			 * hardlimit or exceed the timelimit if we allocate
 			 * nblks.
 			 */
-			if (hardlimit > 0ULL &&
-			    hardlimit < nblks + *resbcountp) {
+			total_count = *resbcountp + nblks;
+			if (hardlimit && total_count > hardlimit) {
 				xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
 				goto error_return;
 			}
-			if (softlimit > 0ULL &&
-			    softlimit < nblks + *resbcountp) {
+			if (softlimit && total_count > softlimit) {
 				if ((timer != 0 && get_seconds() > timer) ||
 				    (warns != 0 && warns >= warnlimit)) {
 					xfs_quota_warn(mp, dqp,
@@ -666,7 +665,7 @@ xfs_trans_dqresv(
 				}
 			}
 			if (ninos > 0) {
-				count = be64_to_cpu(dqp->q_core.d_icount);
+				total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
 				timer = be32_to_cpu(dqp->q_core.d_itimer);
 				warns = be16_to_cpu(dqp->q_core.d_iwarns);
 				warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
@@ -677,13 +676,11 @@ xfs_trans_dqresv(
 				if (!softlimit)
 					softlimit = q->qi_isoftlimit;
 
-				if (hardlimit > 0ULL &&
-				    hardlimit < ninos + count) {
+				if (hardlimit && total_count > hardlimit) {
 					xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
 					goto error_return;
 				}
-				if (softlimit > 0ULL &&
-				    softlimit < ninos + count) {
+				if (softlimit && total_count > softlimit) {
 					if ((timer != 0 && get_seconds() > timer) ||
 					    (warns != 0 && warns >= warnlimit)) {
 						xfs_quota_warn(mp, dqp,
@@ -878,7 +875,7 @@ STATIC void
 xfs_trans_alloc_dqinfo(
 	xfs_trans_t	*tp)
 {
-	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
 }
 
 void
@@ -887,6 +884,6 @@ xfs_trans_free_dqinfo(
 {
 	if (!tp->t_dqinfo)
 		return;
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+	kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
 	tp->t_dqinfo = NULL;
 }
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 32f0288ae10..7a7442c03f2 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -95,10 +95,14 @@ xfs_trans_ichgtime(
 	if ((flags & XFS_ICHGTIME_MOD) &&
 	    !timespec_equal(&inode->i_mtime, &tv)) {
 		inode->i_mtime = tv;
+		ip->i_d.di_mtime.t_sec = tv.tv_sec;
+		ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
 	}
 	if ((flags & XFS_ICHGTIME_CHG) &&
 	    !timespec_equal(&inode->i_ctime, &tv)) {
 		inode->i_ctime = tv;
+		ip->i_d.di_ctime.t_sec = tv.tv_sec;
+		ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
 	}
 }
 
@@ -126,12 +130,12 @@ xfs_trans_log_inode(
 	/*
 	 * Always OR in the bits from the ili_last_fields field.
 	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
-	 * routines in the eventual clearing of the ilf_fields bits.
+	 * routines in the eventual clearing of the ili_fields bits.
 	 * See the big comment in xfs_iflush() for an explanation of
 	 * this coordination mechanism.
 	 */
 	flags |= ip->i_itemp->ili_last_fields;
-	ip->i_itemp->ili_format.ilf_fields |= flags;
+	ip->i_itemp->ili_fields |= flags;
 }
 
 #ifdef XFS_TRANS_DEBUG
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 44820b9fcb4..8ab2ced415f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -104,9 +104,6 @@ void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
 void			xfs_ail_push_all(struct xfs_ail *);
 xfs_lsn_t		xfs_ail_min_lsn(struct xfs_ail *ailp);
 
-void			xfs_trans_unlocked_item(struct xfs_ail *,
-					xfs_log_item_t *);
-
 struct xfs_log_item *	xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur,
 					xfs_lsn_t lsn);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 89dbb4a5087..79c05ac85bf 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -296,8 +296,6 @@ xfs_bumplink(
 	xfs_trans_t *tp,
 	xfs_inode_t *ip)
 {
-	if (ip->i_d.di_nlink >= XFS_MAXLINK)
-		return XFS_ERROR(EMLINK);
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
 	ASSERT(ip->i_d.di_nlink > 0);
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index 7c220b4227b..db14d0c0868 100644
--- a/fs/xfs/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
@@ -22,7 +22,6 @@
 
 struct file;
 struct xfs_inode;
-struct xfs_iomap;
 struct attrlist_cursor_kern;
 
 /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ebdb88840a4..64981d7e737 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -917,14 +917,6 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = B_TRUE;
 
-	/*
-	 * Check for directory link count overflow.
-	 */
-	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
-		error = XFS_ERROR(EMLINK);
-		goto out_trans_cancel;
-	}
-
 	xfs_bmap_init(&free_list, &first_block);
 
 	/*
@@ -1429,14 +1421,6 @@ xfs_link(
 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
 
 	/*
-	 * If the source has too many links, we can't make any more to it.
-	 */
-	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
-		error = XFS_ERROR(EMLINK);
-		goto error_return;
-	}
-
-	/*
 	 * If we are using project inheritance, we only allow hard link
 	 * creation in our tree when the project IDs are the same; else
 	 * the tree quota mechanism could be circumvented.
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 0c877cbde14..447e146b2ba 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -10,7 +10,6 @@ struct kiocb;
 struct pipe_inode_info;
 struct uio;
 struct xfs_inode;
-struct xfs_iomap;
 
 
 int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
@@ -49,8 +48,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
 int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
 		int flags, struct attrlist_cursor_kern *cursor);
-int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-		int flags, struct xfs_iomap *iomapp, int *niomaps);
 void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
 		xfs_off_t last, int fiopt);
 int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,