summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author: Steven Whitehouse <swhiteho@redhat.com> 2006-07-03 10:25:08 -0400
committer: Steven Whitehouse <swhiteho@redhat.com> 2006-07-03 10:25:08 -0400
commit: 0a1340c185734a57fbf4775927966ad4a1347b02 (patch)
tree: d9ed8f0dd809a7c542a3356601125ea5b5aaa804 /fs
parent: af18ddb8864b096e3ed4732e2d4b21c956dcfe3a (diff)
parent: 29454dde27d8e340bb1987bad9aa504af7081eba (diff)
Merge rsync://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: include/linux/kernel.h
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/conv.c1
-rw-r--r--fs/9p/error.c1
-rw-r--r--fs/9p/fcall.c1
-rw-r--r--fs/9p/fcprint.c1
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/9p/mux.c5
-rw-r--r--fs/9p/trans_fd.c1
-rw-r--r--fs/9p/v9fs.c1
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_addr.c3
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/9p/vfs_inode.c14
-rw-r--r--fs/9p/vfs_super.c29
-rw-r--r--fs/Kconfig141
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/dir.c1
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/adfs/super.c15
-rw-r--r--fs/affs/affs.h6
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/super.c24
-rw-r--r--fs/affs/symlink.c2
-rw-r--r--fs/afs/cell.c3
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/kafsasyncd.c9
-rw-r--r--fs/afs/mntpt.c13
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/super.c26
-rw-r--r--fs/afs/super.h2
-rw-r--r--fs/afs/vlocation.c6
-rw-r--r--fs/afs/vnode.c3
-rw-r--r--fs/aio.c6
-rw-r--r--fs/autofs/init.c6
-rw-r--r--fs/autofs4/expire.c9
-rw-r--r--fs/autofs4/init.c6
-rw-r--r--fs/befs/linuxvfs.c52
-rw-r--r--fs/bfs/bfs.h2
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c9
-rw-r--r--fs/binfmt_elf.c348
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c3
-rw-r--r--fs/binfmt_misc.c10
-rw-r--r--fs/binfmt_som.c1
-rw-r--r--fs/block_dev.c42
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/cifs/CHANGES17
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README39
-rw-r--r--fs/cifs/asn1.c11
-rw-r--r--fs/cifs/cifs_debug.c134
-rw-r--r--fs/cifs/cifs_debug.h4
-rw-r--r--fs/cifs/cifs_unicode.c1
-rw-r--r--fs/cifs/cifsencrypt.c140
-rw-r--r--fs/cifs/cifsfs.c25
-rw-r--r--fs/cifs/cifsfs.h7
-rw-r--r--fs/cifs/cifsglob.h71
-rw-r--r--fs/cifs/cifspdu.h98
-rw-r--r--fs/cifs/cifsproto.h14
-rw-r--r--fs/cifs/cifssmb.c287
-rw-r--r--fs/cifs/connect.c498
-rw-r--r--fs/cifs/dir.c15
-rw-r--r--fs/cifs/fcntl.c4
-rw-r--r--fs/cifs/file.c82
-rw-r--r--fs/cifs/inode.c39
-rw-r--r--fs/cifs/link.c7
-rw-r--r--fs/cifs/misc.c10
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/cifs/ntlmssp.c143
-rw-r--r--fs/cifs/readdir.c184
-rw-r--r--fs/cifs/sess.c538
-rw-r--r--fs/cifs/smbencrypt.c1
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c12
-rw-r--r--fs/coda/psdev.c25
-rw-r--r--fs/coda/symlink.c2
-rw-r--r--fs/coda/sysctl.c1
-rw-r--r--fs/coda/upcall.c6
-rw-r--r--fs/compat.c24
-rw-r--r--fs/compat_ioctl.c36
-rw-r--r--fs/configfs/dir.c12
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/configfs/symlink.c2
-rw-r--r--fs/cramfs/inode.c19
-rw-r--r--fs/dcache.c115
-rw-r--r--fs/dcookies.c1
-rw-r--r--fs/debugfs/file.c1
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/devfs/Makefile8
-rw-r--r--fs/devfs/base.c2836
-rw-r--r--fs/devfs/util.c97
-rw-r--r--fs/devpts/inode.c6
-rw-r--r--fs/direct-io.c18
-rw-r--r--fs/dquot.c4
-rw-r--r--fs/efs/inode.c2
-rw-r--r--fs/efs/super.c12
-rw-r--r--fs/efs/symlink.c2
-rw-r--r--fs/eventpoll.c36
-rw-r--r--fs/exec.c155
-rw-r--r--fs/ext2/Makefile2
-rw-r--r--fs/ext2/balloc.c23
-rw-r--r--fs/ext2/bitmap.c32
-rw-r--r--fs/ext2/dir.c6
-rw-r--r--fs/ext2/ext2.h6
-rw-r--r--fs/ext2/fsync.c2
-rw-r--r--fs/ext2/ialloc.c4
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/super.c40
-rw-r--r--fs/ext2/xattr.h1
-rw-r--r--fs/ext3/balloc.c243
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext3/ialloc.c10
-rw-r--r--fs/ext3/inode.c63
-rw-r--r--fs/ext3/ioctl.c2
-rw-r--r--fs/ext3/namei.c4
-rw-r--r--fs/ext3/resize.c82
-rw-r--r--fs/ext3/super.c123
-rw-r--r--fs/ext3/xattr.c27
-rw-r--r--fs/ext3/xattr.h1
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fat/misc.c1
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/freevxfs/vxfs.h4
-rw-r--r--fs/freevxfs/vxfs_fshead.c12
-rw-r--r--fs/freevxfs/vxfs_immed.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c6
-rw-r--r--fs/freevxfs/vxfs_subr.c5
-rw-r--r--fs/freevxfs/vxfs_super.c20
-rw-r--r--fs/fs-writeback.c10
-rw-r--r--fs/fuse/Makefile2
-rw-r--r--fs/fuse/control.c218
-rw-r--r--fs/fuse/dev.c418
-rw-r--r--fs/fuse/dir.c56
-rw-r--r--fs/fuse/file.c210
-rw-r--r--fs/fuse/fuse_i.h135
-rw-r--r--fs/fuse/inode.c194
-rw-r--r--fs/hfs/bnode.c2
-rw-r--r--fs/hfs/btree.c2
-rw-r--r--fs/hfs/hfs_fs.h4
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c12
-rw-r--r--fs/hfsplus/bitmap.c15
-rw-r--r--fs/hfsplus/bnode.c2
-rw-r--r--fs/hfsplus/btree.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/super.c13
-rw-r--r--fs/hostfs/hostfs_kern.c18
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c10
-rw-r--r--fs/hppfs/hppfs_kern.c10
-rw-r--r--fs/hugetlbfs/inode.c35
-rw-r--r--fs/inode.c12
-rw-r--r--fs/inotify.c991
-rw-r--r--fs/inotify_user.c719
-rw-r--r--fs/ioctl.c1
-rw-r--r--fs/ioprio.c35
-rw-r--r--fs/isofs/compress.c3
-rw-r--r--fs/isofs/dir.c1
-rw-r--r--fs/isofs/inode.c16
-rw-r--r--fs/isofs/isofs.h2
-rw-r--r--fs/isofs/rock.c2
-rw-r--r--fs/isofs/zisofs.h2
-rw-r--r--fs/jbd/checkpoint.c419
-rw-r--r--fs/jbd/commit.c21
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jbd/recovery.c1
-rw-r--r--fs/jbd/transaction.c12
-rw-r--r--fs/jffs/inode-v23.c15
-rw-r--r--fs/jffs/intrep.c16
-rw-r--r--fs/jffs/jffs_fm.h1
-rw-r--r--fs/jffs2/Makefile3
-rw-r--r--fs/jffs2/README.Locking21
-rw-r--r--fs/jffs2/acl.c487
-rw-r--r--fs/jffs2/acl.h45
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/compr.c2
-rw-r--r--fs/jffs2/compr.h4
-rw-r--r--fs/jffs2/compr_zlib.c1
-rw-r--r--fs/jffs2/debug.c14
-rw-r--r--fs/jffs2/debug.h7
-rw-r--r--fs/jffs2/dir.c121
-rw-r--r--fs/jffs2/erase.c88
-rw-r--r--fs/jffs2/file.c37
-rw-r--r--fs/jffs2/fs.c68
-rw-r--r--fs/jffs2/gc.c131
-rw-r--r--fs/jffs2/histo.h3
-rw-r--r--fs/jffs2/jffs2_fs_i.h55
-rw-r--r--fs/jffs2/jffs2_fs_sb.h136
-rw-r--r--fs/jffs2/malloc.c129
-rw-r--r--fs/jffs2/nodelist.c186
-rw-r--r--fs/jffs2/nodelist.h191
-rw-r--r--fs/jffs2/nodemgmt.c210
-rw-r--r--fs/jffs2/os-linux.h27
-rw-r--r--fs/jffs2/readinode.c138
-rw-r--r--fs/jffs2/scan.c449
-rw-r--r--fs/jffs2/security.c82
-rw-r--r--fs/jffs2/summary.c475
-rw-r--r--fs/jffs2/summary.h64
-rw-r--r--fs/jffs2/super.c70
-rw-r--r--fs/jffs2/symlink.c7
-rw-r--r--fs/jffs2/wbuf.c971
-rw-r--r--fs/jffs2/write.c147
-rw-r--r--fs/jffs2/xattr.c1326
-rw-r--r--fs/jffs2/xattr.h129
-rw-r--r--fs/jffs2/xattr_trusted.c52
-rw-r--r--fs/jffs2/xattr_user.c52
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/jfs_extent.c8
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/jfs_metapage.c7
-rw-r--r--fs/jfs/jfs_metapage.h2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c30
-rw-r--r--fs/lockd/clntlock.c39
-rw-r--r--fs/lockd/clntproc.c15
-rw-r--r--fs/lockd/host.c9
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/lockd/svclock.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/lockd/svcsubs.c1
-rw-r--r--fs/lockd/xdr.c1
-rw-r--r--fs/locks.c123
-rw-r--r--fs/minix/dir.c3
-rw-r--r--fs/minix/inode.c19
-rw-r--r--fs/mpage.c22
-rw-r--r--fs/msdos/namei.c9
-rw-r--r--fs/namei.c11
-rw-r--r--fs/namespace.c144
-rw-r--r--fs/ncpfs/dir.c1
-rw-r--r--fs/ncpfs/inode.c14
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/ncpfs/mmap.c2
-rw-r--r--fs/ncpfs/ncplib_kernel.c1
-rw-r--r--fs/ncpfs/ncplib_kernel.h1
-rw-r--r--fs/ncpfs/ncpsign_kernel.c1
-rw-r--r--fs/ncpfs/sock.c1
-rw-r--r--fs/ncpfs/symlink.c3
-rw-r--r--fs/nfs/Makefile8
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/callback_proc.c1
-rw-r--r--fs/nfs/callback_xdr.c3
-rw-r--r--fs/nfs/delegation.c1
-rw-r--r--fs/nfs/dir.c18
-rw-r--r--fs/nfs/direct.c5
-rw-r--r--fs/nfs/file.c36
-rw-r--r--fs/nfs/idmap.c1
-rw-r--r--fs/nfs/inode.c1293
-rw-r--r--fs/nfs/internal.h186
-rw-r--r--fs/nfs/namespace.c229
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3acl.c11
-rw-r--r--fs/nfs/nfs3proc.c5
-rw-r--r--fs/nfs/nfs3xdr.c6
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4namespace.c201
-rw-r--r--fs/nfs/nfs4proc.c111
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c218
-rw-r--r--fs/nfs/pagelist.c51
-rw-r--r--fs/nfs/proc.c5
-rw-r--r--fs/nfs/read.c121
-rw-r--r--fs/nfs/super.c1537
-rw-r--r--fs/nfs/symlink.c13
-rw-r--r--fs/nfs/sysctl.c11
-rw-r--r--fs/nfs/write.c57
-rw-r--r--fs/nfsctl.c1
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/nfsd/nfs4callback.c1
-rw-r--r--fs/nfsd/nfs4idmap.c1
-rw-r--r--fs/nfsd/nfs4state.c37
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfscache.c3
-rw-r--r--fs/nfsd/nfsctl.c7
-rw-r--r--fs/nfsd/nfsfh.c27
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--fs/nls/nls_base.c1
-rw-r--r--fs/ntfs/aops.c4
-rw-r--r--fs/ntfs/aops.h3
-rw-r--r--fs/ntfs/attrib.c6
-rw-r--r--fs/ntfs/file.c42
-rw-r--r--fs/ntfs/ntfs.h4
-rw-r--r--fs/ntfs/super.c14
-rw-r--r--fs/ntfs/sysctl.h1
-rw-r--r--fs/ocfs2/aops.c11
-rw-r--r--fs/ocfs2/cluster/heartbeat.c22
-rw-r--r--fs/ocfs2/cluster/masklog.h22
-rw-r--r--fs/ocfs2/cluster/ocfs2_heartbeat.h1
-rw-r--r--fs/ocfs2/cluster/tcp.c16
-rw-r--r--fs/ocfs2/dir.c6
-rw-r--r--fs/ocfs2/dlm/dlmast.c15
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h65
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c33
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c6
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h30
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c112
-rw-r--r--fs/ocfs2/dlm/dlmfs.c12
-rw-r--r--fs/ocfs2/dlm/dlmlock.c73
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c448
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c601
-rw-r--r--fs/ocfs2/dlm/dlmthread.c74
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c13
-rw-r--r--fs/ocfs2/dlm/userdlm.c2
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/extent_map.c29
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c10
-rw-r--r--fs/ocfs2/mmap.c4
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/ocfs2/super.c71
-rw-r--r--fs/ocfs2/symlink.c5
-rw-r--r--fs/ocfs2/vote.c8
-rw-r--r--fs/open.c34
-rw-r--r--fs/openpromfs/inode.c1160
-rw-r--r--fs/partitions/Makefile1
-rw-r--r--fs/partitions/acorn.c1
-rw-r--r--fs/partitions/check.c44
-rw-r--r--fs/partitions/devfs.c130
-rw-r--r--fs/partitions/devfs.h10
-rw-r--r--fs/partitions/efi.c1
-rw-r--r--fs/partitions/efi.h1
-rw-r--r--fs/partitions/ibm.c1
-rw-r--r--fs/partitions/mac.c1
-rw-r--r--fs/partitions/msdos.c1
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/array.c1
-rw-r--r--fs/proc/base.c1092
-rw-r--r--fs/proc/inode.c11
-rw-r--r--fs/proc/internal.h22
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--fs/proc/proc_misc.c24
-rw-r--r--fs/proc/root.c7
-rw-r--r--fs/proc/task_mmu.c140
-rw-r--r--fs/proc/task_nommu.c21
-rw-r--r--fs/proc/vmcore.c1
-rw-r--r--fs/qnx4/bitmap.c1
-rw-r--r--fs/qnx4/dir.c1
-rw-r--r--fs/qnx4/fsync.c1
-rw-r--r--fs/qnx4/inode.c16
-rw-r--r--fs/qnx4/namei.c1
-rw-r--r--fs/qnx4/truncate.c1
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/ramfs/inode.c13
-rw-r--r--fs/ramfs/internal.h2
-rw-r--r--fs/reiserfs/bitmap.c1
-rw-r--r--fs/reiserfs/dir.c1
-rw-r--r--fs/reiserfs/do_balan.c1
-rw-r--r--fs/reiserfs/file.c8
-rw-r--r--fs/reiserfs/fix_node.c1
-rw-r--r--fs/reiserfs/ibalance.c1
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/reiserfs/journal.c7
-rw-r--r--fs/reiserfs/lbalance.c1
-rw-r--r--fs/reiserfs/namei.c1
-rw-r--r--fs/reiserfs/objectid.c1
-rw-r--r--fs/reiserfs/prints.c1
-rw-r--r--fs/reiserfs/procfs.c1
-rw-r--r--fs/reiserfs/stree.c1
-rw-r--r--fs/reiserfs/super.c18
-rw-r--r--fs/reiserfs/tail_conversion.c1
-rw-r--r--fs/reiserfs/xattr.c3
-rw-r--r--fs/romfs/inode.c13
-rw-r--r--fs/select.c90
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/smbfs/inode.c13
-rw-r--r--fs/smbfs/proc.c4
-rw-r--r--fs/smbfs/proto.h4
-rw-r--r--fs/smbfs/request.c6
-rw-r--r--fs/smbfs/smbiod.c30
-rw-r--r--fs/splice.c46
-rw-r--r--fs/stat.c1
-rw-r--r--fs/super.c114
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/sysv/dir.c3
-rw-r--r--fs/sysv/inode.c3
-rw-r--r--fs/sysv/itree.c2
-rw-r--r--fs/sysv/super.c13
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/udf/super.c13
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/udf/udfdecl.h7
-rw-r--r--fs/ufs/balloc.c406
-rw-r--r--fs/ufs/cylinder.c49
-rw-r--r--fs/ufs/dir.c1000
-rw-r--r--fs/ufs/file.c25
-rw-r--r--fs/ufs/ialloc.c63
-rw-r--r--fs/ufs/inode.c381
-rw-r--r--fs/ufs/namei.c84
-rw-r--r--fs/ufs/super.c437
-rw-r--r--fs/ufs/truncate.c250
-rw-r--r--fs/ufs/util.c102
-rw-r--r--fs/ufs/util.h115
-rw-r--r--fs/vfat/namei.c9
-rw-r--r--fs/xattr.c4
-rw-r--r--fs/xfs/Kconfig27
-rw-r--r--fs/xfs/Makefile-linux-2.64
-rw-r--r--fs/xfs/linux-2.6/kmem.h38
-rw-r--r--fs/xfs/linux-2.6/mrlock.h4
-rw-r--r--fs/xfs/linux-2.6/sema.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c171
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c59
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c65
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c126
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h34
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c123
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c200
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c41
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h167
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c52
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h376
-rw-r--r--fs/xfs/quota/xfs_dquot.c8
-rw-r--r--fs/xfs/quota/xfs_dquot.h4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c6
-rw-r--r--fs/xfs/quota/xfs_qm.c8
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c81
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.c4
-rw-r--r--fs/xfs/support/debug.h3
-rw-r--r--fs/xfs/xfs_acl.c71
-rw-r--r--fs/xfs/xfs_acl.h16
-rw-r--r--fs/xfs/xfs_alloc.c31
-rw-r--r--fs/xfs/xfs_alloc.h2
-rw-r--r--fs/xfs/xfs_alloc_btree.c2
-rw-r--r--fs/xfs/xfs_attr.c66
-rw-r--r--fs/xfs/xfs_attr.h14
-rw-r--r--fs/xfs/xfs_attr_leaf.c4
-rw-r--r--fs/xfs/xfs_behavior.h3
-rw-r--r--fs/xfs/xfs_bmap.c386
-rw-r--r--fs/xfs/xfs_bmap.h22
-rw-r--r--fs/xfs/xfs_bmap_btree.c12
-rw-r--r--fs/xfs/xfs_btree.c2
-rw-r--r--fs/xfs/xfs_buf_item.c9
-rw-r--r--fs/xfs/xfs_cap.h10
-rw-r--r--fs/xfs/xfs_da_btree.c194
-rw-r--r--fs/xfs/xfs_da_btree.h4
-rw-r--r--fs/xfs/xfs_dfrag.c84
-rw-r--r--fs/xfs/xfs_dfrag.h3
-rw-r--r--fs/xfs/xfs_dinode.h5
-rw-r--r--fs/xfs/xfs_dir.c1217
-rw-r--r--fs/xfs/xfs_dir.h142
-rw-r--r--fs/xfs/xfs_dir2.c396
-rw-r--r--fs/xfs/xfs_dir2.h32
-rw-r--r--fs/xfs/xfs_dir2_block.c31
-rw-r--r--fs/xfs/xfs_dir2_data.c5
-rw-r--r--fs/xfs/xfs_dir2_data.h8
-rw-r--r--fs/xfs/xfs_dir2_leaf.c14
-rw-r--r--fs/xfs/xfs_dir2_node.c16
-rw-r--r--fs/xfs/xfs_dir2_sf.c13
-rw-r--r--fs/xfs/xfs_dir2_trace.c2
-rw-r--r--fs/xfs/xfs_dir_leaf.c2213
-rw-r--r--fs/xfs/xfs_dir_leaf.h231
-rw-r--r--fs/xfs/xfs_dir_sf.h155
-rw-r--r--fs/xfs/xfs_dmapi.h2
-rw-r--r--fs/xfs/xfs_dmops.c1
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_extfree_item.c57
-rw-r--r--fs/xfs/xfs_extfree_item.h53
-rw-r--r--fs/xfs/xfs_fs.h7
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_ialloc.c5
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/xfs_iget.c22
-rw-r--r--fs/xfs/xfs_inode.c98
-rw-r--r--fs/xfs/xfs_inode.h7
-rw-r--r--fs/xfs/xfs_inode_item.c57
-rw-r--r--fs/xfs/xfs_inode_item.h61
-rw-r--r--fs/xfs/xfs_iocore.c7
-rw-r--r--fs/xfs/xfs_iomap.c26
-rw-r--r--fs/xfs/xfs_itable.c20
-rw-r--r--fs/xfs/xfs_itable.h1
-rw-r--r--fs/xfs/xfs_log.c25
-rw-r--r--fs/xfs/xfs_log_recover.c115
-rw-r--r--fs/xfs/xfs_mount.c75
-rw-r--r--fs/xfs/xfs_mount.h70
-rw-r--r--fs/xfs/xfs_qmops.c1
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_rename.c68
-rw-r--r--fs/xfs/xfs_rtalloc.c18
-rw-r--r--fs/xfs/xfs_rw.c124
-rw-r--r--fs/xfs/xfs_rw.h10
-rw-r--r--fs/xfs/xfs_trans.c48
-rw-r--r--fs/xfs/xfs_trans.h11
-rw-r--r--fs/xfs/xfs_trans_ail.c6
-rw-r--r--fs/xfs/xfs_trans_buf.c12
-rw-r--r--fs/xfs/xfs_trans_extfree.c1
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_item.c2
-rw-r--r--fs/xfs/xfs_trans_space.h11
-rw-r--r--fs/xfs/xfs_utils.c13
-rw-r--r--fs/xfs/xfs_utils.h7
-rw-r--r--fs/xfs/xfs_vfsops.c122
-rw-r--r--fs/xfs/xfs_vnodeops.c306
523 files changed, 18972 insertions, 19366 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index a767e05b60bf..1e898144eb7c 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -24,7 +24,6 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/config.h>
28#include <linux/module.h> 27#include <linux/module.h>
29#include <linux/errno.h> 28#include <linux/errno.h>
30#include <linux/fs.h> 29#include <linux/fs.h>
diff --git a/fs/9p/error.c b/fs/9p/error.c
index 981fe8ecd780..ae91555c1558 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -27,7 +27,6 @@
27 * 27 *
28 */ 28 */
29 29
30#include <linux/config.h>
31#include <linux/module.h> 30#include <linux/module.h>
32 31
33#include <linux/list.h> 32#include <linux/list.h>
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
index 6f2617820a4e..8556097fcda8 100644
--- a/fs/9p/fcall.c
+++ b/fs/9p/fcall.c
@@ -24,7 +24,6 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/config.h>
28#include <linux/module.h> 27#include <linux/module.h>
29#include <linux/errno.h> 28#include <linux/errno.h>
30#include <linux/fs.h> 29#include <linux/fs.h>
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
index 583e827baebd..34b96114a28d 100644
--- a/fs/9p/fcprint.c
+++ b/fs/9p/fcprint.c
@@ -21,7 +21,6 @@
21 * Boston, MA 02111-1301 USA 21 * Boston, MA 02111-1301 USA
22 * 22 *
23 */ 23 */
24#include <linux/config.h>
25#include <linux/module.h> 24#include <linux/module.h>
26#include <linux/errno.h> 25#include <linux/errno.h>
27#include <linux/fs.h> 26#include <linux/fs.h>
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b7608af07ce8..70492ccb4385 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -20,7 +20,6 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/config.h>
24#include <linux/module.h> 23#include <linux/module.h>
25#include <linux/errno.h> 24#include <linux/errno.h>
26#include <linux/fs.h> 25#include <linux/fs.h>
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index f4407eb276c7..90a79c784549 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -23,7 +23,6 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/config.h>
27#include <linux/module.h> 26#include <linux/module.h>
28#include <linux/errno.h> 27#include <linux/errno.h>
29#include <linux/fs.h> 28#include <linux/fs.h>
@@ -712,7 +711,7 @@ static void v9fs_read_work(void *a)
712 * v9fs_send_request - send 9P request 711 * v9fs_send_request - send 9P request
713 * The function can sleep until the request is scheduled for sending. 712 * The function can sleep until the request is scheduled for sending.
714 * The function can be interrupted. Return from the function is not 713 * The function can be interrupted. Return from the function is not
715 * a guarantee that the request is sent succesfully. Can return errors 714 * a guarantee that the request is sent successfully. Can return errors
716 * that can be retrieved by PTR_ERR macros. 715 * that can be retrieved by PTR_ERR macros.
717 * 716 *
718 * @m: mux data 717 * @m: mux data
@@ -932,6 +931,8 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
932 r.rcall || r.err); 931 r.rcall || r.err);
933 } while (!r.rcall && !r.err && err==-ERESTARTSYS && 932 } while (!r.rcall && !r.err && err==-ERESTARTSYS &&
934 m->trans->status==Connected && !m->err); 933 m->trans->status==Connected && !m->err);
934
935 err = -ERESTARTSYS;
935 } 936 }
936 sigpending = 1; 937 sigpending = 1;
937 } 938 }
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 94e0a7fd9fc2..34d43355beb7 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -25,7 +25,6 @@
25 * 25 *
26 */ 26 */
27 27
28#include <linux/config.h>
29#include <linux/in.h> 28#include <linux/in.h>
30#include <linux/module.h> 29#include <linux/module.h>
31#include <linux/net.h> 30#include <linux/net.h>
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index d37416eb5791..22f7ccd58d38 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -23,7 +23,6 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/config.h>
27#include <linux/module.h> 26#include <linux/module.h>
28#include <linux/errno.h> 27#include <linux/errno.h>
29#include <linux/fs.h> 28#include <linux/fs.h>
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f867b8d3e973..450b0c1b385e 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -38,7 +38,7 @@
38 */ 38 */
39 39
40extern struct file_system_type v9fs_fs_type; 40extern struct file_system_type v9fs_fs_type;
41extern struct address_space_operations v9fs_addr_operations; 41extern const struct address_space_operations v9fs_addr_operations;
42extern const struct file_operations v9fs_file_operations; 42extern const struct file_operations v9fs_file_operations;
43extern const struct file_operations v9fs_dir_operations; 43extern const struct file_operations v9fs_dir_operations;
44extern struct dentry_operations v9fs_dentry_operations; 44extern struct dentry_operations v9fs_dentry_operations;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index efda46fb64d9..9dfd259a70b4 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -31,7 +31,6 @@
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/smp_lock.h> 32#include <linux/smp_lock.h>
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/version.h>
35#include <linux/pagemap.h> 34#include <linux/pagemap.h>
36#include <linux/idr.h> 35#include <linux/idr.h>
37 36
@@ -103,6 +102,6 @@ UnmapAndUnlock:
103 return retval; 102 return retval;
104} 103}
105 104
106struct address_space_operations v9fs_addr_operations = { 105const struct address_space_operations v9fs_addr_operations = {
107 .readpage = v9fs_vfs_readpage, 106 .readpage = v9fs_vfs_readpage,
108}; 107};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 1a8e46084f0e..c3c47eda7574 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -31,7 +31,6 @@
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/smp_lock.h> 32#include <linux/smp_lock.h>
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/version.h>
35#include <linux/list.h> 34#include <linux/list.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
37#include <linux/idr.h> 36#include <linux/idr.h>
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2cb87ba4b1c1..2f580a197b8d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -300,7 +300,7 @@ clunk_fid:
300 fid = V9FS_NOFID; 300 fid = V9FS_NOFID;
301 301
302put_fid: 302put_fid:
303 if (fid >= 0) 303 if (fid != V9FS_NOFID)
304 v9fs_put_idpool(fid, &v9ses->fidpool); 304 v9fs_put_idpool(fid, &v9ses->fidpool);
305 305
306 kfree(fcall); 306 kfree(fcall);
@@ -530,9 +530,6 @@ error:
530 if (vfid) 530 if (vfid)
531 v9fs_fid_destroy(vfid); 531 v9fs_fid_destroy(vfid);
532 532
533 if (inode)
534 iput(inode);
535
536 return err; 533 return err;
537} 534}
538 535
@@ -1054,6 +1051,9 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1054 int ret; 1051 int ret;
1055 char *link = __getname(); 1052 char *link = __getname();
1056 1053
1054 if (unlikely(!link))
1055 return -ENOMEM;
1056
1057 if (buflen > PATH_MAX) 1057 if (buflen > PATH_MAX)
1058 buflen = PATH_MAX; 1058 buflen = PATH_MAX;
1059 1059
@@ -1171,9 +1171,6 @@ error:
1171 if (vfid) 1171 if (vfid)
1172 v9fs_fid_destroy(vfid); 1172 v9fs_fid_destroy(vfid);
1173 1173
1174 if (inode)
1175 iput(inode);
1176
1177 return err; 1174 return err;
1178 1175
1179} 1176}
@@ -1227,6 +1224,9 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1227 } 1224 }
1228 1225
1229 name = __getname(); 1226 name = __getname();
1227 if (unlikely(!name))
1228 return -ENOMEM;
1229
1230 sprintf(name, "%d\n", oldfid->fid); 1230 sprintf(name, "%d\n", oldfid->fid);
1231 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); 1231 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
1232 __putname(name); 1232 __putname(name);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 61c599b4a1e3..63320d4e15d2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -25,7 +25,6 @@
25 */ 25 */
26 26
27#include <linux/kernel.h> 27#include <linux/kernel.h>
28#include <linux/config.h>
29#include <linux/module.h> 28#include <linux/module.h>
30#include <linux/errno.h> 29#include <linux/errno.h>
31#include <linux/fs.h> 30#include <linux/fs.h>
@@ -99,12 +98,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
99 * @flags: mount flags 98 * @flags: mount flags
100 * @dev_name: device name that was mounted 99 * @dev_name: device name that was mounted
101 * @data: mount options 100 * @data: mount options
101 * @mnt: mountpoint record to be instantiated
102 * 102 *
103 */ 103 */
104 104
105static struct super_block *v9fs_get_sb(struct file_system_type 105static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
106 *fs_type, int flags, 106 const char *dev_name, void *data,
107 const char *dev_name, void *data) 107 struct vfsmount *mnt)
108{ 108{
109 struct super_block *sb = NULL; 109 struct super_block *sb = NULL;
110 struct v9fs_fcall *fcall = NULL; 110 struct v9fs_fcall *fcall = NULL;
@@ -123,17 +123,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type
123 123
124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
125 if (!v9ses) 125 if (!v9ses)
126 return ERR_PTR(-ENOMEM); 126 return -ENOMEM;
127 127
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 129 dprintk(DEBUG_ERROR, "problem initiating session\n");
130 sb = ERR_PTR(newfid); 130 retval = newfid;
131 goto out_free_session; 131 goto out_free_session;
132 } 132 }
133 133
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 if (IS_ERR(sb)) 135 if (IS_ERR(sb)) {
136 retval = PTR_ERR(sb);
136 goto out_close_session; 137 goto out_close_session;
138 }
137 v9fs_fill_super(sb, v9ses, flags); 139 v9fs_fill_super(sb, v9ses, flags);
138 140
139 inode = v9fs_get_inode(sb, S_IFDIR | mode); 141 inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -184,19 +186,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type
184 goto put_back_sb; 186 goto put_back_sb;
185 } 187 }
186 188
187 return sb; 189 return simple_set_mnt(mnt, sb);
188 190
189out_close_session: 191out_close_session:
190 v9fs_session_close(v9ses); 192 v9fs_session_close(v9ses);
191out_free_session: 193out_free_session:
192 kfree(v9ses); 194 kfree(v9ses);
193 return sb; 195 return retval;
194 196
195put_back_sb: 197put_back_sb:
196 /* deactivate_super calls v9fs_kill_super which will frees the rest */ 198 /* deactivate_super calls v9fs_kill_super which will frees the rest */
197 up_write(&sb->s_umount); 199 up_write(&sb->s_umount);
198 deactivate_super(sb); 200 deactivate_super(sb);
199 return ERR_PTR(retval); 201 return retval;
200} 202}
201 203
202/** 204/**
@@ -253,11 +255,12 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
253} 255}
254 256
255static void 257static void
256v9fs_umount_begin(struct super_block *sb) 258v9fs_umount_begin(struct vfsmount *vfsmnt, int flags)
257{ 259{
258 struct v9fs_session_info *v9ses = sb->s_fs_info; 260 struct v9fs_session_info *v9ses = vfsmnt->mnt_sb->s_fs_info;
259 261
260 v9fs_session_cancel(v9ses); 262 if (flags & MNT_FORCE)
263 v9fs_session_cancel(v9ses);
261} 264}
262 265
263static struct super_operations v9fs_super_ops = { 266static struct super_operations v9fs_super_ops = {
diff --git a/fs/Kconfig b/fs/Kconfig
index 563a59e5e694..a7cd7db5e533 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -53,7 +53,7 @@ config EXT2_FS_SECURITY
53 53
54config EXT2_FS_XIP 54config EXT2_FS_XIP
55 bool "Ext2 execute in place support" 55 bool "Ext2 execute in place support"
56 depends on EXT2_FS 56 depends on EXT2_FS && MMU
57 help 57 help
58 Execute in place can be used on memory-backed block devices. If you 58 Execute in place can be used on memory-backed block devices. If you
59 enable this option, you can select to mount block devices which are 59 enable this option, you can select to mount block devices which are
@@ -327,7 +327,7 @@ source "fs/gfs2/Kconfig"
327 327
328config OCFS2_FS 328config OCFS2_FS
329 tristate "OCFS2 file system support (EXPERIMENTAL)" 329 tristate "OCFS2 file system support (EXPERIMENTAL)"
330 depends on NET && EXPERIMENTAL 330 depends on NET && SYSFS && EXPERIMENTAL
331 select CONFIGFS_FS 331 select CONFIGFS_FS
332 select JBD 332 select JBD
333 select CRC32 333 select CRC32
@@ -357,6 +357,16 @@ config OCFS2_FS
357 - POSIX ACLs 357 - POSIX ACLs
358 - readpages / writepages (not user visible) 358 - readpages / writepages (not user visible)
359 359
360config OCFS2_DEBUG_MASKLOG
361 bool "OCFS2 logging support"
362 depends on OCFS2_FS
363 default y
364 help
365 The ocfs2 filesystem has an extensive logging system. The system
366 allows selection of events to log via files in /sys/o2cb/logmask/.
367 This option will enlarge your kernel, but it allows debugging of
368 ocfs2 filesystem issues.
369
360config MINIX_FS 370config MINIX_FS
361 tristate "Minix fs support" 371 tristate "Minix fs support"
362 help 372 help
@@ -394,18 +404,30 @@ config INOTIFY
394 bool "Inotify file change notification support" 404 bool "Inotify file change notification support"
395 default y 405 default y
396 ---help--- 406 ---help---
397 Say Y here to enable inotify support and the associated system 407 Say Y here to enable inotify support. Inotify is a file change
398 calls. Inotify is a file change notification system and a 408 notification system and a replacement for dnotify. Inotify fixes
399 replacement for dnotify. Inotify fixes numerous shortcomings in 409 numerous shortcomings in dnotify and introduces several new features
400 dnotify and introduces several new features. It allows monitoring 410 including multiple file events, one-shot support, and unmount
401 of both files and directories via a single open fd. Other features
402 include multiple file events, one-shot support, and unmount
403 notification. 411 notification.
404 412
405 For more information, see Documentation/filesystems/inotify.txt 413 For more information, see Documentation/filesystems/inotify.txt
406 414
407 If unsure, say Y. 415 If unsure, say Y.
408 416
417config INOTIFY_USER
418 bool "Inotify support for userspace"
419 depends on INOTIFY
420 default y
421 ---help---
422 Say Y here to enable inotify support for userspace, including the
423 associated system calls. Inotify allows monitoring of both files and
424 directories via a single open fd. Events are read from the file
425 descriptor, which is also select()- and poll()-able.
426
427 For more information, see Documentation/filesystems/inotify.txt
428
429 If unsure, say Y.
430
409config QUOTA 431config QUOTA
410 bool "Quota support" 432 bool "Quota support"
411 help 433 help
@@ -765,7 +787,8 @@ endmenu
765menu "Pseudo filesystems" 787menu "Pseudo filesystems"
766 788
767config PROC_FS 789config PROC_FS
768 bool "/proc file system support" 790 bool "/proc file system support" if EMBEDDED
791 default y
769 help 792 help
770 This is a virtual file system providing information about the status 793 This is a virtual file system providing information about the status
771 of the system. "Virtual" means that it doesn't take up any space on 794 of the system. "Virtual" means that it doesn't take up any space on
@@ -1102,6 +1125,44 @@ config JFFS2_SUMMARY
1102 1125
1103 If unsure, say 'N'. 1126 If unsure, say 'N'.
1104 1127
1128config JFFS2_FS_XATTR
1129 bool "JFFS2 XATTR support (EXPERIMENTAL)"
1130 depends on JFFS2_FS && EXPERIMENTAL
1131 default n
1132 help
1133 Extended attributes are name:value pairs associated with inodes by
1134 the kernel or by users (see the attr(5) manual page, or visit
1135 <http://acl.bestbits.at/> for details).
1136
1137 If unsure, say N.
1138
1139config JFFS2_FS_POSIX_ACL
1140 bool "JFFS2 POSIX Access Control Lists"
1141 depends on JFFS2_FS_XATTR
1142 default y
1143 select FS_POSIX_ACL
1144 help
1145 Posix Access Control Lists (ACLs) support permissions for users and
1146 groups beyond the owner/group/world scheme.
1147
1148 To learn more about Access Control Lists, visit the Posix ACLs for
1149 Linux website <http://acl.bestbits.at/>.
1150
1151 If you don't know what Access Control Lists are, say N
1152
1153config JFFS2_FS_SECURITY
1154 bool "JFFS2 Security Labels"
1155 depends on JFFS2_FS_XATTR
1156 default y
1157 help
1158 Security labels support alternative access control models
1159 implemented by security modules like SELinux. This option
1160 enables an extended attribute handler for file security
1161 labels in the jffs2 filesystem.
1162
1163 If you are not using a security module that requires using
1164 extended attributes for file security labels, say N.
1165
1105config JFFS2_COMPRESSION_OPTIONS 1166config JFFS2_COMPRESSION_OPTIONS
1106 bool "Advanced compression options for JFFS2" 1167 bool "Advanced compression options for JFFS2"
1107 depends on JFFS2_FS 1168 depends on JFFS2_FS
@@ -1321,11 +1382,19 @@ config UFS_FS
1321 1382
1322config UFS_FS_WRITE 1383config UFS_FS_WRITE
1323 bool "UFS file system write support (DANGEROUS)" 1384 bool "UFS file system write support (DANGEROUS)"
1324 depends on UFS_FS && EXPERIMENTAL && BROKEN 1385 depends on UFS_FS && EXPERIMENTAL
1325 help 1386 help
1326 Say Y here if you want to try writing to UFS partitions. This is 1387 Say Y here if you want to try writing to UFS partitions. This is
1327 experimental, so you should back up your UFS partitions beforehand. 1388 experimental, so you should back up your UFS partitions beforehand.
1328 1389
1390config UFS_DEBUG
1391 bool "UFS debugging"
1392 depends on UFS_FS
1393 help
1394 If you are experiencing any problems with the UFS filesystem, say
1395 Y here. This will result in _many_ additional debugging messages to be
1396 written to the system log.
1397
1329endmenu 1398endmenu
1330 1399
1331menu "Network File Systems" 1400menu "Network File Systems"
@@ -1432,7 +1501,12 @@ config NFSD
1432 select LOCKD 1501 select LOCKD
1433 select SUNRPC 1502 select SUNRPC
1434 select EXPORTFS 1503 select EXPORTFS
1435 select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL 1504 select NFSD_V2_ACL if NFSD_V3_ACL
1505 select NFS_ACL_SUPPORT if NFSD_V2_ACL
1506 select NFSD_TCP if NFSD_V4
1507 select CRYPTO_MD5 if NFSD_V4
1508 select CRYPTO if NFSD_V4
1509 select FS_POSIX_ACL if NFSD_V4
1436 help 1510 help
1437 If you want your Linux box to act as an NFS *server*, so that other 1511 If you want your Linux box to act as an NFS *server*, so that other
1438 computers on your local network which support NFS can access certain 1512 computers on your local network which support NFS can access certain
@@ -1470,7 +1544,6 @@ config NFSD_V3
1470config NFSD_V3_ACL 1544config NFSD_V3_ACL
1471 bool "Provide server support for the NFSv3 ACL protocol extension" 1545 bool "Provide server support for the NFSv3 ACL protocol extension"
1472 depends on NFSD_V3 1546 depends on NFSD_V3
1473 select NFSD_V2_ACL
1474 help 1547 help
1475 Implement the NFSv3 ACL protocol extension for manipulating POSIX 1548 Implement the NFSv3 ACL protocol extension for manipulating POSIX
1476 Access Control Lists on exported file systems. NFS clients should 1549 Access Control Lists on exported file systems. NFS clients should
@@ -1480,10 +1553,6 @@ config NFSD_V3_ACL
1480config NFSD_V4 1553config NFSD_V4
1481 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1554 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1482 depends on NFSD_V3 && EXPERIMENTAL 1555 depends on NFSD_V3 && EXPERIMENTAL
1483 select NFSD_TCP
1484 select CRYPTO_MD5
1485 select CRYPTO
1486 select FS_POSIX_ACL
1487 help 1556 help
1488 If you would like to include the NFSv4 server as well as the NFSv2 1557 If you would like to include the NFSv4 server as well as the NFSv2
1489 and NFSv3 servers, say Y here. This feature is experimental, and 1558 and NFSv3 servers, say Y here. This feature is experimental, and
@@ -1664,7 +1733,7 @@ config CIFS_STATS
1664 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats 1733 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
1665 1734
1666config CIFS_STATS2 1735config CIFS_STATS2
1667 bool "CIFS extended statistics" 1736 bool "Extended statistics"
1668 depends on CIFS_STATS 1737 depends on CIFS_STATS
1669 help 1738 help
1670 Enabling this option will allow more detailed statistics on SMB 1739 Enabling this option will allow more detailed statistics on SMB
@@ -1677,6 +1746,32 @@ config CIFS_STATS2
1677 Unless you are a developer or are doing network performance analysis 1746 Unless you are a developer or are doing network performance analysis
1678 or tuning, say N. 1747 or tuning, say N.
1679 1748
1749config CIFS_WEAK_PW_HASH
1750 bool "Support legacy servers which use weaker LANMAN security"
1751 depends on CIFS
1752 help
1753 Modern CIFS servers including Samba and most Windows versions
1754 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
1755 security mechanisms. These hash the password more securely
1756 than the mechanisms used in the older LANMAN version of the
1757 SMB protocol needed to establish sessions with old SMB servers.
1758
1759 Enabling this option allows the cifs module to mount to older
1760 LANMAN based servers such as OS/2 and Windows 95, but such
1761 mounts may be less secure than mounts using NTLM or more recent
1762 security mechanisms if you are on a public network. Unless you
1763 have a need to access old SMB servers (and are on a private
1764 network) you probably want to say N. Even if this support
1765 is enabled in the kernel build, they will not be used
1766 automatically. At runtime LANMAN mounts are disabled but
1767 can be set to required (or optional) either in
1768 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
1769 option on the mount command. This support is disabled by
1770 default in order to reduce the possibility of a downgrade
1771 attack.
1772
1773 If unsure, say N.
1774
1680config CIFS_XATTR 1775config CIFS_XATTR
1681 bool "CIFS extended attributes" 1776 bool "CIFS extended attributes"
1682 depends on CIFS 1777 depends on CIFS
@@ -1705,6 +1800,16 @@ config CIFS_POSIX
1705 (such as Samba 3.10 and later) which can negotiate 1800 (such as Samba 3.10 and later) which can negotiate
1706 CIFS POSIX ACL support. If unsure, say N. 1801 CIFS POSIX ACL support. If unsure, say N.
1707 1802
1803config CIFS_DEBUG2
1804 bool "Enable additional CIFS debugging routines"
1805 help
1806 Enabling this option adds a few more debugging routines
1807 to the cifs code which slightly increases the size of
1808 the cifs module and can cause additional logging of debug
1809 messages in some error paths, slowing performance. This
1810 option can be turned off unless you are debugging
1811 cifs problems. If unsure, say N.
1812
1708config CIFS_EXPERIMENTAL 1813config CIFS_EXPERIMENTAL
1709 bool "CIFS Experimental Features (EXPERIMENTAL)" 1814 bool "CIFS Experimental Features (EXPERIMENTAL)"
1710 depends on CIFS && EXPERIMENTAL 1815 depends on CIFS && EXPERIMENTAL
@@ -1720,7 +1825,7 @@ config CIFS_EXPERIMENTAL
1720 If unsure, say N. 1825 If unsure, say N.
1721 1826
1722config CIFS_UPCALL 1827config CIFS_UPCALL
1723 bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" 1828 bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
1724 depends on CIFS_EXPERIMENTAL 1829 depends on CIFS_EXPERIMENTAL
1725 select CONNECTOR 1830 select CONNECTOR
1726 help 1831 help
diff --git a/fs/Makefile b/fs/Makefile
index c731d2c0f409..64df11047ccc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,6 +13,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
13 ioprio.o pnode.o drop_caches.o splice.o sync.o 13 ioprio.o pnode.o drop_caches.o splice.o sync.o
14 14
15obj-$(CONFIG_INOTIFY) += inotify.o 15obj-$(CONFIG_INOTIFY) += inotify.o
16obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
16obj-$(CONFIG_EPOLL) += eventpoll.o 17obj-$(CONFIG_EPOLL) += eventpoll.o
17obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 18obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
18 19
@@ -66,7 +67,6 @@ obj-$(CONFIG_MSDOS_FS) += msdos/
66obj-$(CONFIG_VFAT_FS) += vfat/ 67obj-$(CONFIG_VFAT_FS) += vfat/
67obj-$(CONFIG_BFS_FS) += bfs/ 68obj-$(CONFIG_BFS_FS) += bfs/
68obj-$(CONFIG_ISO9660_FS) += isofs/ 69obj-$(CONFIG_ISO9660_FS) += isofs/
69obj-$(CONFIG_DEVFS_FS) += devfs/
70obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ 70obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
71obj-$(CONFIG_HFS_FS) += hfs/ 71obj-$(CONFIG_HFS_FS) += hfs/
72obj-$(CONFIG_VXFS_FS) += freevxfs/ 72obj-$(CONFIG_VXFS_FS) += freevxfs/
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 7b075fc397da..d3c7905b2ddc 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -9,7 +9,6 @@
9 * 9 *
10 * Common directory handling for ADFS 10 * Common directory handling for ADFS
11 */ 11 */
12#include <linux/config.h>
13#include <linux/errno.h> 12#include <linux/errno.h>
14#include <linux/fs.h> 13#include <linux/fs.h>
15#include <linux/adfs_fs.h> 14#include <linux/adfs_fs.h>
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index a02802a30798..534f3eecc985 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,7 +72,7 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
72 return generic_block_bmap(mapping, block, adfs_get_block); 72 return generic_block_bmap(mapping, block, adfs_get_block);
73} 73}
74 74
75static struct address_space_operations adfs_aops = { 75static const struct address_space_operations adfs_aops = {
76 .readpage = adfs_readpage, 76 .readpage = adfs_readpage,
77 .writepage = adfs_writepage, 77 .writepage = adfs_writepage,
78 .sync_page = block_sync_page, 78 .sync_page = block_sync_page,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 252abda0d200..ba1c88af49fe 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -196,17 +196,17 @@ static int adfs_remount(struct super_block *sb, int *flags, char *data)
196 return parse_options(sb, data); 196 return parse_options(sb, data);
197} 197}
198 198
199static int adfs_statfs(struct super_block *sb, struct kstatfs *buf) 199static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
200{ 200{
201 struct adfs_sb_info *asb = ADFS_SB(sb); 201 struct adfs_sb_info *asb = ADFS_SB(dentry->d_sb);
202 202
203 buf->f_type = ADFS_SUPER_MAGIC; 203 buf->f_type = ADFS_SUPER_MAGIC;
204 buf->f_namelen = asb->s_namelen; 204 buf->f_namelen = asb->s_namelen;
205 buf->f_bsize = sb->s_blocksize; 205 buf->f_bsize = dentry->d_sb->s_blocksize;
206 buf->f_blocks = asb->s_size; 206 buf->f_blocks = asb->s_size;
207 buf->f_files = asb->s_ids_per_zone * asb->s_map_size; 207 buf->f_files = asb->s_ids_per_zone * asb->s_map_size;
208 buf->f_bavail = 208 buf->f_bavail =
209 buf->f_bfree = adfs_map_free(sb); 209 buf->f_bfree = adfs_map_free(dentry->d_sb);
210 buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks; 210 buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks;
211 211
212 return 0; 212 return 0;
@@ -470,10 +470,11 @@ error:
470 return -EINVAL; 470 return -EINVAL;
471} 471}
472 472
473static struct super_block *adfs_get_sb(struct file_system_type *fs_type, 473static int adfs_get_sb(struct file_system_type *fs_type,
474 int flags, const char *dev_name, void *data) 474 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
475{ 475{
476 return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super); 476 return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
477 mnt);
477} 478}
478 479
479static struct file_system_type adfs_fs_type = { 480static struct file_system_type adfs_fs_type = {
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index a43a876742b8..0ddd4cc0d1a0 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -195,9 +195,9 @@ extern struct inode_operations affs_symlink_inode_operations;
195extern const struct file_operations affs_file_operations; 195extern const struct file_operations affs_file_operations;
196extern const struct file_operations affs_file_operations_ofs; 196extern const struct file_operations affs_file_operations_ofs;
197extern const struct file_operations affs_dir_operations; 197extern const struct file_operations affs_dir_operations;
198extern struct address_space_operations affs_symlink_aops; 198extern const struct address_space_operations affs_symlink_aops;
199extern struct address_space_operations affs_aops; 199extern const struct address_space_operations affs_aops;
200extern struct address_space_operations affs_aops_ofs; 200extern const struct address_space_operations affs_aops_ofs;
201 201
202extern struct dentry_operations affs_dentry_operations; 202extern struct dentry_operations affs_dentry_operations;
203extern struct dentry_operations affs_dentry_operations_intl; 203extern struct dentry_operations affs_dentry_operations_intl;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 7076262af39b..3de8590e4f6a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,7 +406,7 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
406{ 406{
407 return generic_block_bmap(mapping,block,affs_get_block); 407 return generic_block_bmap(mapping,block,affs_get_block);
408} 408}
409struct address_space_operations affs_aops = { 409const struct address_space_operations affs_aops = {
410 .readpage = affs_readpage, 410 .readpage = affs_readpage,
411 .writepage = affs_writepage, 411 .writepage = affs_writepage,
412 .sync_page = block_sync_page, 412 .sync_page = block_sync_page,
@@ -759,7 +759,7 @@ out:
759 goto done; 759 goto done;
760} 760}
761 761
762struct address_space_operations affs_aops_ofs = { 762const struct address_space_operations affs_aops_ofs = {
763 .readpage = affs_readpage_ofs, 763 .readpage = affs_readpage_ofs,
764 //.writepage = affs_writepage_ofs, 764 //.writepage = affs_writepage_ofs,
765 //.sync_page = affs_sync_page_ofs, 765 //.sync_page = affs_sync_page_ofs,
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4d7e5b19e5cd..5200f4938df0 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -18,7 +18,7 @@
18 18
19extern struct timezone sys_tz; 19extern struct timezone sys_tz;
20 20
21static int affs_statfs(struct super_block *sb, struct kstatfs *buf); 21static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
22static int affs_remount (struct super_block *sb, int *flags, char *data); 22static int affs_remount (struct super_block *sb, int *flags, char *data);
23 23
24static void 24static void
@@ -271,6 +271,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
271 int reserved; 271 int reserved;
272 unsigned long mount_flags; 272 unsigned long mount_flags;
273 int tmp_flags; /* fix remount prototype... */ 273 int tmp_flags; /* fix remount prototype... */
274 u8 sig[4];
274 275
275 pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); 276 pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options");
276 277
@@ -370,8 +371,9 @@ got_root:
370 printk(KERN_ERR "AFFS: Cannot read boot block\n"); 371 printk(KERN_ERR "AFFS: Cannot read boot block\n");
371 goto out_error; 372 goto out_error;
372 } 373 }
373 chksum = be32_to_cpu(*(__be32 *)boot_bh->b_data); 374 memcpy(sig, boot_bh->b_data, 4);
374 brelse(boot_bh); 375 brelse(boot_bh);
376 chksum = be32_to_cpu(*(__be32 *)sig);
375 377
376 /* Dircache filesystems are compatible with non-dircache ones 378 /* Dircache filesystems are compatible with non-dircache ones
377 * when reading. As long as they aren't supported, writing is 379 * when reading. As long as they aren't supported, writing is
@@ -420,11 +422,11 @@ got_root:
420 } 422 }
421 423
422 if (mount_flags & SF_VERBOSE) { 424 if (mount_flags & SF_VERBOSE) {
423 chksum = cpu_to_be32(chksum); 425 u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
424 printk(KERN_NOTICE "AFFS: Mounting volume \"%*s\": Type=%.3s\\%c, Blocksize=%d\n", 426 printk(KERN_NOTICE "AFFS: Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
425 AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0], 427 len > 31 ? 31 : len,
426 AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1, 428 AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1,
427 (char *)&chksum,((char *)&chksum)[3] + '0',blocksize); 429 sig, sig[3] + '0', blocksize);
428 } 430 }
429 431
430 sb->s_flags |= MS_NODEV | MS_NOSUID; 432 sb->s_flags |= MS_NODEV | MS_NOSUID;
@@ -508,8 +510,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
508} 510}
509 511
510static int 512static int
511affs_statfs(struct super_block *sb, struct kstatfs *buf) 513affs_statfs(struct dentry *dentry, struct kstatfs *buf)
512{ 514{
515 struct super_block *sb = dentry->d_sb;
513 int free; 516 int free;
514 517
515 pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size, 518 pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size,
@@ -524,10 +527,11 @@ affs_statfs(struct super_block *sb, struct kstatfs *buf)
524 return 0; 527 return 0;
525} 528}
526 529
527static struct super_block *affs_get_sb(struct file_system_type *fs_type, 530static int affs_get_sb(struct file_system_type *fs_type,
528 int flags, const char *dev_name, void *data) 531 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
529{ 532{
530 return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super); 533 return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
534 mnt);
531} 535}
532 536
533static struct file_system_type affs_fs_type = { 537static struct file_system_type affs_fs_type = {
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 426f0f094f23..f802256a5933 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -66,7 +66,7 @@ fail:
66 return err; 66 return err;
67} 67}
68 68
69struct address_space_operations affs_symlink_aops = { 69const struct address_space_operations affs_symlink_aops = {
70 .readpage = affs_symlink_readpage, 70 .readpage = affs_symlink_readpage,
71}; 71};
72 72
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 009a9ae88d61..bfc1fd22d5b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -413,8 +413,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
413 413
414 /* we found it in the graveyard - resurrect it */ 414 /* we found it in the graveyard - resurrect it */
415 found_dead_server: 415 found_dead_server:
416 list_del(&server->link); 416 list_move_tail(&server->link, &cell->sv_list);
417 list_add_tail(&server->link, &cell->sv_list);
418 afs_get_server(server); 417 afs_get_server(server);
419 afs_kafstimod_del_timer(&server->timeout); 418 afs_kafstimod_del_timer(&server->timeout);
420 spin_unlock(&cell->sv_gylock); 419 spin_unlock(&cell->sv_gylock);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a6dff6a4f204..2fc99877cb0d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -185,9 +185,7 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
185 185
186 _enter("{%lu},%lu", dir->i_ino, index); 186 _enter("{%lu},%lu", dir->i_ino, index);
187 187
188 page = read_cache_page(dir->i_mapping,index, 188 page = read_mapping_page(dir->i_mapping, index, NULL);
189 (filler_t *) dir->i_mapping->a_ops->readpage,
190 NULL);
191 if (!IS_ERR(page)) { 189 if (!IS_ERR(page)) {
192 wait_on_page_locked(page); 190 wait_on_page_locked(page);
193 kmap(page); 191 kmap(page);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7bb716887e29..67d6634101fd 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -35,7 +35,7 @@ struct inode_operations afs_file_inode_operations = {
35 .getattr = afs_inode_getattr, 35 .getattr = afs_inode_getattr,
36}; 36};
37 37
38struct address_space_operations afs_fs_aops = { 38const struct address_space_operations afs_fs_aops = {
39 .readpage = afs_file_readpage, 39 .readpage = afs_file_readpage,
40 .sync_page = block_sync_page, 40 .sync_page = block_sync_page,
41 .set_page_dirty = __set_page_dirty_nobuffers, 41 .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72febdf9a35a..e88b3b65ae49 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -69,7 +69,7 @@ extern const struct file_operations afs_dir_file_operations;
69/* 69/*
70 * file.c 70 * file.c
71 */ 71 */
72extern struct address_space_operations afs_fs_aops; 72extern const struct address_space_operations afs_fs_aops;
73extern struct inode_operations afs_file_inode_operations; 73extern struct inode_operations afs_file_inode_operations;
74 74
75#ifdef AFS_CACHING_SUPPORT 75#ifdef AFS_CACHING_SUPPORT
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 7ac07d0d47b9..f09a794f248e 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -136,8 +136,7 @@ static int kafsasyncd(void *arg)
136 if (!list_empty(&kafsasyncd_async_attnq)) { 136 if (!list_empty(&kafsasyncd_async_attnq)) {
137 op = list_entry(kafsasyncd_async_attnq.next, 137 op = list_entry(kafsasyncd_async_attnq.next,
138 struct afs_async_op, link); 138 struct afs_async_op, link);
139 list_del(&op->link); 139 list_move_tail(&op->link,
140 list_add_tail(&op->link,
141 &kafsasyncd_async_busyq); 140 &kafsasyncd_async_busyq);
142 } 141 }
143 142
@@ -204,8 +203,7 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op)
204 init_waitqueue_entry(&op->waiter, kafsasyncd_task); 203 init_waitqueue_entry(&op->waiter, kafsasyncd_task);
205 add_wait_queue(&op->call->waitq, &op->waiter); 204 add_wait_queue(&op->call->waitq, &op->waiter);
206 205
207 list_del(&op->link); 206 list_move_tail(&op->link, &kafsasyncd_async_busyq);
208 list_add_tail(&op->link, &kafsasyncd_async_busyq);
209 207
210 spin_unlock(&kafsasyncd_async_lock); 208 spin_unlock(&kafsasyncd_async_lock);
211 209
@@ -223,8 +221,7 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op)
223 221
224 spin_lock(&kafsasyncd_async_lock); 222 spin_lock(&kafsasyncd_async_lock);
225 223
226 list_del(&op->link); 224 list_move_tail(&op->link, &kafsasyncd_async_attnq);
227 list_add_tail(&op->link, &kafsasyncd_async_attnq);
228 225
229 spin_unlock(&kafsasyncd_async_lock); 226 spin_unlock(&kafsasyncd_async_lock);
230 227
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 4e6eeb59b83c..99785a79d043 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -63,7 +63,6 @@ unsigned long afs_mntpt_expiry_timeout = 20;
63int afs_mntpt_check_symlink(struct afs_vnode *vnode) 63int afs_mntpt_check_symlink(struct afs_vnode *vnode)
64{ 64{
65 struct page *page; 65 struct page *page;
66 filler_t *filler;
67 size_t size; 66 size_t size;
68 char *buf; 67 char *buf;
69 int ret; 68 int ret;
@@ -71,10 +70,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
71 _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); 70 _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
72 71
73 /* read the contents of the symlink into the pagecache */ 72 /* read the contents of the symlink into the pagecache */
74 filler = (filler_t *) AFS_VNODE_TO_I(vnode)->i_mapping->a_ops->readpage; 73 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL);
75
76 page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
77 filler, NULL);
78 if (IS_ERR(page)) { 74 if (IS_ERR(page)) {
79 ret = PTR_ERR(page); 75 ret = PTR_ERR(page);
80 goto out; 76 goto out;
@@ -160,7 +156,6 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
160 struct page *page = NULL; 156 struct page *page = NULL;
161 size_t size; 157 size_t size;
162 char *buf, *devname = NULL, *options = NULL; 158 char *buf, *devname = NULL, *options = NULL;
163 filler_t *filler;
164 int ret; 159 int ret;
165 160
166 kenter("{%s}", mntpt->d_name.name); 161 kenter("{%s}", mntpt->d_name.name);
@@ -182,9 +177,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
182 goto error; 177 goto error;
183 178
184 /* read the contents of the AFS special symlink */ 179 /* read the contents of the AFS special symlink */
185 filler = (filler_t *)mntpt->d_inode->i_mapping->a_ops->readpage; 180 page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
186
187 page = read_cache_page(mntpt->d_inode->i_mapping, 0, filler, NULL);
188 if (IS_ERR(page)) { 181 if (IS_ERR(page)) {
189 ret = PTR_ERR(page); 182 ret = PTR_ERR(page);
190 goto error; 183 goto error;
@@ -210,7 +203,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
210 203
211 /* try and do the mount */ 204 /* try and do the mount */
212 kdebug("--- attempting mount %s -o %s ---", devname, options); 205 kdebug("--- attempting mount %s -o %s ---", devname, options);
213 mnt = do_kern_mount("afs", 0, devname, options); 206 mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
214 kdebug("--- mount result %p ---", mnt); 207 kdebug("--- mount result %p ---", mnt);
215 208
216 free_page((unsigned long) devname); 209 free_page((unsigned long) devname);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 62b093aa41c6..22afaae1a4ce 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -123,8 +123,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
123 resurrect_server: 123 resurrect_server:
124 _debug("resurrecting server"); 124 _debug("resurrecting server");
125 125
126 list_del(&zombie->link); 126 list_move_tail(&zombie->link, &cell->sv_list);
127 list_add_tail(&zombie->link, &cell->sv_list);
128 afs_get_server(zombie); 127 afs_get_server(zombie);
129 afs_kafstimod_del_timer(&zombie->timeout); 128 afs_kafstimod_del_timer(&zombie->timeout);
130 spin_unlock(&cell->sv_gylock); 129 spin_unlock(&cell->sv_gylock);
@@ -168,8 +167,7 @@ void afs_put_server(struct afs_server *server)
168 } 167 }
169 168
170 spin_lock(&cell->sv_gylock); 169 spin_lock(&cell->sv_gylock);
171 list_del(&server->link); 170 list_move_tail(&server->link, &cell->sv_graveyard);
172 list_add_tail(&server->link, &cell->sv_graveyard);
173 171
174 /* time out in 10 secs */ 172 /* time out in 10 secs */
175 afs_kafstimod_add_timer(&server->timeout, 10 * HZ); 173 afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 53c56e7231ab..67d1f5c819ec 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -38,9 +38,9 @@ struct afs_mount_params {
38static void afs_i_init_once(void *foo, kmem_cache_t *cachep, 38static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
39 unsigned long flags); 39 unsigned long flags);
40 40
41static struct super_block *afs_get_sb(struct file_system_type *fs_type, 41static int afs_get_sb(struct file_system_type *fs_type,
42 int flags, const char *dev_name, 42 int flags, const char *dev_name,
43 void *data); 43 void *data, struct vfsmount *mnt);
44 44
45static struct inode *afs_alloc_inode(struct super_block *sb); 45static struct inode *afs_alloc_inode(struct super_block *sb);
46 46
@@ -48,7 +48,7 @@ static void afs_put_super(struct super_block *sb);
48 48
49static void afs_destroy_inode(struct inode *inode); 49static void afs_destroy_inode(struct inode *inode);
50 50
51static struct file_system_type afs_fs_type = { 51struct file_system_type afs_fs_type = {
52 .owner = THIS_MODULE, 52 .owner = THIS_MODULE,
53 .name = "afs", 53 .name = "afs",
54 .get_sb = afs_get_sb, 54 .get_sb = afs_get_sb,
@@ -294,10 +294,11 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
294 * get an AFS superblock 294 * get an AFS superblock
295 * - TODO: don't use get_sb_nodev(), but rather call sget() directly 295 * - TODO: don't use get_sb_nodev(), but rather call sget() directly
296 */ 296 */
297static struct super_block *afs_get_sb(struct file_system_type *fs_type, 297static int afs_get_sb(struct file_system_type *fs_type,
298 int flags, 298 int flags,
299 const char *dev_name, 299 const char *dev_name,
300 void *options) 300 void *options,
301 struct vfsmount *mnt)
301{ 302{
302 struct afs_mount_params params; 303 struct afs_mount_params params;
303 struct super_block *sb; 304 struct super_block *sb;
@@ -311,7 +312,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type,
311 ret = afscm_start(); 312 ret = afscm_start();
312 if (ret < 0) { 313 if (ret < 0) {
313 _leave(" = %d", ret); 314 _leave(" = %d", ret);
314 return ERR_PTR(ret); 315 return ret;
315 } 316 }
316 317
317 /* parse the options */ 318 /* parse the options */
@@ -348,18 +349,19 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type,
348 goto error; 349 goto error;
349 } 350 }
350 sb->s_flags |= MS_ACTIVE; 351 sb->s_flags |= MS_ACTIVE;
352 simple_set_mnt(mnt, sb);
351 353
352 afs_put_volume(params.volume); 354 afs_put_volume(params.volume);
353 afs_put_cell(params.default_cell); 355 afs_put_cell(params.default_cell);
354 _leave(" = %p", sb); 356 _leave(" = 0 [%p]", 0, sb);
355 return sb; 357 return 0;
356 358
357 error: 359 error:
358 afs_put_volume(params.volume); 360 afs_put_volume(params.volume);
359 afs_put_cell(params.default_cell); 361 afs_put_cell(params.default_cell);
360 afscm_stop(); 362 afscm_stop();
361 _leave(" = %d", ret); 363 _leave(" = %d", ret);
362 return ERR_PTR(ret); 364 return ret;
363} /* end afs_get_sb() */ 365} /* end afs_get_sb() */
364 366
365/*****************************************************************************/ 367/*****************************************************************************/
diff --git a/fs/afs/super.h b/fs/afs/super.h
index ac11362f4e95..32de8cc6fae8 100644
--- a/fs/afs/super.h
+++ b/fs/afs/super.h
@@ -38,6 +38,8 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
38 return sb->s_fs_info; 38 return sb->s_fs_info;
39} 39}
40 40
41extern struct file_system_type afs_fs_type;
42
41#endif /* __KERNEL__ */ 43#endif /* __KERNEL__ */
42 44
43#endif /* _LINUX_AFS_SUPER_H */ 45#endif /* _LINUX_AFS_SUPER_H */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index eced20618ecc..331f730a1fb3 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -326,8 +326,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
326 /* found in the graveyard - resurrect */ 326 /* found in the graveyard - resurrect */
327 _debug("found in graveyard"); 327 _debug("found in graveyard");
328 atomic_inc(&vlocation->usage); 328 atomic_inc(&vlocation->usage);
329 list_del(&vlocation->link); 329 list_move_tail(&vlocation->link, &cell->vl_list);
330 list_add_tail(&vlocation->link, &cell->vl_list);
331 spin_unlock(&cell->vl_gylock); 330 spin_unlock(&cell->vl_gylock);
332 331
333 afs_kafstimod_del_timer(&vlocation->timeout); 332 afs_kafstimod_del_timer(&vlocation->timeout);
@@ -478,8 +477,7 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation)
478 } 477 }
479 478
480 /* move to graveyard queue */ 479 /* move to graveyard queue */
481 list_del(&vlocation->link); 480 list_move_tail(&vlocation->link,&cell->vl_graveyard);
482 list_add_tail(&vlocation->link,&cell->vl_graveyard);
483 481
484 /* remove from pending timeout queue (refcounted if actually being 482 /* remove from pending timeout queue (refcounted if actually being
485 * updated) */ 483 * updated) */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 9867fef3261d..cf62da5d7825 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -104,8 +104,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
104 vnode->cb_expiry * HZ); 104 vnode->cb_expiry * HZ);
105 105
106 spin_lock(&afs_cb_hash_lock); 106 spin_lock(&afs_cb_hash_lock);
107 list_del(&vnode->cb_hash_link); 107 list_move_tail(&vnode->cb_hash_link,
108 list_add_tail(&vnode->cb_hash_link,
109 &afs_cb_hash(server, &vnode->fid)); 108 &afs_cb_hash(server, &vnode->fid));
110 spin_unlock(&afs_cb_hash_lock); 109 spin_unlock(&afs_cb_hash_lock);
111 110
diff --git a/fs/aio.c b/fs/aio.c
index e41e932ba489..950630187acc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -641,7 +641,7 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb)
641 * invoked both for initial i/o submission and 641 * invoked both for initial i/o submission and
642 * subsequent retries via the aio_kick_handler. 642 * subsequent retries via the aio_kick_handler.
643 * Expects to be invoked with iocb->ki_ctx->lock 643 * Expects to be invoked with iocb->ki_ctx->lock
644 * already held. The lock is released and reaquired 644 * already held. The lock is released and reacquired
645 * as needed during processing. 645 * as needed during processing.
646 * 646 *
647 * Calls the iocb retry method (already setup for the 647 * Calls the iocb retry method (already setup for the
@@ -777,11 +777,11 @@ out:
777static int __aio_run_iocbs(struct kioctx *ctx) 777static int __aio_run_iocbs(struct kioctx *ctx)
778{ 778{
779 struct kiocb *iocb; 779 struct kiocb *iocb;
780 LIST_HEAD(run_list); 780 struct list_head run_list;
781 781
782 assert_spin_locked(&ctx->ctx_lock); 782 assert_spin_locked(&ctx->ctx_lock);
783 783
784 list_splice_init(&ctx->run_list, &run_list); 784 list_replace_init(&ctx->run_list, &run_list);
785 while (!list_empty(&run_list)) { 785 while (!list_empty(&run_list)) {
786 iocb = list_entry(run_list.next, struct kiocb, 786 iocb = list_entry(run_list.next, struct kiocb,
787 ki_run_list); 787 ki_run_list);
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index b977ece69f0c..aca123752406 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -14,10 +14,10 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include "autofs_i.h" 15#include "autofs_i.h"
16 16
17static struct super_block *autofs_get_sb(struct file_system_type *fs_type, 17static int autofs_get_sb(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data) 18 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
19{ 19{
20 return get_sb_nodev(fs_type, flags, data, autofs_fill_super); 20 return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
21} 21}
22 22
23static struct file_system_type autofs_fs_type = { 23static struct file_system_type autofs_fs_type = {
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b8ce02607d66..8dbd44f10e9d 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -174,6 +174,12 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
174 struct autofs_info *ino = autofs4_dentry_ino(p); 174 struct autofs_info *ino = autofs4_dentry_ino(p);
175 unsigned int ino_count = atomic_read(&ino->count); 175 unsigned int ino_count = atomic_read(&ino->count);
176 176
177 /*
178 * Clean stale dentries below that have not been
179 * invalidated after a mount fail during lookup
180 */
181 d_invalidate(p);
182
177 /* allow for dget above and top is already dgot */ 183 /* allow for dget above and top is already dgot */
178 if (p == top) 184 if (p == top)
179 ino_count += 2; 185 ino_count += 2;
@@ -370,8 +376,7 @@ next:
370 DPRINTK("returning %p %.*s", 376 DPRINTK("returning %p %.*s",
371 expired, (int)expired->d_name.len, expired->d_name.name); 377 expired, (int)expired->d_name.len, expired->d_name.name);
372 spin_lock(&dcache_lock); 378 spin_lock(&dcache_lock);
373 list_del(&expired->d_parent->d_subdirs); 379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
374 list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
375 spin_unlock(&dcache_lock); 380 spin_unlock(&dcache_lock);
376 return expired; 381 return expired;
377 } 382 }
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index acecec8578ce..5d9193332bef 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,10 +14,10 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include "autofs_i.h" 15#include "autofs_i.h"
16 16
17static struct super_block *autofs_get_sb(struct file_system_type *fs_type, 17static int autofs_get_sb(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data) 18 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
19{ 19{
20 return get_sb_nodev(fs_type, flags, data, autofs4_fill_super); 20 return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
21} 21}
22 22
23static struct file_system_type autofs_fs_type = { 23static struct file_system_type autofs_fs_type = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 68ebd10f345d..fcaeead9696b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -49,7 +49,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
49 char **out, int *out_len); 49 char **out, int *out_len);
50static void befs_put_super(struct super_block *); 50static void befs_put_super(struct super_block *);
51static int befs_remount(struct super_block *, int *, char *); 51static int befs_remount(struct super_block *, int *, char *);
52static int befs_statfs(struct super_block *, struct kstatfs *); 52static int befs_statfs(struct dentry *, struct kstatfs *);
53static int parse_options(char *, befs_mount_options *); 53static int parse_options(char *, befs_mount_options *);
54 54
55static const struct super_operations befs_sops = { 55static const struct super_operations befs_sops = {
@@ -73,7 +73,7 @@ static struct inode_operations befs_dir_inode_operations = {
73 .lookup = befs_lookup, 73 .lookup = befs_lookup,
74}; 74};
75 75
76static struct address_space_operations befs_aops = { 76static const struct address_space_operations befs_aops = {
77 .readpage = befs_readpage, 77 .readpage = befs_readpage,
78 .sync_page = block_sync_page, 78 .sync_page = block_sync_page,
79 .bmap = befs_bmap, 79 .bmap = befs_bmap,
@@ -325,7 +325,7 @@ befs_read_inode(struct inode *inode)
325 if (!bh) { 325 if (!bh) {
326 befs_error(sb, "unable to read inode block - " 326 befs_error(sb, "unable to read inode block - "
327 "inode = %lu", inode->i_ino); 327 "inode = %lu", inode->i_ino);
328 goto unaquire_none; 328 goto unacquire_none;
329 } 329 }
330 330
331 raw_inode = (befs_inode *) bh->b_data; 331 raw_inode = (befs_inode *) bh->b_data;
@@ -334,7 +334,7 @@ befs_read_inode(struct inode *inode)
334 334
335 if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) { 335 if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) {
336 befs_error(sb, "Bad inode: %lu", inode->i_ino); 336 befs_error(sb, "Bad inode: %lu", inode->i_ino);
337 goto unaquire_bh; 337 goto unacquire_bh;
338 } 338 }
339 339
340 inode->i_mode = (umode_t) fs32_to_cpu(sb, raw_inode->mode); 340 inode->i_mode = (umode_t) fs32_to_cpu(sb, raw_inode->mode);
@@ -402,17 +402,17 @@ befs_read_inode(struct inode *inode)
402 befs_error(sb, "Inode %lu is not a regular file, " 402 befs_error(sb, "Inode %lu is not a regular file, "
403 "directory or symlink. THAT IS WRONG! BeFS has no " 403 "directory or symlink. THAT IS WRONG! BeFS has no "
404 "on disk special files", inode->i_ino); 404 "on disk special files", inode->i_ino);
405 goto unaquire_bh; 405 goto unacquire_bh;
406 } 406 }
407 407
408 brelse(bh); 408 brelse(bh);
409 befs_debug(sb, "<--- befs_read_inode()"); 409 befs_debug(sb, "<--- befs_read_inode()");
410 return; 410 return;
411 411
412 unaquire_bh: 412 unacquire_bh:
413 brelse(bh); 413 brelse(bh);
414 414
415 unaquire_none: 415 unacquire_none:
416 make_bad_inode(inode); 416 make_bad_inode(inode);
417 befs_debug(sb, "<--- befs_read_inode() - Bad inode"); 417 befs_debug(sb, "<--- befs_read_inode() - Bad inode");
418 return; 418 return;
@@ -761,14 +761,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
761 printk(KERN_ERR 761 printk(KERN_ERR
762 "BeFS(%s): Unable to allocate memory for private " 762 "BeFS(%s): Unable to allocate memory for private "
763 "portion of superblock. Bailing.\n", sb->s_id); 763 "portion of superblock. Bailing.\n", sb->s_id);
764 goto unaquire_none; 764 goto unacquire_none;
765 } 765 }
766 befs_sb = BEFS_SB(sb); 766 befs_sb = BEFS_SB(sb);
767 memset(befs_sb, 0, sizeof(befs_sb_info)); 767 memset(befs_sb, 0, sizeof(befs_sb_info));
768 768
769 if (!parse_options((char *) data, &befs_sb->mount_opts)) { 769 if (!parse_options((char *) data, &befs_sb->mount_opts)) {
770 befs_error(sb, "cannot parse mount options"); 770 befs_error(sb, "cannot parse mount options");
771 goto unaquire_priv_sbp; 771 goto unacquire_priv_sbp;
772 } 772 }
773 773
774 befs_debug(sb, "---> befs_fill_super()"); 774 befs_debug(sb, "---> befs_fill_super()");
@@ -794,7 +794,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
794 794
795 if (!(bh = sb_bread(sb, sb_block))) { 795 if (!(bh = sb_bread(sb, sb_block))) {
796 befs_error(sb, "unable to read superblock"); 796 befs_error(sb, "unable to read superblock");
797 goto unaquire_priv_sbp; 797 goto unacquire_priv_sbp;
798 } 798 }
799 799
800 /* account for offset of super block on x86 */ 800 /* account for offset of super block on x86 */
@@ -809,20 +809,20 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
809 } 809 }
810 810
811 if (befs_load_sb(sb, disk_sb) != BEFS_OK) 811 if (befs_load_sb(sb, disk_sb) != BEFS_OK)
812 goto unaquire_bh; 812 goto unacquire_bh;
813 813
814 befs_dump_super_block(sb, disk_sb); 814 befs_dump_super_block(sb, disk_sb);
815 815
816 brelse(bh); 816 brelse(bh);
817 817
818 if (befs_check_sb(sb) != BEFS_OK) 818 if (befs_check_sb(sb) != BEFS_OK)
819 goto unaquire_priv_sbp; 819 goto unacquire_priv_sbp;
820 820
821 if( befs_sb->num_blocks > ~((sector_t)0) ) { 821 if( befs_sb->num_blocks > ~((sector_t)0) ) {
822 befs_error(sb, "blocks count: %Lu " 822 befs_error(sb, "blocks count: %Lu "
823 "is larger than the host can use", 823 "is larger than the host can use",
824 befs_sb->num_blocks); 824 befs_sb->num_blocks);
825 goto unaquire_priv_sbp; 825 goto unacquire_priv_sbp;
826 } 826 }
827 827
828 /* 828 /*
@@ -838,7 +838,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
838 if (!sb->s_root) { 838 if (!sb->s_root) {
839 iput(root); 839 iput(root);
840 befs_error(sb, "get root inode failed"); 840 befs_error(sb, "get root inode failed");
841 goto unaquire_priv_sbp; 841 goto unacquire_priv_sbp;
842 } 842 }
843 843
844 /* load nls library */ 844 /* load nls library */
@@ -860,13 +860,13 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
860 860
861 return 0; 861 return 0;
862/*****************/ 862/*****************/
863 unaquire_bh: 863 unacquire_bh:
864 brelse(bh); 864 brelse(bh);
865 865
866 unaquire_priv_sbp: 866 unacquire_priv_sbp:
867 kfree(sb->s_fs_info); 867 kfree(sb->s_fs_info);
868 868
869 unaquire_none: 869 unacquire_none:
870 sb->s_fs_info = NULL; 870 sb->s_fs_info = NULL;
871 return -EINVAL; 871 return -EINVAL;
872} 872}
@@ -880,8 +880,9 @@ befs_remount(struct super_block *sb, int *flags, char *data)
880} 880}
881 881
882static int 882static int
883befs_statfs(struct super_block *sb, struct kstatfs *buf) 883befs_statfs(struct dentry *dentry, struct kstatfs *buf)
884{ 884{
885 struct super_block *sb = dentry->d_sb;
885 886
886 befs_debug(sb, "---> befs_statfs()"); 887 befs_debug(sb, "---> befs_statfs()");
887 888
@@ -899,11 +900,12 @@ befs_statfs(struct super_block *sb, struct kstatfs *buf)
899 return 0; 900 return 0;
900} 901}
901 902
902static struct super_block * 903static int
903befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, 904befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
904 void *data) 905 void *data, struct vfsmount *mnt)
905{ 906{
906 return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super); 907 return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
908 mnt);
907} 909}
908 910
909static struct file_system_type befs_fs_type = { 911static struct file_system_type befs_fs_type = {
@@ -923,18 +925,18 @@ init_befs_fs(void)
923 925
924 err = befs_init_inodecache(); 926 err = befs_init_inodecache();
925 if (err) 927 if (err)
926 goto unaquire_none; 928 goto unacquire_none;
927 929
928 err = register_filesystem(&befs_fs_type); 930 err = register_filesystem(&befs_fs_type);
929 if (err) 931 if (err)
930 goto unaquire_inodecache; 932 goto unacquire_inodecache;
931 933
932 return 0; 934 return 0;
933 935
934unaquire_inodecache: 936unacquire_inodecache:
935 befs_destroy_inodecache(); 937 befs_destroy_inodecache();
936 938
937unaquire_none: 939unacquire_none:
938 return err; 940 return err;
939} 941}
940 942
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 9d791004b21c..31973bbbf057 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -50,7 +50,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
50/* file.c */ 50/* file.c */
51extern struct inode_operations bfs_file_inops; 51extern struct inode_operations bfs_file_inops;
52extern const struct file_operations bfs_file_operations; 52extern const struct file_operations bfs_file_operations;
53extern struct address_space_operations bfs_aops; 53extern const struct address_space_operations bfs_aops;
54 54
55/* dir.c */ 55/* dir.c */
56extern struct inode_operations bfs_dir_inops; 56extern struct inode_operations bfs_dir_inops;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index d83cd74a2e4e..3d5aca28a0a0 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -153,7 +153,7 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
153 return generic_block_bmap(mapping, block, bfs_get_block); 153 return generic_block_bmap(mapping, block, bfs_get_block);
154} 154}
155 155
156struct address_space_operations bfs_aops = { 156const struct address_space_operations bfs_aops = {
157 .readpage = bfs_readpage, 157 .readpage = bfs_readpage,
158 .writepage = bfs_writepage, 158 .writepage = bfs_writepage,
159 .sync_page = block_sync_page, 159 .sync_page = block_sync_page,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 55a7a78332f8..cf74f3d4d966 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -203,8 +203,9 @@ static void bfs_put_super(struct super_block *s)
203 s->s_fs_info = NULL; 203 s->s_fs_info = NULL;
204} 204}
205 205
206static int bfs_statfs(struct super_block *s, struct kstatfs *buf) 206static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
207{ 207{
208 struct super_block *s = dentry->d_sb;
208 struct bfs_sb_info *info = BFS_SB(s); 209 struct bfs_sb_info *info = BFS_SB(s);
209 u64 id = huge_encode_dev(s->s_bdev->bd_dev); 210 u64 id = huge_encode_dev(s->s_bdev->bd_dev);
210 buf->f_type = BFS_MAGIC; 211 buf->f_type = BFS_MAGIC;
@@ -410,10 +411,10 @@ out:
410 return -EINVAL; 411 return -EINVAL;
411} 412}
412 413
413static struct super_block *bfs_get_sb(struct file_system_type *fs_type, 414static int bfs_get_sb(struct file_system_type *fs_type,
414 int flags, const char *dev_name, void *data) 415 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
415{ 416{
416 return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super); 417 return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
417} 418}
418 419
419static struct file_system_type bfs_fs_type = { 420static struct file_system_type bfs_fs_type = {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 537893a16014..d0434406eaeb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -38,15 +38,13 @@
38#include <linux/security.h> 38#include <linux/security.h>
39#include <linux/syscalls.h> 39#include <linux/syscalls.h>
40#include <linux/random.h> 40#include <linux/random.h>
41 41#include <linux/elf.h>
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <asm/param.h> 43#include <asm/param.h>
44#include <asm/page.h> 44#include <asm/page.h>
45 45
46#include <linux/elf.h> 46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 47static int load_elf_library(struct file *);
48static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
49static int load_elf_library(struct file*);
50static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); 48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
51extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); 49extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
52 50
@@ -59,15 +57,15 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
59 * don't even try. 57 * don't even try.
60 */ 58 */
61#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 59#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
62static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); 60static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
63#else 61#else
64#define elf_core_dump NULL 62#define elf_core_dump NULL
65#endif 63#endif
66 64
67#if ELF_EXEC_PAGESIZE > PAGE_SIZE 65#if ELF_EXEC_PAGESIZE > PAGE_SIZE
68# define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE 66#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
69#else 67#else
70# define ELF_MIN_ALIGN PAGE_SIZE 68#define ELF_MIN_ALIGN PAGE_SIZE
71#endif 69#endif
72 70
73#ifndef ELF_CORE_EFLAGS 71#ifndef ELF_CORE_EFLAGS
@@ -86,7 +84,7 @@ static struct linux_binfmt elf_format = {
86 .min_coredump = ELF_EXEC_PAGESIZE 84 .min_coredump = ELF_EXEC_PAGESIZE
87}; 85};
88 86
89#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) 87#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
90 88
91static int set_brk(unsigned long start, unsigned long end) 89static int set_brk(unsigned long start, unsigned long end)
92{ 90{
@@ -104,13 +102,11 @@ static int set_brk(unsigned long start, unsigned long end)
104 return 0; 102 return 0;
105} 103}
106 104
107
108/* We need to explicitly zero any fractional pages 105/* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would 106 after the data section (i.e. bss). This would
110 contain the junk from the file that should not 107 contain the junk from the file that should not
111 be in memory */ 108 be in memory
112 109 */
113
114static int padzero(unsigned long elf_bss) 110static int padzero(unsigned long elf_bss)
115{ 111{
116 unsigned long nbyte; 112 unsigned long nbyte;
@@ -129,7 +125,9 @@ static int padzero(unsigned long elf_bss)
129#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items)) 125#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
130#define STACK_ROUND(sp, items) \ 126#define STACK_ROUND(sp, items) \
131 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL) 127 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
132#define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; old_sp; }) 128#define STACK_ALLOC(sp, len) ({ \
129 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
130 old_sp; })
133#else 131#else
134#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items)) 132#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
135#define STACK_ROUND(sp, items) \ 133#define STACK_ROUND(sp, items) \
@@ -138,7 +136,7 @@ static int padzero(unsigned long elf_bss)
138#endif 136#endif
139 137
140static int 138static int
141create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, 139create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
142 int interp_aout, unsigned long load_addr, 140 int interp_aout, unsigned long load_addr,
143 unsigned long interp_load_addr) 141 unsigned long interp_load_addr)
144{ 142{
@@ -161,7 +159,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
161 * for userspace to get any other way, in others (i386) it is 159 * for userspace to get any other way, in others (i386) it is
162 * merely difficult. 160 * merely difficult.
163 */ 161 */
164
165 u_platform = NULL; 162 u_platform = NULL;
166 if (k_platform) { 163 if (k_platform) {
167 size_t len = strlen(k_platform) + 1; 164 size_t len = strlen(k_platform) + 1;
@@ -171,7 +168,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
171 * evictions by the processes running on the same package. One 168 * evictions by the processes running on the same package. One
172 * thing we can do is to shuffle the initial stack for them. 169 * thing we can do is to shuffle the initial stack for them.
173 */ 170 */
174 171
175 p = arch_align_stack(p); 172 p = arch_align_stack(p);
176 173
177 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); 174 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
@@ -180,9 +177,12 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
180 } 177 }
181 178
182 /* Create the ELF interpreter info */ 179 /* Create the ELF interpreter info */
183 elf_info = (elf_addr_t *) current->mm->saved_auxv; 180 elf_info = (elf_addr_t *)current->mm->saved_auxv;
184#define NEW_AUX_ENT(id, val) \ 181#define NEW_AUX_ENT(id, val) \
185 do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0) 182 do { \
183 elf_info[ei_index++] = id; \
184 elf_info[ei_index++] = val; \
185 } while (0)
186 186
187#ifdef ARCH_DLINFO 187#ifdef ARCH_DLINFO
188 /* 188 /*
@@ -195,21 +195,22 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
195 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); 195 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); 196 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); 197 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr)); 198 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
199 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); 199 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 NEW_AUX_ENT(AT_BASE, interp_load_addr); 200 NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 NEW_AUX_ENT(AT_FLAGS, 0); 201 NEW_AUX_ENT(AT_FLAGS, 0);
202 NEW_AUX_ENT(AT_ENTRY, exec->e_entry); 202 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid); 203 NEW_AUX_ENT(AT_UID, tsk->uid);
204 NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid); 204 NEW_AUX_ENT(AT_EUID, tsk->euid);
205 NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid); 205 NEW_AUX_ENT(AT_GID, tsk->gid);
206 NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid); 206 NEW_AUX_ENT(AT_EGID, tsk->egid);
207 NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm)); 207 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
208 if (k_platform) { 208 if (k_platform) {
209 NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); 209 NEW_AUX_ENT(AT_PLATFORM,
210 (elf_addr_t)(unsigned long)u_platform);
210 } 211 }
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { 212 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data); 213 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
213 } 214 }
214#undef NEW_AUX_ENT 215#undef NEW_AUX_ENT
215 /* AT_NULL is zero; clear the rest too */ 216 /* AT_NULL is zero; clear the rest too */
@@ -232,7 +233,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
232 /* Point sp at the lowest address on the stack */ 233 /* Point sp at the lowest address on the stack */
233#ifdef CONFIG_STACK_GROWSUP 234#ifdef CONFIG_STACK_GROWSUP
234 sp = (elf_addr_t __user *)bprm->p - items - ei_index; 235 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
235 bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */ 236 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
236#else 237#else
237 sp = (elf_addr_t __user *)bprm->p; 238 sp = (elf_addr_t __user *)bprm->p;
238#endif 239#endif
@@ -285,7 +286,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
285#ifndef elf_map 286#ifndef elf_map
286 287
287static unsigned long elf_map(struct file *filep, unsigned long addr, 288static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type) 289 struct elf_phdr *eppnt, int prot, int type)
289{ 290{
290 unsigned long map_addr; 291 unsigned long map_addr;
291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); 292 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
@@ -310,9 +311,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
310 is only provided so that we can read a.out libraries that have 311 is only provided so that we can read a.out libraries that have
311 an ELF header */ 312 an ELF header */
312 313
313static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, 314static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
314 struct file * interpreter, 315 struct file *interpreter, unsigned long *interp_load_addr)
315 unsigned long *interp_load_addr)
316{ 316{
317 struct elf_phdr *elf_phdata; 317 struct elf_phdr *elf_phdata;
318 struct elf_phdr *eppnt; 318 struct elf_phdr *eppnt;
@@ -342,15 +342,15 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
342 goto out; 342 goto out;
343 343
344 /* Now read in all of the header information */ 344 /* Now read in all of the header information */
345
346 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum; 345 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
347 if (size > ELF_MIN_ALIGN) 346 if (size > ELF_MIN_ALIGN)
348 goto out; 347 goto out;
349 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); 348 elf_phdata = kmalloc(size, GFP_KERNEL);
350 if (!elf_phdata) 349 if (!elf_phdata)
351 goto out; 350 goto out;
352 351
353 retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size); 352 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
353 (char *)elf_phdata,size);
354 error = -EIO; 354 error = -EIO;
355 if (retval != size) { 355 if (retval != size) {
356 if (retval < 0) 356 if (retval < 0)
@@ -359,58 +359,65 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
359 } 359 }
360 360
361 eppnt = elf_phdata; 361 eppnt = elf_phdata;
362 for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { 362 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
363 if (eppnt->p_type == PT_LOAD) { 363 if (eppnt->p_type == PT_LOAD) {
364 int elf_type = MAP_PRIVATE | MAP_DENYWRITE; 364 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
365 int elf_prot = 0; 365 int elf_prot = 0;
366 unsigned long vaddr = 0; 366 unsigned long vaddr = 0;
367 unsigned long k, map_addr; 367 unsigned long k, map_addr;
368 368
369 if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; 369 if (eppnt->p_flags & PF_R)
370 if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; 370 elf_prot = PROT_READ;
371 if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; 371 if (eppnt->p_flags & PF_W)
372 vaddr = eppnt->p_vaddr; 372 elf_prot |= PROT_WRITE;
373 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) 373 if (eppnt->p_flags & PF_X)
374 elf_type |= MAP_FIXED; 374 elf_prot |= PROT_EXEC;
375 375 vaddr = eppnt->p_vaddr;
376 map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type); 376 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
377 error = map_addr; 377 elf_type |= MAP_FIXED;
378 if (BAD_ADDR(map_addr)) 378
379 goto out_close; 379 map_addr = elf_map(interpreter, load_addr + vaddr,
380 380 eppnt, elf_prot, elf_type);
381 if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { 381 error = map_addr;
382 load_addr = map_addr - ELF_PAGESTART(vaddr); 382 if (BAD_ADDR(map_addr))
383 load_addr_set = 1; 383 goto out_close;
384 } 384
385 385 if (!load_addr_set &&
386 /* 386 interp_elf_ex->e_type == ET_DYN) {
387 * Check to see if the section's size will overflow the 387 load_addr = map_addr - ELF_PAGESTART(vaddr);
388 * allowed task size. Note that p_filesz must always be 388 load_addr_set = 1;
389 * <= p_memsize so it is only necessary to check p_memsz. 389 }
390 */ 390
391 k = load_addr + eppnt->p_vaddr; 391 /*
392 if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz || 392 * Check to see if the section's size will overflow the
393 eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { 393 * allowed task size. Note that p_filesz must always be
394 error = -ENOMEM; 394 * <= p_memsize so it's only necessary to check p_memsz.
395 goto out_close; 395 */
396 } 396 k = load_addr + eppnt->p_vaddr;
397 397 if (k > TASK_SIZE ||
398 /* 398 eppnt->p_filesz > eppnt->p_memsz ||
399 * Find the end of the file mapping for this phdr, and keep 399 eppnt->p_memsz > TASK_SIZE ||
400 * track of the largest address we see for this. 400 TASK_SIZE - eppnt->p_memsz < k) {
401 */ 401 error = -ENOMEM;
402 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; 402 goto out_close;
403 if (k > elf_bss) 403 }
404 elf_bss = k; 404
405 405 /*
406 /* 406 * Find the end of the file mapping for this phdr, and
407 * Do the same thing for the memory mapping - between 407 * keep track of the largest address we see for this.
408 * elf_bss and last_bss is the bss section. 408 */
409 */ 409 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
410 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; 410 if (k > elf_bss)
411 if (k > last_bss) 411 elf_bss = k;
412 last_bss = k; 412
413 } 413 /*
414 * Do the same thing for the memory mapping - between
415 * elf_bss and last_bss is the bss section.
416 */
417 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
418 if (k > last_bss)
419 last_bss = k;
420 }
414 } 421 }
415 422
416 /* 423 /*
@@ -424,7 +431,8 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
424 goto out_close; 431 goto out_close;
425 } 432 }
426 433
427 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */ 434 /* What we have mapped so far */
435 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
428 436
429 /* Map the last of the bss segment */ 437 /* Map the last of the bss segment */
430 if (last_bss > elf_bss) { 438 if (last_bss > elf_bss) {
@@ -436,7 +444,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
436 } 444 }
437 445
438 *interp_load_addr = load_addr; 446 *interp_load_addr = load_addr;
439 error = ((unsigned long) interp_elf_ex->e_entry) + load_addr; 447 error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
440 448
441out_close: 449out_close:
442 kfree(elf_phdata); 450 kfree(elf_phdata);
@@ -444,8 +452,8 @@ out:
444 return error; 452 return error;
445} 453}
446 454
447static unsigned long load_aout_interp(struct exec * interp_ex, 455static unsigned long load_aout_interp(struct exec *interp_ex,
448 struct file * interpreter) 456 struct file *interpreter)
449{ 457{
450 unsigned long text_data, elf_entry = ~0UL; 458 unsigned long text_data, elf_entry = ~0UL;
451 char __user * addr; 459 char __user * addr;
@@ -464,7 +472,7 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
464 case ZMAGIC: 472 case ZMAGIC:
465 case QMAGIC: 473 case QMAGIC:
466 offset = N_TXTOFF(*interp_ex); 474 offset = N_TXTOFF(*interp_ex);
467 addr = (char __user *) N_TXTADDR(*interp_ex); 475 addr = (char __user *)N_TXTADDR(*interp_ex);
468 break; 476 break;
469 default: 477 default:
470 goto out; 478 goto out;
@@ -480,7 +488,6 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
480 flush_icache_range((unsigned long)addr, 488 flush_icache_range((unsigned long)addr,
481 (unsigned long)addr + text_data); 489 (unsigned long)addr + text_data);
482 490
483
484 down_write(&current->mm->mmap_sem); 491 down_write(&current->mm->mmap_sem);
485 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), 492 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
486 interp_ex->a_bss); 493 interp_ex->a_bss);
@@ -519,7 +526,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
519#endif 526#endif
520} 527}
521 528
522static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) 529static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
523{ 530{
524 struct file *interpreter = NULL; /* to shut gcc up */ 531 struct file *interpreter = NULL; /* to shut gcc up */
525 unsigned long load_addr = 0, load_bias = 0; 532 unsigned long load_addr = 0, load_bias = 0;
@@ -528,7 +535,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
528 unsigned int interpreter_type = INTERPRETER_NONE; 535 unsigned int interpreter_type = INTERPRETER_NONE;
529 unsigned char ibcs2_interpreter = 0; 536 unsigned char ibcs2_interpreter = 0;
530 unsigned long error; 537 unsigned long error;
531 struct elf_phdr * elf_ppnt, *elf_phdata; 538 struct elf_phdr *elf_ppnt, *elf_phdata;
532 unsigned long elf_bss, elf_brk; 539 unsigned long elf_bss, elf_brk;
533 int elf_exec_fileno; 540 int elf_exec_fileno;
534 int retval, i; 541 int retval, i;
@@ -553,7 +560,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
553 } 560 }
554 561
555 /* Get the exec-header */ 562 /* Get the exec-header */
556 loc->elf_ex = *((struct elfhdr *) bprm->buf); 563 loc->elf_ex = *((struct elfhdr *)bprm->buf);
557 564
558 retval = -ENOEXEC; 565 retval = -ENOEXEC;
559 /* First of all, some simple consistency checks */ 566 /* First of all, some simple consistency checks */
@@ -568,7 +575,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
568 goto out; 575 goto out;
569 576
570 /* Now read in all of the header information */ 577 /* Now read in all of the header information */
571
572 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) 578 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
573 goto out; 579 goto out;
574 if (loc->elf_ex.e_phnum < 1 || 580 if (loc->elf_ex.e_phnum < 1 ||
@@ -576,18 +582,19 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
576 goto out; 582 goto out;
577 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); 583 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
578 retval = -ENOMEM; 584 retval = -ENOMEM;
579 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); 585 elf_phdata = kmalloc(size, GFP_KERNEL);
580 if (!elf_phdata) 586 if (!elf_phdata)
581 goto out; 587 goto out;
582 588
583 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size); 589 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
590 (char *)elf_phdata, size);
584 if (retval != size) { 591 if (retval != size) {
585 if (retval >= 0) 592 if (retval >= 0)
586 retval = -EIO; 593 retval = -EIO;
587 goto out_free_ph; 594 goto out_free_ph;
588 } 595 }
589 596
590 files = current->files; /* Refcounted so ok */ 597 files = current->files; /* Refcounted so ok */
591 retval = unshare_files(); 598 retval = unshare_files();
592 if (retval < 0) 599 if (retval < 0)
593 goto out_free_ph; 600 goto out_free_ph;
@@ -598,7 +605,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
598 605
599 /* exec will make our files private anyway, but for the a.out 606 /* exec will make our files private anyway, but for the a.out
600 loader stuff we need to do it earlier */ 607 loader stuff we need to do it earlier */
601
602 retval = get_unused_fd(); 608 retval = get_unused_fd();
603 if (retval < 0) 609 if (retval < 0)
604 goto out_free_fh; 610 goto out_free_fh;
@@ -620,7 +626,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
620 * shared libraries - for now assume that this 626 * shared libraries - for now assume that this
621 * is an a.out format binary 627 * is an a.out format binary
622 */ 628 */
623
624 retval = -ENOEXEC; 629 retval = -ENOEXEC;
625 if (elf_ppnt->p_filesz > PATH_MAX || 630 if (elf_ppnt->p_filesz > PATH_MAX ||
626 elf_ppnt->p_filesz < 2) 631 elf_ppnt->p_filesz < 2)
@@ -628,13 +633,13 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
628 633
629 retval = -ENOMEM; 634 retval = -ENOMEM;
630 elf_interpreter = kmalloc(elf_ppnt->p_filesz, 635 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
631 GFP_KERNEL); 636 GFP_KERNEL);
632 if (!elf_interpreter) 637 if (!elf_interpreter)
633 goto out_free_file; 638 goto out_free_file;
634 639
635 retval = kernel_read(bprm->file, elf_ppnt->p_offset, 640 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
636 elf_interpreter, 641 elf_interpreter,
637 elf_ppnt->p_filesz); 642 elf_ppnt->p_filesz);
638 if (retval != elf_ppnt->p_filesz) { 643 if (retval != elf_ppnt->p_filesz) {
639 if (retval >= 0) 644 if (retval >= 0)
640 retval = -EIO; 645 retval = -EIO;
@@ -678,7 +683,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
678 retval = PTR_ERR(interpreter); 683 retval = PTR_ERR(interpreter);
679 if (IS_ERR(interpreter)) 684 if (IS_ERR(interpreter))
680 goto out_free_interp; 685 goto out_free_interp;
681 retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); 686 retval = kernel_read(interpreter, 0, bprm->buf,
687 BINPRM_BUF_SIZE);
682 if (retval != BINPRM_BUF_SIZE) { 688 if (retval != BINPRM_BUF_SIZE) {
683 if (retval >= 0) 689 if (retval >= 0)
684 retval = -EIO; 690 retval = -EIO;
@@ -686,8 +692,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
686 } 692 }
687 693
688 /* Get the exec headers */ 694 /* Get the exec headers */
689 loc->interp_ex = *((struct exec *) bprm->buf); 695 loc->interp_ex = *((struct exec *)bprm->buf);
690 loc->interp_elf_ex = *((struct elfhdr *) bprm->buf); 696 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
691 break; 697 break;
692 } 698 }
693 elf_ppnt++; 699 elf_ppnt++;
@@ -739,7 +745,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
739 745
740 /* OK, we are done with that, now set up the arg stuff, 746 /* OK, we are done with that, now set up the arg stuff,
741 and then start this sucker up */ 747 and then start this sucker up */
742
743 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) { 748 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
744 char *passed_p = passed_fileno; 749 char *passed_p = passed_fileno;
745 sprintf(passed_fileno, "%d", elf_exec_fileno); 750 sprintf(passed_fileno, "%d", elf_exec_fileno);
@@ -759,7 +764,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
759 764
760 /* Discard our unneeded old files struct */ 765 /* Discard our unneeded old files struct */
761 if (files) { 766 if (files) {
762 steal_locks(files);
763 put_files_struct(files); 767 put_files_struct(files);
764 files = NULL; 768 files = NULL;
765 } 769 }
@@ -778,7 +782,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
778 if (elf_read_implies_exec(loc->elf_ex, executable_stack)) 782 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
779 current->personality |= READ_IMPLIES_EXEC; 783 current->personality |= READ_IMPLIES_EXEC;
780 784
781 if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 785 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
782 current->flags |= PF_RANDOMIZE; 786 current->flags |= PF_RANDOMIZE;
783 arch_pick_mmap_layout(current->mm); 787 arch_pick_mmap_layout(current->mm);
784 788
@@ -799,8 +803,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
799 the correct location in memory. At this point, we assume that 803 the correct location in memory. At this point, we assume that
800 the image should be loaded at fixed address, not at a variable 804 the image should be loaded at fixed address, not at a variable
801 address. */ 805 address. */
802 806 for(i = 0, elf_ppnt = elf_phdata;
803 for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 807 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
804 int elf_prot = 0, elf_flags; 808 int elf_prot = 0, elf_flags;
805 unsigned long k, vaddr; 809 unsigned long k, vaddr;
806 810
@@ -828,30 +832,35 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
828 load_bias, nbyte)) { 832 load_bias, nbyte)) {
829 /* 833 /*
830 * This bss-zeroing can fail if the ELF 834 * This bss-zeroing can fail if the ELF
831 * file specifies odd protections. So 835 * file specifies odd protections. So
832 * we don't check the return value 836 * we don't check the return value
833 */ 837 */
834 } 838 }
835 } 839 }
836 } 840 }
837 841
838 if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; 842 if (elf_ppnt->p_flags & PF_R)
839 if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; 843 elf_prot |= PROT_READ;
840 if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; 844 if (elf_ppnt->p_flags & PF_W)
845 elf_prot |= PROT_WRITE;
846 if (elf_ppnt->p_flags & PF_X)
847 elf_prot |= PROT_EXEC;
841 848
842 elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; 849 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
843 850
844 vaddr = elf_ppnt->p_vaddr; 851 vaddr = elf_ppnt->p_vaddr;
845 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { 852 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
846 elf_flags |= MAP_FIXED; 853 elf_flags |= MAP_FIXED;
847 } else if (loc->elf_ex.e_type == ET_DYN) { 854 } else if (loc->elf_ex.e_type == ET_DYN) {
848 /* Try and get dynamic programs out of the way of the default mmap 855 /* Try and get dynamic programs out of the way of the
849 base, as well as whatever program they might try to exec. This 856 * default mmap base, as well as whatever program they
850 is because the brk will follow the loader, and is not movable. */ 857 * might try to exec. This is because the brk will
858 * follow the loader, and is not movable. */
851 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 859 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
852 } 860 }
853 861
854 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); 862 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
863 elf_prot, elf_flags);
855 if (BAD_ADDR(error)) { 864 if (BAD_ADDR(error)) {
856 send_sig(SIGKILL, current, 0); 865 send_sig(SIGKILL, current, 0);
857 goto out_free_dentry; 866 goto out_free_dentry;
@@ -868,8 +877,10 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
868 } 877 }
869 } 878 }
870 k = elf_ppnt->p_vaddr; 879 k = elf_ppnt->p_vaddr;
871 if (k < start_code) start_code = k; 880 if (k < start_code)
872 if (start_data < k) start_data = k; 881 start_code = k;
882 if (start_data < k)
883 start_data = k;
873 884
874 /* 885 /*
875 * Check to see if the section's size will overflow the 886 * Check to see if the section's size will overflow the
@@ -879,7 +890,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
879 if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || 890 if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
880 elf_ppnt->p_memsz > TASK_SIZE || 891 elf_ppnt->p_memsz > TASK_SIZE ||
881 TASK_SIZE - elf_ppnt->p_memsz < k) { 892 TASK_SIZE - elf_ppnt->p_memsz < k) {
882 /* set_brk can never work. Avoid overflows. */ 893 /* set_brk can never work. Avoid overflows. */
883 send_sig(SIGKILL, current, 0); 894 send_sig(SIGKILL, current, 0);
884 goto out_free_dentry; 895 goto out_free_dentry;
885 } 896 }
@@ -967,8 +978,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
967 978
968 compute_creds(bprm); 979 compute_creds(bprm);
969 current->flags &= ~PF_FORKNOEXEC; 980 current->flags &= ~PF_FORKNOEXEC;
970 create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), 981 create_elf_tables(bprm, &loc->elf_ex,
971 load_addr, interp_load_addr); 982 (interpreter_type == INTERPRETER_AOUT),
983 load_addr, interp_load_addr);
972 /* N.B. passed_fileno might not be initialized? */ 984 /* N.B. passed_fileno might not be initialized? */
973 if (interpreter_type == INTERPRETER_AOUT) 985 if (interpreter_type == INTERPRETER_AOUT)
974 current->mm->arg_start += strlen(passed_fileno) + 1; 986 current->mm->arg_start += strlen(passed_fileno) + 1;
@@ -982,7 +994,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
982 /* Why this, you ask??? Well SVr4 maps page 0 as read-only, 994 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
983 and some applications "depend" upon this behavior. 995 and some applications "depend" upon this behavior.
984 Since we do not have the power to recompile these, we 996 Since we do not have the power to recompile these, we
985 emulate the SVr4 behavior. Sigh. */ 997 emulate the SVr4 behavior. Sigh. */
986 down_write(&current->mm->mmap_sem); 998 down_write(&current->mm->mmap_sem);
987 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, 999 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
988 MAP_FIXED | MAP_PRIVATE, 0); 1000 MAP_FIXED | MAP_PRIVATE, 0);
@@ -1037,7 +1049,6 @@ out_free_ph:
1037 1049
1038/* This is really simpleminded and specialized - we are loading an 1050/* This is really simpleminded and specialized - we are loading an
1039 a.out library that is given an ELF header. */ 1051 a.out library that is given an ELF header. */
1040
1041static int load_elf_library(struct file *file) 1052static int load_elf_library(struct file *file)
1042{ 1053{
1043 struct elf_phdr *elf_phdata; 1054 struct elf_phdr *elf_phdata;
@@ -1047,7 +1058,7 @@ static int load_elf_library(struct file *file)
1047 struct elfhdr elf_ex; 1058 struct elfhdr elf_ex;
1048 1059
1049 error = -ENOEXEC; 1060 error = -ENOEXEC;
1050 retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex)); 1061 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1051 if (retval != sizeof(elf_ex)) 1062 if (retval != sizeof(elf_ex))
1052 goto out; 1063 goto out;
1053 1064
@@ -1056,7 +1067,7 @@ static int load_elf_library(struct file *file)
1056 1067
1057 /* First of all, some simple consistency checks */ 1068 /* First of all, some simple consistency checks */
1058 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || 1069 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1059 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) 1070 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1060 goto out; 1071 goto out;
1061 1072
1062 /* Now read in all of the header information */ 1073 /* Now read in all of the header information */
@@ -1104,7 +1115,8 @@ static int load_elf_library(struct file *file)
1104 goto out_free_ph; 1115 goto out_free_ph;
1105 } 1116 }
1106 1117
1107 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); 1118 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1119 ELF_MIN_ALIGN - 1);
1108 bss = eppnt->p_memsz + eppnt->p_vaddr; 1120 bss = eppnt->p_memsz + eppnt->p_vaddr;
1109 if (bss > len) { 1121 if (bss > len) {
1110 down_write(&current->mm->mmap_sem); 1122 down_write(&current->mm->mmap_sem);
@@ -1163,7 +1175,7 @@ static int maydump(struct vm_area_struct *vma)
1163 if (vma->vm_flags & (VM_IO | VM_RESERVED)) 1175 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1164 return 0; 1176 return 0;
1165 1177
1166 /* Dump shared memory only if mapped from an anonymous file. */ 1178 /* Dump shared memory only if mapped from an anonymous file. */
1167 if (vma->vm_flags & VM_SHARED) 1179 if (vma->vm_flags & VM_SHARED)
1168 return vma->vm_file->f_dentry->d_inode->i_nlink == 0; 1180 return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1169 1181
@@ -1174,7 +1186,7 @@ static int maydump(struct vm_area_struct *vma)
1174 return 1; 1186 return 1;
1175} 1187}
1176 1188
1177#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) 1189#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
1178 1190
1179/* An ELF note in memory */ 1191/* An ELF note in memory */
1180struct memelfnote 1192struct memelfnote
@@ -1277,11 +1289,11 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
1277} 1289}
1278 1290
1279/* 1291/*
1280 * fill up all the fields in prstatus from the given task struct, except registers 1292 * fill up all the fields in prstatus from the given task struct, except
1281 * which need to be filled up separately. 1293 * registers which need to be filled up separately.
1282 */ 1294 */
1283static void fill_prstatus(struct elf_prstatus *prstatus, 1295static void fill_prstatus(struct elf_prstatus *prstatus,
1284 struct task_struct *p, long signr) 1296 struct task_struct *p, long signr)
1285{ 1297{
1286 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1298 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1287 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1299 prstatus->pr_sigpend = p->pending.signal.sig[0];
@@ -1366,8 +1378,8 @@ struct elf_thread_status
1366 1378
1367/* 1379/*
1368 * In order to add the specific thread information for the elf file format, 1380 * In order to add the specific thread information for the elf file format,
1369 * we need to keep a linked list of every threads pr_status and then 1381 * we need to keep a linked list of every threads pr_status and then create
1370 * create a single section for them in the final core file. 1382 * a single section for them in the final core file.
1371 */ 1383 */
1372static int elf_dump_thread_status(long signr, struct elf_thread_status *t) 1384static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1373{ 1385{
@@ -1378,19 +1390,23 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1378 fill_prstatus(&t->prstatus, p, signr); 1390 fill_prstatus(&t->prstatus, p, signr);
1379 elf_core_copy_task_regs(p, &t->prstatus.pr_reg); 1391 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1380 1392
1381 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus)); 1393 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1394 &(t->prstatus));
1382 t->num_notes++; 1395 t->num_notes++;
1383 sz += notesize(&t->notes[0]); 1396 sz += notesize(&t->notes[0]);
1384 1397
1385 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) { 1398 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1386 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu)); 1399 &t->fpu))) {
1400 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1401 &(t->fpu));
1387 t->num_notes++; 1402 t->num_notes++;
1388 sz += notesize(&t->notes[1]); 1403 sz += notesize(&t->notes[1]);
1389 } 1404 }
1390 1405
1391#ifdef ELF_CORE_COPY_XFPREGS 1406#ifdef ELF_CORE_COPY_XFPREGS
1392 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { 1407 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1393 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu); 1408 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1409 &t->xfpu);
1394 t->num_notes++; 1410 t->num_notes++;
1395 sz += notesize(&t->notes[2]); 1411 sz += notesize(&t->notes[2]);
1396 } 1412 }
@@ -1405,7 +1421,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1405 * and then they are actually written out. If we run out of core limit 1421 * and then they are actually written out. If we run out of core limit
1406 * we just truncate. 1422 * we just truncate.
1407 */ 1423 */
1408static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) 1424static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1409{ 1425{
1410#define NUM_NOTES 6 1426#define NUM_NOTES 6
1411 int has_dumped = 0; 1427 int has_dumped = 0;
@@ -1434,12 +1450,12 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1434 /* 1450 /*
1435 * We no longer stop all VM operations. 1451 * We no longer stop all VM operations.
1436 * 1452 *
1437 * This is because those proceses that could possibly change map_count or 1453 * This is because those proceses that could possibly change map_count
1438 * the mmap / vma pages are now blocked in do_exit on current finishing 1454 * or the mmap / vma pages are now blocked in do_exit on current
1439 * this core dump. 1455 * finishing this core dump.
1440 * 1456 *
1441 * Only ptrace can touch these memory addresses, but it doesn't change 1457 * Only ptrace can touch these memory addresses, but it doesn't change
1442 * the map_count or the pages allocated. So no possibility of crashing 1458 * the map_count or the pages allocated. So no possibility of crashing
1443 * exists while dumping the mm->vm_next areas to the core file. 1459 * exists while dumping the mm->vm_next areas to the core file.
1444 */ 1460 */
1445 1461
@@ -1501,7 +1517,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1501#endif 1517#endif
1502 1518
1503 /* Set up header */ 1519 /* Set up header */
1504 fill_elf_header(elf, segs+1); /* including notes section */ 1520 fill_elf_header(elf, segs + 1); /* including notes section */
1505 1521
1506 has_dumped = 1; 1522 has_dumped = 1;
1507 current->flags |= PF_DUMPCORE; 1523 current->flags |= PF_DUMPCORE;
@@ -1511,24 +1527,24 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1511 * with info from their /proc. 1527 * with info from their /proc.
1512 */ 1528 */
1513 1529
1514 fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); 1530 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1515
1516 fill_psinfo(psinfo, current->group_leader, current->mm); 1531 fill_psinfo(psinfo, current->group_leader, current->mm);
1517 fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); 1532 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1518 1533
1519 numnote = 2; 1534 numnote = 2;
1520 1535
1521 auxv = (elf_addr_t *) current->mm->saved_auxv; 1536 auxv = (elf_addr_t *)current->mm->saved_auxv;
1522 1537
1523 i = 0; 1538 i = 0;
1524 do 1539 do
1525 i += 2; 1540 i += 2;
1526 while (auxv[i - 2] != AT_NULL); 1541 while (auxv[i - 2] != AT_NULL);
1527 fill_note(&notes[numnote++], "CORE", NT_AUXV, 1542 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1528 i * sizeof (elf_addr_t), auxv); 1543 i * sizeof(elf_addr_t), auxv);
1529 1544
1530 /* Try to dump the FPU. */ 1545 /* Try to dump the FPU. */
1531 if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu))) 1546 if ((prstatus->pr_fpvalid =
1547 elf_core_copy_task_fpregs(current, regs, fpu)))
1532 fill_note(notes + numnote++, 1548 fill_note(notes + numnote++,
1533 "CORE", NT_PRFPREG, sizeof(*fpu), fpu); 1549 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1534#ifdef ELF_CORE_COPY_XFPREGS 1550#ifdef ELF_CORE_COPY_XFPREGS
@@ -1577,8 +1593,10 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1577 phdr.p_memsz = sz; 1593 phdr.p_memsz = sz;
1578 offset += phdr.p_filesz; 1594 offset += phdr.p_filesz;
1579 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; 1595 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1580 if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W; 1596 if (vma->vm_flags & VM_WRITE)
1581 if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X; 1597 phdr.p_flags |= PF_W;
1598 if (vma->vm_flags & VM_EXEC)
1599 phdr.p_flags |= PF_X;
1582 phdr.p_align = ELF_EXEC_PAGESIZE; 1600 phdr.p_align = ELF_EXEC_PAGESIZE;
1583 1601
1584 DUMP_WRITE(&phdr, sizeof(phdr)); 1602 DUMP_WRITE(&phdr, sizeof(phdr));
@@ -1595,7 +1613,9 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1595 1613
1596 /* write out the thread status notes section */ 1614 /* write out the thread status notes section */
1597 list_for_each(t, &thread_list) { 1615 list_for_each(t, &thread_list) {
1598 struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list); 1616 struct elf_thread_status *tmp =
1617 list_entry(t, struct elf_thread_status, list);
1618
1599 for (i = 0; i < tmp->num_notes; i++) 1619 for (i = 0; i < tmp->num_notes; i++)
1600 if (!writenote(&tmp->notes[i], file)) 1620 if (!writenote(&tmp->notes[i], file))
1601 goto end_coredump; 1621 goto end_coredump;
@@ -1612,18 +1632,19 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1612 for (addr = vma->vm_start; 1632 for (addr = vma->vm_start;
1613 addr < vma->vm_end; 1633 addr < vma->vm_end;
1614 addr += PAGE_SIZE) { 1634 addr += PAGE_SIZE) {
1615 struct page* page; 1635 struct page *page;
1616 struct vm_area_struct *vma; 1636 struct vm_area_struct *vma;
1617 1637
1618 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 1638 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1619 &page, &vma) <= 0) { 1639 &page, &vma) <= 0) {
1620 DUMP_SEEK (file->f_pos + PAGE_SIZE); 1640 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1621 } else { 1641 } else {
1622 if (page == ZERO_PAGE(addr)) { 1642 if (page == ZERO_PAGE(addr)) {
1623 DUMP_SEEK (file->f_pos + PAGE_SIZE); 1643 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1624 } else { 1644 } else {
1625 void *kaddr; 1645 void *kaddr;
1626 flush_cache_page(vma, addr, page_to_pfn(page)); 1646 flush_cache_page(vma, addr,
1647 page_to_pfn(page));
1627 kaddr = kmap(page); 1648 kaddr = kmap(page);
1628 if ((size += PAGE_SIZE) > limit || 1649 if ((size += PAGE_SIZE) > limit ||
1629 !dump_write(file, kaddr, 1650 !dump_write(file, kaddr,
@@ -1645,7 +1666,8 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1645 1666
1646 if ((off_t)file->f_pos != offset) { 1667 if ((off_t)file->f_pos != offset) {
1647 /* Sanity check */ 1668 /* Sanity check */
1648 printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", 1669 printk(KERN_WARNING
1670 "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1649 (off_t)file->f_pos, offset); 1671 (off_t)file->f_pos, offset);
1650 } 1672 }
1651 1673
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a2e48c999c24..eba4e23b9ca0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -435,9 +435,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
435 struct elf_fdpic_params *interp_params) 435 struct elf_fdpic_params *interp_params)
436{ 436{
437 unsigned long sp, csp, nitems; 437 unsigned long sp, csp, nitems;
438 elf_caddr_t *argv, *envp; 438 elf_caddr_t __user *argv, *envp;
439 size_t platform_len = 0, len; 439 size_t platform_len = 0, len;
440 char *k_platform, *u_platform, *p; 440 char *k_platform;
441 char __user *u_platform, *p;
441 long hwcap; 442 long hwcap;
442 int loop; 443 int loop;
443 444
@@ -462,12 +463,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
462 if (k_platform) { 463 if (k_platform) {
463 platform_len = strlen(k_platform) + 1; 464 platform_len = strlen(k_platform) + 1;
464 sp -= platform_len; 465 sp -= platform_len;
466 u_platform = (char __user *) sp;
465 if (__copy_to_user(u_platform, k_platform, platform_len) != 0) 467 if (__copy_to_user(u_platform, k_platform, platform_len) != 0)
466 return -EFAULT; 468 return -EFAULT;
467 } 469 }
468 470
469 u_platform = (char *) sp;
470
471#if defined(__i386__) && defined(CONFIG_SMP) 471#if defined(__i386__) && defined(CONFIG_SMP)
472 /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions 472 /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
473 * by the processes running on the same package. One thing we can do 473 * by the processes running on the same package. One thing we can do
@@ -490,7 +490,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
490 sp = (sp - len) & ~7UL; 490 sp = (sp - len) & ~7UL;
491 exec_params->map_addr = sp; 491 exec_params->map_addr = sp;
492 492
493 if (copy_to_user((void *) sp, exec_params->loadmap, len) != 0) 493 if (copy_to_user((void __user *) sp, exec_params->loadmap, len) != 0)
494 return -EFAULT; 494 return -EFAULT;
495 495
496 current->mm->context.exec_fdpic_loadmap = (unsigned long) sp; 496 current->mm->context.exec_fdpic_loadmap = (unsigned long) sp;
@@ -501,7 +501,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
501 sp = (sp - len) & ~7UL; 501 sp = (sp - len) & ~7UL;
502 interp_params->map_addr = sp; 502 interp_params->map_addr = sp;
503 503
504 if (copy_to_user((void *) sp, interp_params->loadmap, len) != 0) 504 if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0)
505 return -EFAULT; 505 return -EFAULT;
506 506
507 current->mm->context.interp_fdpic_loadmap = (unsigned long) sp; 507 current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
@@ -527,7 +527,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
527 /* put the ELF interpreter info on the stack */ 527 /* put the ELF interpreter info on the stack */
528#define NEW_AUX_ENT(nr, id, val) \ 528#define NEW_AUX_ENT(nr, id, val) \
529 do { \ 529 do { \
530 struct { unsigned long _id, _val; } *ent = (void *) csp; \ 530 struct { unsigned long _id, _val; } __user *ent = (void __user *) csp; \
531 __put_user((id), &ent[nr]._id); \ 531 __put_user((id), &ent[nr]._id); \
532 __put_user((val), &ent[nr]._val); \ 532 __put_user((val), &ent[nr]._val); \
533 } while (0) 533 } while (0)
@@ -564,13 +564,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
564 564
565 /* allocate room for argv[] and envv[] */ 565 /* allocate room for argv[] and envv[] */
566 csp -= (bprm->envc + 1) * sizeof(elf_caddr_t); 566 csp -= (bprm->envc + 1) * sizeof(elf_caddr_t);
567 envp = (elf_caddr_t *) csp; 567 envp = (elf_caddr_t __user *) csp;
568 csp -= (bprm->argc + 1) * sizeof(elf_caddr_t); 568 csp -= (bprm->argc + 1) * sizeof(elf_caddr_t);
569 argv = (elf_caddr_t *) csp; 569 argv = (elf_caddr_t __user *) csp;
570 570
571 /* stack argc */ 571 /* stack argc */
572 csp -= sizeof(unsigned long); 572 csp -= sizeof(unsigned long);
573 __put_user(bprm->argc, (unsigned long *) csp); 573 __put_user(bprm->argc, (unsigned long __user *) csp);
574 574
575 BUG_ON(csp != sp); 575 BUG_ON(csp != sp);
576 576
@@ -581,7 +581,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
581 current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); 581 current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
582#endif 582#endif
583 583
584 p = (char *) current->mm->arg_start; 584 p = (char __user *) current->mm->arg_start;
585 for (loop = bprm->argc; loop > 0; loop--) { 585 for (loop = bprm->argc; loop > 0; loop--) {
586 __put_user((elf_caddr_t) p, argv++); 586 __put_user((elf_caddr_t) p, argv++);
587 len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); 587 len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
@@ -1025,7 +1025,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1025 /* clear the bit between beginning of mapping and beginning of PT_LOAD */ 1025 /* clear the bit between beginning of mapping and beginning of PT_LOAD */
1026 if (prot & PROT_WRITE && disp > 0) { 1026 if (prot & PROT_WRITE && disp > 0) {
1027 kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); 1027 kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
1028 clear_user((void *) maddr, disp); 1028 clear_user((void __user *) maddr, disp);
1029 maddr += disp; 1029 maddr += disp;
1030 } 1030 }
1031 1031
@@ -1059,7 +1059,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1059 if (prot & PROT_WRITE && excess1 > 0) { 1059 if (prot & PROT_WRITE && excess1 > 0) {
1060 kdebug("clear[%d] ad=%lx sz=%lx", 1060 kdebug("clear[%d] ad=%lx sz=%lx",
1061 loop, maddr + phdr->p_filesz, excess1); 1061 loop, maddr + phdr->p_filesz, excess1);
1062 clear_user((void *) maddr + phdr->p_filesz, excess1); 1062 clear_user((void __user *) maddr + phdr->p_filesz, excess1);
1063 } 1063 }
1064 1064
1065#else 1065#else
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b1c902e319c1..a62fd4018a20 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -16,7 +16,6 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/config.h>
20#include <linux/kernel.h> 19#include <linux/kernel.h>
21#include <linux/sched.h> 20#include <linux/sched.h>
22#include <linux/mm.h> 21#include <linux/mm.h>
@@ -510,7 +509,7 @@ static int load_flat_file(struct linux_binprm * bprm,
510 } 509 }
511 510
512 /* OK, This is the point of no return */ 511 /* OK, This is the point of no return */
513 set_personality(PER_LINUX); 512 set_personality(PER_LINUX_32BIT);
514 } 513 }
515 514
516 /* 515 /*
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index d73d75591a39..34ebbc191e46 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -55,6 +55,7 @@ typedef struct {
55} Node; 55} Node;
56 56
57static DEFINE_RWLOCK(entries_lock); 57static DEFINE_RWLOCK(entries_lock);
58static struct file_system_type bm_fs_type;
58static struct vfsmount *bm_mnt; 59static struct vfsmount *bm_mnt;
59static int entry_count; 60static int entry_count;
60 61
@@ -203,7 +204,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
203 goto _error; 204 goto _error;
204 205
205 if (files) { 206 if (files) {
206 steal_locks(files);
207 put_files_struct(files); 207 put_files_struct(files);
208 files = NULL; 208 files = NULL;
209 } 209 }
@@ -638,7 +638,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
638 if (!inode) 638 if (!inode)
639 goto out2; 639 goto out2;
640 640
641 err = simple_pin_fs("binfmt_misc", &bm_mnt, &entry_count); 641 err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
642 if (err) { 642 if (err) {
643 iput(inode); 643 iput(inode);
644 inode = NULL; 644 inode = NULL;
@@ -740,10 +740,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
740 return err; 740 return err;
741} 741}
742 742
743static struct super_block *bm_get_sb(struct file_system_type *fs_type, 743static int bm_get_sb(struct file_system_type *fs_type,
744 int flags, const char *dev_name, void *data) 744 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
745{ 745{
746 return get_sb_single(fs_type, flags, data, bm_fill_super); 746 return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
747} 747}
748 748
749static struct linux_binfmt misc_format = { 749static struct linux_binfmt misc_format = {
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 00a91dc25d16..32b5d625ce9c 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -32,7 +32,6 @@
32#include <asm/uaccess.h> 32#include <asm/uaccess.h>
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34 34
35#include <linux/config.h>
36 35
37#include <linux/elf.h> 36#include <linux/elf.h>
38 37
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f5958f413bd1..9633a490dab0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -5,14 +5,12 @@
5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
6 */ 6 */
7 7
8#include <linux/config.h>
9#include <linux/init.h> 8#include <linux/init.h>
10#include <linux/mm.h> 9#include <linux/mm.h>
11#include <linux/fcntl.h> 10#include <linux/fcntl.h>
12#include <linux/slab.h> 11#include <linux/slab.h>
13#include <linux/kmod.h> 12#include <linux/kmod.h>
14#include <linux/major.h> 13#include <linux/major.h>
15#include <linux/devfs_fs_kernel.h>
16#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
17#include <linux/highmem.h> 15#include <linux/highmem.h>
18#include <linux/blkdev.h> 16#include <linux/blkdev.h>
@@ -300,10 +298,10 @@ static struct super_operations bdev_sops = {
300 .clear_inode = bdev_clear_inode, 298 .clear_inode = bdev_clear_inode,
301}; 299};
302 300
303static struct super_block *bd_get_sb(struct file_system_type *fs_type, 301static int bd_get_sb(struct file_system_type *fs_type,
304 int flags, const char *dev_name, void *data) 302 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
305{ 303{
306 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); 304 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
307} 305}
308 306
309static struct file_system_type bd_type = { 307static struct file_system_type bd_type = {
@@ -414,21 +412,31 @@ EXPORT_SYMBOL(bdput);
414static struct block_device *bd_acquire(struct inode *inode) 412static struct block_device *bd_acquire(struct inode *inode)
415{ 413{
416 struct block_device *bdev; 414 struct block_device *bdev;
415
417 spin_lock(&bdev_lock); 416 spin_lock(&bdev_lock);
418 bdev = inode->i_bdev; 417 bdev = inode->i_bdev;
419 if (bdev && igrab(bdev->bd_inode)) { 418 if (bdev) {
419 atomic_inc(&bdev->bd_inode->i_count);
420 spin_unlock(&bdev_lock); 420 spin_unlock(&bdev_lock);
421 return bdev; 421 return bdev;
422 } 422 }
423 spin_unlock(&bdev_lock); 423 spin_unlock(&bdev_lock);
424
424 bdev = bdget(inode->i_rdev); 425 bdev = bdget(inode->i_rdev);
425 if (bdev) { 426 if (bdev) {
426 spin_lock(&bdev_lock); 427 spin_lock(&bdev_lock);
427 if (inode->i_bdev) 428 if (!inode->i_bdev) {
428 __bd_forget(inode); 429 /*
429 inode->i_bdev = bdev; 430 * We take an additional bd_inode->i_count for inode,
430 inode->i_mapping = bdev->bd_inode->i_mapping; 431 * and it's released in clear_inode() of inode.
431 list_add(&inode->i_devices, &bdev->bd_inodes); 432 * So, we can access it via ->i_mapping always
433 * without igrab().
434 */
435 atomic_inc(&bdev->bd_inode->i_count);
436 inode->i_bdev = bdev;
437 inode->i_mapping = bdev->bd_inode->i_mapping;
438 list_add(&inode->i_devices, &bdev->bd_inodes);
439 }
432 spin_unlock(&bdev_lock); 440 spin_unlock(&bdev_lock);
433 } 441 }
434 return bdev; 442 return bdev;
@@ -438,10 +446,18 @@ static struct block_device *bd_acquire(struct inode *inode)
438 446
439void bd_forget(struct inode *inode) 447void bd_forget(struct inode *inode)
440{ 448{
449 struct block_device *bdev = NULL;
450
441 spin_lock(&bdev_lock); 451 spin_lock(&bdev_lock);
442 if (inode->i_bdev) 452 if (inode->i_bdev) {
453 if (inode->i_sb != blockdev_superblock)
454 bdev = inode->i_bdev;
443 __bd_forget(inode); 455 __bd_forget(inode);
456 }
444 spin_unlock(&bdev_lock); 457 spin_unlock(&bdev_lock);
458
459 if (bdev)
460 iput(bdev->bd_inode);
445} 461}
446 462
447int bd_claim(struct block_device *bdev, void *holder) 463int bd_claim(struct block_device *bdev, void *holder)
@@ -1077,7 +1093,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1077 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 1093 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
1078} 1094}
1079 1095
1080struct address_space_operations def_blk_aops = { 1096const struct address_space_operations def_blk_aops = {
1081 .readpage = blkdev_readpage, 1097 .readpage = blkdev_readpage,
1082 .writepage = blkdev_writepage, 1098 .writepage = blkdev_writepage,
1083 .sync_page = block_sync_page, 1099 .sync_page = block_sync_page,
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f1f3a68077..3660dcb97591 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -18,7 +18,6 @@
18 * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> 18 * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
19 */ 19 */
20 20
21#include <linux/config.h>
22#include <linux/kernel.h> 21#include <linux/kernel.h>
23#include <linux/syscalls.h> 22#include <linux/syscalls.h>
24#include <linux/fs.h> 23#include <linux/fs.h>
@@ -331,7 +330,6 @@ long do_fsync(struct file *file, int datasync)
331 goto out; 330 goto out;
332 } 331 }
333 332
334 current->flags |= PF_SYNCWRITE;
335 ret = filemap_fdatawrite(mapping); 333 ret = filemap_fdatawrite(mapping);
336 334
337 /* 335 /*
@@ -346,7 +344,6 @@ long do_fsync(struct file *file, int datasync)
346 err = filemap_fdatawait(mapping); 344 err = filemap_fdatawait(mapping);
347 if (!ret) 345 if (!ret)
348 ret = err; 346 ret = err;
349 current->flags &= ~PF_SYNCWRITE;
350out: 347out:
351 return ret; 348 return ret;
352} 349}
@@ -566,7 +563,7 @@ still_busy:
566 * Completion handler for block_write_full_page() - pages which are unlocked 563 * Completion handler for block_write_full_page() - pages which are unlocked
567 * during I/O, and which have PageWriteback cleared upon I/O completion. 564 * during I/O, and which have PageWriteback cleared upon I/O completion.
568 */ 565 */
569void end_buffer_async_write(struct buffer_head *bh, int uptodate) 566static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
570{ 567{
571 char b[BDEVNAME_SIZE]; 568 char b[BDEVNAME_SIZE];
572 unsigned long flags; 569 unsigned long flags;
@@ -854,7 +851,7 @@ int __set_page_dirty_buffers(struct page *page)
854 write_lock_irq(&mapping->tree_lock); 851 write_lock_irq(&mapping->tree_lock);
855 if (page->mapping) { /* Race with truncate? */ 852 if (page->mapping) { /* Race with truncate? */
856 if (mapping_cap_account_dirty(mapping)) 853 if (mapping_cap_account_dirty(mapping))
857 inc_page_state(nr_dirty); 854 __inc_zone_page_state(page, NR_FILE_DIRTY);
858 radix_tree_tag_set(&mapping->page_tree, 855 radix_tree_tag_set(&mapping->page_tree,
859 page_index(page), 856 page_index(page),
860 PAGECACHE_TAG_DIRTY); 857 PAGECACHE_TAG_DIRTY);
@@ -2600,7 +2597,7 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
2600 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2597 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2601 unsigned to; 2598 unsigned to;
2602 struct page *page; 2599 struct page *page;
2603 struct address_space_operations *a_ops = mapping->a_ops; 2600 const struct address_space_operations *a_ops = mapping->a_ops;
2604 char *kaddr; 2601 char *kaddr;
2605 int ret = 0; 2602 int ret = 0;
2606 2603
@@ -3168,7 +3165,6 @@ EXPORT_SYMBOL(block_sync_page);
3168EXPORT_SYMBOL(block_truncate_page); 3165EXPORT_SYMBOL(block_truncate_page);
3169EXPORT_SYMBOL(block_write_full_page); 3166EXPORT_SYMBOL(block_write_full_page);
3170EXPORT_SYMBOL(cont_prepare_write); 3167EXPORT_SYMBOL(cont_prepare_write);
3171EXPORT_SYMBOL(end_buffer_async_write);
3172EXPORT_SYMBOL(end_buffer_read_sync); 3168EXPORT_SYMBOL(end_buffer_read_sync);
3173EXPORT_SYMBOL(end_buffer_write_sync); 3169EXPORT_SYMBOL(end_buffer_write_sync);
3174EXPORT_SYMBOL(file_fsync); 3170EXPORT_SYMBOL(file_fsync);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f3418f7a6e9d..a4cbc6706ef0 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/init.h> 7#include <linux/init.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
@@ -14,7 +13,6 @@
14#include <linux/errno.h> 13#include <linux/errno.h>
15#include <linux/module.h> 14#include <linux/module.h>
16#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
17#include <linux/devfs_fs_kernel.h>
18#include <linux/seq_file.h> 16#include <linux/seq_file.h>
19 17
20#include <linux/kobject.h> 18#include <linux/kobject.h>
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7271bb0257f6..a61d17ed1827 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,9 +1,24 @@
1Version 1.44
2------------
3Rewritten sessionsetup support, including support for legacy SMB
4session setup needed for OS/2 and older servers such as Windows 95 and 98.
5Fix oops on ls to OS/2 servers. Add support for level 1 FindFirst
6so we can do search (ls etc.) to OS/2. Do not send NTCreateX
7or recent levels of FindFirst unless server says it supports NT SMBs
8(instead use legacy equivalents from LANMAN dialect). Fix to allow
9NTLMv2 authentication support (now can use stronger password hashing
10on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004)).
11Allow override of global cifs security flags on mount via "sec=" option(s).
12
1Version 1.43 13Version 1.43
2------------ 14------------
3POSIX locking to servers which support CIFS POSIX Extensions 15POSIX locking to servers which support CIFS POSIX Extensions
4(disabled by default controlled by proc/fs/cifs/Experimental). 16(disabled by default controlled by proc/fs/cifs/Experimental).
5Handle conversion of long share names (especially Asian languages) 17Handle conversion of long share names (especially Asian languages)
6to Unicode during mount. 18to Unicode during mount. Fix memory leak in sess struct on reconnect.
19Fix rare oops after acpi suspend. Fix O_TRUNC opens to overwrite on
20cifs open which helps rare case when setpathinfo fails or server does
21not support it.
7 22
8Version 1.42 23Version 1.42
9------------ 24------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 58c77254a23b..a26f26ed5a17 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
3# 3#
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o ntlmssp.o 6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 0355003f4f0a..7986d0d97ace 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -443,7 +443,10 @@ A partial list of the supported mount options follows:
443 SFU does). In the future the bottom 9 bits of the mode 443 SFU does). In the future the bottom 9 bits of the mode
444 also will be emulated using queries of the security 444 also will be emulated using queries of the security
445 descriptor (ACL). 445 descriptor (ACL).
446sec Security mode. Allowed values are: 446 sign Must use packet signing (helps avoid unwanted data modification
447 by intermediate systems in the route). Note that signing
448 does not work with lanman or plaintext authentication.
449 sec Security mode. Allowed values are:
447 none attempt to connect as a null user (no name) 450 none attempt to connect as a null user (no name)
448 krb5 Use Kerberos version 5 authentication 451 krb5 Use Kerberos version 5 authentication
449 krb5i Use Kerberos authentication and packet signing 452 krb5i Use Kerberos authentication and packet signing
@@ -453,6 +456,8 @@ sec Security mode. Allowed values are:
453 server requires signing also can be the default) 456 server requires signing also can be the default)
454 ntlmv2 Use NTLMv2 password hashing 457 ntlmv2 Use NTLMv2 password hashing
455 ntlmv2i Use NTLMv2 password hashing with packet signing 458 ntlmv2i Use NTLMv2 password hashing with packet signing
459 lanman (if configured in kernel config) use older
460 lanman hash
456 461
457The mount.cifs mount helper also accepts a few mount options before -o 462The mount.cifs mount helper also accepts a few mount options before -o
458including: 463including:
@@ -485,14 +490,34 @@ PacketSigningEnabled If set to one, cifs packet signing is enabled
485 it. If set to two, cifs packet signing is 490 it. If set to two, cifs packet signing is
486 required even if the server considers packet 491 required even if the server considers packet
487 signing optional. (default 1) 492 signing optional. (default 1)
493SecurityFlags Flags which control security negotiation and
494 also packet signing. Authentication (may/must)
495 flags (e.g. for NTLM and/or NTLMv2) may be combined with
496 the signing flags. Specifying two different password
497 hashing mechanisms (as "must use") on the other hand
498 does not make much sense. Default flags are
499 0x07007
500 (NTLM, NTLMv2 and packet signing allowed). Maximum
501 allowable flags if you want to allow mounts to servers
502 using weaker password hashes is 0x37037 (lanman,
503 plaintext, ntlm, ntlmv2, signing allowed):
504
505 may use packet signing 0x00001
506 must use packet signing 0x01001
507 may use NTLM (most common password hash) 0x00002
508 must use NTLM 0x02002
509 may use NTLMv2 0x00004
510 must use NTLMv2 0x04004
511 may use Kerberos security (not implemented yet) 0x00008
512 must use Kerberos (not implemented yet) 0x08008
513 may use lanman (weak) password hash 0x00010
514 must use lanman password hash 0x10010
515 may use plaintext passwords 0x00020
516 must use plaintext passwords 0x20020
517 (reserved for future packet encryption) 0x00040
518
488cifsFYI If set to one, additional debug information is 519cifsFYI If set to one, additional debug information is
489 logged to the system error log. (default 0) 520 logged to the system error log. (default 0)
490ExtendedSecurity If set to one, SPNEGO session establishment
491 is allowed which enables more advanced
492 secure CIFS session establishment (default 0)
493NTLMV2Enabled If set to one, more secure password hashes
494 are used when the server supports them and
495 when kerberos is not negotiated (default 0)
496traceSMB If set to one, debug information is logged to the 521traceSMB If set to one, debug information is logged to the
497 system error log with the start of smb requests 522 system error log with the start of smb requests
498 and responses (default 0) 523 and responses (default 0)
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 086ae8f4a207..2e75883b7f54 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -17,7 +17,6 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/config.h>
21#include <linux/module.h> 20#include <linux/module.h>
22#include <linux/types.h> 21#include <linux/types.h>
23#include <linux/kernel.h> 22#include <linux/kernel.h>
@@ -467,7 +466,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
467 asn1_open(&ctx, security_blob, length); 466 asn1_open(&ctx, security_blob, length);
468 467
469 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 468 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
470 cFYI(1, ("Error decoding negTokenInit header ")); 469 cFYI(1, ("Error decoding negTokenInit header"));
471 return 0; 470 return 0;
472 } else if ((cls != ASN1_APL) || (con != ASN1_CON) 471 } else if ((cls != ASN1_APL) || (con != ASN1_CON)
473 || (tag != ASN1_EOC)) { 472 || (tag != ASN1_EOC)) {
@@ -495,7 +494,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
495 } 494 }
496 495
497 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 496 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
498 cFYI(1, ("Error decoding negTokenInit ")); 497 cFYI(1, ("Error decoding negTokenInit"));
499 return 0; 498 return 0;
500 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 499 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
501 || (tag != ASN1_EOC)) { 500 || (tag != ASN1_EOC)) {
@@ -505,7 +504,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
505 } 504 }
506 505
507 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 506 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
508 cFYI(1, ("Error decoding negTokenInit ")); 507 cFYI(1, ("Error decoding negTokenInit"));
509 return 0; 508 return 0;
510 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 509 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
511 || (tag != ASN1_SEQ)) { 510 || (tag != ASN1_SEQ)) {
@@ -515,7 +514,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
515 } 514 }
516 515
517 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 516 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
518 cFYI(1, ("Error decoding 2nd part of negTokenInit ")); 517 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
519 return 0; 518 return 0;
520 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 519 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
521 || (tag != ASN1_EOC)) { 520 || (tag != ASN1_EOC)) {
@@ -527,7 +526,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
527 526
528 if (asn1_header_decode 527 if (asn1_header_decode
529 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 528 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
530 cFYI(1, ("Error decoding 2nd part of negTokenInit ")); 529 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
531 return 0; 530 return 0;
532 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 531 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
533 || (tag != ASN1_SEQ)) { 532 || (tag != ASN1_SEQ)) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f4124a32bef8..96abeb738978 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
39 char *charptr = data; 39 char *charptr = data;
40 char buf[10], line[80]; 40 char buf[10], line[80];
41 41
42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n\n", 42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
43 label, length, data); 43 label, length, data);
44 for (i = 0; i < length; i += 16) { 44 for (i = 0; i < length; i += 16) {
45 line[0] = 0; 45 line[0] = 0;
@@ -57,6 +57,57 @@ cifs_dump_mem(char *label, void *data, int length)
57 } 57 }
58} 58}
59 59
60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr * smb)
62{
63 cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
66 cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
67}
68
69
70void cifs_dump_mids(struct TCP_Server_Info * server)
71{
72 struct list_head *tmp;
73 struct mid_q_entry * mid_entry;
74
75 if(server == NULL)
76 return;
77
78 cERROR(1,("Dump pending requests:"));
79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if(mid_entry) {
83 cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
84 mid_entry->midState,
85 (int)mid_entry->command,
86 mid_entry->pid,
87 mid_entry->tsk,
88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2
90 cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf,
92 mid_entry->resp_buf,
93 mid_entry->when_received,
94 jiffies));
95#endif /* STATS2 */
96 cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd));
98 if(mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf,
102 62 /* fixme */);
103 }
104
105 }
106 }
107 spin_unlock(&GlobalMid_Lock);
108}
109#endif /* CONFIG_CIFS_DEBUG2 */
110
60#ifdef CONFIG_PROC_FS 111#ifdef CONFIG_PROC_FS
61static int 112static int
62cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, 113cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
@@ -73,7 +124,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
73 124
74 *beginBuffer = buf + offset; 125 *beginBuffer = buf + offset;
75 126
76
77 length = 127 length =
78 sprintf(buf, 128 sprintf(buf,
79 "Display Internal CIFS Data Structures for Debugging\n" 129 "Display Internal CIFS Data Structures for Debugging\n"
@@ -395,12 +445,12 @@ static read_proc_t traceSMB_read;
395static write_proc_t traceSMB_write; 445static write_proc_t traceSMB_write;
396static read_proc_t multiuser_mount_read; 446static read_proc_t multiuser_mount_read;
397static write_proc_t multiuser_mount_write; 447static write_proc_t multiuser_mount_write;
398static read_proc_t extended_security_read; 448static read_proc_t security_flags_read;
399static write_proc_t extended_security_write; 449static write_proc_t security_flags_write;
400static read_proc_t ntlmv2_enabled_read; 450/* static read_proc_t ntlmv2_enabled_read;
401static write_proc_t ntlmv2_enabled_write; 451static write_proc_t ntlmv2_enabled_write;
402static read_proc_t packet_signing_enabled_read; 452static read_proc_t packet_signing_enabled_read;
403static write_proc_t packet_signing_enabled_write; 453static write_proc_t packet_signing_enabled_write;*/
404static read_proc_t experimEnabled_read; 454static read_proc_t experimEnabled_read;
405static write_proc_t experimEnabled_write; 455static write_proc_t experimEnabled_write;
406static read_proc_t linuxExtensionsEnabled_read; 456static read_proc_t linuxExtensionsEnabled_read;
@@ -458,10 +508,10 @@ cifs_proc_init(void)
458 pde->write_proc = multiuser_mount_write; 508 pde->write_proc = multiuser_mount_write;
459 509
460 pde = 510 pde =
461 create_proc_read_entry("ExtendedSecurity", 0, proc_fs_cifs, 511 create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
462 extended_security_read, NULL); 512 security_flags_read, NULL);
463 if (pde) 513 if (pde)
464 pde->write_proc = extended_security_write; 514 pde->write_proc = security_flags_write;
465 515
466 pde = 516 pde =
467 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs, 517 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
@@ -469,7 +519,7 @@ cifs_proc_init(void)
469 if (pde) 519 if (pde)
470 pde->write_proc = lookupFlag_write; 520 pde->write_proc = lookupFlag_write;
471 521
472 pde = 522/* pde =
473 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs, 523 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
474 ntlmv2_enabled_read, NULL); 524 ntlmv2_enabled_read, NULL);
475 if (pde) 525 if (pde)
@@ -479,7 +529,7 @@ cifs_proc_init(void)
479 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs, 529 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
480 packet_signing_enabled_read, NULL); 530 packet_signing_enabled_read, NULL);
481 if (pde) 531 if (pde)
482 pde->write_proc = packet_signing_enabled_write; 532 pde->write_proc = packet_signing_enabled_write;*/
483} 533}
484 534
485void 535void
@@ -496,9 +546,9 @@ cifs_proc_clean(void)
496#endif 546#endif
497 remove_proc_entry("MultiuserMount", proc_fs_cifs); 547 remove_proc_entry("MultiuserMount", proc_fs_cifs);
498 remove_proc_entry("OplockEnabled", proc_fs_cifs); 548 remove_proc_entry("OplockEnabled", proc_fs_cifs);
499 remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); 549/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
500 remove_proc_entry("ExtendedSecurity",proc_fs_cifs); 550 remove_proc_entry("SecurityFlags",proc_fs_cifs);
501 remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); 551/* remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */
502 remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); 552 remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
503 remove_proc_entry("Experimental",proc_fs_cifs); 553 remove_proc_entry("Experimental",proc_fs_cifs);
504 remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); 554 remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
@@ -782,12 +832,12 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
782} 832}
783 833
784static int 834static int
785extended_security_read(char *page, char **start, off_t off, 835security_flags_read(char *page, char **start, off_t off,
786 int count, int *eof, void *data) 836 int count, int *eof, void *data)
787{ 837{
788 int len; 838 int len;
789 839
790 len = sprintf(page, "%d\n", extended_security); 840 len = sprintf(page, "0x%x\n", extended_security);
791 841
792 len -= off; 842 len -= off;
793 *start = page + off; 843 *start = page + off;
@@ -803,24 +853,52 @@ extended_security_read(char *page, char **start, off_t off,
803 return len; 853 return len;
804} 854}
805static int 855static int
806extended_security_write(struct file *file, const char __user *buffer, 856security_flags_write(struct file *file, const char __user *buffer,
807 unsigned long count, void *data) 857 unsigned long count, void *data)
808{ 858{
859 unsigned int flags;
860 char flags_string[12];
809 char c; 861 char c;
810 int rc;
811 862
812 rc = get_user(c, buffer); 863 if((count < 1) || (count > 11))
813 if (rc) 864 return -EINVAL;
814 return rc; 865
815 if (c == '0' || c == 'n' || c == 'N') 866 memset(flags_string, 0, 12);
816 extended_security = 0; 867
817 else if (c == '1' || c == 'y' || c == 'Y') 868 if(copy_from_user(flags_string, buffer, count))
818 extended_security = 1; 869 return -EFAULT;
870
871 if(count < 3) {
872 /* single char or single char followed by null */
873 c = flags_string[0];
874 if (c == '0' || c == 'n' || c == 'N')
875 extended_security = CIFSSEC_DEF; /* default */
876 else if (c == '1' || c == 'y' || c == 'Y')
877 extended_security = CIFSSEC_MAX;
878 return count;
879 }
880 /* else we have a number */
881
882 flags = simple_strtoul(flags_string, NULL, 0);
883
884 cFYI(1,("sec flags 0x%x", flags));
885
886 if(flags <= 0) {
887 cERROR(1,("invalid security flags %s",flags_string));
888 return -EINVAL;
889 }
819 890
891 if(flags & ~CIFSSEC_MASK) {
892 cERROR(1,("attempt to set unsupported security flags 0x%x",
893 flags & ~CIFSSEC_MASK));
894 return -EINVAL;
895 }
896 /* flags look ok - update the global security flags for cifs module */
897 extended_security = flags;
820 return count; 898 return count;
821} 899}
822 900
823static int 901/* static int
824ntlmv2_enabled_read(char *page, char **start, off_t off, 902ntlmv2_enabled_read(char *page, char **start, off_t off,
825 int count, int *eof, void *data) 903 int count, int *eof, void *data)
826{ 904{
@@ -855,6 +933,8 @@ ntlmv2_enabled_write(struct file *file, const char __user *buffer,
855 ntlmv2_support = 0; 933 ntlmv2_support = 0;
856 else if (c == '1' || c == 'y' || c == 'Y') 934 else if (c == '1' || c == 'y' || c == 'Y')
857 ntlmv2_support = 1; 935 ntlmv2_support = 1;
936 else if (c == '2')
937 ntlmv2_support = 2;
858 938
859 return count; 939 return count;
860} 940}
@@ -898,7 +978,7 @@ packet_signing_enabled_write(struct file *file, const char __user *buffer,
898 sign_CIFS_PDUs = 2; 978 sign_CIFS_PDUs = 2;
899 979
900 return count; 980 return count;
901} 981} */
902 982
903 983
904#endif 984#endif
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 4304d9dcfb6c..c26cd0d2c6d5 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -24,6 +24,10 @@
24#define _H_CIFS_DEBUG 24#define _H_CIFS_DEBUG
25 25
26void cifs_dump_mem(char *label, void *data, int length); 26void cifs_dump_mem(char *label, void *data, int length);
27#ifdef CONFIG_CIFS_DEBUG2
28void cifs_dump_detail(struct smb_hdr *);
29void cifs_dump_mids(struct TCP_Server_Info *);
30#endif
27extern int traceSMB; /* flag which enables the function below */ 31extern int traceSMB; /* flag which enables the function below */
28void dump_smb(struct smb_hdr *, int); 32void dump_smb(struct smb_hdr *, int);
29#define CIFS_INFO 0x01 33#define CIFS_INFO 0x01
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d2b128255944..d2a8b2941fc2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -22,6 +22,7 @@
22#include "cifs_unicode.h" 22#include "cifs_unicode.h"
23#include "cifs_uniupr.h" 23#include "cifs_uniupr.h"
24#include "cifspdu.h" 24#include "cifspdu.h"
25#include "cifsglob.h"
25#include "cifs_debug.h" 26#include "cifs_debug.h"
26 27
27/* 28/*
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e7d63737e651..a89efaf78a26 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -26,6 +26,8 @@
26#include "md5.h" 26#include "md5.h"
27#include "cifs_unicode.h" 27#include "cifs_unicode.h"
28#include "cifsproto.h" 28#include "cifsproto.h"
29#include <linux/ctype.h>
30#include <linux/random.h>
29 31
30/* Calculate and return the CIFS signature based on the mac key and the smb pdu */ 32/* Calculate and return the CIFS signature based on the mac key and the smb pdu */
31/* the 16 byte signature must be allocated by the caller */ 33/* the 16 byte signature must be allocated by the caller */
@@ -35,6 +37,8 @@
35 37
36extern void mdfour(unsigned char *out, unsigned char *in, int n); 38extern void mdfour(unsigned char *out, unsigned char *in, int n);
37extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); 39extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
40extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
41 unsigned char *p24);
38 42
39static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, 43static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
40 const char * key, char * signature) 44 const char * key, char * signature)
@@ -45,7 +49,7 @@ static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
45 return -EINVAL; 49 return -EINVAL;
46 50
47 MD5Init(&context); 51 MD5Init(&context);
48 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); 52 MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
49 MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); 53 MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length);
50 MD5Final(signature,&context); 54 MD5Final(signature,&context);
51 return 0; 55 return 0;
@@ -90,7 +94,7 @@ static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
90 return -EINVAL; 94 return -EINVAL;
91 95
92 MD5Init(&context); 96 MD5Init(&context);
93 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); 97 MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
94 for(i=0;i<n_vec;i++) { 98 for(i=0;i<n_vec;i++) {
95 if(iov[i].iov_base == NULL) { 99 if(iov[i].iov_base == NULL) {
96 cERROR(1,("null iovec entry")); 100 cERROR(1,("null iovec entry"));
@@ -204,11 +208,12 @@ int cifs_calculate_mac_key(char * key, const char * rn, const char * password)
204 208
205 E_md4hash(password, temp_key); 209 E_md4hash(password, temp_key);
206 mdfour(key,temp_key,16); 210 mdfour(key,temp_key,16);
207 memcpy(key+16,rn, CIFS_SESSION_KEY_SIZE); 211 memcpy(key+16,rn, CIFS_SESS_KEY_SIZE);
208 return 0; 212 return 0;
209} 213}
210 214
211int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_info) 215int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses,
216 const struct nls_table * nls_info)
212{ 217{
213 char temp_hash[16]; 218 char temp_hash[16];
214 struct HMACMD5Context ctx; 219 struct HMACMD5Context ctx;
@@ -225,6 +230,8 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
225 user_name_len = strlen(ses->userName); 230 user_name_len = strlen(ses->userName);
226 if(user_name_len > MAX_USERNAME_SIZE) 231 if(user_name_len > MAX_USERNAME_SIZE)
227 return -EINVAL; 232 return -EINVAL;
233 if(ses->domainName == NULL)
234 return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
228 dom_name_len = strlen(ses->domainName); 235 dom_name_len = strlen(ses->domainName);
229 if(dom_name_len > MAX_USERNAME_SIZE) 236 if(dom_name_len > MAX_USERNAME_SIZE)
230 return -EINVAL; 237 return -EINVAL;
@@ -259,16 +266,131 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
259 kfree(unicode_buf); 266 kfree(unicode_buf);
260 return 0; 267 return 0;
261} 268}
262void CalcNTLMv2_response(const struct cifsSesInfo * ses,char * v2_session_response) 269
270#ifdef CONFIG_CIFS_WEAK_PW_HASH
271void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key)
272{
273 int i;
274 char password_with_pad[CIFS_ENCPWD_SIZE];
275
276 if(ses->server == NULL)
277 return;
278
279 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
280 strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE);
281
282 if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
283 if(extended_security & CIFSSEC_MAY_PLNTXT) {
284 memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE);
285 return;
286 }
287
288 /* calculate old style session key */
289 /* calling toupper is less broken than repeatedly
290 calling nls_toupper would be since that will never
291 work for UTF8, but neither handles multibyte code pages
292 but the only alternative would be converting to UCS-16 (Unicode)
293 (using a routine something like UniStrupr) then
294 uppercasing and then converting back from Unicode - which
295 would only worth doing it if we knew it were utf8. Basically
296 utf8 and other multibyte codepages each need their own strupper
297 function since a byte at a time will ont work. */
298
299 for(i = 0; i < CIFS_ENCPWD_SIZE; i++) {
300 password_with_pad[i] = toupper(password_with_pad[i]);
301 }
302
303 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
304 /* clear password before we return/free memory */
305 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
306}
307#endif /* CIFS_WEAK_PW_HASH */
308
309static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
310 const struct nls_table * nls_cp)
311{
312 int rc = 0;
313 int len;
314 char nt_hash[16];
315 struct HMACMD5Context * pctxt;
316 wchar_t * user;
317 wchar_t * domain;
318
319 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
320
321 if(pctxt == NULL)
322 return -ENOMEM;
323
324 /* calculate md4 hash of password */
325 E_md4hash(ses->password, nt_hash);
326
327 /* convert Domainname to unicode and uppercase */
328 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
329
330 /* convert ses->userName to unicode and uppercase */
331 len = strlen(ses->userName);
332 user = kmalloc(2 + (len * 2), GFP_KERNEL);
333 if(user == NULL)
334 goto calc_exit_2;
335 len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
336 UniStrupr(user);
337 hmac_md5_update((char *)user, 2*len, pctxt);
338
339 /* convert ses->domainName to unicode and uppercase */
340 if(ses->domainName) {
341 len = strlen(ses->domainName);
342
343 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
344 if(domain == NULL)
345 goto calc_exit_1;
346 len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
347 UniStrupr(domain);
348
349 hmac_md5_update((char *)domain, 2*len, pctxt);
350
351 kfree(domain);
352 }
353calc_exit_1:
354 kfree(user);
355calc_exit_2:
356 /* BB FIXME what about bytes 24 through 40 of the signing key?
357 compare with the NTLM example */
358 hmac_md5_final(ses->server->mac_signing_key, pctxt);
359
360 return rc;
361}
362
363void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf,
364 const struct nls_table * nls_cp)
365{
366 int rc;
367 struct ntlmv2_resp * buf = (struct ntlmv2_resp *)resp_buf;
368
369 buf->blob_signature = cpu_to_le32(0x00000101);
370 buf->reserved = 0;
371 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
372 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
373 buf->reserved2 = 0;
374 buf->names[0].type = 0;
375 buf->names[0].length = 0;
376
377 /* calculate buf->ntlmv2_hash */
378 rc = calc_ntlmv2_hash(ses, nls_cp);
379 if(rc)
380 cERROR(1,("could not get v2 hash rc %d",rc));
381 CalcNTLMv2_response(ses, resp_buf);
382}
383
384void CalcNTLMv2_response(const struct cifsSesInfo * ses, char * v2_session_response)
263{ 385{
264 struct HMACMD5Context context; 386 struct HMACMD5Context context;
387 /* rest of v2 struct already generated */
265 memcpy(v2_session_response + 8, ses->server->cryptKey,8); 388 memcpy(v2_session_response + 8, ses->server->cryptKey,8);
266 /* gen_blob(v2_session_response + 16); */
267 hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context); 389 hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context);
268 390
269 hmac_md5_update(ses->server->cryptKey,8,&context); 391 hmac_md5_update(v2_session_response+8,
270/* hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */ 392 sizeof(struct ntlmv2_resp) - 8, &context);
271 393
272 hmac_md5_final(v2_session_response,&context); 394 hmac_md5_final(v2_session_response,&context);
273 cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); /* BB removeme BB */ 395/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
274} 396}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c262d8874ce9..c28ede599946 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -56,8 +56,8 @@ unsigned int experimEnabled = 0;
56unsigned int linuxExtEnabled = 1; 56unsigned int linuxExtEnabled = 1;
57unsigned int lookupCacheEnabled = 1; 57unsigned int lookupCacheEnabled = 1;
58unsigned int multiuser_mount = 0; 58unsigned int multiuser_mount = 0;
59unsigned int extended_security = 0; 59unsigned int extended_security = CIFSSEC_DEF;
60unsigned int ntlmv2_support = 0; 60/* unsigned int ntlmv2_support = 0; */
61unsigned int sign_CIFS_PDUs = 1; 61unsigned int sign_CIFS_PDUs = 1;
62extern struct task_struct * oplockThread; /* remove sparse warning */ 62extern struct task_struct * oplockThread; /* remove sparse warning */
63struct task_struct * oplockThread = NULL; 63struct task_struct * oplockThread = NULL;
@@ -166,8 +166,9 @@ cifs_put_super(struct super_block *sb)
166} 166}
167 167
168static int 168static int
169cifs_statfs(struct super_block *sb, struct kstatfs *buf) 169cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
170{ 170{
171 struct super_block *sb = dentry->d_sb;
171 int xid; 172 int xid;
172 int rc = -EOPNOTSUPP; 173 int rc = -EOPNOTSUPP;
173 struct cifs_sb_info *cifs_sb; 174 struct cifs_sb_info *cifs_sb;
@@ -402,12 +403,14 @@ static struct quotactl_ops cifs_quotactl_ops = {
402#endif 403#endif
403 404
404#ifdef CONFIG_CIFS_EXPERIMENTAL 405#ifdef CONFIG_CIFS_EXPERIMENTAL
405static void cifs_umount_begin(struct super_block * sblock) 406static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
406{ 407{
407 struct cifs_sb_info *cifs_sb; 408 struct cifs_sb_info *cifs_sb;
408 struct cifsTconInfo * tcon; 409 struct cifsTconInfo * tcon;
409 410
410 cifs_sb = CIFS_SB(sblock); 411 if (!(flags & MNT_FORCE))
412 return;
413 cifs_sb = CIFS_SB(vfsmnt->mnt_sb);
411 if(cifs_sb == NULL) 414 if(cifs_sb == NULL)
412 return; 415 return;
413 416
@@ -460,9 +463,9 @@ struct super_operations cifs_super_ops = {
460 .remount_fs = cifs_remount, 463 .remount_fs = cifs_remount,
461}; 464};
462 465
463static struct super_block * 466static int
464cifs_get_sb(struct file_system_type *fs_type, 467cifs_get_sb(struct file_system_type *fs_type,
465 int flags, const char *dev_name, void *data) 468 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
466{ 469{
467 int rc; 470 int rc;
468 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); 471 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
@@ -470,7 +473,7 @@ cifs_get_sb(struct file_system_type *fs_type,
470 cFYI(1, ("Devname: %s flags: %d ", dev_name, flags)); 473 cFYI(1, ("Devname: %s flags: %d ", dev_name, flags));
471 474
472 if (IS_ERR(sb)) 475 if (IS_ERR(sb))
473 return sb; 476 return PTR_ERR(sb);
474 477
475 sb->s_flags = flags; 478 sb->s_flags = flags;
476 479
@@ -478,10 +481,10 @@ cifs_get_sb(struct file_system_type *fs_type,
478 if (rc) { 481 if (rc) {
479 up_write(&sb->s_umount); 482 up_write(&sb->s_umount);
480 deactivate_super(sb); 483 deactivate_super(sb);
481 return ERR_PTR(rc); 484 return rc;
482 } 485 }
483 sb->s_flags |= MS_ACTIVE; 486 sb->s_flags |= MS_ACTIVE;
484 return sb; 487 return simple_set_mnt(mnt, sb);
485} 488}
486 489
487static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, 490static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
@@ -905,7 +908,7 @@ static int cifs_dnotify_thread(void * dummyarg)
905 struct cifsSesInfo *ses; 908 struct cifsSesInfo *ses;
906 909
907 do { 910 do {
908 if(try_to_freeze()) 911 if (try_to_freeze())
909 continue; 912 continue;
910 set_current_state(TASK_INTERRUPTIBLE); 913 set_current_state(TASK_INTERRUPTIBLE);
911 schedule_timeout(15*HZ); 914 schedule_timeout(15*HZ);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c98755dca868..8f75c6f24701 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -32,7 +32,8 @@
32#define TRUE 1 32#define TRUE 1
33#endif 33#endif
34 34
35extern struct address_space_operations cifs_addr_ops; 35extern const struct address_space_operations cifs_addr_ops;
36extern const struct address_space_operations cifs_addr_ops_smallbuf;
36 37
37/* Functions related to super block operations */ 38/* Functions related to super block operations */
38extern struct super_operations cifs_super_ops; 39extern struct super_operations cifs_super_ops;
@@ -74,7 +75,7 @@ extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
74 size_t write_size, loff_t * poffset); 75 size_t write_size, loff_t * poffset);
75extern int cifs_lock(struct file *, int, struct file_lock *); 76extern int cifs_lock(struct file *, int, struct file_lock *);
76extern int cifs_fsync(struct file *, struct dentry *, int); 77extern int cifs_fsync(struct file *, struct dentry *, int);
77extern int cifs_flush(struct file *); 78extern int cifs_flush(struct file *, fl_owner_t id);
78extern int cifs_file_mmap(struct file * , struct vm_area_struct *); 79extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
79extern const struct file_operations cifs_dir_ops; 80extern const struct file_operations cifs_dir_ops;
80extern int cifs_dir_open(struct inode *inode, struct file *file); 81extern int cifs_dir_open(struct inode *inode, struct file *file);
@@ -99,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
100extern int cifs_ioctl (struct inode * inode, struct file * filep, 101extern int cifs_ioctl (struct inode * inode, struct file * filep,
101 unsigned int command, unsigned long arg); 102 unsigned int command, unsigned long arg);
102#define CIFS_VERSION "1.43" 103#define CIFS_VERSION "1.44"
103#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 006eb33bff5f..6d7cf5f3bc0b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -88,7 +88,8 @@ enum statusEnum {
88}; 88};
89 89
90enum securityEnum { 90enum securityEnum {
91 NTLM = 0, /* Legacy NTLM012 auth with NTLM hash */ 91 LANMAN = 0, /* Legacy LANMAN auth */
92 NTLM, /* Legacy NTLM012 auth with NTLM hash */
92 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 93 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
93 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 94 RawNTLMSSP, /* NTLMSSP without SPNEGO */
94 NTLMSSP, /* NTLMSSP via SPNEGO */ 95 NTLMSSP, /* NTLMSSP via SPNEGO */
@@ -157,7 +158,7 @@ struct TCP_Server_Info {
157 /* 16th byte of RFC1001 workstation name is always null */ 158 /* 16th byte of RFC1001 workstation name is always null */
158 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; 159 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
159 __u32 sequence_number; /* needed for CIFS PDU signature */ 160 __u32 sequence_number; /* needed for CIFS PDU signature */
160 char mac_signing_key[CIFS_SESSION_KEY_SIZE + 16]; 161 char mac_signing_key[CIFS_SESS_KEY_SIZE + 16];
161}; 162};
162 163
163/* 164/*
@@ -179,10 +180,13 @@ struct cifsUidInfo {
179struct cifsSesInfo { 180struct cifsSesInfo {
180 struct list_head cifsSessionList; 181 struct list_head cifsSessionList;
181 struct semaphore sesSem; 182 struct semaphore sesSem;
183#if 0
182 struct cifsUidInfo *uidInfo; /* pointer to user info */ 184 struct cifsUidInfo *uidInfo; /* pointer to user info */
185#endif
183 struct TCP_Server_Info *server; /* pointer to server info */ 186 struct TCP_Server_Info *server; /* pointer to server info */
184 atomic_t inUse; /* # of mounts (tree connections) on this ses */ 187 atomic_t inUse; /* # of mounts (tree connections) on this ses */
185 enum statusEnum status; 188 enum statusEnum status;
189 unsigned overrideSecFlg; /* if non-zero override global sec flags */
186 __u16 ipc_tid; /* special tid for connection to IPC share */ 190 __u16 ipc_tid; /* special tid for connection to IPC share */
187 __u16 flags; 191 __u16 flags;
188 char *serverOS; /* name of operating system underlying server */ 192 char *serverOS; /* name of operating system underlying server */
@@ -194,7 +198,7 @@ struct cifsSesInfo {
194 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 198 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
195 TCP names - will ipv6 and sctp addresses fit? */ 199 TCP names - will ipv6 and sctp addresses fit? */
196 char userName[MAX_USERNAME_SIZE + 1]; 200 char userName[MAX_USERNAME_SIZE + 1];
197 char domainName[MAX_USERNAME_SIZE + 1]; 201 char * domainName;
198 char * password; 202 char * password;
199}; 203};
200/* session flags */ 204/* session flags */
@@ -209,12 +213,12 @@ struct cifsTconInfo {
209 struct list_head openFileList; 213 struct list_head openFileList;
210 struct semaphore tconSem; 214 struct semaphore tconSem;
211 struct cifsSesInfo *ses; /* pointer to session associated with */ 215 struct cifsSesInfo *ses; /* pointer to session associated with */
212 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource (in ASCII not UTF) */ 216 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
213 char *nativeFileSystem; 217 char *nativeFileSystem;
214 __u16 tid; /* The 2 byte tree id */ 218 __u16 tid; /* The 2 byte tree id */
215 __u16 Flags; /* optional support bits */ 219 __u16 Flags; /* optional support bits */
216 enum statusEnum tidStatus; 220 enum statusEnum tidStatus;
217 atomic_t useCount; /* how many mounts (explicit or implicit) to this share */ 221 atomic_t useCount; /* how many explicit/implicit mounts to share */
218#ifdef CONFIG_CIFS_STATS 222#ifdef CONFIG_CIFS_STATS
219 atomic_t num_smbs_sent; 223 atomic_t num_smbs_sent;
220 atomic_t num_writes; 224 atomic_t num_writes;
@@ -254,7 +258,7 @@ struct cifsTconInfo {
254 spinlock_t stat_lock; 258 spinlock_t stat_lock;
255#endif /* CONFIG_CIFS_STATS */ 259#endif /* CONFIG_CIFS_STATS */
256 FILE_SYSTEM_DEVICE_INFO fsDevInfo; 260 FILE_SYSTEM_DEVICE_INFO fsDevInfo;
257 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if file system name truncated */ 261 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
258 FILE_SYSTEM_UNIX_INFO fsUnixInfo; 262 FILE_SYSTEM_UNIX_INFO fsUnixInfo;
259 unsigned retry:1; 263 unsigned retry:1;
260 unsigned nocase:1; 264 unsigned nocase:1;
@@ -305,7 +309,6 @@ struct cifsFileInfo {
305 atomic_t wrtPending; /* handle in use - defer close */ 309 atomic_t wrtPending; /* handle in use - defer close */
306 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 310 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
307 char * search_resume_name; /* BB removeme BB */ 311 char * search_resume_name; /* BB removeme BB */
308 unsigned int resume_name_length; /* BB removeme - field renamed and moved BB */
309 struct cifs_search_info srch_inf; 312 struct cifs_search_info srch_inf;
310}; 313};
311 314
@@ -391,9 +394,9 @@ struct mid_q_entry {
391 struct smb_hdr *resp_buf; /* response buffer */ 394 struct smb_hdr *resp_buf; /* response buffer */
392 int midState; /* wish this were enum but can not pass to wait_event */ 395 int midState; /* wish this were enum but can not pass to wait_event */
393 __u8 command; /* smb command code */ 396 __u8 command; /* smb command code */
394 unsigned multiPart:1; /* multiple responses to one SMB request */
395 unsigned largeBuf:1; /* if valid response, is pointer to large buf */ 397 unsigned largeBuf:1; /* if valid response, is pointer to large buf */
396 unsigned multiResp:1; /* multiple trans2 responses for one request */ 398 unsigned multiRsp:1; /* multiple trans2 responses for one request */
399 unsigned multiEnd:1; /* both received */
397}; 400};
398 401
399struct oplock_q_entry { 402struct oplock_q_entry {
@@ -430,15 +433,35 @@ struct dir_notify_req {
430#define CIFS_LARGE_BUFFER 2 433#define CIFS_LARGE_BUFFER 2
431#define CIFS_IOVEC 4 /* array of response buffers */ 434#define CIFS_IOVEC 4 /* array of response buffers */
432 435
433/* Type of session setup needed */ 436/* Security Flags: indicate type of session setup needed */
434#define CIFS_PLAINTEXT 0 437#define CIFSSEC_MAY_SIGN 0x00001
435#define CIFS_LANMAN 1 438#define CIFSSEC_MAY_NTLM 0x00002
436#define CIFS_NTLM 2 439#define CIFSSEC_MAY_NTLMV2 0x00004
437#define CIFS_NTLMSSP_NEG 3 440#define CIFSSEC_MAY_KRB5 0x00008
438#define CIFS_NTLMSSP_AUTH 4 441#ifdef CONFIG_CIFS_WEAK_PW_HASH
439#define CIFS_SPNEGO_INIT 5 442#define CIFSSEC_MAY_LANMAN 0x00010
440#define CIFS_SPNEGO_TARG 6 443#define CIFSSEC_MAY_PLNTXT 0x00020
441 444#endif /* weak passwords */
445#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
446
447#define CIFSSEC_MUST_SIGN 0x01001
448/* note that only one of the following can be set so the
449result of setting MUST flags more than once will be to
450require use of the stronger protocol */
451#define CIFSSEC_MUST_NTLM 0x02002
452#define CIFSSEC_MUST_NTLMV2 0x04004
453#define CIFSSEC_MUST_KRB5 0x08008
454#ifdef CONFIG_CIFS_WEAK_PW_HASH
455#define CIFSSEC_MUST_LANMAN 0x10010
456#define CIFSSEC_MUST_PLNTXT 0x20020
457#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */
458#else
459#define CIFSSEC_MASK 0x07007 /* flags supported if no weak config */
460#endif /* WEAK_PW_HASH */
461#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
462
463#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
464#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
442/* 465/*
443 ***************************************************************** 466 *****************************************************************
444 * All constants go here 467 * All constants go here
@@ -500,16 +523,16 @@ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; /* protects list inserts on 3 above */
500GLOBAL_EXTERN struct list_head GlobalOplock_Q; 523GLOBAL_EXTERN struct list_head GlobalOplock_Q;
501 524
502GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */ 525GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */
503GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; /* Dir notify response queue */ 526GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;/* DirNotify response queue */
504 527
505/* 528/*
506 * Global transaction id (XID) information 529 * Global transaction id (XID) information
507 */ 530 */
508GLOBAL_EXTERN unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ 531GLOBAL_EXTERN unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */
509GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ 532GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
510GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ 533GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
511GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above and list operations */ 534GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
512 /* on midQ entries */ 535 /* on midQ entries */
513GLOBAL_EXTERN char Local_System_Name[15]; 536GLOBAL_EXTERN char Local_System_Name[15];
514 537
515/* 538/*
@@ -531,7 +554,7 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
531GLOBAL_EXTERN atomic_t midCount; 554GLOBAL_EXTERN atomic_t midCount;
532 555
533/* Misc globals */ 556/* Misc globals */
534GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions 557GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
535 to be established on existing mount if we 558 to be established on existing mount if we
536 have the uid/password or Kerberos credential 559 have the uid/password or Kerberos credential
537 or equivalent for current user */ 560 or equivalent for current user */
@@ -540,8 +563,8 @@ GLOBAL_EXTERN unsigned int experimEnabled;
540GLOBAL_EXTERN unsigned int lookupCacheEnabled; 563GLOBAL_EXTERN unsigned int lookupCacheEnabled;
541GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent 564GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent
542 with more secure ntlmssp2 challenge/resp */ 565 with more secure ntlmssp2 challenge/resp */
543GLOBAL_EXTERN unsigned int ntlmv2_support; /* better optional password hash */
544GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ 566GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
567GLOBAL_EXTERN unsigned int secFlags;
545GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ 568GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
546GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */ 569GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
547GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ 570GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b2233ac05bd2..86239023545b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -16,7 +16,7 @@
16 * 16 *
17 * You should have received a copy of the GNU Lesser General Public License 17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software 18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#ifndef _CIFSPDU_H 22#ifndef _CIFSPDU_H
@@ -24,8 +24,14 @@
24 24
25#include <net/sock.h> 25#include <net/sock.h>
26 26
27#ifdef CONFIG_CIFS_WEAK_PW_HASH
28#define LANMAN_PROT 0
29#define CIFS_PROT 1
30#else
27#define CIFS_PROT 0 31#define CIFS_PROT 0
28#define BAD_PROT CIFS_PROT+1 32#endif
33#define POSIX_PROT CIFS_PROT+1
34#define BAD_PROT 0xFFFF
29 35
30/* SMB command codes */ 36/* SMB command codes */
31/* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses 37/* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
@@ -110,7 +116,7 @@
110/* 116/*
111 * Size of the session key (crypto key encrypted with the password 117 * Size of the session key (crypto key encrypted with the password
112 */ 118 */
113#define CIFS_SESSION_KEY_SIZE (24) 119#define CIFS_SESS_KEY_SIZE (24)
114 120
115/* 121/*
116 * Maximum user name length 122 * Maximum user name length
@@ -400,6 +406,29 @@ typedef struct negotiate_req {
400 unsigned char DialectsArray[1]; 406 unsigned char DialectsArray[1];
401} __attribute__((packed)) NEGOTIATE_REQ; 407} __attribute__((packed)) NEGOTIATE_REQ;
402 408
409/* Dialect index is 13 for LANMAN */
410
411typedef struct lanman_neg_rsp {
412 struct smb_hdr hdr; /* wct = 13 */
413 __le16 DialectIndex;
414 __le16 SecurityMode;
415 __le16 MaxBufSize;
416 __le16 MaxMpxCount;
417 __le16 MaxNumberVcs;
418 __le16 RawMode;
419 __le32 SessionKey;
420 __le32 ServerTime;
421 __le16 ServerTimeZone;
422 __le16 EncryptionKeyLength;
423 __le16 Reserved;
424 __u16 ByteCount;
425 unsigned char EncryptionKey[1];
426} __attribute__((packed)) LANMAN_NEG_RSP;
427
428#define READ_RAW_ENABLE 1
429#define WRITE_RAW_ENABLE 2
430#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
431
403typedef struct negotiate_rsp { 432typedef struct negotiate_rsp {
404 struct smb_hdr hdr; /* wct = 17 */ 433 struct smb_hdr hdr; /* wct = 17 */
405 __le16 DialectIndex; 434 __le16 DialectIndex;
@@ -509,7 +538,7 @@ typedef union smb_com_session_setup_andx {
509/* unsigned char * NativeOS; */ 538/* unsigned char * NativeOS; */
510/* unsigned char * NativeLanMan; */ 539/* unsigned char * NativeLanMan; */
511/* unsigned char * PrimaryDomain; */ 540/* unsigned char * PrimaryDomain; */
512 } __attribute__((packed)) resp; /* NTLM response format (with or without extended security */ 541 } __attribute__((packed)) resp; /* NTLM response with or without extended sec*/
513 542
514 struct { /* request format */ 543 struct { /* request format */
515 struct smb_hdr hdr; /* wct = 10 */ 544 struct smb_hdr hdr; /* wct = 10 */
@@ -520,8 +549,8 @@ typedef union smb_com_session_setup_andx {
520 __le16 MaxMpxCount; 549 __le16 MaxMpxCount;
521 __le16 VcNumber; 550 __le16 VcNumber;
522 __u32 SessionKey; 551 __u32 SessionKey;
523 __le16 PassswordLength; 552 __le16 PasswordLength;
524 __u32 Reserved; 553 __u32 Reserved; /* encrypt key len and offset */
525 __le16 ByteCount; 554 __le16 ByteCount;
526 unsigned char AccountPassword[1]; /* followed by */ 555 unsigned char AccountPassword[1]; /* followed by */
527 /* STRING AccountName */ 556 /* STRING AccountName */
@@ -543,6 +572,26 @@ typedef union smb_com_session_setup_andx {
543 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */ 572 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */
544} __attribute__((packed)) SESSION_SETUP_ANDX; 573} __attribute__((packed)) SESSION_SETUP_ANDX;
545 574
575/* format of NLTMv2 Response ie "case sensitive password" hash when NTLMv2 */
576
577struct ntlmssp2_name {
578 __le16 type;
579 __le16 length;
580/* char name[length]; */
581} __attribute__((packed));
582
583struct ntlmv2_resp {
584 char ntlmv2_hash[CIFS_ENCPWD_SIZE];
585 __le32 blob_signature;
586 __u32 reserved;
587 __le64 time;
588 __u64 client_chal; /* random */
589 __u32 reserved2;
590 struct ntlmssp2_name names[1];
591 /* array of name entries could follow ending in minimum 4 byte struct */
592} __attribute__((packed));
593
594
546#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux" 595#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
547 596
548/* Capabilities bits (for NTLM SessSetup request) */ 597/* Capabilities bits (for NTLM SessSetup request) */
@@ -573,7 +622,9 @@ typedef struct smb_com_tconx_req {
573} __attribute__((packed)) TCONX_REQ; 622} __attribute__((packed)) TCONX_REQ;
574 623
575typedef struct smb_com_tconx_rsp { 624typedef struct smb_com_tconx_rsp {
576 struct smb_hdr hdr; /* wct = 3 *//* note that Win2000 has sent wct=7 in some cases on responses. Four unspecified words followed OptionalSupport */ 625 struct smb_hdr hdr; /* wct = 3 note that Win2000 has sent wct = 7
626 in some cases on responses. Four unspecified
627 words followed OptionalSupport */
577 __u8 AndXCommand; 628 __u8 AndXCommand;
578 __u8 AndXReserved; 629 __u8 AndXReserved;
579 __le16 AndXOffset; 630 __le16 AndXOffset;
@@ -1323,6 +1374,9 @@ struct smb_t2_rsp {
1323#define SMB_FILE_MAXIMUM_INFO 0x40d 1374#define SMB_FILE_MAXIMUM_INFO 0x40d
1324 1375
1325/* Find File infolevels */ 1376/* Find File infolevels */
1377#define SMB_FIND_FILE_INFO_STANDARD 0x001
1378#define SMB_FIND_FILE_QUERY_EA_SIZE 0x002
1379#define SMB_FIND_FILE_QUERY_EAS_FROM_LIST 0x003
1326#define SMB_FIND_FILE_DIRECTORY_INFO 0x101 1380#define SMB_FIND_FILE_DIRECTORY_INFO 0x101
1327#define SMB_FIND_FILE_FULL_DIRECTORY_INFO 0x102 1381#define SMB_FIND_FILE_FULL_DIRECTORY_INFO 0x102
1328#define SMB_FIND_FILE_NAMES_INFO 0x103 1382#define SMB_FIND_FILE_NAMES_INFO 0x103
@@ -1844,13 +1898,13 @@ typedef struct {
1844typedef struct { 1898typedef struct {
1845 __le32 DeviceType; 1899 __le32 DeviceType;
1846 __le32 DeviceCharacteristics; 1900 __le32 DeviceCharacteristics;
1847} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info, level 0x104 */ 1901} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */
1848 1902
1849typedef struct { 1903typedef struct {
1850 __le32 Attributes; 1904 __le32 Attributes;
1851 __le32 MaxPathNameComponentLength; 1905 __le32 MaxPathNameComponentLength;
1852 __le32 FileSystemNameLen; 1906 __le32 FileSystemNameLen;
1853 char FileSystemName[52]; /* do not really need to save this - so potentially get only subset of name */ 1907 char FileSystemName[52]; /* do not have to save this - get subset? */
1854} __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO; 1908} __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO;
1855 1909
1856/******************************************************************************/ 1910/******************************************************************************/
@@ -1947,7 +2001,8 @@ typedef struct {
1947 2001
1948struct file_allocation_info { 2002struct file_allocation_info {
1949 __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */ 2003 __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
1950} __attribute__((packed)); /* size used on disk, level 0x103 for set, 0x105 for query */ 2004} __attribute__((packed)); /* size used on disk, for level 0x103 for set,
2005 0x105 for query */
1951 2006
1952struct file_end_of_file_info { 2007struct file_end_of_file_info {
1953 __le64 FileSize; /* offset to end of file */ 2008 __le64 FileSize; /* offset to end of file */
@@ -2054,7 +2109,7 @@ typedef struct {
2054 __le32 ExtFileAttributes; 2109 __le32 ExtFileAttributes;
2055 __le32 FileNameLength; 2110 __le32 FileNameLength;
2056 char FileName[1]; 2111 char FileName[1];
2057} __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF response data area */ 2112} __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */
2058 2113
2059typedef struct { 2114typedef struct {
2060 __le32 NextEntryOffset; 2115 __le32 NextEntryOffset;
@@ -2069,7 +2124,7 @@ typedef struct {
2069 __le32 FileNameLength; 2124 __le32 FileNameLength;
2070 __le32 EaSize; /* length of the xattrs */ 2125 __le32 EaSize; /* length of the xattrs */
2071 char FileName[1]; 2126 char FileName[1];
2072} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 FF response data area */ 2127} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 rsp data */
2073 2128
2074typedef struct { 2129typedef struct {
2075 __le32 NextEntryOffset; 2130 __le32 NextEntryOffset;
@@ -2086,7 +2141,7 @@ typedef struct {
2086 __le32 Reserved; 2141 __le32 Reserved;
2087 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ 2142 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
2088 char FileName[1]; 2143 char FileName[1];
2089} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF response data area */ 2144} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */
2090 2145
2091typedef struct { 2146typedef struct {
2092 __le32 NextEntryOffset; 2147 __le32 NextEntryOffset;
@@ -2104,7 +2159,22 @@ typedef struct {
2104 __u8 Reserved; 2159 __u8 Reserved;
2105 __u8 ShortName[12]; 2160 __u8 ShortName[12];
2106 char FileName[1]; 2161 char FileName[1];
2107} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FF response data area */ 2162} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */
2163
2164typedef struct {
2165 __u32 ResumeKey;
2166 __le16 CreationDate; /* SMB Date */
2167 __le16 CreationTime; /* SMB Time */
2168 __le16 LastAccessDate;
2169 __le16 LastAccessTime;
2170 __le16 LastWriteDate;
2171 __le16 LastWriteTime;
2172 __le32 DataSize; /* File Size (EOF) */
2173 __le32 AllocationSize;
2174 __le16 Attributes; /* verify not u32 */
2175 __u8 FileNameLength;
2176 char FileName[1];
2177} __attribute__((packed)) FIND_FILE_STANDARD_INFO; /* level 0x1 FF resp data */
2108 2178
2109 2179
2110struct win_dev { 2180struct win_dev {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 310ea2f0e0bf..a5ddc62d6fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -64,14 +64,12 @@ extern int map_smb_to_linux_error(struct smb_hdr *smb);
64extern void header_assemble(struct smb_hdr *, char /* command */ , 64extern void header_assemble(struct smb_hdr *, char /* command */ ,
65 const struct cifsTconInfo *, int /* length of 65 const struct cifsTconInfo *, int /* length of
66 fixed section (word count) in two byte units */); 66 fixed section (word count) in two byte units */);
67#ifdef CONFIG_CIFS_EXPERIMENTAL
68extern int small_smb_init_no_tc(const int smb_cmd, const int wct, 67extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
69 struct cifsSesInfo *ses, 68 struct cifsSesInfo *ses,
70 void ** request_buf); 69 void ** request_buf);
71extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 70extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
72 const int stage, int * pNTLMv2_flg, 71 const int stage,
73 const struct nls_table *nls_cp); 72 const struct nls_table *nls_cp);
74#endif
75extern __u16 GetNextMid(struct TCP_Server_Info *server); 73extern __u16 GetNextMid(struct TCP_Server_Info *server);
76extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 74extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16,
77 struct cifsTconInfo *); 75 struct cifsTconInfo *);
@@ -285,8 +283,14 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
285extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key, 283extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key,
286 __u32 expected_sequence_number); 284 __u32 expected_sequence_number);
287extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass); 285extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass);
288extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, struct nls_table *); 286extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
289extern void CalcNTLMv2_response(const struct cifsSesInfo *,char * ); 287 const struct nls_table *);
288extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
289extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
290 const struct nls_table *);
291#ifdef CONFIG_CIFS_WEAK_PW_HASH
292extern void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key);
293#endif /* CIFS_WEAK_PW_HASH */
290extern int CIFSSMBCopy(int xid, 294extern int CIFSSMBCopy(int xid,
291 struct cifsTconInfo *source_tcon, 295 struct cifsTconInfo *source_tcon,
292 const char *fromName, 296 const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 925881e00ff2..19678c575dfc 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -44,8 +44,11 @@ static struct {
44 int index; 44 int index;
45 char *name; 45 char *name;
46} protocols[] = { 46} protocols[] = {
47#ifdef CONFIG_CIFS_WEAK_PW_HASH
48 {LANMAN_PROT, "\2LM1.2X002"},
49#endif /* weak password hashing for legacy clients */
47 {CIFS_PROT, "\2NT LM 0.12"}, 50 {CIFS_PROT, "\2NT LM 0.12"},
48 {CIFS_PROT, "\2POSIX 2"}, 51 {POSIX_PROT, "\2POSIX 2"},
49 {BAD_PROT, "\2"} 52 {BAD_PROT, "\2"}
50}; 53};
51#else 54#else
@@ -53,11 +56,29 @@ static struct {
53 int index; 56 int index;
54 char *name; 57 char *name;
55} protocols[] = { 58} protocols[] = {
59#ifdef CONFIG_CIFS_WEAK_PW_HASH
60 {LANMAN_PROT, "\2LM1.2X002"},
61#endif /* weak password hashing for legacy clients */
56 {CIFS_PROT, "\2NT LM 0.12"}, 62 {CIFS_PROT, "\2NT LM 0.12"},
57 {BAD_PROT, "\2"} 63 {BAD_PROT, "\2"}
58}; 64};
59#endif 65#endif
60 66
67/* define the number of elements in the cifs dialect array */
68#ifdef CONFIG_CIFS_POSIX
69#ifdef CONFIG_CIFS_WEAK_PW_HASH
70#define CIFS_NUM_PROT 3
71#else
72#define CIFS_NUM_PROT 2
73#endif /* CIFS_WEAK_PW_HASH */
74#else /* not posix */
75#ifdef CONFIG_CIFS_WEAK_PW_HASH
76#define CIFS_NUM_PROT 2
77#else
78#define CIFS_NUM_PROT 1
79#endif /* CONFIG_CIFS_WEAK_PW_HASH */
80#endif /* CIFS_POSIX */
81
61 82
62/* Mark as invalid, all open files on tree connections since they 83/* Mark as invalid, all open files on tree connections since they
63 were closed when session to server was lost */ 84 were closed when session to server was lost */
@@ -188,7 +209,6 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
188 return rc; 209 return rc;
189} 210}
190 211
191#ifdef CONFIG_CIFS_EXPERIMENTAL
192int 212int
193small_smb_init_no_tc(const int smb_command, const int wct, 213small_smb_init_no_tc(const int smb_command, const int wct,
194 struct cifsSesInfo *ses, void **request_buf) 214 struct cifsSesInfo *ses, void **request_buf)
@@ -214,7 +234,6 @@ small_smb_init_no_tc(const int smb_command, const int wct,
214 234
215 return rc; 235 return rc;
216} 236}
217#endif /* CONFIG_CIFS_EXPERIMENTAL */
218 237
219/* If the return code is zero, this function must fill in request_buf pointer */ 238/* If the return code is zero, this function must fill in request_buf pointer */
220static int 239static int
@@ -322,7 +341,8 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
322 /* potential retries of smb operations it turns out we can determine */ 341 /* potential retries of smb operations it turns out we can determine */
323 /* from the mid flags when the request buffer can be resent without */ 342 /* from the mid flags when the request buffer can be resent without */
324 /* having to use a second distinct buffer for the response */ 343 /* having to use a second distinct buffer for the response */
325 *response_buf = *request_buf; 344 if(response_buf)
345 *response_buf = *request_buf;
326 346
327 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon, 347 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
328 wct /*wct */ ); 348 wct /*wct */ );
@@ -373,8 +393,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
373 NEGOTIATE_RSP *pSMBr; 393 NEGOTIATE_RSP *pSMBr;
374 int rc = 0; 394 int rc = 0;
375 int bytes_returned; 395 int bytes_returned;
396 int i;
376 struct TCP_Server_Info * server; 397 struct TCP_Server_Info * server;
377 u16 count; 398 u16 count;
399 unsigned int secFlags;
378 400
379 if(ses->server) 401 if(ses->server)
380 server = ses->server; 402 server = ses->server;
@@ -386,101 +408,200 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
386 (void **) &pSMB, (void **) &pSMBr); 408 (void **) &pSMB, (void **) &pSMBr);
387 if (rc) 409 if (rc)
388 return rc; 410 return rc;
411
412 /* if any of auth flags (ie not sign or seal) are overriden use them */
413 if(ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
414 secFlags = ses->overrideSecFlg;
415 else /* if override flags set only sign/seal OR them with global auth */
416 secFlags = extended_security | ses->overrideSecFlg;
417
418 cFYI(1,("secFlags 0x%x",secFlags));
419
389 pSMB->hdr.Mid = GetNextMid(server); 420 pSMB->hdr.Mid = GetNextMid(server);
390 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; 421 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
391 if (extended_security) 422 if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
392 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 423 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
393 424
394 count = strlen(protocols[0].name) + 1; 425 count = 0;
395 strncpy(pSMB->DialectsArray, protocols[0].name, 30); 426 for(i=0;i<CIFS_NUM_PROT;i++) {
396 /* null guaranteed to be at end of source and target buffers anyway */ 427 strncpy(pSMB->DialectsArray+count, protocols[i].name, 16);
397 428 count += strlen(protocols[i].name) + 1;
429 /* null at end of source and target buffers anyway */
430 }
398 pSMB->hdr.smb_buf_length += count; 431 pSMB->hdr.smb_buf_length += count;
399 pSMB->ByteCount = cpu_to_le16(count); 432 pSMB->ByteCount = cpu_to_le16(count);
400 433
401 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 434 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
402 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 435 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
403 if (rc == 0) { 436 if (rc != 0)
404 server->secMode = pSMBr->SecurityMode; 437 goto neg_err_exit;
405 if((server->secMode & SECMODE_USER) == 0) 438
406 cFYI(1,("share mode security")); 439 cFYI(1,("Dialect: %d", pSMBr->DialectIndex));
407 server->secType = NTLM; /* BB override default for 440 /* Check wct = 1 error case */
408 NTLMv2 or kerberos v5 */ 441 if((pSMBr->hdr.WordCount < 13) || (pSMBr->DialectIndex == BAD_PROT)) {
409 /* one byte - no need to convert this or EncryptionKeyLen 442 /* core returns wct = 1, but we do not ask for core - otherwise
410 from little endian */ 443 small wct just comes when dialect index is -1 indicating we
411 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); 444 could not negotiate a common dialect */
412 /* probably no need to store and check maxvcs */ 445 rc = -EOPNOTSUPP;
413 server->maxBuf = 446 goto neg_err_exit;
414 min(le32_to_cpu(pSMBr->MaxBufferSize), 447#ifdef CONFIG_CIFS_WEAK_PW_HASH
448 } else if((pSMBr->hdr.WordCount == 13)
449 && (pSMBr->DialectIndex == LANMAN_PROT)) {
450 struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
451
452 if((secFlags & CIFSSEC_MAY_LANMAN) ||
453 (secFlags & CIFSSEC_MAY_PLNTXT))
454 server->secType = LANMAN;
455 else {
456 cERROR(1, ("mount failed weak security disabled"
457 " in /proc/fs/cifs/SecurityFlags"));
458 rc = -EOPNOTSUPP;
459 goto neg_err_exit;
460 }
461 server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode);
462 server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
463 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
464 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
465 GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey);
466 /* even though we do not use raw we might as well set this
467 accurately, in case we ever find a need for it */
468 if((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
469 server->maxRw = 0xFF00;
470 server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
471 } else {
472 server->maxRw = 0;/* we do not need to use raw anyway */
473 server->capabilities = CAP_MPX_MODE;
474 }
475 server->timeZone = le16_to_cpu(rsp->ServerTimeZone);
476
477 /* BB get server time for time conversions and add
478 code to use it and timezone since this is not UTC */
479
480 if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
481 memcpy(server->cryptKey, rsp->EncryptionKey,
482 CIFS_CRYPTO_KEY_SIZE);
483 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
484 rc = -EIO; /* need cryptkey unless plain text */
485 goto neg_err_exit;
486 }
487
488 cFYI(1,("LANMAN negotiated"));
489 /* we will not end up setting signing flags - as no signing
490 was in LANMAN and server did not return the flags on */
491 goto signing_check;
492#else /* weak security disabled */
493 } else if(pSMBr->hdr.WordCount == 13) {
494 cERROR(1,("mount failed, cifs module not built "
495 "with CIFS_WEAK_PW_HASH support"));
496 rc = -EOPNOTSUPP;
497#endif /* WEAK_PW_HASH */
498 goto neg_err_exit;
499 } else if(pSMBr->hdr.WordCount != 17) {
500 /* unknown wct */
501 rc = -EOPNOTSUPP;
502 goto neg_err_exit;
503 }
504 /* else wct == 17 NTLM */
505 server->secMode = pSMBr->SecurityMode;
506 if((server->secMode & SECMODE_USER) == 0)
507 cFYI(1,("share mode security"));
508
509 if((server->secMode & SECMODE_PW_ENCRYPT) == 0)
510#ifdef CONFIG_CIFS_WEAK_PW_HASH
511 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
512#endif /* CIFS_WEAK_PW_HASH */
513 cERROR(1,("Server requests plain text password"
514 " but client support disabled"));
515
516 if((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
517 server->secType = NTLMv2;
518 else if(secFlags & CIFSSEC_MAY_NTLM)
519 server->secType = NTLM;
520 else if(secFlags & CIFSSEC_MAY_NTLMV2)
521 server->secType = NTLMv2;
522 /* else krb5 ... any others ... */
523
524 /* one byte, so no need to convert this or EncryptionKeyLen from
525 little endian */
526 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount);
527 /* probably no need to store and check maxvcs */
528 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
415 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 529 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
416 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); 530 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
417 cFYI(0, ("Max buf = %d", ses->server->maxBuf)); 531 cFYI(0, ("Max buf = %d", ses->server->maxBuf));
418 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 532 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
419 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 533 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
420 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone); 534 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);
421 /* BB with UTC do we ever need to be using srvr timezone? */ 535 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
422 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 536 memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
423 memcpy(server->cryptKey, pSMBr->u.EncryptionKey, 537 CIFS_CRYPTO_KEY_SIZE);
424 CIFS_CRYPTO_KEY_SIZE); 538 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
425 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 539 && (pSMBr->EncryptionKeyLength == 0)) {
426 && (pSMBr->EncryptionKeyLength == 0)) { 540 /* decode security blob */
427 /* decode security blob */ 541 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
428 } else 542 rc = -EIO; /* no crypt key only if plain text pwd */
429 rc = -EIO; 543 goto neg_err_exit;
544 }
430 545
431 /* BB might be helpful to save off the domain of server here */ 546 /* BB might be helpful to save off the domain of server here */
432 547
433 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 548 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
434 (server->capabilities & CAP_EXTENDED_SECURITY)) { 549 (server->capabilities & CAP_EXTENDED_SECURITY)) {
435 count = pSMBr->ByteCount; 550 count = pSMBr->ByteCount;
436 if (count < 16) 551 if (count < 16)
437 rc = -EIO; 552 rc = -EIO;
438 else if (count == 16) { 553 else if (count == 16) {
439 server->secType = RawNTLMSSP; 554 server->secType = RawNTLMSSP;
440 if (server->socketUseCount.counter > 1) { 555 if (server->socketUseCount.counter > 1) {
441 if (memcmp 556 if (memcmp(server->server_GUID,
442 (server->server_GUID, 557 pSMBr->u.extended_response.
443 pSMBr->u.extended_response. 558 GUID, 16) != 0) {
444 GUID, 16) != 0) { 559 cFYI(1, ("server UID changed"));
445 cFYI(1, ("server UID changed"));
446 memcpy(server->
447 server_GUID,
448 pSMBr->u.
449 extended_response.
450 GUID, 16);
451 }
452 } else
453 memcpy(server->server_GUID, 560 memcpy(server->server_GUID,
454 pSMBr->u.extended_response. 561 pSMBr->u.extended_response.GUID,
455 GUID, 16); 562 16);
456 } else {
457 rc = decode_negTokenInit(pSMBr->u.
458 extended_response.
459 SecurityBlob,
460 count - 16,
461 &server->secType);
462 if(rc == 1) {
463 /* BB Need to fill struct for sessetup here */
464 rc = -EOPNOTSUPP;
465 } else {
466 rc = -EINVAL;
467 } 563 }
564 } else
565 memcpy(server->server_GUID,
566 pSMBr->u.extended_response.GUID, 16);
567 } else {
568 rc = decode_negTokenInit(pSMBr->u.extended_response.
569 SecurityBlob,
570 count - 16,
571 &server->secType);
572 if(rc == 1) {
573 /* BB Need to fill struct for sessetup here */
574 rc = -EOPNOTSUPP;
575 } else {
576 rc = -EINVAL;
468 } 577 }
469 } else
470 server->capabilities &= ~CAP_EXTENDED_SECURITY;
471 if(sign_CIFS_PDUs == FALSE) {
472 if(server->secMode & SECMODE_SIGN_REQUIRED)
473 cERROR(1,
474 ("Server requires /proc/fs/cifs/PacketSigningEnabled"));
475 server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
476 } else if(sign_CIFS_PDUs == 1) {
477 if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
478 server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
479 } 578 }
480 579 } else
580 server->capabilities &= ~CAP_EXTENDED_SECURITY;
581
582#ifdef CONFIG_CIFS_WEAK_PW_HASH
583signing_check:
584#endif
585 if(sign_CIFS_PDUs == FALSE) {
586 if(server->secMode & SECMODE_SIGN_REQUIRED)
587 cERROR(1,("Server requires "
588 "/proc/fs/cifs/PacketSigningEnabled to be on"));
589 server->secMode &=
590 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
591 } else if(sign_CIFS_PDUs == 1) {
592 if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
593 server->secMode &=
594 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
595 } else if(sign_CIFS_PDUs == 2) {
596 if((server->secMode &
597 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
598 cERROR(1,("signing required but server lacks support"));
599 }
481 } 600 }
482 601neg_err_exit:
483 cifs_buf_release(pSMB); 602 cifs_buf_release(pSMB);
603
604 cFYI(1,("negprot rc %d",rc));
484 return rc; 605 return rc;
485} 606}
486 607
@@ -2239,7 +2360,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2239 } 2360 }
2240 symlinkinfo[buflen] = 0; /* just in case so the caller 2361 symlinkinfo[buflen] = 0; /* just in case so the caller
2241 does not go off the end of the buffer */ 2362 does not go off the end of the buffer */
2242 cFYI(1,("readlink result - %s ",symlinkinfo)); 2363 cFYI(1,("readlink result - %s",symlinkinfo));
2243 } 2364 }
2244 } 2365 }
2245qreparse_out: 2366qreparse_out:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index bae1479318d1..876eb9ef85fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -49,8 +49,6 @@
49 49
50static DECLARE_COMPLETION(cifsd_complete); 50static DECLARE_COMPLETION(cifsd_complete);
51 51
52extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
53 unsigned char *p24);
54extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 52extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
55 unsigned char *p24); 53 unsigned char *p24);
56 54
@@ -70,6 +68,7 @@ struct smb_vol {
70 gid_t linux_gid; 68 gid_t linux_gid;
71 mode_t file_mode; 69 mode_t file_mode;
72 mode_t dir_mode; 70 mode_t dir_mode;
71 unsigned secFlg;
73 unsigned rw:1; 72 unsigned rw:1;
74 unsigned retry:1; 73 unsigned retry:1;
75 unsigned intr:1; 74 unsigned intr:1;
@@ -83,12 +82,7 @@ struct smb_vol {
83 unsigned remap:1; /* set to remap seven reserved chars in filenames */ 82 unsigned remap:1; /* set to remap seven reserved chars in filenames */
84 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ 83 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */
85 unsigned sfu_emul:1; 84 unsigned sfu_emul:1;
86 unsigned krb5:1;
87 unsigned ntlm:1;
88 unsigned ntlmv2:1;
89 unsigned nullauth:1; /* attempt to authenticate with null user */ 85 unsigned nullauth:1; /* attempt to authenticate with null user */
90 unsigned sign:1;
91 unsigned seal:1; /* encrypt */
92 unsigned nocase; /* request case insensitive filenames */ 86 unsigned nocase; /* request case insensitive filenames */
93 unsigned nobrl; /* disable sending byte range locks to srv */ 87 unsigned nobrl; /* disable sending byte range locks to srv */
94 unsigned int rsize; 88 unsigned int rsize;
@@ -369,21 +363,21 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
369 continue; 363 continue;
370 if (bigbuf == NULL) { 364 if (bigbuf == NULL) {
371 bigbuf = cifs_buf_get(); 365 bigbuf = cifs_buf_get();
372 if(bigbuf == NULL) { 366 if (!bigbuf) {
373 cERROR(1,("No memory for large SMB response")); 367 cERROR(1, ("No memory for large SMB response"));
374 msleep(3000); 368 msleep(3000);
375 /* retry will check if exiting */ 369 /* retry will check if exiting */
376 continue; 370 continue;
377 } 371 }
378 } else if(isLargeBuf) { 372 } else if (isLargeBuf) {
379 /* we are reusing a dirtry large buf, clear its start */ 373 /* we are reusing a dirty large buf, clear its start */
380 memset(bigbuf, 0, sizeof (struct smb_hdr)); 374 memset(bigbuf, 0, sizeof (struct smb_hdr));
381 } 375 }
382 376
383 if (smallbuf == NULL) { 377 if (smallbuf == NULL) {
384 smallbuf = cifs_small_buf_get(); 378 smallbuf = cifs_small_buf_get();
385 if(smallbuf == NULL) { 379 if (!smallbuf) {
386 cERROR(1,("No memory for SMB response")); 380 cERROR(1, ("No memory for SMB response"));
387 msleep(1000); 381 msleep(1000);
388 /* retry will check if exiting */ 382 /* retry will check if exiting */
389 continue; 383 continue;
@@ -403,12 +397,12 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
403 kernel_recvmsg(csocket, &smb_msg, 397 kernel_recvmsg(csocket, &smb_msg,
404 &iov, 1, 4, 0 /* BB see socket.h flags */); 398 &iov, 1, 4, 0 /* BB see socket.h flags */);
405 399
406 if(server->tcpStatus == CifsExiting) { 400 if (server->tcpStatus == CifsExiting) {
407 break; 401 break;
408 } else if (server->tcpStatus == CifsNeedReconnect) { 402 } else if (server->tcpStatus == CifsNeedReconnect) {
409 cFYI(1,("Reconnect after server stopped responding")); 403 cFYI(1, ("Reconnect after server stopped responding"));
410 cifs_reconnect(server); 404 cifs_reconnect(server);
411 cFYI(1,("call to reconnect done")); 405 cFYI(1, ("call to reconnect done"));
412 csocket = server->ssocket; 406 csocket = server->ssocket;
413 continue; 407 continue;
414 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 408 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
@@ -417,15 +411,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
417 tcpStatus CifsNeedReconnect if server hung */ 411 tcpStatus CifsNeedReconnect if server hung */
418 continue; 412 continue;
419 } else if (length <= 0) { 413 } else if (length <= 0) {
420 if(server->tcpStatus == CifsNew) { 414 if (server->tcpStatus == CifsNew) {
421 cFYI(1,("tcp session abend after SMBnegprot")); 415 cFYI(1, ("tcp session abend after SMBnegprot"));
422 /* some servers kill the TCP session rather than 416 /* some servers kill the TCP session rather than
423 returning an SMB negprot error, in which 417 returning an SMB negprot error, in which
424 case reconnecting here is not going to help, 418 case reconnecting here is not going to help,
425 and so simply return error to mount */ 419 and so simply return error to mount */
426 break; 420 break;
427 } 421 }
428 if(length == -EINTR) { 422 if (!try_to_freeze() && (length == -EINTR)) {
429 cFYI(1,("cifsd thread killed")); 423 cFYI(1,("cifsd thread killed"));
430 break; 424 break;
431 } 425 }
@@ -585,9 +579,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
585 /* merge response - fix up 1st*/ 579 /* merge response - fix up 1st*/
586 if(coalesce_t2(smb_buffer, 580 if(coalesce_t2(smb_buffer,
587 mid_entry->resp_buf)) { 581 mid_entry->resp_buf)) {
582 mid_entry->multiRsp = 1;
588 break; 583 break;
589 } else { 584 } else {
590 /* all parts received */ 585 /* all parts received */
586 mid_entry->multiEnd = 1;
591 goto multi_t2_fnd; 587 goto multi_t2_fnd;
592 } 588 }
593 } else { 589 } else {
@@ -632,9 +628,14 @@ multi_t2_fnd:
632 wake_up_process(task_to_wake); 628 wake_up_process(task_to_wake);
633 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) 629 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE)
634 && (isMultiRsp == FALSE)) { 630 && (isMultiRsp == FALSE)) {
635 cERROR(1, ("No task to wake, unknown frame rcvd!")); 631 cERROR(1, ("No task to wake, unknown frame rcvd! NumMids %d", midCount.counter));
636 cifs_dump_mem("Received Data is: ",(char *)smb_buffer, 632 cifs_dump_mem("Received Data is: ",(char *)smb_buffer,
637 sizeof(struct smb_hdr)); 633 sizeof(struct smb_hdr));
634#ifdef CONFIG_CIFS_DEBUG2
635 cifs_dump_detail(smb_buffer);
636 cifs_dump_mids(server);
637#endif /* CIFS_DEBUG2 */
638
638 } 639 }
639 } /* end while !EXITING */ 640 } /* end while !EXITING */
640 641
@@ -784,7 +785,6 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
784 785
785 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 786 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
786 vol->rw = TRUE; 787 vol->rw = TRUE;
787 vol->ntlm = TRUE;
788 /* default is always to request posix paths. */ 788 /* default is always to request posix paths. */
789 vol->posix_paths = 1; 789 vol->posix_paths = 1;
790 790
@@ -915,30 +915,35 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
915 cERROR(1,("no security value specified")); 915 cERROR(1,("no security value specified"));
916 continue; 916 continue;
917 } else if (strnicmp(value, "krb5i", 5) == 0) { 917 } else if (strnicmp(value, "krb5i", 5) == 0) {
918 vol->sign = 1; 918 vol->secFlg |= CIFSSEC_MAY_KRB5 |
919 vol->krb5 = 1; 919 CIFSSEC_MUST_SIGN;
920 } else if (strnicmp(value, "krb5p", 5) == 0) { 920 } else if (strnicmp(value, "krb5p", 5) == 0) {
921 /* vol->seal = 1; 921 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
922 vol->krb5 = 1; */ 922 CIFSSEC_MAY_KRB5; */
923 cERROR(1,("Krb5 cifs privacy not supported")); 923 cERROR(1,("Krb5 cifs privacy not supported"));
924 return 1; 924 return 1;
925 } else if (strnicmp(value, "krb5", 4) == 0) { 925 } else if (strnicmp(value, "krb5", 4) == 0) {
926 vol->krb5 = 1; 926 vol->secFlg |= CIFSSEC_MAY_KRB5;
927 } else if (strnicmp(value, "ntlmv2i", 7) == 0) { 927 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
928 vol->ntlmv2 = 1; 928 vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
929 vol->sign = 1; 929 CIFSSEC_MUST_SIGN;
930 } else if (strnicmp(value, "ntlmv2", 6) == 0) { 930 } else if (strnicmp(value, "ntlmv2", 6) == 0) {
931 vol->ntlmv2 = 1; 931 vol->secFlg |= CIFSSEC_MAY_NTLMV2;
932 } else if (strnicmp(value, "ntlmi", 5) == 0) { 932 } else if (strnicmp(value, "ntlmi", 5) == 0) {
933 vol->ntlm = 1; 933 vol->secFlg |= CIFSSEC_MAY_NTLM |
934 vol->sign = 1; 934 CIFSSEC_MUST_SIGN;
935 } else if (strnicmp(value, "ntlm", 4) == 0) { 935 } else if (strnicmp(value, "ntlm", 4) == 0) {
936 /* ntlm is default so can be turned off too */ 936 /* ntlm is default so can be turned off too */
937 vol->ntlm = 1; 937 vol->secFlg |= CIFSSEC_MAY_NTLM;
938 } else if (strnicmp(value, "nontlm", 6) == 0) { 938 } else if (strnicmp(value, "nontlm", 6) == 0) {
939 vol->ntlm = 0; 939 /* BB is there a better way to do this? */
940 vol->secFlg |= CIFSSEC_MAY_NTLMV2;
941#ifdef CONFIG_CIFS_WEAK_PW_HASH
942 } else if (strnicmp(value, "lanman", 6) == 0) {
943 vol->secFlg |= CIFSSEC_MAY_LANMAN;
944#endif
940 } else if (strnicmp(value, "none", 4) == 0) { 945 } else if (strnicmp(value, "none", 4) == 0) {
941 vol->nullauth = 1; 946 vol->nullauth = 1;
942 } else { 947 } else {
943 cERROR(1,("bad security option: %s", value)); 948 cERROR(1,("bad security option: %s", value));
944 return 1; 949 return 1;
@@ -976,7 +981,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
976 } 981 }
977 /* BB are there cases in which a comma can be valid in 982 /* BB are there cases in which a comma can be valid in
978 a domain name and need special handling? */ 983 a domain name and need special handling? */
979 if (strnlen(value, 65) < 65) { 984 if (strnlen(value, 256) < 256) {
980 vol->domainname = value; 985 vol->domainname = value;
981 cFYI(1, ("Domain name set")); 986 cFYI(1, ("Domain name set"));
982 } else { 987 } else {
@@ -1168,6 +1173,10 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1168 vol->no_psx_acl = 0; 1173 vol->no_psx_acl = 0;
1169 } else if (strnicmp(data, "noacl",5) == 0) { 1174 } else if (strnicmp(data, "noacl",5) == 0) {
1170 vol->no_psx_acl = 1; 1175 vol->no_psx_acl = 1;
1176 } else if (strnicmp(data, "sign",4) == 0) {
1177 vol->secFlg |= CIFSSEC_MUST_SIGN;
1178/* } else if (strnicmp(data, "seal",4) == 0) {
1179 vol->secFlg |= CIFSSEC_MUST_SEAL; */
1171 } else if (strnicmp(data, "direct",6) == 0) { 1180 } else if (strnicmp(data, "direct",6) == 0) {
1172 vol->direct_io = 1; 1181 vol->direct_io = 1;
1173 } else if (strnicmp(data, "forcedirectio",13) == 0) { 1182 } else if (strnicmp(data, "forcedirectio",13) == 0) {
@@ -1762,11 +1771,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1762 if (volume_info.username) 1771 if (volume_info.username)
1763 strncpy(pSesInfo->userName, 1772 strncpy(pSesInfo->userName,
1764 volume_info.username,MAX_USERNAME_SIZE); 1773 volume_info.username,MAX_USERNAME_SIZE);
1765 if (volume_info.domainname) 1774 if (volume_info.domainname) {
1766 strncpy(pSesInfo->domainName, 1775 int len = strlen(volume_info.domainname);
1767 volume_info.domainname,MAX_USERNAME_SIZE); 1776 pSesInfo->domainName =
1777 kmalloc(len + 1, GFP_KERNEL);
1778 if(pSesInfo->domainName)
1779 strcpy(pSesInfo->domainName,
1780 volume_info.domainname);
1781 }
1768 pSesInfo->linux_uid = volume_info.linux_uid; 1782 pSesInfo->linux_uid = volume_info.linux_uid;
1783 pSesInfo->overrideSecFlg = volume_info.secFlg;
1769 down(&pSesInfo->sesSem); 1784 down(&pSesInfo->sesSem);
1785 /* BB FIXME need to pass vol->secFlgs BB */
1770 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls); 1786 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls);
1771 up(&pSesInfo->sesSem); 1787 up(&pSesInfo->sesSem);
1772 if(!rc) 1788 if(!rc)
@@ -1980,7 +1996,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1980 1996
1981static int 1997static int
1982CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, 1998CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1983 char session_key[CIFS_SESSION_KEY_SIZE], 1999 char session_key[CIFS_SESS_KEY_SIZE],
1984 const struct nls_table *nls_codepage) 2000 const struct nls_table *nls_codepage)
1985{ 2001{
1986 struct smb_hdr *smb_buffer; 2002 struct smb_hdr *smb_buffer;
@@ -2038,15 +2054,15 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2038 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 2054 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
2039 2055
2040 pSMB->req_no_secext.CaseInsensitivePasswordLength = 2056 pSMB->req_no_secext.CaseInsensitivePasswordLength =
2041 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2057 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2042 2058
2043 pSMB->req_no_secext.CaseSensitivePasswordLength = 2059 pSMB->req_no_secext.CaseSensitivePasswordLength =
2044 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2060 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2045 bcc_ptr = pByteArea(smb_buffer); 2061 bcc_ptr = pByteArea(smb_buffer);
2046 memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); 2062 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2047 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2063 bcc_ptr += CIFS_SESS_KEY_SIZE;
2048 memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); 2064 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2049 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2065 bcc_ptr += CIFS_SESS_KEY_SIZE;
2050 2066
2051 if (ses->capabilities & CAP_UNICODE) { 2067 if (ses->capabilities & CAP_UNICODE) {
2052 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */ 2068 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */
@@ -2054,7 +2070,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2054 bcc_ptr++; 2070 bcc_ptr++;
2055 } 2071 }
2056 if(user == NULL) 2072 if(user == NULL)
2057 bytes_returned = 0; /* skill null user */ 2073 bytes_returned = 0; /* skip null user */
2058 else 2074 else
2059 bytes_returned = 2075 bytes_returned =
2060 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, 2076 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100,
@@ -2162,8 +2178,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2162 if (remaining_words > 0) { 2178 if (remaining_words > 0) {
2163 len = UniStrnlen((wchar_t *)bcc_ptr, 2179 len = UniStrnlen((wchar_t *)bcc_ptr,
2164 remaining_words-1); 2180 remaining_words-1);
2165 if(ses->serverNOS) 2181 kfree(ses->serverNOS);
2166 kfree(ses->serverNOS);
2167 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); 2182 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
2168 if(ses->serverNOS == NULL) 2183 if(ses->serverNOS == NULL)
2169 goto sesssetup_nomem; 2184 goto sesssetup_nomem;
@@ -2203,12 +2218,10 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2203 /* if these kcallocs fail not much we 2218 /* if these kcallocs fail not much we
2204 can do, but better to not fail the 2219 can do, but better to not fail the
2205 sesssetup itself */ 2220 sesssetup itself */
2206 if(ses->serverDomain) 2221 kfree(ses->serverDomain);
2207 kfree(ses->serverDomain);
2208 ses->serverDomain = 2222 ses->serverDomain =
2209 kzalloc(2, GFP_KERNEL); 2223 kzalloc(2, GFP_KERNEL);
2210 if(ses->serverNOS) 2224 kfree(ses->serverNOS);
2211 kfree(ses->serverNOS);
2212 ses->serverNOS = 2225 ses->serverNOS =
2213 kzalloc(2, GFP_KERNEL); 2226 kzalloc(2, GFP_KERNEL);
2214 } 2227 }
@@ -2217,8 +2230,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2217 if (((long) bcc_ptr + len) - (long) 2230 if (((long) bcc_ptr + len) - (long)
2218 pByteArea(smb_buffer_response) 2231 pByteArea(smb_buffer_response)
2219 <= BCC(smb_buffer_response)) { 2232 <= BCC(smb_buffer_response)) {
2220 if(ses->serverOS) 2233 kfree(ses->serverOS);
2221 kfree(ses->serverOS);
2222 ses->serverOS = kzalloc(len + 1,GFP_KERNEL); 2234 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
2223 if(ses->serverOS == NULL) 2235 if(ses->serverOS == NULL)
2224 goto sesssetup_nomem; 2236 goto sesssetup_nomem;
@@ -2229,8 +2241,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2229 bcc_ptr++; 2241 bcc_ptr++;
2230 2242
2231 len = strnlen(bcc_ptr, 1024); 2243 len = strnlen(bcc_ptr, 1024);
2232 if(ses->serverNOS) 2244 kfree(ses->serverNOS);
2233 kfree(ses->serverNOS);
2234 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); 2245 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2235 if(ses->serverNOS == NULL) 2246 if(ses->serverNOS == NULL)
2236 goto sesssetup_nomem; 2247 goto sesssetup_nomem;
@@ -2274,292 +2285,6 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings,
2274} 2285}
2275 2286
2276static int 2287static int
2277CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2278 char *SecurityBlob,int SecurityBlobLength,
2279 const struct nls_table *nls_codepage)
2280{
2281 struct smb_hdr *smb_buffer;
2282 struct smb_hdr *smb_buffer_response;
2283 SESSION_SETUP_ANDX *pSMB;
2284 SESSION_SETUP_ANDX *pSMBr;
2285 char *bcc_ptr;
2286 char *user;
2287 char *domain;
2288 int rc = 0;
2289 int remaining_words = 0;
2290 int bytes_returned = 0;
2291 int len;
2292 __u32 capabilities;
2293 __u16 count;
2294
2295 cFYI(1, ("In spnego sesssetup "));
2296 if(ses == NULL)
2297 return -EINVAL;
2298 user = ses->userName;
2299 domain = ses->domainName;
2300
2301 smb_buffer = cifs_buf_get();
2302 if (smb_buffer == NULL) {
2303 return -ENOMEM;
2304 }
2305 smb_buffer_response = smb_buffer;
2306 pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2307
2308 /* send SMBsessionSetup here */
2309 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2310 NULL /* no tCon exists yet */ , 12 /* wct */ );
2311
2312 smb_buffer->Mid = GetNextMid(ses->server);
2313 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
2314 pSMB->req.AndXCommand = 0xFF;
2315 if(ses->server->maxBuf > 64*1024)
2316 ses->server->maxBuf = (64*1023);
2317 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2318 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2319
2320 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2321 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2322
2323 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2324 CAP_EXTENDED_SECURITY;
2325 if (ses->capabilities & CAP_UNICODE) {
2326 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2327 capabilities |= CAP_UNICODE;
2328 }
2329 if (ses->capabilities & CAP_STATUS32) {
2330 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2331 capabilities |= CAP_STATUS32;
2332 }
2333 if (ses->capabilities & CAP_DFS) {
2334 smb_buffer->Flags2 |= SMBFLG2_DFS;
2335 capabilities |= CAP_DFS;
2336 }
2337 pSMB->req.Capabilities = cpu_to_le32(capabilities);
2338
2339 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
2340 bcc_ptr = pByteArea(smb_buffer);
2341 memcpy(bcc_ptr, SecurityBlob, SecurityBlobLength);
2342 bcc_ptr += SecurityBlobLength;
2343
2344 if (ses->capabilities & CAP_UNICODE) {
2345 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode strings */
2346 *bcc_ptr = 0;
2347 bcc_ptr++;
2348 }
2349 bytes_returned =
2350 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, nls_codepage);
2351 bcc_ptr += 2 * bytes_returned; /* convert num of 16 bit words to bytes */
2352 bcc_ptr += 2; /* trailing null */
2353 if (domain == NULL)
2354 bytes_returned =
2355 cifs_strtoUCS((__le16 *) bcc_ptr,
2356 "CIFS_LINUX_DOM", 32, nls_codepage);
2357 else
2358 bytes_returned =
2359 cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2360 nls_codepage);
2361 bcc_ptr += 2 * bytes_returned;
2362 bcc_ptr += 2;
2363 bytes_returned =
2364 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
2365 32, nls_codepage);
2366 bcc_ptr += 2 * bytes_returned;
2367 bytes_returned =
2368 cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
2369 nls_codepage);
2370 bcc_ptr += 2 * bytes_returned;
2371 bcc_ptr += 2;
2372 bytes_returned =
2373 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
2374 64, nls_codepage);
2375 bcc_ptr += 2 * bytes_returned;
2376 bcc_ptr += 2;
2377 } else {
2378 strncpy(bcc_ptr, user, 200);
2379 bcc_ptr += strnlen(user, 200);
2380 *bcc_ptr = 0;
2381 bcc_ptr++;
2382 if (domain == NULL) {
2383 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
2384 bcc_ptr += strlen("CIFS_LINUX_DOM") + 1;
2385 } else {
2386 strncpy(bcc_ptr, domain, 64);
2387 bcc_ptr += strnlen(domain, 64);
2388 *bcc_ptr = 0;
2389 bcc_ptr++;
2390 }
2391 strcpy(bcc_ptr, "Linux version ");
2392 bcc_ptr += strlen("Linux version ");
2393 strcpy(bcc_ptr, system_utsname.release);
2394 bcc_ptr += strlen(system_utsname.release) + 1;
2395 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2396 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2397 }
2398 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2399 smb_buffer->smb_buf_length += count;
2400 pSMB->req.ByteCount = cpu_to_le16(count);
2401
2402 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2403 &bytes_returned, 1);
2404 if (rc) {
2405/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */
2406 } else if ((smb_buffer_response->WordCount == 3)
2407 || (smb_buffer_response->WordCount == 4)) {
2408 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2409 __u16 blob_len =
2410 le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2411 if (action & GUEST_LOGIN)
2412 cFYI(1, (" Guest login")); /* BB do we want to set anything in SesInfo struct ? */
2413 if (ses) {
2414 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format (le) */
2415 cFYI(1, ("UID = %d ", ses->Suid));
2416 bcc_ptr = pByteArea(smb_buffer_response); /* response can have either 3 or 4 word count - Samba sends 3 */
2417
2418 /* BB Fix below to make endian neutral !! */
2419
2420 if ((pSMBr->resp.hdr.WordCount == 3)
2421 || ((pSMBr->resp.hdr.WordCount == 4)
2422 && (blob_len <
2423 pSMBr->resp.ByteCount))) {
2424 if (pSMBr->resp.hdr.WordCount == 4) {
2425 bcc_ptr +=
2426 blob_len;
2427 cFYI(1,
2428 ("Security Blob Length %d ",
2429 blob_len));
2430 }
2431
2432 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2433 if ((long) (bcc_ptr) % 2) {
2434 remaining_words =
2435 (BCC(smb_buffer_response)
2436 - 1) / 2;
2437 bcc_ptr++; /* Unicode strings must be word aligned */
2438 } else {
2439 remaining_words =
2440 BCC
2441 (smb_buffer_response) / 2;
2442 }
2443 len =
2444 UniStrnlen((wchar_t *) bcc_ptr,
2445 remaining_words - 1);
2446/* We look for obvious messed up bcc or strings in response so we do not go off
2447 the end since (at least) WIN2K and Windows XP have a major bug in not null
2448 terminating last Unicode string in response */
2449 if(ses->serverOS)
2450 kfree(ses->serverOS);
2451 ses->serverOS =
2452 kzalloc(2 * (len + 1), GFP_KERNEL);
2453 cifs_strfromUCS_le(ses->serverOS,
2454 (__le16 *)
2455 bcc_ptr, len,
2456 nls_codepage);
2457 bcc_ptr += 2 * (len + 1);
2458 remaining_words -= len + 1;
2459 ses->serverOS[2 * len] = 0;
2460 ses->serverOS[1 + (2 * len)] = 0;
2461 if (remaining_words > 0) {
2462 len = UniStrnlen((wchar_t *)bcc_ptr,
2463 remaining_words
2464 - 1);
2465 if(ses->serverNOS)
2466 kfree(ses->serverNOS);
2467 ses->serverNOS =
2468 kzalloc(2 * (len + 1),
2469 GFP_KERNEL);
2470 cifs_strfromUCS_le(ses->serverNOS,
2471 (__le16 *)bcc_ptr,
2472 len,
2473 nls_codepage);
2474 bcc_ptr += 2 * (len + 1);
2475 ses->serverNOS[2 * len] = 0;
2476 ses->serverNOS[1 + (2 * len)] = 0;
2477 remaining_words -= len + 1;
2478 if (remaining_words > 0) {
2479 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2480 /* last string not null terminated (e.g.Windows XP/2000) */
2481 if(ses->serverDomain)
2482 kfree(ses->serverDomain);
2483 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
2484 cifs_strfromUCS_le(ses->serverDomain,
2485 (__le16 *)bcc_ptr,
2486 len, nls_codepage);
2487 bcc_ptr += 2*(len+1);
2488 ses->serverDomain[2*len] = 0;
2489 ses->serverDomain[1+(2*len)] = 0;
2490 } /* else no more room so create dummy domain string */
2491 else {
2492 if(ses->serverDomain)
2493 kfree(ses->serverDomain);
2494 ses->serverDomain =
2495 kzalloc(2,GFP_KERNEL);
2496 }
2497 } else {/* no room use dummy domain&NOS */
2498 if(ses->serverDomain)
2499 kfree(ses->serverDomain);
2500 ses->serverDomain = kzalloc(2, GFP_KERNEL);
2501 if(ses->serverNOS)
2502 kfree(ses->serverNOS);
2503 ses->serverNOS = kzalloc(2, GFP_KERNEL);
2504 }
2505 } else { /* ASCII */
2506
2507 len = strnlen(bcc_ptr, 1024);
2508 if (((long) bcc_ptr + len) - (long)
2509 pByteArea(smb_buffer_response)
2510 <= BCC(smb_buffer_response)) {
2511 if(ses->serverOS)
2512 kfree(ses->serverOS);
2513 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
2514 strncpy(ses->serverOS, bcc_ptr, len);
2515
2516 bcc_ptr += len;
2517 bcc_ptr[0] = 0; /* null terminate the string */
2518 bcc_ptr++;
2519
2520 len = strnlen(bcc_ptr, 1024);
2521 if(ses->serverNOS)
2522 kfree(ses->serverNOS);
2523 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2524 strncpy(ses->serverNOS, bcc_ptr, len);
2525 bcc_ptr += len;
2526 bcc_ptr[0] = 0;
2527 bcc_ptr++;
2528
2529 len = strnlen(bcc_ptr, 1024);
2530 if(ses->serverDomain)
2531 kfree(ses->serverDomain);
2532 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
2533 strncpy(ses->serverDomain, bcc_ptr, len);
2534 bcc_ptr += len;
2535 bcc_ptr[0] = 0;
2536 bcc_ptr++;
2537 } else
2538 cFYI(1,
2539 ("Variable field of length %d extends beyond end of smb ",
2540 len));
2541 }
2542 } else {
2543 cERROR(1,
2544 (" Security Blob Length extends beyond end of SMB"));
2545 }
2546 } else {
2547 cERROR(1, ("No session structure passed in."));
2548 }
2549 } else {
2550 cERROR(1,
2551 (" Invalid Word count %d: ",
2552 smb_buffer_response->WordCount));
2553 rc = -EIO;
2554 }
2555
2556 if (smb_buffer)
2557 cifs_buf_release(smb_buffer);
2558
2559 return rc;
2560}
2561
2562static int
2563CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, 2288CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2564 struct cifsSesInfo *ses, int * pNTLMv2_flag, 2289 struct cifsSesInfo *ses, int * pNTLMv2_flag,
2565 const struct nls_table *nls_codepage) 2290 const struct nls_table *nls_codepage)
@@ -2635,8 +2360,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2635 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; 2360 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
2636 if(sign_CIFS_PDUs) 2361 if(sign_CIFS_PDUs)
2637 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; 2362 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
2638 if(ntlmv2_support) 2363/* if(ntlmv2_support)
2639 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2; 2364 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/
2640 /* setup pointers to domain name and workstation name */ 2365 /* setup pointers to domain name and workstation name */
2641 bcc_ptr += SecurityBlobLength; 2366 bcc_ptr += SecurityBlobLength;
2642 2367
@@ -2783,8 +2508,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2783 bcc_ptr, 2508 bcc_ptr,
2784 remaining_words 2509 remaining_words
2785 - 1); 2510 - 1);
2786 if(ses->serverNOS) 2511 kfree(ses->serverNOS);
2787 kfree(ses->serverNOS);
2788 ses->serverNOS = 2512 ses->serverNOS =
2789 kzalloc(2 * (len + 1), 2513 kzalloc(2 * (len + 1),
2790 GFP_KERNEL); 2514 GFP_KERNEL);
@@ -2802,8 +2526,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2802 if (remaining_words > 0) { 2526 if (remaining_words > 0) {
2803 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2527 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2804 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2528 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2805 if(ses->serverDomain) 2529 kfree(ses->serverDomain);
2806 kfree(ses->serverDomain);
2807 ses->serverDomain = 2530 ses->serverDomain =
2808 kzalloc(2 * 2531 kzalloc(2 *
2809 (len + 2532 (len +
@@ -2822,19 +2545,16 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2822 = 0; 2545 = 0;
2823 } /* else no more room so create dummy domain string */ 2546 } /* else no more room so create dummy domain string */
2824 else { 2547 else {
2825 if(ses->serverDomain) 2548 kfree(ses->serverDomain);
2826 kfree(ses->serverDomain);
2827 ses->serverDomain = 2549 ses->serverDomain =
2828 kzalloc(2, 2550 kzalloc(2,
2829 GFP_KERNEL); 2551 GFP_KERNEL);
2830 } 2552 }
2831 } else { /* no room so create dummy domain and NOS string */ 2553 } else { /* no room so create dummy domain and NOS string */
2832 if(ses->serverDomain); 2554 kfree(ses->serverDomain);
2833 kfree(ses->serverDomain);
2834 ses->serverDomain = 2555 ses->serverDomain =
2835 kzalloc(2, GFP_KERNEL); 2556 kzalloc(2, GFP_KERNEL);
2836 if(ses->serverNOS) 2557 kfree(ses->serverNOS);
2837 kfree(ses->serverNOS);
2838 ses->serverNOS = 2558 ses->serverNOS =
2839 kzalloc(2, GFP_KERNEL); 2559 kzalloc(2, GFP_KERNEL);
2840 } 2560 }
@@ -2856,8 +2576,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2856 bcc_ptr++; 2576 bcc_ptr++;
2857 2577
2858 len = strnlen(bcc_ptr, 1024); 2578 len = strnlen(bcc_ptr, 1024);
2859 if(ses->serverNOS) 2579 kfree(ses->serverNOS);
2860 kfree(ses->serverNOS);
2861 ses->serverNOS = 2580 ses->serverNOS =
2862 kzalloc(len + 1, 2581 kzalloc(len + 1,
2863 GFP_KERNEL); 2582 GFP_KERNEL);
@@ -2867,8 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2867 bcc_ptr++; 2586 bcc_ptr++;
2868 2587
2869 len = strnlen(bcc_ptr, 1024); 2588 len = strnlen(bcc_ptr, 1024);
2870 if(ses->serverDomain) 2589 kfree(ses->serverDomain);
2871 kfree(ses->serverDomain);
2872 ses->serverDomain = 2590 ses->serverDomain =
2873 kzalloc(len + 1, 2591 kzalloc(len + 1,
2874 GFP_KERNEL); 2592 GFP_KERNEL);
@@ -2994,14 +2712,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2994 SecurityBlob->LmChallengeResponse.Buffer = 0; 2712 SecurityBlob->LmChallengeResponse.Buffer = 0;
2995 2713
2996 SecurityBlob->NtChallengeResponse.Length = 2714 SecurityBlob->NtChallengeResponse.Length =
2997 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2715 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2998 SecurityBlob->NtChallengeResponse.MaximumLength = 2716 SecurityBlob->NtChallengeResponse.MaximumLength =
2999 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2717 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3000 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESSION_KEY_SIZE); 2718 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE);
3001 SecurityBlob->NtChallengeResponse.Buffer = 2719 SecurityBlob->NtChallengeResponse.Buffer =
3002 cpu_to_le32(SecurityBlobLength); 2720 cpu_to_le32(SecurityBlobLength);
3003 SecurityBlobLength += CIFS_SESSION_KEY_SIZE; 2721 SecurityBlobLength += CIFS_SESS_KEY_SIZE;
3004 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2722 bcc_ptr += CIFS_SESS_KEY_SIZE;
3005 2723
3006 if (ses->capabilities & CAP_UNICODE) { 2724 if (ses->capabilities & CAP_UNICODE) {
3007 if (domain == NULL) { 2725 if (domain == NULL) {
@@ -3190,8 +2908,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3190 bcc_ptr, 2908 bcc_ptr,
3191 remaining_words 2909 remaining_words
3192 - 1); 2910 - 1);
3193 if(ses->serverNOS) 2911 kfree(ses->serverNOS);
3194 kfree(ses->serverNOS);
3195 ses->serverNOS = 2912 ses->serverNOS =
3196 kzalloc(2 * (len + 1), 2913 kzalloc(2 * (len + 1),
3197 GFP_KERNEL); 2914 GFP_KERNEL);
@@ -3244,8 +2961,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3244 if(ses->serverDomain) 2961 if(ses->serverDomain)
3245 kfree(ses->serverDomain); 2962 kfree(ses->serverDomain);
3246 ses->serverDomain = kzalloc(2, GFP_KERNEL); 2963 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3247 if(ses->serverNOS) 2964 kfree(ses->serverNOS);
3248 kfree(ses->serverNOS);
3249 ses->serverNOS = kzalloc(2, GFP_KERNEL); 2965 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3250 } 2966 }
3251 } else { /* ASCII */ 2967 } else { /* ASCII */
@@ -3263,8 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3263 bcc_ptr++; 2979 bcc_ptr++;
3264 2980
3265 len = strnlen(bcc_ptr, 1024); 2981 len = strnlen(bcc_ptr, 1024);
3266 if(ses->serverNOS) 2982 kfree(ses->serverNOS);
3267 kfree(ses->serverNOS);
3268 ses->serverNOS = kzalloc(len+1,GFP_KERNEL); 2983 ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
3269 strncpy(ses->serverNOS, bcc_ptr, len); 2984 strncpy(ses->serverNOS, bcc_ptr, len);
3270 bcc_ptr += len; 2985 bcc_ptr += len;
@@ -3340,22 +3055,33 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3340 bcc_ptr = &pSMB->Password[0]; 3055 bcc_ptr = &pSMB->Password[0];
3341 if((ses->server->secMode) & SECMODE_USER) { 3056 if((ses->server->secMode) & SECMODE_USER) {
3342 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3057 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3058 *bcc_ptr = 0; /* password is null byte */
3343 bcc_ptr++; /* skip password */ 3059 bcc_ptr++; /* skip password */
3060 /* already aligned so no need to do it below */
3344 } else { 3061 } else {
3345 pSMB->PasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE); 3062 pSMB->PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
3346 /* BB FIXME add code to fail this if NTLMv2 or Kerberos 3063 /* BB FIXME add code to fail this if NTLMv2 or Kerberos
3347 specified as required (when that support is added to 3064 specified as required (when that support is added to
3348 the vfs in the future) as only NTLM or the much 3065 the vfs in the future) as only NTLM or the much
3349 weaker LANMAN (which we do not send) is accepted 3066 weaker LANMAN (which we do not send by default) is accepted
3350 by Samba (not sure whether other servers allow 3067 by Samba (not sure whether other servers allow
3351 NTLMv2 password here) */ 3068 NTLMv2 password here) */
3069#ifdef CONFIG_CIFS_WEAK_PW_HASH
3070 if((extended_security & CIFSSEC_MAY_LANMAN) &&
3071 (ses->server->secType == LANMAN))
3072 calc_lanman_hash(ses, bcc_ptr);
3073 else
3074#endif /* CIFS_WEAK_PW_HASH */
3352 SMBNTencrypt(ses->password, 3075 SMBNTencrypt(ses->password,
3353 ses->server->cryptKey, 3076 ses->server->cryptKey,
3354 bcc_ptr); 3077 bcc_ptr);
3355 3078
3356 bcc_ptr += CIFS_SESSION_KEY_SIZE; 3079 bcc_ptr += CIFS_SESS_KEY_SIZE;
3357 *bcc_ptr = 0; 3080 if(ses->capabilities & CAP_UNICODE) {
3358 bcc_ptr++; /* align */ 3081 /* must align unicode strings */
3082 *bcc_ptr = 0; /* null byte password */
3083 bcc_ptr++;
3084 }
3359 } 3085 }
3360 3086
3361 if(ses->server->secMode & 3087 if(ses->server->secMode &
@@ -3429,7 +3155,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3429 } 3155 }
3430 /* else do not bother copying these informational fields */ 3156 /* else do not bother copying these informational fields */
3431 } 3157 }
3432 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); 3158 if(smb_buffer_response->WordCount == 3)
3159 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
3160 else
3161 tcon->Flags = 0;
3433 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags)); 3162 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags));
3434 } else if ((rc == 0) && tcon == NULL) { 3163 } else if ((rc == 0) && tcon == NULL) {
3435 /* all we need to save for IPC$ connection */ 3164 /* all we need to save for IPC$ connection */
@@ -3494,7 +3223,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3494 struct nls_table * nls_info) 3223 struct nls_table * nls_info)
3495{ 3224{
3496 int rc = 0; 3225 int rc = 0;
3497 char ntlm_session_key[CIFS_SESSION_KEY_SIZE]; 3226 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3498 int ntlmv2_flag = FALSE; 3227 int ntlmv2_flag = FALSE;
3499 int first_time = 0; 3228 int first_time = 0;
3500 3229
@@ -3526,20 +3255,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3526 pSesInfo->server->secMode, 3255 pSesInfo->server->secMode,
3527 pSesInfo->server->capabilities, 3256 pSesInfo->server->capabilities,
3528 pSesInfo->server->timeZone)); 3257 pSesInfo->server->timeZone));
3529#ifdef CONFIG_CIFS_EXPERIMENTAL 3258 if(experimEnabled < 2)
3530 if(experimEnabled > 1) 3259 rc = CIFS_SessSetup(xid, pSesInfo,
3531 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */, 3260 first_time, nls_info);
3532 &ntlmv2_flag, nls_info); 3261 else if (extended_security
3533 else
3534#endif
3535 if (extended_security
3536 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3262 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3537 && (pSesInfo->server->secType == NTLMSSP)) { 3263 && (pSesInfo->server->secType == NTLMSSP)) {
3538 cFYI(1, ("New style sesssetup")); 3264 rc = -EOPNOTSUPP;
3539 rc = CIFSSpnegoSessSetup(xid, pSesInfo,
3540 NULL /* security blob */,
3541 0 /* blob length */,
3542 nls_info);
3543 } else if (extended_security 3265 } else if (extended_security
3544 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3266 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3545 && (pSesInfo->server->secType == RawNTLMSSP)) { 3267 && (pSesInfo->server->secType == RawNTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 82315edc77d7..ba4cbe9b0684 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -113,7 +113,7 @@ cifs_bp_rename_retry:
113 full_path[namelen+2] = 0; 113 full_path[namelen+2] = 0;
114BB remove above eight lines BB */ 114BB remove above eight lines BB */
115 115
116/* Inode operations in similar order to how they appear in the Linux file fs.h */ 116/* Inode operations in similar order to how they appear in Linux file fs.h */
117 117
118int 118int
119cifs_create(struct inode *inode, struct dentry *direntry, int mode, 119cifs_create(struct inode *inode, struct dentry *direntry, int mode,
@@ -178,11 +178,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
178 FreeXid(xid); 178 FreeXid(xid);
179 return -ENOMEM; 179 return -ENOMEM;
180 } 180 }
181 181 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
182 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 182 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
183 desiredAccess, CREATE_NOT_DIR, 183 desiredAccess, CREATE_NOT_DIR,
184 &fileHandle, &oplock, buf, cifs_sb->local_nls, 184 &fileHandle, &oplock, buf, cifs_sb->local_nls,
185 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 185 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
186 else
187 rc = -EIO; /* no NT SMB support fall into legacy open below */
188
186 if(rc == -EIO) { 189 if(rc == -EIO) {
187 /* old server, retry the open legacy style */ 190 /* old server, retry the open legacy style */
188 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 191 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -191,7 +194,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
191 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 194 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
192 } 195 }
193 if (rc) { 196 if (rc) {
194 cFYI(1, ("cifs_create returned 0x%x ", rc)); 197 cFYI(1, ("cifs_create returned 0x%x", rc));
195 } else { 198 } else {
196 /* If Open reported that we actually created a file 199 /* If Open reported that we actually created a file
197 then we now have to set the mode if possible */ 200 then we now have to set the mode if possible */
@@ -369,6 +372,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
369 cifs_sb->mnt_cifs_flags & 372 cifs_sb->mnt_cifs_flags &
370 CIFS_MOUNT_MAP_SPECIAL_CHR); 373 CIFS_MOUNT_MAP_SPECIAL_CHR);
371 374
375 /* BB FIXME - add handling for backlevel servers
376 which need legacy open and check for all
377 calls to SMBOpen for fallback to
378 SMBLeagcyOpen */
372 if(!rc) { 379 if(!rc) {
373 /* BB Do not bother to decode buf since no 380 /* BB Do not bother to decode buf since no
374 local inode yet to put timestamps in, 381 local inode yet to put timestamps in,
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 633a93811328..d91a3d44e9e3 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -91,14 +91,14 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
91 if(full_path == NULL) { 91 if(full_path == NULL) {
92 rc = -ENOMEM; 92 rc = -ENOMEM;
93 } else { 93 } else {
94 cERROR(1,("cifs dir notify on file %s with arg 0x%lx",full_path,arg)); /* BB removeme BB */ 94 cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg));
95 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 95 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
96 GENERIC_READ | SYNCHRONIZE, 0 /* create options */, 96 GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
97 &netfid, &oplock,NULL, cifs_sb->local_nls, 97 &netfid, &oplock,NULL, cifs_sb->local_nls,
98 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 98 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
99 /* BB fixme - add this handle to a notify handle list */ 99 /* BB fixme - add this handle to a notify handle list */
100 if(rc) { 100 if(rc) {
101 cERROR(1,("Could not open directory for notify")); /* BB remove BB */ 101 cFYI(1,("Could not open directory for notify"));
102 } else { 102 } else {
103 filter = convert_to_cifs_notify_flags(arg); 103 filter = convert_to_cifs_notify_flags(arg);
104 if(filter != 0) { 104 if(filter != 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e2b4ce1dad66..944d2b9e092d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -110,7 +110,6 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
110 &pCifsInode->openFileList); 110 &pCifsInode->openFileList);
111 } 111 }
112 write_unlock(&GlobalSMBSeslock); 112 write_unlock(&GlobalSMBSeslock);
113 write_unlock(&file->f_owner.lock);
114 if (pCifsInode->clientCanCacheRead) { 113 if (pCifsInode->clientCanCacheRead) {
115 /* we have the inode open somewhere else 114 /* we have the inode open somewhere else
116 no need to discard cache data */ 115 no need to discard cache data */
@@ -201,7 +200,7 @@ int cifs_open(struct inode *inode, struct file *file)
201 } else { 200 } else {
202 if (file->f_flags & O_EXCL) 201 if (file->f_flags & O_EXCL)
203 cERROR(1, ("could not find file instance for " 202 cERROR(1, ("could not find file instance for "
204 "new file %p ", file)); 203 "new file %p", file));
205 } 204 }
206 } 205 }
207 206
@@ -260,10 +259,15 @@ int cifs_open(struct inode *inode, struct file *file)
260 rc = -ENOMEM; 259 rc = -ENOMEM;
261 goto out; 260 goto out;
262 } 261 }
263 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, 262
264 CREATE_NOT_DIR, &netfid, &oplock, buf, 263 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
264 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
265 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
265 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags 266 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
266 & CIFS_MOUNT_MAP_SPECIAL_CHR); 267 & CIFS_MOUNT_MAP_SPECIAL_CHR);
268 else
269 rc = -EIO; /* no NT SMB support fall into legacy open below */
270
267 if (rc == -EIO) { 271 if (rc == -EIO) {
268 /* Old server, try legacy style OpenX */ 272 /* Old server, try legacy style OpenX */
269 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 273 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -272,7 +276,7 @@ int cifs_open(struct inode *inode, struct file *file)
272 & CIFS_MOUNT_MAP_SPECIAL_CHR); 276 & CIFS_MOUNT_MAP_SPECIAL_CHR);
273 } 277 }
274 if (rc) { 278 if (rc) {
275 cFYI(1, ("cifs_open returned 0x%x ", rc)); 279 cFYI(1, ("cifs_open returned 0x%x", rc));
276 goto out; 280 goto out;
277 } 281 }
278 file->private_data = 282 file->private_data =
@@ -282,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
282 goto out; 286 goto out;
283 } 287 }
284 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); 288 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
285 write_lock(&file->f_owner.lock);
286 write_lock(&GlobalSMBSeslock); 289 write_lock(&GlobalSMBSeslock);
287 list_add(&pCifsFile->tlist, &pTcon->openFileList); 290 list_add(&pCifsFile->tlist, &pTcon->openFileList);
288 291
@@ -293,7 +296,6 @@ int cifs_open(struct inode *inode, struct file *file)
293 &oplock, buf, full_path, xid); 296 &oplock, buf, full_path, xid);
294 } else { 297 } else {
295 write_unlock(&GlobalSMBSeslock); 298 write_unlock(&GlobalSMBSeslock);
296 write_unlock(&file->f_owner.lock);
297 } 299 }
298 300
299 if (oplock & CIFS_CREATE_ACTION) { 301 if (oplock & CIFS_CREATE_ACTION) {
@@ -322,7 +324,7 @@ out:
322 return rc; 324 return rc;
323} 325}
324 326
325/* Try to reaquire byte range locks that were released when session */ 327/* Try to reacquire byte range locks that were released when session */
326/* to server was lost */ 328/* to server was lost */
327static int cifs_relock_file(struct cifsFileInfo *cifsFile) 329static int cifs_relock_file(struct cifsFileInfo *cifsFile)
328{ 330{
@@ -409,8 +411,8 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
409 CIFS_MOUNT_MAP_SPECIAL_CHR); 411 CIFS_MOUNT_MAP_SPECIAL_CHR);
410 if (rc) { 412 if (rc) {
411 up(&pCifsFile->fh_sem); 413 up(&pCifsFile->fh_sem);
412 cFYI(1, ("cifs_open returned 0x%x ", rc)); 414 cFYI(1, ("cifs_open returned 0x%x", rc));
413 cFYI(1, ("oplock: %d ", oplock)); 415 cFYI(1, ("oplock: %d", oplock));
414 } else { 416 } else {
415 pCifsFile->netfid = netfid; 417 pCifsFile->netfid = netfid;
416 pCifsFile->invalidHandle = FALSE; 418 pCifsFile->invalidHandle = FALSE;
@@ -472,7 +474,6 @@ int cifs_close(struct inode *inode, struct file *file)
472 pTcon = cifs_sb->tcon; 474 pTcon = cifs_sb->tcon;
473 if (pSMBFile) { 475 if (pSMBFile) {
474 pSMBFile->closePend = TRUE; 476 pSMBFile->closePend = TRUE;
475 write_lock(&file->f_owner.lock);
476 if (pTcon) { 477 if (pTcon) {
477 /* no sense reconnecting to close a file that is 478 /* no sense reconnecting to close a file that is
478 already closed */ 479 already closed */
@@ -487,23 +488,18 @@ int cifs_close(struct inode *inode, struct file *file)
487 the struct would be in each open file, 488 the struct would be in each open file,
488 but this should give enough time to 489 but this should give enough time to
489 clear the socket */ 490 clear the socket */
490 write_unlock(&file->f_owner.lock);
491 cERROR(1,("close with pending writes")); 491 cERROR(1,("close with pending writes"));
492 msleep(timeout); 492 msleep(timeout);
493 write_lock(&file->f_owner.lock);
494 timeout *= 4; 493 timeout *= 4;
495 } 494 }
496 write_unlock(&file->f_owner.lock);
497 rc = CIFSSMBClose(xid, pTcon, 495 rc = CIFSSMBClose(xid, pTcon,
498 pSMBFile->netfid); 496 pSMBFile->netfid);
499 write_lock(&file->f_owner.lock);
500 } 497 }
501 } 498 }
502 write_lock(&GlobalSMBSeslock); 499 write_lock(&GlobalSMBSeslock);
503 list_del(&pSMBFile->flist); 500 list_del(&pSMBFile->flist);
504 list_del(&pSMBFile->tlist); 501 list_del(&pSMBFile->tlist);
505 write_unlock(&GlobalSMBSeslock); 502 write_unlock(&GlobalSMBSeslock);
506 write_unlock(&file->f_owner.lock);
507 kfree(pSMBFile->search_resume_name); 503 kfree(pSMBFile->search_resume_name);
508 kfree(file->private_data); 504 kfree(file->private_data);
509 file->private_data = NULL; 505 file->private_data = NULL;
@@ -531,7 +527,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
531 (struct cifsFileInfo *)file->private_data; 527 (struct cifsFileInfo *)file->private_data;
532 char *ptmp; 528 char *ptmp;
533 529
534 cFYI(1, ("Closedir inode = 0x%p with ", inode)); 530 cFYI(1, ("Closedir inode = 0x%p", inode));
535 531
536 xid = GetXid(); 532 xid = GetXid();
537 533
@@ -605,7 +601,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
605 } 601 }
606 if (pfLock->fl_flags & FL_ACCESS) 602 if (pfLock->fl_flags & FL_ACCESS)
607 cFYI(1, ("Process suspended by mandatory locking - " 603 cFYI(1, ("Process suspended by mandatory locking - "
608 "not implemented yet ")); 604 "not implemented yet"));
609 if (pfLock->fl_flags & FL_LEASE) 605 if (pfLock->fl_flags & FL_LEASE)
610 cFYI(1, ("Lease on file - not implemented yet")); 606 cFYI(1, ("Lease on file - not implemented yet"));
611 if (pfLock->fl_flags & 607 if (pfLock->fl_flags &
@@ -1079,9 +1075,9 @@ static int cifs_writepages(struct address_space *mapping,
1079 unsigned int bytes_written; 1075 unsigned int bytes_written;
1080 struct cifs_sb_info *cifs_sb; 1076 struct cifs_sb_info *cifs_sb;
1081 int done = 0; 1077 int done = 0;
1082 pgoff_t end = -1; 1078 pgoff_t end;
1083 pgoff_t index; 1079 pgoff_t index;
1084 int is_range = 0; 1080 int range_whole = 0;
1085 struct kvec iov[32]; 1081 struct kvec iov[32];
1086 int len; 1082 int len;
1087 int n_iov = 0; 1083 int n_iov = 0;
@@ -1122,16 +1118,14 @@ static int cifs_writepages(struct address_space *mapping,
1122 xid = GetXid(); 1118 xid = GetXid();
1123 1119
1124 pagevec_init(&pvec, 0); 1120 pagevec_init(&pvec, 0);
1125 if (wbc->sync_mode == WB_SYNC_NONE) 1121 if (wbc->range_cyclic) {
1126 index = mapping->writeback_index; /* Start from prev offset */ 1122 index = mapping->writeback_index; /* Start from prev offset */
1127 else { 1123 end = -1;
1128 index = 0; 1124 } else {
1129 scanned = 1; 1125 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1130 } 1126 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1131 if (wbc->start || wbc->end) { 1127 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1132 index = wbc->start >> PAGE_CACHE_SHIFT; 1128 range_whole = 1;
1133 end = wbc->end >> PAGE_CACHE_SHIFT;
1134 is_range = 1;
1135 scanned = 1; 1129 scanned = 1;
1136 } 1130 }
1137retry: 1131retry:
@@ -1167,7 +1161,7 @@ retry:
1167 break; 1161 break;
1168 } 1162 }
1169 1163
1170 if (unlikely(is_range) && (page->index > end)) { 1164 if (!wbc->range_cyclic && page->index > end) {
1171 done = 1; 1165 done = 1;
1172 unlock_page(page); 1166 unlock_page(page);
1173 break; 1167 break;
@@ -1271,7 +1265,7 @@ retry:
1271 index = 0; 1265 index = 0;
1272 goto retry; 1266 goto retry;
1273 } 1267 }
1274 if (!is_range) 1268 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1275 mapping->writeback_index = index; 1269 mapping->writeback_index = index;
1276 1270
1277 FreeXid(xid); 1271 FreeXid(xid);
@@ -1377,7 +1371,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1377 1371
1378 xid = GetXid(); 1372 xid = GetXid();
1379 1373
1380 cFYI(1, ("Sync file - name: %s datasync: 0x%x ", 1374 cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1381 dentry->d_name.name, datasync)); 1375 dentry->d_name.name, datasync));
1382 1376
1383 rc = filemap_fdatawrite(inode->i_mapping); 1377 rc = filemap_fdatawrite(inode->i_mapping);
@@ -1406,7 +1400,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1406/* fill in rpages then 1400/* fill in rpages then
1407 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ 1401 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1408 1402
1409/* cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index)); 1403/* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1410 1404
1411#if 0 1405#if 0
1412 if (rc < 0) 1406 if (rc < 0)
@@ -1419,7 +1413,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1419 * As file closes, flush all cached write data for this inode checking 1413 * As file closes, flush all cached write data for this inode checking
1420 * for write behind errors. 1414 * for write behind errors.
1421 */ 1415 */
1422int cifs_flush(struct file *file) 1416int cifs_flush(struct file *file, fl_owner_t id)
1423{ 1417{
1424 struct inode * inode = file->f_dentry->d_inode; 1418 struct inode * inode = file->f_dentry->d_inode;
1425 int rc = 0; 1419 int rc = 0;
@@ -1838,7 +1832,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
1838 if (rc < 0) 1832 if (rc < 0)
1839 goto io_error; 1833 goto io_error;
1840 else 1834 else
1841 cFYI(1, ("Bytes read %d ",rc)); 1835 cFYI(1, ("Bytes read %d",rc));
1842 1836
1843 file->f_dentry->d_inode->i_atime = 1837 file->f_dentry->d_inode->i_atime =
1844 current_fs_time(file->f_dentry->d_inode->i_sb); 1838 current_fs_time(file->f_dentry->d_inode->i_sb);
@@ -1948,7 +1942,7 @@ static int cifs_prepare_write(struct file *file, struct page *page,
1948 return 0; 1942 return 0;
1949} 1943}
1950 1944
1951struct address_space_operations cifs_addr_ops = { 1945const struct address_space_operations cifs_addr_ops = {
1952 .readpage = cifs_readpage, 1946 .readpage = cifs_readpage,
1953 .readpages = cifs_readpages, 1947 .readpages = cifs_readpages,
1954 .writepage = cifs_writepage, 1948 .writepage = cifs_writepage,
@@ -1959,3 +1953,19 @@ struct address_space_operations cifs_addr_ops = {
1959 /* .sync_page = cifs_sync_page, */ 1953 /* .sync_page = cifs_sync_page, */
1960 /* .direct_IO = */ 1954 /* .direct_IO = */
1961}; 1955};
1956
1957/*
1958 * cifs_readpages requires the server to support a buffer large enough to
1959 * contain the header plus one complete page of data. Otherwise, we need
1960 * to leave cifs_readpages out of the address space operations.
1961 */
1962const struct address_space_operations cifs_addr_ops_smallbuf = {
1963 .readpage = cifs_readpage,
1964 .writepage = cifs_writepage,
1965 .writepages = cifs_writepages,
1966 .prepare_write = cifs_prepare_write,
1967 .commit_write = cifs_commit_write,
1968 .set_page_dirty = __set_page_dirty_nobuffers,
1969 /* .sync_page = cifs_sync_page, */
1970 /* .direct_IO = */
1971};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4093764ef461..b88147c1dc27 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -41,7 +41,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
41 char *tmp_path; 41 char *tmp_path;
42 42
43 pTcon = cifs_sb->tcon; 43 pTcon = cifs_sb->tcon;
44 cFYI(1, ("Getting info on %s ", search_path)); 44 cFYI(1, ("Getting info on %s", search_path));
45 /* could have done a find first instead but this returns more info */ 45 /* could have done a find first instead but this returns more info */
46 rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData, 46 rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData,
47 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 47 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -97,9 +97,9 @@ int cifs_get_inode_info_unix(struct inode **pinode,
97 inode = *pinode; 97 inode = *pinode;
98 cifsInfo = CIFS_I(inode); 98 cifsInfo = CIFS_I(inode);
99 99
100 cFYI(1, ("Old time %ld ", cifsInfo->time)); 100 cFYI(1, ("Old time %ld", cifsInfo->time));
101 cifsInfo->time = jiffies; 101 cifsInfo->time = jiffies;
102 cFYI(1, ("New time %ld ", cifsInfo->time)); 102 cFYI(1, ("New time %ld", cifsInfo->time));
103 /* this is ok to set on every inode revalidate */ 103 /* this is ok to set on every inode revalidate */
104 atomic_set(&cifsInfo->inUse,1); 104 atomic_set(&cifsInfo->inUse,1);
105 105
@@ -180,11 +180,12 @@ int cifs_get_inode_info_unix(struct inode **pinode,
180 else /* not direct, send byte range locks */ 180 else /* not direct, send byte range locks */
181 inode->i_fop = &cifs_file_ops; 181 inode->i_fop = &cifs_file_ops;
182 182
183 inode->i_data.a_ops = &cifs_addr_ops;
184 /* check if server can support readpages */ 183 /* check if server can support readpages */
185 if(pTcon->ses->server->maxBuf < 184 if(pTcon->ses->server->maxBuf <
186 4096 + MAX_CIFS_HDR_SIZE) 185 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
187 inode->i_data.a_ops->readpages = NULL; 186 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
187 else
188 inode->i_data.a_ops = &cifs_addr_ops;
188 } else if (S_ISDIR(inode->i_mode)) { 189 } else if (S_ISDIR(inode->i_mode)) {
189 cFYI(1, ("Directory inode")); 190 cFYI(1, ("Directory inode"));
190 inode->i_op = &cifs_dir_inode_ops; 191 inode->i_op = &cifs_dir_inode_ops;
@@ -421,23 +422,23 @@ int cifs_get_inode_info(struct inode **pinode,
421 inode = *pinode; 422 inode = *pinode;
422 cifsInfo = CIFS_I(inode); 423 cifsInfo = CIFS_I(inode);
423 cifsInfo->cifsAttrs = attr; 424 cifsInfo->cifsAttrs = attr;
424 cFYI(1, ("Old time %ld ", cifsInfo->time)); 425 cFYI(1, ("Old time %ld", cifsInfo->time));
425 cifsInfo->time = jiffies; 426 cifsInfo->time = jiffies;
426 cFYI(1, ("New time %ld ", cifsInfo->time)); 427 cFYI(1, ("New time %ld", cifsInfo->time));
427 428
428 /* blksize needs to be multiple of two. So safer to default to 429 /* blksize needs to be multiple of two. So safer to default to
429 blksize and blkbits set in superblock so 2**blkbits and blksize 430 blksize and blkbits set in superblock so 2**blkbits and blksize
430 will match rather than setting to: 431 will match rather than setting to:
431 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ 432 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
432 433
433 /* Linux can not store file creation time unfortunately so we ignore it */ 434 /* Linux can not store file creation time so ignore it */
434 inode->i_atime = 435 inode->i_atime =
435 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 436 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
436 inode->i_mtime = 437 inode->i_mtime =
437 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 438 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
438 inode->i_ctime = 439 inode->i_ctime =
439 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); 440 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
440 cFYI(0, ("Attributes came in as 0x%x ", attr)); 441 cFYI(0, ("Attributes came in as 0x%x", attr));
441 442
442 /* set default mode. will override for dirs below */ 443 /* set default mode. will override for dirs below */
443 if (atomic_read(&cifsInfo->inUse) == 0) 444 if (atomic_read(&cifsInfo->inUse) == 0)
@@ -519,10 +520,11 @@ int cifs_get_inode_info(struct inode **pinode,
519 else /* not direct, send byte range locks */ 520 else /* not direct, send byte range locks */
520 inode->i_fop = &cifs_file_ops; 521 inode->i_fop = &cifs_file_ops;
521 522
522 inode->i_data.a_ops = &cifs_addr_ops;
523 if(pTcon->ses->server->maxBuf < 523 if(pTcon->ses->server->maxBuf <
524 4096 + MAX_CIFS_HDR_SIZE) 524 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
525 inode->i_data.a_ops->readpages = NULL; 525 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
526 else
527 inode->i_data.a_ops = &cifs_addr_ops;
526 } else if (S_ISDIR(inode->i_mode)) { 528 } else if (S_ISDIR(inode->i_mode)) {
527 cFYI(1, ("Directory inode")); 529 cFYI(1, ("Directory inode"));
528 inode->i_op = &cifs_dir_inode_ops; 530 inode->i_op = &cifs_dir_inode_ops;
@@ -731,7 +733,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
731 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 733 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
732 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 734 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
733 if (rc) { 735 if (rc) {
734 cFYI(1, ("cifs_mkdir returned 0x%x ", rc)); 736 cFYI(1, ("cifs_mkdir returned 0x%x", rc));
735 d_drop(direntry); 737 d_drop(direntry);
736 } else { 738 } else {
737 inode->i_nlink++; 739 inode->i_nlink++;
@@ -798,7 +800,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
798 char *full_path = NULL; 800 char *full_path = NULL;
799 struct cifsInodeInfo *cifsInode; 801 struct cifsInodeInfo *cifsInode;
800 802
801 cFYI(1, ("cifs_rmdir, inode = 0x%p with ", inode)); 803 cFYI(1, ("cifs_rmdir, inode = 0x%p", inode));
802 804
803 xid = GetXid(); 805 xid = GetXid();
804 806
@@ -1121,7 +1123,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1121 1123
1122 xid = GetXid(); 1124 xid = GetXid();
1123 1125
1124 cFYI(1, ("In cifs_setattr, name = %s attrs->iavalid 0x%x ", 1126 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1125 direntry->d_name.name, attrs->ia_valid)); 1127 direntry->d_name.name, attrs->ia_valid));
1126 1128
1127 cifs_sb = CIFS_SB(direntry->d_inode->i_sb); 1129 cifs_sb = CIFS_SB(direntry->d_inode->i_sb);
@@ -1157,6 +1159,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1157 when the local oplock break takes longer to flush 1159 when the local oplock break takes longer to flush
1158 writebehind data than the SMB timeout for the SetPathInfo 1160 writebehind data than the SMB timeout for the SetPathInfo
1159 request would allow */ 1161 request would allow */
1162
1160 open_file = find_writable_file(cifsInode); 1163 open_file = find_writable_file(cifsInode);
1161 if (open_file) { 1164 if (open_file) {
1162 __u16 nfid = open_file->netfid; 1165 __u16 nfid = open_file->netfid;
@@ -1289,7 +1292,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1289 it may be useful to Windows - but we do 1292 it may be useful to Windows - but we do
1290 not want to set ctime unless some other 1293 not want to set ctime unless some other
1291 timestamp is changing */ 1294 timestamp is changing */
1292 cFYI(1, ("CIFS - CTIME changed ")); 1295 cFYI(1, ("CIFS - CTIME changed"));
1293 time_buf.ChangeTime = 1296 time_buf.ChangeTime =
1294 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); 1297 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1295 } else 1298 } else
@@ -1356,7 +1359,7 @@ cifs_setattr_exit:
1356 1359
1357void cifs_delete_inode(struct inode *inode) 1360void cifs_delete_inode(struct inode *inode)
1358{ 1361{
1359 cFYI(1, ("In cifs_delete_inode, inode = 0x%p ", inode)); 1362 cFYI(1, ("In cifs_delete_inode, inode = 0x%p", inode));
1360 /* may have to add back in if and when safe distributed caching of 1363 /* may have to add back in if and when safe distributed caching of
1361 directories added e.g. via FindNotify */ 1364 directories added e.g. via FindNotify */
1362} 1365}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 2ec99f833142..a57f5d6e6213 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -167,7 +167,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
167 return -ENOMEM; 167 return -ENOMEM;
168 } 168 }
169 169
170 cFYI(1, ("Full path: %s ", full_path)); 170 cFYI(1, ("Full path: %s", full_path));
171 cFYI(1, ("symname is %s", symname)); 171 cFYI(1, ("symname is %s", symname));
172 172
173 /* BB what if DFS and this volume is on different share? BB */ 173 /* BB what if DFS and this volume is on different share? BB */
@@ -186,8 +186,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
186 inode->i_sb,xid); 186 inode->i_sb,xid);
187 187
188 if (rc != 0) { 188 if (rc != 0) {
189 cFYI(1, 189 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d",
190 ("Create symlink worked but get_inode_info failed with rc = %d ",
191 rc)); 190 rc));
192 } else { 191 } else {
193 if (pTcon->nocase) 192 if (pTcon->nocase)
@@ -289,7 +288,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
289 else { 288 else {
290 cFYI(1,("num referral: %d",num_referrals)); 289 cFYI(1,("num referral: %d",num_referrals));
291 if(referrals) { 290 if(referrals) {
292 cFYI(1,("referral string: %s ",referrals)); 291 cFYI(1,("referral string: %s",referrals));
293 strncpy(tmpbuffer, referrals, len-1); 292 strncpy(tmpbuffer, referrals, len-1);
294 } 293 }
295 } 294 }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index fafd056426e4..22c937e5884f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -101,6 +101,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
101 kfree(buf_to_free->serverDomain); 101 kfree(buf_to_free->serverDomain);
102 kfree(buf_to_free->serverNOS); 102 kfree(buf_to_free->serverNOS);
103 kfree(buf_to_free->password); 103 kfree(buf_to_free->password);
104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
106 107
@@ -499,11 +500,12 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
499 if(pSMBr->ByteCount > sizeof(struct file_notify_information)) { 500 if(pSMBr->ByteCount > sizeof(struct file_notify_information)) {
500 data_offset = le32_to_cpu(pSMBr->DataOffset); 501 data_offset = le32_to_cpu(pSMBr->DataOffset);
501 502
502 pnotify = (struct file_notify_information *)((char *)&pSMBr->hdr.Protocol 503 pnotify = (struct file_notify_information *)
503 + data_offset); 504 ((char *)&pSMBr->hdr.Protocol + data_offset);
504 cFYI(1,("dnotify on %s with action: 0x%x",pnotify->FileName, 505 cFYI(1,("dnotify on %s Action: 0x%x",pnotify->FileName,
505 pnotify->Action)); /* BB removeme BB */ 506 pnotify->Action)); /* BB removeme BB */
506 /* cifs_dump_mem("Received notify Data is: ",buf,sizeof(struct smb_hdr)+60); */ 507 /* cifs_dump_mem("Rcvd notify Data: ",buf,
508 sizeof(struct smb_hdr)+60); */
507 return TRUE; 509 return TRUE;
508 } 510 }
509 if(pSMBr->hdr.Status.CifsError) { 511 if(pSMBr->hdr.Status.CifsError) {
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 5de74d216fdd..b66eff5dc624 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -84,11 +84,11 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
84 84
85static const struct smb_to_posix_error mapping_table_ERRSRV[] = { 85static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
86 {ERRerror, -EIO}, 86 {ERRerror, -EIO},
87 {ERRbadpw, -EPERM}, 87 {ERRbadpw, -EACCES}, /* was EPERM */
88 {ERRbadtype, -EREMOTE}, 88 {ERRbadtype, -EREMOTE},
89 {ERRaccess, -EACCES}, 89 {ERRaccess, -EACCES},
90 {ERRinvtid, -ENXIO}, 90 {ERRinvtid, -ENXIO},
91 {ERRinvnetname, -ENODEV}, 91 {ERRinvnetname, -ENXIO},
92 {ERRinvdevice, -ENXIO}, 92 {ERRinvdevice, -ENXIO},
93 {ERRqfull, -ENOSPC}, 93 {ERRqfull, -ENOSPC},
94 {ERRqtoobig, -ENOSPC}, 94 {ERRqtoobig, -ENOSPC},
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
deleted file mode 100644
index 115359cc7a32..000000000000
--- a/fs/cifs/ntlmssp.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * fs/cifs/ntlmssp.h
3 *
4 * Copyright (c) International Business Machines Corp., 2006
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include "cifspdu.h"
23#include "cifsglob.h"
24#include "cifsproto.h"
25#include "cifs_unicode.h"
26#include "cifs_debug.h"
27#include "ntlmssp.h"
28#include "nterr.h"
29
30#ifdef CONFIG_CIFS_EXPERIMENTAL
31static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
32{
33 __u32 capabilities = 0;
34
35 /* init fields common to all four types of SessSetup */
36 /* note that header is initialized to zero in header_assemble */
37 pSMB->req.AndXCommand = 0xFF;
38 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
39 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
40
41 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
42
43 /* BB verify whether signing required on neg or just on auth frame
44 (and NTLM case) */
45
46 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
47 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
48
49 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
50 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
51
52 if (ses->capabilities & CAP_UNICODE) {
53 pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
54 capabilities |= CAP_UNICODE;
55 }
56 if (ses->capabilities & CAP_STATUS32) {
57 pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
58 capabilities |= CAP_STATUS32;
59 }
60 if (ses->capabilities & CAP_DFS) {
61 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
62 capabilities |= CAP_DFS;
63 }
64
65 /* BB check whether to init vcnum BB */
66 return capabilities;
67}
68int
69CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
70 int * pNTLMv2_flg, const struct nls_table *nls_cp)
71{
72 int rc = 0;
73 int wct;
74 struct smb_hdr *smb_buffer;
75 char *bcc_ptr;
76 SESSION_SETUP_ANDX *pSMB;
77 __u32 capabilities;
78
79 if(ses == NULL)
80 return -EINVAL;
81
82 cFYI(1,("SStp type: %d",type));
83 if(type < CIFS_NTLM) {
84#ifndef CONFIG_CIFS_WEAK_PW_HASH
85 /* LANMAN and plaintext are less secure and off by default.
86 So we make this explicitly be turned on in kconfig (in the
87 build) and turned on at runtime (changed from the default)
88 in proc/fs/cifs or via mount parm. Unfortunately this is
89 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
90 return -EOPNOTSUPP;
91#endif
92 wct = 10; /* lanman 2 style sessionsetup */
93 } else if(type < CIFS_NTLMSSP_NEG)
94 wct = 13; /* old style NTLM sessionsetup */
95 else /* same size for negotiate or auth, NTLMSSP or extended security */
96 wct = 12;
97
98 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
99 (void **)&smb_buffer);
100 if(rc)
101 return rc;
102
103 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
104
105 capabilities = cifs_ssetup_hdr(ses, pSMB);
106 bcc_ptr = pByteArea(smb_buffer);
107 if(type > CIFS_NTLM) {
108 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
109 capabilities |= CAP_EXTENDED_SECURITY;
110 pSMB->req.Capabilities = cpu_to_le32(capabilities);
111 /* BB set password lengths */
112 } else if(type < CIFS_NTLM) /* lanman */ {
113 /* no capabilities flags in old lanman negotiation */
114 /* pSMB->old_req.PasswordLength = */ /* BB fixme BB */
115 } else /* type CIFS_NTLM */ {
116 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
117 pSMB->req_no_secext.CaseInsensitivePasswordLength =
118 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
119 pSMB->req_no_secext.CaseSensitivePasswordLength =
120 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
121 }
122
123
124 /* copy session key */
125
126 /* if Unicode, align strings to two byte boundary */
127
128 /* copy user name */ /* BB Do we need to special case null user name? */
129
130 /* copy domain name */
131
132 /* copy Linux version */
133
134 /* copy network operating system name */
135
136 /* update bcc and smb buffer length */
137
138/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
139 /* SMB request buf freed in SendReceive2 */
140
141 return rc;
142}
143#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b689c5035124..03bbcb377913 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -21,6 +21,7 @@
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */ 22 */
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/pagemap.h>
24#include <linux/stat.h> 25#include <linux/stat.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include "cifspdu.h" 27#include "cifspdu.h"
@@ -31,8 +32,8 @@
31#include "cifs_fs_sb.h" 32#include "cifs_fs_sb.h"
32#include "cifsfs.h" 33#include "cifsfs.h"
33 34
34/* BB fixme - add debug wrappers around this function to disable it fixme BB */ 35#ifdef CONFIG_CIFS_DEBUG2
35/* static void dump_cifs_file_struct(struct file *file, char *label) 36static void dump_cifs_file_struct(struct file *file, char *label)
36{ 37{
37 struct cifsFileInfo * cf; 38 struct cifsFileInfo * cf;
38 39
@@ -53,7 +54,8 @@
53 } 54 }
54 55
55 } 56 }
56} */ 57}
58#endif /* DEBUG2 */
57 59
58/* Returns one if new inode created (which therefore needs to be hashed) */ 60/* Returns one if new inode created (which therefore needs to be hashed) */
59/* Might check in the future if inode number changed so we can rehash inode */ 61/* Might check in the future if inode number changed so we can rehash inode */
@@ -107,32 +109,52 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
107 return rc; 109 return rc;
108} 110}
109 111
110static void fill_in_inode(struct inode *tmp_inode, 112static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
111 FILE_DIRECTORY_INFO *pfindData, int *pobject_type, int isNewInode) 113 char * buf, int *pobject_type, int isNewInode)
112{ 114{
113 loff_t local_size; 115 loff_t local_size;
114 struct timespec local_mtime; 116 struct timespec local_mtime;
115 117
116 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); 118 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
117 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb); 119 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
118 __u32 attr = le32_to_cpu(pfindData->ExtFileAttributes); 120 __u32 attr;
119 __u64 allocation_size = le64_to_cpu(pfindData->AllocationSize); 121 __u64 allocation_size;
120 __u64 end_of_file = le64_to_cpu(pfindData->EndOfFile); 122 __u64 end_of_file;
121
122 cifsInfo->cifsAttrs = attr;
123 cifsInfo->time = jiffies;
124 123
125 /* save mtime and size */ 124 /* save mtime and size */
126 local_mtime = tmp_inode->i_mtime; 125 local_mtime = tmp_inode->i_mtime;
127 local_size = tmp_inode->i_size; 126 local_size = tmp_inode->i_size;
128 127
128 if(new_buf_type) {
129 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
130
131 attr = le32_to_cpu(pfindData->ExtFileAttributes);
132 allocation_size = le64_to_cpu(pfindData->AllocationSize);
133 end_of_file = le64_to_cpu(pfindData->EndOfFile);
134 tmp_inode->i_atime =
135 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
136 tmp_inode->i_mtime =
137 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
138 tmp_inode->i_ctime =
139 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
140 } else { /* legacy, OS2 and DOS style */
141 FIND_FILE_STANDARD_INFO * pfindData =
142 (FIND_FILE_STANDARD_INFO *)buf;
143
144 attr = le16_to_cpu(pfindData->Attributes);
145 allocation_size = le32_to_cpu(pfindData->AllocationSize);
146 end_of_file = le32_to_cpu(pfindData->DataSize);
147 tmp_inode->i_atime = CURRENT_TIME;
148 /* tmp_inode->i_mtime = BB FIXME - add dos time handling
149 tmp_inode->i_ctime = 0; BB FIXME */
150
151 }
152
129 /* Linux can not store file creation time unfortunately so ignore it */ 153 /* Linux can not store file creation time unfortunately so ignore it */
130 tmp_inode->i_atime = 154
131 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 155 cifsInfo->cifsAttrs = attr;
132 tmp_inode->i_mtime = 156 cifsInfo->time = jiffies;
133 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 157
134 tmp_inode->i_ctime =
135 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
136 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */ 158 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */
137 /* 2767 perms - indicate mandatory locking */ 159 /* 2767 perms - indicate mandatory locking */
138 /* BB fill in uid and gid here? with help from winbind? 160 /* BB fill in uid and gid here? with help from winbind?
@@ -215,11 +237,13 @@ static void fill_in_inode(struct inode *tmp_inode,
215 else 237 else
216 tmp_inode->i_fop = &cifs_file_ops; 238 tmp_inode->i_fop = &cifs_file_ops;
217 239
218 tmp_inode->i_data.a_ops = &cifs_addr_ops;
219 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 240 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
220 (cifs_sb->tcon->ses->server->maxBuf < 241 (cifs_sb->tcon->ses->server->maxBuf <
221 4096 + MAX_CIFS_HDR_SIZE)) 242 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
222 tmp_inode->i_data.a_ops->readpages = NULL; 243 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
244 else
245 tmp_inode->i_data.a_ops = &cifs_addr_ops;
246
223 if(isNewInode) 247 if(isNewInode)
224 return; /* No sense invalidating pages for new inode 248 return; /* No sense invalidating pages for new inode
225 since have not started caching readahead file 249 since have not started caching readahead file
@@ -338,11 +362,12 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
338 else 362 else
339 tmp_inode->i_fop = &cifs_file_ops; 363 tmp_inode->i_fop = &cifs_file_ops;
340 364
341 tmp_inode->i_data.a_ops = &cifs_addr_ops;
342 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 365 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
343 (cifs_sb->tcon->ses->server->maxBuf < 366 (cifs_sb->tcon->ses->server->maxBuf <
344 4096 + MAX_CIFS_HDR_SIZE)) 367 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
345 tmp_inode->i_data.a_ops->readpages = NULL; 368 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
369 else
370 tmp_inode->i_data.a_ops = &cifs_addr_ops;
346 371
347 if(isNewInode) 372 if(isNewInode)
348 return; /* No sense invalidating pages for new inode since we 373 return; /* No sense invalidating pages for new inode since we
@@ -415,7 +440,10 @@ static int initiate_cifs_search(const int xid, struct file *file)
415ffirst_retry: 440ffirst_retry:
416 /* test for Unix extensions */ 441 /* test for Unix extensions */
417 if (pTcon->ses->capabilities & CAP_UNIX) { 442 if (pTcon->ses->capabilities & CAP_UNIX) {
418 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; 443 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX;
444 } else if ((pTcon->ses->capabilities &
445 (CAP_NT_SMBS | CAP_NT_FIND)) == 0) {
446 cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
419 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 447 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
420 cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; 448 cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
421 } else /* not srvinos - BB fixme add check for backlevel? */ { 449 } else /* not srvinos - BB fixme add check for backlevel? */ {
@@ -451,12 +479,19 @@ static int cifs_unicode_bytelen(char *str)
451 return len << 1; 479 return len << 1;
452} 480}
453 481
454static char *nxt_dir_entry(char *old_entry, char *end_of_smb) 482static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
455{ 483{
456 char * new_entry; 484 char * new_entry;
457 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry; 485 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
458 486
459 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); 487 if(level == SMB_FIND_FILE_INFO_STANDARD) {
488 FIND_FILE_STANDARD_INFO * pfData;
489 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
490
491 new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
492 pfData->FileNameLength;
493 } else
494 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
460 cFYI(1,("new entry %p old entry %p",new_entry,old_entry)); 495 cFYI(1,("new entry %p old entry %p",new_entry,old_entry));
461 /* validate that new_entry is not past end of SMB */ 496 /* validate that new_entry is not past end of SMB */
462 if(new_entry >= end_of_smb) { 497 if(new_entry >= end_of_smb) {
@@ -464,7 +499,10 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb)
464 ("search entry %p began after end of SMB %p old entry %p", 499 ("search entry %p began after end of SMB %p old entry %p",
465 new_entry, end_of_smb, old_entry)); 500 new_entry, end_of_smb, old_entry));
466 return NULL; 501 return NULL;
467 } else if (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb) { 502 } else if(((level == SMB_FIND_FILE_INFO_STANDARD) &&
503 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) ||
504 ((level != SMB_FIND_FILE_INFO_STANDARD) &&
505 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) {
468 cERROR(1,("search entry %p extends after end of SMB %p", 506 cERROR(1,("search entry %p extends after end of SMB %p",
469 new_entry, end_of_smb)); 507 new_entry, end_of_smb));
470 return NULL; 508 return NULL;
@@ -482,7 +520,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
482 char * filename = NULL; 520 char * filename = NULL;
483 int len = 0; 521 int len = 0;
484 522
485 if(cfile->srch_inf.info_level == 0x202) { 523 if(cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
486 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; 524 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
487 filename = &pFindData->FileName[0]; 525 filename = &pFindData->FileName[0];
488 if(cfile->srch_inf.unicode) { 526 if(cfile->srch_inf.unicode) {
@@ -491,26 +529,34 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
491 /* BB should we make this strnlen of PATH_MAX? */ 529 /* BB should we make this strnlen of PATH_MAX? */
492 len = strnlen(filename, 5); 530 len = strnlen(filename, 5);
493 } 531 }
494 } else if(cfile->srch_inf.info_level == 0x101) { 532 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
495 FILE_DIRECTORY_INFO * pFindData = 533 FILE_DIRECTORY_INFO * pFindData =
496 (FILE_DIRECTORY_INFO *)current_entry; 534 (FILE_DIRECTORY_INFO *)current_entry;
497 filename = &pFindData->FileName[0]; 535 filename = &pFindData->FileName[0];
498 len = le32_to_cpu(pFindData->FileNameLength); 536 len = le32_to_cpu(pFindData->FileNameLength);
499 } else if(cfile->srch_inf.info_level == 0x102) { 537 } else if(cfile->srch_inf.info_level ==
538 SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
500 FILE_FULL_DIRECTORY_INFO * pFindData = 539 FILE_FULL_DIRECTORY_INFO * pFindData =
501 (FILE_FULL_DIRECTORY_INFO *)current_entry; 540 (FILE_FULL_DIRECTORY_INFO *)current_entry;
502 filename = &pFindData->FileName[0]; 541 filename = &pFindData->FileName[0];
503 len = le32_to_cpu(pFindData->FileNameLength); 542 len = le32_to_cpu(pFindData->FileNameLength);
504 } else if(cfile->srch_inf.info_level == 0x105) { 543 } else if(cfile->srch_inf.info_level ==
544 SMB_FIND_FILE_ID_FULL_DIR_INFO) {
505 SEARCH_ID_FULL_DIR_INFO * pFindData = 545 SEARCH_ID_FULL_DIR_INFO * pFindData =
506 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 546 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
507 filename = &pFindData->FileName[0]; 547 filename = &pFindData->FileName[0];
508 len = le32_to_cpu(pFindData->FileNameLength); 548 len = le32_to_cpu(pFindData->FileNameLength);
509 } else if(cfile->srch_inf.info_level == 0x104) { 549 } else if(cfile->srch_inf.info_level ==
550 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
510 FILE_BOTH_DIRECTORY_INFO * pFindData = 551 FILE_BOTH_DIRECTORY_INFO * pFindData =
511 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 552 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
512 filename = &pFindData->FileName[0]; 553 filename = &pFindData->FileName[0];
513 len = le32_to_cpu(pFindData->FileNameLength); 554 len = le32_to_cpu(pFindData->FileNameLength);
555 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
556 FIND_FILE_STANDARD_INFO * pFindData =
557 (FIND_FILE_STANDARD_INFO *)current_entry;
558 filename = &pFindData->FileName[0];
559 len = le32_to_cpu(pFindData->FileNameLength);
514 } else { 560 } else {
515 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); 561 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level));
516 } 562 }
@@ -597,7 +643,9 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
597 . and .. for the root of a drive and for those we need 643 . and .. for the root of a drive and for those we need
598 to start two entries earlier */ 644 to start two entries earlier */
599 645
600/* dump_cifs_file_struct(file, "In fce ");*/ 646#ifdef CONFIG_CIFS_DEBUG2
647 dump_cifs_file_struct(file, "In fce ");
648#endif
601 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 649 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
602 is_dir_changed(file)) || 650 is_dir_changed(file)) ||
603 (index_to_find < first_entry_in_buffer)) { 651 (index_to_find < first_entry_in_buffer)) {
@@ -644,10 +692,12 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
644 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 692 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
645 - cifsFile->srch_inf.entries_in_buffer; 693 - cifsFile->srch_inf.entries_in_buffer;
646 pos_in_buf = index_to_find - first_entry_in_buffer; 694 pos_in_buf = index_to_find - first_entry_in_buffer;
647 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 695 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
696
648 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { 697 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
649 /* go entry by entry figuring out which is first */ 698 /* go entry by entry figuring out which is first */
650 current_entry = nxt_dir_entry(current_entry,end_of_smb); 699 current_entry = nxt_dir_entry(current_entry,end_of_smb,
700 cifsFile->srch_inf.info_level);
651 } 701 }
652 if((current_entry == NULL) && (i < pos_in_buf)) { 702 if((current_entry == NULL) && (i < pos_in_buf)) {
653 /* BB fixme - check if we should flag this error */ 703 /* BB fixme - check if we should flag this error */
@@ -674,7 +724,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
674/* inode num, inode type and filename returned */ 724/* inode num, inode type and filename returned */
675static int cifs_get_name_from_search_buf(struct qstr *pqst, 725static int cifs_get_name_from_search_buf(struct qstr *pqst,
676 char *current_entry, __u16 level, unsigned int unicode, 726 char *current_entry, __u16 level, unsigned int unicode,
677 struct cifs_sb_info * cifs_sb, ino_t *pinum) 727 struct cifs_sb_info * cifs_sb, int max_len, ino_t *pinum)
678{ 728{
679 int rc = 0; 729 int rc = 0;
680 unsigned int len = 0; 730 unsigned int len = 0;
@@ -718,10 +768,22 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
718 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 768 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
719 filename = &pFindData->FileName[0]; 769 filename = &pFindData->FileName[0];
720 len = le32_to_cpu(pFindData->FileNameLength); 770 len = le32_to_cpu(pFindData->FileNameLength);
771 } else if(level == SMB_FIND_FILE_INFO_STANDARD) {
772 FIND_FILE_STANDARD_INFO * pFindData =
773 (FIND_FILE_STANDARD_INFO *)current_entry;
774 filename = &pFindData->FileName[0];
775 /* one byte length, no name conversion */
776 len = (unsigned int)pFindData->FileNameLength;
721 } else { 777 } else {
722 cFYI(1,("Unknown findfirst level %d",level)); 778 cFYI(1,("Unknown findfirst level %d",level));
723 return -EINVAL; 779 return -EINVAL;
724 } 780 }
781
782 if(len > max_len) {
783 cERROR(1,("bad search response length %d past smb end", len));
784 return -EINVAL;
785 }
786
725 if(unicode) { 787 if(unicode) {
726 /* BB fixme - test with long names */ 788 /* BB fixme - test with long names */
727 /* Note converted filename can be longer than in unicode */ 789 /* Note converted filename can be longer than in unicode */
@@ -741,7 +803,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
741} 803}
742 804
743static int cifs_filldir(char *pfindEntry, struct file *file, 805static int cifs_filldir(char *pfindEntry, struct file *file,
744 filldir_t filldir, void *direntry, char *scratch_buf) 806 filldir_t filldir, void *direntry, char *scratch_buf, int max_len)
745{ 807{
746 int rc = 0; 808 int rc = 0;
747 struct qstr qstring; 809 struct qstr qstring;
@@ -777,6 +839,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
777 rc = cifs_get_name_from_search_buf(&qstring,pfindEntry, 839 rc = cifs_get_name_from_search_buf(&qstring,pfindEntry,
778 pCifsF->srch_inf.info_level, 840 pCifsF->srch_inf.info_level,
779 pCifsF->srch_inf.unicode,cifs_sb, 841 pCifsF->srch_inf.unicode,cifs_sb,
842 max_len,
780 &inum /* returned */); 843 &inum /* returned */);
781 844
782 if(rc) 845 if(rc)
@@ -798,13 +861,16 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
798 /* we pass in rc below, indicating whether it is a new inode, 861 /* we pass in rc below, indicating whether it is a new inode,
799 so we can figure out whether to invalidate the inode cached 862 so we can figure out whether to invalidate the inode cached
800 data if the file has changed */ 863 data if the file has changed */
801 if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) { 864 if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
802 unix_fill_in_inode(tmp_inode, 865 unix_fill_in_inode(tmp_inode,
803 (FILE_UNIX_INFO *)pfindEntry,&obj_type, rc); 866 (FILE_UNIX_INFO *)pfindEntry,
804 } else { 867 &obj_type, rc);
805 fill_in_inode(tmp_inode, 868 else if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
806 (FILE_DIRECTORY_INFO *)pfindEntry,&obj_type, rc); 869 fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */,
807 } 870 pfindEntry, &obj_type, rc);
871 else
872 fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc);
873
808 874
809 rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, 875 rc = filldir(direntry,qstring.name,qstring.len,file->f_pos,
810 tmp_inode->i_ino,obj_type); 876 tmp_inode->i_ino,obj_type);
@@ -864,6 +930,12 @@ static int cifs_save_resume_key(const char *current_entry,
864 filename = &pFindData->FileName[0]; 930 filename = &pFindData->FileName[0];
865 len = le32_to_cpu(pFindData->FileNameLength); 931 len = le32_to_cpu(pFindData->FileNameLength);
866 cifsFile->srch_inf.resume_key = pFindData->FileIndex; 932 cifsFile->srch_inf.resume_key = pFindData->FileIndex;
933 } else if(level == SMB_FIND_FILE_INFO_STANDARD) {
934 FIND_FILE_STANDARD_INFO * pFindData =
935 (FIND_FILE_STANDARD_INFO *)current_entry;
936 filename = &pFindData->FileName[0];
937 /* one byte length, no name conversion */
938 len = (unsigned int)pFindData->FileNameLength;
867 } else { 939 } else {
868 cFYI(1,("Unknown findfirst level %d",level)); 940 cFYI(1,("Unknown findfirst level %d",level));
869 return -EINVAL; 941 return -EINVAL;
@@ -884,6 +956,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
884 int num_to_fill = 0; 956 int num_to_fill = 0;
885 char * tmp_buf = NULL; 957 char * tmp_buf = NULL;
886 char * end_of_smb; 958 char * end_of_smb;
959 int max_len;
887 960
888 xid = GetXid(); 961 xid = GetXid();
889 962
@@ -909,7 +982,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
909 case 1: 982 case 1:
910 if (filldir(direntry, "..", 2, file->f_pos, 983 if (filldir(direntry, "..", 2, file->f_pos,
911 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 984 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
912 cERROR(1, ("Filldir for parent dir failed ")); 985 cERROR(1, ("Filldir for parent dir failed"));
913 rc = -ENOMEM; 986 rc = -ENOMEM;
914 break; 987 break;
915 } 988 }
@@ -959,10 +1032,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
959 goto rddir2_exit; 1032 goto rddir2_exit;
960 } 1033 }
961 cFYI(1,("loop through %d times filling dir for net buf %p", 1034 cFYI(1,("loop through %d times filling dir for net buf %p",
962 num_to_fill,cifsFile->srch_inf.ntwrk_buf_start)); 1035 num_to_fill,cifsFile->srch_inf.ntwrk_buf_start));
963 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 1036 max_len = smbCalcSize((struct smb_hdr *)
964 smbCalcSize((struct smb_hdr *) 1037 cifsFile->srch_inf.ntwrk_buf_start);
965 cifsFile->srch_inf.ntwrk_buf_start); 1038 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
1039
966 /* To be safe - for UCS to UTF-8 with strings loaded 1040 /* To be safe - for UCS to UTF-8 with strings loaded
967 with the rare long characters alloc more to account for 1041 with the rare long characters alloc more to account for
968 such multibyte target UTF-8 characters. cifs_unicode.c, 1042 such multibyte target UTF-8 characters. cifs_unicode.c,
@@ -977,17 +1051,19 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
977 } 1051 }
978 /* if buggy server returns . and .. late do 1052 /* if buggy server returns . and .. late do
979 we want to check for that here? */ 1053 we want to check for that here? */
980 rc = cifs_filldir(current_entry, file, 1054 rc = cifs_filldir(current_entry, file,
981 filldir, direntry,tmp_buf); 1055 filldir, direntry, tmp_buf, max_len);
982 file->f_pos++; 1056 file->f_pos++;
983 if(file->f_pos == cifsFile->srch_inf.index_of_last_entry) { 1057 if(file->f_pos ==
1058 cifsFile->srch_inf.index_of_last_entry) {
984 cFYI(1,("last entry in buf at pos %lld %s", 1059 cFYI(1,("last entry in buf at pos %lld %s",
985 file->f_pos,tmp_buf)); /* BB removeme BB */ 1060 file->f_pos,tmp_buf));
986 cifs_save_resume_key(current_entry,cifsFile); 1061 cifs_save_resume_key(current_entry,cifsFile);
987 break; 1062 break;
988 } else 1063 } else
989 current_entry = nxt_dir_entry(current_entry, 1064 current_entry =
990 end_of_smb); 1065 nxt_dir_entry(current_entry, end_of_smb,
1066 cifsFile->srch_inf.info_level);
991 } 1067 }
992 kfree(tmp_buf); 1068 kfree(tmp_buf);
993 break; 1069 break;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
new file mode 100644
index 000000000000..7202d534ef0b
--- /dev/null
+++ b/fs/cifs/sess.c
@@ -0,0 +1,538 @@
1/*
2 * fs/cifs/sess.c
3 *
4 * SMB/CIFS session setup handling routines
5 *
6 * Copyright (c) International Business Machines Corp., 2006
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 *
9 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published
11 * by the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include "cifspdu.h"
25#include "cifsglob.h"
26#include "cifsproto.h"
27#include "cifs_unicode.h"
28#include "cifs_debug.h"
29#include "ntlmssp.h"
30#include "nterr.h"
31#include <linux/utsname.h>
32
33extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
34 unsigned char *p24);
35
36static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
37{
38 __u32 capabilities = 0;
39
40 /* init fields common to all four types of SessSetup */
41 /* note that header is initialized to zero in header_assemble */
42 pSMB->req.AndXCommand = 0xFF;
43 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
44 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
45
46 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
47
48 /* BB verify whether signing required on neg or just on auth frame
49 (and NTLM case) */
50
51 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
52 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
53
54 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
55 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
56
57 if (ses->capabilities & CAP_UNICODE) {
58 pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
59 capabilities |= CAP_UNICODE;
60 }
61 if (ses->capabilities & CAP_STATUS32) {
62 pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
63 capabilities |= CAP_STATUS32;
64 }
65 if (ses->capabilities & CAP_DFS) {
66 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
67 capabilities |= CAP_DFS;
68 }
69 if (ses->capabilities & CAP_UNIX) {
70 capabilities |= CAP_UNIX;
71 }
72
73 /* BB check whether to init vcnum BB */
74 return capabilities;
75}
76
77static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
78 const struct nls_table * nls_cp)
79{
80 char * bcc_ptr = *pbcc_area;
81 int bytes_ret = 0;
82
83 /* BB FIXME add check that strings total less
84 than 335 or will need to send them as arrays */
85
86 /* unicode strings, must be word aligned before the call */
87/* if ((long) bcc_ptr % 2) {
88 *bcc_ptr = 0;
89 bcc_ptr++;
90 } */
91 /* copy user */
92 if(ses->userName == NULL) {
93 /* BB what about null user mounts - check that we do this BB */
94 } else { /* 300 should be long enough for any conceivable user name */
95 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
96 300, nls_cp);
97 }
98 bcc_ptr += 2 * bytes_ret;
99 bcc_ptr += 2; /* account for null termination */
100 /* copy domain */
101 if(ses->domainName == NULL)
102 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr,
103 "CIFS_LINUX_DOM", 32, nls_cp);
104 else
105 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
106 256, nls_cp);
107 bcc_ptr += 2 * bytes_ret;
108 bcc_ptr += 2; /* account for null terminator */
109
110 /* Copy OS version */
111 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
112 nls_cp);
113 bcc_ptr += 2 * bytes_ret;
114 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
115 32, nls_cp);
116 bcc_ptr += 2 * bytes_ret;
117 bcc_ptr += 2; /* trailing null */
118
119 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
120 32, nls_cp);
121 bcc_ptr += 2 * bytes_ret;
122 bcc_ptr += 2; /* trailing null */
123
124 *pbcc_area = bcc_ptr;
125}
126
127static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
128 const struct nls_table * nls_cp)
129{
130 char * bcc_ptr = *pbcc_area;
131
132 /* copy user */
133 /* BB what about null user mounts - check that we do this BB */
134 /* copy user */
135 if(ses->userName == NULL) {
136 /* BB what about null user mounts - check that we do this BB */
137 } else { /* 300 should be long enough for any conceivable user name */
138 strncpy(bcc_ptr, ses->userName, 300);
139 }
140 /* BB improve check for overflow */
141 bcc_ptr += strnlen(ses->userName, 300);
142 *bcc_ptr = 0;
143 bcc_ptr++; /* account for null termination */
144
145 /* copy domain */
146
147 if(ses->domainName == NULL) {
148 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
149 bcc_ptr += 14; /* strlen(CIFS_LINUX_DOM) */
150 } else {
151 strncpy(bcc_ptr, ses->domainName, 256);
152 bcc_ptr += strnlen(ses->domainName, 256);
153 }
154 *bcc_ptr = 0;
155 bcc_ptr++;
156
157 /* BB check for overflow here */
158
159 strcpy(bcc_ptr, "Linux version ");
160 bcc_ptr += strlen("Linux version ");
161 strcpy(bcc_ptr, system_utsname.release);
162 bcc_ptr += strlen(system_utsname.release) + 1;
163
164 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
165 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
166
167 *pbcc_area = bcc_ptr;
168}
169
170static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
171 const struct nls_table * nls_cp)
172{
173 int rc = 0;
174 int words_left, len;
175 char * data = *pbcc_area;
176
177
178
179 cFYI(1,("bleft %d",bleft));
180
181
182 /* word align, if bytes remaining is not even */
183 if(bleft % 2) {
184 bleft--;
185 data++;
186 }
187 words_left = bleft / 2;
188
189 /* save off server operating system */
190 len = UniStrnlen((wchar_t *) data, words_left);
191
192/* We look for obvious messed up bcc or strings in response so we do not go off
193 the end since (at least) WIN2K and Windows XP have a major bug in not null
194 terminating last Unicode string in response */
195 if(len >= words_left)
196 return rc;
197
198 if(ses->serverOS)
199 kfree(ses->serverOS);
200 /* UTF-8 string will not grow more than four times as big as UCS-16 */
201 ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
202 if(ses->serverOS != NULL) {
203 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
204 nls_cp);
205 }
206 data += 2 * (len + 1);
207 words_left -= len + 1;
208
209 /* save off server network operating system */
210 len = UniStrnlen((wchar_t *) data, words_left);
211
212 if(len >= words_left)
213 return rc;
214
215 if(ses->serverNOS)
216 kfree(ses->serverNOS);
217 ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
218 if(ses->serverNOS != NULL) {
219 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
220 nls_cp);
221 if(strncmp(ses->serverNOS, "NT LAN Manager 4",16) == 0) {
222 cFYI(1,("NT4 server"));
223 ses->flags |= CIFS_SES_NT4;
224 }
225 }
226 data += 2 * (len + 1);
227 words_left -= len + 1;
228
229 /* save off server domain */
230 len = UniStrnlen((wchar_t *) data, words_left);
231
232 if(len > words_left)
233 return rc;
234
235 if(ses->serverDomain)
236 kfree(ses->serverDomain);
237 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
238 if(ses->serverDomain != NULL) {
239 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
240 nls_cp);
241 ses->serverDomain[2*len] = 0;
242 ses->serverDomain[(2*len) + 1] = 0;
243 }
244 data += 2 * (len + 1);
245 words_left -= len + 1;
246
247 cFYI(1,("words left: %d",words_left));
248
249 return rc;
250}
251
252static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
253 const struct nls_table * nls_cp)
254{
255 int rc = 0;
256 int len;
257 char * bcc_ptr = *pbcc_area;
258
259 cFYI(1,("decode sessetup ascii. bleft %d", bleft));
260
261 len = strnlen(bcc_ptr, bleft);
262 if(len >= bleft)
263 return rc;
264
265 if(ses->serverOS)
266 kfree(ses->serverOS);
267
268 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
269 if(ses->serverOS)
270 strncpy(ses->serverOS, bcc_ptr, len);
271
272 bcc_ptr += len + 1;
273 bleft -= len + 1;
274
275 len = strnlen(bcc_ptr, bleft);
276 if(len >= bleft)
277 return rc;
278
279 if(ses->serverNOS)
280 kfree(ses->serverNOS);
281
282 ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
283 if(ses->serverNOS)
284 strncpy(ses->serverNOS, bcc_ptr, len);
285
286 bcc_ptr += len + 1;
287 bleft -= len + 1;
288
289 len = strnlen(bcc_ptr, bleft);
290 if(len > bleft)
291 return rc;
292
293 if(ses->serverDomain)
294 kfree(ses->serverDomain);
295
296 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
297 if(ses->serverOS)
298 strncpy(ses->serverOS, bcc_ptr, len);
299
300 bcc_ptr += len + 1;
301 bleft -= len + 1;
302
303 cFYI(1,("ascii: bytes left %d",bleft));
304
305 return rc;
306}
307
308int
309CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
310 const struct nls_table *nls_cp)
311{
312 int rc = 0;
313 int wct;
314 struct smb_hdr *smb_buf;
315 char *bcc_ptr;
316 char *str_area;
317 SESSION_SETUP_ANDX *pSMB;
318 __u32 capabilities;
319 int count;
320 int resp_buf_type = 0;
321 struct kvec iov[2];
322 enum securityEnum type;
323 __u16 action;
324 int bytes_remaining;
325
326 if(ses == NULL)
327 return -EINVAL;
328
329 type = ses->server->secType;
330
331 cFYI(1,("sess setup type %d",type));
332 if(type == LANMAN) {
333#ifndef CONFIG_CIFS_WEAK_PW_HASH
334 /* LANMAN and plaintext are less secure and off by default.
335 So we make this explicitly be turned on in kconfig (in the
336 build) and turned on at runtime (changed from the default)
337 in proc/fs/cifs or via mount parm. Unfortunately this is
338 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
339 return -EOPNOTSUPP;
340#endif
341 wct = 10; /* lanman 2 style sessionsetup */
342 } else if((type == NTLM) || (type == NTLMv2)) {
343 /* For NTLMv2 failures eventually may need to retry NTLM */
344 wct = 13; /* old style NTLM sessionsetup */
345 } else /* same size for negotiate or auth, NTLMSSP or extended security */
346 wct = 12;
347
348 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
349 (void **)&smb_buf);
350 if(rc)
351 return rc;
352
353 pSMB = (SESSION_SETUP_ANDX *)smb_buf;
354
355 capabilities = cifs_ssetup_hdr(ses, pSMB);
356
357 /* we will send the SMB in two pieces,
358 a fixed length beginning part, and a
359 second part which will include the strings
360 and rest of bcc area, in order to avoid having
361 to do a large buffer 17K allocation */
362 iov[0].iov_base = (char *)pSMB;
363 iov[0].iov_len = smb_buf->smb_buf_length + 4;
364
365 /* 2000 big enough to fit max user, domain, NOS name etc. */
366 str_area = kmalloc(2000, GFP_KERNEL);
367 bcc_ptr = str_area;
368
369 if(type == LANMAN) {
370#ifdef CONFIG_CIFS_WEAK_PW_HASH
371 char lnm_session_key[CIFS_SESS_KEY_SIZE];
372
373 /* no capabilities flags in old lanman negotiation */
374
375 pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE;
376 /* BB calculate hash with password */
377 /* and copy into bcc */
378
379 calc_lanman_hash(ses, lnm_session_key);
380
381/* #ifdef CONFIG_CIFS_DEBUG2
382 cifs_dump_mem("cryptkey: ",ses->server->cryptKey,
383 CIFS_SESS_KEY_SIZE);
384#endif */
385 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
386 bcc_ptr += CIFS_SESS_KEY_SIZE;
387
388 /* can not sign if LANMAN negotiated so no need
389 to calculate signing key? but what if server
390 changed to do higher than lanman dialect and
391 we reconnected would we ever calc signing_key? */
392
393 cFYI(1,("Negotiating LANMAN setting up strings"));
394 /* Unicode not allowed for LANMAN dialects */
395 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
396#endif
397 } else if (type == NTLM) {
398 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
399
400 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
401 pSMB->req_no_secext.CaseInsensitivePasswordLength =
402 cpu_to_le16(CIFS_SESS_KEY_SIZE);
403 pSMB->req_no_secext.CaseSensitivePasswordLength =
404 cpu_to_le16(CIFS_SESS_KEY_SIZE);
405
406 /* calculate session key */
407 SMBNTencrypt(ses->password, ses->server->cryptKey,
408 ntlm_session_key);
409
410 if(first_time) /* should this be moved into common code
411 with similar ntlmv2 path? */
412 cifs_calculate_mac_key(ses->server->mac_signing_key,
413 ntlm_session_key, ses->password);
414 /* copy session key */
415
416 memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
417 bcc_ptr += CIFS_SESS_KEY_SIZE;
418 memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
419 bcc_ptr += CIFS_SESS_KEY_SIZE;
420 if(ses->capabilities & CAP_UNICODE) {
421 /* unicode strings must be word aligned */
422 if (iov[0].iov_len % 2) {
423 *bcc_ptr = 0;
424 bcc_ptr++;
425 }
426 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
427 } else
428 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
429 } else if (type == NTLMv2) {
430 char * v2_sess_key =
431 kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
432
433 /* BB FIXME change all users of v2_sess_key to
434 struct ntlmv2_resp */
435
436 if(v2_sess_key == NULL) {
437 cifs_small_buf_release(smb_buf);
438 return -ENOMEM;
439 }
440
441 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
442
443 /* LM2 password would be here if we supported it */
444 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
445 /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
446
447 pSMB->req_no_secext.CaseSensitivePasswordLength =
448 cpu_to_le16(sizeof(struct ntlmv2_resp));
449
450 /* calculate session key */
451 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
452 if(first_time) /* should this be moved into common code
453 with similar ntlmv2 path? */
454 /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
455 response BB FIXME, v2_sess_key); */
456
457 /* copy session key */
458
459 /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
460 bcc_ptr += LM2_SESS_KEY_SIZE; */
461 memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp));
462 bcc_ptr += sizeof(struct ntlmv2_resp);
463 kfree(v2_sess_key);
464 if(ses->capabilities & CAP_UNICODE) {
465 if(iov[0].iov_len % 2) {
466 *bcc_ptr = 0;
467 } bcc_ptr++;
468 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
469 } else
470 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
471 } else /* NTLMSSP or SPNEGO */ {
472 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
473 capabilities |= CAP_EXTENDED_SECURITY;
474 pSMB->req.Capabilities = cpu_to_le32(capabilities);
475 /* BB set password lengths */
476 }
477
478 count = (long) bcc_ptr - (long) str_area;
479 smb_buf->smb_buf_length += count;
480
481 BCC_LE(smb_buf) = cpu_to_le16(count);
482
483 iov[1].iov_base = str_area;
484 iov[1].iov_len = count;
485 rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
486 /* SMB request buf freed in SendReceive2 */
487
488 cFYI(1,("ssetup rc from sendrecv2 is %d",rc));
489 if(rc)
490 goto ssetup_exit;
491
492 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
493 smb_buf = (struct smb_hdr *)iov[0].iov_base;
494
495 if((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
496 rc = -EIO;
497 cERROR(1,("bad word count %d", smb_buf->WordCount));
498 goto ssetup_exit;
499 }
500 action = le16_to_cpu(pSMB->resp.Action);
501 if (action & GUEST_LOGIN)
502 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
503 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
504 cFYI(1, ("UID = %d ", ses->Suid));
505 /* response can have either 3 or 4 word count - Samba sends 3 */
506 /* and lanman response is 3 */
507 bytes_remaining = BCC(smb_buf);
508 bcc_ptr = pByteArea(smb_buf);
509
510 if(smb_buf->WordCount == 4) {
511 __u16 blob_len;
512 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
513 bcc_ptr += blob_len;
514 if(blob_len > bytes_remaining) {
515 cERROR(1,("bad security blob length %d", blob_len));
516 rc = -EINVAL;
517 goto ssetup_exit;
518 }
519 bytes_remaining -= blob_len;
520 }
521
522 /* BB check if Unicode and decode strings */
523 if(smb_buf->Flags2 & SMBFLG2_UNICODE)
524 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
525 ses, nls_cp);
526 else
527 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,nls_cp);
528
529ssetup_exit:
530 kfree(str_area);
531 if(resp_buf_type == CIFS_SMALL_BUFFER) {
532 cFYI(1,("ssetup freeing small buf %p", iov[0].iov_base));
533 cifs_small_buf_release(iov[0].iov_base);
534 } else if(resp_buf_type == CIFS_LARGE_BUFFER)
535 cifs_buf_release(iov[0].iov_base);
536
537 return rc;
538}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 6103bcdfb16d..f518c5e45035 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -30,6 +30,7 @@
30#include <linux/random.h> 30#include <linux/random.h>
31#include "cifs_unicode.h" 31#include "cifs_unicode.h"
32#include "cifspdu.h" 32#include "cifspdu.h"
33#include "cifsglob.h"
33#include "md5.h" 34#include "md5.h"
34#include "cifs_debug.h" 35#include "cifs_debug.h"
35#include "cifsencrypt.h" 36#include "cifsencrypt.h"
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3da80409466c..17ba329e2b3d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -654,8 +654,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
654 654
655 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 655 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
656 up(&ses->server->tcpSem); 656 up(&ses->server->tcpSem);
657 cERROR(1, 657 cERROR(1, ("Illegal length, greater than maximum frame, %d",
658 ("Illegal length, greater than maximum frame, %d ",
659 in_buf->smb_buf_length)); 658 in_buf->smb_buf_length));
660 DeleteMidQEntry(midQ); 659 DeleteMidQEntry(midQ);
661 /* If not lock req, update # of requests on wire to server */ 660 /* If not lock req, update # of requests on wire to server */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 7c2642431fa5..cc66c681bd11 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -164,7 +164,7 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
164 return 0; 164 return 0;
165} 165}
166 166
167int coda_flush(struct file *coda_file) 167int coda_flush(struct file *coda_file, fl_owner_t id)
168{ 168{
169 unsigned short flags = coda_file->f_flags & ~O_EXCL; 169 unsigned short flags = coda_file->f_flags & ~O_EXCL;
170 unsigned short coda_flags = coda_flags_to_cflags(flags); 170 unsigned short coda_flags = coda_flags_to_cflags(flags);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index ada1a81df6bd..87f1dc8aa24b 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -36,7 +36,7 @@
36/* VFS super_block ops */ 36/* VFS super_block ops */
37static void coda_clear_inode(struct inode *); 37static void coda_clear_inode(struct inode *);
38static void coda_put_super(struct super_block *); 38static void coda_put_super(struct super_block *);
39static int coda_statfs(struct super_block *sb, struct kstatfs *buf); 39static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
40 40
41static kmem_cache_t * coda_inode_cachep; 41static kmem_cache_t * coda_inode_cachep;
42 42
@@ -278,13 +278,13 @@ struct inode_operations coda_file_inode_operations = {
278 .setattr = coda_setattr, 278 .setattr = coda_setattr,
279}; 279};
280 280
281static int coda_statfs(struct super_block *sb, struct kstatfs *buf) 281static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
282{ 282{
283 int error; 283 int error;
284 284
285 lock_kernel(); 285 lock_kernel();
286 286
287 error = venus_statfs(sb, buf); 287 error = venus_statfs(dentry, buf);
288 288
289 unlock_kernel(); 289 unlock_kernel();
290 290
@@ -307,10 +307,10 @@ static int coda_statfs(struct super_block *sb, struct kstatfs *buf)
307 307
308/* init_coda: used by filesystems.c to register coda */ 308/* init_coda: used by filesystems.c to register coda */
309 309
310static struct super_block *coda_get_sb(struct file_system_type *fs_type, 310static int coda_get_sb(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *data) 311 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
312{ 312{
313 return get_sb_nodev(fs_type, flags, data, coda_fill_super); 313 return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
314} 314}
315 315
316struct file_system_type coda_fs_type = { 316struct file_system_type coda_fs_type = {
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 6c6771db36da..803aacf0d49c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -28,7 +28,6 @@
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/skbuff.h> 29#include <linux/skbuff.h>
30#include <linux/proc_fs.h> 30#include <linux/proc_fs.h>
31#include <linux/devfs_fs_kernel.h>
32#include <linux/vmalloc.h> 31#include <linux/vmalloc.h>
33#include <linux/fs.h> 32#include <linux/fs.h>
34#include <linux/file.h> 33#include <linux/file.h>
@@ -259,7 +258,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
259 /* If request was not a signal, enqueue and don't free */ 258 /* If request was not a signal, enqueue and don't free */
260 if (!(req->uc_flags & REQ_ASYNC)) { 259 if (!(req->uc_flags & REQ_ASYNC)) {
261 req->uc_flags |= REQ_READ; 260 req->uc_flags |= REQ_READ;
262 list_add(&(req->uc_chain), vcp->vc_processing.prev); 261 list_add_tail(&(req->uc_chain), &vcp->vc_processing);
263 goto out; 262 goto out;
264 } 263 }
265 264
@@ -365,22 +364,12 @@ static int init_coda_psdev(void)
365 err = PTR_ERR(coda_psdev_class); 364 err = PTR_ERR(coda_psdev_class);
366 goto out_chrdev; 365 goto out_chrdev;
367 } 366 }
368 devfs_mk_dir ("coda"); 367 for (i = 0; i < MAX_CODADEVS; i++)
369 for (i = 0; i < MAX_CODADEVS; i++) {
370 class_device_create(coda_psdev_class, NULL, 368 class_device_create(coda_psdev_class, NULL,
371 MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); 369 MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i);
372 err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i),
373 S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i);
374 if (err)
375 goto out_class;
376 }
377 coda_sysctl_init(); 370 coda_sysctl_init();
378 goto out; 371 goto out;
379 372
380out_class:
381 for (i = 0; i < MAX_CODADEVS; i++)
382 class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
383 class_destroy(coda_psdev_class);
384out_chrdev: 373out_chrdev:
385 unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); 374 unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
386out: 375out:
@@ -419,12 +408,9 @@ static int __init init_coda(void)
419 } 408 }
420 return 0; 409 return 0;
421out: 410out:
422 for (i = 0; i < MAX_CODADEVS; i++) { 411 for (i = 0; i < MAX_CODADEVS; i++)
423 class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); 412 class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
424 devfs_remove("coda/%d", i);
425 }
426 class_destroy(coda_psdev_class); 413 class_destroy(coda_psdev_class);
427 devfs_remove("coda");
428 unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); 414 unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
429 coda_sysctl_clean(); 415 coda_sysctl_clean();
430out1: 416out1:
@@ -441,12 +427,9 @@ static void __exit exit_coda(void)
441 if ( err != 0 ) { 427 if ( err != 0 ) {
442 printk("coda: failed to unregister filesystem\n"); 428 printk("coda: failed to unregister filesystem\n");
443 } 429 }
444 for (i = 0; i < MAX_CODADEVS; i++) { 430 for (i = 0; i < MAX_CODADEVS; i++)
445 class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); 431 class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
446 devfs_remove("coda/%d", i);
447 }
448 class_destroy(coda_psdev_class); 432 class_destroy(coda_psdev_class);
449 devfs_remove("coda");
450 unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); 433 unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
451 coda_sysctl_clean(); 434 coda_sysctl_clean();
452 coda_destroy_inodecache(); 435 coda_destroy_inodecache();
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index b35e5bbd9c99..76e00a65a75b 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -50,6 +50,6 @@ fail:
50 return error; 50 return error;
51} 51}
52 52
53struct address_space_operations coda_symlink_aops = { 53const struct address_space_operations coda_symlink_aops = {
54 .readpage = coda_symlink_filler, 54 .readpage = coda_symlink_filler,
55}; 55};
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index f0b10757288f..1c82e9a7d7c8 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -11,7 +11,6 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/mm.h> 15#include <linux/mm.h>
17#include <linux/sysctl.h> 16#include <linux/sysctl.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 1bae99650a91..a5b5e631ba61 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -611,7 +611,7 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid,
611 return error; 611 return error;
612} 612}
613 613
614int venus_statfs(struct super_block *sb, struct kstatfs *sfs) 614int venus_statfs(struct dentry *dentry, struct kstatfs *sfs)
615{ 615{
616 union inputArgs *inp; 616 union inputArgs *inp;
617 union outputArgs *outp; 617 union outputArgs *outp;
@@ -620,7 +620,7 @@ int venus_statfs(struct super_block *sb, struct kstatfs *sfs)
620 insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs)); 620 insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs));
621 UPARG(CODA_STATFS); 621 UPARG(CODA_STATFS);
622 622
623 error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); 623 error = coda_upcall(coda_sbp(dentry->d_sb), insize, &outsize, inp);
624 624
625 if (!error) { 625 if (!error) {
626 sfs->f_blocks = outp->coda_statfs.stat.f_blocks; 626 sfs->f_blocks = outp->coda_statfs.stat.f_blocks;
@@ -725,7 +725,7 @@ static int coda_upcall(struct coda_sb_info *sbi,
725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique; 725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique;
726 726
727 /* Append msg to pending queue and poke Venus. */ 727 /* Append msg to pending queue and poke Venus. */
728 list_add(&(req->uc_chain), vcommp->vc_pending.prev); 728 list_add_tail(&(req->uc_chain), &vcommp->vc_pending);
729 729
730 wake_up_interruptible(&vcommp->vc_waitq); 730 wake_up_interruptible(&vcommp->vc_waitq);
731 /* We can be interrupted while we wait for Venus to process 731 /* We can be interrupted while we wait for Venus to process
diff --git a/fs/compat.c b/fs/compat.c
index b1f64786a613..e31e9cf96647 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -55,6 +55,20 @@
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); 56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57 57
58int compat_log = 1;
59
60int compat_printk(const char *fmt, ...)
61{
62 va_list ap;
63 int ret;
64 if (!compat_log)
65 return 0;
66 va_start(ap, fmt);
67 ret = vprintk(fmt, ap);
68 va_end(ap);
69 return ret;
70}
71
58/* 72/*
59 * Not all architectures have sys_utime, so implement this in terms 73 * Not all architectures have sys_utime, so implement this in terms
60 * of sys_utimes. 74 * of sys_utimes.
@@ -197,7 +211,7 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs
197 error = user_path_walk(path, &nd); 211 error = user_path_walk(path, &nd);
198 if (!error) { 212 if (!error) {
199 struct kstatfs tmp; 213 struct kstatfs tmp;
200 error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); 214 error = vfs_statfs(nd.dentry, &tmp);
201 if (!error) 215 if (!error)
202 error = put_compat_statfs(buf, &tmp); 216 error = put_compat_statfs(buf, &tmp);
203 path_release(&nd); 217 path_release(&nd);
@@ -215,7 +229,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
215 file = fget(fd); 229 file = fget(fd);
216 if (!file) 230 if (!file)
217 goto out; 231 goto out;
218 error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); 232 error = vfs_statfs(file->f_dentry, &tmp);
219 if (!error) 233 if (!error)
220 error = put_compat_statfs(buf, &tmp); 234 error = put_compat_statfs(buf, &tmp);
221 fput(file); 235 fput(file);
@@ -265,7 +279,7 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s
265 error = user_path_walk(path, &nd); 279 error = user_path_walk(path, &nd);
266 if (!error) { 280 if (!error) {
267 struct kstatfs tmp; 281 struct kstatfs tmp;
268 error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); 282 error = vfs_statfs(nd.dentry, &tmp);
269 if (!error) 283 if (!error)
270 error = put_compat_statfs64(buf, &tmp); 284 error = put_compat_statfs64(buf, &tmp);
271 path_release(&nd); 285 path_release(&nd);
@@ -286,7 +300,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
286 file = fget(fd); 300 file = fget(fd);
287 if (!file) 301 if (!file)
288 goto out; 302 goto out;
289 error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); 303 error = vfs_statfs(file->f_dentry, &tmp);
290 if (!error) 304 if (!error)
291 error = put_compat_statfs64(buf, &tmp); 305 error = put_compat_statfs64(buf, &tmp);
292 fput(file); 306 fput(file);
@@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
359 sprintf(buf,"'%c'", (cmd>>24) & 0x3f); 373 sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
360 if (!isprint(buf[1])) 374 if (!isprint(buf[1]))
361 sprintf(buf, "%02x", buf[1]); 375 sprintf(buf, "%02x", buf[1]);
362 printk("ioctl32(%s:%d): Unknown cmd fd(%d) " 376 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
363 "cmd(%08x){%s} arg(%08x) on %s\n", 377 "cmd(%08x){%s} arg(%08x) on %s\n",
364 current->comm, current->pid, 378 current->comm, current->pid,
365 (int)fd, (unsigned int)cmd, buf, 379 (int)fd, (unsigned int)cmd, buf,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d2c38875ab29..4063a9396977 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -10,7 +10,6 @@
10 * ioctls. 10 * ioctls.
11 */ 11 */
12 12
13#include <linux/config.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/compat.h> 14#include <linux/compat.h>
16#include <linux/kernel.h> 15#include <linux/kernel.h>
@@ -44,7 +43,6 @@
44#include <linux/loop.h> 43#include <linux/loop.h>
45#include <linux/auto_fs.h> 44#include <linux/auto_fs.h>
46#include <linux/auto_fs4.h> 45#include <linux/auto_fs4.h>
47#include <linux/devfs_fs.h>
48#include <linux/tty.h> 46#include <linux/tty.h>
49#include <linux/vt_kern.h> 47#include <linux/vt_kern.h>
50#include <linux/fb.h> 48#include <linux/fb.h>
@@ -80,6 +78,7 @@
80#include <net/bluetooth/rfcomm.h> 78#include <net/bluetooth/rfcomm.h>
81 79
82#include <linux/capi.h> 80#include <linux/capi.h>
81#include <linux/gigaset_dev.h>
83 82
84#include <scsi/scsi.h> 83#include <scsi/scsi.h>
85#include <scsi/scsi_ioctl.h> 84#include <scsi/scsi_ioctl.h>
@@ -205,38 +204,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
205 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); 204 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
206} 205}
207 206
208struct compat_dmx_event {
209 dmx_event_t event;
210 compat_time_t timeStamp;
211 union
212 {
213 dmx_scrambling_status_t scrambling;
214 } u;
215};
216
217static int do_dmx_get_event(unsigned int fd, unsigned int cmd, unsigned long arg)
218{
219 struct dmx_event kevent;
220 mm_segment_t old_fs = get_fs();
221 int err;
222
223 set_fs(KERNEL_DS);
224 err = sys_ioctl(fd, cmd, (unsigned long) &kevent);
225 set_fs(old_fs);
226
227 if (!err) {
228 struct compat_dmx_event __user *up = compat_ptr(arg);
229
230 err = put_user(kevent.event, &up->event);
231 err |= put_user(kevent.timeStamp, &up->timeStamp);
232 err |= put_user(kevent.u.scrambling, &up->u.scrambling);
233 if (err)
234 err = -EFAULT;
235 }
236
237 return err;
238}
239
240struct compat_video_event { 207struct compat_video_event {
241 int32_t type; 208 int32_t type;
242 compat_time_t timestamp; 209 compat_time_t timestamp;
@@ -2964,7 +2931,6 @@ HANDLE_IOCTL(NCP_IOC_SETPRIVATEDATA_32, do_ncp_setprivatedata)
2964#endif 2931#endif
2965 2932
2966/* dvb */ 2933/* dvb */
2967HANDLE_IOCTL(DMX_GET_EVENT, do_dmx_get_event)
2968HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event) 2934HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
2969HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) 2935HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
2970HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette) 2936HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5f952187fc53..df025453dd97 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -211,7 +211,7 @@ static void remove_dir(struct dentry * d)
211 struct configfs_dirent * sd; 211 struct configfs_dirent * sd;
212 212
213 sd = d->d_fsdata; 213 sd = d->d_fsdata;
214 list_del_init(&sd->s_sibling); 214 list_del_init(&sd->s_sibling);
215 configfs_put(sd); 215 configfs_put(sd);
216 if (d->d_inode) 216 if (d->d_inode)
217 simple_rmdir(parent->d_inode,d); 217 simple_rmdir(parent->d_inode,d);
@@ -330,7 +330,7 @@ static int configfs_detach_prep(struct dentry *dentry)
330 330
331 ret = configfs_detach_prep(sd->s_dentry); 331 ret = configfs_detach_prep(sd->s_dentry);
332 if (!ret) 332 if (!ret)
333 continue; 333 continue;
334 } else 334 } else
335 ret = -ENOTEMPTY; 335 ret = -ENOTEMPTY;
336 336
@@ -931,7 +931,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
931 931
932 new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); 932 new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
933 if (!IS_ERR(new_dentry)) { 933 if (!IS_ERR(new_dentry)) {
934 if (!new_dentry->d_inode) { 934 if (!new_dentry->d_inode) {
935 error = config_item_set_name(item, "%s", new_name); 935 error = config_item_set_name(item, "%s", new_name);
936 if (!error) { 936 if (!error) {
937 d_add(new_dentry, NULL); 937 d_add(new_dentry, NULL);
@@ -1009,8 +1009,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1009 /* fallthrough */ 1009 /* fallthrough */
1010 default: 1010 default:
1011 if (filp->f_pos == 2) { 1011 if (filp->f_pos == 2) {
1012 list_del(q); 1012 list_move(q, &parent_sd->s_children);
1013 list_add(q, &parent_sd->s_children);
1014 } 1013 }
1015 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1014 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
1016 struct configfs_dirent *next; 1015 struct configfs_dirent *next;
@@ -1033,8 +1032,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1033 dt_type(next)) < 0) 1032 dt_type(next)) < 0)
1034 return 0; 1033 return 0;
1035 1034
1036 list_del(q); 1035 list_move(q, p);
1037 list_add(q, p);
1038 p = q; 1036 p = q;
1039 filp->f_pos++; 1037 filp->f_pos++;
1040 } 1038 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c153bd9534cb..e14488ca6411 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -38,7 +38,7 @@
38 38
39extern struct super_block * configfs_sb; 39extern struct super_block * configfs_sb;
40 40
41static struct address_space_operations configfs_aops = { 41static const struct address_space_operations configfs_aops = {
42 .readpage = simple_readpage, 42 .readpage = simple_readpage,
43 .prepare_write = simple_prepare_write, 43 .prepare_write = simple_prepare_write,
44 .commit_write = simple_commit_write 44 .commit_write = simple_commit_write
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index f920d30478e5..3e5fe843e1df 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -103,10 +103,10 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
103 return 0; 103 return 0;
104} 104}
105 105
106static struct super_block *configfs_get_sb(struct file_system_type *fs_type, 106static int configfs_get_sb(struct file_system_type *fs_type,
107 int flags, const char *dev_name, void *data) 107 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
108{ 108{
109 return get_sb_single(fs_type, flags, data, configfs_fill_super); 109 return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
110} 110}
111 111
112static struct file_system_type configfs_fs_type = { 112static struct file_system_type configfs_fs_type = {
@@ -118,7 +118,7 @@ static struct file_system_type configfs_fs_type = {
118 118
119int configfs_pin_fs(void) 119int configfs_pin_fs(void)
120{ 120{
121 return simple_pin_fs("configfs", &configfs_mount, 121 return simple_pin_fs(&configfs_fs_type, &configfs_mount,
122 &configfs_mnt_count); 122 &configfs_mnt_count);
123} 123}
124 124
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index e5512e295cf2..fb65e0800a86 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -66,7 +66,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length)
66} 66}
67 67
68static int create_link(struct config_item *parent_item, 68static int create_link(struct config_item *parent_item,
69 struct config_item *item, 69 struct config_item *item,
70 struct dentry *dentry) 70 struct dentry *dentry)
71{ 71{
72 struct configfs_dirent *target_sd = item->ci_dentry->d_fsdata; 72 struct configfs_dirent *target_sd = item->ci_dentry->d_fsdata;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 9efcc3a164e8..223c0431042d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -30,7 +30,7 @@
30static struct super_operations cramfs_ops; 30static struct super_operations cramfs_ops;
31static struct inode_operations cramfs_dir_inode_operations; 31static struct inode_operations cramfs_dir_inode_operations;
32static const struct file_operations cramfs_directory_operations; 32static const struct file_operations cramfs_directory_operations;
33static struct address_space_operations cramfs_aops; 33static const struct address_space_operations cramfs_aops;
34 34
35static DEFINE_MUTEX(read_mutex); 35static DEFINE_MUTEX(read_mutex);
36 36
@@ -181,9 +181,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
181 struct page *page = NULL; 181 struct page *page = NULL;
182 182
183 if (blocknr + i < devsize) { 183 if (blocknr + i < devsize) {
184 page = read_cache_page(mapping, blocknr + i, 184 page = read_mapping_page(mapping, blocknr + i, NULL);
185 (filler_t *)mapping->a_ops->readpage,
186 NULL);
187 /* synchronous error? */ 185 /* synchronous error? */
188 if (IS_ERR(page)) 186 if (IS_ERR(page))
189 page = NULL; 187 page = NULL;
@@ -322,8 +320,10 @@ out:
322 return -EINVAL; 320 return -EINVAL;
323} 321}
324 322
325static int cramfs_statfs(struct super_block *sb, struct kstatfs *buf) 323static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
326{ 324{
325 struct super_block *sb = dentry->d_sb;
326
327 buf->f_type = CRAMFS_MAGIC; 327 buf->f_type = CRAMFS_MAGIC;
328 buf->f_bsize = PAGE_CACHE_SIZE; 328 buf->f_bsize = PAGE_CACHE_SIZE;
329 buf->f_blocks = CRAMFS_SB(sb)->blocks; 329 buf->f_blocks = CRAMFS_SB(sb)->blocks;
@@ -501,7 +501,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
501 return 0; 501 return 0;
502} 502}
503 503
504static struct address_space_operations cramfs_aops = { 504static const struct address_space_operations cramfs_aops = {
505 .readpage = cramfs_readpage 505 .readpage = cramfs_readpage
506}; 506};
507 507
@@ -528,10 +528,11 @@ static struct super_operations cramfs_ops = {
528 .statfs = cramfs_statfs, 528 .statfs = cramfs_statfs,
529}; 529};
530 530
531static struct super_block *cramfs_get_sb(struct file_system_type *fs_type, 531static int cramfs_get_sb(struct file_system_type *fs_type,
532 int flags, const char *dev_name, void *data) 532 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
533{ 533{
534 return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); 534 return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
535 mnt);
535} 536}
536 537
537static struct file_system_type cramfs_fs_type = { 538static struct file_system_type cramfs_fs_type = {
diff --git a/fs/dcache.c b/fs/dcache.c
index 940d188e5d14..c6e3535be192 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,6 @@
14 * the dcache entry is deleted or garbage collected. 14 * the dcache entry is deleted or garbage collected.
15 */ 15 */
16 16
17#include <linux/config.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
19#include <linux/string.h> 18#include <linux/string.h>
20#include <linux/mm.h> 19#include <linux/mm.h>
@@ -359,12 +358,13 @@ restart:
359} 358}
360 359
361/* 360/*
362 * Throw away a dentry - free the inode, dput the parent. 361 * Throw away a dentry - free the inode, dput the parent. This requires that
363 * This requires that the LRU list has already been 362 * the LRU list has already been removed.
364 * removed. 363 *
365 * Called with dcache_lock, drops it and then regains. 364 * Called with dcache_lock, drops it and then regains.
365 * Called with dentry->d_lock held, drops it.
366 */ 366 */
367static inline void prune_one_dentry(struct dentry * dentry) 367static void prune_one_dentry(struct dentry * dentry)
368{ 368{
369 struct dentry * parent; 369 struct dentry * parent;
370 370
@@ -382,6 +382,8 @@ static inline void prune_one_dentry(struct dentry * dentry)
382/** 382/**
383 * prune_dcache - shrink the dcache 383 * prune_dcache - shrink the dcache
384 * @count: number of entries to try and free 384 * @count: number of entries to try and free
385 * @sb: if given, ignore dentries for other superblocks
386 * which are being unmounted.
385 * 387 *
386 * Shrink the dcache. This is done when we need 388 * Shrink the dcache. This is done when we need
387 * more memory, or simply when we need to unmount 389 * more memory, or simply when we need to unmount
@@ -392,16 +394,29 @@ static inline void prune_one_dentry(struct dentry * dentry)
392 * all the dentries are in use. 394 * all the dentries are in use.
393 */ 395 */
394 396
395static void prune_dcache(int count) 397static void prune_dcache(int count, struct super_block *sb)
396{ 398{
397 spin_lock(&dcache_lock); 399 spin_lock(&dcache_lock);
398 for (; count ; count--) { 400 for (; count ; count--) {
399 struct dentry *dentry; 401 struct dentry *dentry;
400 struct list_head *tmp; 402 struct list_head *tmp;
403 struct rw_semaphore *s_umount;
401 404
402 cond_resched_lock(&dcache_lock); 405 cond_resched_lock(&dcache_lock);
403 406
404 tmp = dentry_unused.prev; 407 tmp = dentry_unused.prev;
408 if (sb) {
409 /* Try to find a dentry for this sb, but don't try
410 * too hard, if they aren't near the tail they will
411 * be moved down again soon
412 */
413 int skip = count;
414 while (skip && tmp != &dentry_unused &&
415 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
416 skip--;
417 tmp = tmp->prev;
418 }
419 }
405 if (tmp == &dentry_unused) 420 if (tmp == &dentry_unused)
406 break; 421 break;
407 list_del_init(tmp); 422 list_del_init(tmp);
@@ -427,7 +442,45 @@ static void prune_dcache(int count)
427 spin_unlock(&dentry->d_lock); 442 spin_unlock(&dentry->d_lock);
428 continue; 443 continue;
429 } 444 }
430 prune_one_dentry(dentry); 445 /*
446 * If the dentry is not DCACHED_REFERENCED, it is time
447 * to remove it from the dcache, provided the super block is
448 * NULL (which means we are trying to reclaim memory)
449 * or this dentry belongs to the same super block that
450 * we want to shrink.
451 */
452 /*
453 * If this dentry is for "my" filesystem, then I can prune it
454 * without taking the s_umount lock (I already hold it).
455 */
456 if (sb && dentry->d_sb == sb) {
457 prune_one_dentry(dentry);
458 continue;
459 }
460 /*
461 * ...otherwise we need to be sure this filesystem isn't being
462 * unmounted, otherwise we could race with
463 * generic_shutdown_super(), and end up holding a reference to
464 * an inode while the filesystem is unmounted.
465 * So we try to get s_umount, and make sure s_root isn't NULL.
466 * (Take a local copy of s_umount to avoid a use-after-free of
467 * `dentry').
468 */
469 s_umount = &dentry->d_sb->s_umount;
470 if (down_read_trylock(s_umount)) {
471 if (dentry->d_sb->s_root != NULL) {
472 prune_one_dentry(dentry);
473 up_read(s_umount);
474 continue;
475 }
476 up_read(s_umount);
477 }
478 spin_unlock(&dentry->d_lock);
479 /* Cannot remove the first dentry, and it isn't appropriate
480 * to move it to the head of the list, so give up, and try
481 * later
482 */
483 break;
431 } 484 }
432 spin_unlock(&dcache_lock); 485 spin_unlock(&dcache_lock);
433} 486}
@@ -468,8 +521,7 @@ void shrink_dcache_sb(struct super_block * sb)
468 dentry = list_entry(tmp, struct dentry, d_lru); 521 dentry = list_entry(tmp, struct dentry, d_lru);
469 if (dentry->d_sb != sb) 522 if (dentry->d_sb != sb)
470 continue; 523 continue;
471 list_del(tmp); 524 list_move(tmp, &dentry_unused);
472 list_add(tmp, &dentry_unused);
473 } 525 }
474 526
475 /* 527 /*
@@ -584,7 +636,7 @@ resume:
584 * of the unused list for prune_dcache 636 * of the unused list for prune_dcache
585 */ 637 */
586 if (!atomic_read(&dentry->d_count)) { 638 if (!atomic_read(&dentry->d_count)) {
587 list_add(&dentry->d_lru, dentry_unused.prev); 639 list_add_tail(&dentry->d_lru, &dentry_unused);
588 dentry_stat.nr_unused++; 640 dentry_stat.nr_unused++;
589 found++; 641 found++;
590 } 642 }
@@ -630,46 +682,7 @@ void shrink_dcache_parent(struct dentry * parent)
630 int found; 682 int found;
631 683
632 while ((found = select_parent(parent)) != 0) 684 while ((found = select_parent(parent)) != 0)
633 prune_dcache(found); 685 prune_dcache(found, parent->d_sb);
634}
635
636/**
637 * shrink_dcache_anon - further prune the cache
638 * @head: head of d_hash list of dentries to prune
639 *
640 * Prune the dentries that are anonymous
641 *
642 * parsing d_hash list does not hlist_for_each_entry_rcu() as it
643 * done under dcache_lock.
644 *
645 */
646void shrink_dcache_anon(struct hlist_head *head)
647{
648 struct hlist_node *lp;
649 int found;
650 do {
651 found = 0;
652 spin_lock(&dcache_lock);
653 hlist_for_each(lp, head) {
654 struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
655 if (!list_empty(&this->d_lru)) {
656 dentry_stat.nr_unused--;
657 list_del_init(&this->d_lru);
658 }
659
660 /*
661 * move only zero ref count dentries to the end
662 * of the unused list for prune_dcache
663 */
664 if (!atomic_read(&this->d_count)) {
665 list_add_tail(&this->d_lru, &dentry_unused);
666 dentry_stat.nr_unused++;
667 found++;
668 }
669 }
670 spin_unlock(&dcache_lock);
671 prune_dcache(found);
672 } while(found);
673} 686}
674 687
675/* 688/*
@@ -689,7 +702,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
689 if (nr) { 702 if (nr) {
690 if (!(gfp_mask & __GFP_FS)) 703 if (!(gfp_mask & __GFP_FS))
691 return -1; 704 return -1;
692 prune_dcache(nr); 705 prune_dcache(nr, NULL);
693 } 706 }
694 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 707 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
695} 708}
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 8749339bf4f6..0c4b0674854b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -12,7 +12,6 @@
12 * to the pair and can be looked up from userspace. 12 * to the pair and can be looked up from userspace.
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/syscalls.h> 15#include <linux/syscalls.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/slab.h> 17#include <linux/slab.h>
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 66a505422e5c..39640fd03458 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -13,7 +13,6 @@
13 * 13 *
14 */ 14 */
15 15
16#include <linux/config.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/fs.h> 17#include <linux/fs.h>
19#include <linux/pagemap.h> 18#include <linux/pagemap.h>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b55b4ea9a676..e8ae3042b806 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -16,7 +16,6 @@
16/* uncomment to get debug messages from the debug filesystem, ah the irony. */ 16/* uncomment to get debug messages from the debug filesystem, ah the irony. */
17/* #define DEBUG */ 17/* #define DEBUG */
18 18
19#include <linux/config.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/fs.h> 20#include <linux/fs.h>
22#include <linux/mount.h> 21#include <linux/mount.h>
@@ -111,11 +110,11 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
111 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 110 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
112} 111}
113 112
114static struct super_block *debug_get_sb(struct file_system_type *fs_type, 113static int debug_get_sb(struct file_system_type *fs_type,
115 int flags, const char *dev_name, 114 int flags, const char *dev_name,
116 void *data) 115 void *data, struct vfsmount *mnt)
117{ 116{
118 return get_sb_single(fs_type, flags, data, debug_fill_super); 117 return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
119} 118}
120 119
121static struct file_system_type debug_fs_type = { 120static struct file_system_type debug_fs_type = {
@@ -199,7 +198,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
199 198
200 pr_debug("debugfs: creating file '%s'\n",name); 199 pr_debug("debugfs: creating file '%s'\n",name);
201 200
202 error = simple_pin_fs("debugfs", &debugfs_mount, &debugfs_mount_count); 201 error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count);
203 if (error) 202 if (error)
204 goto exit; 203 goto exit;
205 204
diff --git a/fs/devfs/Makefile b/fs/devfs/Makefile
deleted file mode 100644
index 6dd8d1245e2c..000000000000
--- a/fs/devfs/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for the linux devfs-filesystem routines.
3#
4
5obj-$(CONFIG_DEVFS_FS) += devfs.o
6
7devfs-objs := base.o util.o
8
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
deleted file mode 100644
index 52f5059c4f31..000000000000
--- a/fs/devfs/base.c
+++ /dev/null
@@ -1,2836 +0,0 @@
1/* devfs (Device FileSystem) driver.
2
3 Copyright (C) 1998-2002 Richard Gooch
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
19 Richard Gooch may be reached by email at rgooch@atnf.csiro.au
20 The postal address is:
21 Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
22
23 ChangeLog
24
25 19980110 Richard Gooch <rgooch@atnf.csiro.au>
26 Original version.
27 v0.1
28 19980111 Richard Gooch <rgooch@atnf.csiro.au>
29 Created per-fs inode table rather than using inode->u.generic_ip
30 v0.2
31 19980111 Richard Gooch <rgooch@atnf.csiro.au>
32 Created .epoch inode which has a ctime of 0.
33 Fixed loss of named pipes when dentries lost.
34 Fixed loss of inode data when devfs_register() follows mknod().
35 v0.3
36 19980111 Richard Gooch <rgooch@atnf.csiro.au>
37 Fix for when compiling with CONFIG_KERNELD.
38 19980112 Richard Gooch <rgooch@atnf.csiro.au>
39 Fix for readdir() which sometimes didn't show entries.
40 Added <<tolerant>> option to <devfs_register>.
41 v0.4
42 19980113 Richard Gooch <rgooch@atnf.csiro.au>
43 Created <devfs_fill_file> function.
44 v0.5
45 19980115 Richard Gooch <rgooch@atnf.csiro.au>
46 Added subdirectory support. Major restructuring.
47 19980116 Richard Gooch <rgooch@atnf.csiro.au>
48 Fixed <find_by_dev> to not search major=0,minor=0.
49 Added symlink support.
50 v0.6
51 19980120 Richard Gooch <rgooch@atnf.csiro.au>
52 Created <devfs_mk_dir> function and support directory unregister
53 19980120 Richard Gooch <rgooch@atnf.csiro.au>
54 Auto-ownership uses real uid/gid rather than effective uid/gid.
55 v0.7
56 19980121 Richard Gooch <rgooch@atnf.csiro.au>
57 Supported creation of sockets.
58 v0.8
59 19980122 Richard Gooch <rgooch@atnf.csiro.au>
60 Added DEVFS_FL_HIDE_UNREG flag.
61 Interface change to <devfs_mk_symlink>.
62 Created <devfs_symlink> to support symlink(2).
63 v0.9
64 19980123 Richard Gooch <rgooch@atnf.csiro.au>
65 Added check to <devfs_fill_file> to check inode is in devfs.
66 Added optional traversal of symlinks.
67 v0.10
68 19980124 Richard Gooch <rgooch@atnf.csiro.au>
69 Created <devfs_get_flags> and <devfs_set_flags>.
70 v0.11
71 19980125 C. Scott Ananian <cananian@alumni.princeton.edu>
72 Created <devfs_find_handle>.
73 19980125 Richard Gooch <rgooch@atnf.csiro.au>
74 Allow removal of symlinks.
75 v0.12
76 19980125 Richard Gooch <rgooch@atnf.csiro.au>
77 Created <devfs_set_symlink_destination>.
78 19980126 Richard Gooch <rgooch@atnf.csiro.au>
79 Moved DEVFS_SUPER_MAGIC into header file.
80 Added DEVFS_FL_HIDE flag.
81 Created <devfs_get_maj_min>.
82 Created <devfs_get_handle_from_inode>.
83 Fixed minor bug in <find_by_dev>.
84 19980127 Richard Gooch <rgooch@atnf.csiro.au>
85 Changed interface to <find_by_dev>, <find_entry>,
86 <devfs_unregister>, <devfs_fill_file> and <devfs_find_handle>.
87 Fixed inode times when symlink created with symlink(2).
88 v0.13
89 19980129 C. Scott Ananian <cananian@alumni.princeton.edu>
90 Exported <devfs_set_symlink_destination>, <devfs_get_maj_min>
91 and <devfs_get_handle_from_inode>.
92 19980129 Richard Gooch <rgooch@atnf.csiro.au>
93 Created <devfs_unlink> to support unlink(2).
94 v0.14
95 19980129 Richard Gooch <rgooch@atnf.csiro.au>
96 Fixed kerneld support for entries in devfs subdirectories.
97 19980130 Richard Gooch <rgooch@atnf.csiro.au>
98 Bugfixes in <call_kerneld>.
99 v0.15
100 19980207 Richard Gooch <rgooch@atnf.csiro.au>
101 Call kerneld when looking up unregistered entries.
102 v0.16
103 19980326 Richard Gooch <rgooch@atnf.csiro.au>
104 Modified interface to <devfs_find_handle> for symlink traversal.
105 v0.17
106 19980331 Richard Gooch <rgooch@atnf.csiro.au>
107 Fixed persistence bug with device numbers for manually created
108 device files.
109 Fixed problem with recreating symlinks with different content.
110 v0.18
111 19980401 Richard Gooch <rgooch@atnf.csiro.au>
112 Changed to CONFIG_KMOD.
113 Hide entries which are manually unlinked.
114 Always invalidate devfs dentry cache when registering entries.
115 Created <devfs_rmdir> to support rmdir(2).
116 Ensure directories created by <devfs_mk_dir> are visible.
117 v0.19
118 19980402 Richard Gooch <rgooch@atnf.csiro.au>
119 Invalidate devfs dentry cache when making directories.
120 Invalidate devfs dentry cache when removing entries.
121 Fixed persistence bug with fifos.
122 v0.20
123 19980421 Richard Gooch <rgooch@atnf.csiro.au>
124 Print process command when debugging kerneld/kmod.
125 Added debugging for register/unregister/change operations.
126 19980422 Richard Gooch <rgooch@atnf.csiro.au>
127 Added "devfs=" boot options.
128 v0.21
129 19980426 Richard Gooch <rgooch@atnf.csiro.au>
130 No longer lock/unlock superblock in <devfs_put_super>.
131 Drop negative dentries when they are released.
132 Manage dcache more efficiently.
133 v0.22
134 19980427 Richard Gooch <rgooch@atnf.csiro.au>
135 Added DEVFS_FL_AUTO_DEVNUM flag.
136 v0.23
137 19980430 Richard Gooch <rgooch@atnf.csiro.au>
138 No longer set unnecessary methods.
139 v0.24
140 19980504 Richard Gooch <rgooch@atnf.csiro.au>
141 Added PID display to <call_kerneld> debugging message.
142 Added "after" debugging message to <call_kerneld>.
143 19980519 Richard Gooch <rgooch@atnf.csiro.au>
144 Added "diread" and "diwrite" boot options.
145 19980520 Richard Gooch <rgooch@atnf.csiro.au>
146 Fixed persistence problem with permissions.
147 v0.25
148 19980602 Richard Gooch <rgooch@atnf.csiro.au>
149 Support legacy device nodes.
150 Fixed bug where recreated inodes were hidden.
151 v0.26
152 19980602 Richard Gooch <rgooch@atnf.csiro.au>
153 Improved debugging in <get_vfs_inode>.
154 19980607 Richard Gooch <rgooch@atnf.csiro.au>
155 No longer free old dentries in <devfs_mk_dir>.
156 Free all dentries for a given entry when deleting inodes.
157 v0.27
158 19980627 Richard Gooch <rgooch@atnf.csiro.au>
159 Limit auto-device numbering to majors 128 to 239.
160 v0.28
161 19980629 Richard Gooch <rgooch@atnf.csiro.au>
162 Fixed inode times persistence problem.
163 v0.29
164 19980704 Richard Gooch <rgooch@atnf.csiro.au>
165 Fixed spelling in <devfs_readlink> debug.
166 Fixed bug in <devfs_setup> parsing "dilookup".
167 v0.30
168 19980705 Richard Gooch <rgooch@atnf.csiro.au>
169 Fixed devfs inode leak when manually recreating inodes.
170 Fixed permission persistence problem when recreating inodes.
171 v0.31
172 19980727 Richard Gooch <rgooch@atnf.csiro.au>
173 Removed harmless "unused variable" compiler warning.
174 Fixed modes for manually recreated device nodes.
175 v0.32
176 19980728 Richard Gooch <rgooch@atnf.csiro.au>
177 Added NULL devfs inode warning in <devfs_read_inode>.
178 Force all inode nlink values to 1.
179 v0.33
180 19980730 Richard Gooch <rgooch@atnf.csiro.au>
181 Added "dimknod" boot option.
182 Set inode nlink to 0 when freeing dentries.
183 Fixed modes for manually recreated symlinks.
184 v0.34
185 19980802 Richard Gooch <rgooch@atnf.csiro.au>
186 Fixed bugs in recreated directories and symlinks.
187 v0.35
188 19980806 Richard Gooch <rgooch@atnf.csiro.au>
189 Fixed bugs in recreated device nodes.
190 19980807 Richard Gooch <rgooch@atnf.csiro.au>
191 Fixed bug in currently unused <devfs_get_handle_from_inode>.
192 Defined new <devfs_handle_t> type.
193 Improved debugging when getting entries.
194 Fixed bug where directories could be emptied.
195 v0.36
196 19980809 Richard Gooch <rgooch@atnf.csiro.au>
197 Replaced dummy .epoch inode with .devfsd character device.
198 19980810 Richard Gooch <rgooch@atnf.csiro.au>
199 Implemented devfsd protocol revision 0.
200 v0.37
201 19980819 Richard Gooch <rgooch@atnf.csiro.au>
202 Added soothing message to warning in <devfs_d_iput>.
203 v0.38
204 19980829 Richard Gooch <rgooch@atnf.csiro.au>
205 Use GCC extensions for structure initialisations.
206 Implemented async open notification.
207 Incremented devfsd protocol revision to 1.
208 v0.39
209 19980908 Richard Gooch <rgooch@atnf.csiro.au>
210 Moved async open notification to end of <devfs_open>.
211 v0.40
212 19980910 Richard Gooch <rgooch@atnf.csiro.au>
213 Prepended "/dev/" to module load request.
214 Renamed <call_kerneld> to <call_kmod>.
215 v0.41
216 19980910 Richard Gooch <rgooch@atnf.csiro.au>
217 Fixed typo "AYSNC" -> "ASYNC".
218 v0.42
219 19980910 Richard Gooch <rgooch@atnf.csiro.au>
220 Added open flag for files.
221 v0.43
222 19980927 Richard Gooch <rgooch@atnf.csiro.au>
223 Set i_blocks=0 and i_blksize=1024 in <devfs_read_inode>.
224 v0.44
225 19981005 Richard Gooch <rgooch@atnf.csiro.au>
226 Added test for empty <<name>> in <devfs_find_handle>.
227 Renamed <generate_path> to <devfs_generate_path> and published.
228 v0.45
229 19981006 Richard Gooch <rgooch@atnf.csiro.au>
230 Created <devfs_get_fops>.
231 v0.46
232 19981007 Richard Gooch <rgooch@atnf.csiro.au>
233 Limit auto-device numbering to majors 144 to 239.
234 v0.47
235 19981010 Richard Gooch <rgooch@atnf.csiro.au>
236 Updated <devfs_follow_link> for VFS change in 2.1.125.
237 v0.48
238 19981022 Richard Gooch <rgooch@atnf.csiro.au>
239 Created DEVFS_ FL_COMPAT flag.
240 v0.49
241 19981023 Richard Gooch <rgooch@atnf.csiro.au>
242 Created "nocompat" boot option.
243 v0.50
244 19981025 Richard Gooch <rgooch@atnf.csiro.au>
245 Replaced "mount" boot option with "nomount".
246 v0.51
247 19981110 Richard Gooch <rgooch@atnf.csiro.au>
248 Created "only" boot option.
249 v0.52
250 19981112 Richard Gooch <rgooch@atnf.csiro.au>
251 Added DEVFS_FL_REMOVABLE flag.
252 v0.53
253 19981114 Richard Gooch <rgooch@atnf.csiro.au>
254 Only call <scan_dir_for_removable> on first call to
255 <devfs_readdir>.
256 v0.54
257 19981205 Richard Gooch <rgooch@atnf.csiro.au>
258 Updated <devfs_rmdir> for VFS change in 2.1.131.
259 v0.55
260 19981218 Richard Gooch <rgooch@atnf.csiro.au>
261 Created <devfs_mk_compat>.
262 19981220 Richard Gooch <rgooch@atnf.csiro.au>
263 Check for partitions on removable media in <devfs_lookup>.
264 v0.56
265 19990118 Richard Gooch <rgooch@atnf.csiro.au>
266 Added support for registering regular files.
267 Created <devfs_set_file_size>.
268 Update devfs inodes from entries if not changed through FS.
269 v0.57
270 19990124 Richard Gooch <rgooch@atnf.csiro.au>
271 Fixed <devfs_fill_file> to only initialise temporary inodes.
272 Trap for NULL fops in <devfs_register>.
273 Return -ENODEV in <devfs_fill_file> for non-driver inodes.
274 v0.58
275 19990126 Richard Gooch <rgooch@atnf.csiro.au>
276 Switched from PATH_MAX to DEVFS_PATHLEN.
277 v0.59
278 19990127 Richard Gooch <rgooch@atnf.csiro.au>
279 Created "nottycompat" boot option.
280 v0.60
281 19990318 Richard Gooch <rgooch@atnf.csiro.au>
282 Fixed <devfsd_read> to not overrun event buffer.
283 v0.61
284 19990329 Richard Gooch <rgooch@atnf.csiro.au>
285 Created <devfs_auto_unregister>.
286 v0.62
287 19990330 Richard Gooch <rgooch@atnf.csiro.au>
288 Don't return unregistred entries in <devfs_find_handle>.
289 Panic in <devfs_unregister> if entry unregistered.
290 19990401 Richard Gooch <rgooch@atnf.csiro.au>
291 Don't panic in <devfs_auto_unregister> for duplicates.
292 v0.63
293 19990402 Richard Gooch <rgooch@atnf.csiro.au>
294 Don't unregister already unregistered entries in <unregister>.
295 v0.64
296 19990510 Richard Gooch <rgooch@atnf.csiro.au>
297 Disable warning messages when unable to read partition table for
298 removable media.
299 v0.65
300 19990512 Richard Gooch <rgooch@atnf.csiro.au>
301 Updated <devfs_lookup> for VFS change in 2.3.1-pre1.
302 Created "oops-on-panic" boot option.
303 Improved debugging in <devfs_register> and <devfs_unregister>.
304 v0.66
305 19990519 Richard Gooch <rgooch@atnf.csiro.au>
306 Added documentation for some functions.
307 19990525 Richard Gooch <rgooch@atnf.csiro.au>
308 Removed "oops-on-panic" boot option: now always Oops.
309 v0.67
310 19990531 Richard Gooch <rgooch@atnf.csiro.au>
311 Improved debugging in <devfs_register>.
312 v0.68
313 19990604 Richard Gooch <rgooch@atnf.csiro.au>
314 Added "diunlink" and "nokmod" boot options.
315 Removed superfluous warning message in <devfs_d_iput>.
316 v0.69
317 19990611 Richard Gooch <rgooch@atnf.csiro.au>
318 Took account of change to <d_alloc_root>.
319 v0.70
320 19990614 Richard Gooch <rgooch@atnf.csiro.au>
321 Created separate event queue for each mounted devfs.
322 Removed <devfs_invalidate_dcache>.
323 Created new ioctl()s.
324 Incremented devfsd protocol revision to 3.
325 Fixed bug when re-creating directories: contents were lost.
326 Block access to inodes until devfsd updates permissions.
327 19990615 Richard Gooch <rgooch@atnf.csiro.au>
328 Support 2.2.x kernels.
329 v0.71
330 19990623 Richard Gooch <rgooch@atnf.csiro.au>
331 Switched to sending process uid/gid to devfsd.
332 Renamed <call_kmod> to <try_modload>.
333 Added DEVFSD_NOTIFY_LOOKUP event.
334 19990624 Richard Gooch <rgooch@atnf.csiro.au>
335 Added DEVFSD_NOTIFY_CHANGE event.
336 Incremented devfsd protocol revision to 4.
337 v0.72
338 19990713 Richard Gooch <rgooch@atnf.csiro.au>
339 Return EISDIR rather than EINVAL for read(2) on directories.
340 v0.73
341 19990809 Richard Gooch <rgooch@atnf.csiro.au>
342 Changed <devfs_setup> to new __init scheme.
343 v0.74
344 19990901 Richard Gooch <rgooch@atnf.csiro.au>
345 Changed remaining function declarations to new __init scheme.
346 v0.75
347 19991013 Richard Gooch <rgooch@atnf.csiro.au>
348 Created <devfs_get_info>, <devfs_set_info>,
349 <devfs_get_first_child> and <devfs_get_next_sibling>.
350 Added <<dir>> parameter to <devfs_register>, <devfs_mk_compat>,
351 <devfs_mk_dir> and <devfs_find_handle>.
352 Work sponsored by SGI.
353 v0.76
354 19991017 Richard Gooch <rgooch@atnf.csiro.au>
355 Allow multiple unregistrations.
356 Work sponsored by SGI.
357 v0.77
358 19991026 Richard Gooch <rgooch@atnf.csiro.au>
359 Added major and minor number to devfsd protocol.
360 Incremented devfsd protocol revision to 5.
361 Work sponsored by SGI.
362 v0.78
363 19991030 Richard Gooch <rgooch@atnf.csiro.au>
364 Support info pointer for all devfs entry types.
365 Added <<info>> parameter to <devfs_mk_dir> and
366 <devfs_mk_symlink>.
367 Work sponsored by SGI.
368 v0.79
369 19991031 Richard Gooch <rgooch@atnf.csiro.au>
370 Support "../" when searching devfs namespace.
371 Work sponsored by SGI.
372 v0.80
373 19991101 Richard Gooch <rgooch@atnf.csiro.au>
374 Created <devfs_get_unregister_slave>.
375 Work sponsored by SGI.
376 v0.81
377 19991103 Richard Gooch <rgooch@atnf.csiro.au>
378 Exported <devfs_get_parent>.
379 Work sponsored by SGI.
380 v0.82
381 19991104 Richard Gooch <rgooch@atnf.csiro.au>
382 Removed unused <devfs_set_symlink_destination>.
383 19991105 Richard Gooch <rgooch@atnf.csiro.au>
384 Do not hide entries from devfsd or children.
385 Removed DEVFS_ FL_TTY_COMPAT flag.
386 Removed "nottycompat" boot option.
387 Removed <devfs_mk_compat>.
388 Work sponsored by SGI.
389 v0.83
390 19991107 Richard Gooch <rgooch@atnf.csiro.au>
391 Added DEVFS_FL_WAIT flag.
392 Work sponsored by SGI.
393 v0.84
394 19991107 Richard Gooch <rgooch@atnf.csiro.au>
395 Support new "disc" naming scheme in <get_removable_partition>.
396 Allow NULL fops in <devfs_register>.
397 Work sponsored by SGI.
398 v0.85
399 19991110 Richard Gooch <rgooch@atnf.csiro.au>
400 Fall back to major table if NULL fops given to <devfs_register>.
401 Work sponsored by SGI.
402 v0.86
403 19991204 Richard Gooch <rgooch@atnf.csiro.au>
404 Support fifos when unregistering.
405 Work sponsored by SGI.
406 v0.87
407 19991209 Richard Gooch <rgooch@atnf.csiro.au>
408 Removed obsolete DEVFS_ FL_COMPAT and DEVFS_ FL_TOLERANT flags.
409 Work sponsored by SGI.
410 v0.88
411 19991214 Richard Gooch <rgooch@atnf.csiro.au>
412 Removed kmod support.
413 Work sponsored by SGI.
414 v0.89
415 19991216 Richard Gooch <rgooch@atnf.csiro.au>
416 Improved debugging in <get_vfs_inode>.
417 Ensure dentries created by devfsd will be cleaned up.
418 Work sponsored by SGI.
419 v0.90
420 19991223 Richard Gooch <rgooch@atnf.csiro.au>
421 Created <devfs_get_name>.
422 Work sponsored by SGI.
423 v0.91
424 20000203 Richard Gooch <rgooch@atnf.csiro.au>
425 Ported to kernel 2.3.42.
426 Removed <devfs_fill_file>.
427 Work sponsored by SGI.
428 v0.92
429 20000306 Richard Gooch <rgooch@atnf.csiro.au>
430 Added DEVFS_ FL_NO_PERSISTENCE flag.
431 Removed unnecessary call to <update_devfs_inode_from_entry> in
432 <devfs_readdir>.
433 Work sponsored by SGI.
434 v0.93
435 20000413 Richard Gooch <rgooch@atnf.csiro.au>
436 Set inode->i_size to correct size for symlinks.
437 20000414 Richard Gooch <rgooch@atnf.csiro.au>
438 Only give lookup() method to directories to comply with new VFS
439 assumptions.
440 Work sponsored by SGI.
441 20000415 Richard Gooch <rgooch@atnf.csiro.au>
442 Remove unnecessary tests in symlink methods.
443 Don't kill existing block ops in <devfs_read_inode>.
444 Work sponsored by SGI.
445 v0.94
446 20000424 Richard Gooch <rgooch@atnf.csiro.au>
447 Don't create missing directories in <devfs_find_handle>.
448 Work sponsored by SGI.
449 v0.95
450 20000430 Richard Gooch <rgooch@atnf.csiro.au>
451 Added CONFIG_DEVFS_MOUNT.
452 Work sponsored by SGI.
453 v0.96
454 20000608 Richard Gooch <rgooch@atnf.csiro.au>
455 Disabled multi-mount capability (use VFS bindings instead).
456 Work sponsored by SGI.
457 v0.97
458 20000610 Richard Gooch <rgooch@atnf.csiro.au>
459 Switched to FS_SINGLE to disable multi-mounts.
460 20000612 Richard Gooch <rgooch@atnf.csiro.au>
461 Removed module support.
462 Removed multi-mount code.
463 Removed compatibility macros: VFS has changed too much.
464 Work sponsored by SGI.
465 v0.98
466 20000614 Richard Gooch <rgooch@atnf.csiro.au>
467 Merged devfs inode into devfs entry.
468 Work sponsored by SGI.
469 v0.99
470 20000619 Richard Gooch <rgooch@atnf.csiro.au>
471 Removed dead code in <devfs_register> which used to call
472 <free_dentries>.
473 Work sponsored by SGI.
474 v0.100
475 20000621 Richard Gooch <rgooch@atnf.csiro.au>
476 Changed interface to <devfs_register>.
477 Work sponsored by SGI.
478 v0.101
479 20000622 Richard Gooch <rgooch@atnf.csiro.au>
480 Simplified interface to <devfs_mk_symlink> and <devfs_mk_dir>.
481 Simplified interface to <devfs_find_handle>.
482 Work sponsored by SGI.
483 v0.102
484 20010519 Richard Gooch <rgooch@atnf.csiro.au>
485 Ensure <devfs_generate_path> terminates string for root entry.
486 Exported <devfs_get_name> to modules.
487 20010520 Richard Gooch <rgooch@atnf.csiro.au>
488 Make <devfs_mk_symlink> send events to devfsd.
489 Cleaned up option processing in <devfs_setup>.
490 20010521 Richard Gooch <rgooch@atnf.csiro.au>
491 Fixed bugs in handling symlinks: could leak or cause Oops.
492 20010522 Richard Gooch <rgooch@atnf.csiro.au>
493 Cleaned up directory handling by separating fops.
494 v0.103
495 20010601 Richard Gooch <rgooch@atnf.csiro.au>
496 Fixed handling of inverted options in <devfs_setup>.
497 v0.104
498 20010604 Richard Gooch <rgooch@atnf.csiro.au>
499 Adjusted <try_modload> to account for <devfs_generate_path> fix.
500 v0.105
501 20010617 Richard Gooch <rgooch@atnf.csiro.au>
502 Answered question posed by Al Viro and removed his comments.
503 Moved setting of registered flag after other fields are changed.
504 Fixed race between <devfsd_close> and <devfsd_notify_one>.
505 Global VFS changes added bogus BKL to <devfsd_close>: removed.
506 Widened locking in <devfs_readlink> and <devfs_follow_link>.
507 Replaced <devfsd_read> stack usage with <devfsd_ioctl> kmalloc.
508 Simplified locking in <devfsd_ioctl> and fixed memory leak.
509 v0.106
510 20010709 Richard Gooch <rgooch@atnf.csiro.au>
511 Removed broken devnum allocation and use <devfs_alloc_devnum>.
512 Fixed old devnum leak by calling new <devfs_dealloc_devnum>.
513 v0.107
514 20010712 Richard Gooch <rgooch@atnf.csiro.au>
515 Fixed bug in <devfs_setup> which could hang boot process.
516 v0.108
517 20010730 Richard Gooch <rgooch@atnf.csiro.au>
518 Added DEVFSD_NOTIFY_DELETE event.
519 20010801 Richard Gooch <rgooch@atnf.csiro.au>
520 Removed #include <asm/segment.h>.
521 v0.109
522 20010807 Richard Gooch <rgooch@atnf.csiro.au>
523 Fixed inode table races by removing it and using
524 inode->u.generic_ip instead.
525 Moved <devfs_read_inode> into <get_vfs_inode>.
526 Moved <devfs_write_inode> into <devfs_notify_change>.
527 v0.110
528 20010808 Richard Gooch <rgooch@atnf.csiro.au>
529 Fixed race in <devfs_do_symlink> for uni-processor.
530 v0.111
531 20010818 Richard Gooch <rgooch@atnf.csiro.au>
532 Removed remnant of multi-mount support in <devfs_mknod>.
533 Removed unused DEVFS_FL_SHOW_UNREG flag.
534 v0.112
535 20010820 Richard Gooch <rgooch@atnf.csiro.au>
536 Removed nlink field from struct devfs_inode.
537 v0.113
538 20010823 Richard Gooch <rgooch@atnf.csiro.au>
539 Replaced BKL with global rwsem to protect symlink data (quick
540 and dirty hack).
541 v0.114
542 20010827 Richard Gooch <rgooch@atnf.csiro.au>
543 Replaced global rwsem for symlink with per-link refcount.
544 v0.115
545 20010919 Richard Gooch <rgooch@atnf.csiro.au>
546 Set inode->i_mapping->a_ops for block nodes in <get_vfs_inode>.
547 v0.116
548 20011008 Richard Gooch <rgooch@atnf.csiro.au>
549 Fixed overrun in <devfs_link> by removing function (not needed).
550 20011009 Richard Gooch <rgooch@atnf.csiro.au>
551 Fixed buffer underrun in <try_modload>.
552 20011029 Richard Gooch <rgooch@atnf.csiro.au>
553 Fixed race in <devfsd_ioctl> when setting event mask.
554 20011114 Richard Gooch <rgooch@atnf.csiro.au>
555 First release of new locking code.
556 v1.0
557 20011117 Richard Gooch <rgooch@atnf.csiro.au>
558 Discard temporary buffer, now use "%s" for dentry names.
559 20011118 Richard Gooch <rgooch@atnf.csiro.au>
560 Don't generate path in <try_modload>: use fake entry instead.
561 Use "existing" directory in <_devfs_make_parent_for_leaf>.
562 20011122 Richard Gooch <rgooch@atnf.csiro.au>
563 Use slab cache rather than fixed buffer for devfsd events.
564 v1.1
565 20011125 Richard Gooch <rgooch@atnf.csiro.au>
566 Send DEVFSD_NOTIFY_REGISTERED events in <devfs_mk_dir>.
567 20011127 Richard Gooch <rgooch@atnf.csiro.au>
568 Fixed locking bug in <devfs_d_revalidate_wait> due to typo.
569 Do not send CREATE, CHANGE, ASYNC_OPEN or DELETE events from
570 devfsd or children.
571 v1.2
572 20011202 Richard Gooch <rgooch@atnf.csiro.au>
573 Fixed bug in <devfsd_read>: was dereferencing freed pointer.
574 v1.3
575 20011203 Richard Gooch <rgooch@atnf.csiro.au>
576 Fixed bug in <devfsd_close>: was dereferencing freed pointer.
577 Added process group check for devfsd privileges.
578 v1.4
579 20011204 Richard Gooch <rgooch@atnf.csiro.au>
580 Use SLAB_ATOMIC in <devfsd_notify_de> from <devfs_d_delete>.
581 v1.5
582 20011211 Richard Gooch <rgooch@atnf.csiro.au>
583 Return old entry in <devfs_mk_dir> for 2.4.x kernels.
584 20011212 Richard Gooch <rgooch@atnf.csiro.au>
585 Increment refcount on module in <check_disc_changed>.
586 20011215 Richard Gooch <rgooch@atnf.csiro.au>
587 Created <devfs_get_handle> and exported <devfs_put>.
588 Increment refcount on module in <devfs_get_ops>.
589 Created <devfs_put_ops>.
590 v1.6
591 20011216 Richard Gooch <rgooch@atnf.csiro.au>
592 Added poisoning to <devfs_put>.
593 Improved debugging messages.
594 v1.7
595 20011221 Richard Gooch <rgooch@atnf.csiro.au>
596 Corrected (made useful) debugging message in <unregister>.
597 Moved <kmem_cache_create> in <mount_devfs_fs> to <init_devfs_fs>
598 20011224 Richard Gooch <rgooch@atnf.csiro.au>
599 Added magic number to guard against scribbling drivers.
600 20011226 Richard Gooch <rgooch@atnf.csiro.au>
601 Only return old entry in <devfs_mk_dir> if a directory.
602 Defined macros for error and debug messages.
603 v1.8
604 20020113 Richard Gooch <rgooch@atnf.csiro.au>
605 Fixed (rare, old) race in <devfs_lookup>.
606 v1.9
607 20020120 Richard Gooch <rgooch@atnf.csiro.au>
608 Fixed deadlock bug in <devfs_d_revalidate_wait>.
609 Tag VFS deletable in <devfs_mk_symlink> if handle ignored.
610 v1.10
611 20020129 Richard Gooch <rgooch@atnf.csiro.au>
612 Added KERN_* to remaining messages.
613 Cleaned up declaration of <stat_read>.
614 v1.11
615 20020219 Richard Gooch <rgooch@atnf.csiro.au>
616 Changed <devfs_rmdir> to allow later additions if not yet empty.
617 v1.12
618 20020406 Richard Gooch <rgooch@atnf.csiro.au>
619 Removed silently introduced calls to lock_kernel() and
620 unlock_kernel() due to recent VFS locking changes. BKL isn't
621 required in devfs.
622 v1.13
623 20020428 Richard Gooch <rgooch@atnf.csiro.au>
624 Removed 2.4.x compatibility code.
625 v1.14
626 20020510 Richard Gooch <rgooch@atnf.csiro.au>
627 Added BKL to <devfs_open> because drivers still need it.
628 v1.15
629 20020512 Richard Gooch <rgooch@atnf.csiro.au>
630 Protected <scan_dir_for_removable> and <get_removable_partition>
631 from changing directory contents.
632 v1.16
633 20020514 Richard Gooch <rgooch@atnf.csiro.au>
634 Minor cleanup of <scan_dir_for_removable>.
635 v1.17
636 20020721 Richard Gooch <rgooch@atnf.csiro.au>
637 Switched to ISO C structure field initialisers.
638 Switch to set_current_state() and move before add_wait_queue().
639 20020722 Richard Gooch <rgooch@atnf.csiro.au>
640 Fixed devfs entry leak in <devfs_readdir> when *readdir fails.
641 v1.18
642 20020725 Richard Gooch <rgooch@atnf.csiro.au>
643 Created <devfs_find_and_unregister>.
644 v1.19
645 20020728 Richard Gooch <rgooch@atnf.csiro.au>
646 Removed deprecated <devfs_find_handle>.
647 v1.20
648 20020820 Richard Gooch <rgooch@atnf.csiro.au>
649 Fixed module unload race in <devfs_open>.
650 v1.21
651 20021013 Richard Gooch <rgooch@atnf.csiro.au>
652 Removed DEVFS_ FL_AUTO_OWNER.
653 Switched lingering structure field initialiser to ISO C.
654 Added locking when updating FCB flags.
655 v1.22
656*/
657#include <linux/types.h>
658#include <linux/errno.h>
659#include <linux/time.h>
660#include <linux/tty.h>
661#include <linux/timer.h>
662#include <linux/config.h>
663#include <linux/kernel.h>
664#include <linux/wait.h>
665#include <linux/string.h>
666#include <linux/slab.h>
667#include <linux/ioport.h>
668#include <linux/delay.h>
669#include <linux/ctype.h>
670#include <linux/mm.h>
671#include <linux/module.h>
672#include <linux/init.h>
673#include <linux/devfs_fs.h>
674#include <linux/devfs_fs_kernel.h>
675#include <linux/smp_lock.h>
676#include <linux/smp.h>
677#include <linux/rwsem.h>
678#include <linux/sched.h>
679#include <linux/namei.h>
680#include <linux/bitops.h>
681
682#include <asm/uaccess.h>
683#include <asm/io.h>
684#include <asm/processor.h>
685#include <asm/system.h>
686#include <asm/pgtable.h>
687#include <asm/atomic.h>
688
689#define DEVFS_VERSION "2004-01-31"
690
691#define DEVFS_NAME "devfs"
692
693#define FIRST_INODE 1
694
695#define STRING_LENGTH 256
696#define FAKE_BLOCK_SIZE 1024
697#define POISON_PTR ( *(void **) poison_array )
698#define MAGIC_VALUE 0x327db823
699
700#ifndef TRUE
701# define TRUE 1
702# define FALSE 0
703#endif
704
705#define MODE_DIR (S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO)
706
707#define DEBUG_NONE 0x0000000
708#define DEBUG_MODULE_LOAD 0x0000001
709#define DEBUG_REGISTER 0x0000002
710#define DEBUG_UNREGISTER 0x0000004
711#define DEBUG_FREE 0x0000008
712#define DEBUG_SET_FLAGS 0x0000010
713#define DEBUG_S_READ 0x0000100 /* Break */
714#define DEBUG_I_LOOKUP 0x0001000 /* Break */
715#define DEBUG_I_CREATE 0x0002000
716#define DEBUG_I_GET 0x0004000
717#define DEBUG_I_CHANGE 0x0008000
718#define DEBUG_I_UNLINK 0x0010000
719#define DEBUG_I_RLINK 0x0020000
720#define DEBUG_I_FLINK 0x0040000
721#define DEBUG_I_MKNOD 0x0080000
722#define DEBUG_F_READDIR 0x0100000 /* Break */
723#define DEBUG_D_DELETE 0x1000000 /* Break */
724#define DEBUG_D_RELEASE 0x2000000
725#define DEBUG_D_IPUT 0x4000000
726#define DEBUG_ALL 0xfffffff
727#define DEBUG_DISABLED DEBUG_NONE
728
729#define OPTION_NONE 0x00
730#define OPTION_MOUNT 0x01
731
732#define PRINTK(format, args...) \
733 {printk (KERN_ERR "%s" format, __FUNCTION__ , ## args);}
734
735#define OOPS(format, args...) \
736 {printk (KERN_CRIT "%s" format, __FUNCTION__ , ## args); \
737 printk ("Forcing Oops\n"); \
738 BUG();}
739
740#ifdef CONFIG_DEVFS_DEBUG
741# define VERIFY_ENTRY(de) \
742 {if ((de) && (de)->magic_number != MAGIC_VALUE) \
743 OOPS ("(%p): bad magic value: %x\n", (de), (de)->magic_number);}
744# define WRITE_ENTRY_MAGIC(de,magic) (de)->magic_number = (magic)
745# define DPRINTK(flag, format, args...) \
746 {if (devfs_debug & flag) \
747 printk (KERN_INFO "%s" format, __FUNCTION__ , ## args);}
748#else
749# define VERIFY_ENTRY(de)
750# define WRITE_ENTRY_MAGIC(de,magic)
751# define DPRINTK(flag, format, args...)
752#endif
753
754typedef struct devfs_entry *devfs_handle_t;
755
756struct directory_type {
757 rwlock_t lock; /* Lock for searching(R)/updating(W) */
758 struct devfs_entry *first;
759 struct devfs_entry *last;
760 unsigned char no_more_additions:1;
761};
762
763struct symlink_type {
764 unsigned int length; /* Not including the NULL-termimator */
765 char *linkname; /* This is NULL-terminated */
766};
767
768struct devfs_inode { /* This structure is for "persistent" inode storage */
769 struct dentry *dentry;
770 struct timespec atime;
771 struct timespec mtime;
772 struct timespec ctime;
773 unsigned int ino; /* Inode number as seen in the VFS */
774 uid_t uid;
775 gid_t gid;
776};
777
778struct devfs_entry {
779#ifdef CONFIG_DEVFS_DEBUG
780 unsigned int magic_number;
781#endif
782 void *info;
783 atomic_t refcount; /* When this drops to zero, it's unused */
784 union {
785 struct directory_type dir;
786 dev_t dev;
787 struct symlink_type symlink;
788 const char *name; /* Only used for (mode == 0) */
789 } u;
790 struct devfs_entry *prev; /* Previous entry in the parent directory */
791 struct devfs_entry *next; /* Next entry in the parent directory */
792 struct devfs_entry *parent; /* The parent directory */
793 struct devfs_inode inode;
794 umode_t mode;
795 unsigned short namelen; /* I think 64k+ filenames are a way off... */
796 unsigned char vfs:1; /* Whether the VFS may delete the entry */
797 char name[1]; /* This is just a dummy: the allocated array
798 is bigger. This is NULL-terminated */
799};
800
801/* The root of the device tree */
802static struct devfs_entry *root_entry;
803
804struct devfsd_buf_entry {
805 struct devfs_entry *de; /* The name is generated with this */
806 unsigned short type; /* The type of event */
807 umode_t mode;
808 uid_t uid;
809 gid_t gid;
810 struct devfsd_buf_entry *next;
811};
812
813struct fs_info { /* This structure is for the mounted devfs */
814 struct super_block *sb;
815 spinlock_t devfsd_buffer_lock; /* Lock when inserting/deleting events */
816 struct devfsd_buf_entry *devfsd_first_event;
817 struct devfsd_buf_entry *devfsd_last_event;
818 volatile int devfsd_sleeping;
819 volatile struct task_struct *devfsd_task;
820 volatile pid_t devfsd_pgrp;
821 volatile struct file *devfsd_file;
822 struct devfsd_notify_struct *devfsd_info;
823 volatile unsigned long devfsd_event_mask;
824 atomic_t devfsd_overrun_count;
825 wait_queue_head_t devfsd_wait_queue; /* Wake devfsd on input */
826 wait_queue_head_t revalidate_wait_queue; /* Wake when devfsd sleeps */
827};
828
829static struct fs_info fs_info = {.devfsd_buffer_lock = SPIN_LOCK_UNLOCKED };
830static kmem_cache_t *devfsd_buf_cache;
831#ifdef CONFIG_DEVFS_DEBUG
832static unsigned int devfs_debug_init __initdata = DEBUG_NONE;
833static unsigned int devfs_debug = DEBUG_NONE;
834static DEFINE_SPINLOCK(stat_lock);
835static unsigned int stat_num_entries;
836static unsigned int stat_num_bytes;
837#endif
838static unsigned char poison_array[8] =
839 { 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a };
840
841#ifdef CONFIG_DEVFS_MOUNT
842static unsigned int boot_options = OPTION_MOUNT;
843#else
844static unsigned int boot_options = OPTION_NONE;
845#endif
846
847/* Forward function declarations */
848static devfs_handle_t _devfs_walk_path(struct devfs_entry *dir,
849 const char *name, int namelen,
850 int traverse_symlink);
851static ssize_t devfsd_read(struct file *file, char __user *buf, size_t len,
852 loff_t * ppos);
853static int devfsd_ioctl(struct inode *inode, struct file *file,
854 unsigned int cmd, unsigned long arg);
855static int devfsd_close(struct inode *inode, struct file *file);
856#ifdef CONFIG_DEVFS_DEBUG
857static ssize_t stat_read(struct file *file, char __user *buf, size_t len,
858 loff_t * ppos);
859static const struct file_operations stat_fops = {
860 .open = nonseekable_open,
861 .read = stat_read,
862};
863#endif
864
865/* Devfs daemon file operations */
866static const struct file_operations devfsd_fops = {
867 .open = nonseekable_open,
868 .read = devfsd_read,
869 .ioctl = devfsd_ioctl,
870 .release = devfsd_close,
871};
872
873/* Support functions follow */
874
875/**
876 * devfs_get - Get a reference to a devfs entry.
877 * @de: The devfs entry.
878 */
879
880static struct devfs_entry *devfs_get(struct devfs_entry *de)
881{
882 VERIFY_ENTRY(de);
883 if (de)
884 atomic_inc(&de->refcount);
885 return de;
886} /* End Function devfs_get */
887
888/**
889 * devfs_put - Put (release) a reference to a devfs entry.
890 * @de: The handle to the devfs entry.
891 */
892
893static void devfs_put(devfs_handle_t de)
894{
895 if (!de)
896 return;
897 VERIFY_ENTRY(de);
898 if (de->info == POISON_PTR)
899 OOPS("(%p): poisoned pointer\n", de);
900 if (!atomic_dec_and_test(&de->refcount))
901 return;
902 if (de == root_entry)
903 OOPS("(%p): root entry being freed\n", de);
904 DPRINTK(DEBUG_FREE, "(%s): de: %p, parent: %p \"%s\"\n",
905 de->name, de, de->parent,
906 de->parent ? de->parent->name : "no parent");
907 if (S_ISLNK(de->mode))
908 kfree(de->u.symlink.linkname);
909 WRITE_ENTRY_MAGIC(de, 0);
910#ifdef CONFIG_DEVFS_DEBUG
911 spin_lock(&stat_lock);
912 --stat_num_entries;
913 stat_num_bytes -= sizeof *de + de->namelen;
914 if (S_ISLNK(de->mode))
915 stat_num_bytes -= de->u.symlink.length + 1;
916 spin_unlock(&stat_lock);
917#endif
918 de->info = POISON_PTR;
919 kfree(de);
920} /* End Function devfs_put */
921
922/**
923 * _devfs_search_dir - Search for a devfs entry in a directory.
924 * @dir: The directory to search.
925 * @name: The name of the entry to search for.
926 * @namelen: The number of characters in @name.
927 *
928 * Search for a devfs entry in a directory and returns a pointer to the entry
929 * on success, else %NULL. The directory must be locked already.
930 * An implicit devfs_get() is performed on the returned entry.
931 */
932
933static struct devfs_entry *_devfs_search_dir(struct devfs_entry *dir,
934 const char *name,
935 unsigned int namelen)
936{
937 struct devfs_entry *curr;
938
939 if (!S_ISDIR(dir->mode)) {
940 PRINTK("(%s): not a directory\n", dir->name);
941 return NULL;
942 }
943 for (curr = dir->u.dir.first; curr != NULL; curr = curr->next) {
944 if (curr->namelen != namelen)
945 continue;
946 if (memcmp(curr->name, name, namelen) == 0)
947 break;
948 /* Not found: try the next one */
949 }
950 return devfs_get(curr);
951} /* End Function _devfs_search_dir */
952
953/**
954 * _devfs_alloc_entry - Allocate a devfs entry.
955 * @name: the name of the entry
956 * @namelen: the number of characters in @name
957 * @mode: the mode for the entry
958 *
959 * Allocate a devfs entry and returns a pointer to the entry on success, else
960 * %NULL.
961 */
962
963static struct devfs_entry *_devfs_alloc_entry(const char *name,
964 unsigned int namelen,
965 umode_t mode)
966{
967 struct devfs_entry *new;
968 static unsigned long inode_counter = FIRST_INODE;
969 static DEFINE_SPINLOCK(counter_lock);
970
971 if (name && (namelen < 1))
972 namelen = strlen(name);
973 if ((new = kmalloc(sizeof *new + namelen, GFP_KERNEL)) == NULL)
974 return NULL;
975 memset(new, 0, sizeof *new + namelen); /* Will set '\0' on name */
976 new->mode = mode;
977 if (S_ISDIR(mode))
978 rwlock_init(&new->u.dir.lock);
979 atomic_set(&new->refcount, 1);
980 spin_lock(&counter_lock);
981 new->inode.ino = inode_counter++;
982 spin_unlock(&counter_lock);
983 if (name)
984 memcpy(new->name, name, namelen);
985 new->namelen = namelen;
986 WRITE_ENTRY_MAGIC(new, MAGIC_VALUE);
987#ifdef CONFIG_DEVFS_DEBUG
988 spin_lock(&stat_lock);
989 ++stat_num_entries;
990 stat_num_bytes += sizeof *new + namelen;
991 spin_unlock(&stat_lock);
992#endif
993 return new;
994} /* End Function _devfs_alloc_entry */
995
996/**
997 * _devfs_append_entry - Append a devfs entry to a directory's child list.
998 * @dir: The directory to add to.
999 * @de: The devfs entry to append.
1000 * @old_de: If an existing entry exists, it will be written here. This may
1001 * be %NULL. An implicit devfs_get() is performed on this entry.
1002 *
1003 * Append a devfs entry to a directory's list of children, checking first to
1004 * see if an entry of the same name exists. The directory will be locked.
1005 * The value 0 is returned on success, else a negative error code.
1006 * On failure, an implicit devfs_put() is performed on %de.
1007 */
1008
1009static int _devfs_append_entry(devfs_handle_t dir, devfs_handle_t de,
1010 devfs_handle_t * old_de)
1011{
1012 int retval;
1013
1014 if (old_de)
1015 *old_de = NULL;
1016 if (!S_ISDIR(dir->mode)) {
1017 PRINTK("(%s): dir: \"%s\" is not a directory\n", de->name,
1018 dir->name);
1019 devfs_put(de);
1020 return -ENOTDIR;
1021 }
1022 write_lock(&dir->u.dir.lock);
1023 if (dir->u.dir.no_more_additions)
1024 retval = -ENOENT;
1025 else {
1026 struct devfs_entry *old;
1027
1028 old = _devfs_search_dir(dir, de->name, de->namelen);
1029 if (old_de)
1030 *old_de = old;
1031 else
1032 devfs_put(old);
1033 if (old == NULL) {
1034 de->parent = dir;
1035 de->prev = dir->u.dir.last;
1036 /* Append to the directory's list of children */
1037 if (dir->u.dir.first == NULL)
1038 dir->u.dir.first = de;
1039 else
1040 dir->u.dir.last->next = de;
1041 dir->u.dir.last = de;
1042 retval = 0;
1043 } else
1044 retval = -EEXIST;
1045 }
1046 write_unlock(&dir->u.dir.lock);
1047 if (retval)
1048 devfs_put(de);
1049 return retval;
1050} /* End Function _devfs_append_entry */
1051
1052/**
1053 * _devfs_get_root_entry - Get the root devfs entry.
1054 *
1055 * Returns the root devfs entry on success, else %NULL.
1056 *
1057 * TODO it must be called asynchronously due to the fact
1058 * that devfs is initialized relatively late. Proper way
1059 * is to remove module_init from init_devfs_fs and manually
1060 * call it early enough during system init
1061 */
1062
1063static struct devfs_entry *_devfs_get_root_entry(void)
1064{
1065 struct devfs_entry *new;
1066 static DEFINE_SPINLOCK(root_lock);
1067
1068 if (root_entry)
1069 return root_entry;
1070
1071 new = _devfs_alloc_entry(NULL, 0, MODE_DIR);
1072 if (new == NULL)
1073 return NULL;
1074
1075 spin_lock(&root_lock);
1076 if (root_entry) {
1077 spin_unlock(&root_lock);
1078 devfs_put(new);
1079 return root_entry;
1080 }
1081 root_entry = new;
1082 spin_unlock(&root_lock);
1083
1084 return root_entry;
1085} /* End Function _devfs_get_root_entry */
1086
1087/**
1088 * _devfs_descend - Descend down a tree using the next component name.
1089 * @dir: The directory to search.
1090 * @name: The component name to search for.
1091 * @namelen: The length of %name.
1092 * @next_pos: The position of the next '/' or '\0' is written here.
1093 *
1094 * Descend into a directory, searching for a component. This function forms
1095 * the core of a tree-walking algorithm. The directory will be locked.
1096 * The devfs entry corresponding to the component is returned. If there is
1097 * no matching entry, %NULL is returned.
1098 * An implicit devfs_get() is performed on the returned entry.
1099 */
1100
1101static struct devfs_entry *_devfs_descend(struct devfs_entry *dir,
1102 const char *name, int namelen,
1103 int *next_pos)
1104{
1105 const char *stop, *ptr;
1106 struct devfs_entry *entry;
1107
1108 if ((namelen >= 3) && (strncmp(name, "../", 3) == 0)) { /* Special-case going to parent directory */
1109 *next_pos = 3;
1110 return devfs_get(dir->parent);
1111 }
1112 stop = name + namelen;
1113 /* Search for a possible '/' */
1114 for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr) ;
1115 *next_pos = ptr - name;
1116 read_lock(&dir->u.dir.lock);
1117 entry = _devfs_search_dir(dir, name, *next_pos);
1118 read_unlock(&dir->u.dir.lock);
1119 return entry;
1120} /* End Function _devfs_descend */
1121
1122static devfs_handle_t _devfs_make_parent_for_leaf(struct devfs_entry *dir,
1123 const char *name,
1124 int namelen, int *leaf_pos)
1125{
1126 int next_pos = 0;
1127
1128 if (dir == NULL)
1129 dir = _devfs_get_root_entry();
1130 if (dir == NULL)
1131 return NULL;
1132 devfs_get(dir);
1133 /* Search for possible trailing component and ignore it */
1134 for (--namelen; (namelen > 0) && (name[namelen] != '/'); --namelen) ;
1135 *leaf_pos = (name[namelen] == '/') ? (namelen + 1) : 0;
1136 for (; namelen > 0; name += next_pos, namelen -= next_pos) {
1137 struct devfs_entry *de, *old = NULL;
1138
1139 if ((de =
1140 _devfs_descend(dir, name, namelen, &next_pos)) == NULL) {
1141 de = _devfs_alloc_entry(name, next_pos, MODE_DIR);
1142 devfs_get(de);
1143 if (!de || _devfs_append_entry(dir, de, &old)) {
1144 devfs_put(de);
1145 if (!old || !S_ISDIR(old->mode)) {
1146 devfs_put(old);
1147 devfs_put(dir);
1148 return NULL;
1149 }
1150 de = old; /* Use the existing directory */
1151 }
1152 }
1153 if (de == dir->parent) {
1154 devfs_put(dir);
1155 devfs_put(de);
1156 return NULL;
1157 }
1158 devfs_put(dir);
1159 dir = de;
1160 if (name[next_pos] == '/')
1161 ++next_pos;
1162 }
1163 return dir;
1164} /* End Function _devfs_make_parent_for_leaf */
1165
1166static devfs_handle_t _devfs_prepare_leaf(devfs_handle_t * dir,
1167 const char *name, umode_t mode)
1168{
1169 int namelen, leaf_pos;
1170 struct devfs_entry *de;
1171
1172 namelen = strlen(name);
1173 if ((*dir = _devfs_make_parent_for_leaf(*dir, name, namelen,
1174 &leaf_pos)) == NULL) {
1175 PRINTK("(%s): could not create parent path\n", name);
1176 return NULL;
1177 }
1178 if ((de = _devfs_alloc_entry(name + leaf_pos, namelen - leaf_pos, mode))
1179 == NULL) {
1180 PRINTK("(%s): could not allocate entry\n", name);
1181 devfs_put(*dir);
1182 return NULL;
1183 }
1184 return de;
1185} /* End Function _devfs_prepare_leaf */
1186
1187static devfs_handle_t _devfs_walk_path(struct devfs_entry *dir,
1188 const char *name, int namelen,
1189 int traverse_symlink)
1190{
1191 int next_pos = 0;
1192
1193 if (dir == NULL)
1194 dir = _devfs_get_root_entry();
1195 if (dir == NULL)
1196 return NULL;
1197 devfs_get(dir);
1198 for (; namelen > 0; name += next_pos, namelen -= next_pos) {
1199 struct devfs_entry *de, *link;
1200
1201 if (!S_ISDIR(dir->mode)) {
1202 devfs_put(dir);
1203 return NULL;
1204 }
1205
1206 if ((de =
1207 _devfs_descend(dir, name, namelen, &next_pos)) == NULL) {
1208 devfs_put(dir);
1209 return NULL;
1210 }
1211 if (S_ISLNK(de->mode) && traverse_symlink) { /* Need to follow the link: this is a stack chomper */
1212 /* FIXME what if it puts outside of mounted tree? */
1213 link = _devfs_walk_path(dir, de->u.symlink.linkname,
1214 de->u.symlink.length, TRUE);
1215 devfs_put(de);
1216 if (!link) {
1217 devfs_put(dir);
1218 return NULL;
1219 }
1220 de = link;
1221 }
1222 devfs_put(dir);
1223 dir = de;
1224 if (name[next_pos] == '/')
1225 ++next_pos;
1226 }
1227 return dir;
1228} /* End Function _devfs_walk_path */
1229
1230/**
1231 * _devfs_find_entry - Find a devfs entry.
1232 * @dir: The handle to the parent devfs directory entry. If this is %NULL the
1233 * name is relative to the root of the devfs.
1234 * @name: The name of the entry. This may be %NULL.
1235 * @traverse_symlink: If %TRUE then symbolic links are traversed.
1236 *
1237 * Returns the devfs_entry pointer on success, else %NULL. An implicit
1238 * devfs_get() is performed.
1239 */
1240
1241static struct devfs_entry *_devfs_find_entry(devfs_handle_t dir,
1242 const char *name,
1243 int traverse_symlink)
1244{
1245 unsigned int namelen = strlen(name);
1246
1247 if (name[0] == '/') {
1248 /* Skip leading pathname component */
1249 if (namelen < 2) {
1250 PRINTK("(%s): too short\n", name);
1251 return NULL;
1252 }
1253 for (++name, --namelen; (*name != '/') && (namelen > 0);
1254 ++name, --namelen) ;
1255 if (namelen < 2) {
1256 PRINTK("(%s): too short\n", name);
1257 return NULL;
1258 }
1259 ++name;
1260 --namelen;
1261 }
1262 return _devfs_walk_path(dir, name, namelen, traverse_symlink);
1263} /* End Function _devfs_find_entry */
1264
1265static struct devfs_entry *get_devfs_entry_from_vfs_inode(struct inode *inode)
1266{
1267 if (inode == NULL)
1268 return NULL;
1269 VERIFY_ENTRY((struct devfs_entry *)inode->u.generic_ip);
1270 return inode->u.generic_ip;
1271} /* End Function get_devfs_entry_from_vfs_inode */
1272
1273/**
1274 * free_dentry - Free the dentry for a device entry and invalidate inode.
1275 * @de: The entry.
1276 *
1277 * This must only be called after the entry has been unhooked from its
1278 * parent directory.
1279 */
1280
1281static void free_dentry(struct devfs_entry *de)
1282{
1283 struct dentry *dentry = de->inode.dentry;
1284
1285 if (!dentry)
1286 return;
1287 spin_lock(&dcache_lock);
1288 dget_locked(dentry);
1289 spin_unlock(&dcache_lock);
1290 /* Forcefully remove the inode */
1291 if (dentry->d_inode != NULL)
1292 dentry->d_inode->i_nlink = 0;
1293 d_drop(dentry);
1294 dput(dentry);
1295} /* End Function free_dentry */
1296
1297/**
1298 * is_devfsd_or_child - Test if the current process is devfsd or one of its children.
1299 * @fs_info: The filesystem information.
1300 *
1301 * Returns %TRUE if devfsd or child, else %FALSE.
1302 */
1303
1304static int is_devfsd_or_child(struct fs_info *fs_info)
1305{
1306 struct task_struct *p = current;
1307
1308 if (p == fs_info->devfsd_task)
1309 return (TRUE);
1310 if (process_group(p) == fs_info->devfsd_pgrp)
1311 return (TRUE);
1312 read_lock(&tasklist_lock);
1313 for (; p != &init_task; p = p->real_parent) {
1314 if (p == fs_info->devfsd_task) {
1315 read_unlock(&tasklist_lock);
1316 return (TRUE);
1317 }
1318 }
1319 read_unlock(&tasklist_lock);
1320 return (FALSE);
1321} /* End Function is_devfsd_or_child */
1322
1323/**
1324 * devfsd_queue_empty - Test if devfsd has work pending in its event queue.
1325 * @fs_info: The filesystem information.
1326 *
1327 * Returns %TRUE if the queue is empty, else %FALSE.
1328 */
1329
1330static inline int devfsd_queue_empty(struct fs_info *fs_info)
1331{
1332 return (fs_info->devfsd_last_event) ? FALSE : TRUE;
1333} /* End Function devfsd_queue_empty */
1334
1335/**
1336 * wait_for_devfsd_finished - Wait for devfsd to finish processing its event queue.
1337 * @fs_info: The filesystem information.
1338 *
1339 * Returns %TRUE if no more waiting will be required, else %FALSE.
1340 */
1341
1342static int wait_for_devfsd_finished(struct fs_info *fs_info)
1343{
1344 DECLARE_WAITQUEUE(wait, current);
1345
1346 if (fs_info->devfsd_task == NULL)
1347 return (TRUE);
1348 if (devfsd_queue_empty(fs_info) && fs_info->devfsd_sleeping)
1349 return TRUE;
1350 if (is_devfsd_or_child(fs_info))
1351 return (FALSE);
1352 set_current_state(TASK_UNINTERRUPTIBLE);
1353 add_wait_queue(&fs_info->revalidate_wait_queue, &wait);
1354 if (!devfsd_queue_empty(fs_info) || !fs_info->devfsd_sleeping)
1355 if (fs_info->devfsd_task)
1356 schedule();
1357 remove_wait_queue(&fs_info->revalidate_wait_queue, &wait);
1358 __set_current_state(TASK_RUNNING);
1359 return (TRUE);
1360} /* End Function wait_for_devfsd_finished */
1361
1362/**
1363 * devfsd_notify_de - Notify the devfsd daemon of a change.
1364 * @de: The devfs entry that has changed. This and all parent entries will
1365 * have their reference counts incremented if the event was queued.
1366 * @type: The type of change.
1367 * @mode: The mode of the entry.
1368 * @uid: The user ID.
1369 * @gid: The group ID.
1370 * @fs_info: The filesystem info.
1371 *
1372 * Returns %TRUE if an event was queued and devfsd woken up, else %FALSE.
1373 */
1374
1375static int devfsd_notify_de(struct devfs_entry *de,
1376 unsigned short type, umode_t mode,
1377 uid_t uid, gid_t gid, struct fs_info *fs_info)
1378{
1379 struct devfsd_buf_entry *entry;
1380 struct devfs_entry *curr;
1381
1382 if (!(fs_info->devfsd_event_mask & (1 << type)))
1383 return (FALSE);
1384 if ((entry = kmem_cache_alloc(devfsd_buf_cache, SLAB_KERNEL)) == NULL) {
1385 atomic_inc(&fs_info->devfsd_overrun_count);
1386 return (FALSE);
1387 }
1388 for (curr = de; curr != NULL; curr = curr->parent)
1389 devfs_get(curr);
1390 entry->de = de;
1391 entry->type = type;
1392 entry->mode = mode;
1393 entry->uid = uid;
1394 entry->gid = gid;
1395 entry->next = NULL;
1396 spin_lock(&fs_info->devfsd_buffer_lock);
1397 if (!fs_info->devfsd_first_event)
1398 fs_info->devfsd_first_event = entry;
1399 if (fs_info->devfsd_last_event)
1400 fs_info->devfsd_last_event->next = entry;
1401 fs_info->devfsd_last_event = entry;
1402 spin_unlock(&fs_info->devfsd_buffer_lock);
1403 wake_up_interruptible(&fs_info->devfsd_wait_queue);
1404 return (TRUE);
1405} /* End Function devfsd_notify_de */
1406
1407/**
1408 * devfsd_notify - Notify the devfsd daemon of a change.
1409 * @de: The devfs entry that has changed.
1410 * @type: The type of change event.
1411 * @wait: If TRUE, the function waits for the daemon to finish processing
1412 * the event.
1413 */
1414
1415static void devfsd_notify(struct devfs_entry *de, unsigned short type)
1416{
1417 devfsd_notify_de(de, type, de->mode, current->euid,
1418 current->egid, &fs_info);
1419}
1420
1421static int devfs_mk_dev(dev_t dev, umode_t mode, const char *fmt, va_list args)
1422{
1423 struct devfs_entry *dir = NULL, *de;
1424 char buf[64];
1425 int error, n;
1426
1427 n = vsnprintf(buf, sizeof(buf), fmt, args);
1428 if (n >= sizeof(buf) || !buf[0]) {
1429 printk(KERN_WARNING "%s: invalid format string %s\n",
1430 __FUNCTION__, fmt);
1431 return -EINVAL;
1432 }
1433
1434 de = _devfs_prepare_leaf(&dir, buf, mode);
1435 if (!de) {
1436 printk(KERN_WARNING "%s: could not prepare leaf for %s\n",
1437 __FUNCTION__, buf);
1438 return -ENOMEM; /* could be more accurate... */
1439 }
1440
1441 de->u.dev = dev;
1442
1443 error = _devfs_append_entry(dir, de, NULL);
1444 if (error) {
1445 printk(KERN_WARNING "%s: could not append to parent for %s\n",
1446 __FUNCTION__, buf);
1447 goto out;
1448 }
1449
1450 devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED);
1451 out:
1452 devfs_put(dir);
1453 return error;
1454}
1455
1456int devfs_mk_bdev(dev_t dev, umode_t mode, const char *fmt, ...)
1457{
1458 va_list args;
1459
1460 if (!S_ISBLK(mode)) {
1461 printk(KERN_WARNING "%s: invalide mode (%u) for %s\n",
1462 __FUNCTION__, mode, fmt);
1463 return -EINVAL;
1464 }
1465
1466 va_start(args, fmt);
1467 return devfs_mk_dev(dev, mode, fmt, args);
1468}
1469
1470EXPORT_SYMBOL(devfs_mk_bdev);
1471
1472int devfs_mk_cdev(dev_t dev, umode_t mode, const char *fmt, ...)
1473{
1474 va_list args;
1475
1476 if (!S_ISCHR(mode)) {
1477 printk(KERN_WARNING "%s: invalide mode (%u) for %s\n",
1478 __FUNCTION__, mode, fmt);
1479 return -EINVAL;
1480 }
1481
1482 va_start(args, fmt);
1483 return devfs_mk_dev(dev, mode, fmt, args);
1484}
1485
1486EXPORT_SYMBOL(devfs_mk_cdev);
1487
1488/**
1489 * _devfs_unhook - Unhook a device entry from its parents list
1490 * @de: The entry to unhook.
1491 *
1492 * Returns %TRUE if the entry was unhooked, else %FALSE if it was
1493 * previously unhooked.
1494 * The caller must have a write lock on the parent directory.
1495 */
1496
1497static int _devfs_unhook(struct devfs_entry *de)
1498{
1499 struct devfs_entry *parent;
1500
1501 if (!de || (de->prev == de))
1502 return FALSE;
1503 parent = de->parent;
1504 if (de->prev == NULL)
1505 parent->u.dir.first = de->next;
1506 else
1507 de->prev->next = de->next;
1508 if (de->next == NULL)
1509 parent->u.dir.last = de->prev;
1510 else
1511 de->next->prev = de->prev;
1512 de->prev = de; /* Indicate we're unhooked */
1513 de->next = NULL; /* Force early termination for <devfs_readdir> */
1514 return TRUE;
1515} /* End Function _devfs_unhook */
1516
1517/**
1518 * _devfs_unregister - Unregister a device entry from its parent.
1519 * @dir: The parent directory.
1520 * @de: The entry to unregister.
1521 *
1522 * The caller must have a write lock on the parent directory, which is
1523 * unlocked by this function.
1524 */
1525
1526static void _devfs_unregister(struct devfs_entry *dir, struct devfs_entry *de)
1527{
1528 int unhooked = _devfs_unhook(de);
1529
1530 write_unlock(&dir->u.dir.lock);
1531 if (!unhooked)
1532 return;
1533 devfs_get(dir);
1534 devfsd_notify(de, DEVFSD_NOTIFY_UNREGISTERED);
1535 free_dentry(de);
1536 devfs_put(dir);
1537 if (!S_ISDIR(de->mode))
1538 return;
1539 while (TRUE) { /* Recursively unregister: this is a stack chomper */
1540 struct devfs_entry *child;
1541
1542 write_lock(&de->u.dir.lock);
1543 de->u.dir.no_more_additions = TRUE;
1544 child = de->u.dir.first;
1545 VERIFY_ENTRY(child);
1546 _devfs_unregister(de, child);
1547 if (!child)
1548 break;
1549 DPRINTK(DEBUG_UNREGISTER, "(%s): child: %p refcount: %d\n",
1550 child->name, child, atomic_read(&child->refcount));
1551 devfs_put(child);
1552 }
1553} /* End Function _devfs_unregister */
1554
1555static int devfs_do_symlink(devfs_handle_t dir, const char *name,
1556 const char *link, devfs_handle_t * handle)
1557{
1558 int err;
1559 unsigned int linklength;
1560 char *newlink;
1561 struct devfs_entry *de;
1562
1563 if (handle != NULL)
1564 *handle = NULL;
1565 if (name == NULL) {
1566 PRINTK("(): NULL name pointer\n");
1567 return -EINVAL;
1568 }
1569 if (link == NULL) {
1570 PRINTK("(%s): NULL link pointer\n", name);
1571 return -EINVAL;
1572 }
1573 linklength = strlen(link);
1574 if ((newlink = kmalloc(linklength + 1, GFP_KERNEL)) == NULL)
1575 return -ENOMEM;
1576 memcpy(newlink, link, linklength);
1577 newlink[linklength] = '\0';
1578 if ((de = _devfs_prepare_leaf(&dir, name, S_IFLNK | S_IRUGO | S_IXUGO))
1579 == NULL) {
1580 PRINTK("(%s): could not prepare leaf\n", name);
1581 kfree(newlink);
1582 return -ENOTDIR;
1583 }
1584 de->info = NULL;
1585 de->u.symlink.linkname = newlink;
1586 de->u.symlink.length = linklength;
1587 if ((err = _devfs_append_entry(dir, de, NULL)) != 0) {
1588 PRINTK("(%s): could not append to parent, err: %d\n", name,
1589 err);
1590 devfs_put(dir);
1591 return err;
1592 }
1593 devfs_put(dir);
1594#ifdef CONFIG_DEVFS_DEBUG
1595 spin_lock(&stat_lock);
1596 stat_num_bytes += linklength + 1;
1597 spin_unlock(&stat_lock);
1598#endif
1599 if (handle != NULL)
1600 *handle = de;
1601 return 0;
1602} /* End Function devfs_do_symlink */
1603
1604/**
1605 * devfs_mk_symlink Create a symbolic link in the devfs namespace.
1606 * @from: The name of the entry.
1607 * @to: Name of the destination
1608 *
1609 * Returns 0 on success, else a negative error code is returned.
1610 */
1611
1612int devfs_mk_symlink(const char *from, const char *to)
1613{
1614 devfs_handle_t de;
1615 int err;
1616
1617 err = devfs_do_symlink(NULL, from, to, &de);
1618 if (!err) {
1619 de->vfs = TRUE;
1620 devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED);
1621 }
1622
1623 return err;
1624}
1625
1626/**
1627 * devfs_mk_dir - Create a directory in the devfs namespace.
1628 * new name is relative to the root of the devfs.
1629 * @fmt: The name of the entry.
1630 *
1631 * Use of this function is optional. The devfs_register() function
1632 * will automatically create intermediate directories as needed. This function
1633 * is provided for efficiency reasons, as it provides a handle to a directory.
1634 * On failure %NULL is returned.
1635 */
1636
1637int devfs_mk_dir(const char *fmt, ...)
1638{
1639 struct devfs_entry *dir = NULL, *de = NULL, *old;
1640 char buf[64];
1641 va_list args;
1642 int error, n;
1643
1644 va_start(args, fmt);
1645 n = vsnprintf(buf, 64, fmt, args);
1646 if (n >= 64 || !buf[0]) {
1647 printk(KERN_WARNING "%s: invalid argument.", __FUNCTION__);
1648 return -EINVAL;
1649 }
1650
1651 de = _devfs_prepare_leaf(&dir, buf, MODE_DIR);
1652 if (!de) {
1653 PRINTK("(%s): could not prepare leaf\n", buf);
1654 return -EINVAL;
1655 }
1656
1657 error = _devfs_append_entry(dir, de, &old);
1658 if (error == -EEXIST && S_ISDIR(old->mode)) {
1659 /*
1660 * devfs_mk_dir() of an already-existing directory will
1661 * return success.
1662 */
1663 error = 0;
1664 goto out_put;
1665 } else if (error) {
1666 PRINTK("(%s): could not append to dir: %p \"%s\"\n",
1667 buf, dir, dir->name);
1668 devfs_put(old);
1669 goto out_put;
1670 }
1671
1672 devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED);
1673
1674 out_put:
1675 devfs_put(dir);
1676 return error;
1677}
1678
1679void devfs_remove(const char *fmt, ...)
1680{
1681 char buf[64];
1682 va_list args;
1683 int n;
1684
1685 va_start(args, fmt);
1686 n = vsnprintf(buf, sizeof(buf), fmt, args);
1687 if (n < sizeof(buf) && buf[0]) {
1688 devfs_handle_t de = _devfs_find_entry(NULL, buf, 0);
1689
1690 if (!de) {
1691 printk(KERN_ERR "%s: %s not found, cannot remove\n",
1692 __FUNCTION__, buf);
1693 dump_stack();
1694 return;
1695 }
1696
1697 write_lock(&de->parent->u.dir.lock);
1698 _devfs_unregister(de->parent, de);
1699 devfs_put(de);
1700 devfs_put(de);
1701 }
1702}
1703
1704/**
1705 * devfs_generate_path - Generate a pathname for an entry, relative to the devfs root.
1706 * @de: The devfs entry.
1707 * @path: The buffer to write the pathname to. The pathname and '\0'
1708 * terminator will be written at the end of the buffer.
1709 * @buflen: The length of the buffer.
1710 *
1711 * Returns the offset in the buffer where the pathname starts on success,
1712 * else a negative error code.
1713 */
1714
1715static int devfs_generate_path(devfs_handle_t de, char *path, int buflen)
1716{
1717 int pos;
1718#define NAMEOF(de) ( (de)->mode ? (de)->name : (de)->u.name )
1719
1720 if (de == NULL)
1721 return -EINVAL;
1722 VERIFY_ENTRY(de);
1723 if (de->namelen >= buflen)
1724 return -ENAMETOOLONG; /* Must be first */
1725 path[buflen - 1] = '\0';
1726 if (de->parent == NULL)
1727 return buflen - 1; /* Don't prepend root */
1728 pos = buflen - de->namelen - 1;
1729 memcpy(path + pos, NAMEOF(de), de->namelen);
1730 for (de = de->parent; de->parent != NULL; de = de->parent) {
1731 if (pos - de->namelen - 1 < 0)
1732 return -ENAMETOOLONG;
1733 path[--pos] = '/';
1734 pos -= de->namelen;
1735 memcpy(path + pos, NAMEOF(de), de->namelen);
1736 }
1737 return pos;
1738} /* End Function devfs_generate_path */
1739
1740/**
1741 * devfs_setup - Process kernel boot options.
1742 * @str: The boot options after the "devfs=".
1743 */
1744
1745static int __init devfs_setup(char *str)
1746{
1747 static struct {
1748 char *name;
1749 unsigned int mask;
1750 unsigned int *opt;
1751 } devfs_options_tab[] __initdata = {
1752#ifdef CONFIG_DEVFS_DEBUG
1753 {
1754 "dall", DEBUG_ALL, &devfs_debug_init}, {
1755 "dmod", DEBUG_MODULE_LOAD, &devfs_debug_init}, {
1756 "dreg", DEBUG_REGISTER, &devfs_debug_init}, {
1757 "dunreg", DEBUG_UNREGISTER, &devfs_debug_init}, {
1758 "dfree", DEBUG_FREE, &devfs_debug_init}, {
1759 "diget", DEBUG_I_GET, &devfs_debug_init}, {
1760 "dchange", DEBUG_SET_FLAGS, &devfs_debug_init}, {
1761 "dsread", DEBUG_S_READ, &devfs_debug_init}, {
1762 "dichange", DEBUG_I_CHANGE, &devfs_debug_init}, {
1763 "dimknod", DEBUG_I_MKNOD, &devfs_debug_init}, {
1764 "dilookup", DEBUG_I_LOOKUP, &devfs_debug_init}, {
1765 "diunlink", DEBUG_I_UNLINK, &devfs_debug_init},
1766#endif /* CONFIG_DEVFS_DEBUG */
1767 {
1768 "mount", OPTION_MOUNT, &boot_options}, {
1769 NULL, 0, NULL}
1770 };
1771
1772 while ((*str != '\0') && !isspace(*str)) {
1773 int i, found = 0, invert = 0;
1774
1775 if (strncmp(str, "no", 2) == 0) {
1776 invert = 1;
1777 str += 2;
1778 }
1779 for (i = 0; devfs_options_tab[i].name != NULL; i++) {
1780 int len = strlen(devfs_options_tab[i].name);
1781
1782 if (strncmp(str, devfs_options_tab[i].name, len) == 0) {
1783 if (invert)
1784 *devfs_options_tab[i].opt &=
1785 ~devfs_options_tab[i].mask;
1786 else
1787 *devfs_options_tab[i].opt |=
1788 devfs_options_tab[i].mask;
1789 str += len;
1790 found = 1;
1791 break;
1792 }
1793 }
1794 if (!found)
1795 return 0; /* No match */
1796 if (*str != ',')
1797 return 0; /* No more options */
1798 ++str;
1799 }
1800 return 1;
1801} /* End Function devfs_setup */
1802
1803__setup("devfs=", devfs_setup);
1804
1805EXPORT_SYMBOL(devfs_mk_dir);
1806EXPORT_SYMBOL(devfs_remove);
1807
1808/**
1809 * try_modload - Notify devfsd of an inode lookup by a non-devfsd process.
1810 * @parent: The parent devfs entry.
1811 * @fs_info: The filesystem info.
1812 * @name: The device name.
1813 * @namelen: The number of characters in @name.
1814 * @buf: A working area that will be used. This must not go out of scope
1815 * until devfsd is idle again.
1816 *
1817 * Returns 0 on success (event was queued), else a negative error code.
1818 */
1819
1820static int try_modload(struct devfs_entry *parent, struct fs_info *fs_info,
1821 const char *name, unsigned namelen,
1822 struct devfs_entry *buf)
1823{
1824 if (!(fs_info->devfsd_event_mask & (1 << DEVFSD_NOTIFY_LOOKUP)))
1825 return -ENOENT;
1826 if (is_devfsd_or_child(fs_info))
1827 return -ENOENT;
1828 memset(buf, 0, sizeof *buf);
1829 atomic_set(&buf->refcount, 1);
1830 buf->parent = parent;
1831 buf->namelen = namelen;
1832 buf->u.name = name;
1833 WRITE_ENTRY_MAGIC(buf, MAGIC_VALUE);
1834 if (!devfsd_notify_de(buf, DEVFSD_NOTIFY_LOOKUP, 0,
1835 current->euid, current->egid, fs_info))
1836 return -ENOENT;
1837 /* Possible success: event has been queued */
1838 return 0;
1839} /* End Function try_modload */
1840
1841/* Superblock operations follow */
1842
1843static struct inode_operations devfs_iops;
1844static struct inode_operations devfs_dir_iops;
1845static const struct file_operations devfs_fops;
1846static const struct file_operations devfs_dir_fops;
1847static struct inode_operations devfs_symlink_iops;
1848
1849static int devfs_notify_change(struct dentry *dentry, struct iattr *iattr)
1850{
1851 int retval;
1852 struct devfs_entry *de;
1853 struct inode *inode = dentry->d_inode;
1854 struct fs_info *fs_info = inode->i_sb->s_fs_info;
1855
1856 de = get_devfs_entry_from_vfs_inode(inode);
1857 if (de == NULL)
1858 return -ENODEV;
1859 retval = inode_change_ok(inode, iattr);
1860 if (retval != 0)
1861 return retval;
1862 retval = inode_setattr(inode, iattr);
1863 if (retval != 0)
1864 return retval;
1865 DPRINTK(DEBUG_I_CHANGE, "(%d): VFS inode: %p devfs_entry: %p\n",
1866 (int)inode->i_ino, inode, de);
1867 DPRINTK(DEBUG_I_CHANGE, "(): mode: 0%o uid: %d gid: %d\n",
1868 (int)inode->i_mode, (int)inode->i_uid, (int)inode->i_gid);
1869 /* Inode is not on hash chains, thus must save permissions here rather
1870 than in a write_inode() method */
1871 de->mode = inode->i_mode;
1872 de->inode.uid = inode->i_uid;
1873 de->inode.gid = inode->i_gid;
1874 de->inode.atime = inode->i_atime;
1875 de->inode.mtime = inode->i_mtime;
1876 de->inode.ctime = inode->i_ctime;
1877 if ((iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) &&
1878 !is_devfsd_or_child(fs_info))
1879 devfsd_notify_de(de, DEVFSD_NOTIFY_CHANGE, inode->i_mode,
1880 inode->i_uid, inode->i_gid, fs_info);
1881 return 0;
1882} /* End Function devfs_notify_change */
1883
1884static struct super_operations devfs_sops = {
1885 .drop_inode = generic_delete_inode,
1886 .statfs = simple_statfs,
1887};
1888
1889/**
1890 * _devfs_get_vfs_inode - Get a VFS inode.
1891 * @sb: The super block.
1892 * @de: The devfs inode.
1893 * @dentry: The dentry to register with the devfs inode.
1894 *
1895 * Returns the inode on success, else %NULL. An implicit devfs_get() is
1896 * performed if the inode is created.
1897 */
1898
1899static struct inode *_devfs_get_vfs_inode(struct super_block *sb,
1900 struct devfs_entry *de,
1901 struct dentry *dentry)
1902{
1903 struct inode *inode;
1904
1905 if (de->prev == de)
1906 return NULL; /* Quick check to see if unhooked */
1907 if ((inode = new_inode(sb)) == NULL) {
1908 PRINTK("(%s): new_inode() failed, de: %p\n", de->name, de);
1909 return NULL;
1910 }
1911 if (de->parent) {
1912 read_lock(&de->parent->u.dir.lock);
1913 if (de->prev != de)
1914 de->inode.dentry = dentry; /* Not unhooked */
1915 read_unlock(&de->parent->u.dir.lock);
1916 } else
1917 de->inode.dentry = dentry; /* Root: no locking needed */
1918 if (de->inode.dentry != dentry) { /* Must have been unhooked */
1919 iput(inode);
1920 return NULL;
1921 }
1922 /* FIXME where is devfs_put? */
1923 inode->u.generic_ip = devfs_get(de);
1924 inode->i_ino = de->inode.ino;
1925 DPRINTK(DEBUG_I_GET, "(%d): VFS inode: %p devfs_entry: %p\n",
1926 (int)inode->i_ino, inode, de);
1927 inode->i_blocks = 0;
1928 inode->i_blksize = FAKE_BLOCK_SIZE;
1929 inode->i_op = &devfs_iops;
1930 inode->i_mode = de->mode;
1931 if (S_ISDIR(de->mode)) {
1932 inode->i_op = &devfs_dir_iops;
1933 inode->i_fop = &devfs_dir_fops;
1934 } else if (S_ISLNK(de->mode)) {
1935 inode->i_op = &devfs_symlink_iops;
1936 inode->i_size = de->u.symlink.length;
1937 } else if (S_ISCHR(de->mode) || S_ISBLK(de->mode)) {
1938 init_special_inode(inode, de->mode, de->u.dev);
1939 } else if (S_ISFIFO(de->mode) || S_ISSOCK(de->mode)) {
1940 init_special_inode(inode, de->mode, 0);
1941 } else {
1942 PRINTK("(%s): unknown mode %o de: %p\n",
1943 de->name, de->mode, de);
1944 iput(inode);
1945 devfs_put(de);
1946 return NULL;
1947 }
1948
1949 inode->i_uid = de->inode.uid;
1950 inode->i_gid = de->inode.gid;
1951 inode->i_atime = de->inode.atime;
1952 inode->i_mtime = de->inode.mtime;
1953 inode->i_ctime = de->inode.ctime;
1954 DPRINTK(DEBUG_I_GET, "(): mode: 0%o uid: %d gid: %d\n",
1955 (int)inode->i_mode, (int)inode->i_uid, (int)inode->i_gid);
1956 return inode;
1957} /* End Function _devfs_get_vfs_inode */
1958
1959/* File operations for device entries follow */
1960
1961static int devfs_readdir(struct file *file, void *dirent, filldir_t filldir)
1962{
1963 int err, count;
1964 int stored = 0;
1965 struct fs_info *fs_info;
1966 struct devfs_entry *parent, *de, *next = NULL;
1967 struct inode *inode = file->f_dentry->d_inode;
1968
1969 fs_info = inode->i_sb->s_fs_info;
1970 parent = get_devfs_entry_from_vfs_inode(file->f_dentry->d_inode);
1971 if ((long)file->f_pos < 0)
1972 return -EINVAL;
1973 DPRINTK(DEBUG_F_READDIR, "(%s): fs_info: %p pos: %ld\n",
1974 parent->name, fs_info, (long)file->f_pos);
1975 switch ((long)file->f_pos) {
1976 case 0:
1977 err = (*filldir) (dirent, "..", 2, file->f_pos,
1978 parent_ino(file->f_dentry), DT_DIR);
1979 if (err == -EINVAL)
1980 break;
1981 if (err < 0)
1982 return err;
1983 file->f_pos++;
1984 ++stored;
1985 /* Fall through */
1986 case 1:
1987 err =
1988 (*filldir) (dirent, ".", 1, file->f_pos, inode->i_ino,
1989 DT_DIR);
1990 if (err == -EINVAL)
1991 break;
1992 if (err < 0)
1993 return err;
1994 file->f_pos++;
1995 ++stored;
1996 /* Fall through */
1997 default:
1998 /* Skip entries */
1999 count = file->f_pos - 2;
2000 read_lock(&parent->u.dir.lock);
2001 for (de = parent->u.dir.first; de && (count > 0); de = de->next)
2002 --count;
2003 devfs_get(de);
2004 read_unlock(&parent->u.dir.lock);
2005 /* Now add all remaining entries */
2006 while (de) {
2007 err = (*filldir) (dirent, de->name, de->namelen,
2008 file->f_pos, de->inode.ino,
2009 de->mode >> 12);
2010 if (err < 0)
2011 devfs_put(de);
2012 else {
2013 file->f_pos++;
2014 ++stored;
2015 }
2016 if (err == -EINVAL)
2017 break;
2018 if (err < 0)
2019 return err;
2020 read_lock(&parent->u.dir.lock);
2021 next = devfs_get(de->next);
2022 read_unlock(&parent->u.dir.lock);
2023 devfs_put(de);
2024 de = next;
2025 }
2026 break;
2027 }
2028 return stored;
2029} /* End Function devfs_readdir */
2030
2031/* Open devfs specific special files */
2032static int devfs_open(struct inode *inode, struct file *file)
2033{
2034 int err;
2035 int minor = MINOR(inode->i_rdev);
2036 struct file_operations *old_fops, *new_fops;
2037
2038 switch (minor) {
2039 case 0: /* /dev/.devfsd */
2040 new_fops = fops_get(&devfsd_fops);
2041 break;
2042#ifdef CONFIG_DEVFS_DEBUG
2043 case 1: /* /dev/.stat */
2044 new_fops = fops_get(&stat_fops);
2045 break;
2046#endif
2047 default:
2048 return -ENODEV;
2049 }
2050
2051 if (new_fops == NULL)
2052 return -ENODEV;
2053 old_fops = file->f_op;
2054 file->f_op = new_fops;
2055 err = new_fops->open ? new_fops->open(inode, file) : 0;
2056 if (err) {
2057 file->f_op = old_fops;
2058 fops_put(new_fops);
2059 } else
2060 fops_put(old_fops);
2061 return err;
2062} /* End Function devfs_open */
2063
2064static const struct file_operations devfs_fops = {
2065 .open = devfs_open,
2066};
2067
2068static const struct file_operations devfs_dir_fops = {
2069 .read = generic_read_dir,
2070 .readdir = devfs_readdir,
2071};
2072
2073/* Dentry operations for device entries follow */
2074
2075/**
2076 * devfs_d_release - Callback for when a dentry is freed.
2077 * @dentry: The dentry.
2078 */
2079
2080static void devfs_d_release(struct dentry *dentry)
2081{
2082 DPRINTK(DEBUG_D_RELEASE, "(%p): inode: %p\n", dentry, dentry->d_inode);
2083} /* End Function devfs_d_release */
2084
2085/**
2086 * devfs_d_iput - Callback for when a dentry loses its inode.
2087 * @dentry: The dentry.
2088 * @inode: The inode.
2089 */
2090
2091static void devfs_d_iput(struct dentry *dentry, struct inode *inode)
2092{
2093 struct devfs_entry *de;
2094
2095 de = get_devfs_entry_from_vfs_inode(inode);
2096 DPRINTK(DEBUG_D_IPUT,
2097 "(%s): dentry: %p inode: %p de: %p de->dentry: %p\n", de->name,
2098 dentry, inode, de, de->inode.dentry);
2099 if (de->inode.dentry && (de->inode.dentry != dentry))
2100 OOPS("(%s): de: %p dentry: %p de->dentry: %p\n",
2101 de->name, de, dentry, de->inode.dentry);
2102 de->inode.dentry = NULL;
2103 iput(inode);
2104 devfs_put(de);
2105} /* End Function devfs_d_iput */
2106
2107static int devfs_d_delete(struct dentry *dentry);
2108
2109static struct dentry_operations devfs_dops = {
2110 .d_delete = devfs_d_delete,
2111 .d_release = devfs_d_release,
2112 .d_iput = devfs_d_iput,
2113};
2114
2115static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *);
2116
2117static struct dentry_operations devfs_wait_dops = {
2118 .d_delete = devfs_d_delete,
2119 .d_release = devfs_d_release,
2120 .d_iput = devfs_d_iput,
2121 .d_revalidate = devfs_d_revalidate_wait,
2122};
2123
2124/**
2125 * devfs_d_delete - Callback for when all files for a dentry are closed.
2126 * @dentry: The dentry.
2127 */
2128
2129static int devfs_d_delete(struct dentry *dentry)
2130{
2131 struct inode *inode = dentry->d_inode;
2132
2133 if (dentry->d_op == &devfs_wait_dops)
2134 dentry->d_op = &devfs_dops;
2135 /* Unhash dentry if negative (has no inode) */
2136 if (inode == NULL) {
2137 DPRINTK(DEBUG_D_DELETE, "(%p): dropping negative dentry\n",
2138 dentry);
2139 return 1;
2140 }
2141 return 0;
2142} /* End Function devfs_d_delete */
2143
2144struct devfs_lookup_struct {
2145 devfs_handle_t de;
2146 wait_queue_head_t wait_queue;
2147};
2148
2149/* XXX: this doesn't handle the case where we got a negative dentry
2150 but a devfs entry has been registered in the meanwhile */
2151static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
2152{
2153 struct inode *dir = dentry->d_parent->d_inode;
2154 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2155 devfs_handle_t parent = get_devfs_entry_from_vfs_inode(dir);
2156 struct devfs_lookup_struct *lookup_info = dentry->d_fsdata;
2157 DECLARE_WAITQUEUE(wait, current);
2158 int need_lock;
2159
2160 /*
2161 * FIXME HACK
2162 *
2163 * make sure that
2164 * d_instantiate always runs under lock
2165 * we release i_mutex lock before going to sleep
2166 *
2167 * unfortunately sometimes d_revalidate is called with
2168 * and sometimes without i_mutex lock held. The following checks
2169 * attempt to deduce when we need to add (and drop resp.) lock
2170 * here. This relies on current (2.6.2) calling coventions:
2171 *
2172 * lookup_hash is always run under i_mutex and is passing NULL
2173 * as nd
2174 *
2175 * open(...,O_CREATE,...) calls _lookup_hash under i_mutex
2176 * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE
2177 *
2178 * all other invocations of ->d_revalidate seem to happen
2179 * outside of i_mutex
2180 */
2181 need_lock = nd &&
2182 (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT));
2183
2184 if (need_lock)
2185 mutex_lock(&dir->i_mutex);
2186
2187 if (is_devfsd_or_child(fs_info)) {
2188 devfs_handle_t de = lookup_info->de;
2189 struct inode *inode;
2190
2191 DPRINTK(DEBUG_I_LOOKUP,
2192 "(%s): dentry: %p inode: %p de: %p by: \"%s\"\n",
2193 dentry->d_name.name, dentry, dentry->d_inode, de,
2194 current->comm);
2195 if (dentry->d_inode)
2196 goto out;
2197 if (de == NULL) {
2198 read_lock(&parent->u.dir.lock);
2199 de = _devfs_search_dir(parent, dentry->d_name.name,
2200 dentry->d_name.len);
2201 read_unlock(&parent->u.dir.lock);
2202 if (de == NULL)
2203 goto out;
2204 lookup_info->de = de;
2205 }
2206 /* Create an inode, now that the driver information is available */
2207 inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry);
2208 if (!inode)
2209 goto out;
2210 DPRINTK(DEBUG_I_LOOKUP,
2211 "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n",
2212 de->name, de->inode.ino, inode, de, current->comm);
2213 d_instantiate(dentry, inode);
2214 goto out;
2215 }
2216 if (lookup_info == NULL)
2217 goto out; /* Early termination */
2218 read_lock(&parent->u.dir.lock);
2219 if (dentry->d_fsdata) {
2220 set_current_state(TASK_UNINTERRUPTIBLE);
2221 add_wait_queue(&lookup_info->wait_queue, &wait);
2222 read_unlock(&parent->u.dir.lock);
2223 /* at this point it is always (hopefully) locked */
2224 mutex_unlock(&dir->i_mutex);
2225 schedule();
2226 mutex_lock(&dir->i_mutex);
2227 /*
2228 * This does not need nor should remove wait from wait_queue.
2229 * Wait queue head is never reused - nothing is ever added to it
2230 * after all waiters have been waked up and head itself disappears
2231 * very soon after it. Moreover it is local variable on stack that
2232 * is likely to have already disappeared so any reference to it
2233 * at this point is buggy.
2234 */
2235
2236 } else
2237 read_unlock(&parent->u.dir.lock);
2238
2239 out:
2240 if (need_lock)
2241 mutex_unlock(&dir->i_mutex);
2242 return 1;
2243} /* End Function devfs_d_revalidate_wait */
2244
2245/* Inode operations for device entries follow */
2246
2247static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry,
2248 struct nameidata *nd)
2249{
2250 struct devfs_entry tmp; /* Must stay in scope until devfsd idle again */
2251 struct devfs_lookup_struct lookup_info;
2252 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2253 struct devfs_entry *parent, *de;
2254 struct inode *inode;
2255 struct dentry *retval = NULL;
2256
2257 /* Set up the dentry operations before anything else, to ensure cleaning
2258 up on any error */
2259 dentry->d_op = &devfs_dops;
2260 /* First try to get the devfs entry for this directory */
2261 parent = get_devfs_entry_from_vfs_inode(dir);
2262 DPRINTK(DEBUG_I_LOOKUP, "(%s): dentry: %p parent: %p by: \"%s\"\n",
2263 dentry->d_name.name, dentry, parent, current->comm);
2264 if (parent == NULL)
2265 return ERR_PTR(-ENOENT);
2266 read_lock(&parent->u.dir.lock);
2267 de = _devfs_search_dir(parent, dentry->d_name.name, dentry->d_name.len);
2268 read_unlock(&parent->u.dir.lock);
2269 lookup_info.de = de;
2270 init_waitqueue_head(&lookup_info.wait_queue);
2271 dentry->d_fsdata = &lookup_info;
2272 if (de == NULL) { /* Try with devfsd. For any kind of failure, leave a negative dentry
2273 so someone else can deal with it (in the case where the sysadmin
2274 does a mknod()). It's important to do this before hashing the
2275 dentry, so that the devfsd queue is filled before revalidates
2276 can start */
2277 if (try_modload(parent, fs_info, dentry->d_name.name, dentry->d_name.len, &tmp) < 0) { /* Lookup event was not queued to devfsd */
2278 d_add(dentry, NULL);
2279 return NULL;
2280 }
2281 }
2282 dentry->d_op = &devfs_wait_dops;
2283 d_add(dentry, NULL); /* Open the floodgates */
2284 /* Unlock directory semaphore, which will release any waiters. They
2285 will get the hashed dentry, and may be forced to wait for
2286 revalidation */
2287 mutex_unlock(&dir->i_mutex);
2288 wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */
2289 mutex_lock(&dir->i_mutex); /* Grab it again because them's the rules */
2290 de = lookup_info.de;
2291 /* If someone else has been so kind as to make the inode, we go home
2292 early */
2293 if (dentry->d_inode)
2294 goto out;
2295 if (de == NULL) {
2296 read_lock(&parent->u.dir.lock);
2297 de = _devfs_search_dir(parent, dentry->d_name.name,
2298 dentry->d_name.len);
2299 read_unlock(&parent->u.dir.lock);
2300 if (de == NULL)
2301 goto out;
2302 /* OK, there's an entry now, but no VFS inode yet */
2303 }
2304 /* Create an inode, now that the driver information is available */
2305 inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry);
2306 if (!inode) {
2307 retval = ERR_PTR(-ENOMEM);
2308 goto out;
2309 }
2310 DPRINTK(DEBUG_I_LOOKUP,
2311 "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n", de->name,
2312 de->inode.ino, inode, de, current->comm);
2313 d_instantiate(dentry, inode);
2314 out:
2315 write_lock(&parent->u.dir.lock);
2316 dentry->d_op = &devfs_dops;
2317 dentry->d_fsdata = NULL;
2318 wake_up(&lookup_info.wait_queue);
2319 write_unlock(&parent->u.dir.lock);
2320 devfs_put(de);
2321 return retval;
2322} /* End Function devfs_lookup */
2323
2324static int devfs_unlink(struct inode *dir, struct dentry *dentry)
2325{
2326 int unhooked;
2327 struct devfs_entry *de;
2328 struct inode *inode = dentry->d_inode;
2329 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2330
2331 de = get_devfs_entry_from_vfs_inode(inode);
2332 DPRINTK(DEBUG_I_UNLINK, "(%s): de: %p\n", dentry->d_name.name, de);
2333 if (de == NULL)
2334 return -ENOENT;
2335 if (!de->vfs)
2336 return -EPERM;
2337 write_lock(&de->parent->u.dir.lock);
2338 unhooked = _devfs_unhook(de);
2339 write_unlock(&de->parent->u.dir.lock);
2340 if (!unhooked)
2341 return -ENOENT;
2342 if (!is_devfsd_or_child(fs_info))
2343 devfsd_notify_de(de, DEVFSD_NOTIFY_DELETE, inode->i_mode,
2344 inode->i_uid, inode->i_gid, fs_info);
2345 free_dentry(de);
2346 devfs_put(de);
2347 return 0;
2348} /* End Function devfs_unlink */
2349
2350static int devfs_symlink(struct inode *dir, struct dentry *dentry,
2351 const char *symname)
2352{
2353 int err;
2354 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2355 struct devfs_entry *parent, *de;
2356 struct inode *inode;
2357
2358 /* First try to get the devfs entry for this directory */
2359 parent = get_devfs_entry_from_vfs_inode(dir);
2360 if (parent == NULL)
2361 return -ENOENT;
2362 err = devfs_do_symlink(parent, dentry->d_name.name, symname, &de);
2363 DPRINTK(DEBUG_DISABLED, "(%s): errcode from <devfs_do_symlink>: %d\n",
2364 dentry->d_name.name, err);
2365 if (err < 0)
2366 return err;
2367 de->vfs = TRUE;
2368 de->inode.uid = current->euid;
2369 de->inode.gid = current->egid;
2370 de->inode.atime = CURRENT_TIME;
2371 de->inode.mtime = CURRENT_TIME;
2372 de->inode.ctime = CURRENT_TIME;
2373 if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL)
2374 return -ENOMEM;
2375 DPRINTK(DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n",
2376 dentry->d_name.name, de->inode.ino, inode, dentry);
2377 d_instantiate(dentry, inode);
2378 if (!is_devfsd_or_child(fs_info))
2379 devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode,
2380 inode->i_uid, inode->i_gid, fs_info);
2381 return 0;
2382} /* End Function devfs_symlink */
2383
2384static int devfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2385{
2386 int err;
2387 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2388 struct devfs_entry *parent, *de;
2389 struct inode *inode;
2390
2391 mode = (mode & ~S_IFMT) | S_IFDIR; /* VFS doesn't pass S_IFMT part */
2392 parent = get_devfs_entry_from_vfs_inode(dir);
2393 if (parent == NULL)
2394 return -ENOENT;
2395 de = _devfs_alloc_entry(dentry->d_name.name, dentry->d_name.len, mode);
2396 if (!de)
2397 return -ENOMEM;
2398 de->vfs = TRUE;
2399 if ((err = _devfs_append_entry(parent, de, NULL)) != 0)
2400 return err;
2401 de->inode.uid = current->euid;
2402 de->inode.gid = current->egid;
2403 de->inode.atime = CURRENT_TIME;
2404 de->inode.mtime = CURRENT_TIME;
2405 de->inode.ctime = CURRENT_TIME;
2406 if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL)
2407 return -ENOMEM;
2408 DPRINTK(DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n",
2409 dentry->d_name.name, de->inode.ino, inode, dentry);
2410 d_instantiate(dentry, inode);
2411 if (!is_devfsd_or_child(fs_info))
2412 devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode,
2413 inode->i_uid, inode->i_gid, fs_info);
2414 return 0;
2415} /* End Function devfs_mkdir */
2416
2417static int devfs_rmdir(struct inode *dir, struct dentry *dentry)
2418{
2419 int err = 0;
2420 struct devfs_entry *de;
2421 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2422 struct inode *inode = dentry->d_inode;
2423
2424 if (dir->i_sb->s_fs_info != inode->i_sb->s_fs_info)
2425 return -EINVAL;
2426 de = get_devfs_entry_from_vfs_inode(inode);
2427 if (de == NULL)
2428 return -ENOENT;
2429 if (!S_ISDIR(de->mode))
2430 return -ENOTDIR;
2431 if (!de->vfs)
2432 return -EPERM;
2433 /* First ensure the directory is empty and will stay that way */
2434 write_lock(&de->u.dir.lock);
2435 if (de->u.dir.first)
2436 err = -ENOTEMPTY;
2437 else
2438 de->u.dir.no_more_additions = TRUE;
2439 write_unlock(&de->u.dir.lock);
2440 if (err)
2441 return err;
2442 /* Now unhook the directory from its parent */
2443 write_lock(&de->parent->u.dir.lock);
2444 if (!_devfs_unhook(de))
2445 err = -ENOENT;
2446 write_unlock(&de->parent->u.dir.lock);
2447 if (err)
2448 return err;
2449 if (!is_devfsd_or_child(fs_info))
2450 devfsd_notify_de(de, DEVFSD_NOTIFY_DELETE, inode->i_mode,
2451 inode->i_uid, inode->i_gid, fs_info);
2452 free_dentry(de);
2453 devfs_put(de);
2454 return 0;
2455} /* End Function devfs_rmdir */
2456
2457static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
2458 dev_t rdev)
2459{
2460 int err;
2461 struct fs_info *fs_info = dir->i_sb->s_fs_info;
2462 struct devfs_entry *parent, *de;
2463 struct inode *inode;
2464
2465 DPRINTK(DEBUG_I_MKNOD, "(%s): mode: 0%o dev: %u:%u\n",
2466 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
2467 parent = get_devfs_entry_from_vfs_inode(dir);
2468 if (parent == NULL)
2469 return -ENOENT;
2470 de = _devfs_alloc_entry(dentry->d_name.name, dentry->d_name.len, mode);
2471 if (!de)
2472 return -ENOMEM;
2473 de->vfs = TRUE;
2474 if (S_ISCHR(mode) || S_ISBLK(mode))
2475 de->u.dev = rdev;
2476 if ((err = _devfs_append_entry(parent, de, NULL)) != 0)
2477 return err;
2478 de->inode.uid = current->euid;
2479 de->inode.gid = current->egid;
2480 de->inode.atime = CURRENT_TIME;
2481 de->inode.mtime = CURRENT_TIME;
2482 de->inode.ctime = CURRENT_TIME;
2483 if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL)
2484 return -ENOMEM;
2485 DPRINTK(DEBUG_I_MKNOD, ": new VFS inode(%u): %p dentry: %p\n",
2486 de->inode.ino, inode, dentry);
2487 d_instantiate(dentry, inode);
2488 if (!is_devfsd_or_child(fs_info))
2489 devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode,
2490 inode->i_uid, inode->i_gid, fs_info);
2491 return 0;
2492} /* End Function devfs_mknod */
2493
2494static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
2495{
2496 struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode);
2497 nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV));
2498 return NULL;
2499} /* End Function devfs_follow_link */
2500
2501static struct inode_operations devfs_iops = {
2502 .setattr = devfs_notify_change,
2503};
2504
2505static struct inode_operations devfs_dir_iops = {
2506 .lookup = devfs_lookup,
2507 .unlink = devfs_unlink,
2508 .symlink = devfs_symlink,
2509 .mkdir = devfs_mkdir,
2510 .rmdir = devfs_rmdir,
2511 .mknod = devfs_mknod,
2512 .setattr = devfs_notify_change,
2513};
2514
2515static struct inode_operations devfs_symlink_iops = {
2516 .readlink = generic_readlink,
2517 .follow_link = devfs_follow_link,
2518 .setattr = devfs_notify_change,
2519};
2520
2521static int devfs_fill_super(struct super_block *sb, void *data, int silent)
2522{
2523 struct inode *root_inode = NULL;
2524
2525 if (_devfs_get_root_entry() == NULL)
2526 goto out_no_root;
2527 atomic_set(&fs_info.devfsd_overrun_count, 0);
2528 init_waitqueue_head(&fs_info.devfsd_wait_queue);
2529 init_waitqueue_head(&fs_info.revalidate_wait_queue);
2530 fs_info.sb = sb;
2531 sb->s_fs_info = &fs_info;
2532 sb->s_blocksize = 1024;
2533 sb->s_blocksize_bits = 10;
2534 sb->s_magic = DEVFS_SUPER_MAGIC;
2535 sb->s_op = &devfs_sops;
2536 sb->s_time_gran = 1;
2537 if ((root_inode = _devfs_get_vfs_inode(sb, root_entry, NULL)) == NULL)
2538 goto out_no_root;
2539 sb->s_root = d_alloc_root(root_inode);
2540 if (!sb->s_root)
2541 goto out_no_root;
2542 DPRINTK(DEBUG_S_READ, "(): made devfs ptr: %p\n", sb->s_fs_info);
2543 return 0;
2544
2545 out_no_root:
2546 PRINTK("(): get root inode failed\n");
2547 if (root_inode)
2548 iput(root_inode);
2549 return -EINVAL;
2550} /* End Function devfs_fill_super */
2551
2552static struct super_block *devfs_get_sb(struct file_system_type *fs_type,
2553 int flags, const char *dev_name,
2554 void *data)
2555{
2556 return get_sb_single(fs_type, flags, data, devfs_fill_super);
2557}
2558
2559static struct file_system_type devfs_fs_type = {
2560 .name = DEVFS_NAME,
2561 .get_sb = devfs_get_sb,
2562 .kill_sb = kill_anon_super,
2563};
2564
2565/* File operations for devfsd follow */
2566
2567static ssize_t devfsd_read(struct file *file, char __user *buf, size_t len,
2568 loff_t * ppos)
2569{
2570 int done = FALSE;
2571 int ival;
2572 loff_t pos, devname_offset, tlen, rpos;
2573 devfs_handle_t de;
2574 struct devfsd_buf_entry *entry;
2575 struct fs_info *fs_info = file->f_dentry->d_inode->i_sb->s_fs_info;
2576 struct devfsd_notify_struct *info = fs_info->devfsd_info;
2577 DECLARE_WAITQUEUE(wait, current);
2578
2579 /* Verify the task has grabbed the queue */
2580 if (fs_info->devfsd_task != current)
2581 return -EPERM;
2582 info->major = 0;
2583 info->minor = 0;
2584 /* Block for a new entry */
2585 set_current_state(TASK_INTERRUPTIBLE);
2586 add_wait_queue(&fs_info->devfsd_wait_queue, &wait);
2587 while (devfsd_queue_empty(fs_info)) {
2588 fs_info->devfsd_sleeping = TRUE;
2589 wake_up(&fs_info->revalidate_wait_queue);
2590 schedule();
2591 fs_info->devfsd_sleeping = FALSE;
2592 if (signal_pending(current)) {
2593 remove_wait_queue(&fs_info->devfsd_wait_queue, &wait);
2594 __set_current_state(TASK_RUNNING);
2595 return -EINTR;
2596 }
2597 set_current_state(TASK_INTERRUPTIBLE);
2598 }
2599 remove_wait_queue(&fs_info->devfsd_wait_queue, &wait);
2600 __set_current_state(TASK_RUNNING);
2601 /* Now play with the data */
2602 ival = atomic_read(&fs_info->devfsd_overrun_count);
2603 info->overrun_count = ival;
2604 entry = fs_info->devfsd_first_event;
2605 info->type = entry->type;
2606 info->mode = entry->mode;
2607 info->uid = entry->uid;
2608 info->gid = entry->gid;
2609 de = entry->de;
2610 if (S_ISCHR(de->mode) || S_ISBLK(de->mode)) {
2611 info->major = MAJOR(de->u.dev);
2612 info->minor = MINOR(de->u.dev);
2613 }
2614 pos = devfs_generate_path(de, info->devname, DEVFS_PATHLEN);
2615 if (pos < 0)
2616 return pos;
2617 info->namelen = DEVFS_PATHLEN - pos - 1;
2618 if (info->mode == 0)
2619 info->mode = de->mode;
2620 devname_offset = info->devname - (char *)info;
2621 rpos = *ppos;
2622 if (rpos < devname_offset) {
2623 /* Copy parts of the header */
2624 tlen = devname_offset - rpos;
2625 if (tlen > len)
2626 tlen = len;
2627 if (copy_to_user(buf, (char *)info + rpos, tlen)) {
2628 return -EFAULT;
2629 }
2630 rpos += tlen;
2631 buf += tlen;
2632 len -= tlen;
2633 }
2634 if ((rpos >= devname_offset) && (len > 0)) {
2635 /* Copy the name */
2636 tlen = info->namelen + 1;
2637 if (tlen > len)
2638 tlen = len;
2639 else
2640 done = TRUE;
2641 if (copy_to_user
2642 (buf, info->devname + pos + rpos - devname_offset, tlen)) {
2643 return -EFAULT;
2644 }
2645 rpos += tlen;
2646 }
2647 tlen = rpos - *ppos;
2648 if (done) {
2649 devfs_handle_t parent;
2650
2651 spin_lock(&fs_info->devfsd_buffer_lock);
2652 fs_info->devfsd_first_event = entry->next;
2653 if (entry->next == NULL)
2654 fs_info->devfsd_last_event = NULL;
2655 spin_unlock(&fs_info->devfsd_buffer_lock);
2656 for (; de != NULL; de = parent) {
2657 parent = de->parent;
2658 devfs_put(de);
2659 }
2660 kmem_cache_free(devfsd_buf_cache, entry);
2661 if (ival > 0)
2662 atomic_sub(ival, &fs_info->devfsd_overrun_count);
2663 *ppos = 0;
2664 } else
2665 *ppos = rpos;
2666 return tlen;
2667} /* End Function devfsd_read */
2668
2669static int devfsd_ioctl(struct inode *inode, struct file *file,
2670 unsigned int cmd, unsigned long arg)
2671{
2672 int ival;
2673 struct fs_info *fs_info = inode->i_sb->s_fs_info;
2674
2675 switch (cmd) {
2676 case DEVFSDIOC_GET_PROTO_REV:
2677 ival = DEVFSD_PROTOCOL_REVISION_KERNEL;
2678 if (copy_to_user((void __user *)arg, &ival, sizeof ival))
2679 return -EFAULT;
2680 break;
2681 case DEVFSDIOC_SET_EVENT_MASK:
2682 /* Ensure only one reader has access to the queue. This scheme will
2683 work even if the global kernel lock were to be removed, because it
2684 doesn't matter who gets in first, as long as only one gets it */
2685 if (fs_info->devfsd_task == NULL) {
2686 static DEFINE_SPINLOCK(lock);
2687
2688 if (!spin_trylock(&lock))
2689 return -EBUSY;
2690 if (fs_info->devfsd_task != NULL) { /* We lost the race... */
2691 spin_unlock(&lock);
2692 return -EBUSY;
2693 }
2694 fs_info->devfsd_task = current;
2695 spin_unlock(&lock);
2696 fs_info->devfsd_pgrp =
2697 (process_group(current) ==
2698 current->pid) ? process_group(current) : 0;
2699 fs_info->devfsd_file = file;
2700 fs_info->devfsd_info =
2701 kmalloc(sizeof *fs_info->devfsd_info, GFP_KERNEL);
2702 if (!fs_info->devfsd_info) {
2703 devfsd_close(inode, file);
2704 return -ENOMEM;
2705 }
2706 } else if (fs_info->devfsd_task != current)
2707 return -EBUSY;
2708 fs_info->devfsd_event_mask = arg; /* Let the masses come forth */
2709 break;
2710 case DEVFSDIOC_RELEASE_EVENT_QUEUE:
2711 if (fs_info->devfsd_file != file)
2712 return -EPERM;
2713 return devfsd_close(inode, file);
2714 /*break; */
2715#ifdef CONFIG_DEVFS_DEBUG
2716 case DEVFSDIOC_SET_DEBUG_MASK:
2717 if (copy_from_user(&ival, (void __user *)arg, sizeof ival))
2718 return -EFAULT;
2719 devfs_debug = ival;
2720 break;
2721#endif
2722 default:
2723 return -ENOIOCTLCMD;
2724 }
2725 return 0;
2726} /* End Function devfsd_ioctl */
2727
2728static int devfsd_close(struct inode *inode, struct file *file)
2729{
2730 struct devfsd_buf_entry *entry, *next;
2731 struct fs_info *fs_info = inode->i_sb->s_fs_info;
2732
2733 if (fs_info->devfsd_file != file)
2734 return 0;
2735 fs_info->devfsd_event_mask = 0;
2736 fs_info->devfsd_file = NULL;
2737 spin_lock(&fs_info->devfsd_buffer_lock);
2738 entry = fs_info->devfsd_first_event;
2739 fs_info->devfsd_first_event = NULL;
2740 fs_info->devfsd_last_event = NULL;
2741 kfree(fs_info->devfsd_info);
2742 fs_info->devfsd_info = NULL;
2743 spin_unlock(&fs_info->devfsd_buffer_lock);
2744 fs_info->devfsd_pgrp = 0;
2745 fs_info->devfsd_task = NULL;
2746 wake_up(&fs_info->revalidate_wait_queue);
2747 for (; entry; entry = next) {
2748 next = entry->next;
2749 kmem_cache_free(devfsd_buf_cache, entry);
2750 }
2751 return 0;
2752} /* End Function devfsd_close */
2753
2754#ifdef CONFIG_DEVFS_DEBUG
2755static ssize_t stat_read(struct file *file, char __user *buf, size_t len,
2756 loff_t * ppos)
2757{
2758 ssize_t num;
2759 char txt[80];
2760
2761 num = sprintf(txt, "Number of entries: %u number of bytes: %u\n",
2762 stat_num_entries, stat_num_bytes) + 1;
2763 if (*ppos >= num)
2764 return 0;
2765 if (*ppos + len > num)
2766 len = num - *ppos;
2767 if (copy_to_user(buf, txt + *ppos, len))
2768 return -EFAULT;
2769 *ppos += len;
2770 return len;
2771} /* End Function stat_read */
2772#endif
2773
2774static int __init init_devfs_fs(void)
2775{
2776 int err;
2777 int major;
2778 struct devfs_entry *devfsd;
2779#ifdef CONFIG_DEVFS_DEBUG
2780 struct devfs_entry *stat;
2781#endif
2782
2783 if (_devfs_get_root_entry() == NULL)
2784 return -ENOMEM;
2785
2786 printk(KERN_INFO "%s: %s Richard Gooch (rgooch@atnf.csiro.au)\n",
2787 DEVFS_NAME, DEVFS_VERSION);
2788 devfsd_buf_cache = kmem_cache_create("devfsd_event",
2789 sizeof(struct devfsd_buf_entry),
2790 0, 0, NULL, NULL);
2791 if (!devfsd_buf_cache)
2792 OOPS("(): unable to allocate event slab\n");
2793#ifdef CONFIG_DEVFS_DEBUG
2794 devfs_debug = devfs_debug_init;
2795 printk(KERN_INFO "%s: devfs_debug: 0x%0x\n", DEVFS_NAME, devfs_debug);
2796#endif
2797 printk(KERN_INFO "%s: boot_options: 0x%0x\n", DEVFS_NAME, boot_options);
2798
2799 /* register special device for devfsd communication */
2800 major = register_chrdev(0, "devfs", &devfs_fops);
2801 if (major < 0)
2802 return major;
2803
2804 /* And create the entry for ".devfsd" */
2805 devfsd = _devfs_alloc_entry(".devfsd", 0, S_IFCHR | S_IRUSR | S_IWUSR);
2806 if (devfsd == NULL)
2807 return -ENOMEM;
2808 devfsd->u.dev = MKDEV(major, 0);
2809 _devfs_append_entry(root_entry, devfsd, NULL);
2810
2811#ifdef CONFIG_DEVFS_DEBUG
2812 stat = _devfs_alloc_entry(".stat", 0, S_IFCHR | S_IRUGO);
2813 if (stat == NULL)
2814 return -ENOMEM;
2815 stat->u.dev = MKDEV(major, 1);
2816 _devfs_append_entry(root_entry, stat, NULL);
2817#endif
2818
2819 err = register_filesystem(&devfs_fs_type);
2820 return err;
2821} /* End Function init_devfs_fs */
2822
2823void __init mount_devfs_fs(void)
2824{
2825 int err;
2826
2827 if (!(boot_options & OPTION_MOUNT))
2828 return;
2829 err = do_mount("none", "/dev", "devfs", 0, NULL);
2830 if (err == 0)
2831 printk(KERN_INFO "Mounted devfs on /dev\n");
2832 else
2833 PRINTK("(): unable to mount devfs, err: %d\n", err);
2834} /* End Function mount_devfs_fs */
2835
2836module_init(init_devfs_fs)
diff --git a/fs/devfs/util.c b/fs/devfs/util.c
deleted file mode 100644
index db06d388c9ac..000000000000
--- a/fs/devfs/util.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/* devfs (Device FileSystem) utilities.
2
3 Copyright (C) 1999-2002 Richard Gooch
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
19 Richard Gooch may be reached by email at rgooch@atnf.csiro.au
20 The postal address is:
21 Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
22
23 ChangeLog
24
25 19991031 Richard Gooch <rgooch@atnf.csiro.au>
26 Created.
27 19991103 Richard Gooch <rgooch@atnf.csiro.au>
28 Created <_devfs_convert_name> and supported SCSI and IDE CD-ROMs
29 20000203 Richard Gooch <rgooch@atnf.csiro.au>
30 Changed operations pointer type to void *.
31 20000621 Richard Gooch <rgooch@atnf.csiro.au>
32 Changed interface to <devfs_register_series>.
33 20000622 Richard Gooch <rgooch@atnf.csiro.au>
34 Took account of interface change to <devfs_mk_symlink>.
35 Took account of interface change to <devfs_mk_dir>.
36 20010519 Richard Gooch <rgooch@atnf.csiro.au>
37 Documentation cleanup.
38 20010709 Richard Gooch <rgooch@atnf.csiro.au>
39 Created <devfs_*alloc_major> and <devfs_*alloc_devnum>.
40 20010710 Richard Gooch <rgooch@atnf.csiro.au>
41 Created <devfs_*alloc_unique_number>.
42 20010730 Richard Gooch <rgooch@atnf.csiro.au>
43 Documentation typo fix.
44 20010806 Richard Gooch <rgooch@atnf.csiro.au>
45 Made <block_semaphore> and <char_semaphore> private.
46 20010813 Richard Gooch <rgooch@atnf.csiro.au>
47 Fixed bug in <devfs_alloc_unique_number>: limited to 128 numbers
48 20010818 Richard Gooch <rgooch@atnf.csiro.au>
49 Updated major masks up to Linus' "no new majors" proclamation.
50 Block: were 126 now 122 free, char: were 26 now 19 free.
51 20020324 Richard Gooch <rgooch@atnf.csiro.au>
52 Fixed bug in <devfs_alloc_unique_number>: was clearing beyond
53 bitfield.
54 20020326 Richard Gooch <rgooch@atnf.csiro.au>
55 Fixed bitfield data type for <devfs_*alloc_devnum>.
56 Made major bitfield type and initialiser 64 bit safe.
57 20020413 Richard Gooch <rgooch@atnf.csiro.au>
58 Fixed shift warning on 64 bit machines.
59 20020428 Richard Gooch <rgooch@atnf.csiro.au>
60 Copied and used macro for error messages from fs/devfs/base.c
61 20021013 Richard Gooch <rgooch@atnf.csiro.au>
62 Documentation fix.
63 20030101 Adam J. Richter <adam@yggdrasil.com>
64 Eliminate DEVFS_SPECIAL_{CHR,BLK}. Use mode_t instead.
65 20030106 Christoph Hellwig <hch@infradead.org>
66 Rewrite devfs_{,de}alloc_devnum to look like C code.
67*/
68#include <linux/module.h>
69#include <linux/init.h>
70#include <linux/devfs_fs_kernel.h>
71#include <linux/slab.h>
72#include <linux/vmalloc.h>
73#include <linux/genhd.h>
74#include <linux/bitops.h>
75
76int devfs_register_tape(const char *name)
77{
78 char tname[32], dest[64];
79 static unsigned int tape_counter;
80 unsigned int n = tape_counter++;
81
82 sprintf(dest, "../%s", name);
83 sprintf(tname, "tapes/tape%u", n);
84 devfs_mk_symlink(tname, dest);
85
86 return n;
87}
88
89EXPORT_SYMBOL(devfs_register_tape);
90
91void devfs_unregister_tape(int num)
92{
93 if (num >= 0)
94 devfs_remove("tapes/tape%u", num);
95}
96
97EXPORT_SYMBOL(devfs_unregister_tape);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 14c5620b5cab..f7aef5bb584a 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,10 +130,10 @@ fail:
130 return -ENOMEM; 130 return -ENOMEM;
131} 131}
132 132
133static struct super_block *devpts_get_sb(struct file_system_type *fs_type, 133static int devpts_get_sb(struct file_system_type *fs_type,
134 int flags, const char *dev_name, void *data) 134 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
135{ 135{
136 return get_sb_single(fs_type, flags, data, devpts_fill_super); 136 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
137} 137}
138 138
139static struct file_system_type devpts_fs_type = { 139static struct file_system_type devpts_fs_type = {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b05d1b218776..538fb0418fba 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,7 +162,7 @@ static int dio_refill_pages(struct dio *dio)
162 NULL); /* vmas */ 162 NULL); /* vmas */
163 up_read(&current->mm->mmap_sem); 163 up_read(&current->mm->mmap_sem);
164 164
165 if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) { 165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166 struct page *page = ZERO_PAGE(dio->curr_user_address); 166 struct page *page = ZERO_PAGE(dio->curr_user_address);
167 /* 167 /*
168 * A memory fault, but the filesystem has some outstanding 168 * A memory fault, but the filesystem has some outstanding
@@ -535,7 +535,7 @@ static int get_more_blocks(struct dio *dio)
535 map_bh->b_state = 0; 535 map_bh->b_state = 0;
536 map_bh->b_size = fs_count << dio->inode->i_blkbits; 536 map_bh->b_size = fs_count << dio->inode->i_blkbits;
537 537
538 create = dio->rw == WRITE; 538 create = dio->rw & WRITE;
539 if (dio->lock_type == DIO_LOCKING) { 539 if (dio->lock_type == DIO_LOCKING) {
540 if (dio->block_in_file < (i_size_read(dio->inode) >> 540 if (dio->block_in_file < (i_size_read(dio->inode) >>
541 dio->blkbits)) 541 dio->blkbits))
@@ -867,7 +867,7 @@ do_holes:
867 loff_t i_size_aligned; 867 loff_t i_size_aligned;
868 868
869 /* AKPM: eargh, -ENOTBLK is a hack */ 869 /* AKPM: eargh, -ENOTBLK is a hack */
870 if (dio->rw == WRITE) { 870 if (dio->rw & WRITE) {
871 page_cache_release(page); 871 page_cache_release(page);
872 return -ENOTBLK; 872 return -ENOTBLK;
873 } 873 }
@@ -1045,7 +1045,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1045 } 1045 }
1046 } /* end iovec loop */ 1046 } /* end iovec loop */
1047 1047
1048 if (ret == -ENOTBLK && rw == WRITE) { 1048 if (ret == -ENOTBLK && (rw & WRITE)) {
1049 /* 1049 /*
1050 * The remaining part of the request will be 1050 * The remaining part of the request will be
1051 * be handled by buffered I/O when we return 1051 * be handled by buffered I/O when we return
@@ -1089,7 +1089,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1089 if (dio->is_async) { 1089 if (dio->is_async) {
1090 int should_wait = 0; 1090 int should_wait = 0;
1091 1091
1092 if (dio->result < dio->size && rw == WRITE) { 1092 if (dio->result < dio->size && (rw & WRITE)) {
1093 dio->waiter = current; 1093 dio->waiter = current;
1094 should_wait = 1; 1094 should_wait = 1;
1095 } 1095 }
@@ -1142,7 +1142,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1142 ret = transferred; 1142 ret = transferred;
1143 1143
1144 /* We could have also come here on an AIO file extend */ 1144 /* We could have also come here on an AIO file extend */
1145 if (!is_sync_kiocb(iocb) && rw == WRITE && 1145 if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
1146 ret >= 0 && dio->result == dio->size) 1146 ret >= 0 && dio->result == dio->size)
1147 /* 1147 /*
1148 * For AIO writes where we have completed the 1148 * For AIO writes where we have completed the
@@ -1194,7 +1194,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1194 int acquire_i_mutex = 0; 1194 int acquire_i_mutex = 0;
1195 1195
1196 if (rw & WRITE) 1196 if (rw & WRITE)
1197 current->flags |= PF_SYNCWRITE; 1197 rw = WRITE_SYNC;
1198 1198
1199 if (bdev) 1199 if (bdev)
1200 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); 1200 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1270,7 +1270,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1270 * even for AIO, we need to wait for i/o to complete before 1270 * even for AIO, we need to wait for i/o to complete before
1271 * returning in this case. 1271 * returning in this case.
1272 */ 1272 */
1273 dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) && 1273 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
1274 (end > i_size_read(inode))); 1274 (end > i_size_read(inode)));
1275 1275
1276 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1276 retval = direct_io_worker(rw, iocb, inode, iov, offset,
@@ -1284,8 +1284,6 @@ out:
1284 mutex_unlock(&inode->i_mutex); 1284 mutex_unlock(&inode->i_mutex);
1285 else if (acquire_i_mutex) 1285 else if (acquire_i_mutex)
1286 mutex_lock(&inode->i_mutex); 1286 mutex_lock(&inode->i_mutex);
1287 if (rw & WRITE)
1288 current->flags &= ~PF_SYNCWRITE;
1289 return retval; 1287 return retval;
1290} 1288}
1291EXPORT_SYMBOL(__blockdev_direct_IO); 1289EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/dquot.c b/fs/dquot.c
index 81d87a413c68..0122a279106a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -250,7 +250,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block
250/* Add a dquot to the tail of the free list */ 250/* Add a dquot to the tail of the free list */
251static inline void put_dquot_last(struct dquot *dquot) 251static inline void put_dquot_last(struct dquot *dquot)
252{ 252{
253 list_add(&dquot->dq_free, free_dquots.prev); 253 list_add_tail(&dquot->dq_free, &free_dquots);
254 dqstats.free_dquots++; 254 dqstats.free_dquots++;
255} 255}
256 256
@@ -266,7 +266,7 @@ static inline void put_inuse(struct dquot *dquot)
266{ 266{
267 /* We add to the back of inuse list so we don't have to restart 267 /* We add to the back of inuse list so we don't have to restart
268 * when traversing this list and we block */ 268 * when traversing this list and we block */
269 list_add(&dquot->dq_inuse, inuse_list.prev); 269 list_add_tail(&dquot->dq_inuse, &inuse_list);
270 dqstats.allocated_dquots++; 270 dqstats.allocated_dquots++;
271} 271}
272 272
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 180607f9314d..174696f9bf14 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -21,7 +21,7 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
21{ 21{
22 return generic_block_bmap(mapping,block,efs_get_block); 22 return generic_block_bmap(mapping,block,efs_get_block);
23} 23}
24static struct address_space_operations efs_aops = { 24static const struct address_space_operations efs_aops = {
25 .readpage = efs_readpage, 25 .readpage = efs_readpage,
26 .sync_page = block_sync_page, 26 .sync_page = block_sync_page,
27 .bmap = _efs_bmap 27 .bmap = _efs_bmap
diff --git a/fs/efs/super.c b/fs/efs/super.c
index dff623e3ddbf..8ac2462ae5dd 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -15,13 +15,13 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/vfs.h> 16#include <linux/vfs.h>
17 17
18static int efs_statfs(struct super_block *s, struct kstatfs *buf); 18static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
19static int efs_fill_super(struct super_block *s, void *d, int silent); 19static int efs_fill_super(struct super_block *s, void *d, int silent);
20 20
21static struct super_block *efs_get_sb(struct file_system_type *fs_type, 21static int efs_get_sb(struct file_system_type *fs_type,
22 int flags, const char *dev_name, void *data) 22 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
23{ 23{
24 return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super); 24 return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
25} 25}
26 26
27static struct file_system_type efs_fs_type = { 27static struct file_system_type efs_fs_type = {
@@ -322,8 +322,8 @@ out_no_fs:
322 return -EINVAL; 322 return -EINVAL;
323} 323}
324 324
325static int efs_statfs(struct super_block *s, struct kstatfs *buf) { 325static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) {
326 struct efs_sb_info *sb = SUPER_INFO(s); 326 struct efs_sb_info *sb = SUPER_INFO(dentry->d_sb);
327 327
328 buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */ 328 buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */
329 buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */ 329 buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 3d9a350e3e7f..e249cf733a6b 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -53,6 +53,6 @@ fail:
53 return err; 53 return err;
54} 54}
55 55
56struct address_space_operations efs_symlink_aops = { 56const struct address_space_operations efs_symlink_aops = {
57 .readpage = efs_symlink_readpage 57 .readpage = efs_symlink_readpage
58}; 58};
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1b4491cdd115..9c677bbd0b08 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c ( Efficent event polling implementation ) 2 * fs/eventpoll.c ( Efficent event polling implementation )
3 * Copyright (C) 2001,...,2003 Davide Libenzi 3 * Copyright (C) 2001,...,2006 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -268,9 +268,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
268 int maxevents, long timeout); 268 int maxevents, long timeout);
269static int eventpollfs_delete_dentry(struct dentry *dentry); 269static int eventpollfs_delete_dentry(struct dentry *dentry);
270static struct inode *ep_eventpoll_inode(void); 270static struct inode *ep_eventpoll_inode(void);
271static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, 271static int eventpollfs_get_sb(struct file_system_type *fs_type,
272 int flags, const char *dev_name, 272 int flags, const char *dev_name,
273 void *data); 273 void *data, struct vfsmount *mnt);
274 274
275/* 275/*
276 * This semaphore is used to serialize ep_free() and eventpoll_release_file(). 276 * This semaphore is used to serialize ep_free() and eventpoll_release_file().
@@ -337,20 +337,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
337/* Special initialization for the rb-tree node to detect linkage */ 337/* Special initialization for the rb-tree node to detect linkage */
338static inline void ep_rb_initnode(struct rb_node *n) 338static inline void ep_rb_initnode(struct rb_node *n)
339{ 339{
340 n->rb_parent = n; 340 rb_set_parent(n, n);
341} 341}
342 342
343/* Removes a node from the rb-tree and marks it for a fast is-linked check */ 343/* Removes a node from the rb-tree and marks it for a fast is-linked check */
344static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) 344static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
345{ 345{
346 rb_erase(n, r); 346 rb_erase(n, r);
347 n->rb_parent = n; 347 rb_set_parent(n, n);
348} 348}
349 349
350/* Fast check to verify that the item is linked to the main rb-tree */ 350/* Fast check to verify that the item is linked to the main rb-tree */
351static inline int ep_rb_linked(struct rb_node *n) 351static inline int ep_rb_linked(struct rb_node *n)
352{ 352{
353 return n->rb_parent != n; 353 return rb_parent(n) != n;
354} 354}
355 355
356/* 356/*
@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1004 1004
1005 /* Notify waiting tasks that events are available */ 1005 /* Notify waiting tasks that events are available */
1006 if (waitqueue_active(&ep->wq)) 1006 if (waitqueue_active(&ep->wq))
1007 wake_up(&ep->wq); 1007 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
1008 if (waitqueue_active(&ep->poll_wait)) 1008 if (waitqueue_active(&ep->poll_wait))
1009 pwake++; 1009 pwake++;
1010 } 1010 }
@@ -1083,7 +1083,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1083 1083
1084 /* Notify waiting tasks that events are available */ 1084 /* Notify waiting tasks that events are available */
1085 if (waitqueue_active(&ep->wq)) 1085 if (waitqueue_active(&ep->wq))
1086 wake_up(&ep->wq); 1086 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1087 TASK_INTERRUPTIBLE);
1087 if (waitqueue_active(&ep->poll_wait)) 1088 if (waitqueue_active(&ep->poll_wait))
1088 pwake++; 1089 pwake++;
1089 } 1090 }
@@ -1260,7 +1261,8 @@ is_linked:
1260 * wait list. 1261 * wait list.
1261 */ 1262 */
1262 if (waitqueue_active(&ep->wq)) 1263 if (waitqueue_active(&ep->wq))
1263 wake_up(&ep->wq); 1264 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1265 TASK_INTERRUPTIBLE);
1264 if (waitqueue_active(&ep->poll_wait)) 1266 if (waitqueue_active(&ep->poll_wait))
1265 pwake++; 1267 pwake++;
1266 1268
@@ -1444,7 +1446,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1444 * wait list. 1446 * wait list.
1445 */ 1447 */
1446 if (waitqueue_active(&ep->wq)) 1448 if (waitqueue_active(&ep->wq))
1447 wake_up(&ep->wq); 1449 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1450 TASK_INTERRUPTIBLE);
1448 if (waitqueue_active(&ep->poll_wait)) 1451 if (waitqueue_active(&ep->poll_wait))
1449 pwake++; 1452 pwake++;
1450 } 1453 }
@@ -1516,7 +1519,7 @@ retry:
1516 * ep_poll_callback() when events will become available. 1519 * ep_poll_callback() when events will become available.
1517 */ 1520 */
1518 init_waitqueue_entry(&wait, current); 1521 init_waitqueue_entry(&wait, current);
1519 add_wait_queue(&ep->wq, &wait); 1522 __add_wait_queue(&ep->wq, &wait);
1520 1523
1521 for (;;) { 1524 for (;;) {
1522 /* 1525 /*
@@ -1536,7 +1539,7 @@ retry:
1536 jtimeout = schedule_timeout(jtimeout); 1539 jtimeout = schedule_timeout(jtimeout);
1537 write_lock_irqsave(&ep->lock, flags); 1540 write_lock_irqsave(&ep->lock, flags);
1538 } 1541 }
1539 remove_wait_queue(&ep->wq, &wait); 1542 __remove_wait_queue(&ep->wq, &wait);
1540 1543
1541 set_current_state(TASK_RUNNING); 1544 set_current_state(TASK_RUNNING);
1542 } 1545 }
@@ -1595,11 +1598,12 @@ eexit_1:
1595} 1598}
1596 1599
1597 1600
1598static struct super_block * 1601static int
1599eventpollfs_get_sb(struct file_system_type *fs_type, int flags, 1602eventpollfs_get_sb(struct file_system_type *fs_type, int flags,
1600 const char *dev_name, void *data) 1603 const char *dev_name, void *data, struct vfsmount *mnt)
1601{ 1604{
1602 return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC); 1605 return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC,
1606 mnt);
1603} 1607}
1604 1608
1605 1609
diff --git a/fs/exec.c b/fs/exec.c
index 3a79d97ac234..8344ba73a2a6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -22,7 +22,6 @@
22 * formats. 22 * formats.
23 */ 23 */
24 24
25#include <linux/config.h>
26#include <linux/slab.h> 25#include <linux/slab.h>
27#include <linux/file.h> 26#include <linux/file.h>
28#include <linux/mman.h> 27#include <linux/mman.h>
@@ -49,6 +48,7 @@
49#include <linux/rmap.h> 48#include <linux/rmap.h>
50#include <linux/acct.h> 49#include <linux/acct.h>
51#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
51#include <linux/audit.h>
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/mmu_context.h> 54#include <asm/mmu_context.h>
@@ -665,8 +665,6 @@ static int de_thread(struct task_struct *tsk)
665 * and to assume its PID: 665 * and to assume its PID:
666 */ 666 */
667 if (!thread_group_leader(current)) { 667 if (!thread_group_leader(current)) {
668 struct dentry *proc_dentry1, *proc_dentry2;
669
670 /* 668 /*
671 * Wait for the thread group leader to be a zombie. 669 * Wait for the thread group leader to be a zombie.
672 * It should already be zombie at this point, most 670 * It should already be zombie at this point, most
@@ -688,10 +686,6 @@ static int de_thread(struct task_struct *tsk)
688 */ 686 */
689 current->start_time = leader->start_time; 687 current->start_time = leader->start_time;
690 688
691 spin_lock(&leader->proc_lock);
692 spin_lock(&current->proc_lock);
693 proc_dentry1 = proc_pid_unhash(current);
694 proc_dentry2 = proc_pid_unhash(leader);
695 write_lock_irq(&tasklist_lock); 689 write_lock_irq(&tasklist_lock);
696 690
697 BUG_ON(leader->tgid != current->tgid); 691 BUG_ON(leader->tgid != current->tgid);
@@ -712,7 +706,7 @@ static int de_thread(struct task_struct *tsk)
712 attach_pid(current, PIDTYPE_PID, current->pid); 706 attach_pid(current, PIDTYPE_PID, current->pid);
713 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 707 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
714 attach_pid(current, PIDTYPE_SID, current->signal->session); 708 attach_pid(current, PIDTYPE_SID, current->signal->session);
715 list_add_tail_rcu(&current->tasks, &init_task.tasks); 709 list_replace_rcu(&leader->tasks, &current->tasks);
716 710
717 current->group_leader = current; 711 current->group_leader = current;
718 leader->group_leader = current; 712 leader->group_leader = current;
@@ -720,7 +714,6 @@ static int de_thread(struct task_struct *tsk)
720 /* Reduce leader to a thread */ 714 /* Reduce leader to a thread */
721 detach_pid(leader, PIDTYPE_PGID); 715 detach_pid(leader, PIDTYPE_PGID);
722 detach_pid(leader, PIDTYPE_SID); 716 detach_pid(leader, PIDTYPE_SID);
723 list_del_init(&leader->tasks);
724 717
725 current->exit_signal = SIGCHLD; 718 current->exit_signal = SIGCHLD;
726 719
@@ -728,10 +721,6 @@ static int de_thread(struct task_struct *tsk)
728 leader->exit_state = EXIT_DEAD; 721 leader->exit_state = EXIT_DEAD;
729 722
730 write_unlock_irq(&tasklist_lock); 723 write_unlock_irq(&tasklist_lock);
731 spin_unlock(&leader->proc_lock);
732 spin_unlock(&current->proc_lock);
733 proc_pid_flush(proc_dentry1);
734 proc_pid_flush(proc_dentry2);
735 } 724 }
736 725
737 /* 726 /*
@@ -865,7 +854,6 @@ int flush_old_exec(struct linux_binprm * bprm)
865 bprm->mm = NULL; /* We're using it now */ 854 bprm->mm = NULL; /* We're using it now */
866 855
867 /* This is the point of no return */ 856 /* This is the point of no return */
868 steal_locks(files);
869 put_files_struct(files); 857 put_files_struct(files);
870 858
871 current->sas_ss_sp = current->sas_ss_size = 0; 859 current->sas_ss_sp = current->sas_ss_size = 0;
@@ -1085,6 +1073,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1085 /* kernel module loader fixup */ 1073 /* kernel module loader fixup */
1086 /* so we don't try to load run modprobe in kernel space. */ 1074 /* so we don't try to load run modprobe in kernel space. */
1087 set_fs(USER_DS); 1075 set_fs(USER_DS);
1076
1077 retval = audit_bprm(bprm);
1078 if (retval)
1079 return retval;
1080
1088 retval = -ENOENT; 1081 retval = -ENOENT;
1089 for (try=0; try<2; try++) { 1082 for (try=0; try<2; try++) {
1090 read_lock(&binfmt_lock); 1083 read_lock(&binfmt_lock);
@@ -1374,67 +1367,102 @@ static void format_corename(char *corename, const char *pattern, long signr)
1374 *out_ptr = 0; 1367 *out_ptr = 0;
1375} 1368}
1376 1369
1377static void zap_threads (struct mm_struct *mm) 1370static void zap_process(struct task_struct *start)
1378{ 1371{
1379 struct task_struct *g, *p; 1372 struct task_struct *t;
1380 struct task_struct *tsk = current;
1381 struct completion *vfork_done = tsk->vfork_done;
1382 int traced = 0;
1383 1373
1384 /* 1374 start->signal->flags = SIGNAL_GROUP_EXIT;
1385 * Make sure nobody is waiting for us to release the VM, 1375 start->signal->group_stop_count = 0;
1386 * otherwise we can deadlock when we wait on each other
1387 */
1388 if (vfork_done) {
1389 tsk->vfork_done = NULL;
1390 complete(vfork_done);
1391 }
1392 1376
1393 read_lock(&tasklist_lock); 1377 t = start;
1394 do_each_thread(g,p) 1378 do {
1395 if (mm == p->mm && p != tsk) { 1379 if (t != current && t->mm) {
1396 force_sig_specific(SIGKILL, p); 1380 t->mm->core_waiters++;
1397 mm->core_waiters++; 1381 sigaddset(&t->pending.signal, SIGKILL);
1398 if (unlikely(p->ptrace) && 1382 signal_wake_up(t, 1);
1399 unlikely(p->parent->mm == mm))
1400 traced = 1;
1401 } 1383 }
1402 while_each_thread(g,p); 1384 } while ((t = next_thread(t)) != start);
1385}
1403 1386
1404 read_unlock(&tasklist_lock); 1387static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1388 int exit_code)
1389{
1390 struct task_struct *g, *p;
1391 unsigned long flags;
1392 int err = -EAGAIN;
1393
1394 spin_lock_irq(&tsk->sighand->siglock);
1395 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
1396 tsk->signal->group_exit_code = exit_code;
1397 zap_process(tsk);
1398 err = 0;
1399 }
1400 spin_unlock_irq(&tsk->sighand->siglock);
1401 if (err)
1402 return err;
1405 1403
1406 if (unlikely(traced)) { 1404 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
1407 /* 1405 goto done;
1408 * We are zapping a thread and the thread it ptraces. 1406
1409 * If the tracee went into a ptrace stop for exit tracing, 1407 rcu_read_lock();
1410 * we could deadlock since the tracer is waiting for this 1408 for_each_process(g) {
1411 * coredump to finish. Detach them so they can both die. 1409 if (g == tsk->group_leader)
1412 */ 1410 continue;
1413 write_lock_irq(&tasklist_lock); 1411
1414 do_each_thread(g,p) { 1412 p = g;
1415 if (mm == p->mm && p != tsk && 1413 do {
1416 p->ptrace && p->parent->mm == mm) { 1414 if (p->mm) {
1417 __ptrace_detach(p, 0); 1415 if (p->mm == mm) {
1416 /*
1417 * p->sighand can't disappear, but
1418 * may be changed by de_thread()
1419 */
1420 lock_task_sighand(p, &flags);
1421 zap_process(p);
1422 unlock_task_sighand(p, &flags);
1423 }
1424 break;
1418 } 1425 }
1419 } while_each_thread(g,p); 1426 } while ((p = next_thread(p)) != g);
1420 write_unlock_irq(&tasklist_lock);
1421 } 1427 }
1428 rcu_read_unlock();
1429done:
1430 return mm->core_waiters;
1422} 1431}
1423 1432
1424static void coredump_wait(struct mm_struct *mm) 1433static int coredump_wait(int exit_code)
1425{ 1434{
1426 DECLARE_COMPLETION(startup_done); 1435 struct task_struct *tsk = current;
1436 struct mm_struct *mm = tsk->mm;
1437 struct completion startup_done;
1438 struct completion *vfork_done;
1427 int core_waiters; 1439 int core_waiters;
1428 1440
1441 init_completion(&mm->core_done);
1442 init_completion(&startup_done);
1429 mm->core_startup_done = &startup_done; 1443 mm->core_startup_done = &startup_done;
1430 1444
1431 zap_threads(mm); 1445 core_waiters = zap_threads(tsk, mm, exit_code);
1432 core_waiters = mm->core_waiters;
1433 up_write(&mm->mmap_sem); 1446 up_write(&mm->mmap_sem);
1434 1447
1448 if (unlikely(core_waiters < 0))
1449 goto fail;
1450
1451 /*
1452 * Make sure nobody is waiting for us to release the VM,
1453 * otherwise we can deadlock when we wait on each other
1454 */
1455 vfork_done = tsk->vfork_done;
1456 if (vfork_done) {
1457 tsk->vfork_done = NULL;
1458 complete(vfork_done);
1459 }
1460
1435 if (core_waiters) 1461 if (core_waiters)
1436 wait_for_completion(&startup_done); 1462 wait_for_completion(&startup_done);
1463fail:
1437 BUG_ON(mm->core_waiters); 1464 BUG_ON(mm->core_waiters);
1465 return core_waiters;
1438} 1466}
1439 1467
1440int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1468int do_coredump(long signr, int exit_code, struct pt_regs * regs)
@@ -1468,22 +1496,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1468 } 1496 }
1469 mm->dumpable = 0; 1497 mm->dumpable = 0;
1470 1498
1471 retval = -EAGAIN; 1499 retval = coredump_wait(exit_code);
1472 spin_lock_irq(&current->sighand->siglock); 1500 if (retval < 0)
1473 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
1474 current->signal->flags = SIGNAL_GROUP_EXIT;
1475 current->signal->group_exit_code = exit_code;
1476 current->signal->group_stop_count = 0;
1477 retval = 0;
1478 }
1479 spin_unlock_irq(&current->sighand->siglock);
1480 if (retval) {
1481 up_write(&mm->mmap_sem);
1482 goto fail; 1501 goto fail;
1483 }
1484
1485 init_completion(&mm->core_done);
1486 coredump_wait(mm);
1487 1502
1488 /* 1503 /*
1489 * Clear any false indication of pending signals that might 1504 * Clear any false indication of pending signals that might
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index c5d02da73bc3..e0b2b43c1fdb 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_EXT2_FS) += ext2.o 5obj-$(CONFIG_EXT2_FS) += ext2.o
6 6
7ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o 8 ioctl.o namei.o super.o symlink.o
9 9
10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 2c00953d4b0b..d4870432ecfc 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -11,7 +11,6 @@
11 * David S. Miller (davem@caip.rutgers.edu), 1995 11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */ 12 */
13 13
14#include <linux/config.h>
15#include "ext2.h" 14#include "ext2.h"
16#include <linux/quotaops.h> 15#include <linux/quotaops.h>
17#include <linux/sched.h> 16#include <linux/sched.h>
@@ -521,6 +520,26 @@ io_error:
521 goto out_release; 520 goto out_release;
522} 521}
523 522
523#ifdef EXT2FS_DEBUG
524
525static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
526
527unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
528{
529 unsigned int i;
530 unsigned long sum = 0;
531
532 if (!map)
533 return (0);
534 for (i = 0; i < numchars; i++)
535 sum += nibblemap[map->b_data[i] & 0xf] +
536 nibblemap[(map->b_data[i] >> 4) & 0xf];
537 return (sum);
538}
539
540#endif /* EXT2FS_DEBUG */
541
542/* Superblock must be locked */
524unsigned long ext2_count_free_blocks (struct super_block * sb) 543unsigned long ext2_count_free_blocks (struct super_block * sb)
525{ 544{
526 struct ext2_group_desc * desc; 545 struct ext2_group_desc * desc;
@@ -530,7 +549,6 @@ unsigned long ext2_count_free_blocks (struct super_block * sb)
530 unsigned long bitmap_count, x; 549 unsigned long bitmap_count, x;
531 struct ext2_super_block *es; 550 struct ext2_super_block *es;
532 551
533 lock_super (sb);
534 es = EXT2_SB(sb)->s_es; 552 es = EXT2_SB(sb)->s_es;
535 desc_count = 0; 553 desc_count = 0;
536 bitmap_count = 0; 554 bitmap_count = 0;
@@ -554,7 +572,6 @@ unsigned long ext2_count_free_blocks (struct super_block * sb)
554 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", 572 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
555 (long)le32_to_cpu(es->s_free_blocks_count), 573 (long)le32_to_cpu(es->s_free_blocks_count),
556 desc_count, bitmap_count); 574 desc_count, bitmap_count);
557 unlock_super (sb);
558 return bitmap_count; 575 return bitmap_count;
559#else 576#else
560 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { 577 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
deleted file mode 100644
index e9983a0dd396..000000000000
--- a/fs/ext2/bitmap.c
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * linux/fs/ext2/bitmap.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 */
9
10#ifdef EXT2FS_DEBUG
11
12#include <linux/buffer_head.h>
13
14#include "ext2.h"
15
16static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17
18unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
19{
20 unsigned int i;
21 unsigned long sum = 0;
22
23 if (!map)
24 return (0);
25 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] +
27 nibblemap[(map->b_data[i] >> 4) & 0xf];
28 return (sum);
29}
30
31#endif /* EXT2FS_DEBUG */
32
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d672aa9f4061..92ea8265d7d5 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -159,8 +159,7 @@ fail:
159static struct page * ext2_get_page(struct inode *dir, unsigned long n) 159static struct page * ext2_get_page(struct inode *dir, unsigned long n)
160{ 160{
161 struct address_space *mapping = dir->i_mapping; 161 struct address_space *mapping = dir->i_mapping;
162 struct page *page = read_cache_page(mapping, n, 162 struct page *page = read_mapping_page(mapping, n, NULL);
163 (filler_t*)mapping->a_ops->readpage, NULL);
164 if (!IS_ERR(page)) { 163 if (!IS_ERR(page)) {
165 wait_on_page_locked(page); 164 wait_on_page_locked(page);
166 kmap(page); 165 kmap(page);
@@ -400,8 +399,7 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
400 de = ext2_find_entry (dir, dentry, &page); 399 de = ext2_find_entry (dir, dentry, &page);
401 if (de) { 400 if (de) {
402 res = le32_to_cpu(de->inode); 401 res = le32_to_cpu(de->inode);
403 kunmap(page); 402 ext2_put_page(page);
404 page_cache_release(page);
405 } 403 }
406 return res; 404 return res;
407} 405}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9f74a62be555..e65a019fc7a5 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -162,9 +162,9 @@ extern const struct file_operations ext2_file_operations;
162extern const struct file_operations ext2_xip_file_operations; 162extern const struct file_operations ext2_xip_file_operations;
163 163
164/* inode.c */ 164/* inode.c */
165extern struct address_space_operations ext2_aops; 165extern const struct address_space_operations ext2_aops;
166extern struct address_space_operations ext2_aops_xip; 166extern const struct address_space_operations ext2_aops_xip;
167extern struct address_space_operations ext2_nobh_aops; 167extern const struct address_space_operations ext2_nobh_aops;
168 168
169/* namei.c */ 169/* namei.c */
170extern struct inode_operations ext2_dir_inode_operations; 170extern struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
index c9c2e5ffa48e..7806b9e8155b 100644
--- a/fs/ext2/fsync.c
+++ b/fs/ext2/fsync.c
@@ -24,7 +24,7 @@
24 24
25#include "ext2.h" 25#include "ext2.h"
26#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
27#include <linux/buffer_head.h> /* for fsync_inode_buffers() */ 27#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
28 28
29 29
30/* 30/*
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index e52765219e16..de85c61c58c5 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -12,7 +12,6 @@
12 * David S. Miller (davem@caip.rutgers.edu), 1995 12 * David S. Miller (davem@caip.rutgers.edu), 1995
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/quotaops.h> 15#include <linux/quotaops.h>
17#include <linux/sched.h> 16#include <linux/sched.h>
18#include <linux/backing-dev.h> 17#include <linux/backing-dev.h>
@@ -638,6 +637,7 @@ fail:
638 return ERR_PTR(err); 637 return ERR_PTR(err);
639} 638}
640 639
640/* Superblock must be locked */
641unsigned long ext2_count_free_inodes (struct super_block * sb) 641unsigned long ext2_count_free_inodes (struct super_block * sb)
642{ 642{
643 struct ext2_group_desc *desc; 643 struct ext2_group_desc *desc;
@@ -649,7 +649,6 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
649 unsigned long bitmap_count = 0; 649 unsigned long bitmap_count = 0;
650 struct buffer_head *bitmap_bh = NULL; 650 struct buffer_head *bitmap_bh = NULL;
651 651
652 lock_super (sb);
653 es = EXT2_SB(sb)->s_es; 652 es = EXT2_SB(sb)->s_es;
654 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { 653 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
655 unsigned x; 654 unsigned x;
@@ -672,7 +671,6 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
672 printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", 671 printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
673 percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), 672 percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
674 desc_count, bitmap_count); 673 desc_count, bitmap_count);
675 unlock_super(sb);
676 return desc_count; 674 return desc_count;
677#else 675#else
678 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { 676 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 04af9c45dce2..fb4d3220eb8d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -684,7 +684,7 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
684 return mpage_writepages(mapping, wbc, ext2_get_block); 684 return mpage_writepages(mapping, wbc, ext2_get_block);
685} 685}
686 686
687struct address_space_operations ext2_aops = { 687const struct address_space_operations ext2_aops = {
688 .readpage = ext2_readpage, 688 .readpage = ext2_readpage,
689 .readpages = ext2_readpages, 689 .readpages = ext2_readpages,
690 .writepage = ext2_writepage, 690 .writepage = ext2_writepage,
@@ -697,12 +697,12 @@ struct address_space_operations ext2_aops = {
697 .migratepage = buffer_migrate_page, 697 .migratepage = buffer_migrate_page,
698}; 698};
699 699
700struct address_space_operations ext2_aops_xip = { 700const struct address_space_operations ext2_aops_xip = {
701 .bmap = ext2_bmap, 701 .bmap = ext2_bmap,
702 .get_xip_page = ext2_get_xip_page, 702 .get_xip_page = ext2_get_xip_page,
703}; 703};
704 704
705struct address_space_operations ext2_nobh_aops = { 705const struct address_space_operations ext2_nobh_aops = {
706 .readpage = ext2_readpage, 706 .readpage = ext2_readpage,
707 .readpages = ext2_readpages, 707 .readpages = ext2_readpages,
708 .writepage = ext2_nobh_writepage, 708 .writepage = ext2_nobh_writepage,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7e30bae174ed..9f43879d6d68 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -16,7 +16,6 @@
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */ 17 */
18 18
19#include <linux/config.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/string.h> 20#include <linux/string.h>
22#include <linux/fs.h> 21#include <linux/fs.h>
@@ -39,7 +38,7 @@
39static void ext2_sync_super(struct super_block *sb, 38static void ext2_sync_super(struct super_block *sb,
40 struct ext2_super_block *es); 39 struct ext2_super_block *es);
41static int ext2_remount (struct super_block * sb, int * flags, char * data); 40static int ext2_remount (struct super_block * sb, int * flags, char * data);
42static int ext2_statfs (struct super_block * sb, struct kstatfs * buf); 41static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
43 42
44void ext2_error (struct super_block * sb, const char * function, 43void ext2_error (struct super_block * sb, const char * function,
45 const char * fmt, ...) 44 const char * fmt, ...)
@@ -834,9 +833,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
834 printk ("EXT2-fs: not enough memory\n"); 833 printk ("EXT2-fs: not enough memory\n");
835 goto failed_mount; 834 goto failed_mount;
836 } 835 }
837 percpu_counter_init(&sbi->s_freeblocks_counter);
838 percpu_counter_init(&sbi->s_freeinodes_counter);
839 percpu_counter_init(&sbi->s_dirs_counter);
840 bgl_lock_init(&sbi->s_blockgroup_lock); 836 bgl_lock_init(&sbi->s_blockgroup_lock);
841 sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), 837 sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
842 GFP_KERNEL); 838 GFP_KERNEL);
@@ -857,12 +853,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
857 } 853 }
858 if (!ext2_check_descriptors (sb)) { 854 if (!ext2_check_descriptors (sb)) {
859 printk ("EXT2-fs: group descriptors corrupted!\n"); 855 printk ("EXT2-fs: group descriptors corrupted!\n");
860 db_count = i;
861 goto failed_mount2; 856 goto failed_mount2;
862 } 857 }
863 sbi->s_gdb_count = db_count; 858 sbi->s_gdb_count = db_count;
864 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 859 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
865 spin_lock_init(&sbi->s_next_gen_lock); 860 spin_lock_init(&sbi->s_next_gen_lock);
861
862 percpu_counter_init(&sbi->s_freeblocks_counter,
863 ext2_count_free_blocks(sb));
864 percpu_counter_init(&sbi->s_freeinodes_counter,
865 ext2_count_free_inodes(sb));
866 percpu_counter_init(&sbi->s_dirs_counter,
867 ext2_count_dirs(sb));
866 /* 868 /*
867 * set up enough so that it can read an inode 869 * set up enough so that it can read an inode
868 */ 870 */
@@ -874,24 +876,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
874 if (!sb->s_root) { 876 if (!sb->s_root) {
875 iput(root); 877 iput(root);
876 printk(KERN_ERR "EXT2-fs: get root inode failed\n"); 878 printk(KERN_ERR "EXT2-fs: get root inode failed\n");
877 goto failed_mount2; 879 goto failed_mount3;
878 } 880 }
879 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 881 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
880 dput(sb->s_root); 882 dput(sb->s_root);
881 sb->s_root = NULL; 883 sb->s_root = NULL;
882 printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); 884 printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
883 goto failed_mount2; 885 goto failed_mount3;
884 } 886 }
885 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 887 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
886 ext2_warning(sb, __FUNCTION__, 888 ext2_warning(sb, __FUNCTION__,
887 "mounting ext3 filesystem as ext2"); 889 "mounting ext3 filesystem as ext2");
888 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 890 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
889 percpu_counter_mod(&sbi->s_freeblocks_counter,
890 ext2_count_free_blocks(sb));
891 percpu_counter_mod(&sbi->s_freeinodes_counter,
892 ext2_count_free_inodes(sb));
893 percpu_counter_mod(&sbi->s_dirs_counter,
894 ext2_count_dirs(sb));
895 return 0; 891 return 0;
896 892
897cantfind_ext2: 893cantfind_ext2:
@@ -899,7 +895,10 @@ cantfind_ext2:
899 printk("VFS: Can't find an ext2 filesystem on dev %s.\n", 895 printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
900 sb->s_id); 896 sb->s_id);
901 goto failed_mount; 897 goto failed_mount;
902 898failed_mount3:
899 percpu_counter_destroy(&sbi->s_freeblocks_counter);
900 percpu_counter_destroy(&sbi->s_freeinodes_counter);
901 percpu_counter_destroy(&sbi->s_dirs_counter);
903failed_mount2: 902failed_mount2:
904 for (i = 0; i < db_count; i++) 903 for (i = 0; i < db_count; i++)
905 brelse(sbi->s_group_desc[i]); 904 brelse(sbi->s_group_desc[i]);
@@ -1038,12 +1037,14 @@ restore_opts:
1038 return err; 1037 return err;
1039} 1038}
1040 1039
1041static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) 1040static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1042{ 1041{
1042 struct super_block *sb = dentry->d_sb;
1043 struct ext2_sb_info *sbi = EXT2_SB(sb); 1043 struct ext2_sb_info *sbi = EXT2_SB(sb);
1044 unsigned long overhead; 1044 unsigned long overhead;
1045 int i; 1045 int i;
1046 1046
1047 lock_super(sb);
1047 if (test_opt (sb, MINIX_DF)) 1048 if (test_opt (sb, MINIX_DF))
1048 overhead = 0; 1049 overhead = 0;
1049 else { 1050 else {
@@ -1084,13 +1085,14 @@ static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
1084 buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); 1085 buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
1085 buf->f_ffree = ext2_count_free_inodes (sb); 1086 buf->f_ffree = ext2_count_free_inodes (sb);
1086 buf->f_namelen = EXT2_NAME_LEN; 1087 buf->f_namelen = EXT2_NAME_LEN;
1088 unlock_super(sb);
1087 return 0; 1089 return 0;
1088} 1090}
1089 1091
1090static struct super_block *ext2_get_sb(struct file_system_type *fs_type, 1092static int ext2_get_sb(struct file_system_type *fs_type,
1091 int flags, const char *dev_name, void *data) 1093 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1092{ 1094{
1093 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); 1095 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
1094} 1096}
1095 1097
1096#ifdef CONFIG_QUOTA 1098#ifdef CONFIG_QUOTA
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 67cfeb66e897..bf8175b2ced9 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -6,7 +6,6 @@
6 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 6 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
7*/ 7*/
8 8
9#include <linux/config.h>
10#include <linux/init.h> 9#include <linux/init.h>
11#include <linux/xattr.h> 10#include <linux/xattr.h>
12 11
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 77927d6938f6..a504a40d6d29 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -11,7 +11,6 @@
11 * David S. Miller (davem@caip.rutgers.edu), 1995 11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/capability.h> 15#include <linux/capability.h>
17#include <linux/fs.h> 16#include <linux/fs.h>
@@ -163,20 +162,19 @@ restart:
163#endif 162#endif
164 163
165static int 164static int
166goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, 165goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
167 unsigned int group, struct super_block * sb) 166 unsigned int group, struct super_block * sb)
168{ 167{
169 unsigned long group_first_block, group_last_block; 168 ext3_fsblk_t group_first_block, group_last_block;
170 169
171 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + 170 group_first_block = ext3_group_first_block_no(sb, group);
172 group * EXT3_BLOCKS_PER_GROUP(sb);
173 group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 171 group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
174 172
175 if ((rsv->_rsv_start > group_last_block) || 173 if ((rsv->_rsv_start > group_last_block) ||
176 (rsv->_rsv_end < group_first_block)) 174 (rsv->_rsv_end < group_first_block))
177 return 0; 175 return 0;
178 if ((goal >= 0) && ((goal + group_first_block < rsv->_rsv_start) 176 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
179 || (goal + group_first_block > rsv->_rsv_end))) 177 || (grp_goal + group_first_block > rsv->_rsv_end)))
180 return 0; 178 return 0;
181 return 1; 179 return 1;
182} 180}
@@ -187,7 +185,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal,
187 * Returns NULL if there are no windows or if all windows start after the goal. 185 * Returns NULL if there are no windows or if all windows start after the goal.
188 */ 186 */
189static struct ext3_reserve_window_node * 187static struct ext3_reserve_window_node *
190search_reserve_window(struct rb_root *root, unsigned long goal) 188search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
191{ 189{
192 struct rb_node *n = root->rb_node; 190 struct rb_node *n = root->rb_node;
193 struct ext3_reserve_window_node *rsv; 191 struct ext3_reserve_window_node *rsv;
@@ -223,7 +221,7 @@ void ext3_rsv_window_add(struct super_block *sb,
223{ 221{
224 struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; 222 struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
225 struct rb_node *node = &rsv->rsv_node; 223 struct rb_node *node = &rsv->rsv_node;
226 unsigned int start = rsv->rsv_start; 224 ext3_fsblk_t start = rsv->rsv_start;
227 225
228 struct rb_node ** p = &root->rb_node; 226 struct rb_node ** p = &root->rb_node;
229 struct rb_node * parent = NULL; 227 struct rb_node * parent = NULL;
@@ -310,20 +308,20 @@ void ext3_discard_reservation(struct inode *inode)
310 308
311/* Free given blocks, update quota and i_blocks field */ 309/* Free given blocks, update quota and i_blocks field */
312void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, 310void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
313 unsigned long block, unsigned long count, 311 ext3_fsblk_t block, unsigned long count,
314 int *pdquot_freed_blocks) 312 unsigned long *pdquot_freed_blocks)
315{ 313{
316 struct buffer_head *bitmap_bh = NULL; 314 struct buffer_head *bitmap_bh = NULL;
317 struct buffer_head *gd_bh; 315 struct buffer_head *gd_bh;
318 unsigned long block_group; 316 unsigned long block_group;
319 unsigned long bit; 317 ext3_grpblk_t bit;
320 unsigned long i; 318 unsigned long i;
321 unsigned long overflow; 319 unsigned long overflow;
322 struct ext3_group_desc * desc; 320 struct ext3_group_desc * desc;
323 struct ext3_super_block * es; 321 struct ext3_super_block * es;
324 struct ext3_sb_info *sbi; 322 struct ext3_sb_info *sbi;
325 int err = 0, ret; 323 int err = 0, ret;
326 unsigned group_freed; 324 ext3_grpblk_t group_freed;
327 325
328 *pdquot_freed_blocks = 0; 326 *pdquot_freed_blocks = 0;
329 sbi = EXT3_SB(sb); 327 sbi = EXT3_SB(sb);
@@ -333,7 +331,7 @@ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
333 block + count > le32_to_cpu(es->s_blocks_count)) { 331 block + count > le32_to_cpu(es->s_blocks_count)) {
334 ext3_error (sb, "ext3_free_blocks", 332 ext3_error (sb, "ext3_free_blocks",
335 "Freeing blocks not in datazone - " 333 "Freeing blocks not in datazone - "
336 "block = %lu, count = %lu", block, count); 334 "block = "E3FSBLK", count = %lu", block, count);
337 goto error_return; 335 goto error_return;
338 } 336 }
339 337
@@ -369,7 +367,7 @@ do_more:
369 sbi->s_itb_per_group)) 367 sbi->s_itb_per_group))
370 ext3_error (sb, "ext3_free_blocks", 368 ext3_error (sb, "ext3_free_blocks",
371 "Freeing blocks in system zones - " 369 "Freeing blocks in system zones - "
372 "Block = %lu, count = %lu", 370 "Block = "E3FSBLK", count = %lu",
373 block, count); 371 block, count);
374 372
375 /* 373 /*
@@ -453,7 +451,8 @@ do_more:
453 bit + i, bitmap_bh->b_data)) { 451 bit + i, bitmap_bh->b_data)) {
454 jbd_unlock_bh_state(bitmap_bh); 452 jbd_unlock_bh_state(bitmap_bh);
455 ext3_error(sb, __FUNCTION__, 453 ext3_error(sb, __FUNCTION__,
456 "bit already cleared for block %lu", block + i); 454 "bit already cleared for block "E3FSBLK,
455 block + i);
457 jbd_lock_bh_state(bitmap_bh); 456 jbd_lock_bh_state(bitmap_bh);
458 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 457 BUFFER_TRACE(bitmap_bh, "bit already cleared");
459 } else { 458 } else {
@@ -493,10 +492,10 @@ error_return:
493 492
494/* Free given blocks, update quota and i_blocks field */ 493/* Free given blocks, update quota and i_blocks field */
495void ext3_free_blocks(handle_t *handle, struct inode *inode, 494void ext3_free_blocks(handle_t *handle, struct inode *inode,
496 unsigned long block, unsigned long count) 495 ext3_fsblk_t block, unsigned long count)
497{ 496{
498 struct super_block * sb; 497 struct super_block * sb;
499 int dquot_freed_blocks; 498 unsigned long dquot_freed_blocks;
500 499
501 sb = inode->i_sb; 500 sb = inode->i_sb;
502 if (!sb) { 501 if (!sb) {
@@ -525,7 +524,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
525 * data-writes at some point, and disable it for metadata allocations or 524 * data-writes at some point, and disable it for metadata allocations or
526 * sync-data inodes. 525 * sync-data inodes.
527 */ 526 */
528static int ext3_test_allocatable(int nr, struct buffer_head *bh) 527static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
529{ 528{
530 int ret; 529 int ret;
531 struct journal_head *jh = bh2jh(bh); 530 struct journal_head *jh = bh2jh(bh);
@@ -542,11 +541,11 @@ static int ext3_test_allocatable(int nr, struct buffer_head *bh)
542 return ret; 541 return ret;
543} 542}
544 543
545static int 544static ext3_grpblk_t
546bitmap_search_next_usable_block(int start, struct buffer_head *bh, 545bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
547 int maxblocks) 546 ext3_grpblk_t maxblocks)
548{ 547{
549 int next; 548 ext3_grpblk_t next;
550 struct journal_head *jh = bh2jh(bh); 549 struct journal_head *jh = bh2jh(bh);
551 550
552 /* 551 /*
@@ -576,10 +575,11 @@ bitmap_search_next_usable_block(int start, struct buffer_head *bh,
576 * the initial goal; then for a free byte somewhere in the bitmap; then 575 * the initial goal; then for a free byte somewhere in the bitmap; then
577 * for any free bit in the bitmap. 576 * for any free bit in the bitmap.
578 */ 577 */
579static int 578static ext3_grpblk_t
580find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) 579find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
580 ext3_grpblk_t maxblocks)
581{ 581{
582 int here, next; 582 ext3_grpblk_t here, next;
583 char *p, *r; 583 char *p, *r;
584 584
585 if (start > 0) { 585 if (start > 0) {
@@ -591,7 +591,7 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
591 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the 591 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
592 * next 64-bit boundary is simple.. 592 * next 64-bit boundary is simple..
593 */ 593 */
594 int end_goal = (start + 63) & ~63; 594 ext3_grpblk_t end_goal = (start + 63) & ~63;
595 if (end_goal > maxblocks) 595 if (end_goal > maxblocks)
596 end_goal = maxblocks; 596 end_goal = maxblocks;
597 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); 597 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
@@ -628,7 +628,7 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
628 * zero (failure). 628 * zero (failure).
629 */ 629 */
630static inline int 630static inline int
631claim_block(spinlock_t *lock, int block, struct buffer_head *bh) 631claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
632{ 632{
633 struct journal_head *jh = bh2jh(bh); 633 struct journal_head *jh = bh2jh(bh);
634 int ret; 634 int ret;
@@ -651,19 +651,18 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
651 * new bitmap. In that case we must release write access to the old one via 651 * new bitmap. In that case we must release write access to the old one via
652 * ext3_journal_release_buffer(), else we'll run out of credits. 652 * ext3_journal_release_buffer(), else we'll run out of credits.
653 */ 653 */
654static int 654static ext3_grpblk_t
655ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, 655ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
656 struct buffer_head *bitmap_bh, int goal, 656 struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
657 unsigned long *count, struct ext3_reserve_window *my_rsv) 657 unsigned long *count, struct ext3_reserve_window *my_rsv)
658{ 658{
659 int group_first_block, start, end; 659 ext3_fsblk_t group_first_block;
660 ext3_grpblk_t start, end;
660 unsigned long num = 0; 661 unsigned long num = 0;
661 662
662 /* we do allocation within the reservation window if we have a window */ 663 /* we do allocation within the reservation window if we have a window */
663 if (my_rsv) { 664 if (my_rsv) {
664 group_first_block = 665 group_first_block = ext3_group_first_block_no(sb, group);
665 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
666 group * EXT3_BLOCKS_PER_GROUP(sb);
667 if (my_rsv->_rsv_start >= group_first_block) 666 if (my_rsv->_rsv_start >= group_first_block)
668 start = my_rsv->_rsv_start - group_first_block; 667 start = my_rsv->_rsv_start - group_first_block;
669 else 668 else
@@ -673,13 +672,13 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
673 if (end > EXT3_BLOCKS_PER_GROUP(sb)) 672 if (end > EXT3_BLOCKS_PER_GROUP(sb))
674 /* reservation window crosses group boundary */ 673 /* reservation window crosses group boundary */
675 end = EXT3_BLOCKS_PER_GROUP(sb); 674 end = EXT3_BLOCKS_PER_GROUP(sb);
676 if ((start <= goal) && (goal < end)) 675 if ((start <= grp_goal) && (grp_goal < end))
677 start = goal; 676 start = grp_goal;
678 else 677 else
679 goal = -1; 678 grp_goal = -1;
680 } else { 679 } else {
681 if (goal > 0) 680 if (grp_goal > 0)
682 start = goal; 681 start = grp_goal;
683 else 682 else
684 start = 0; 683 start = 0;
685 end = EXT3_BLOCKS_PER_GROUP(sb); 684 end = EXT3_BLOCKS_PER_GROUP(sb);
@@ -688,43 +687,43 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
688 BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); 687 BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
689 688
690repeat: 689repeat:
691 if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) { 690 if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
692 goal = find_next_usable_block(start, bitmap_bh, end); 691 grp_goal = find_next_usable_block(start, bitmap_bh, end);
693 if (goal < 0) 692 if (grp_goal < 0)
694 goto fail_access; 693 goto fail_access;
695 if (!my_rsv) { 694 if (!my_rsv) {
696 int i; 695 int i;
697 696
698 for (i = 0; i < 7 && goal > start && 697 for (i = 0; i < 7 && grp_goal > start &&
699 ext3_test_allocatable(goal - 1, 698 ext3_test_allocatable(grp_goal - 1,
700 bitmap_bh); 699 bitmap_bh);
701 i++, goal--) 700 i++, grp_goal--)
702 ; 701 ;
703 } 702 }
704 } 703 }
705 start = goal; 704 start = grp_goal;
706 705
707 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { 706 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) {
708 /* 707 /*
709 * The block was allocated by another thread, or it was 708 * The block was allocated by another thread, or it was
710 * allocated and then freed by another thread 709 * allocated and then freed by another thread
711 */ 710 */
712 start++; 711 start++;
713 goal++; 712 grp_goal++;
714 if (start >= end) 713 if (start >= end)
715 goto fail_access; 714 goto fail_access;
716 goto repeat; 715 goto repeat;
717 } 716 }
718 num++; 717 num++;
719 goal++; 718 grp_goal++;
720 while (num < *count && goal < end 719 while (num < *count && grp_goal < end
721 && ext3_test_allocatable(goal, bitmap_bh) 720 && ext3_test_allocatable(grp_goal, bitmap_bh)
722 && claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { 721 && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) {
723 num++; 722 num++;
724 goal++; 723 grp_goal++;
725 } 724 }
726 *count = num; 725 *count = num;
727 return goal - num; 726 return grp_goal - num;
728fail_access: 727fail_access:
729 *count = num; 728 *count = num;
730 return -1; 729 return -1;
@@ -766,12 +765,13 @@ fail_access:
766static int find_next_reservable_window( 765static int find_next_reservable_window(
767 struct ext3_reserve_window_node *search_head, 766 struct ext3_reserve_window_node *search_head,
768 struct ext3_reserve_window_node *my_rsv, 767 struct ext3_reserve_window_node *my_rsv,
769 struct super_block * sb, int start_block, 768 struct super_block * sb,
770 int last_block) 769 ext3_fsblk_t start_block,
770 ext3_fsblk_t last_block)
771{ 771{
772 struct rb_node *next; 772 struct rb_node *next;
773 struct ext3_reserve_window_node *rsv, *prev; 773 struct ext3_reserve_window_node *rsv, *prev;
774 int cur; 774 ext3_fsblk_t cur;
775 int size = my_rsv->rsv_goal_size; 775 int size = my_rsv->rsv_goal_size;
776 776
777 /* TODO: make the start of the reservation window byte-aligned */ 777 /* TODO: make the start of the reservation window byte-aligned */
@@ -873,10 +873,10 @@ static int find_next_reservable_window(
873 * 873 *
874 * @rsv: the reservation 874 * @rsv: the reservation
875 * 875 *
876 * @goal: The goal (group-relative). It is where the search for a 876 * @grp_goal: The goal (group-relative). It is where the search for a
877 * free reservable space should start from. 877 * free reservable space should start from.
878 * if we have a goal(goal >0 ), then start from there, 878 * if we have a grp_goal(grp_goal >0 ), then start from there,
879 * no goal(goal = -1), we start from the first block 879 * no grp_goal(grp_goal = -1), we start from the first block
880 * of the group. 880 * of the group.
881 * 881 *
882 * @sb: the super block 882 * @sb: the super block
@@ -885,25 +885,24 @@ static int find_next_reservable_window(
885 * 885 *
886 */ 886 */
887static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, 887static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
888 int goal, struct super_block *sb, 888 ext3_grpblk_t grp_goal, struct super_block *sb,
889 unsigned int group, struct buffer_head *bitmap_bh) 889 unsigned int group, struct buffer_head *bitmap_bh)
890{ 890{
891 struct ext3_reserve_window_node *search_head; 891 struct ext3_reserve_window_node *search_head;
892 int group_first_block, group_end_block, start_block; 892 ext3_fsblk_t group_first_block, group_end_block, start_block;
893 int first_free_block; 893 ext3_grpblk_t first_free_block;
894 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; 894 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
895 unsigned long size; 895 unsigned long size;
896 int ret; 896 int ret;
897 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 897 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
898 898
899 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + 899 group_first_block = ext3_group_first_block_no(sb, group);
900 group * EXT3_BLOCKS_PER_GROUP(sb);
901 group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 900 group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
902 901
903 if (goal < 0) 902 if (grp_goal < 0)
904 start_block = group_first_block; 903 start_block = group_first_block;
905 else 904 else
906 start_block = goal + group_first_block; 905 start_block = grp_goal + group_first_block;
907 906
908 size = my_rsv->rsv_goal_size; 907 size = my_rsv->rsv_goal_size;
909 908
@@ -1057,14 +1056,15 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1057 * sorted double linked list should be fast. 1056 * sorted double linked list should be fast.
1058 * 1057 *
1059 */ 1058 */
1060static int 1059static ext3_grpblk_t
1061ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, 1060ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1062 unsigned int group, struct buffer_head *bitmap_bh, 1061 unsigned int group, struct buffer_head *bitmap_bh,
1063 int goal, struct ext3_reserve_window_node * my_rsv, 1062 ext3_grpblk_t grp_goal,
1063 struct ext3_reserve_window_node * my_rsv,
1064 unsigned long *count, int *errp) 1064 unsigned long *count, int *errp)
1065{ 1065{
1066 unsigned long group_first_block; 1066 ext3_fsblk_t group_first_block;
1067 int ret = 0; 1067 ext3_grpblk_t ret = 0;
1068 int fatal; 1068 int fatal;
1069 unsigned long num = *count; 1069 unsigned long num = *count;
1070 1070
@@ -1090,17 +1090,16 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1090 */ 1090 */
1091 if (my_rsv == NULL ) { 1091 if (my_rsv == NULL ) {
1092 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, 1092 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
1093 goal, count, NULL); 1093 grp_goal, count, NULL);
1094 goto out; 1094 goto out;
1095 } 1095 }
1096 /* 1096 /*
1097 * goal is a group relative block number (if there is a goal) 1097 * grp_goal is a group relative block number (if there is a goal)
1098 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) 1098 * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
1099 * first block is a filesystem wide block number 1099 * first block is a filesystem wide block number
1100 * first block is the block number of the first block in this group 1100 * first block is the block number of the first block in this group
1101 */ 1101 */
1102 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + 1102 group_first_block = ext3_group_first_block_no(sb, group);
1103 group * EXT3_BLOCKS_PER_GROUP(sb);
1104 1103
1105 /* 1104 /*
1106 * Basically we will allocate a new block from inode's reservation 1105 * Basically we will allocate a new block from inode's reservation
@@ -1119,24 +1118,24 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1119 */ 1118 */
1120 while (1) { 1119 while (1) {
1121 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || 1120 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1122 !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { 1121 !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) {
1123 if (my_rsv->rsv_goal_size < *count) 1122 if (my_rsv->rsv_goal_size < *count)
1124 my_rsv->rsv_goal_size = *count; 1123 my_rsv->rsv_goal_size = *count;
1125 ret = alloc_new_reservation(my_rsv, goal, sb, 1124 ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1126 group, bitmap_bh); 1125 group, bitmap_bh);
1127 if (ret < 0) 1126 if (ret < 0)
1128 break; /* failed */ 1127 break; /* failed */
1129 1128
1130 if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) 1129 if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb))
1131 goal = -1; 1130 grp_goal = -1;
1132 } else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count) 1131 } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count)
1133 try_to_extend_reservation(my_rsv, sb, 1132 try_to_extend_reservation(my_rsv, sb,
1134 *count-my_rsv->rsv_end + goal - 1); 1133 *count-my_rsv->rsv_end + grp_goal - 1);
1135 1134
1136 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) 1135 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
1137 || (my_rsv->rsv_end < group_first_block)) 1136 || (my_rsv->rsv_end < group_first_block))
1138 BUG(); 1137 BUG();
1139 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, 1138 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal,
1140 &num, &my_rsv->rsv_window); 1139 &num, &my_rsv->rsv_window);
1141 if (ret >= 0) { 1140 if (ret >= 0) {
1142 my_rsv->rsv_alloc_hit += num; 1141 my_rsv->rsv_alloc_hit += num;
@@ -1164,7 +1163,7 @@ out:
1164 1163
1165static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1164static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1166{ 1165{
1167 int free_blocks, root_blocks; 1166 ext3_fsblk_t free_blocks, root_blocks;
1168 1167
1169 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1168 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1170 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1169 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
@@ -1200,19 +1199,20 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1200 * bitmap, and then for any free bit if that fails. 1199 * bitmap, and then for any free bit if that fails.
1201 * This function also updates quota and i_blocks field. 1200 * This function also updates quota and i_blocks field.
1202 */ 1201 */
1203int ext3_new_blocks(handle_t *handle, struct inode *inode, 1202ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1204 unsigned long goal, unsigned long *count, int *errp) 1203 ext3_fsblk_t goal, unsigned long *count, int *errp)
1205{ 1204{
1206 struct buffer_head *bitmap_bh = NULL; 1205 struct buffer_head *bitmap_bh = NULL;
1207 struct buffer_head *gdp_bh; 1206 struct buffer_head *gdp_bh;
1208 int group_no; 1207 int group_no;
1209 int goal_group; 1208 int goal_group;
1210 int ret_block; 1209 ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1210 ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1211 ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */
1211 int bgi; /* blockgroup iteration index */ 1212 int bgi; /* blockgroup iteration index */
1212 int target_block;
1213 int fatal = 0, err; 1213 int fatal = 0, err;
1214 int performed_allocation = 0; 1214 int performed_allocation = 0;
1215 int free_blocks; 1215 ext3_grpblk_t free_blocks; /* number of free blocks in a group */
1216 struct super_block *sb; 1216 struct super_block *sb;
1217 struct ext3_group_desc *gdp; 1217 struct ext3_group_desc *gdp;
1218 struct ext3_super_block *es; 1218 struct ext3_super_block *es;
@@ -1285,16 +1285,17 @@ retry:
1285 my_rsv = NULL; 1285 my_rsv = NULL;
1286 1286
1287 if (free_blocks > 0) { 1287 if (free_blocks > 0) {
1288 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % 1288 grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
1289 EXT3_BLOCKS_PER_GROUP(sb)); 1289 EXT3_BLOCKS_PER_GROUP(sb));
1290 bitmap_bh = read_block_bitmap(sb, group_no); 1290 bitmap_bh = read_block_bitmap(sb, group_no);
1291 if (!bitmap_bh) 1291 if (!bitmap_bh)
1292 goto io_error; 1292 goto io_error;
1293 ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, 1293 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1294 bitmap_bh, ret_block, my_rsv, &num, &fatal); 1294 group_no, bitmap_bh, grp_target_blk,
1295 my_rsv, &num, &fatal);
1295 if (fatal) 1296 if (fatal)
1296 goto out; 1297 goto out;
1297 if (ret_block >= 0) 1298 if (grp_alloc_blk >= 0)
1298 goto allocated; 1299 goto allocated;
1299 } 1300 }
1300 1301
@@ -1327,11 +1328,15 @@ retry:
1327 bitmap_bh = read_block_bitmap(sb, group_no); 1328 bitmap_bh = read_block_bitmap(sb, group_no);
1328 if (!bitmap_bh) 1329 if (!bitmap_bh)
1329 goto io_error; 1330 goto io_error;
1330 ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, 1331 /*
1331 bitmap_bh, -1, my_rsv, &num, &fatal); 1332 * try to allocate block(s) from this group, without a goal(-1).
1333 */
1334 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1335 group_no, bitmap_bh, -1, my_rsv,
1336 &num, &fatal);
1332 if (fatal) 1337 if (fatal)
1333 goto out; 1338 goto out;
1334 if (ret_block >= 0) 1339 if (grp_alloc_blk >= 0)
1335 goto allocated; 1340 goto allocated;
1336 } 1341 }
1337 /* 1342 /*
@@ -1360,18 +1365,18 @@ allocated:
1360 if (fatal) 1365 if (fatal)
1361 goto out; 1366 goto out;
1362 1367
1363 target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb) 1368 ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);
1364 + le32_to_cpu(es->s_first_data_block);
1365 1369
1366 if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) || 1370 if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
1367 in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) || 1371 in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
1368 in_range(target_block, le32_to_cpu(gdp->bg_inode_table), 1372 in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
1369 EXT3_SB(sb)->s_itb_per_group) || 1373 EXT3_SB(sb)->s_itb_per_group) ||
1370 in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table), 1374 in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
1371 EXT3_SB(sb)->s_itb_per_group)) 1375 EXT3_SB(sb)->s_itb_per_group))
1372 ext3_error(sb, "ext3_new_block", 1376 ext3_error(sb, "ext3_new_block",
1373 "Allocating block in system zone - " 1377 "Allocating block in system zone - "
1374 "blocks from %u, length %lu", target_block, num); 1378 "blocks from "E3FSBLK", length %lu",
1379 ret_block, num);
1375 1380
1376 performed_allocation = 1; 1381 performed_allocation = 1;
1377 1382
@@ -1380,7 +1385,7 @@ allocated:
1380 struct buffer_head *debug_bh; 1385 struct buffer_head *debug_bh;
1381 1386
1382 /* Record bitmap buffer state in the newly allocated block */ 1387 /* Record bitmap buffer state in the newly allocated block */
1383 debug_bh = sb_find_get_block(sb, target_block); 1388 debug_bh = sb_find_get_block(sb, ret_block);
1384 if (debug_bh) { 1389 if (debug_bh) {
1385 BUFFER_TRACE(debug_bh, "state when allocated"); 1390 BUFFER_TRACE(debug_bh, "state when allocated");
1386 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); 1391 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
@@ -1393,24 +1398,21 @@ allocated:
1393 int i; 1398 int i;
1394 1399
1395 for (i = 0; i < num; i++) { 1400 for (i = 0; i < num; i++) {
1396 if (ext3_test_bit(ret_block, 1401 if (ext3_test_bit(grp_alloc_blk+i,
1397 bh2jh(bitmap_bh)->b_committed_data)) { 1402 bh2jh(bitmap_bh)->b_committed_data)) {
1398 printk("%s: block was unexpectedly set in " 1403 printk("%s: block was unexpectedly set in "
1399 "b_committed_data\n", __FUNCTION__); 1404 "b_committed_data\n", __FUNCTION__);
1400 } 1405 }
1401 } 1406 }
1402 } 1407 }
1403 ext3_debug("found bit %d\n", ret_block); 1408 ext3_debug("found bit %d\n", grp_alloc_blk);
1404 spin_unlock(sb_bgl_lock(sbi, group_no)); 1409 spin_unlock(sb_bgl_lock(sbi, group_no));
1405 jbd_unlock_bh_state(bitmap_bh); 1410 jbd_unlock_bh_state(bitmap_bh);
1406#endif 1411#endif
1407 1412
1408 /* ret_block was blockgroup-relative. Now it becomes fs-relative */
1409 ret_block = target_block;
1410
1411 if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { 1413 if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
1412 ext3_error(sb, "ext3_new_block", 1414 ext3_error(sb, "ext3_new_block",
1413 "block(%d) >= blocks count(%d) - " 1415 "block("E3FSBLK") >= blocks count(%d) - "
1414 "block_group = %d, es == %p ", ret_block, 1416 "block_group = %d, es == %p ", ret_block,
1415 le32_to_cpu(es->s_blocks_count), group_no, es); 1417 le32_to_cpu(es->s_blocks_count), group_no, es);
1416 goto out; 1418 goto out;
@@ -1421,7 +1423,7 @@ allocated:
1421 * list of some description. We don't know in advance whether 1423 * list of some description. We don't know in advance whether
1422 * the caller wants to use it as metadata or data. 1424 * the caller wants to use it as metadata or data.
1423 */ 1425 */
1424 ext3_debug("allocating block %d. Goal hits %d of %d.\n", 1426 ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
1425 ret_block, goal_hits, goal_attempts); 1427 ret_block, goal_hits, goal_attempts);
1426 1428
1427 spin_lock(sb_bgl_lock(sbi, group_no)); 1429 spin_lock(sb_bgl_lock(sbi, group_no));
@@ -1461,23 +1463,24 @@ out:
1461 return 0; 1463 return 0;
1462} 1464}
1463 1465
1464int ext3_new_block(handle_t *handle, struct inode *inode, 1466ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
1465 unsigned long goal, int *errp) 1467 ext3_fsblk_t goal, int *errp)
1466{ 1468{
1467 unsigned long count = 1; 1469 unsigned long count = 1;
1468 1470
1469 return ext3_new_blocks(handle, inode, goal, &count, errp); 1471 return ext3_new_blocks(handle, inode, goal, &count, errp);
1470} 1472}
1471 1473
1472unsigned long ext3_count_free_blocks(struct super_block *sb) 1474ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1473{ 1475{
1474 unsigned long desc_count; 1476 ext3_fsblk_t desc_count;
1475 struct ext3_group_desc *gdp; 1477 struct ext3_group_desc *gdp;
1476 int i; 1478 int i;
1477 unsigned long ngroups = EXT3_SB(sb)->s_groups_count; 1479 unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
1478#ifdef EXT3FS_DEBUG 1480#ifdef EXT3FS_DEBUG
1479 struct ext3_super_block *es; 1481 struct ext3_super_block *es;
1480 unsigned long bitmap_count, x; 1482 ext3_fsblk_t bitmap_count;
1483 unsigned long x;
1481 struct buffer_head *bitmap_bh = NULL; 1484 struct buffer_head *bitmap_bh = NULL;
1482 1485
1483 es = EXT3_SB(sb)->s_es; 1486 es = EXT3_SB(sb)->s_es;
@@ -1502,8 +1505,10 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
1502 bitmap_count += x; 1505 bitmap_count += x;
1503 } 1506 }
1504 brelse(bitmap_bh); 1507 brelse(bitmap_bh);
1505 printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n", 1508 printk("ext3_count_free_blocks: stored = "E3FSBLK
1506 le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); 1509 ", computed = "E3FSBLK", "E3FSBLK"\n",
1510 le32_to_cpu(es->s_free_blocks_count),
1511 desc_count, bitmap_count);
1507 return bitmap_count; 1512 return bitmap_count;
1508#else 1513#else
1509 desc_count = 0; 1514 desc_count = 0;
@@ -1520,7 +1525,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
1520} 1525}
1521 1526
1522static inline int 1527static inline int
1523block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) 1528block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map)
1524{ 1529{
1525 return ext3_test_bit ((block - 1530 return ext3_test_bit ((block -
1526 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % 1531 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index f37528ed222e..fbb0d4ed07d4 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -284,7 +284,7 @@ static void free_rb_tree_fname(struct rb_root *root)
284 * beginning of the loop and try to free the parent 284 * beginning of the loop and try to free the parent
285 * node. 285 * node.
286 */ 286 */
287 parent = n->rb_parent; 287 parent = rb_parent(n);
288 fname = rb_entry(n, struct fname, rb_hash); 288 fname = rb_entry(n, struct fname, rb_hash);
289 while (fname) { 289 while (fname) {
290 struct fname * old = fname; 290 struct fname * old = fname;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index dc826464f313..36546ed36a14 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -262,9 +262,11 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
262 int ngroups = sbi->s_groups_count; 262 int ngroups = sbi->s_groups_count;
263 int inodes_per_group = EXT3_INODES_PER_GROUP(sb); 263 int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
264 int freei, avefreei; 264 int freei, avefreei;
265 int freeb, avefreeb; 265 ext3_fsblk_t freeb, avefreeb;
266 int blocks_per_dir, ndirs; 266 ext3_fsblk_t blocks_per_dir;
267 int max_debt, max_dirs, min_blocks, min_inodes; 267 int ndirs;
268 int max_debt, max_dirs, min_inodes;
269 ext3_grpblk_t min_blocks;
268 int group = -1, i; 270 int group = -1, i;
269 struct ext3_group_desc *desc; 271 struct ext3_group_desc *desc;
270 struct buffer_head *bh; 272 struct buffer_head *bh;
@@ -307,7 +309,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
307 min_inodes = avefreei - inodes_per_group / 4; 309 min_inodes = avefreei - inodes_per_group / 4;
308 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; 310 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
309 311
310 max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST); 312 max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
311 if (max_debt * INODE_COST > inodes_per_group) 313 if (max_debt * INODE_COST > inodes_per_group)
312 max_debt = inodes_per_group / INODE_COST; 314 max_debt = inodes_per_group / INODE_COST;
313 if (max_debt > 255) 315 if (max_debt > 255)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2edd7eec88fd..f804d5e9d60c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -62,7 +62,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
62 * still needs to be revoked. 62 * still needs to be revoked.
63 */ 63 */
64int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, 64int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
65 struct buffer_head *bh, int blocknr) 65 struct buffer_head *bh, ext3_fsblk_t blocknr)
66{ 66{
67 int err; 67 int err;
68 68
@@ -407,13 +407,13 @@ no_block:
407 * 407 *
408 * Caller must make sure that @ind is valid and will stay that way. 408 * Caller must make sure that @ind is valid and will stay that way.
409 */ 409 */
410static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) 410static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
411{ 411{
412 struct ext3_inode_info *ei = EXT3_I(inode); 412 struct ext3_inode_info *ei = EXT3_I(inode);
413 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; 413 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
414 __le32 *p; 414 __le32 *p;
415 unsigned long bg_start; 415 ext3_fsblk_t bg_start;
416 unsigned long colour; 416 ext3_grpblk_t colour;
417 417
418 /* Try to find previous block */ 418 /* Try to find previous block */
419 for (p = ind->p - 1; p >= start; p--) { 419 for (p = ind->p - 1; p >= start; p--) {
@@ -429,8 +429,7 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
429 * It is going to be referred to from the inode itself? OK, just put it 429 * It is going to be referred to from the inode itself? OK, just put it
430 * into the same cylinder group then. 430 * into the same cylinder group then.
431 */ 431 */
432 bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + 432 bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group);
433 le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
434 colour = (current->pid % 16) * 433 colour = (current->pid % 16) *
435 (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); 434 (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
436 return bg_start + colour; 435 return bg_start + colour;
@@ -448,7 +447,7 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
448 * stores it in *@goal and returns zero. 447 * stores it in *@goal and returns zero.
449 */ 448 */
450 449
451static unsigned long ext3_find_goal(struct inode *inode, long block, 450static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
452 Indirect chain[4], Indirect *partial) 451 Indirect chain[4], Indirect *partial)
453{ 452{
454 struct ext3_block_alloc_info *block_i; 453 struct ext3_block_alloc_info *block_i;
@@ -516,13 +515,13 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
516 * direct blocks 515 * direct blocks
517 */ 516 */
518static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, 517static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
519 unsigned long goal, int indirect_blks, int blks, 518 ext3_fsblk_t goal, int indirect_blks, int blks,
520 unsigned long long new_blocks[4], int *err) 519 ext3_fsblk_t new_blocks[4], int *err)
521{ 520{
522 int target, i; 521 int target, i;
523 unsigned long count = 0; 522 unsigned long count = 0;
524 int index = 0; 523 int index = 0;
525 unsigned long current_block = 0; 524 ext3_fsblk_t current_block = 0;
526 int ret = 0; 525 int ret = 0;
527 526
528 /* 527 /*
@@ -592,7 +591,7 @@ failed_out:
592 * as described above and return 0. 591 * as described above and return 0.
593 */ 592 */
594static int ext3_alloc_branch(handle_t *handle, struct inode *inode, 593static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
595 int indirect_blks, int *blks, unsigned long goal, 594 int indirect_blks, int *blks, ext3_fsblk_t goal,
596 int *offsets, Indirect *branch) 595 int *offsets, Indirect *branch)
597{ 596{
598 int blocksize = inode->i_sb->s_blocksize; 597 int blocksize = inode->i_sb->s_blocksize;
@@ -600,8 +599,8 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
600 int err = 0; 599 int err = 0;
601 struct buffer_head *bh; 600 struct buffer_head *bh;
602 int num; 601 int num;
603 unsigned long long new_blocks[4]; 602 ext3_fsblk_t new_blocks[4];
604 unsigned long long current_block; 603 ext3_fsblk_t current_block;
605 604
606 num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, 605 num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
607 *blks, new_blocks, &err); 606 *blks, new_blocks, &err);
@@ -688,7 +687,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
688 int i; 687 int i;
689 int err = 0; 688 int err = 0;
690 struct ext3_block_alloc_info *block_i; 689 struct ext3_block_alloc_info *block_i;
691 unsigned long current_block; 690 ext3_fsblk_t current_block;
692 691
693 block_i = EXT3_I(inode)->i_block_alloc_info; 692 block_i = EXT3_I(inode)->i_block_alloc_info;
694 /* 693 /*
@@ -795,13 +794,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
795 int offsets[4]; 794 int offsets[4];
796 Indirect chain[4]; 795 Indirect chain[4];
797 Indirect *partial; 796 Indirect *partial;
798 unsigned long goal; 797 ext3_fsblk_t goal;
799 int indirect_blks; 798 int indirect_blks;
800 int blocks_to_boundary = 0; 799 int blocks_to_boundary = 0;
801 int depth; 800 int depth;
802 struct ext3_inode_info *ei = EXT3_I(inode); 801 struct ext3_inode_info *ei = EXT3_I(inode);
803 int count = 0; 802 int count = 0;
804 unsigned long first_block = 0; 803 ext3_fsblk_t first_block = 0;
805 804
806 805
807 J_ASSERT(handle != NULL || create == 0); 806 J_ASSERT(handle != NULL || create == 0);
@@ -819,7 +818,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
819 count++; 818 count++;
820 /*map more blocks*/ 819 /*map more blocks*/
821 while (count < maxblocks && count <= blocks_to_boundary) { 820 while (count < maxblocks && count <= blocks_to_boundary) {
822 unsigned long blk; 821 ext3_fsblk_t blk;
823 822
824 if (!verify_chain(chain, partial)) { 823 if (!verify_chain(chain, partial)) {
825 /* 824 /*
@@ -1699,7 +1698,7 @@ static int ext3_journalled_set_page_dirty(struct page *page)
1699 return __set_page_dirty_nobuffers(page); 1698 return __set_page_dirty_nobuffers(page);
1700} 1699}
1701 1700
1702static struct address_space_operations ext3_ordered_aops = { 1701static const struct address_space_operations ext3_ordered_aops = {
1703 .readpage = ext3_readpage, 1702 .readpage = ext3_readpage,
1704 .readpages = ext3_readpages, 1703 .readpages = ext3_readpages,
1705 .writepage = ext3_ordered_writepage, 1704 .writepage = ext3_ordered_writepage,
@@ -1713,7 +1712,7 @@ static struct address_space_operations ext3_ordered_aops = {
1713 .migratepage = buffer_migrate_page, 1712 .migratepage = buffer_migrate_page,
1714}; 1713};
1715 1714
1716static struct address_space_operations ext3_writeback_aops = { 1715static const struct address_space_operations ext3_writeback_aops = {
1717 .readpage = ext3_readpage, 1716 .readpage = ext3_readpage,
1718 .readpages = ext3_readpages, 1717 .readpages = ext3_readpages,
1719 .writepage = ext3_writeback_writepage, 1718 .writepage = ext3_writeback_writepage,
@@ -1727,7 +1726,7 @@ static struct address_space_operations ext3_writeback_aops = {
1727 .migratepage = buffer_migrate_page, 1726 .migratepage = buffer_migrate_page,
1728}; 1727};
1729 1728
1730static struct address_space_operations ext3_journalled_aops = { 1729static const struct address_space_operations ext3_journalled_aops = {
1731 .readpage = ext3_readpage, 1730 .readpage = ext3_readpage,
1732 .readpages = ext3_readpages, 1731 .readpages = ext3_readpages,
1733 .writepage = ext3_journalled_writepage, 1732 .writepage = ext3_journalled_writepage,
@@ -1759,7 +1758,7 @@ void ext3_set_aops(struct inode *inode)
1759static int ext3_block_truncate_page(handle_t *handle, struct page *page, 1758static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1760 struct address_space *mapping, loff_t from) 1759 struct address_space *mapping, loff_t from)
1761{ 1760{
1762 unsigned long index = from >> PAGE_CACHE_SHIFT; 1761 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
1763 unsigned offset = from & (PAGE_CACHE_SIZE-1); 1762 unsigned offset = from & (PAGE_CACHE_SIZE-1);
1764 unsigned blocksize, iblock, length, pos; 1763 unsigned blocksize, iblock, length, pos;
1765 struct inode *inode = mapping->host; 1764 struct inode *inode = mapping->host;
@@ -1960,7 +1959,7 @@ no_top:
1960 * than `count' because there can be holes in there. 1959 * than `count' because there can be holes in there.
1961 */ 1960 */
1962static void ext3_clear_blocks(handle_t *handle, struct inode *inode, 1961static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
1963 struct buffer_head *bh, unsigned long block_to_free, 1962 struct buffer_head *bh, ext3_fsblk_t block_to_free,
1964 unsigned long count, __le32 *first, __le32 *last) 1963 unsigned long count, __le32 *first, __le32 *last)
1965{ 1964{
1966 __le32 *p; 1965 __le32 *p;
@@ -2022,12 +2021,12 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2022 struct buffer_head *this_bh, 2021 struct buffer_head *this_bh,
2023 __le32 *first, __le32 *last) 2022 __le32 *first, __le32 *last)
2024{ 2023{
2025 unsigned long block_to_free = 0; /* Starting block # of a run */ 2024 ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */
2026 unsigned long count = 0; /* Number of blocks in the run */ 2025 unsigned long count = 0; /* Number of blocks in the run */
2027 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind 2026 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
2028 corresponding to 2027 corresponding to
2029 block_to_free */ 2028 block_to_free */
2030 unsigned long nr; /* Current block # */ 2029 ext3_fsblk_t nr; /* Current block # */
2031 __le32 *p; /* Pointer into inode/ind 2030 __le32 *p; /* Pointer into inode/ind
2032 for current block */ 2031 for current block */
2033 int err; 2032 int err;
@@ -2089,7 +2088,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2089 struct buffer_head *parent_bh, 2088 struct buffer_head *parent_bh,
2090 __le32 *first, __le32 *last, int depth) 2089 __le32 *first, __le32 *last, int depth)
2091{ 2090{
2092 unsigned long nr; 2091 ext3_fsblk_t nr;
2093 __le32 *p; 2092 __le32 *p;
2094 2093
2095 if (is_handle_aborted(handle)) 2094 if (is_handle_aborted(handle))
@@ -2113,7 +2112,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2113 */ 2112 */
2114 if (!bh) { 2113 if (!bh) {
2115 ext3_error(inode->i_sb, "ext3_free_branches", 2114 ext3_error(inode->i_sb, "ext3_free_branches",
2116 "Read failure, inode=%ld, block=%ld", 2115 "Read failure, inode=%ld, block="E3FSBLK,
2117 inode->i_ino, nr); 2116 inode->i_ino, nr);
2118 continue; 2117 continue;
2119 } 2118 }
@@ -2394,11 +2393,12 @@ out_stop:
2394 ext3_journal_stop(handle); 2393 ext3_journal_stop(handle);
2395} 2394}
2396 2395
2397static unsigned long ext3_get_inode_block(struct super_block *sb, 2396static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2398 unsigned long ino, struct ext3_iloc *iloc) 2397 unsigned long ino, struct ext3_iloc *iloc)
2399{ 2398{
2400 unsigned long desc, group_desc, block_group; 2399 unsigned long desc, group_desc, block_group;
2401 unsigned long offset, block; 2400 unsigned long offset;
2401 ext3_fsblk_t block;
2402 struct buffer_head *bh; 2402 struct buffer_head *bh;
2403 struct ext3_group_desc * gdp; 2403 struct ext3_group_desc * gdp;
2404 2404
@@ -2448,7 +2448,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
2448static int __ext3_get_inode_loc(struct inode *inode, 2448static int __ext3_get_inode_loc(struct inode *inode,
2449 struct ext3_iloc *iloc, int in_mem) 2449 struct ext3_iloc *iloc, int in_mem)
2450{ 2450{
2451 unsigned long block; 2451 ext3_fsblk_t block;
2452 struct buffer_head *bh; 2452 struct buffer_head *bh;
2453 2453
2454 block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); 2454 block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
@@ -2459,7 +2459,8 @@ static int __ext3_get_inode_loc(struct inode *inode,
2459 if (!bh) { 2459 if (!bh) {
2460 ext3_error (inode->i_sb, "ext3_get_inode_loc", 2460 ext3_error (inode->i_sb, "ext3_get_inode_loc",
2461 "unable to read inode block - " 2461 "unable to read inode block - "
2462 "inode=%lu, block=%lu", inode->i_ino, block); 2462 "inode=%lu, block="E3FSBLK,
2463 inode->i_ino, block);
2463 return -EIO; 2464 return -EIO;
2464 } 2465 }
2465 if (!buffer_uptodate(bh)) { 2466 if (!buffer_uptodate(bh)) {
@@ -2540,7 +2541,7 @@ make_io:
2540 if (!buffer_uptodate(bh)) { 2541 if (!buffer_uptodate(bh)) {
2541 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2542 ext3_error(inode->i_sb, "ext3_get_inode_loc",
2542 "unable to read inode block - " 2543 "unable to read inode block - "
2543 "inode=%lu, block=%lu", 2544 "inode=%lu, block="E3FSBLK,
2544 inode->i_ino, block); 2545 inode->i_ino, block);
2545 brelse(bh); 2546 brelse(bh);
2546 return -EIO; 2547 return -EIO;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 8c22aa9a7fbb..3a6b012d120c 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -204,7 +204,7 @@ flags_err:
204 return 0; 204 return 0;
205 } 205 }
206 case EXT3_IOC_GROUP_EXTEND: { 206 case EXT3_IOC_GROUP_EXTEND: {
207 unsigned long n_blocks_count; 207 ext3_fsblk_t n_blocks_count;
208 struct super_block *sb = inode->i_sb; 208 struct super_block *sb = inode->i_sb;
209 int err; 209 int err;
210 210
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b8f5cd1e540d..d9176dba3698 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1379,7 +1379,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1379 int dx_fallback=0; 1379 int dx_fallback=0;
1380#endif 1380#endif
1381 unsigned blocksize; 1381 unsigned blocksize;
1382 unsigned nlen, rlen;
1383 u32 block, blocks; 1382 u32 block, blocks;
1384 1383
1385 sb = dir->i_sb; 1384 sb = dir->i_sb;
@@ -1417,8 +1416,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1417 return retval; 1416 return retval;
1418 de = (struct ext3_dir_entry_2 *) bh->b_data; 1417 de = (struct ext3_dir_entry_2 *) bh->b_data;
1419 de->inode = 0; 1418 de->inode = 0;
1420 de->rec_len = cpu_to_le16(rlen = blocksize); 1419 de->rec_len = cpu_to_le16(blocksize);
1421 nlen = 0;
1422 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1420 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1423} 1421}
1424 1422
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 34b39e9a1e5a..5e1337fd878a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -8,7 +8,6 @@
8 * This could probably be made into a module, because it is not often in use. 8 * This could probably be made into a module, because it is not often in use.
9 */ 9 */
10 10
11#include <linux/config.h>
12 11
13#define EXT3FS_DEBUG 12#define EXT3FS_DEBUG
14 13
@@ -28,16 +27,16 @@ static int verify_group_input(struct super_block *sb,
28{ 27{
29 struct ext3_sb_info *sbi = EXT3_SB(sb); 28 struct ext3_sb_info *sbi = EXT3_SB(sb);
30 struct ext3_super_block *es = sbi->s_es; 29 struct ext3_super_block *es = sbi->s_es;
31 unsigned start = le32_to_cpu(es->s_blocks_count); 30 ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
32 unsigned end = start + input->blocks_count; 31 ext3_fsblk_t end = start + input->blocks_count;
33 unsigned group = input->group; 32 unsigned group = input->group;
34 unsigned itend = input->inode_table + sbi->s_itb_per_group; 33 ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
35 unsigned overhead = ext3_bg_has_super(sb, group) ? 34 unsigned overhead = ext3_bg_has_super(sb, group) ?
36 (1 + ext3_bg_num_gdb(sb, group) + 35 (1 + ext3_bg_num_gdb(sb, group) +
37 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 36 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
38 unsigned metaend = start + overhead; 37 ext3_fsblk_t metaend = start + overhead;
39 struct buffer_head *bh = NULL; 38 struct buffer_head *bh = NULL;
40 int free_blocks_count; 39 ext3_grpblk_t free_blocks_count;
41 int err = -EINVAL; 40 int err = -EINVAL;
42 41
43 input->free_blocks_count = free_blocks_count = 42 input->free_blocks_count = free_blocks_count =
@@ -64,7 +63,8 @@ static int verify_group_input(struct super_block *sb,
64 ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", 63 ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
65 input->blocks_count); 64 input->blocks_count);
66 else if (!(bh = sb_bread(sb, end - 1))) 65 else if (!(bh = sb_bread(sb, end - 1)))
67 ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)", 66 ext3_warning(sb, __FUNCTION__,
67 "Cannot read last block ("E3FSBLK")",
68 end - 1); 68 end - 1);
69 else if (outside(input->block_bitmap, start, end)) 69 else if (outside(input->block_bitmap, start, end))
70 ext3_warning(sb, __FUNCTION__, 70 ext3_warning(sb, __FUNCTION__,
@@ -77,7 +77,7 @@ static int verify_group_input(struct super_block *sb,
77 else if (outside(input->inode_table, start, end) || 77 else if (outside(input->inode_table, start, end) ||
78 outside(itend - 1, start, end)) 78 outside(itend - 1, start, end))
79 ext3_warning(sb, __FUNCTION__, 79 ext3_warning(sb, __FUNCTION__,
80 "Inode table not in group (blocks %u-%u)", 80 "Inode table not in group (blocks %u-"E3FSBLK")",
81 input->inode_table, itend - 1); 81 input->inode_table, itend - 1);
82 else if (input->inode_bitmap == input->block_bitmap) 82 else if (input->inode_bitmap == input->block_bitmap)
83 ext3_warning(sb, __FUNCTION__, 83 ext3_warning(sb, __FUNCTION__,
@@ -85,24 +85,27 @@ static int verify_group_input(struct super_block *sb,
85 input->block_bitmap); 85 input->block_bitmap);
86 else if (inside(input->block_bitmap, input->inode_table, itend)) 86 else if (inside(input->block_bitmap, input->inode_table, itend))
87 ext3_warning(sb, __FUNCTION__, 87 ext3_warning(sb, __FUNCTION__,
88 "Block bitmap (%u) in inode table (%u-%u)", 88 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
89 input->block_bitmap, input->inode_table, itend-1); 89 input->block_bitmap, input->inode_table, itend-1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 90 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext3_warning(sb, __FUNCTION__, 91 ext3_warning(sb, __FUNCTION__,
92 "Inode bitmap (%u) in inode table (%u-%u)", 92 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
93 input->inode_bitmap, input->inode_table, itend-1); 93 input->inode_bitmap, input->inode_table, itend-1);
94 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
95 ext3_warning(sb, __FUNCTION__, 95 ext3_warning(sb, __FUNCTION__,
96 "Block bitmap (%u) in GDT table (%u-%u)", 96 "Block bitmap (%u) in GDT table"
97 " ("E3FSBLK"-"E3FSBLK")",
97 input->block_bitmap, start, metaend - 1); 98 input->block_bitmap, start, metaend - 1);
98 else if (inside(input->inode_bitmap, start, metaend)) 99 else if (inside(input->inode_bitmap, start, metaend))
99 ext3_warning(sb, __FUNCTION__, 100 ext3_warning(sb, __FUNCTION__,
100 "Inode bitmap (%u) in GDT table (%u-%u)", 101 "Inode bitmap (%u) in GDT table"
102 " ("E3FSBLK"-"E3FSBLK")",
101 input->inode_bitmap, start, metaend - 1); 103 input->inode_bitmap, start, metaend - 1);
102 else if (inside(input->inode_table, start, metaend) || 104 else if (inside(input->inode_table, start, metaend) ||
103 inside(itend - 1, start, metaend)) 105 inside(itend - 1, start, metaend))
104 ext3_warning(sb, __FUNCTION__, 106 ext3_warning(sb, __FUNCTION__,
105 "Inode table (%u-%u) overlaps GDT table (%u-%u)", 107 "Inode table (%u-"E3FSBLK") overlaps"
108 "GDT table ("E3FSBLK"-"E3FSBLK")",
106 input->inode_table, itend - 1, start, metaend - 1); 109 input->inode_table, itend - 1, start, metaend - 1);
107 else 110 else
108 err = 0; 111 err = 0;
@@ -112,7 +115,7 @@ static int verify_group_input(struct super_block *sb,
112} 115}
113 116
114static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 117static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
115 unsigned long blk) 118 ext3_fsblk_t blk)
116{ 119{
117 struct buffer_head *bh; 120 struct buffer_head *bh;
118 int err; 121 int err;
@@ -163,15 +166,14 @@ static int setup_new_group_blocks(struct super_block *sb,
163 struct ext3_new_group_data *input) 166 struct ext3_new_group_data *input)
164{ 167{
165 struct ext3_sb_info *sbi = EXT3_SB(sb); 168 struct ext3_sb_info *sbi = EXT3_SB(sb);
166 unsigned long start = input->group * sbi->s_blocks_per_group + 169 ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
167 le32_to_cpu(sbi->s_es->s_first_data_block);
168 int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 170 int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
169 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 171 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
170 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); 172 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
171 struct buffer_head *bh; 173 struct buffer_head *bh;
172 handle_t *handle; 174 handle_t *handle;
173 unsigned long block; 175 ext3_fsblk_t block;
174 int bit; 176 ext3_grpblk_t bit;
175 int i; 177 int i;
176 int err = 0, err2; 178 int err = 0, err2;
177 179
@@ -328,7 +330,7 @@ static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
328static int verify_reserved_gdb(struct super_block *sb, 330static int verify_reserved_gdb(struct super_block *sb,
329 struct buffer_head *primary) 331 struct buffer_head *primary)
330{ 332{
331 const unsigned long blk = primary->b_blocknr; 333 const ext3_fsblk_t blk = primary->b_blocknr;
332 const unsigned long end = EXT3_SB(sb)->s_groups_count; 334 const unsigned long end = EXT3_SB(sb)->s_groups_count;
333 unsigned three = 1; 335 unsigned three = 1;
334 unsigned five = 5; 336 unsigned five = 5;
@@ -340,7 +342,8 @@ static int verify_reserved_gdb(struct super_block *sb,
340 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 342 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
341 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 343 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
342 ext3_warning(sb, __FUNCTION__, 344 ext3_warning(sb, __FUNCTION__,
343 "reserved GDT %ld missing grp %d (%ld)", 345 "reserved GDT "E3FSBLK
346 " missing grp %d ("E3FSBLK")",
344 blk, grp, 347 blk, grp,
345 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); 348 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
346 return -EINVAL; 349 return -EINVAL;
@@ -372,7 +375,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
372 struct super_block *sb = inode->i_sb; 375 struct super_block *sb = inode->i_sb;
373 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 376 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
374 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); 377 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
375 unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 378 ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
376 struct buffer_head **o_group_desc, **n_group_desc; 379 struct buffer_head **o_group_desc, **n_group_desc;
377 struct buffer_head *dind; 380 struct buffer_head *dind;
378 int gdbackups; 381 int gdbackups;
@@ -417,7 +420,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
417 data = (__u32 *)dind->b_data; 420 data = (__u32 *)dind->b_data;
418 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 421 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
419 ext3_warning(sb, __FUNCTION__, 422 ext3_warning(sb, __FUNCTION__,
420 "new group %u GDT block %lu not reserved", 423 "new group %u GDT block "E3FSBLK" not reserved",
421 input->group, gdblock); 424 input->group, gdblock);
422 err = -EINVAL; 425 err = -EINVAL;
423 goto exit_dind; 426 goto exit_dind;
@@ -515,7 +518,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
515 struct buffer_head **primary; 518 struct buffer_head **primary;
516 struct buffer_head *dind; 519 struct buffer_head *dind;
517 struct ext3_iloc iloc; 520 struct ext3_iloc iloc;
518 unsigned long blk; 521 ext3_fsblk_t blk;
519 __u32 *data, *end; 522 __u32 *data, *end;
520 int gdbackups = 0; 523 int gdbackups = 0;
521 int res, i; 524 int res, i;
@@ -540,7 +543,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
540 for (res = 0; res < reserved_gdb; res++, blk++) { 543 for (res = 0; res < reserved_gdb; res++, blk++) {
541 if (le32_to_cpu(*data) != blk) { 544 if (le32_to_cpu(*data) != blk) {
542 ext3_warning(sb, __FUNCTION__, 545 ext3_warning(sb, __FUNCTION__,
543 "reserved block %lu not at offset %ld", 546 "reserved block "E3FSBLK
547 " not at offset %ld",
544 blk, (long)(data - (__u32 *)dind->b_data)); 548 blk, (long)(data - (__u32 *)dind->b_data));
545 err = -EINVAL; 549 err = -EINVAL;
546 goto exit_bh; 550 goto exit_bh;
@@ -902,15 +906,16 @@ exit_put:
902 * GDT blocks are reserved to grow to the desired size. 906 * GDT blocks are reserved to grow to the desired size.
903 */ 907 */
904int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, 908int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
905 unsigned long n_blocks_count) 909 ext3_fsblk_t n_blocks_count)
906{ 910{
907 unsigned long o_blocks_count; 911 ext3_fsblk_t o_blocks_count;
908 unsigned long o_groups_count; 912 unsigned long o_groups_count;
909 unsigned long last; 913 ext3_grpblk_t last;
910 int add; 914 ext3_grpblk_t add;
911 struct buffer_head * bh; 915 struct buffer_head * bh;
912 handle_t *handle; 916 handle_t *handle;
913 int err, freed_blocks; 917 int err;
918 unsigned long freed_blocks;
914 919
915 /* We don't need to worry about locking wrt other resizers just 920 /* We don't need to worry about locking wrt other resizers just
916 * yet: we're going to revalidate es->s_blocks_count after 921 * yet: we're going to revalidate es->s_blocks_count after
@@ -919,12 +924,22 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
919 o_groups_count = EXT3_SB(sb)->s_groups_count; 924 o_groups_count = EXT3_SB(sb)->s_groups_count;
920 925
921 if (test_opt(sb, DEBUG)) 926 if (test_opt(sb, DEBUG))
922 printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n", 927 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
923 o_blocks_count, n_blocks_count); 928 o_blocks_count, n_blocks_count);
924 929
925 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 930 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
926 return 0; 931 return 0;
927 932
933 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
934 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
935 " too large to resize to %lu blocks safely\n",
936 sb->s_id, n_blocks_count);
937 if (sizeof(sector_t) < 8)
938 ext3_warning(sb, __FUNCTION__,
939 "CONFIG_LBD not enabled\n");
940 return -EINVAL;
941 }
942
928 if (n_blocks_count < o_blocks_count) { 943 if (n_blocks_count < o_blocks_count) {
929 ext3_warning(sb, __FUNCTION__, 944 ext3_warning(sb, __FUNCTION__,
930 "can't shrink FS - resize aborted"); 945 "can't shrink FS - resize aborted");
@@ -948,7 +963,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
948 963
949 if (o_blocks_count + add < n_blocks_count) 964 if (o_blocks_count + add < n_blocks_count)
950 ext3_warning(sb, __FUNCTION__, 965 ext3_warning(sb, __FUNCTION__,
951 "will only finish group (%lu blocks, %u new)", 966 "will only finish group ("E3FSBLK
967 " blocks, %u new)",
952 o_blocks_count + add, add); 968 o_blocks_count + add, add);
953 969
954 /* See if the device is actually as big as what was requested */ 970 /* See if the device is actually as big as what was requested */
@@ -991,10 +1007,10 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
991 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1007 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
992 sb->s_dirt = 1; 1008 sb->s_dirt = 1;
993 unlock_super(sb); 1009 unlock_super(sb);
994 ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count, 1010 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
995 o_blocks_count + add); 1011 o_blocks_count + add);
996 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1012 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
997 ext3_debug("freed blocks %ld through %ld\n", o_blocks_count, 1013 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
998 o_blocks_count + add); 1014 o_blocks_count + add);
999 if ((err = ext3_journal_stop(handle))) 1015 if ((err = ext3_journal_stop(handle)))
1000 goto exit_put; 1016 goto exit_put;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f8a5266ea1ff..f2dd71336612 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -16,7 +16,6 @@
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */ 17 */
18 18
19#include <linux/config.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/string.h> 20#include <linux/string.h>
22#include <linux/fs.h> 21#include <linux/fs.h>
@@ -58,7 +57,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait);
58static const char *ext3_decode_error(struct super_block * sb, int errno, 57static const char *ext3_decode_error(struct super_block * sb, int errno,
59 char nbuf[16]); 58 char nbuf[16]);
60static int ext3_remount (struct super_block * sb, int * flags, char * data); 59static int ext3_remount (struct super_block * sb, int * flags, char * data);
61static int ext3_statfs (struct super_block * sb, struct kstatfs * buf); 60static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
62static void ext3_unlockfs(struct super_block *sb); 61static void ext3_unlockfs(struct super_block *sb);
63static void ext3_write_super (struct super_block * sb); 62static void ext3_write_super (struct super_block * sb);
64static void ext3_write_super_lockfs(struct super_block *sb); 63static void ext3_write_super_lockfs(struct super_block *sb);
@@ -499,20 +498,21 @@ static void ext3_clear_inode(struct inode *inode)
499{ 498{
500 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 499 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
501#ifdef CONFIG_EXT3_FS_POSIX_ACL 500#ifdef CONFIG_EXT3_FS_POSIX_ACL
502 if (EXT3_I(inode)->i_acl && 501 if (EXT3_I(inode)->i_acl &&
503 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { 502 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
504 posix_acl_release(EXT3_I(inode)->i_acl); 503 posix_acl_release(EXT3_I(inode)->i_acl);
505 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; 504 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
506 } 505 }
507 if (EXT3_I(inode)->i_default_acl && 506 if (EXT3_I(inode)->i_default_acl &&
508 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { 507 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
509 posix_acl_release(EXT3_I(inode)->i_default_acl); 508 posix_acl_release(EXT3_I(inode)->i_default_acl);
510 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; 509 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
511 } 510 }
512#endif 511#endif
513 ext3_discard_reservation(inode); 512 ext3_discard_reservation(inode);
514 EXT3_I(inode)->i_block_alloc_info = NULL; 513 EXT3_I(inode)->i_block_alloc_info = NULL;
515 kfree(rsv); 514 if (unlikely(rsv))
515 kfree(rsv);
516} 516}
517 517
518static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 518static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -629,7 +629,7 @@ enum {
629 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 629 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
630 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 630 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
631 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 631 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
632 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 632 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
633 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 633 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
634 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 634 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
635 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 635 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -665,6 +665,7 @@ static match_table_t tokens = {
665 {Opt_noreservation, "noreservation"}, 665 {Opt_noreservation, "noreservation"},
666 {Opt_noload, "noload"}, 666 {Opt_noload, "noload"},
667 {Opt_nobh, "nobh"}, 667 {Opt_nobh, "nobh"},
668 {Opt_bh, "bh"},
668 {Opt_commit, "commit=%u"}, 669 {Opt_commit, "commit=%u"},
669 {Opt_journal_update, "journal=update"}, 670 {Opt_journal_update, "journal=update"},
670 {Opt_journal_inum, "journal=%u"}, 671 {Opt_journal_inum, "journal=%u"},
@@ -688,14 +689,15 @@ static match_table_t tokens = {
688 {Opt_resize, "resize"}, 689 {Opt_resize, "resize"},
689}; 690};
690 691
691static unsigned long get_sb_block(void **data) 692static ext3_fsblk_t get_sb_block(void **data)
692{ 693{
693 unsigned long sb_block; 694 ext3_fsblk_t sb_block;
694 char *options = (char *) *data; 695 char *options = (char *) *data;
695 696
696 if (!options || strncmp(options, "sb=", 3) != 0) 697 if (!options || strncmp(options, "sb=", 3) != 0)
697 return 1; /* Default location */ 698 return 1; /* Default location */
698 options += 3; 699 options += 3;
700 /*todo: use simple_strtoll with >32bit ext3 */
699 sb_block = simple_strtoul(options, &options, 0); 701 sb_block = simple_strtoul(options, &options, 0);
700 if (*options && *options != ',') { 702 if (*options && *options != ',') {
701 printk("EXT3-fs: Invalid sb specification: %s\n", 703 printk("EXT3-fs: Invalid sb specification: %s\n",
@@ -710,7 +712,7 @@ static unsigned long get_sb_block(void **data)
710 712
711static int parse_options (char *options, struct super_block *sb, 713static int parse_options (char *options, struct super_block *sb,
712 unsigned long *inum, unsigned long *journal_devnum, 714 unsigned long *inum, unsigned long *journal_devnum,
713 unsigned long *n_blocks_count, int is_remount) 715 ext3_fsblk_t *n_blocks_count, int is_remount)
714{ 716{
715 struct ext3_sb_info *sbi = EXT3_SB(sb); 717 struct ext3_sb_info *sbi = EXT3_SB(sb);
716 char * p; 718 char * p;
@@ -1012,6 +1014,9 @@ clear_qf_name:
1012 case Opt_nobh: 1014 case Opt_nobh:
1013 set_opt(sbi->s_mount_opt, NOBH); 1015 set_opt(sbi->s_mount_opt, NOBH);
1014 break; 1016 break;
1017 case Opt_bh:
1018 clear_opt(sbi->s_mount_opt, NOBH);
1019 break;
1015 default: 1020 default:
1016 printk (KERN_ERR 1021 printk (KERN_ERR
1017 "EXT3-fs: Unrecognized mount option \"%s\" " 1022 "EXT3-fs: Unrecognized mount option \"%s\" "
@@ -1127,7 +1132,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1127static int ext3_check_descriptors (struct super_block * sb) 1132static int ext3_check_descriptors (struct super_block * sb)
1128{ 1133{
1129 struct ext3_sb_info *sbi = EXT3_SB(sb); 1134 struct ext3_sb_info *sbi = EXT3_SB(sb);
1130 unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); 1135 ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block);
1131 struct ext3_group_desc * gdp = NULL; 1136 struct ext3_group_desc * gdp = NULL;
1132 int desc_block = 0; 1137 int desc_block = 0;
1133 int i; 1138 int i;
@@ -1314,15 +1319,14 @@ static loff_t ext3_max_size(int bits)
1314 return res; 1319 return res;
1315} 1320}
1316 1321
1317static unsigned long descriptor_loc(struct super_block *sb, 1322static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1318 unsigned long logic_sb_block, 1323 ext3_fsblk_t logic_sb_block,
1319 int nr) 1324 int nr)
1320{ 1325{
1321 struct ext3_sb_info *sbi = EXT3_SB(sb); 1326 struct ext3_sb_info *sbi = EXT3_SB(sb);
1322 unsigned long bg, first_data_block, first_meta_bg; 1327 unsigned long bg, first_meta_bg;
1323 int has_super = 0; 1328 int has_super = 0;
1324 1329
1325 first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1326 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1330 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1327 1331
1328 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1332 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
@@ -1331,7 +1335,7 @@ static unsigned long descriptor_loc(struct super_block *sb,
1331 bg = sbi->s_desc_per_block * nr; 1335 bg = sbi->s_desc_per_block * nr;
1332 if (ext3_bg_has_super(sb, bg)) 1336 if (ext3_bg_has_super(sb, bg))
1333 has_super = 1; 1337 has_super = 1;
1334 return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); 1338 return (has_super + ext3_group_first_block_no(sb, bg));
1335} 1339}
1336 1340
1337 1341
@@ -1340,9 +1344,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1340 struct buffer_head * bh; 1344 struct buffer_head * bh;
1341 struct ext3_super_block *es = NULL; 1345 struct ext3_super_block *es = NULL;
1342 struct ext3_sb_info *sbi; 1346 struct ext3_sb_info *sbi;
1343 unsigned long block; 1347 ext3_fsblk_t block;
1344 unsigned long sb_block = get_sb_block(&data); 1348 ext3_fsblk_t sb_block = get_sb_block(&data);
1345 unsigned long logic_sb_block; 1349 ext3_fsblk_t logic_sb_block;
1346 unsigned long offset = 0; 1350 unsigned long offset = 0;
1347 unsigned long journal_inum = 0; 1351 unsigned long journal_inum = 0;
1348 unsigned long journal_devnum = 0; 1352 unsigned long journal_devnum = 0;
@@ -1564,6 +1568,16 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1564 goto failed_mount; 1568 goto failed_mount;
1565 } 1569 }
1566 1570
1571 if (le32_to_cpu(es->s_blocks_count) >
1572 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1573 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1574 " too large to mount safely\n", sb->s_id);
1575 if (sizeof(sector_t) < 8)
1576 printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
1577 "enabled\n");
1578 goto failed_mount;
1579 }
1580
1567 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1581 if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1568 goto cantfind_ext3; 1582 goto cantfind_ext3;
1569 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - 1583 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
@@ -1579,9 +1593,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1579 goto failed_mount; 1593 goto failed_mount;
1580 } 1594 }
1581 1595
1582 percpu_counter_init(&sbi->s_freeblocks_counter);
1583 percpu_counter_init(&sbi->s_freeinodes_counter);
1584 percpu_counter_init(&sbi->s_dirs_counter);
1585 bgl_lock_init(&sbi->s_blockgroup_lock); 1596 bgl_lock_init(&sbi->s_blockgroup_lock);
1586 1597
1587 for (i = 0; i < db_count; i++) { 1598 for (i = 0; i < db_count; i++) {
@@ -1595,12 +1606,20 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1595 } 1606 }
1596 } 1607 }
1597 if (!ext3_check_descriptors (sb)) { 1608 if (!ext3_check_descriptors (sb)) {
1598 printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n"); 1609 printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
1599 goto failed_mount2; 1610 goto failed_mount2;
1600 } 1611 }
1601 sbi->s_gdb_count = db_count; 1612 sbi->s_gdb_count = db_count;
1602 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1613 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1603 spin_lock_init(&sbi->s_next_gen_lock); 1614 spin_lock_init(&sbi->s_next_gen_lock);
1615
1616 percpu_counter_init(&sbi->s_freeblocks_counter,
1617 ext3_count_free_blocks(sb));
1618 percpu_counter_init(&sbi->s_freeinodes_counter,
1619 ext3_count_free_inodes(sb));
1620 percpu_counter_init(&sbi->s_dirs_counter,
1621 ext3_count_dirs(sb));
1622
1604 /* per fileystem reservation list head & lock */ 1623 /* per fileystem reservation list head & lock */
1605 spin_lock_init(&sbi->s_rsv_window_lock); 1624 spin_lock_init(&sbi->s_rsv_window_lock);
1606 sbi->s_rsv_window_root = RB_ROOT; 1625 sbi->s_rsv_window_root = RB_ROOT;
@@ -1639,16 +1658,16 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1639 if (!test_opt(sb, NOLOAD) && 1658 if (!test_opt(sb, NOLOAD) &&
1640 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1659 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1641 if (ext3_load_journal(sb, es, journal_devnum)) 1660 if (ext3_load_journal(sb, es, journal_devnum))
1642 goto failed_mount2; 1661 goto failed_mount3;
1643 } else if (journal_inum) { 1662 } else if (journal_inum) {
1644 if (ext3_create_journal(sb, es, journal_inum)) 1663 if (ext3_create_journal(sb, es, journal_inum))
1645 goto failed_mount2; 1664 goto failed_mount3;
1646 } else { 1665 } else {
1647 if (!silent) 1666 if (!silent)
1648 printk (KERN_ERR 1667 printk (KERN_ERR
1649 "ext3: No journal on filesystem on %s\n", 1668 "ext3: No journal on filesystem on %s\n",
1650 sb->s_id); 1669 sb->s_id);
1651 goto failed_mount2; 1670 goto failed_mount3;
1652 } 1671 }
1653 1672
1654 /* We have now updated the journal if required, so we can 1673 /* We have now updated the journal if required, so we can
@@ -1671,7 +1690,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1671 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1690 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
1672 printk(KERN_ERR "EXT3-fs: Journal does not support " 1691 printk(KERN_ERR "EXT3-fs: Journal does not support "
1673 "requested data journaling mode\n"); 1692 "requested data journaling mode\n");
1674 goto failed_mount3; 1693 goto failed_mount4;
1675 } 1694 }
1676 default: 1695 default:
1677 break; 1696 break;
@@ -1694,13 +1713,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1694 if (!sb->s_root) { 1713 if (!sb->s_root) {
1695 printk(KERN_ERR "EXT3-fs: get root inode failed\n"); 1714 printk(KERN_ERR "EXT3-fs: get root inode failed\n");
1696 iput(root); 1715 iput(root);
1697 goto failed_mount3; 1716 goto failed_mount4;
1698 } 1717 }
1699 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1718 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1700 dput(sb->s_root); 1719 dput(sb->s_root);
1701 sb->s_root = NULL; 1720 sb->s_root = NULL;
1702 printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); 1721 printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
1703 goto failed_mount3; 1722 goto failed_mount4;
1704 } 1723 }
1705 1724
1706 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1725 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -1723,13 +1742,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1723 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 1742 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1724 "writeback"); 1743 "writeback");
1725 1744
1726 percpu_counter_mod(&sbi->s_freeblocks_counter,
1727 ext3_count_free_blocks(sb));
1728 percpu_counter_mod(&sbi->s_freeinodes_counter,
1729 ext3_count_free_inodes(sb));
1730 percpu_counter_mod(&sbi->s_dirs_counter,
1731 ext3_count_dirs(sb));
1732
1733 lock_kernel(); 1745 lock_kernel();
1734 return 0; 1746 return 0;
1735 1747
@@ -1739,8 +1751,12 @@ cantfind_ext3:
1739 sb->s_id); 1751 sb->s_id);
1740 goto failed_mount; 1752 goto failed_mount;
1741 1753
1742failed_mount3: 1754failed_mount4:
1743 journal_destroy(sbi->s_journal); 1755 journal_destroy(sbi->s_journal);
1756failed_mount3:
1757 percpu_counter_destroy(&sbi->s_freeblocks_counter);
1758 percpu_counter_destroy(&sbi->s_freeinodes_counter);
1759 percpu_counter_destroy(&sbi->s_dirs_counter);
1744failed_mount2: 1760failed_mount2:
1745 for (i = 0; i < db_count; i++) 1761 for (i = 0; i < db_count; i++)
1746 brelse(sbi->s_group_desc[i]); 1762 brelse(sbi->s_group_desc[i]);
@@ -1827,10 +1843,10 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
1827{ 1843{
1828 struct buffer_head * bh; 1844 struct buffer_head * bh;
1829 journal_t *journal; 1845 journal_t *journal;
1830 int start; 1846 ext3_fsblk_t start;
1831 int len; 1847 ext3_fsblk_t len;
1832 int hblock, blocksize; 1848 int hblock, blocksize;
1833 unsigned long sb_block; 1849 ext3_fsblk_t sb_block;
1834 unsigned long offset; 1850 unsigned long offset;
1835 struct ext3_super_block * es; 1851 struct ext3_super_block * es;
1836 struct block_device *bdev; 1852 struct block_device *bdev;
@@ -2203,7 +2219,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2203{ 2219{
2204 struct ext3_super_block * es; 2220 struct ext3_super_block * es;
2205 struct ext3_sb_info *sbi = EXT3_SB(sb); 2221 struct ext3_sb_info *sbi = EXT3_SB(sb);
2206 unsigned long n_blocks_count = 0; 2222 ext3_fsblk_t n_blocks_count = 0;
2207 unsigned long old_sb_flags; 2223 unsigned long old_sb_flags;
2208 struct ext3_mount_options old_opts; 2224 struct ext3_mount_options old_opts;
2209 int err; 2225 int err;
@@ -2318,11 +2334,12 @@ restore_opts:
2318 return err; 2334 return err;
2319} 2335}
2320 2336
2321static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) 2337static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2322{ 2338{
2339 struct super_block *sb = dentry->d_sb;
2323 struct ext3_sb_info *sbi = EXT3_SB(sb); 2340 struct ext3_sb_info *sbi = EXT3_SB(sb);
2324 struct ext3_super_block *es = sbi->s_es; 2341 struct ext3_super_block *es = sbi->s_es;
2325 unsigned long overhead; 2342 ext3_fsblk_t overhead;
2326 int i; 2343 int i;
2327 2344
2328 if (test_opt (sb, MINIX_DF)) 2345 if (test_opt (sb, MINIX_DF))
@@ -2646,10 +2663,10 @@ out:
2646 2663
2647#endif 2664#endif
2648 2665
2649static struct super_block *ext3_get_sb(struct file_system_type *fs_type, 2666static int ext3_get_sb(struct file_system_type *fs_type,
2650 int flags, const char *dev_name, void *data) 2667 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2651{ 2668{
2652 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super); 2669 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
2653} 2670}
2654 2671
2655static struct file_system_type ext3_fs_type = { 2672static struct file_system_type ext3_fs_type = {
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index e8d60bf6b7df..a44a0562203a 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -225,7 +225,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 error = -ENODATA; 225 error = -ENODATA;
226 if (!EXT3_I(inode)->i_file_acl) 226 if (!EXT3_I(inode)->i_file_acl)
227 goto cleanup; 227 goto cleanup;
228 ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); 228 ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
229 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 229 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
230 if (!bh) 230 if (!bh)
231 goto cleanup; 231 goto cleanup;
@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
234 if (ext3_xattr_check_block(bh)) { 234 if (ext3_xattr_check_block(bh)) {
235bad_block: ext3_error(inode->i_sb, __FUNCTION__, 235bad_block: ext3_error(inode->i_sb, __FUNCTION__,
236 "inode %ld: bad block %d", inode->i_ino, 236 "inode %ld: bad block "E3FSBLK, inode->i_ino,
237 EXT3_I(inode)->i_file_acl); 237 EXT3_I(inode)->i_file_acl);
238 error = -EIO; 238 error = -EIO;
239 goto cleanup; 239 goto cleanup;
@@ -366,7 +366,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
366 error = 0; 366 error = 0;
367 if (!EXT3_I(inode)->i_file_acl) 367 if (!EXT3_I(inode)->i_file_acl)
368 goto cleanup; 368 goto cleanup;
369 ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); 369 ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
370 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 370 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
371 error = -EIO; 371 error = -EIO;
372 if (!bh) 372 if (!bh)
@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
376 if (ext3_xattr_check_block(bh)) { 376 if (ext3_xattr_check_block(bh)) {
377 ext3_error(inode->i_sb, __FUNCTION__, 377 ext3_error(inode->i_sb, __FUNCTION__,
378 "inode %ld: bad block %d", inode->i_ino, 378 "inode %ld: bad block "E3FSBLK, inode->i_ino,
379 EXT3_I(inode)->i_file_acl); 379 EXT3_I(inode)->i_file_acl);
380 error = -EIO; 380 error = -EIO;
381 goto cleanup; 381 goto cleanup;
@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
647 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 647 le32_to_cpu(BHDR(bs->bh)->h_refcount));
648 if (ext3_xattr_check_block(bs->bh)) { 648 if (ext3_xattr_check_block(bs->bh)) {
649 ext3_error(sb, __FUNCTION__, 649 ext3_error(sb, __FUNCTION__,
650 "inode %ld: bad block %d", inode->i_ino, 650 "inode %ld: bad block "E3FSBLK, inode->i_ino,
651 EXT3_I(inode)->i_file_acl); 651 EXT3_I(inode)->i_file_acl);
652 error = -EIO; 652 error = -EIO;
653 goto cleanup; 653 goto cleanup;
@@ -792,11 +792,12 @@ inserted:
792 get_bh(new_bh); 792 get_bh(new_bh);
793 } else { 793 } else {
794 /* We need to allocate a new block */ 794 /* We need to allocate a new block */
795 int goal = le32_to_cpu( 795 ext3_fsblk_t goal = le32_to_cpu(
796 EXT3_SB(sb)->s_es->s_first_data_block) + 796 EXT3_SB(sb)->s_es->s_first_data_block) +
797 EXT3_I(inode)->i_block_group * 797 (ext3_fsblk_t)EXT3_I(inode)->i_block_group *
798 EXT3_BLOCKS_PER_GROUP(sb); 798 EXT3_BLOCKS_PER_GROUP(sb);
799 int block = ext3_new_block(handle, inode, goal, &error); 799 ext3_fsblk_t block = ext3_new_block(handle, inode,
800 goal, &error);
800 if (error) 801 if (error)
801 goto cleanup; 802 goto cleanup;
802 ea_idebug(inode, "creating block %d", block); 803 ea_idebug(inode, "creating block %d", block);
@@ -847,7 +848,7 @@ cleanup_dquot:
847 848
848bad_block: 849bad_block:
849 ext3_error(inode->i_sb, __FUNCTION__, 850 ext3_error(inode->i_sb, __FUNCTION__,
850 "inode %ld: bad block %d", inode->i_ino, 851 "inode %ld: bad block "E3FSBLK, inode->i_ino,
851 EXT3_I(inode)->i_file_acl); 852 EXT3_I(inode)->i_file_acl);
852 goto cleanup; 853 goto cleanup;
853 854
@@ -1076,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1076 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 1077 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
1077 if (!bh) { 1078 if (!bh) {
1078 ext3_error(inode->i_sb, __FUNCTION__, 1079 ext3_error(inode->i_sb, __FUNCTION__,
1079 "inode %ld: block %d read error", inode->i_ino, 1080 "inode %ld: block "E3FSBLK" read error", inode->i_ino,
1080 EXT3_I(inode)->i_file_acl); 1081 EXT3_I(inode)->i_file_acl);
1081 goto cleanup; 1082 goto cleanup;
1082 } 1083 }
1083 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || 1084 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1084 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1085 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1085 ext3_error(inode->i_sb, __FUNCTION__, 1086 ext3_error(inode->i_sb, __FUNCTION__,
1086 "inode %ld: bad block %d", inode->i_ino, 1087 "inode %ld: bad block "E3FSBLK, inode->i_ino,
1087 EXT3_I(inode)->i_file_acl); 1088 EXT3_I(inode)->i_file_acl);
1088 goto cleanup; 1089 goto cleanup;
1089 } 1090 }
@@ -1210,11 +1211,11 @@ again:
1210 bh = sb_bread(inode->i_sb, ce->e_block); 1211 bh = sb_bread(inode->i_sb, ce->e_block);
1211 if (!bh) { 1212 if (!bh) {
1212 ext3_error(inode->i_sb, __FUNCTION__, 1213 ext3_error(inode->i_sb, __FUNCTION__,
1213 "inode %ld: block %ld read error", 1214 "inode %ld: block %lu read error",
1214 inode->i_ino, (unsigned long) ce->e_block); 1215 inode->i_ino, (unsigned long) ce->e_block);
1215 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1216 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
1216 EXT3_XATTR_REFCOUNT_MAX) { 1217 EXT3_XATTR_REFCOUNT_MAX) {
1217 ea_idebug(inode, "block %ld refcount %d>=%d", 1218 ea_idebug(inode, "block %lu refcount %d>=%d",
1218 (unsigned long) ce->e_block, 1219 (unsigned long) ce->e_block,
1219 le32_to_cpu(BHDR(bh)->h_refcount), 1220 le32_to_cpu(BHDR(bh)->h_refcount),
1220 EXT3_XATTR_REFCOUNT_MAX); 1221 EXT3_XATTR_REFCOUNT_MAX);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 2ceae38f3d49..6b1ae1c6182c 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -6,7 +6,6 @@
6 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 6 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
7*/ 7*/
8 8
9#include <linux/config.h>
10#include <linux/xattr.h> 9#include <linux/xattr.h>
11 10
12/* Magic value in attribute blocks */ 11/* Magic value in attribute blocks */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c1ce284f8a94..31b7174176ba 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -196,7 +196,7 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
196 return generic_block_bmap(mapping, block, fat_get_block); 196 return generic_block_bmap(mapping, block, fat_get_block);
197} 197}
198 198
199static struct address_space_operations fat_aops = { 199static const struct address_space_operations fat_aops = {
200 .readpage = fat_readpage, 200 .readpage = fat_readpage,
201 .readpages = fat_readpages, 201 .readpages = fat_readpages,
202 .writepage = fat_writepage, 202 .writepage = fat_writepage,
@@ -539,18 +539,18 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
539 return 0; 539 return 0;
540} 540}
541 541
542static int fat_statfs(struct super_block *sb, struct kstatfs *buf) 542static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
543{ 543{
544 struct msdos_sb_info *sbi = MSDOS_SB(sb); 544 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
545 545
546 /* If the count of free cluster is still unknown, counts it here. */ 546 /* If the count of free cluster is still unknown, counts it here. */
547 if (sbi->free_clusters == -1) { 547 if (sbi->free_clusters == -1) {
548 int err = fat_count_free_clusters(sb); 548 int err = fat_count_free_clusters(dentry->d_sb);
549 if (err) 549 if (err)
550 return err; 550 return err;
551 } 551 }
552 552
553 buf->f_type = sb->s_magic; 553 buf->f_type = dentry->d_sb->s_magic;
554 buf->f_bsize = sbi->cluster_size; 554 buf->f_bsize = sbi->cluster_size;
555 buf->f_blocks = sbi->max_cluster - FAT_START_ENT; 555 buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
556 buf->f_bfree = sbi->free_clusters; 556 buf->f_bfree = sbi->free_clusters;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 944652e9dde1..308f2b6b5026 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -210,4 +210,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
210 return err; 210 return err;
211} 211}
212 212
213EXPORT_SYMBOL_GPL(fat_sync_bhs);
diff --git a/fs/file_table.c b/fs/file_table.c
index bcea1998b4de..0131ba06e1ee 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) 5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 */ 6 */
7 7
8#include <linux/config.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include <linux/file.h> 10#include <linux/file.h>
@@ -300,5 +299,5 @@ void __init files_init(unsigned long mempages)
300 if (files_stat.max_files < NR_FILE) 299 if (files_stat.max_files < NR_FILE)
301 files_stat.max_files = NR_FILE; 300 files_stat.max_files = NR_FILE;
302 files_defer_init(); 301 files_defer_init();
303 percpu_counter_init(&nr_files); 302 percpu_counter_init(&nr_files, 0);
304} 303}
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index 583bd78086d8..d35979a58743 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -159,11 +159,11 @@ struct vxfs_sb {
159 * In core superblock filesystem private data for VxFS. 159 * In core superblock filesystem private data for VxFS.
160 */ 160 */
161struct vxfs_sb_info { 161struct vxfs_sb_info {
162 struct vxfs_sb *vsi_raw; /* raw (on disk) supeblock */ 162 struct vxfs_sb *vsi_raw; /* raw (on disk) superblock */
163 struct buffer_head *vsi_bp; /* buffer for raw superblock*/ 163 struct buffer_head *vsi_bp; /* buffer for raw superblock*/
164 struct inode *vsi_fship; /* fileset header inode */ 164 struct inode *vsi_fship; /* fileset header inode */
165 struct inode *vsi_ilist; /* inode list inode */ 165 struct inode *vsi_ilist; /* inode list inode */
166 struct inode *vsi_stilist; /* structual inode list inode */ 166 struct inode *vsi_stilist; /* structural inode list inode */
167 u_long vsi_iext; /* initial inode list */ 167 u_long vsi_iext; /* initial inode list */
168 ino_t vsi_fshino; /* fileset header inode */ 168 ino_t vsi_fshino; /* fileset header inode */
169 daddr_t vsi_oltext; /* OLT extent */ 169 daddr_t vsi_oltext; /* OLT extent */
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 6dee109aeea4..78948b4b1894 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -112,7 +112,7 @@ vxfs_read_fshead(struct super_block *sbp)
112 112
113 vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino); 113 vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino);
114 if (!vip) { 114 if (!vip) {
115 printk(KERN_ERR "vxfs: unabled to read fsh inode\n"); 115 printk(KERN_ERR "vxfs: unable to read fsh inode\n");
116 return -EINVAL; 116 return -EINVAL;
117 } 117 }
118 if (!VXFS_ISFSH(vip)) { 118 if (!VXFS_ISFSH(vip)) {
@@ -129,13 +129,13 @@ vxfs_read_fshead(struct super_block *sbp)
129 129
130 infp->vsi_fship = vxfs_get_fake_inode(sbp, vip); 130 infp->vsi_fship = vxfs_get_fake_inode(sbp, vip);
131 if (!infp->vsi_fship) { 131 if (!infp->vsi_fship) {
132 printk(KERN_ERR "vxfs: unabled to get fsh inode\n"); 132 printk(KERN_ERR "vxfs: unable to get fsh inode\n");
133 goto out_free_fship; 133 goto out_free_fship;
134 } 134 }
135 135
136 sfp = vxfs_getfsh(infp->vsi_fship, 0); 136 sfp = vxfs_getfsh(infp->vsi_fship, 0);
137 if (!sfp) { 137 if (!sfp) {
138 printk(KERN_ERR "vxfs: unabled to get structural fsh\n"); 138 printk(KERN_ERR "vxfs: unable to get structural fsh\n");
139 goto out_iput_fship; 139 goto out_iput_fship;
140 } 140 }
141 141
@@ -145,7 +145,7 @@ vxfs_read_fshead(struct super_block *sbp)
145 145
146 pfp = vxfs_getfsh(infp->vsi_fship, 1); 146 pfp = vxfs_getfsh(infp->vsi_fship, 1);
147 if (!pfp) { 147 if (!pfp) {
148 printk(KERN_ERR "vxfs: unabled to get primary fsh\n"); 148 printk(KERN_ERR "vxfs: unable to get primary fsh\n");
149 goto out_free_sfp; 149 goto out_free_sfp;
150 } 150 }
151 151
@@ -159,7 +159,7 @@ vxfs_read_fshead(struct super_block *sbp)
159 159
160 infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip); 160 infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip);
161 if (!infp->vsi_stilist) { 161 if (!infp->vsi_stilist) {
162 printk(KERN_ERR "vxfs: unabled to get structual list inode\n"); 162 printk(KERN_ERR "vxfs: unable to get structural list inode\n");
163 kfree(tip); 163 kfree(tip);
164 goto out_free_pfp; 164 goto out_free_pfp;
165 } 165 }
@@ -174,7 +174,7 @@ vxfs_read_fshead(struct super_block *sbp)
174 goto out_iput_stilist; 174 goto out_iput_stilist;
175 infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip); 175 infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip);
176 if (!infp->vsi_ilist) { 176 if (!infp->vsi_ilist) {
177 printk(KERN_ERR "vxfs: unabled to get inode list inode\n"); 177 printk(KERN_ERR "vxfs: unable to get inode list inode\n");
178 kfree(tip); 178 kfree(tip);
179 goto out_iput_stilist; 179 goto out_iput_stilist;
180 } 180 }
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 6f5df1700e95..4e25f3fbed86 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -56,7 +56,7 @@ struct inode_operations vxfs_immed_symlink_iops = {
56/* 56/*
57 * Adress space operations for immed files and directories. 57 * Adress space operations for immed files and directories.
58 */ 58 */
59struct address_space_operations vxfs_immed_aops = { 59const struct address_space_operations vxfs_immed_aops = {
60 .readpage = vxfs_immed_readpage, 60 .readpage = vxfs_immed_readpage,
61}; 61};
62 62
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index f544aae9169f..ca6a39714771 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,8 +41,8 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern struct address_space_operations vxfs_aops; 44extern const struct address_space_operations vxfs_aops;
45extern struct address_space_operations vxfs_immed_aops; 45extern const struct address_space_operations vxfs_immed_aops;
46 46
47extern struct inode_operations vxfs_immed_symlink_iops; 47extern struct inode_operations vxfs_immed_symlink_iops;
48 48
@@ -295,7 +295,7 @@ vxfs_read_inode(struct inode *ip)
295{ 295{
296 struct super_block *sbp = ip->i_sb; 296 struct super_block *sbp = ip->i_sb;
297 struct vxfs_inode_info *vip; 297 struct vxfs_inode_info *vip;
298 struct address_space_operations *aops; 298 const struct address_space_operations *aops;
299 ino_t ino = ip->i_ino; 299 ino_t ino = ip->i_ino;
300 300
301 if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist))) 301 if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist)))
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 50aae77651b2..decac62efe57 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -42,7 +42,7 @@
42static int vxfs_readpage(struct file *, struct page *); 42static int vxfs_readpage(struct file *, struct page *);
43static sector_t vxfs_bmap(struct address_space *, sector_t); 43static sector_t vxfs_bmap(struct address_space *, sector_t);
44 44
45struct address_space_operations vxfs_aops = { 45const struct address_space_operations vxfs_aops = {
46 .readpage = vxfs_readpage, 46 .readpage = vxfs_readpage,
47 .bmap = vxfs_bmap, 47 .bmap = vxfs_bmap,
48 .sync_page = block_sync_page, 48 .sync_page = block_sync_page,
@@ -71,8 +71,7 @@ vxfs_get_page(struct address_space *mapping, u_long n)
71{ 71{
72 struct page * pp; 72 struct page * pp;
73 73
74 pp = read_cache_page(mapping, n, 74 pp = read_mapping_page(mapping, n, NULL);
75 (filler_t*)mapping->a_ops->readpage, NULL);
76 75
77 if (!IS_ERR(pp)) { 76 if (!IS_ERR(pp)) {
78 wait_on_page_locked(pp); 77 wait_on_page_locked(pp);
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index b44c916d24a1..b74b791fc23b 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -40,6 +40,7 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/stat.h> 41#include <linux/stat.h>
42#include <linux/vfs.h> 42#include <linux/vfs.h>
43#include <linux/mount.h>
43 44
44#include "vxfs.h" 45#include "vxfs.h"
45#include "vxfs_extern.h" 46#include "vxfs_extern.h"
@@ -55,7 +56,7 @@ MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */
55 56
56 57
57static void vxfs_put_super(struct super_block *); 58static void vxfs_put_super(struct super_block *);
58static int vxfs_statfs(struct super_block *, struct kstatfs *); 59static int vxfs_statfs(struct dentry *, struct kstatfs *);
59static int vxfs_remount(struct super_block *, int *, char *); 60static int vxfs_remount(struct super_block *, int *, char *);
60 61
61static struct super_operations vxfs_super_ops = { 62static struct super_operations vxfs_super_ops = {
@@ -90,12 +91,12 @@ vxfs_put_super(struct super_block *sbp)
90 91
91/** 92/**
92 * vxfs_statfs - get filesystem information 93 * vxfs_statfs - get filesystem information
93 * @sbp: VFS superblock 94 * @dentry: VFS dentry to locate superblock
94 * @bufp: output buffer 95 * @bufp: output buffer
95 * 96 *
96 * Description: 97 * Description:
97 * vxfs_statfs fills the statfs buffer @bufp with information 98 * vxfs_statfs fills the statfs buffer @bufp with information
98 * about the filesystem described by @sbp. 99 * about the filesystem described by @dentry.
99 * 100 *
100 * Returns: 101 * Returns:
101 * Zero. 102 * Zero.
@@ -107,12 +108,12 @@ vxfs_put_super(struct super_block *sbp)
107 * This is everything but complete... 108 * This is everything but complete...
108 */ 109 */
109static int 110static int
110vxfs_statfs(struct super_block *sbp, struct kstatfs *bufp) 111vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)
111{ 112{
112 struct vxfs_sb_info *infp = VXFS_SBI(sbp); 113 struct vxfs_sb_info *infp = VXFS_SBI(dentry->d_sb);
113 114
114 bufp->f_type = VXFS_SUPER_MAGIC; 115 bufp->f_type = VXFS_SUPER_MAGIC;
115 bufp->f_bsize = sbp->s_blocksize; 116 bufp->f_bsize = dentry->d_sb->s_blocksize;
116 bufp->f_blocks = infp->vsi_raw->vs_dsize; 117 bufp->f_blocks = infp->vsi_raw->vs_dsize;
117 bufp->f_bfree = infp->vsi_raw->vs_free; 118 bufp->f_bfree = infp->vsi_raw->vs_free;
118 bufp->f_bavail = 0; 119 bufp->f_bavail = 0;
@@ -241,10 +242,11 @@ out:
241/* 242/*
242 * The usual module blurb. 243 * The usual module blurb.
243 */ 244 */
244static struct super_block *vxfs_get_sb(struct file_system_type *fs_type, 245static int vxfs_get_sb(struct file_system_type *fs_type,
245 int flags, const char *dev_name, void *data) 246 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
246{ 247{
247 return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); 248 return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
249 mnt);
248} 250}
249 251
250static struct file_system_type vxfs_fs_type = { 252static struct file_system_type vxfs_fs_type = {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f3fbe2d030f4..892643dc9af1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -461,9 +461,11 @@ void sync_inodes_sb(struct super_block *sb, int wait)
461{ 461{
462 struct writeback_control wbc = { 462 struct writeback_control wbc = {
463 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, 463 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
464 .range_start = 0,
465 .range_end = LLONG_MAX,
464 }; 466 };
465 unsigned long nr_dirty = read_page_state(nr_dirty); 467 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
466 unsigned long nr_unstable = read_page_state(nr_unstable); 468 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
467 469
468 wbc.nr_to_write = nr_dirty + nr_unstable + 470 wbc.nr_to_write = nr_dirty + nr_unstable +
469 (inodes_stat.nr_inodes - inodes_stat.nr_unused) + 471 (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
@@ -559,6 +561,8 @@ int write_inode_now(struct inode *inode, int sync)
559 struct writeback_control wbc = { 561 struct writeback_control wbc = {
560 .nr_to_write = LONG_MAX, 562 .nr_to_write = LONG_MAX,
561 .sync_mode = WB_SYNC_ALL, 563 .sync_mode = WB_SYNC_ALL,
564 .range_start = 0,
565 .range_end = LLONG_MAX,
562 }; 566 };
563 567
564 if (!mapping_cap_writeback_dirty(inode->i_mapping)) 568 if (!mapping_cap_writeback_dirty(inode->i_mapping))
@@ -619,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
619 int need_write_inode_now = 0; 623 int need_write_inode_now = 0;
620 int err2; 624 int err2;
621 625
622 current->flags |= PF_SYNCWRITE;
623 if (what & OSYNC_DATA) 626 if (what & OSYNC_DATA)
624 err = filemap_fdatawrite(mapping); 627 err = filemap_fdatawrite(mapping);
625 if (what & (OSYNC_METADATA|OSYNC_DATA)) { 628 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
@@ -632,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
632 if (!err) 635 if (!err)
633 err = err2; 636 err = err2;
634 } 637 }
635 current->flags &= ~PF_SYNCWRITE;
636 638
637 spin_lock(&inode_lock); 639 spin_lock(&inode_lock);
638 if ((inode->i_state & I_DIRTY) && 640 if ((inode->i_state & I_DIRTY) &&
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index c3e1f760cac9..72437065f6ad 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_FUSE_FS) += fuse.o 5obj-$(CONFIG_FUSE_FS) += fuse.o
6 6
7fuse-objs := dev.o dir.o file.o inode.o 7fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
new file mode 100644
index 000000000000..a3bce3a77253
--- /dev/null
+++ b/fs/fuse/control.c
@@ -0,0 +1,218 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13
14#define FUSE_CTL_SUPER_MAGIC 0x65735543
15
16/*
17 * This is non-NULL when the single instance of the control filesystem
18 * exists. Protected by fuse_mutex
19 */
20static struct super_block *fuse_control_sb;
21
22static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
23{
24 struct fuse_conn *fc;
25 mutex_lock(&fuse_mutex);
26 fc = file->f_dentry->d_inode->u.generic_ip;
27 if (fc)
28 fc = fuse_conn_get(fc);
29 mutex_unlock(&fuse_mutex);
30 return fc;
31}
32
33static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
34 size_t count, loff_t *ppos)
35{
36 struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
37 if (fc) {
38 fuse_abort_conn(fc);
39 fuse_conn_put(fc);
40 }
41 return count;
42}
43
44static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
45 size_t len, loff_t *ppos)
46{
47 char tmp[32];
48 size_t size;
49
50 if (!*ppos) {
51 struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
52 if (!fc)
53 return 0;
54
55 file->private_data=(void *)(long)atomic_read(&fc->num_waiting);
56 fuse_conn_put(fc);
57 }
58 size = sprintf(tmp, "%ld\n", (long)file->private_data);
59 return simple_read_from_buffer(buf, len, ppos, tmp, size);
60}
61
62static const struct file_operations fuse_ctl_abort_ops = {
63 .open = nonseekable_open,
64 .write = fuse_conn_abort_write,
65};
66
67static const struct file_operations fuse_ctl_waiting_ops = {
68 .open = nonseekable_open,
69 .read = fuse_conn_waiting_read,
70};
71
72static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
73 struct fuse_conn *fc,
74 const char *name,
75 int mode, int nlink,
76 struct inode_operations *iop,
77 const struct file_operations *fop)
78{
79 struct dentry *dentry;
80 struct inode *inode;
81
82 BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES);
83 dentry = d_alloc_name(parent, name);
84 if (!dentry)
85 return NULL;
86
87 fc->ctl_dentry[fc->ctl_ndents++] = dentry;
88 inode = new_inode(fuse_control_sb);
89 if (!inode)
90 return NULL;
91
92 inode->i_mode = mode;
93 inode->i_uid = fc->user_id;
94 inode->i_gid = fc->group_id;
95 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
96 /* setting ->i_op to NULL is not allowed */
97 if (iop)
98 inode->i_op = iop;
99 inode->i_fop = fop;
100 inode->i_nlink = nlink;
101 inode->u.generic_ip = fc;
102 d_add(dentry, inode);
103 return dentry;
104}
105
106/*
107 * Add a connection to the control filesystem (if it exists). Caller
108 * must host fuse_mutex
109 */
110int fuse_ctl_add_conn(struct fuse_conn *fc)
111{
112 struct dentry *parent;
113 char name[32];
114
115 if (!fuse_control_sb)
116 return 0;
117
118 parent = fuse_control_sb->s_root;
119 parent->d_inode->i_nlink++;
120 sprintf(name, "%llu", (unsigned long long) fc->id);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations,
123 &simple_dir_operations);
124 if (!parent)
125 goto err;
126
127 if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
128 NULL, &fuse_ctl_waiting_ops) ||
129 !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
130 NULL, &fuse_ctl_abort_ops))
131 goto err;
132
133 return 0;
134
135 err:
136 fuse_ctl_remove_conn(fc);
137 return -ENOMEM;
138}
139
140/*
141 * Remove a connection from the control filesystem (if it exists).
142 * Caller must host fuse_mutex
143 */
144void fuse_ctl_remove_conn(struct fuse_conn *fc)
145{
146 int i;
147
148 if (!fuse_control_sb)
149 return;
150
151 for (i = fc->ctl_ndents - 1; i >= 0; i--) {
152 struct dentry *dentry = fc->ctl_dentry[i];
153 dentry->d_inode->u.generic_ip = NULL;
154 d_drop(dentry);
155 dput(dentry);
156 }
157 fuse_control_sb->s_root->d_inode->i_nlink--;
158}
159
160static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
161{
162 struct tree_descr empty_descr = {""};
163 struct fuse_conn *fc;
164 int err;
165
166 err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr);
167 if (err)
168 return err;
169
170 mutex_lock(&fuse_mutex);
171 BUG_ON(fuse_control_sb);
172 fuse_control_sb = sb;
173 list_for_each_entry(fc, &fuse_conn_list, entry) {
174 err = fuse_ctl_add_conn(fc);
175 if (err) {
176 fuse_control_sb = NULL;
177 mutex_unlock(&fuse_mutex);
178 return err;
179 }
180 }
181 mutex_unlock(&fuse_mutex);
182
183 return 0;
184}
185
186static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
187 const char *dev_name, void *raw_data,
188 struct vfsmount *mnt)
189{
190 return get_sb_single(fs_type, flags, raw_data,
191 fuse_ctl_fill_super, mnt);
192}
193
194static void fuse_ctl_kill_sb(struct super_block *sb)
195{
196 mutex_lock(&fuse_mutex);
197 fuse_control_sb = NULL;
198 mutex_unlock(&fuse_mutex);
199
200 kill_litter_super(sb);
201}
202
203static struct file_system_type fuse_ctl_fs_type = {
204 .owner = THIS_MODULE,
205 .name = "fusectl",
206 .get_sb = fuse_ctl_get_sb,
207 .kill_sb = fuse_ctl_kill_sb,
208};
209
210int __init fuse_ctl_init(void)
211{
212 return register_filesystem(&fuse_ctl_fs_type);
213}
214
215void fuse_ctl_cleanup(void)
216{
217 unregister_filesystem(&fuse_ctl_fs_type);
218}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 104a62dadb94..1e2006caf158 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,6 +34,7 @@ static void fuse_request_init(struct fuse_req *req)
34{ 34{
35 memset(req, 0, sizeof(*req)); 35 memset(req, 0, sizeof(*req));
36 INIT_LIST_HEAD(&req->list); 36 INIT_LIST_HEAD(&req->list);
37 INIT_LIST_HEAD(&req->intr_entry);
37 init_waitqueue_head(&req->waitq); 38 init_waitqueue_head(&req->waitq);
38 atomic_set(&req->count, 1); 39 atomic_set(&req->count, 1);
39} 40}
@@ -64,18 +65,6 @@ static void restore_sigs(sigset_t *oldset)
64 sigprocmask(SIG_SETMASK, oldset, NULL); 65 sigprocmask(SIG_SETMASK, oldset, NULL);
65} 66}
66 67
67/*
68 * Reset request, so that it can be reused
69 *
70 * The caller must be _very_ careful to make sure, that it is holding
71 * the only reference to req
72 */
73void fuse_reset_request(struct fuse_req *req)
74{
75 BUG_ON(atomic_read(&req->count) != 1);
76 fuse_request_init(req);
77}
78
79static void __fuse_get_request(struct fuse_req *req) 68static void __fuse_get_request(struct fuse_req *req)
80{ 69{
81 atomic_inc(&req->count); 70 atomic_inc(&req->count);
@@ -88,6 +77,13 @@ static void __fuse_put_request(struct fuse_req *req)
88 atomic_dec(&req->count); 77 atomic_dec(&req->count);
89} 78}
90 79
80static void fuse_req_init_context(struct fuse_req *req)
81{
82 req->in.h.uid = current->fsuid;
83 req->in.h.gid = current->fsgid;
84 req->in.h.pid = current->pid;
85}
86
91struct fuse_req *fuse_get_req(struct fuse_conn *fc) 87struct fuse_req *fuse_get_req(struct fuse_conn *fc)
92{ 88{
93 struct fuse_req *req; 89 struct fuse_req *req;
@@ -103,14 +99,16 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
103 if (intr) 99 if (intr)
104 goto out; 100 goto out;
105 101
102 err = -ENOTCONN;
103 if (!fc->connected)
104 goto out;
105
106 req = fuse_request_alloc(); 106 req = fuse_request_alloc();
107 err = -ENOMEM; 107 err = -ENOMEM;
108 if (!req) 108 if (!req)
109 goto out; 109 goto out;
110 110
111 req->in.h.uid = current->fsuid; 111 fuse_req_init_context(req);
112 req->in.h.gid = current->fsgid;
113 req->in.h.pid = current->pid;
114 req->waiting = 1; 112 req->waiting = 1;
115 return req; 113 return req;
116 114
@@ -119,142 +117,183 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
119 return ERR_PTR(err); 117 return ERR_PTR(err);
120} 118}
121 119
122void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 120/*
121 * Return request in fuse_file->reserved_req. However that may
122 * currently be in use. If that is the case, wait for it to become
123 * available.
124 */
125static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
126 struct file *file)
123{ 127{
124 if (atomic_dec_and_test(&req->count)) { 128 struct fuse_req *req = NULL;
125 if (req->waiting) 129 struct fuse_file *ff = file->private_data;
126 atomic_dec(&fc->num_waiting); 130
127 fuse_request_free(req); 131 do {
128 } 132 wait_event(fc->blocked_waitq, ff->reserved_req);
133 spin_lock(&fc->lock);
134 if (ff->reserved_req) {
135 req = ff->reserved_req;
136 ff->reserved_req = NULL;
137 get_file(file);
138 req->stolen_file = file;
139 }
140 spin_unlock(&fc->lock);
141 } while (!req);
142
143 return req;
129} 144}
130 145
131/* 146/*
132 * Called with sbput_sem held for read (request_end) or write 147 * Put stolen request back into fuse_file->reserved_req
133 * (fuse_put_super). By the time fuse_put_super() is finished, all
134 * inodes belonging to background requests must be released, so the
135 * iputs have to be done within the locked region.
136 */ 148 */
137void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) 149static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
138{ 150{
139 iput(req->inode); 151 struct file *file = req->stolen_file;
140 iput(req->inode2); 152 struct fuse_file *ff = file->private_data;
153
141 spin_lock(&fc->lock); 154 spin_lock(&fc->lock);
142 list_del(&req->bg_entry); 155 fuse_request_init(req);
143 if (fc->num_background == FUSE_MAX_BACKGROUND) { 156 BUG_ON(ff->reserved_req);
144 fc->blocked = 0; 157 ff->reserved_req = req;
145 wake_up_all(&fc->blocked_waitq); 158 wake_up(&fc->blocked_waitq);
146 }
147 fc->num_background--;
148 spin_unlock(&fc->lock); 159 spin_unlock(&fc->lock);
160 fput(file);
149} 161}
150 162
151/* 163/*
152 * This function is called when a request is finished. Either a reply 164 * Gets a requests for a file operation, always succeeds
153 * has arrived or it was interrupted (and not yet sent) or some error
154 * occurred during communication with userspace, or the device file
155 * was closed. In case of a background request the reference to the
156 * stored objects are released. The requester thread is woken up (if
157 * still waiting), the 'end' callback is called if given, else the
158 * reference to the request is released
159 * 165 *
160 * Releasing extra reference for foreground requests must be done 166 * This is used for sending the FLUSH request, which must get to
161 * within the same locked region as setting state to finished. This 167 * userspace, due to POSIX locks which may need to be unlocked.
162 * is because fuse_reset_request() may be called after request is
163 * finished and it must be the sole possessor. If request is
164 * interrupted and put in the background, it will return with an error
165 * and hence never be reset and reused.
166 * 168 *
167 * Called with fc->lock, unlocks it 169 * If allocation fails due to OOM, use the reserved request in
170 * fuse_file.
171 *
172 * This is very unlikely to deadlock accidentally, since the
173 * filesystem should not have it's own file open. If deadlock is
174 * intentional, it can still be broken by "aborting" the filesystem.
168 */ 175 */
169static void request_end(struct fuse_conn *fc, struct fuse_req *req) 176struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
170{ 177{
171 list_del(&req->list); 178 struct fuse_req *req;
172 req->state = FUSE_REQ_FINISHED;
173 if (!req->background) {
174 spin_unlock(&fc->lock);
175 wake_up(&req->waitq);
176 fuse_put_request(fc, req);
177 } else {
178 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
179 req->end = NULL;
180 spin_unlock(&fc->lock);
181 down_read(&fc->sbput_sem);
182 if (fc->mounted)
183 fuse_release_background(fc, req);
184 up_read(&fc->sbput_sem);
185 179
186 /* fput must go outside sbput_sem, otherwise it can deadlock */ 180 atomic_inc(&fc->num_waiting);
187 if (req->file) 181 wait_event(fc->blocked_waitq, !fc->blocked);
188 fput(req->file); 182 req = fuse_request_alloc();
183 if (!req)
184 req = get_reserved_req(fc, file);
189 185
190 if (end) 186 fuse_req_init_context(req);
191 end(fc, req); 187 req->waiting = 1;
188 return req;
189}
190
191void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
192{
193 if (atomic_dec_and_test(&req->count)) {
194 if (req->waiting)
195 atomic_dec(&fc->num_waiting);
196
197 if (req->stolen_file)
198 put_reserved_req(fc, req);
192 else 199 else
193 fuse_put_request(fc, req); 200 fuse_request_free(req);
194 } 201 }
195} 202}
196 203
197/* 204/*
198 * Unfortunately request interruption not just solves the deadlock 205 * This function is called when a request is finished. Either a reply
199 * problem, it causes problems too. These stem from the fact, that an 206 * has arrived or it was aborted (and not yet sent) or some error
200 * interrupted request is continued to be processed in userspace, 207 * occurred during communication with userspace, or the device file
201 * while all the locks and object references (inode and file) held 208 * was closed. The requester thread is woken up (if still waiting),
202 * during the operation are released. 209 * the 'end' callback is called if given, else the reference to the
203 * 210 * request is released
204 * To release the locks is exactly why there's a need to interrupt the
205 * request, so there's not a lot that can be done about this, except
206 * introduce additional locking in userspace.
207 *
208 * More important is to keep inode and file references until userspace
209 * has replied, otherwise FORGET and RELEASE could be sent while the
210 * inode/file is still used by the filesystem.
211 *
212 * For this reason the concept of "background" request is introduced.
213 * An interrupted request is backgrounded if it has been already sent
214 * to userspace. Backgrounding involves getting an extra reference to
215 * inode(s) or file used in the request, and adding the request to
216 * fc->background list. When a reply is received for a background
217 * request, the object references are released, and the request is
218 * removed from the list. If the filesystem is unmounted while there
219 * are still background requests, the list is walked and references
220 * are released as if a reply was received.
221 * 211 *
222 * There's one more use for a background request. The RELEASE message is 212 * Called with fc->lock, unlocks it
223 * always sent as background, since it doesn't return an error or
224 * data.
225 */ 213 */
226static void background_request(struct fuse_conn *fc, struct fuse_req *req) 214static void request_end(struct fuse_conn *fc, struct fuse_req *req)
227{ 215{
228 req->background = 1; 216 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
229 list_add(&req->bg_entry, &fc->background); 217 req->end = NULL;
230 fc->num_background++; 218 list_del(&req->list);
231 if (fc->num_background == FUSE_MAX_BACKGROUND) 219 list_del(&req->intr_entry);
232 fc->blocked = 1; 220 req->state = FUSE_REQ_FINISHED;
233 if (req->inode) 221 if (req->background) {
234 req->inode = igrab(req->inode); 222 if (fc->num_background == FUSE_MAX_BACKGROUND) {
235 if (req->inode2) 223 fc->blocked = 0;
236 req->inode2 = igrab(req->inode2); 224 wake_up_all(&fc->blocked_waitq);
225 }
226 fc->num_background--;
227 }
228 spin_unlock(&fc->lock);
229 dput(req->dentry);
230 mntput(req->vfsmount);
237 if (req->file) 231 if (req->file)
238 get_file(req->file); 232 fput(req->file);
233 wake_up(&req->waitq);
234 if (end)
235 end(fc, req);
236 else
237 fuse_put_request(fc, req);
239} 238}
240 239
241/* Called with fc->lock held. Releases, and then reacquires it. */ 240static void wait_answer_interruptible(struct fuse_conn *fc,
242static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 241 struct fuse_req *req)
243{ 242{
244 sigset_t oldset; 243 if (signal_pending(current))
244 return;
245 245
246 spin_unlock(&fc->lock); 246 spin_unlock(&fc->lock);
247 block_sigs(&oldset);
248 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); 247 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
249 restore_sigs(&oldset);
250 spin_lock(&fc->lock); 248 spin_lock(&fc->lock);
251 if (req->state == FUSE_REQ_FINISHED && !req->interrupted) 249}
252 return; 250
251static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
252{
253 list_add_tail(&req->intr_entry, &fc->interrupts);
254 wake_up(&fc->waitq);
255 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
256}
257
258/* Called with fc->lock held. Releases, and then reacquires it. */
259static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
260{
261 if (!fc->no_interrupt) {
262 /* Any signal may interrupt this */
263 wait_answer_interruptible(fc, req);
264
265 if (req->aborted)
266 goto aborted;
267 if (req->state == FUSE_REQ_FINISHED)
268 return;
253 269
254 if (!req->interrupted) {
255 req->out.h.error = -EINTR;
256 req->interrupted = 1; 270 req->interrupted = 1;
271 if (req->state == FUSE_REQ_SENT)
272 queue_interrupt(fc, req);
273 }
274
275 if (req->force) {
276 spin_unlock(&fc->lock);
277 wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
278 spin_lock(&fc->lock);
279 } else {
280 sigset_t oldset;
281
282 /* Only fatal signals may interrupt this */
283 block_sigs(&oldset);
284 wait_answer_interruptible(fc, req);
285 restore_sigs(&oldset);
257 } 286 }
287
288 if (req->aborted)
289 goto aborted;
290 if (req->state == FUSE_REQ_FINISHED)
291 return;
292
293 req->out.h.error = -EINTR;
294 req->aborted = 1;
295
296 aborted:
258 if (req->locked) { 297 if (req->locked) {
259 /* This is uninterruptible sleep, because data is 298 /* This is uninterruptible sleep, because data is
260 being copied to/from the buffers of req. During 299 being copied to/from the buffers of req. During
@@ -268,8 +307,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
268 if (req->state == FUSE_REQ_PENDING) { 307 if (req->state == FUSE_REQ_PENDING) {
269 list_del(&req->list); 308 list_del(&req->list);
270 __fuse_put_request(req); 309 __fuse_put_request(req);
271 } else if (req->state == FUSE_REQ_SENT) 310 } else if (req->state == FUSE_REQ_SENT) {
272 background_request(fc, req); 311 spin_unlock(&fc->lock);
312 wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
313 spin_lock(&fc->lock);
314 }
273} 315}
274 316
275static unsigned len_args(unsigned numargs, struct fuse_arg *args) 317static unsigned len_args(unsigned numargs, struct fuse_arg *args)
@@ -283,13 +325,19 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
283 return nbytes; 325 return nbytes;
284} 326}
285 327
328static u64 fuse_get_unique(struct fuse_conn *fc)
329 {
330 fc->reqctr++;
331 /* zero is special */
332 if (fc->reqctr == 0)
333 fc->reqctr = 1;
334
335 return fc->reqctr;
336}
337
286static void queue_request(struct fuse_conn *fc, struct fuse_req *req) 338static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
287{ 339{
288 fc->reqctr++; 340 req->in.h.unique = fuse_get_unique(fc);
289 /* zero is special */
290 if (fc->reqctr == 0)
291 fc->reqctr = 1;
292 req->in.h.unique = fc->reqctr;
293 req->in.h.len = sizeof(struct fuse_in_header) + 341 req->in.h.len = sizeof(struct fuse_in_header) +
294 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 342 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
295 list_add_tail(&req->list, &fc->pending); 343 list_add_tail(&req->list, &fc->pending);
@@ -302,9 +350,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
302 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 350 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
303} 351}
304 352
305/*
306 * This can only be interrupted by a SIGKILL
307 */
308void request_send(struct fuse_conn *fc, struct fuse_req *req) 353void request_send(struct fuse_conn *fc, struct fuse_req *req)
309{ 354{
310 req->isreply = 1; 355 req->isreply = 1;
@@ -327,8 +372,12 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
327static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 372static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
328{ 373{
329 spin_lock(&fc->lock); 374 spin_lock(&fc->lock);
330 background_request(fc, req);
331 if (fc->connected) { 375 if (fc->connected) {
376 req->background = 1;
377 fc->num_background++;
378 if (fc->num_background == FUSE_MAX_BACKGROUND)
379 fc->blocked = 1;
380
332 queue_request(fc, req); 381 queue_request(fc, req);
333 spin_unlock(&fc->lock); 382 spin_unlock(&fc->lock);
334 } else { 383 } else {
@@ -352,14 +401,14 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
352/* 401/*
353 * Lock the request. Up to the next unlock_request() there mustn't be 402 * Lock the request. Up to the next unlock_request() there mustn't be
354 * anything that could cause a page-fault. If the request was already 403 * anything that could cause a page-fault. If the request was already
355 * interrupted bail out. 404 * aborted bail out.
356 */ 405 */
357static int lock_request(struct fuse_conn *fc, struct fuse_req *req) 406static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
358{ 407{
359 int err = 0; 408 int err = 0;
360 if (req) { 409 if (req) {
361 spin_lock(&fc->lock); 410 spin_lock(&fc->lock);
362 if (req->interrupted) 411 if (req->aborted)
363 err = -ENOENT; 412 err = -ENOENT;
364 else 413 else
365 req->locked = 1; 414 req->locked = 1;
@@ -369,7 +418,7 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
369} 418}
370 419
371/* 420/*
372 * Unlock request. If it was interrupted during being locked, the 421 * Unlock request. If it was aborted during being locked, the
373 * requester thread is currently waiting for it to be unlocked, so 422 * requester thread is currently waiting for it to be unlocked, so
374 * wake it up. 423 * wake it up.
375 */ 424 */
@@ -378,7 +427,7 @@ static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
378 if (req) { 427 if (req) {
379 spin_lock(&fc->lock); 428 spin_lock(&fc->lock);
380 req->locked = 0; 429 req->locked = 0;
381 if (req->interrupted) 430 if (req->aborted)
382 wake_up(&req->waitq); 431 wake_up(&req->waitq);
383 spin_unlock(&fc->lock); 432 spin_unlock(&fc->lock);
384 } 433 }
@@ -557,13 +606,18 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
557 return err; 606 return err;
558} 607}
559 608
609static int request_pending(struct fuse_conn *fc)
610{
611 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
612}
613
560/* Wait until a request is available on the pending list */ 614/* Wait until a request is available on the pending list */
561static void request_wait(struct fuse_conn *fc) 615static void request_wait(struct fuse_conn *fc)
562{ 616{
563 DECLARE_WAITQUEUE(wait, current); 617 DECLARE_WAITQUEUE(wait, current);
564 618
565 add_wait_queue_exclusive(&fc->waitq, &wait); 619 add_wait_queue_exclusive(&fc->waitq, &wait);
566 while (fc->connected && list_empty(&fc->pending)) { 620 while (fc->connected && !request_pending(fc)) {
567 set_current_state(TASK_INTERRUPTIBLE); 621 set_current_state(TASK_INTERRUPTIBLE);
568 if (signal_pending(current)) 622 if (signal_pending(current))
569 break; 623 break;
@@ -577,11 +631,50 @@ static void request_wait(struct fuse_conn *fc)
577} 631}
578 632
579/* 633/*
634 * Transfer an interrupt request to userspace
635 *
636 * Unlike other requests this is assembled on demand, without a need
637 * to allocate a separate fuse_req structure.
638 *
639 * Called with fc->lock held, releases it
640 */
641static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
642 const struct iovec *iov, unsigned long nr_segs)
643{
644 struct fuse_copy_state cs;
645 struct fuse_in_header ih;
646 struct fuse_interrupt_in arg;
647 unsigned reqsize = sizeof(ih) + sizeof(arg);
648 int err;
649
650 list_del_init(&req->intr_entry);
651 req->intr_unique = fuse_get_unique(fc);
652 memset(&ih, 0, sizeof(ih));
653 memset(&arg, 0, sizeof(arg));
654 ih.len = reqsize;
655 ih.opcode = FUSE_INTERRUPT;
656 ih.unique = req->intr_unique;
657 arg.unique = req->in.h.unique;
658
659 spin_unlock(&fc->lock);
660 if (iov_length(iov, nr_segs) < reqsize)
661 return -EINVAL;
662
663 fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
664 err = fuse_copy_one(&cs, &ih, sizeof(ih));
665 if (!err)
666 err = fuse_copy_one(&cs, &arg, sizeof(arg));
667 fuse_copy_finish(&cs);
668
669 return err ? err : reqsize;
670}
671
672/*
580 * Read a single request into the userspace filesystem's buffer. This 673 * Read a single request into the userspace filesystem's buffer. This
581 * function waits until a request is available, then removes it from 674 * function waits until a request is available, then removes it from
582 * the pending list and copies request data to userspace buffer. If 675 * the pending list and copies request data to userspace buffer. If
583 * no reply is needed (FORGET) or request has been interrupted or 676 * no reply is needed (FORGET) or request has been aborted or there
584 * there was an error during the copying then it's finished by calling 677 * was an error during the copying then it's finished by calling
585 * request_end(). Otherwise add it to the processing list, and set 678 * request_end(). Otherwise add it to the processing list, and set
586 * the 'sent' flag. 679 * the 'sent' flag.
587 */ 680 */
@@ -601,7 +694,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
601 spin_lock(&fc->lock); 694 spin_lock(&fc->lock);
602 err = -EAGAIN; 695 err = -EAGAIN;
603 if ((file->f_flags & O_NONBLOCK) && fc->connected && 696 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
604 list_empty(&fc->pending)) 697 !request_pending(fc))
605 goto err_unlock; 698 goto err_unlock;
606 699
607 request_wait(fc); 700 request_wait(fc);
@@ -609,9 +702,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
609 if (!fc->connected) 702 if (!fc->connected)
610 goto err_unlock; 703 goto err_unlock;
611 err = -ERESTARTSYS; 704 err = -ERESTARTSYS;
612 if (list_empty(&fc->pending)) 705 if (!request_pending(fc))
613 goto err_unlock; 706 goto err_unlock;
614 707
708 if (!list_empty(&fc->interrupts)) {
709 req = list_entry(fc->interrupts.next, struct fuse_req,
710 intr_entry);
711 return fuse_read_interrupt(fc, req, iov, nr_segs);
712 }
713
615 req = list_entry(fc->pending.next, struct fuse_req, list); 714 req = list_entry(fc->pending.next, struct fuse_req, list);
616 req->state = FUSE_REQ_READING; 715 req->state = FUSE_REQ_READING;
617 list_move(&req->list, &fc->io); 716 list_move(&req->list, &fc->io);
@@ -636,10 +735,10 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
636 fuse_copy_finish(&cs); 735 fuse_copy_finish(&cs);
637 spin_lock(&fc->lock); 736 spin_lock(&fc->lock);
638 req->locked = 0; 737 req->locked = 0;
639 if (!err && req->interrupted) 738 if (!err && req->aborted)
640 err = -ENOENT; 739 err = -ENOENT;
641 if (err) { 740 if (err) {
642 if (!req->interrupted) 741 if (!req->aborted)
643 req->out.h.error = -EIO; 742 req->out.h.error = -EIO;
644 request_end(fc, req); 743 request_end(fc, req);
645 return err; 744 return err;
@@ -649,6 +748,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
649 else { 748 else {
650 req->state = FUSE_REQ_SENT; 749 req->state = FUSE_REQ_SENT;
651 list_move_tail(&req->list, &fc->processing); 750 list_move_tail(&req->list, &fc->processing);
751 if (req->interrupted)
752 queue_interrupt(fc, req);
652 spin_unlock(&fc->lock); 753 spin_unlock(&fc->lock);
653 } 754 }
654 return reqsize; 755 return reqsize;
@@ -675,7 +776,7 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
675 list_for_each(entry, &fc->processing) { 776 list_for_each(entry, &fc->processing) {
676 struct fuse_req *req; 777 struct fuse_req *req;
677 req = list_entry(entry, struct fuse_req, list); 778 req = list_entry(entry, struct fuse_req, list);
678 if (req->in.h.unique == unique) 779 if (req->in.h.unique == unique || req->intr_unique == unique)
679 return req; 780 return req;
680 } 781 }
681 return NULL; 782 return NULL;
@@ -741,17 +842,33 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
741 goto err_unlock; 842 goto err_unlock;
742 843
743 req = request_find(fc, oh.unique); 844 req = request_find(fc, oh.unique);
744 err = -EINVAL;
745 if (!req) 845 if (!req)
746 goto err_unlock; 846 goto err_unlock;
747 847
748 if (req->interrupted) { 848 if (req->aborted) {
749 spin_unlock(&fc->lock); 849 spin_unlock(&fc->lock);
750 fuse_copy_finish(&cs); 850 fuse_copy_finish(&cs);
751 spin_lock(&fc->lock); 851 spin_lock(&fc->lock);
752 request_end(fc, req); 852 request_end(fc, req);
753 return -ENOENT; 853 return -ENOENT;
754 } 854 }
855 /* Is it an interrupt reply? */
856 if (req->intr_unique == oh.unique) {
857 err = -EINVAL;
858 if (nbytes != sizeof(struct fuse_out_header))
859 goto err_unlock;
860
861 if (oh.error == -ENOSYS)
862 fc->no_interrupt = 1;
863 else if (oh.error == -EAGAIN)
864 queue_interrupt(fc, req);
865
866 spin_unlock(&fc->lock);
867 fuse_copy_finish(&cs);
868 return nbytes;
869 }
870
871 req->state = FUSE_REQ_WRITING;
755 list_move(&req->list, &fc->io); 872 list_move(&req->list, &fc->io);
756 req->out.h = oh; 873 req->out.h = oh;
757 req->locked = 1; 874 req->locked = 1;
@@ -764,9 +881,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
764 spin_lock(&fc->lock); 881 spin_lock(&fc->lock);
765 req->locked = 0; 882 req->locked = 0;
766 if (!err) { 883 if (!err) {
767 if (req->interrupted) 884 if (req->aborted)
768 err = -ENOENT; 885 err = -ENOENT;
769 } else if (!req->interrupted) 886 } else if (!req->aborted)
770 req->out.h.error = -EIO; 887 req->out.h.error = -EIO;
771 request_end(fc, req); 888 request_end(fc, req);
772 889
@@ -800,7 +917,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
800 spin_lock(&fc->lock); 917 spin_lock(&fc->lock);
801 if (!fc->connected) 918 if (!fc->connected)
802 mask = POLLERR; 919 mask = POLLERR;
803 else if (!list_empty(&fc->pending)) 920 else if (request_pending(fc))
804 mask |= POLLIN | POLLRDNORM; 921 mask |= POLLIN | POLLRDNORM;
805 spin_unlock(&fc->lock); 922 spin_unlock(&fc->lock);
806 923
@@ -826,7 +943,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
826/* 943/*
827 * Abort requests under I/O 944 * Abort requests under I/O
828 * 945 *
829 * The requests are set to interrupted and finished, and the request 946 * The requests are set to aborted and finished, and the request
830 * waiter is woken up. This will make request_wait_answer() wait 947 * waiter is woken up. This will make request_wait_answer() wait
831 * until the request is unlocked and then return. 948 * until the request is unlocked and then return.
832 * 949 *
@@ -841,7 +958,7 @@ static void end_io_requests(struct fuse_conn *fc)
841 list_entry(fc->io.next, struct fuse_req, list); 958 list_entry(fc->io.next, struct fuse_req, list);
842 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 959 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
843 960
844 req->interrupted = 1; 961 req->aborted = 1;
845 req->out.h.error = -ECONNABORTED; 962 req->out.h.error = -ECONNABORTED;
846 req->state = FUSE_REQ_FINISHED; 963 req->state = FUSE_REQ_FINISHED;
847 list_del_init(&req->list); 964 list_del_init(&req->list);
@@ -874,19 +991,20 @@ static void end_io_requests(struct fuse_conn *fc)
874 * onto the pending list is prevented by req->connected being false. 991 * onto the pending list is prevented by req->connected being false.
875 * 992 *
876 * Progression of requests under I/O to the processing list is 993 * Progression of requests under I/O to the processing list is
877 * prevented by the req->interrupted flag being true for these 994 * prevented by the req->aborted flag being true for these requests.
878 * requests. For this reason requests on the io list must be aborted 995 * For this reason requests on the io list must be aborted first.
879 * first.
880 */ 996 */
881void fuse_abort_conn(struct fuse_conn *fc) 997void fuse_abort_conn(struct fuse_conn *fc)
882{ 998{
883 spin_lock(&fc->lock); 999 spin_lock(&fc->lock);
884 if (fc->connected) { 1000 if (fc->connected) {
885 fc->connected = 0; 1001 fc->connected = 0;
1002 fc->blocked = 0;
886 end_io_requests(fc); 1003 end_io_requests(fc);
887 end_requests(fc, &fc->pending); 1004 end_requests(fc, &fc->pending);
888 end_requests(fc, &fc->processing); 1005 end_requests(fc, &fc->processing);
889 wake_up_all(&fc->waitq); 1006 wake_up_all(&fc->waitq);
1007 wake_up_all(&fc->blocked_waitq);
890 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 1008 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
891 } 1009 }
892 spin_unlock(&fc->lock); 1010 spin_unlock(&fc->lock);
@@ -902,7 +1020,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
902 end_requests(fc, &fc->processing); 1020 end_requests(fc, &fc->processing);
903 spin_unlock(&fc->lock); 1021 spin_unlock(&fc->lock);
904 fasync_helper(-1, file, 0, &fc->fasync); 1022 fasync_helper(-1, file, 0, &fc->fasync);
905 kobject_put(&fc->kobj); 1023 fuse_conn_put(fc);
906 } 1024 }
907 1025
908 return 0; 1026 return 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8d7546e832e8..72a74cde6de8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -79,7 +79,6 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
79{ 79{
80 req->in.h.opcode = FUSE_LOOKUP; 80 req->in.h.opcode = FUSE_LOOKUP;
81 req->in.h.nodeid = get_node_id(dir); 81 req->in.h.nodeid = get_node_id(dir);
82 req->inode = dir;
83 req->in.numargs = 1; 82 req->in.numargs = 1;
84 req->in.args[0].size = entry->d_name.len + 1; 83 req->in.args[0].size = entry->d_name.len + 1;
85 req->in.args[0].value = entry->d_name.name; 84 req->in.args[0].value = entry->d_name.name;
@@ -225,6 +224,20 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
225} 224}
226 225
227/* 226/*
227 * Synchronous release for the case when something goes wrong in CREATE_OPEN
228 */
229static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
230 u64 nodeid, int flags)
231{
232 struct fuse_req *req;
233
234 req = fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
235 req->force = 1;
236 request_send(fc, req);
237 fuse_put_request(fc, req);
238}
239
240/*
228 * Atomic create+open operation 241 * Atomic create+open operation
229 * 242 *
230 * If the filesystem doesn't support this, then fall back to separate 243 * If the filesystem doesn't support this, then fall back to separate
@@ -237,6 +250,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
237 struct inode *inode; 250 struct inode *inode;
238 struct fuse_conn *fc = get_fuse_conn(dir); 251 struct fuse_conn *fc = get_fuse_conn(dir);
239 struct fuse_req *req; 252 struct fuse_req *req;
253 struct fuse_req *forget_req;
240 struct fuse_open_in inarg; 254 struct fuse_open_in inarg;
241 struct fuse_open_out outopen; 255 struct fuse_open_out outopen;
242 struct fuse_entry_out outentry; 256 struct fuse_entry_out outentry;
@@ -247,9 +261,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
247 if (fc->no_create) 261 if (fc->no_create)
248 return -ENOSYS; 262 return -ENOSYS;
249 263
264 forget_req = fuse_get_req(fc);
265 if (IS_ERR(forget_req))
266 return PTR_ERR(forget_req);
267
250 req = fuse_get_req(fc); 268 req = fuse_get_req(fc);
269 err = PTR_ERR(req);
251 if (IS_ERR(req)) 270 if (IS_ERR(req))
252 return PTR_ERR(req); 271 goto out_put_forget_req;
253 272
254 err = -ENOMEM; 273 err = -ENOMEM;
255 ff = fuse_file_alloc(); 274 ff = fuse_file_alloc();
@@ -262,7 +281,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
262 inarg.mode = mode; 281 inarg.mode = mode;
263 req->in.h.opcode = FUSE_CREATE; 282 req->in.h.opcode = FUSE_CREATE;
264 req->in.h.nodeid = get_node_id(dir); 283 req->in.h.nodeid = get_node_id(dir);
265 req->inode = dir;
266 req->in.numargs = 2; 284 req->in.numargs = 2;
267 req->in.args[0].size = sizeof(inarg); 285 req->in.args[0].size = sizeof(inarg);
268 req->in.args[0].value = &inarg; 286 req->in.args[0].value = &inarg;
@@ -285,25 +303,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
285 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) 303 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
286 goto out_free_ff; 304 goto out_free_ff;
287 305
306 fuse_put_request(fc, req);
288 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 307 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
289 &outentry.attr); 308 &outentry.attr);
290 err = -ENOMEM;
291 if (!inode) { 309 if (!inode) {
292 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 310 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
293 ff->fh = outopen.fh; 311 ff->fh = outopen.fh;
294 /* Special release, with inode = NULL, this will 312 fuse_sync_release(fc, ff, outentry.nodeid, flags);
295 trigger a 'forget' request when the release is 313 fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
296 complete */ 314 return -ENOMEM;
297 fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0);
298 goto out_put_request;
299 } 315 }
300 fuse_put_request(fc, req); 316 fuse_put_request(fc, forget_req);
301 d_instantiate(entry, inode); 317 d_instantiate(entry, inode);
302 fuse_change_timeout(entry, &outentry); 318 fuse_change_timeout(entry, &outentry);
303 file = lookup_instantiate_filp(nd, entry, generic_file_open); 319 file = lookup_instantiate_filp(nd, entry, generic_file_open);
304 if (IS_ERR(file)) { 320 if (IS_ERR(file)) {
305 ff->fh = outopen.fh; 321 ff->fh = outopen.fh;
306 fuse_send_release(fc, ff, outentry.nodeid, inode, flags, 0); 322 fuse_sync_release(fc, ff, outentry.nodeid, flags);
307 return PTR_ERR(file); 323 return PTR_ERR(file);
308 } 324 }
309 fuse_finish_open(inode, file, ff, &outopen); 325 fuse_finish_open(inode, file, ff, &outopen);
@@ -313,6 +329,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
313 fuse_file_free(ff); 329 fuse_file_free(ff);
314 out_put_request: 330 out_put_request:
315 fuse_put_request(fc, req); 331 fuse_put_request(fc, req);
332 out_put_forget_req:
333 fuse_put_request(fc, forget_req);
316 return err; 334 return err;
317} 335}
318 336
@@ -328,7 +346,6 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
328 int err; 346 int err;
329 347
330 req->in.h.nodeid = get_node_id(dir); 348 req->in.h.nodeid = get_node_id(dir);
331 req->inode = dir;
332 req->out.numargs = 1; 349 req->out.numargs = 1;
333 req->out.args[0].size = sizeof(outarg); 350 req->out.args[0].size = sizeof(outarg);
334 req->out.args[0].value = &outarg; 351 req->out.args[0].value = &outarg;
@@ -448,7 +465,6 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
448 465
449 req->in.h.opcode = FUSE_UNLINK; 466 req->in.h.opcode = FUSE_UNLINK;
450 req->in.h.nodeid = get_node_id(dir); 467 req->in.h.nodeid = get_node_id(dir);
451 req->inode = dir;
452 req->in.numargs = 1; 468 req->in.numargs = 1;
453 req->in.args[0].size = entry->d_name.len + 1; 469 req->in.args[0].size = entry->d_name.len + 1;
454 req->in.args[0].value = entry->d_name.name; 470 req->in.args[0].value = entry->d_name.name;
@@ -480,7 +496,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
480 496
481 req->in.h.opcode = FUSE_RMDIR; 497 req->in.h.opcode = FUSE_RMDIR;
482 req->in.h.nodeid = get_node_id(dir); 498 req->in.h.nodeid = get_node_id(dir);
483 req->inode = dir;
484 req->in.numargs = 1; 499 req->in.numargs = 1;
485 req->in.args[0].size = entry->d_name.len + 1; 500 req->in.args[0].size = entry->d_name.len + 1;
486 req->in.args[0].value = entry->d_name.name; 501 req->in.args[0].value = entry->d_name.name;
@@ -510,8 +525,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
510 inarg.newdir = get_node_id(newdir); 525 inarg.newdir = get_node_id(newdir);
511 req->in.h.opcode = FUSE_RENAME; 526 req->in.h.opcode = FUSE_RENAME;
512 req->in.h.nodeid = get_node_id(olddir); 527 req->in.h.nodeid = get_node_id(olddir);
513 req->inode = olddir;
514 req->inode2 = newdir;
515 req->in.numargs = 3; 528 req->in.numargs = 3;
516 req->in.args[0].size = sizeof(inarg); 529 req->in.args[0].size = sizeof(inarg);
517 req->in.args[0].value = &inarg; 530 req->in.args[0].value = &inarg;
@@ -558,7 +571,6 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
558 memset(&inarg, 0, sizeof(inarg)); 571 memset(&inarg, 0, sizeof(inarg));
559 inarg.oldnodeid = get_node_id(inode); 572 inarg.oldnodeid = get_node_id(inode);
560 req->in.h.opcode = FUSE_LINK; 573 req->in.h.opcode = FUSE_LINK;
561 req->inode2 = inode;
562 req->in.numargs = 2; 574 req->in.numargs = 2;
563 req->in.args[0].size = sizeof(inarg); 575 req->in.args[0].size = sizeof(inarg);
564 req->in.args[0].value = &inarg; 576 req->in.args[0].value = &inarg;
@@ -587,7 +599,6 @@ int fuse_do_getattr(struct inode *inode)
587 599
588 req->in.h.opcode = FUSE_GETATTR; 600 req->in.h.opcode = FUSE_GETATTR;
589 req->in.h.nodeid = get_node_id(inode); 601 req->in.h.nodeid = get_node_id(inode);
590 req->inode = inode;
591 req->out.numargs = 1; 602 req->out.numargs = 1;
592 req->out.args[0].size = sizeof(arg); 603 req->out.args[0].size = sizeof(arg);
593 req->out.args[0].value = &arg; 604 req->out.args[0].value = &arg;
@@ -679,7 +690,6 @@ static int fuse_access(struct inode *inode, int mask)
679 inarg.mask = mask; 690 inarg.mask = mask;
680 req->in.h.opcode = FUSE_ACCESS; 691 req->in.h.opcode = FUSE_ACCESS;
681 req->in.h.nodeid = get_node_id(inode); 692 req->in.h.nodeid = get_node_id(inode);
682 req->inode = inode;
683 req->in.numargs = 1; 693 req->in.numargs = 1;
684 req->in.args[0].size = sizeof(inarg); 694 req->in.args[0].size = sizeof(inarg);
685 req->in.args[0].value = &inarg; 695 req->in.args[0].value = &inarg;
@@ -820,7 +830,6 @@ static char *read_link(struct dentry *dentry)
820 } 830 }
821 req->in.h.opcode = FUSE_READLINK; 831 req->in.h.opcode = FUSE_READLINK;
822 req->in.h.nodeid = get_node_id(inode); 832 req->in.h.nodeid = get_node_id(inode);
823 req->inode = inode;
824 req->out.argvar = 1; 833 req->out.argvar = 1;
825 req->out.numargs = 1; 834 req->out.numargs = 1;
826 req->out.args[0].size = PAGE_SIZE - 1; 835 req->out.args[0].size = PAGE_SIZE - 1;
@@ -939,7 +948,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
939 iattr_to_fattr(attr, &inarg); 948 iattr_to_fattr(attr, &inarg);
940 req->in.h.opcode = FUSE_SETATTR; 949 req->in.h.opcode = FUSE_SETATTR;
941 req->in.h.nodeid = get_node_id(inode); 950 req->in.h.nodeid = get_node_id(inode);
942 req->inode = inode;
943 req->in.numargs = 1; 951 req->in.numargs = 1;
944 req->in.args[0].size = sizeof(inarg); 952 req->in.args[0].size = sizeof(inarg);
945 req->in.args[0].value = &inarg; 953 req->in.args[0].value = &inarg;
@@ -1002,7 +1010,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1002 inarg.flags = flags; 1010 inarg.flags = flags;
1003 req->in.h.opcode = FUSE_SETXATTR; 1011 req->in.h.opcode = FUSE_SETXATTR;
1004 req->in.h.nodeid = get_node_id(inode); 1012 req->in.h.nodeid = get_node_id(inode);
1005 req->inode = inode;
1006 req->in.numargs = 3; 1013 req->in.numargs = 3;
1007 req->in.args[0].size = sizeof(inarg); 1014 req->in.args[0].size = sizeof(inarg);
1008 req->in.args[0].value = &inarg; 1015 req->in.args[0].value = &inarg;
@@ -1041,7 +1048,6 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1041 inarg.size = size; 1048 inarg.size = size;
1042 req->in.h.opcode = FUSE_GETXATTR; 1049 req->in.h.opcode = FUSE_GETXATTR;
1043 req->in.h.nodeid = get_node_id(inode); 1050 req->in.h.nodeid = get_node_id(inode);
1044 req->inode = inode;
1045 req->in.numargs = 2; 1051 req->in.numargs = 2;
1046 req->in.args[0].size = sizeof(inarg); 1052 req->in.args[0].size = sizeof(inarg);
1047 req->in.args[0].value = &inarg; 1053 req->in.args[0].value = &inarg;
@@ -1091,7 +1097,6 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1091 inarg.size = size; 1097 inarg.size = size;
1092 req->in.h.opcode = FUSE_LISTXATTR; 1098 req->in.h.opcode = FUSE_LISTXATTR;
1093 req->in.h.nodeid = get_node_id(inode); 1099 req->in.h.nodeid = get_node_id(inode);
1094 req->inode = inode;
1095 req->in.numargs = 1; 1100 req->in.numargs = 1;
1096 req->in.args[0].size = sizeof(inarg); 1101 req->in.args[0].size = sizeof(inarg);
1097 req->in.args[0].value = &inarg; 1102 req->in.args[0].value = &inarg;
@@ -1135,7 +1140,6 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1135 1140
1136 req->in.h.opcode = FUSE_REMOVEXATTR; 1141 req->in.h.opcode = FUSE_REMOVEXATTR;
1137 req->in.h.nodeid = get_node_id(inode); 1142 req->in.h.nodeid = get_node_id(inode);
1138 req->inode = inode;
1139 req->in.numargs = 1; 1143 req->in.numargs = 1;
1140 req->in.args[0].size = strlen(name) + 1; 1144 req->in.args[0].size = strlen(name) + 1;
1141 req->in.args[0].value = name; 1145 req->in.args[0].value = name;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fc342cf7c2cc..63614ed16336 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -30,7 +30,6 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
31 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; 31 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
32 req->in.h.nodeid = get_node_id(inode); 32 req->in.h.nodeid = get_node_id(inode);
33 req->inode = inode;
34 req->in.numargs = 1; 33 req->in.numargs = 1;
35 req->in.args[0].size = sizeof(inarg); 34 req->in.args[0].size = sizeof(inarg);
36 req->in.args[0].value = &inarg; 35 req->in.args[0].value = &inarg;
@@ -49,8 +48,8 @@ struct fuse_file *fuse_file_alloc(void)
49 struct fuse_file *ff; 48 struct fuse_file *ff;
50 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); 49 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
51 if (ff) { 50 if (ff) {
52 ff->release_req = fuse_request_alloc(); 51 ff->reserved_req = fuse_request_alloc();
53 if (!ff->release_req) { 52 if (!ff->reserved_req) {
54 kfree(ff); 53 kfree(ff);
55 ff = NULL; 54 ff = NULL;
56 } 55 }
@@ -60,7 +59,7 @@ struct fuse_file *fuse_file_alloc(void)
60 59
61void fuse_file_free(struct fuse_file *ff) 60void fuse_file_free(struct fuse_file *ff)
62{ 61{
63 fuse_request_free(ff->release_req); 62 fuse_request_free(ff->reserved_req);
64 kfree(ff); 63 kfree(ff);
65} 64}
66 65
@@ -113,37 +112,22 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
113 return err; 112 return err;
114} 113}
115 114
116/* Special case for failed iget in CREATE */ 115struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags,
117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 116 int opcode)
118{ 117{
119 /* If called from end_io_requests(), req has more than one 118 struct fuse_req *req = ff->reserved_req;
120 reference and fuse_reset_request() cannot work */
121 if (fc->connected) {
122 u64 nodeid = req->in.h.nodeid;
123 fuse_reset_request(req);
124 fuse_send_forget(fc, req, nodeid, 1);
125 } else
126 fuse_put_request(fc, req);
127}
128
129void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
130 u64 nodeid, struct inode *inode, int flags, int isdir)
131{
132 struct fuse_req * req = ff->release_req;
133 struct fuse_release_in *inarg = &req->misc.release_in; 119 struct fuse_release_in *inarg = &req->misc.release_in;
134 120
135 inarg->fh = ff->fh; 121 inarg->fh = ff->fh;
136 inarg->flags = flags; 122 inarg->flags = flags;
137 req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; 123 req->in.h.opcode = opcode;
138 req->in.h.nodeid = nodeid; 124 req->in.h.nodeid = nodeid;
139 req->inode = inode;
140 req->in.numargs = 1; 125 req->in.numargs = 1;
141 req->in.args[0].size = sizeof(struct fuse_release_in); 126 req->in.args[0].size = sizeof(struct fuse_release_in);
142 req->in.args[0].value = inarg; 127 req->in.args[0].value = inarg;
143 request_send_background(fc, req);
144 if (!inode)
145 req->end = fuse_release_end;
146 kfree(ff); 128 kfree(ff);
129
130 return req;
147} 131}
148 132
149int fuse_release_common(struct inode *inode, struct file *file, int isdir) 133int fuse_release_common(struct inode *inode, struct file *file, int isdir)
@@ -151,8 +135,15 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
151 struct fuse_file *ff = file->private_data; 135 struct fuse_file *ff = file->private_data;
152 if (ff) { 136 if (ff) {
153 struct fuse_conn *fc = get_fuse_conn(inode); 137 struct fuse_conn *fc = get_fuse_conn(inode);
154 u64 nodeid = get_node_id(inode); 138 struct fuse_req *req;
155 fuse_send_release(fc, ff, nodeid, inode, file->f_flags, isdir); 139
140 req = fuse_release_fill(ff, get_node_id(inode), file->f_flags,
141 isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
142
143 /* Hold vfsmount and dentry until release is finished */
144 req->vfsmount = mntget(file->f_vfsmnt);
145 req->dentry = dget(file->f_dentry);
146 request_send_background(fc, req);
156 } 147 }
157 148
158 /* Return value is ignored by VFS */ 149 /* Return value is ignored by VFS */
@@ -169,7 +160,29 @@ static int fuse_release(struct inode *inode, struct file *file)
169 return fuse_release_common(inode, file, 0); 160 return fuse_release_common(inode, file, 0);
170} 161}
171 162
172static int fuse_flush(struct file *file) 163/*
164 * Scramble the ID space with XTEA, so that the value of the files_struct
165 * pointer is not exposed to userspace.
166 */
167static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
168{
169 u32 *k = fc->scramble_key;
170 u64 v = (unsigned long) id;
171 u32 v0 = v;
172 u32 v1 = v >> 32;
173 u32 sum = 0;
174 int i;
175
176 for (i = 0; i < 32; i++) {
177 v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
178 sum += 0x9E3779B9;
179 v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
180 }
181
182 return (u64) v0 + ((u64) v1 << 32);
183}
184
185static int fuse_flush(struct file *file, fl_owner_t id)
173{ 186{
174 struct inode *inode = file->f_dentry->d_inode; 187 struct inode *inode = file->f_dentry->d_inode;
175 struct fuse_conn *fc = get_fuse_conn(inode); 188 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -184,19 +197,16 @@ static int fuse_flush(struct file *file)
184 if (fc->no_flush) 197 if (fc->no_flush)
185 return 0; 198 return 0;
186 199
187 req = fuse_get_req(fc); 200 req = fuse_get_req_nofail(fc, file);
188 if (IS_ERR(req))
189 return PTR_ERR(req);
190
191 memset(&inarg, 0, sizeof(inarg)); 201 memset(&inarg, 0, sizeof(inarg));
192 inarg.fh = ff->fh; 202 inarg.fh = ff->fh;
203 inarg.lock_owner = fuse_lock_owner_id(fc, id);
193 req->in.h.opcode = FUSE_FLUSH; 204 req->in.h.opcode = FUSE_FLUSH;
194 req->in.h.nodeid = get_node_id(inode); 205 req->in.h.nodeid = get_node_id(inode);
195 req->inode = inode;
196 req->file = file;
197 req->in.numargs = 1; 206 req->in.numargs = 1;
198 req->in.args[0].size = sizeof(inarg); 207 req->in.args[0].size = sizeof(inarg);
199 req->in.args[0].value = &inarg; 208 req->in.args[0].value = &inarg;
209 req->force = 1;
200 request_send(fc, req); 210 request_send(fc, req);
201 err = req->out.h.error; 211 err = req->out.h.error;
202 fuse_put_request(fc, req); 212 fuse_put_request(fc, req);
@@ -232,8 +242,6 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
232 inarg.fsync_flags = datasync ? 1 : 0; 242 inarg.fsync_flags = datasync ? 1 : 0;
233 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; 243 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
234 req->in.h.nodeid = get_node_id(inode); 244 req->in.h.nodeid = get_node_id(inode);
235 req->inode = inode;
236 req->file = file;
237 req->in.numargs = 1; 245 req->in.numargs = 1;
238 req->in.args[0].size = sizeof(inarg); 246 req->in.args[0].size = sizeof(inarg);
239 req->in.args[0].value = &inarg; 247 req->in.args[0].value = &inarg;
@@ -266,8 +274,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
266 inarg->size = count; 274 inarg->size = count;
267 req->in.h.opcode = opcode; 275 req->in.h.opcode = opcode;
268 req->in.h.nodeid = get_node_id(inode); 276 req->in.h.nodeid = get_node_id(inode);
269 req->inode = inode;
270 req->file = file;
271 req->in.numargs = 1; 277 req->in.numargs = 1;
272 req->in.args[0].size = sizeof(struct fuse_read_in); 278 req->in.args[0].size = sizeof(struct fuse_read_in);
273 req->in.args[0].value = inarg; 279 req->in.args[0].value = inarg;
@@ -342,6 +348,8 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
342 req->out.page_zeroing = 1; 348 req->out.page_zeroing = 1;
343 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 349 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
344 if (fc->async_read) { 350 if (fc->async_read) {
351 get_file(file);
352 req->file = file;
345 req->end = fuse_readpages_end; 353 req->end = fuse_readpages_end;
346 request_send_background(fc, req); 354 request_send_background(fc, req);
347 } else { 355 } else {
@@ -420,8 +428,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
420 inarg.size = count; 428 inarg.size = count;
421 req->in.h.opcode = FUSE_WRITE; 429 req->in.h.opcode = FUSE_WRITE;
422 req->in.h.nodeid = get_node_id(inode); 430 req->in.h.nodeid = get_node_id(inode);
423 req->inode = inode;
424 req->file = file;
425 req->in.argpages = 1; 431 req->in.argpages = 1;
426 req->in.numargs = 2; 432 req->in.numargs = 2;
427 req->in.args[0].size = sizeof(struct fuse_write_in); 433 req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -619,6 +625,126 @@ static int fuse_set_page_dirty(struct page *page)
619 return 0; 625 return 0;
620} 626}
621 627
628static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
629 struct file_lock *fl)
630{
631 switch (ffl->type) {
632 case F_UNLCK:
633 break;
634
635 case F_RDLCK:
636 case F_WRLCK:
637 if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
638 ffl->end < ffl->start)
639 return -EIO;
640
641 fl->fl_start = ffl->start;
642 fl->fl_end = ffl->end;
643 fl->fl_pid = ffl->pid;
644 break;
645
646 default:
647 return -EIO;
648 }
649 fl->fl_type = ffl->type;
650 return 0;
651}
652
653static void fuse_lk_fill(struct fuse_req *req, struct file *file,
654 const struct file_lock *fl, int opcode, pid_t pid)
655{
656 struct inode *inode = file->f_dentry->d_inode;
657 struct fuse_conn *fc = get_fuse_conn(inode);
658 struct fuse_file *ff = file->private_data;
659 struct fuse_lk_in *arg = &req->misc.lk_in;
660
661 arg->fh = ff->fh;
662 arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
663 arg->lk.start = fl->fl_start;
664 arg->lk.end = fl->fl_end;
665 arg->lk.type = fl->fl_type;
666 arg->lk.pid = pid;
667 req->in.h.opcode = opcode;
668 req->in.h.nodeid = get_node_id(inode);
669 req->in.numargs = 1;
670 req->in.args[0].size = sizeof(*arg);
671 req->in.args[0].value = arg;
672}
673
674static int fuse_getlk(struct file *file, struct file_lock *fl)
675{
676 struct inode *inode = file->f_dentry->d_inode;
677 struct fuse_conn *fc = get_fuse_conn(inode);
678 struct fuse_req *req;
679 struct fuse_lk_out outarg;
680 int err;
681
682 req = fuse_get_req(fc);
683 if (IS_ERR(req))
684 return PTR_ERR(req);
685
686 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0);
687 req->out.numargs = 1;
688 req->out.args[0].size = sizeof(outarg);
689 req->out.args[0].value = &outarg;
690 request_send(fc, req);
691 err = req->out.h.error;
692 fuse_put_request(fc, req);
693 if (!err)
694 err = convert_fuse_file_lock(&outarg.lk, fl);
695
696 return err;
697}
698
699static int fuse_setlk(struct file *file, struct file_lock *fl)
700{
701 struct inode *inode = file->f_dentry->d_inode;
702 struct fuse_conn *fc = get_fuse_conn(inode);
703 struct fuse_req *req;
704 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
705 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
706 int err;
707
708 /* Unlock on close is handled by the flush method */
709 if (fl->fl_flags & FL_CLOSE)
710 return 0;
711
712 req = fuse_get_req(fc);
713 if (IS_ERR(req))
714 return PTR_ERR(req);
715
716 fuse_lk_fill(req, file, fl, opcode, pid);
717 request_send(fc, req);
718 err = req->out.h.error;
719 /* locking is restartable */
720 if (err == -EINTR)
721 err = -ERESTARTSYS;
722 fuse_put_request(fc, req);
723 return err;
724}
725
726static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
727{
728 struct inode *inode = file->f_dentry->d_inode;
729 struct fuse_conn *fc = get_fuse_conn(inode);
730 int err;
731
732 if (cmd == F_GETLK) {
733 if (fc->no_lock) {
734 if (!posix_test_lock(file, fl, fl))
735 fl->fl_type = F_UNLCK;
736 err = 0;
737 } else
738 err = fuse_getlk(file, fl);
739 } else {
740 if (fc->no_lock)
741 err = posix_lock_file_wait(file, fl);
742 else
743 err = fuse_setlk(file, fl);
744 }
745 return err;
746}
747
622static const struct file_operations fuse_file_operations = { 748static const struct file_operations fuse_file_operations = {
623 .llseek = generic_file_llseek, 749 .llseek = generic_file_llseek,
624 .read = generic_file_read, 750 .read = generic_file_read,
@@ -628,6 +754,7 @@ static const struct file_operations fuse_file_operations = {
628 .flush = fuse_flush, 754 .flush = fuse_flush,
629 .release = fuse_release, 755 .release = fuse_release,
630 .fsync = fuse_fsync, 756 .fsync = fuse_fsync,
757 .lock = fuse_file_lock,
631 .sendfile = generic_file_sendfile, 758 .sendfile = generic_file_sendfile,
632}; 759};
633 760
@@ -639,10 +766,11 @@ static const struct file_operations fuse_direct_io_file_operations = {
639 .flush = fuse_flush, 766 .flush = fuse_flush,
640 .release = fuse_release, 767 .release = fuse_release,
641 .fsync = fuse_fsync, 768 .fsync = fuse_fsync,
769 .lock = fuse_file_lock,
642 /* no mmap and sendfile */ 770 /* no mmap and sendfile */
643}; 771};
644 772
645static struct address_space_operations fuse_file_aops = { 773static const struct address_space_operations fuse_file_aops = {
646 .readpage = fuse_readpage, 774 .readpage = fuse_readpage,
647 .prepare_write = fuse_prepare_write, 775 .prepare_write = fuse_prepare_write,
648 .commit_write = fuse_commit_write, 776 .commit_write = fuse_commit_write,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0474202cb5dc..0dbf96621841 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -8,12 +8,13 @@
8 8
9#include <linux/fuse.h> 9#include <linux/fuse.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mount.h>
11#include <linux/wait.h> 12#include <linux/wait.h>
12#include <linux/list.h> 13#include <linux/list.h>
13#include <linux/spinlock.h> 14#include <linux/spinlock.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
16#include <asm/semaphore.h> 17#include <linux/mutex.h>
17 18
18/** Max number of pages that can be used in a single read request */ 19/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32 20#define FUSE_MAX_PAGES_PER_REQ 32
@@ -24,6 +25,9 @@
24/** It could be as large as PATH_MAX, but would that have any uses? */ 25/** It could be as large as PATH_MAX, but would that have any uses? */
25#define FUSE_NAME_MAX 1024 26#define FUSE_NAME_MAX 1024
26 27
28/** Number of dentries for each connection in the control filesystem */
29#define FUSE_CTL_NUM_DENTRIES 3
30
27/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem 31/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
28 module will check permissions based on the file mode. Otherwise no 32 module will check permissions based on the file mode. Otherwise no
29 permission checking is done in the kernel */ 33 permission checking is done in the kernel */
@@ -33,6 +37,11 @@
33 doing the mount will be allowed to access the filesystem */ 37 doing the mount will be allowed to access the filesystem */
34#define FUSE_ALLOW_OTHER (1 << 1) 38#define FUSE_ALLOW_OTHER (1 << 1)
35 39
40/** List of active connections */
41extern struct list_head fuse_conn_list;
42
43/** Global mutex protecting fuse_conn_list and the control filesystem */
44extern struct mutex fuse_mutex;
36 45
37/** FUSE inode */ 46/** FUSE inode */
38struct fuse_inode { 47struct fuse_inode {
@@ -56,7 +65,7 @@ struct fuse_inode {
56/** FUSE specific file data */ 65/** FUSE specific file data */
57struct fuse_file { 66struct fuse_file {
58 /** Request reserved for flush and release */ 67 /** Request reserved for flush and release */
59 struct fuse_req *release_req; 68 struct fuse_req *reserved_req;
60 69
61 /** File handle used by userspace */ 70 /** File handle used by userspace */
62 u64 fh; 71 u64 fh;
@@ -122,6 +131,7 @@ enum fuse_req_state {
122 FUSE_REQ_PENDING, 131 FUSE_REQ_PENDING,
123 FUSE_REQ_READING, 132 FUSE_REQ_READING,
124 FUSE_REQ_SENT, 133 FUSE_REQ_SENT,
134 FUSE_REQ_WRITING,
125 FUSE_REQ_FINISHED 135 FUSE_REQ_FINISHED
126}; 136};
127 137
@@ -135,12 +145,15 @@ struct fuse_req {
135 fuse_conn */ 145 fuse_conn */
136 struct list_head list; 146 struct list_head list;
137 147
138 /** Entry on the background list */ 148 /** Entry on the interrupts list */
139 struct list_head bg_entry; 149 struct list_head intr_entry;
140 150
141 /** refcount */ 151 /** refcount */
142 atomic_t count; 152 atomic_t count;
143 153
154 /** Unique ID for the interrupt request */
155 u64 intr_unique;
156
144 /* 157 /*
145 * The following bitfields are either set once before the 158 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by 159 * request is queued or setting/clearing them is protected by
@@ -150,12 +163,18 @@ struct fuse_req {
150 /** True if the request has reply */ 163 /** True if the request has reply */
151 unsigned isreply:1; 164 unsigned isreply:1;
152 165
153 /** The request was interrupted */ 166 /** Force sending of the request even if interrupted */
154 unsigned interrupted:1; 167 unsigned force:1;
168
169 /** The request was aborted */
170 unsigned aborted:1;
155 171
156 /** Request is sent in the background */ 172 /** Request is sent in the background */
157 unsigned background:1; 173 unsigned background:1;
158 174
175 /** The request has been interrupted */
176 unsigned interrupted:1;
177
159 /** Data is being copied to/from the request */ 178 /** Data is being copied to/from the request */
160 unsigned locked:1; 179 unsigned locked:1;
161 180
@@ -181,6 +200,7 @@ struct fuse_req {
181 struct fuse_init_in init_in; 200 struct fuse_init_in init_in;
182 struct fuse_init_out init_out; 201 struct fuse_init_out init_out;
183 struct fuse_read_in read_in; 202 struct fuse_read_in read_in;
203 struct fuse_lk_in lk_in;
184 } misc; 204 } misc;
185 205
186 /** page vector */ 206 /** page vector */
@@ -192,17 +212,20 @@ struct fuse_req {
192 /** offset of data on first page */ 212 /** offset of data on first page */
193 unsigned page_offset; 213 unsigned page_offset;
194 214
195 /** Inode used in the request */
196 struct inode *inode;
197
198 /** Second inode used in the request (or NULL) */
199 struct inode *inode2;
200
201 /** File used in the request (or NULL) */ 215 /** File used in the request (or NULL) */
202 struct file *file; 216 struct file *file;
203 217
218 /** vfsmount used in release */
219 struct vfsmount *vfsmount;
220
221 /** dentry used in release */
222 struct dentry *dentry;
223
204 /** Request completion callback */ 224 /** Request completion callback */
205 void (*end)(struct fuse_conn *, struct fuse_req *); 225 void (*end)(struct fuse_conn *, struct fuse_req *);
226
227 /** Request is stolen from fuse_file->reserved_req */
228 struct file *stolen_file;
206}; 229};
207 230
208/** 231/**
@@ -216,6 +239,9 @@ struct fuse_conn {
216 /** Lock protecting accessess to members of this structure */ 239 /** Lock protecting accessess to members of this structure */
217 spinlock_t lock; 240 spinlock_t lock;
218 241
242 /** Refcount */
243 atomic_t count;
244
219 /** The user id for this mount */ 245 /** The user id for this mount */
220 uid_t user_id; 246 uid_t user_id;
221 247
@@ -243,13 +269,12 @@ struct fuse_conn {
243 /** The list of requests under I/O */ 269 /** The list of requests under I/O */
244 struct list_head io; 270 struct list_head io;
245 271
246 /** Requests put in the background (RELEASE or any other
247 interrupted request) */
248 struct list_head background;
249
250 /** Number of requests currently in the background */ 272 /** Number of requests currently in the background */
251 unsigned num_background; 273 unsigned num_background;
252 274
275 /** Pending interrupts */
276 struct list_head interrupts;
277
253 /** Flag indicating if connection is blocked. This will be 278 /** Flag indicating if connection is blocked. This will be
254 the case before the INIT reply is received, and if there 279 the case before the INIT reply is received, and if there
255 are too many outstading backgrounds requests */ 280 are too many outstading backgrounds requests */
@@ -258,15 +283,9 @@ struct fuse_conn {
258 /** waitq for blocked connection */ 283 /** waitq for blocked connection */
259 wait_queue_head_t blocked_waitq; 284 wait_queue_head_t blocked_waitq;
260 285
261 /** RW semaphore for exclusion with fuse_put_super() */
262 struct rw_semaphore sbput_sem;
263
264 /** The next unique request id */ 286 /** The next unique request id */
265 u64 reqctr; 287 u64 reqctr;
266 288
267 /** Mount is active */
268 unsigned mounted;
269
270 /** Connection established, cleared on umount, connection 289 /** Connection established, cleared on umount, connection
271 abort and device release */ 290 abort and device release */
272 unsigned connected; 291 unsigned connected;
@@ -305,12 +324,18 @@ struct fuse_conn {
305 /** Is removexattr not implemented by fs? */ 324 /** Is removexattr not implemented by fs? */
306 unsigned no_removexattr : 1; 325 unsigned no_removexattr : 1;
307 326
327 /** Are file locking primitives not implemented by fs? */
328 unsigned no_lock : 1;
329
308 /** Is access not implemented by fs? */ 330 /** Is access not implemented by fs? */
309 unsigned no_access : 1; 331 unsigned no_access : 1;
310 332
311 /** Is create not implemented by fs? */ 333 /** Is create not implemented by fs? */
312 unsigned no_create : 1; 334 unsigned no_create : 1;
313 335
336 /** Is interrupt not implemented by fs? */
337 unsigned no_interrupt : 1;
338
314 /** The number of requests waiting for completion */ 339 /** The number of requests waiting for completion */
315 atomic_t num_waiting; 340 atomic_t num_waiting;
316 341
@@ -320,11 +345,23 @@ struct fuse_conn {
320 /** Backing dev info */ 345 /** Backing dev info */
321 struct backing_dev_info bdi; 346 struct backing_dev_info bdi;
322 347
323 /** kobject */ 348 /** Entry on the fuse_conn_list */
324 struct kobject kobj; 349 struct list_head entry;
350
351 /** Unique ID */
352 u64 id;
353
354 /** Dentries in the control filesystem */
355 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
356
357 /** number of dentries used in the above array */
358 int ctl_ndents;
325 359
326 /** O_ASYNC requests */ 360 /** O_ASYNC requests */
327 struct fasync_struct *fasync; 361 struct fasync_struct *fasync;
362
363 /** Key for lock owner ID scrambling */
364 u32 scramble_key[4];
328}; 365};
329 366
330static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 367static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -337,11 +374,6 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
337 return get_fuse_conn_super(inode->i_sb); 374 return get_fuse_conn_super(inode->i_sb);
338} 375}
339 376
340static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj)
341{
342 return container_of(obj, struct fuse_conn, kobj);
343}
344
345static inline struct fuse_inode *get_fuse_inode(struct inode *inode) 377static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
346{ 378{
347 return container_of(inode, struct fuse_inode, inode); 379 return container_of(inode, struct fuse_inode, inode);
@@ -383,12 +415,9 @@ void fuse_file_free(struct fuse_file *ff);
383void fuse_finish_open(struct inode *inode, struct file *file, 415void fuse_finish_open(struct inode *inode, struct file *file,
384 struct fuse_file *ff, struct fuse_open_out *outarg); 416 struct fuse_file *ff, struct fuse_open_out *outarg);
385 417
386/** 418/** */
387 * Send a RELEASE request 419struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags,
388 */ 420 int opcode);
389void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
390 u64 nodeid, struct inode *inode, int flags, int isdir);
391
392/** 421/**
393 * Send RELEASE or RELEASEDIR request 422 * Send RELEASE or RELEASEDIR request
394 */ 423 */
@@ -435,6 +464,9 @@ int fuse_dev_init(void);
435 */ 464 */
436void fuse_dev_cleanup(void); 465void fuse_dev_cleanup(void);
437 466
467int fuse_ctl_init(void);
468void fuse_ctl_cleanup(void);
469
438/** 470/**
439 * Allocate a request 471 * Allocate a request
440 */ 472 */
@@ -446,14 +478,14 @@ struct fuse_req *fuse_request_alloc(void);
446void fuse_request_free(struct fuse_req *req); 478void fuse_request_free(struct fuse_req *req);
447 479
448/** 480/**
449 * Reinitialize a request, the preallocated flag is left unmodified 481 * Get a request, may fail with -ENOMEM
450 */ 482 */
451void fuse_reset_request(struct fuse_req *req); 483struct fuse_req *fuse_get_req(struct fuse_conn *fc);
452 484
453/** 485/**
454 * Reserve a preallocated request 486 * Gets a requests for a file operation, always succeeds
455 */ 487 */
456struct fuse_req *fuse_get_req(struct fuse_conn *fc); 488struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file);
457 489
458/** 490/**
459 * Decrement reference count of a request. If count goes to zero free 491 * Decrement reference count of a request. If count goes to zero free
@@ -476,11 +508,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
476 */ 508 */
477void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 509void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
478 510
479/**
480 * Release inodes and file associated with background request
481 */
482void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
483
484/* Abort all requests */ 511/* Abort all requests */
485void fuse_abort_conn(struct fuse_conn *fc); 512void fuse_abort_conn(struct fuse_conn *fc);
486 513
@@ -493,3 +520,23 @@ int fuse_do_getattr(struct inode *inode);
493 * Invalidate inode attributes 520 * Invalidate inode attributes
494 */ 521 */
495void fuse_invalidate_attr(struct inode *inode); 522void fuse_invalidate_attr(struct inode *inode);
523
524/**
525 * Acquire reference to fuse_conn
526 */
527struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
528
529/**
530 * Release reference to fuse_conn
531 */
532void fuse_conn_put(struct fuse_conn *fc);
533
534/**
535 * Add connection to control filesystem
536 */
537int fuse_ctl_add_conn(struct fuse_conn *fc);
538
539/**
540 * Remove connection from control filesystem
541 */
542void fuse_ctl_remove_conn(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7627022446b2..dcaaabd3b9c4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -11,25 +11,20 @@
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/seq_file.h> 14#include <linux/seq_file.h>
16#include <linux/init.h> 15#include <linux/init.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/parser.h> 17#include <linux/parser.h>
19#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h>
20 20
21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25static kmem_cache_t *fuse_inode_cachep; 25static kmem_cache_t *fuse_inode_cachep;
26static struct subsystem connections_subsys; 26struct list_head fuse_conn_list;
27 27DEFINE_MUTEX(fuse_mutex);
28struct fuse_conn_attr {
29 struct attribute attr;
30 ssize_t (*show)(struct fuse_conn *, char *);
31 ssize_t (*store)(struct fuse_conn *, const char *, size_t);
32};
33 28
34#define FUSE_SUPER_MAGIC 0x65735546 29#define FUSE_SUPER_MAGIC 0x65735546
35 30
@@ -104,6 +99,14 @@ static void fuse_clear_inode(struct inode *inode)
104 } 99 }
105} 100}
106 101
102static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
103{
104 if (*flags & MS_MANDLOCK)
105 return -EINVAL;
106
107 return 0;
108}
109
107void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) 110void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
108{ 111{
109 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) 112 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
@@ -195,31 +198,29 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
195 return inode; 198 return inode;
196} 199}
197 200
198static void fuse_umount_begin(struct super_block *sb) 201static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags)
199{ 202{
200 fuse_abort_conn(get_fuse_conn_super(sb)); 203 if (flags & MNT_FORCE)
204 fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb));
201} 205}
202 206
203static void fuse_put_super(struct super_block *sb) 207static void fuse_put_super(struct super_block *sb)
204{ 208{
205 struct fuse_conn *fc = get_fuse_conn_super(sb); 209 struct fuse_conn *fc = get_fuse_conn_super(sb);
206 210
207 down_write(&fc->sbput_sem);
208 while (!list_empty(&fc->background))
209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry));
212
213 spin_lock(&fc->lock); 211 spin_lock(&fc->lock);
214 fc->mounted = 0;
215 fc->connected = 0; 212 fc->connected = 0;
213 fc->blocked = 0;
216 spin_unlock(&fc->lock); 214 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem);
218 /* Flush all readers on this fs */ 215 /* Flush all readers on this fs */
219 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 216 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
220 wake_up_all(&fc->waitq); 217 wake_up_all(&fc->waitq);
221 kobject_del(&fc->kobj); 218 wake_up_all(&fc->blocked_waitq);
222 kobject_put(&fc->kobj); 219 mutex_lock(&fuse_mutex);
220 list_del(&fc->entry);
221 fuse_ctl_remove_conn(fc);
222 mutex_unlock(&fuse_mutex);
223 fuse_conn_put(fc);
223} 224}
224 225
225static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 226static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -236,8 +237,9 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
236 /* fsid is left zero */ 237 /* fsid is left zero */
237} 238}
238 239
239static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) 240static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
240{ 241{
242 struct super_block *sb = dentry->d_sb;
241 struct fuse_conn *fc = get_fuse_conn_super(sb); 243 struct fuse_conn *fc = get_fuse_conn_super(sb);
242 struct fuse_req *req; 244 struct fuse_req *req;
243 struct fuse_statfs_out outarg; 245 struct fuse_statfs_out outarg;
@@ -368,11 +370,6 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
368 return 0; 370 return 0;
369} 371}
370 372
371static void fuse_conn_release(struct kobject *kobj)
372{
373 kfree(get_fuse_conn_kobj(kobj));
374}
375
376static struct fuse_conn *new_conn(void) 373static struct fuse_conn *new_conn(void)
377{ 374{
378 struct fuse_conn *fc; 375 struct fuse_conn *fc;
@@ -380,24 +377,35 @@ static struct fuse_conn *new_conn(void)
380 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 377 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
381 if (fc) { 378 if (fc) {
382 spin_lock_init(&fc->lock); 379 spin_lock_init(&fc->lock);
380 atomic_set(&fc->count, 1);
383 init_waitqueue_head(&fc->waitq); 381 init_waitqueue_head(&fc->waitq);
384 init_waitqueue_head(&fc->blocked_waitq); 382 init_waitqueue_head(&fc->blocked_waitq);
385 INIT_LIST_HEAD(&fc->pending); 383 INIT_LIST_HEAD(&fc->pending);
386 INIT_LIST_HEAD(&fc->processing); 384 INIT_LIST_HEAD(&fc->processing);
387 INIT_LIST_HEAD(&fc->io); 385 INIT_LIST_HEAD(&fc->io);
388 INIT_LIST_HEAD(&fc->background); 386 INIT_LIST_HEAD(&fc->interrupts);
389 init_rwsem(&fc->sbput_sem);
390 kobj_set_kset_s(fc, connections_subsys);
391 kobject_init(&fc->kobj);
392 atomic_set(&fc->num_waiting, 0); 387 atomic_set(&fc->num_waiting, 0);
393 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 388 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
394 fc->bdi.unplug_io_fn = default_unplug_io_fn; 389 fc->bdi.unplug_io_fn = default_unplug_io_fn;
395 fc->reqctr = 0; 390 fc->reqctr = 0;
396 fc->blocked = 1; 391 fc->blocked = 1;
392 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
397 } 393 }
398 return fc; 394 return fc;
399} 395}
400 396
397void fuse_conn_put(struct fuse_conn *fc)
398{
399 if (atomic_dec_and_test(&fc->count))
400 kfree(fc);
401}
402
403struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
404{
405 atomic_inc(&fc->count);
406 return fc;
407}
408
401static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 409static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
402{ 410{
403 struct fuse_attr attr; 411 struct fuse_attr attr;
@@ -413,6 +421,7 @@ static struct super_operations fuse_super_operations = {
413 .destroy_inode = fuse_destroy_inode, 421 .destroy_inode = fuse_destroy_inode,
414 .read_inode = fuse_read_inode, 422 .read_inode = fuse_read_inode,
415 .clear_inode = fuse_clear_inode, 423 .clear_inode = fuse_clear_inode,
424 .remount_fs = fuse_remount_fs,
416 .put_super = fuse_put_super, 425 .put_super = fuse_put_super,
417 .umount_begin = fuse_umount_begin, 426 .umount_begin = fuse_umount_begin,
418 .statfs = fuse_statfs, 427 .statfs = fuse_statfs,
@@ -432,8 +441,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
432 ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; 441 ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
433 if (arg->flags & FUSE_ASYNC_READ) 442 if (arg->flags & FUSE_ASYNC_READ)
434 fc->async_read = 1; 443 fc->async_read = 1;
435 } else 444 if (!(arg->flags & FUSE_POSIX_LOCKS))
445 fc->no_lock = 1;
446 } else {
436 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 447 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
448 fc->no_lock = 1;
449 }
437 450
438 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 451 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
439 fc->minor = arg->minor; 452 fc->minor = arg->minor;
@@ -451,7 +464,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
451 arg->major = FUSE_KERNEL_VERSION; 464 arg->major = FUSE_KERNEL_VERSION;
452 arg->minor = FUSE_KERNEL_MINOR_VERSION; 465 arg->minor = FUSE_KERNEL_MINOR_VERSION;
453 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 466 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
454 arg->flags |= FUSE_ASYNC_READ; 467 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
455 req->in.h.opcode = FUSE_INIT; 468 req->in.h.opcode = FUSE_INIT;
456 req->in.numargs = 1; 469 req->in.numargs = 1;
457 req->in.args[0].size = sizeof(*arg); 470 req->in.args[0].size = sizeof(*arg);
@@ -467,10 +480,9 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
467 request_send_background(fc, req); 480 request_send_background(fc, req);
468} 481}
469 482
470static unsigned long long conn_id(void) 483static u64 conn_id(void)
471{ 484{
472 /* BKL is held for ->get_sb() */ 485 static u64 ctr = 1;
473 static unsigned long long ctr = 1;
474 return ctr++; 486 return ctr++;
475} 487}
476 488
@@ -484,6 +496,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
484 struct fuse_req *init_req; 496 struct fuse_req *init_req;
485 int err; 497 int err;
486 498
499 if (sb->s_flags & MS_MANDLOCK)
500 return -EINVAL;
501
487 if (!parse_fuse_opt((char *) data, &d)) 502 if (!parse_fuse_opt((char *) data, &d))
488 return -EINVAL; 503 return -EINVAL;
489 504
@@ -527,25 +542,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
527 if (!init_req) 542 if (!init_req)
528 goto err_put_root; 543 goto err_put_root;
529 544
530 err = kobject_set_name(&fc->kobj, "%llu", conn_id()); 545 mutex_lock(&fuse_mutex);
531 if (err)
532 goto err_free_req;
533
534 err = kobject_add(&fc->kobj);
535 if (err)
536 goto err_free_req;
537
538 /* Setting file->private_data can't race with other mount()
539 instances, since BKL is held for ->get_sb() */
540 err = -EINVAL; 546 err = -EINVAL;
541 if (file->private_data) 547 if (file->private_data)
542 goto err_kobject_del; 548 goto err_unlock;
543 549
550 fc->id = conn_id();
551 err = fuse_ctl_add_conn(fc);
552 if (err)
553 goto err_unlock;
554
555 list_add_tail(&fc->entry, &fuse_conn_list);
544 sb->s_root = root_dentry; 556 sb->s_root = root_dentry;
545 fc->mounted = 1;
546 fc->connected = 1; 557 fc->connected = 1;
547 kobject_get(&fc->kobj); 558 file->private_data = fuse_conn_get(fc);
548 file->private_data = fc; 559 mutex_unlock(&fuse_mutex);
549 /* 560 /*
550 * atomic_dec_and_test() in fput() provides the necessary 561 * atomic_dec_and_test() in fput() provides the necessary
551 * memory barrier for file->private_data to be visible on all 562 * memory barrier for file->private_data to be visible on all
@@ -557,23 +568,22 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
557 568
558 return 0; 569 return 0;
559 570
560 err_kobject_del: 571 err_unlock:
561 kobject_del(&fc->kobj); 572 mutex_unlock(&fuse_mutex);
562 err_free_req:
563 fuse_request_free(init_req); 573 fuse_request_free(init_req);
564 err_put_root: 574 err_put_root:
565 dput(root_dentry); 575 dput(root_dentry);
566 err: 576 err:
567 fput(file); 577 fput(file);
568 kobject_put(&fc->kobj); 578 fuse_conn_put(fc);
569 return err; 579 return err;
570} 580}
571 581
572static struct super_block *fuse_get_sb(struct file_system_type *fs_type, 582static int fuse_get_sb(struct file_system_type *fs_type,
573 int flags, const char *dev_name, 583 int flags, const char *dev_name,
574 void *raw_data) 584 void *raw_data, struct vfsmount *mnt)
575{ 585{
576 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); 586 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
577} 587}
578 588
579static struct file_system_type fuse_fs_type = { 589static struct file_system_type fuse_fs_type = {
@@ -583,68 +593,8 @@ static struct file_system_type fuse_fs_type = {
583 .kill_sb = kill_anon_super, 593 .kill_sb = kill_anon_super,
584}; 594};
585 595
586static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page)
587{
588 return sprintf(page, "%i\n", atomic_read(&fc->num_waiting));
589}
590
591static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page,
592 size_t count)
593{
594 fuse_abort_conn(fc);
595 return count;
596}
597
598static struct fuse_conn_attr fuse_conn_waiting =
599 __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL);
600static struct fuse_conn_attr fuse_conn_abort =
601 __ATTR(abort, 0600, NULL, fuse_conn_abort_store);
602
603static struct attribute *fuse_conn_attrs[] = {
604 &fuse_conn_waiting.attr,
605 &fuse_conn_abort.attr,
606 NULL,
607};
608
609static ssize_t fuse_conn_attr_show(struct kobject *kobj,
610 struct attribute *attr,
611 char *page)
612{
613 struct fuse_conn_attr *fca =
614 container_of(attr, struct fuse_conn_attr, attr);
615
616 if (fca->show)
617 return fca->show(get_fuse_conn_kobj(kobj), page);
618 else
619 return -EACCES;
620}
621
622static ssize_t fuse_conn_attr_store(struct kobject *kobj,
623 struct attribute *attr,
624 const char *page, size_t count)
625{
626 struct fuse_conn_attr *fca =
627 container_of(attr, struct fuse_conn_attr, attr);
628
629 if (fca->store)
630 return fca->store(get_fuse_conn_kobj(kobj), page, count);
631 else
632 return -EACCES;
633}
634
635static struct sysfs_ops fuse_conn_sysfs_ops = {
636 .show = &fuse_conn_attr_show,
637 .store = &fuse_conn_attr_store,
638};
639
640static struct kobj_type ktype_fuse_conn = {
641 .release = fuse_conn_release,
642 .sysfs_ops = &fuse_conn_sysfs_ops,
643 .default_attrs = fuse_conn_attrs,
644};
645
646static decl_subsys(fuse, NULL, NULL); 596static decl_subsys(fuse, NULL, NULL);
647static decl_subsys(connections, &ktype_fuse_conn, NULL); 597static decl_subsys(connections, NULL, NULL);
648 598
649static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, 599static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
650 unsigned long flags) 600 unsigned long flags)
@@ -718,6 +668,7 @@ static int __init fuse_init(void)
718 printk("fuse init (API version %i.%i)\n", 668 printk("fuse init (API version %i.%i)\n",
719 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 669 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
720 670
671 INIT_LIST_HEAD(&fuse_conn_list);
721 res = fuse_fs_init(); 672 res = fuse_fs_init();
722 if (res) 673 if (res)
723 goto err; 674 goto err;
@@ -730,8 +681,14 @@ static int __init fuse_init(void)
730 if (res) 681 if (res)
731 goto err_dev_cleanup; 682 goto err_dev_cleanup;
732 683
684 res = fuse_ctl_init();
685 if (res)
686 goto err_sysfs_cleanup;
687
733 return 0; 688 return 0;
734 689
690 err_sysfs_cleanup:
691 fuse_sysfs_cleanup();
735 err_dev_cleanup: 692 err_dev_cleanup:
736 fuse_dev_cleanup(); 693 fuse_dev_cleanup();
737 err_fs_cleanup: 694 err_fs_cleanup:
@@ -744,6 +701,7 @@ static void __exit fuse_exit(void)
744{ 701{
745 printk(KERN_DEBUG "fuse exit\n"); 702 printk(KERN_DEBUG "fuse exit\n");
746 703
704 fuse_ctl_cleanup();
747 fuse_sysfs_cleanup(); 705 fuse_sysfs_cleanup();
748 fuse_fs_cleanup(); 706 fuse_fs_cleanup();
749 fuse_dev_cleanup(); 707 fuse_dev_cleanup();
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 1e44dcfe49c4..13231dd5ce66 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -280,7 +280,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
280 block = off >> PAGE_CACHE_SHIFT; 280 block = off >> PAGE_CACHE_SHIFT;
281 node->page_offset = off & ~PAGE_CACHE_MASK; 281 node->page_offset = off & ~PAGE_CACHE_MASK;
282 for (i = 0; i < tree->pages_per_bnode; i++) { 282 for (i = 0; i < tree->pages_per_bnode; i++) {
283 page = read_cache_page(mapping, block++, (filler_t *)mapping->a_ops->readpage, NULL); 283 page = read_mapping_page(mapping, block++, NULL);
284 if (IS_ERR(page)) 284 if (IS_ERR(page))
285 goto fail; 285 goto fail;
286 if (PageError(page)) { 286 if (PageError(page)) {
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index d20131ce4b95..400357994319 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -59,7 +59,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
59 unlock_new_inode(tree->inode); 59 unlock_new_inode(tree->inode);
60 60
61 mapping = tree->inode->i_mapping; 61 mapping = tree->inode->i_mapping;
62 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL); 62 page = read_mapping_page(mapping, 0, NULL);
63 if (IS_ERR(page)) 63 if (IS_ERR(page))
64 goto free_tree; 64 goto free_tree;
65 65
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 3ed8663a8db1..735332dfd1b8 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -182,8 +182,8 @@ extern void hfs_file_truncate(struct inode *);
182extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 182extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
183 183
184/* inode.c */ 184/* inode.c */
185extern struct address_space_operations hfs_aops; 185extern const struct address_space_operations hfs_aops;
186extern struct address_space_operations hfs_btree_aops; 186extern const struct address_space_operations hfs_btree_aops;
187 187
188extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); 188extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
189extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); 189extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2d4ced22201b..315cf44a90b2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -114,7 +114,7 @@ static int hfs_writepages(struct address_space *mapping,
114 return mpage_writepages(mapping, wbc, hfs_get_block); 114 return mpage_writepages(mapping, wbc, hfs_get_block);
115} 115}
116 116
117struct address_space_operations hfs_btree_aops = { 117const struct address_space_operations hfs_btree_aops = {
118 .readpage = hfs_readpage, 118 .readpage = hfs_readpage,
119 .writepage = hfs_writepage, 119 .writepage = hfs_writepage,
120 .sync_page = block_sync_page, 120 .sync_page = block_sync_page,
@@ -124,7 +124,7 @@ struct address_space_operations hfs_btree_aops = {
124 .releasepage = hfs_releasepage, 124 .releasepage = hfs_releasepage,
125}; 125};
126 126
127struct address_space_operations hfs_aops = { 127const struct address_space_operations hfs_aops = {
128 .readpage = hfs_readpage, 128 .readpage = hfs_readpage,
129 .writepage = hfs_writepage, 129 .writepage = hfs_writepage,
130 .sync_page = block_sync_page, 130 .sync_page = block_sync_page,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1181d116117d..34937ee83ab1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -12,7 +12,6 @@
12 * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds 12 * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/blkdev.h> 16#include <linux/blkdev.h>
18#include <linux/mount.h> 17#include <linux/mount.h>
@@ -80,8 +79,10 @@ static void hfs_put_super(struct super_block *sb)
80 * 79 *
81 * changed f_files/f_ffree to reflect the fs_ablock/free_ablocks. 80 * changed f_files/f_ffree to reflect the fs_ablock/free_ablocks.
82 */ 81 */
83static int hfs_statfs(struct super_block *sb, struct kstatfs *buf) 82static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
84{ 83{
84 struct super_block *sb = dentry->d_sb;
85
85 buf->f_type = HFS_SUPER_MAGIC; 86 buf->f_type = HFS_SUPER_MAGIC;
86 buf->f_bsize = sb->s_blocksize; 87 buf->f_bsize = sb->s_blocksize;
87 buf->f_blocks = (u32)HFS_SB(sb)->fs_ablocks * HFS_SB(sb)->fs_div; 88 buf->f_blocks = (u32)HFS_SB(sb)->fs_ablocks * HFS_SB(sb)->fs_div;
@@ -413,10 +414,11 @@ bail:
413 return res; 414 return res;
414} 415}
415 416
416static struct super_block *hfs_get_sb(struct file_system_type *fs_type, 417static int hfs_get_sb(struct file_system_type *fs_type,
417 int flags, const char *dev_name, void *data) 418 int flags, const char *dev_name, void *data,
419 struct vfsmount *mnt)
418{ 420{
419 return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super); 421 return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
420} 422}
421 423
422static struct file_system_type hfs_fs_type = { 424static struct file_system_type hfs_fs_type = {
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 9fb51632303c..d128a25b74d2 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -31,8 +31,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
31 dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); 31 dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
32 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); 32 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; 33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
34 page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, 34 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL);
35 (filler_t *)mapping->a_ops->readpage, NULL);
36 pptr = kmap(page); 35 pptr = kmap(page);
37 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; 36 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
38 i = offset % 32; 37 i = offset % 32;
@@ -72,8 +71,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
72 offset += PAGE_CACHE_BITS; 71 offset += PAGE_CACHE_BITS;
73 if (offset >= size) 72 if (offset >= size)
74 break; 73 break;
75 page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, 74 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
76 (filler_t *)mapping->a_ops->readpage, NULL); 75 NULL);
77 curr = pptr = kmap(page); 76 curr = pptr = kmap(page);
78 if ((size ^ offset) / PAGE_CACHE_BITS) 77 if ((size ^ offset) / PAGE_CACHE_BITS)
79 end = pptr + PAGE_CACHE_BITS / 32; 78 end = pptr + PAGE_CACHE_BITS / 32;
@@ -119,8 +118,8 @@ found:
119 set_page_dirty(page); 118 set_page_dirty(page);
120 kunmap(page); 119 kunmap(page);
121 offset += PAGE_CACHE_BITS; 120 offset += PAGE_CACHE_BITS;
122 page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, 121 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
123 (filler_t *)mapping->a_ops->readpage, NULL); 122 NULL);
124 pptr = kmap(page); 123 pptr = kmap(page);
125 curr = pptr; 124 curr = pptr;
126 end = pptr + PAGE_CACHE_BITS / 32; 125 end = pptr + PAGE_CACHE_BITS / 32;
@@ -167,7 +166,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
167 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); 166 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
168 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; 167 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
169 pnr = offset / PAGE_CACHE_BITS; 168 pnr = offset / PAGE_CACHE_BITS;
170 page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL); 169 page = read_mapping_page(mapping, pnr, NULL);
171 pptr = kmap(page); 170 pptr = kmap(page);
172 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; 171 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
173 end = pptr + PAGE_CACHE_BITS / 32; 172 end = pptr + PAGE_CACHE_BITS / 32;
@@ -199,7 +198,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
199 break; 198 break;
200 set_page_dirty(page); 199 set_page_dirty(page);
201 kunmap(page); 200 kunmap(page);
202 page = read_cache_page(mapping, ++pnr, (filler_t *)mapping->a_ops->readpage, NULL); 201 page = read_mapping_page(mapping, ++pnr, NULL);
203 pptr = kmap(page); 202 pptr = kmap(page);
204 curr = pptr; 203 curr = pptr;
205 end = pptr + PAGE_CACHE_BITS / 32; 204 end = pptr + PAGE_CACHE_BITS / 32;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 746abc9ecf70..77bf434da679 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -440,7 +440,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
440 block = off >> PAGE_CACHE_SHIFT; 440 block = off >> PAGE_CACHE_SHIFT;
441 node->page_offset = off & ~PAGE_CACHE_MASK; 441 node->page_offset = off & ~PAGE_CACHE_MASK;
442 for (i = 0; i < tree->pages_per_bnode; block++, i++) { 442 for (i = 0; i < tree->pages_per_bnode; block++, i++) {
443 page = read_cache_page(mapping, block, (filler_t *)mapping->a_ops->readpage, NULL); 443 page = read_mapping_page(mapping, block, NULL);
444 if (IS_ERR(page)) 444 if (IS_ERR(page))
445 goto fail; 445 goto fail;
446 if (PageError(page)) { 446 if (PageError(page)) {
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index effa8991999c..cfc852fdd1b5 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -38,7 +38,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
38 goto free_tree; 38 goto free_tree;
39 39
40 mapping = tree->inode->i_mapping; 40 mapping = tree->inode->i_mapping;
41 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL); 41 page = read_mapping_page(mapping, 0, NULL);
42 if (IS_ERR(page)) 42 if (IS_ERR(page))
43 goto free_tree; 43 goto free_tree;
44 44
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 7ae393637a0c..8a1ca5ef7ada 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -323,8 +323,8 @@ int hfsplus_file_extend(struct inode *);
323void hfsplus_file_truncate(struct inode *); 323void hfsplus_file_truncate(struct inode *);
324 324
325/* inode.c */ 325/* inode.c */
326extern struct address_space_operations hfsplus_aops; 326extern const struct address_space_operations hfsplus_aops;
327extern struct address_space_operations hfsplus_btree_aops; 327extern const struct address_space_operations hfsplus_btree_aops;
328 328
329void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); 329void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
330void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); 330void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index acf66dba3e01..924ecdef8091 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -109,7 +109,7 @@ static int hfsplus_writepages(struct address_space *mapping,
109 return mpage_writepages(mapping, wbc, hfsplus_get_block); 109 return mpage_writepages(mapping, wbc, hfsplus_get_block);
110} 110}
111 111
112struct address_space_operations hfsplus_btree_aops = { 112const struct address_space_operations hfsplus_btree_aops = {
113 .readpage = hfsplus_readpage, 113 .readpage = hfsplus_readpage,
114 .writepage = hfsplus_writepage, 114 .writepage = hfsplus_writepage,
115 .sync_page = block_sync_page, 115 .sync_page = block_sync_page,
@@ -119,7 +119,7 @@ struct address_space_operations hfsplus_btree_aops = {
119 .releasepage = hfsplus_releasepage, 119 .releasepage = hfsplus_releasepage,
120}; 120};
121 121
122struct address_space_operations hfsplus_aops = { 122const struct address_space_operations hfsplus_aops = {
123 .readpage = hfsplus_readpage, 123 .readpage = hfsplus_readpage,
124 .writepage = hfsplus_writepage, 124 .writepage = hfsplus_writepage,
125 .sync_page = block_sync_page, 125 .sync_page = block_sync_page,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 7843f792a4b7..d279d5924f28 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -7,7 +7,6 @@
7 * 7 *
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/module.h> 10#include <linux/module.h>
12#include <linux/init.h> 11#include <linux/init.h>
13#include <linux/pagemap.h> 12#include <linux/pagemap.h>
@@ -212,8 +211,10 @@ static void hfsplus_put_super(struct super_block *sb)
212 sb->s_fs_info = NULL; 211 sb->s_fs_info = NULL;
213} 212}
214 213
215static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf) 214static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
216{ 215{
216 struct super_block *sb = dentry->d_sb;
217
217 buf->f_type = HFSPLUS_SUPER_MAGIC; 218 buf->f_type = HFSPLUS_SUPER_MAGIC;
218 buf->f_bsize = sb->s_blocksize; 219 buf->f_bsize = sb->s_blocksize;
219 buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; 220 buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift;
@@ -450,10 +451,12 @@ static void hfsplus_destroy_inode(struct inode *inode)
450 451
451#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 452#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
452 453
453static struct super_block *hfsplus_get_sb(struct file_system_type *fs_type, 454static int hfsplus_get_sb(struct file_system_type *fs_type,
454 int flags, const char *dev_name, void *data) 455 int flags, const char *dev_name, void *data,
456 struct vfsmount *mnt)
455{ 457{
456 return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); 458 return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
459 mnt);
457} 460}
458 461
459static struct file_system_type hfsplus_fs_type = { 462static struct file_system_type hfsplus_fs_type = {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index bf0f8e16e433..b82e3d9c8790 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -54,7 +54,7 @@ static int append = 0;
54 54
55static struct inode_operations hostfs_iops; 55static struct inode_operations hostfs_iops;
56static struct inode_operations hostfs_dir_iops; 56static struct inode_operations hostfs_dir_iops;
57static struct address_space_operations hostfs_link_aops; 57static const struct address_space_operations hostfs_link_aops;
58 58
59#ifndef MODULE 59#ifndef MODULE
60static int __init hostfs_args(char *options, int *add) 60static int __init hostfs_args(char *options, int *add)
@@ -239,7 +239,7 @@ static int read_inode(struct inode *ino)
239 return(err); 239 return(err);
240} 240}
241 241
242int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) 242int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
243{ 243{
244 /* do_statfs uses struct statfs64 internally, but the linux kernel 244 /* do_statfs uses struct statfs64 internally, but the linux kernel
245 * struct statfs still has 32-bit versions for most of these fields, 245 * struct statfs still has 32-bit versions for most of these fields,
@@ -252,7 +252,7 @@ int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
252 long long f_files; 252 long long f_files;
253 long long f_ffree; 253 long long f_ffree;
254 254
255 err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, 255 err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename,
256 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 256 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
257 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 257 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
258 &sf->f_namelen, sf->f_spare); 258 &sf->f_namelen, sf->f_spare);
@@ -518,7 +518,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
518 return(err); 518 return(err);
519} 519}
520 520
521static struct address_space_operations hostfs_aops = { 521static const struct address_space_operations hostfs_aops = {
522 .writepage = hostfs_writepage, 522 .writepage = hostfs_writepage,
523 .readpage = hostfs_readpage, 523 .readpage = hostfs_readpage,
524 .set_page_dirty = __set_page_dirty_nobuffers, 524 .set_page_dirty = __set_page_dirty_nobuffers,
@@ -935,7 +935,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
935 return(err); 935 return(err);
936} 936}
937 937
938static struct address_space_operations hostfs_link_aops = { 938static const struct address_space_operations hostfs_link_aops = {
939 .readpage = hostfs_link_readpage, 939 .readpage = hostfs_link_readpage,
940}; 940};
941 941
@@ -993,11 +993,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
993 return(err); 993 return(err);
994} 994}
995 995
996static struct super_block *hostfs_read_sb(struct file_system_type *type, 996static int hostfs_read_sb(struct file_system_type *type,
997 int flags, const char *dev_name, 997 int flags, const char *dev_name,
998 void *data) 998 void *data, struct vfsmount *mnt)
999{ 999{
1000 return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); 1000 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
1001} 1001}
1002 1002
1003static struct file_system_type hostfs_type = { 1003static struct file_system_type hostfs_type = {
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index d3b9fffe45a1..d9eb19b7b8ae 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -99,7 +99,7 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
99{ 99{
100 return generic_block_bmap(mapping,block,hpfs_get_block); 100 return generic_block_bmap(mapping,block,hpfs_get_block);
101} 101}
102struct address_space_operations hpfs_aops = { 102const struct address_space_operations hpfs_aops = {
103 .readpage = hpfs_readpage, 103 .readpage = hpfs_readpage,
104 .writepage = hpfs_writepage, 104 .writepage = hpfs_writepage,
105 .sync_page = block_sync_page, 105 .sync_page = block_sync_page,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 29b7a3e55173..f687d54ed442 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
268int hpfs_file_fsync(struct file *, struct dentry *, int); 268int hpfs_file_fsync(struct file *, struct dentry *, int);
269extern const struct file_operations hpfs_file_ops; 269extern const struct file_operations hpfs_file_ops;
270extern struct inode_operations hpfs_file_iops; 270extern struct inode_operations hpfs_file_iops;
271extern struct address_space_operations hpfs_aops; 271extern const struct address_space_operations hpfs_aops;
272 272
273/* inode.c */ 273/* inode.c */
274 274
@@ -304,7 +304,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
304/* namei.c */ 304/* namei.c */
305 305
306extern struct inode_operations hpfs_dir_iops; 306extern struct inode_operations hpfs_dir_iops;
307extern struct address_space_operations hpfs_symlink_aops; 307extern const struct address_space_operations hpfs_symlink_aops;
308 308
309static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) 309static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)
310{ 310{
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index a03abb12c610..59e7dc182a0c 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -538,7 +538,7 @@ fail:
538 return err; 538 return err;
539} 539}
540 540
541struct address_space_operations hpfs_symlink_aops = { 541const struct address_space_operations hpfs_symlink_aops = {
542 .readpage = hpfs_symlink_readpage 542 .readpage = hpfs_symlink_readpage
543}; 543};
544 544
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index d72d8c87c996..f798480a363f 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -135,8 +135,9 @@ static unsigned count_bitmaps(struct super_block *s)
135 return count; 135 return count;
136} 136}
137 137
138static int hpfs_statfs(struct super_block *s, struct kstatfs *buf) 138static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
139{ 139{
140 struct super_block *s = dentry->d_sb;
140 struct hpfs_sb_info *sbi = hpfs_sb(s); 141 struct hpfs_sb_info *sbi = hpfs_sb(s);
141 lock_kernel(); 142 lock_kernel();
142 143
@@ -662,10 +663,11 @@ bail0:
662 return -EINVAL; 663 return -EINVAL;
663} 664}
664 665
665static struct super_block *hpfs_get_sb(struct file_system_type *fs_type, 666static int hpfs_get_sb(struct file_system_type *fs_type,
666 int flags, const char *dev_name, void *data) 667 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
667{ 668{
668 return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); 669 return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
670 mnt);
669} 671}
670 672
671static struct file_system_type hpfs_fs_type = { 673static struct file_system_type hpfs_fs_type = {
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 5e6363be246f..3a9bdf58166f 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -616,7 +616,7 @@ static const struct file_operations hppfs_dir_fops = {
616 .fsync = hppfs_fsync, 616 .fsync = hppfs_fsync,
617}; 617};
618 618
619static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) 619static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
620{ 620{
621 sf->f_blocks = 0; 621 sf->f_blocks = 0;
622 sf->f_bfree = 0; 622 sf->f_bfree = 0;
@@ -769,11 +769,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
769 return(err); 769 return(err);
770} 770}
771 771
772static struct super_block *hppfs_read_super(struct file_system_type *type, 772static int hppfs_read_super(struct file_system_type *type,
773 int flags, const char *dev_name, 773 int flags, const char *dev_name,
774 void *data) 774 void *data, struct vfsmount *mnt)
775{ 775{
776 return(get_sb_nodev(type, flags, data, hppfs_fill_super)); 776 return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
777} 777}
778 778
779static struct file_system_type hppfs_type = { 779static struct file_system_type hppfs_type = {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a5b4e923455..6449cb697967 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,7 +34,7 @@
34#define HUGETLBFS_MAGIC 0x958458f6 34#define HUGETLBFS_MAGIC 0x958458f6
35 35
36static struct super_operations hugetlbfs_ops; 36static struct super_operations hugetlbfs_ops;
37static struct address_space_operations hugetlbfs_aops; 37static const struct address_space_operations hugetlbfs_aops;
38const struct file_operations hugetlbfs_file_operations; 38const struct file_operations hugetlbfs_file_operations;
39static struct inode_operations hugetlbfs_dir_inode_operations; 39static struct inode_operations hugetlbfs_dir_inode_operations;
40static struct inode_operations hugetlbfs_inode_operations; 40static struct inode_operations hugetlbfs_inode_operations;
@@ -59,7 +59,6 @@ static void huge_pagevec_release(struct pagevec *pvec)
59static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 59static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
60{ 60{
61 struct inode *inode = file->f_dentry->d_inode; 61 struct inode *inode = file->f_dentry->d_inode;
62 struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
63 loff_t len, vma_len; 62 loff_t len, vma_len;
64 int ret; 63 int ret;
65 64
@@ -87,9 +86,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
87 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) 86 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
88 goto out; 87 goto out;
89 88
90 if (vma->vm_flags & VM_MAYSHARE) 89 if (vma->vm_flags & VM_MAYSHARE &&
91 if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) 90 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
92 goto out; 91 len >> HPAGE_SHIFT))
92 goto out;
93 93
94 ret = 0; 94 ret = 0;
95 hugetlb_prefault_arch_hook(vma->vm_mm); 95 hugetlb_prefault_arch_hook(vma->vm_mm);
@@ -195,12 +195,8 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
195 const pgoff_t start = lstart >> HPAGE_SHIFT; 195 const pgoff_t start = lstart >> HPAGE_SHIFT;
196 struct pagevec pvec; 196 struct pagevec pvec;
197 pgoff_t next; 197 pgoff_t next;
198 int i; 198 int i, freed = 0;
199 199
200 hugetlb_truncate_reservation(HUGETLBFS_I(inode),
201 lstart >> HPAGE_SHIFT);
202 if (!mapping->nrpages)
203 return;
204 pagevec_init(&pvec, 0); 200 pagevec_init(&pvec, 0);
205 next = start; 201 next = start;
206 while (1) { 202 while (1) {
@@ -221,10 +217,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
221 truncate_huge_page(page); 217 truncate_huge_page(page);
222 unlock_page(page); 218 unlock_page(page);
223 hugetlb_put_quota(mapping); 219 hugetlb_put_quota(mapping);
220 freed++;
224 } 221 }
225 huge_pagevec_release(&pvec); 222 huge_pagevec_release(&pvec);
226 } 223 }
227 BUG_ON(!lstart && mapping->nrpages); 224 BUG_ON(!lstart && mapping->nrpages);
225 hugetlb_unreserve_pages(inode, start, freed);
228} 226}
229 227
230static void hugetlbfs_delete_inode(struct inode *inode) 228static void hugetlbfs_delete_inode(struct inode *inode)
@@ -366,6 +364,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
366 inode->i_mapping->a_ops = &hugetlbfs_aops; 364 inode->i_mapping->a_ops = &hugetlbfs_aops;
367 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 365 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
368 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 366 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
367 INIT_LIST_HEAD(&inode->i_mapping->private_list);
369 info = HUGETLBFS_I(inode); 368 info = HUGETLBFS_I(inode);
370 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 369 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
371 switch (mode & S_IFMT) { 370 switch (mode & S_IFMT) {
@@ -467,9 +466,9 @@ static int hugetlbfs_set_page_dirty(struct page *page)
467 return 0; 466 return 0;
468} 467}
469 468
470static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf) 469static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
471{ 470{
472 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 471 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
473 472
474 buf->f_type = HUGETLBFS_MAGIC; 473 buf->f_type = HUGETLBFS_MAGIC;
475 buf->f_bsize = HPAGE_SIZE; 474 buf->f_bsize = HPAGE_SIZE;
@@ -538,7 +537,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
538 hugetlbfs_inc_free_inodes(sbinfo); 537 hugetlbfs_inc_free_inodes(sbinfo);
539 return NULL; 538 return NULL;
540 } 539 }
541 p->prereserved_hpages = 0;
542 return &p->vfs_inode; 540 return &p->vfs_inode;
543} 541}
544 542
@@ -549,7 +547,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
549 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 547 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
550} 548}
551 549
552static struct address_space_operations hugetlbfs_aops = { 550static const struct address_space_operations hugetlbfs_aops = {
553 .readpage = hugetlbfs_readpage, 551 .readpage = hugetlbfs_readpage,
554 .prepare_write = hugetlbfs_prepare_write, 552 .prepare_write = hugetlbfs_prepare_write,
555 .commit_write = hugetlbfs_commit_write, 553 .commit_write = hugetlbfs_commit_write,
@@ -723,10 +721,10 @@ void hugetlb_put_quota(struct address_space *mapping)
723 } 721 }
724} 722}
725 723
726static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, 724static int hugetlbfs_get_sb(struct file_system_type *fs_type,
727 int flags, const char *dev_name, void *data) 725 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
728{ 726{
729 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); 727 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
730} 728}
731 729
732static struct file_system_type hugetlbfs_fs_type = { 730static struct file_system_type hugetlbfs_fs_type = {
@@ -781,8 +779,7 @@ struct file *hugetlb_zero_setup(size_t size)
781 goto out_file; 779 goto out_file;
782 780
783 error = -ENOMEM; 781 error = -ENOMEM;
784 if (hugetlb_extend_reservation(HUGETLBFS_I(inode), 782 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
785 size >> HPAGE_SHIFT) != 0)
786 goto out_inode; 783 goto out_inode;
787 784
788 d_instantiate(dentry, inode); 785 d_instantiate(dentry, inode);
diff --git a/fs/inode.c b/fs/inode.c
index 3a2446a27d2c..0bf9f0444a96 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -4,7 +4,6 @@
4 * (C) 1997 Linus Torvalds 4 * (C) 1997 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/fs.h> 7#include <linux/fs.h>
9#include <linux/mm.h> 8#include <linux/mm.h>
10#include <linux/dcache.h> 9#include <linux/dcache.h>
@@ -102,7 +101,7 @@ static kmem_cache_t * inode_cachep __read_mostly;
102 101
103static struct inode *alloc_inode(struct super_block *sb) 102static struct inode *alloc_inode(struct super_block *sb)
104{ 103{
105 static struct address_space_operations empty_aops; 104 static const struct address_space_operations empty_aops;
106 static struct inode_operations empty_iops; 105 static struct inode_operations empty_iops;
107 static const struct file_operations empty_fops; 106 static const struct file_operations empty_fops;
108 struct inode *inode; 107 struct inode *inode;
@@ -452,15 +451,14 @@ static void prune_icache(int nr_to_scan)
452 nr_pruned++; 451 nr_pruned++;
453 } 452 }
454 inodes_stat.nr_unused -= nr_pruned; 453 inodes_stat.nr_unused -= nr_pruned;
454 if (current_is_kswapd())
455 __count_vm_events(KSWAPD_INODESTEAL, reap);
456 else
457 __count_vm_events(PGINODESTEAL, reap);
455 spin_unlock(&inode_lock); 458 spin_unlock(&inode_lock);
456 459
457 dispose_list(&freeable); 460 dispose_list(&freeable);
458 mutex_unlock(&iprune_mutex); 461 mutex_unlock(&iprune_mutex);
459
460 if (current_is_kswapd())
461 mod_page_state(kswapd_inodesteal, reap);
462 else
463 mod_page_state(pginodesteal, reap);
464} 462}
465 463
466/* 464/*
diff --git a/fs/inotify.c b/fs/inotify.c
index 732ec4bd5774..723836a1f718 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -5,7 +5,10 @@
5 * John McCutchan <ttb@tentacle.dhs.org> 5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com> 6 * Robert Love <rml@novell.com>
7 * 7 *
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
9 *
8 * Copyright (C) 2005 John McCutchan 10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
9 * 12 *
10 * This program is free software; you can redistribute it and/or modify it 13 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the 14 * under the terms of the GNU General Public License as published by the
@@ -20,35 +23,17 @@
20 23
21#include <linux/module.h> 24#include <linux/module.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/spinlock.h> 26#include <linux/spinlock.h>
25#include <linux/idr.h> 27#include <linux/idr.h>
26#include <linux/slab.h> 28#include <linux/slab.h>
27#include <linux/fs.h> 29#include <linux/fs.h>
28#include <linux/file.h>
29#include <linux/mount.h>
30#include <linux/namei.h>
31#include <linux/poll.h>
32#include <linux/init.h> 30#include <linux/init.h>
33#include <linux/list.h> 31#include <linux/list.h>
34#include <linux/writeback.h> 32#include <linux/writeback.h>
35#include <linux/inotify.h> 33#include <linux/inotify.h>
36#include <linux/syscalls.h>
37
38#include <asm/ioctls.h>
39 34
40static atomic_t inotify_cookie; 35static atomic_t inotify_cookie;
41 36
42static kmem_cache_t *watch_cachep __read_mostly;
43static kmem_cache_t *event_cachep __read_mostly;
44
45static struct vfsmount *inotify_mnt __read_mostly;
46
47/* these are configurable via /proc/sys/fs/inotify/ */
48int inotify_max_user_instances __read_mostly;
49int inotify_max_user_watches __read_mostly;
50int inotify_max_queued_events __read_mostly;
51
52/* 37/*
53 * Lock ordering: 38 * Lock ordering:
54 * 39 *
@@ -56,327 +41,108 @@ int inotify_max_queued_events __read_mostly;
56 * iprune_mutex (synchronize shrink_icache_memory()) 41 * iprune_mutex (synchronize shrink_icache_memory())
57 * inode_lock (protects the super_block->s_inodes list) 42 * inode_lock (protects the super_block->s_inodes list)
58 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) 43 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
59 * inotify_dev->mutex (protects inotify_device and watches->d_list) 44 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
45 *
46 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
47 * of a caller's event handler. Thus, the caller must not hold any locks
48 * taken in their event handler while calling any of the published inotify
49 * interfaces.
60 */ 50 */
61 51
62/* 52/*
63 * Lifetimes of the three main data structures--inotify_device, inode, and 53 * Lifetimes of the three main data structures--inotify_handle, inode, and
64 * inotify_watch--are managed by reference count. 54 * inotify_watch--are managed by reference count.
65 * 55 *
66 * inotify_device: Lifetime is from inotify_init() until release. Additional 56 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
67 * references can bump the count via get_inotify_dev() and drop the count via 57 * Additional references can bump the count via get_inotify_handle() and drop
68 * put_inotify_dev(). 58 * the count via put_inotify_handle().
69 * 59 *
70 * inotify_watch: Lifetime is from create_watch() to destory_watch(). 60 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
71 * Additional references can bump the count via get_inotify_watch() and drop 61 * to remove_watch_no_event(). Additional references can bump the count via
72 * the count via put_inotify_watch(). 62 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
63 * is responsible for the final put after receiving IN_IGNORED, or when using
64 * IN_ONESHOT after receiving the first event. Inotify does the final put if
65 * inotify_destroy() is called.
73 * 66 *
74 * inode: Pinned so long as the inode is associated with a watch, from 67 * inode: Pinned so long as the inode is associated with a watch, from
75 * create_watch() to put_inotify_watch(). 68 * inotify_add_watch() to the final put_inotify_watch().
76 */ 69 */
77 70
78/* 71/*
79 * struct inotify_device - represents an inotify instance 72 * struct inotify_handle - represents an inotify instance
80 * 73 *
81 * This structure is protected by the mutex 'mutex'. 74 * This structure is protected by the mutex 'mutex'.
82 */ 75 */
83struct inotify_device { 76struct inotify_handle {
84 wait_queue_head_t wq; /* wait queue for i/o */
85 struct idr idr; /* idr mapping wd -> watch */ 77 struct idr idr; /* idr mapping wd -> watch */
86 struct mutex mutex; /* protects this bad boy */ 78 struct mutex mutex; /* protects this bad boy */
87 struct list_head events; /* list of queued events */
88 struct list_head watches; /* list of watches */ 79 struct list_head watches; /* list of watches */
89 atomic_t count; /* reference count */ 80 atomic_t count; /* reference count */
90 struct user_struct *user; /* user who opened this dev */
91 unsigned int queue_size; /* size of the queue (bytes) */
92 unsigned int event_count; /* number of pending events */
93 unsigned int max_events; /* maximum number of events */
94 u32 last_wd; /* the last wd allocated */ 81 u32 last_wd; /* the last wd allocated */
82 const struct inotify_operations *in_ops; /* inotify caller operations */
95}; 83};
96 84
97/* 85static inline void get_inotify_handle(struct inotify_handle *ih)
98 * struct inotify_kernel_event - An inotify event, originating from a watch and
99 * queued for user-space. A list of these is attached to each instance of the
100 * device. In read(), this list is walked and all events that can fit in the
101 * buffer are returned.
102 *
103 * Protected by dev->mutex of the device in which we are queued.
104 */
105struct inotify_kernel_event {
106 struct inotify_event event; /* the user-space event */
107 struct list_head list; /* entry in inotify_device's list */
108 char *name; /* filename, if any */
109};
110
111/*
112 * struct inotify_watch - represents a watch request on a specific inode
113 *
114 * d_list is protected by dev->mutex of the associated watch->dev.
115 * i_list and mask are protected by inode->inotify_mutex of the associated inode.
116 * dev, inode, and wd are never written to once the watch is created.
117 */
118struct inotify_watch {
119 struct list_head d_list; /* entry in inotify_device's list */
120 struct list_head i_list; /* entry in inode's list */
121 atomic_t count; /* reference count */
122 struct inotify_device *dev; /* associated device */
123 struct inode *inode; /* associated inode */
124 s32 wd; /* watch descriptor */
125 u32 mask; /* event mask for this watch */
126};
127
128#ifdef CONFIG_SYSCTL
129
130#include <linux/sysctl.h>
131
132static int zero;
133
134ctl_table inotify_table[] = {
135 {
136 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
137 .procname = "max_user_instances",
138 .data = &inotify_max_user_instances,
139 .maxlen = sizeof(int),
140 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &zero,
144 },
145 {
146 .ctl_name = INOTIFY_MAX_USER_WATCHES,
147 .procname = "max_user_watches",
148 .data = &inotify_max_user_watches,
149 .maxlen = sizeof(int),
150 .mode = 0644,
151 .proc_handler = &proc_dointvec_minmax,
152 .strategy = &sysctl_intvec,
153 .extra1 = &zero,
154 },
155 {
156 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
157 .procname = "max_queued_events",
158 .data = &inotify_max_queued_events,
159 .maxlen = sizeof(int),
160 .mode = 0644,
161 .proc_handler = &proc_dointvec_minmax,
162 .strategy = &sysctl_intvec,
163 .extra1 = &zero
164 },
165 { .ctl_name = 0 }
166};
167#endif /* CONFIG_SYSCTL */
168
169static inline void get_inotify_dev(struct inotify_device *dev)
170{ 86{
171 atomic_inc(&dev->count); 87 atomic_inc(&ih->count);
172} 88}
173 89
174static inline void put_inotify_dev(struct inotify_device *dev) 90static inline void put_inotify_handle(struct inotify_handle *ih)
175{ 91{
176 if (atomic_dec_and_test(&dev->count)) { 92 if (atomic_dec_and_test(&ih->count)) {
177 atomic_dec(&dev->user->inotify_devs); 93 idr_destroy(&ih->idr);
178 free_uid(dev->user); 94 kfree(ih);
179 idr_destroy(&dev->idr);
180 kfree(dev);
181 } 95 }
182} 96}
183 97
184static inline void get_inotify_watch(struct inotify_watch *watch) 98/**
99 * get_inotify_watch - grab a reference to an inotify_watch
100 * @watch: watch to grab
101 */
102void get_inotify_watch(struct inotify_watch *watch)
185{ 103{
186 atomic_inc(&watch->count); 104 atomic_inc(&watch->count);
187} 105}
106EXPORT_SYMBOL_GPL(get_inotify_watch);
188 107
189/* 108/**
190 * put_inotify_watch - decrements the ref count on a given watch. cleans up 109 * put_inotify_watch - decrements the ref count on a given watch. cleans up
191 * the watch and its references if the count reaches zero. 110 * watch references if the count reaches zero. inotify_watch is freed by
111 * inotify callers via the destroy_watch() op.
112 * @watch: watch to release
192 */ 113 */
193static inline void put_inotify_watch(struct inotify_watch *watch) 114void put_inotify_watch(struct inotify_watch *watch)
194{ 115{
195 if (atomic_dec_and_test(&watch->count)) { 116 if (atomic_dec_and_test(&watch->count)) {
196 put_inotify_dev(watch->dev); 117 struct inotify_handle *ih = watch->ih;
197 iput(watch->inode);
198 kmem_cache_free(watch_cachep, watch);
199 }
200}
201
202/*
203 * kernel_event - create a new kernel event with the given parameters
204 *
205 * This function can sleep.
206 */
207static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
208 const char *name)
209{
210 struct inotify_kernel_event *kevent;
211
212 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
213 if (unlikely(!kevent))
214 return NULL;
215
216 /* we hand this out to user-space, so zero it just in case */
217 memset(&kevent->event, 0, sizeof(struct inotify_event));
218
219 kevent->event.wd = wd;
220 kevent->event.mask = mask;
221 kevent->event.cookie = cookie;
222
223 INIT_LIST_HEAD(&kevent->list);
224
225 if (name) {
226 size_t len, rem, event_size = sizeof(struct inotify_event);
227
228 /*
229 * We need to pad the filename so as to properly align an
230 * array of inotify_event structures. Because the structure is
231 * small and the common case is a small filename, we just round
232 * up to the next multiple of the structure's sizeof. This is
233 * simple and safe for all architectures.
234 */
235 len = strlen(name) + 1;
236 rem = event_size - len;
237 if (len > event_size) {
238 rem = event_size - (len % event_size);
239 if (len % event_size == 0)
240 rem = 0;
241 }
242
243 kevent->name = kmalloc(len + rem, GFP_KERNEL);
244 if (unlikely(!kevent->name)) {
245 kmem_cache_free(event_cachep, kevent);
246 return NULL;
247 }
248 memcpy(kevent->name, name, len);
249 if (rem)
250 memset(kevent->name + len, 0, rem);
251 kevent->event.len = len + rem;
252 } else {
253 kevent->event.len = 0;
254 kevent->name = NULL;
255 }
256
257 return kevent;
258}
259
260/*
261 * inotify_dev_get_event - return the next event in the given dev's queue
262 *
263 * Caller must hold dev->mutex.
264 */
265static inline struct inotify_kernel_event *
266inotify_dev_get_event(struct inotify_device *dev)
267{
268 return list_entry(dev->events.next, struct inotify_kernel_event, list);
269}
270
271/*
272 * inotify_dev_queue_event - add a new event to the given device
273 *
274 * Caller must hold dev->mutex. Can sleep (calls kernel_event()).
275 */
276static void inotify_dev_queue_event(struct inotify_device *dev,
277 struct inotify_watch *watch, u32 mask,
278 u32 cookie, const char *name)
279{
280 struct inotify_kernel_event *kevent, *last;
281
282 /* coalescing: drop this event if it is a dupe of the previous */
283 last = inotify_dev_get_event(dev);
284 if (last && last->event.mask == mask && last->event.wd == watch->wd &&
285 last->event.cookie == cookie) {
286 const char *lastname = last->name;
287
288 if (!name && !lastname)
289 return;
290 if (name && lastname && !strcmp(lastname, name))
291 return;
292 }
293
294 /* the queue overflowed and we already sent the Q_OVERFLOW event */
295 if (unlikely(dev->event_count > dev->max_events))
296 return;
297
298 /* if the queue overflows, we need to notify user space */
299 if (unlikely(dev->event_count == dev->max_events))
300 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
301 else
302 kevent = kernel_event(watch->wd, mask, cookie, name);
303
304 if (unlikely(!kevent))
305 return;
306
307 /* queue the event and wake up anyone waiting */
308 dev->event_count++;
309 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
310 list_add_tail(&kevent->list, &dev->events);
311 wake_up_interruptible(&dev->wq);
312}
313
314/*
315 * remove_kevent - cleans up and ultimately frees the given kevent
316 *
317 * Caller must hold dev->mutex.
318 */
319static void remove_kevent(struct inotify_device *dev,
320 struct inotify_kernel_event *kevent)
321{
322 list_del(&kevent->list);
323
324 dev->event_count--;
325 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
326
327 kfree(kevent->name);
328 kmem_cache_free(event_cachep, kevent);
329}
330 118
331/* 119 iput(watch->inode);
332 * inotify_dev_event_dequeue - destroy an event on the given device 120 ih->in_ops->destroy_watch(watch);
333 * 121 put_inotify_handle(ih);
334 * Caller must hold dev->mutex.
335 */
336static void inotify_dev_event_dequeue(struct inotify_device *dev)
337{
338 if (!list_empty(&dev->events)) {
339 struct inotify_kernel_event *kevent;
340 kevent = inotify_dev_get_event(dev);
341 remove_kevent(dev, kevent);
342 } 122 }
343} 123}
124EXPORT_SYMBOL_GPL(put_inotify_watch);
344 125
345/* 126/*
346 * inotify_dev_get_wd - returns the next WD for use by the given dev 127 * inotify_handle_get_wd - returns the next WD for use by the given handle
347 * 128 *
348 * Callers must hold dev->mutex. This function can sleep. 129 * Callers must hold ih->mutex. This function can sleep.
349 */ 130 */
350static int inotify_dev_get_wd(struct inotify_device *dev, 131static int inotify_handle_get_wd(struct inotify_handle *ih,
351 struct inotify_watch *watch) 132 struct inotify_watch *watch)
352{ 133{
353 int ret; 134 int ret;
354 135
355 do { 136 do {
356 if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) 137 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
357 return -ENOSPC; 138 return -ENOSPC;
358 ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd); 139 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
359 } while (ret == -EAGAIN); 140 } while (ret == -EAGAIN);
360 141
361 return ret; 142 if (likely(!ret))
362} 143 ih->last_wd = watch->wd;
363 144
364/* 145 return ret;
365 * find_inode - resolve a user-given path to a specific inode and return a nd
366 */
367static int find_inode(const char __user *dirname, struct nameidata *nd,
368 unsigned flags)
369{
370 int error;
371
372 error = __user_walk(dirname, flags, nd);
373 if (error)
374 return error;
375 /* you can only watch an inode if you have read permissions on it */
376 error = vfs_permission(nd, MAY_READ);
377 if (error)
378 path_release(nd);
379 return error;
380} 146}
381 147
382/* 148/*
@@ -422,67 +188,18 @@ static void set_dentry_child_flags(struct inode *inode, int watched)
422} 188}
423 189
424/* 190/*
425 * create_watch - creates a watch on the given device. 191 * inode_find_handle - find the watch associated with the given inode and
426 * 192 * handle
427 * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep.
428 * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
429 */
430static struct inotify_watch *create_watch(struct inotify_device *dev,
431 u32 mask, struct inode *inode)
432{
433 struct inotify_watch *watch;
434 int ret;
435
436 if (atomic_read(&dev->user->inotify_watches) >=
437 inotify_max_user_watches)
438 return ERR_PTR(-ENOSPC);
439
440 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
441 if (unlikely(!watch))
442 return ERR_PTR(-ENOMEM);
443
444 ret = inotify_dev_get_wd(dev, watch);
445 if (unlikely(ret)) {
446 kmem_cache_free(watch_cachep, watch);
447 return ERR_PTR(ret);
448 }
449
450 dev->last_wd = watch->wd;
451 watch->mask = mask;
452 atomic_set(&watch->count, 0);
453 INIT_LIST_HEAD(&watch->d_list);
454 INIT_LIST_HEAD(&watch->i_list);
455
456 /* save a reference to device and bump the count to make it official */
457 get_inotify_dev(dev);
458 watch->dev = dev;
459
460 /*
461 * Save a reference to the inode and bump the ref count to make it
462 * official. We hold a reference to nameidata, which makes this safe.
463 */
464 watch->inode = igrab(inode);
465
466 /* bump our own count, corresponding to our entry in dev->watches */
467 get_inotify_watch(watch);
468
469 atomic_inc(&dev->user->inotify_watches);
470
471 return watch;
472}
473
474/*
475 * inotify_find_dev - find the watch associated with the given inode and dev
476 * 193 *
477 * Callers must hold inode->inotify_mutex. 194 * Callers must hold inode->inotify_mutex.
478 */ 195 */
479static struct inotify_watch *inode_find_dev(struct inode *inode, 196static struct inotify_watch *inode_find_handle(struct inode *inode,
480 struct inotify_device *dev) 197 struct inotify_handle *ih)
481{ 198{
482 struct inotify_watch *watch; 199 struct inotify_watch *watch;
483 200
484 list_for_each_entry(watch, &inode->inotify_watches, i_list) { 201 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
485 if (watch->dev == dev) 202 if (watch->ih == ih)
486 return watch; 203 return watch;
487 } 204 }
488 205
@@ -490,40 +207,40 @@ static struct inotify_watch *inode_find_dev(struct inode *inode,
490} 207}
491 208
492/* 209/*
493 * remove_watch_no_event - remove_watch() without the IN_IGNORED event. 210 * remove_watch_no_event - remove watch without the IN_IGNORED event.
211 *
212 * Callers must hold both inode->inotify_mutex and ih->mutex.
494 */ 213 */
495static void remove_watch_no_event(struct inotify_watch *watch, 214static void remove_watch_no_event(struct inotify_watch *watch,
496 struct inotify_device *dev) 215 struct inotify_handle *ih)
497{ 216{
498 list_del(&watch->i_list); 217 list_del(&watch->i_list);
499 list_del(&watch->d_list); 218 list_del(&watch->h_list);
500 219
501 if (!inotify_inode_watched(watch->inode)) 220 if (!inotify_inode_watched(watch->inode))
502 set_dentry_child_flags(watch->inode, 0); 221 set_dentry_child_flags(watch->inode, 0);
503 222
504 atomic_dec(&dev->user->inotify_watches); 223 idr_remove(&ih->idr, watch->wd);
505 idr_remove(&dev->idr, watch->wd);
506 put_inotify_watch(watch);
507} 224}
508 225
509/* 226/**
510 * remove_watch - Remove a watch from both the device and the inode. Sends 227 * inotify_remove_watch_locked - Remove a watch from both the handle and the
511 * the IN_IGNORED event to the given device signifying that the inode is no 228 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
512 * longer watched. 229 * watched. May be invoked from a caller's event handler.
513 * 230 * @ih: inotify handle associated with watch
514 * Callers must hold both inode->inotify_mutex and dev->mutex. We drop a 231 * @watch: watch to remove
515 * reference to the inode before returning.
516 * 232 *
517 * The inode is not iput() so as to remain atomic. If the inode needs to be 233 * Callers must hold both inode->inotify_mutex and ih->mutex.
518 * iput(), the call returns one. Otherwise, it returns zero.
519 */ 234 */
520static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) 235void inotify_remove_watch_locked(struct inotify_handle *ih,
236 struct inotify_watch *watch)
521{ 237{
522 inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL); 238 remove_watch_no_event(watch, ih);
523 remove_watch_no_event(watch, dev); 239 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
524} 240}
241EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
525 242
526/* Kernel API */ 243/* Kernel API for producing events */
527 244
528/* 245/*
529 * inotify_d_instantiate - instantiate dcache entry for inode 246 * inotify_d_instantiate - instantiate dcache entry for inode
@@ -563,9 +280,10 @@ void inotify_d_move(struct dentry *entry)
563 * @mask: event mask describing this event 280 * @mask: event mask describing this event
564 * @cookie: cookie for synchronization, or zero 281 * @cookie: cookie for synchronization, or zero
565 * @name: filename, if any 282 * @name: filename, if any
283 * @n_inode: inode associated with name
566 */ 284 */
567void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, 285void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
568 const char *name) 286 const char *name, struct inode *n_inode)
569{ 287{
570 struct inotify_watch *watch, *next; 288 struct inotify_watch *watch, *next;
571 289
@@ -576,14 +294,13 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
576 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { 294 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
577 u32 watch_mask = watch->mask; 295 u32 watch_mask = watch->mask;
578 if (watch_mask & mask) { 296 if (watch_mask & mask) {
579 struct inotify_device *dev = watch->dev; 297 struct inotify_handle *ih= watch->ih;
580 get_inotify_watch(watch); 298 mutex_lock(&ih->mutex);
581 mutex_lock(&dev->mutex);
582 inotify_dev_queue_event(dev, watch, mask, cookie, name);
583 if (watch_mask & IN_ONESHOT) 299 if (watch_mask & IN_ONESHOT)
584 remove_watch_no_event(watch, dev); 300 remove_watch_no_event(watch, ih);
585 mutex_unlock(&dev->mutex); 301 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
586 put_inotify_watch(watch); 302 name, n_inode);
303 mutex_unlock(&ih->mutex);
587 } 304 }
588 } 305 }
589 mutex_unlock(&inode->inotify_mutex); 306 mutex_unlock(&inode->inotify_mutex);
@@ -613,7 +330,8 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
613 if (inotify_inode_watched(inode)) { 330 if (inotify_inode_watched(inode)) {
614 dget(parent); 331 dget(parent);
615 spin_unlock(&dentry->d_lock); 332 spin_unlock(&dentry->d_lock);
616 inotify_inode_queue_event(inode, mask, cookie, name); 333 inotify_inode_queue_event(inode, mask, cookie, name,
334 dentry->d_inode);
617 dput(parent); 335 dput(parent);
618 } else 336 } else
619 spin_unlock(&dentry->d_lock); 337 spin_unlock(&dentry->d_lock);
@@ -665,7 +383,7 @@ void inotify_unmount_inodes(struct list_head *list)
665 383
666 need_iput_tmp = need_iput; 384 need_iput_tmp = need_iput;
667 need_iput = NULL; 385 need_iput = NULL;
668 /* In case the remove_watch() drops a reference. */ 386 /* In case inotify_remove_watch_locked() drops a reference. */
669 if (inode != need_iput_tmp) 387 if (inode != need_iput_tmp)
670 __iget(inode); 388 __iget(inode);
671 else 389 else
@@ -694,11 +412,12 @@ void inotify_unmount_inodes(struct list_head *list)
694 mutex_lock(&inode->inotify_mutex); 412 mutex_lock(&inode->inotify_mutex);
695 watches = &inode->inotify_watches; 413 watches = &inode->inotify_watches;
696 list_for_each_entry_safe(watch, next_w, watches, i_list) { 414 list_for_each_entry_safe(watch, next_w, watches, i_list) {
697 struct inotify_device *dev = watch->dev; 415 struct inotify_handle *ih= watch->ih;
698 mutex_lock(&dev->mutex); 416 mutex_lock(&ih->mutex);
699 inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); 417 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
700 remove_watch(watch, dev); 418 NULL, NULL);
701 mutex_unlock(&dev->mutex); 419 inotify_remove_watch_locked(ih, watch);
420 mutex_unlock(&ih->mutex);
702 } 421 }
703 mutex_unlock(&inode->inotify_mutex); 422 mutex_unlock(&inode->inotify_mutex);
704 iput(inode); 423 iput(inode);
@@ -718,432 +437,292 @@ void inotify_inode_is_dead(struct inode *inode)
718 437
719 mutex_lock(&inode->inotify_mutex); 438 mutex_lock(&inode->inotify_mutex);
720 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { 439 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
721 struct inotify_device *dev = watch->dev; 440 struct inotify_handle *ih = watch->ih;
722 mutex_lock(&dev->mutex); 441 mutex_lock(&ih->mutex);
723 remove_watch(watch, dev); 442 inotify_remove_watch_locked(ih, watch);
724 mutex_unlock(&dev->mutex); 443 mutex_unlock(&ih->mutex);
725 } 444 }
726 mutex_unlock(&inode->inotify_mutex); 445 mutex_unlock(&inode->inotify_mutex);
727} 446}
728EXPORT_SYMBOL_GPL(inotify_inode_is_dead); 447EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
729 448
730/* Device Interface */ 449/* Kernel Consumer API */
731 450
732static unsigned int inotify_poll(struct file *file, poll_table *wait) 451/**
452 * inotify_init - allocate and initialize an inotify instance
453 * @ops: caller's inotify operations
454 */
455struct inotify_handle *inotify_init(const struct inotify_operations *ops)
733{ 456{
734 struct inotify_device *dev = file->private_data; 457 struct inotify_handle *ih;
735 int ret = 0;
736 458
737 poll_wait(file, &dev->wq, wait); 459 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
738 mutex_lock(&dev->mutex); 460 if (unlikely(!ih))
739 if (!list_empty(&dev->events)) 461 return ERR_PTR(-ENOMEM);
740 ret = POLLIN | POLLRDNORM;
741 mutex_unlock(&dev->mutex);
742 462
743 return ret; 463 idr_init(&ih->idr);
464 INIT_LIST_HEAD(&ih->watches);
465 mutex_init(&ih->mutex);
466 ih->last_wd = 0;
467 ih->in_ops = ops;
468 atomic_set(&ih->count, 0);
469 get_inotify_handle(ih);
470
471 return ih;
744} 472}
473EXPORT_SYMBOL_GPL(inotify_init);
745 474
746static ssize_t inotify_read(struct file *file, char __user *buf, 475/**
747 size_t count, loff_t *pos) 476 * inotify_init_watch - initialize an inotify watch
477 * @watch: watch to initialize
478 */
479void inotify_init_watch(struct inotify_watch *watch)
748{ 480{
749 size_t event_size = sizeof (struct inotify_event); 481 INIT_LIST_HEAD(&watch->h_list);
750 struct inotify_device *dev; 482 INIT_LIST_HEAD(&watch->i_list);
751 char __user *start; 483 atomic_set(&watch->count, 0);
752 int ret; 484 get_inotify_watch(watch); /* initial get */
753 DEFINE_WAIT(wait);
754
755 start = buf;
756 dev = file->private_data;
757
758 while (1) {
759 int events;
760
761 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
762
763 mutex_lock(&dev->mutex);
764 events = !list_empty(&dev->events);
765 mutex_unlock(&dev->mutex);
766 if (events) {
767 ret = 0;
768 break;
769 }
770
771 if (file->f_flags & O_NONBLOCK) {
772 ret = -EAGAIN;
773 break;
774 }
775
776 if (signal_pending(current)) {
777 ret = -EINTR;
778 break;
779 }
780
781 schedule();
782 }
783
784 finish_wait(&dev->wq, &wait);
785 if (ret)
786 return ret;
787
788 mutex_lock(&dev->mutex);
789 while (1) {
790 struct inotify_kernel_event *kevent;
791
792 ret = buf - start;
793 if (list_empty(&dev->events))
794 break;
795
796 kevent = inotify_dev_get_event(dev);
797 if (event_size + kevent->event.len > count)
798 break;
799
800 if (copy_to_user(buf, &kevent->event, event_size)) {
801 ret = -EFAULT;
802 break;
803 }
804 buf += event_size;
805 count -= event_size;
806
807 if (kevent->name) {
808 if (copy_to_user(buf, kevent->name, kevent->event.len)){
809 ret = -EFAULT;
810 break;
811 }
812 buf += kevent->event.len;
813 count -= kevent->event.len;
814 }
815
816 remove_kevent(dev, kevent);
817 }
818 mutex_unlock(&dev->mutex);
819
820 return ret;
821} 485}
486EXPORT_SYMBOL_GPL(inotify_init_watch);
822 487
823static int inotify_release(struct inode *ignored, struct file *file) 488/**
489 * inotify_destroy - clean up and destroy an inotify instance
490 * @ih: inotify handle
491 */
492void inotify_destroy(struct inotify_handle *ih)
824{ 493{
825 struct inotify_device *dev = file->private_data;
826
827 /* 494 /*
828 * Destroy all of the watches on this device. Unfortunately, not very 495 * Destroy all of the watches for this handle. Unfortunately, not very
829 * pretty. We cannot do a simple iteration over the list, because we 496 * pretty. We cannot do a simple iteration over the list, because we
830 * do not know the inode until we iterate to the watch. But we need to 497 * do not know the inode until we iterate to the watch. But we need to
831 * hold inode->inotify_mutex before dev->mutex. The following works. 498 * hold inode->inotify_mutex before ih->mutex. The following works.
832 */ 499 */
833 while (1) { 500 while (1) {
834 struct inotify_watch *watch; 501 struct inotify_watch *watch;
835 struct list_head *watches; 502 struct list_head *watches;
836 struct inode *inode; 503 struct inode *inode;
837 504
838 mutex_lock(&dev->mutex); 505 mutex_lock(&ih->mutex);
839 watches = &dev->watches; 506 watches = &ih->watches;
840 if (list_empty(watches)) { 507 if (list_empty(watches)) {
841 mutex_unlock(&dev->mutex); 508 mutex_unlock(&ih->mutex);
842 break; 509 break;
843 } 510 }
844 watch = list_entry(watches->next, struct inotify_watch, d_list); 511 watch = list_entry(watches->next, struct inotify_watch, h_list);
845 get_inotify_watch(watch); 512 get_inotify_watch(watch);
846 mutex_unlock(&dev->mutex); 513 mutex_unlock(&ih->mutex);
847 514
848 inode = watch->inode; 515 inode = watch->inode;
849 mutex_lock(&inode->inotify_mutex); 516 mutex_lock(&inode->inotify_mutex);
850 mutex_lock(&dev->mutex); 517 mutex_lock(&ih->mutex);
851 518
852 /* make sure we didn't race with another list removal */ 519 /* make sure we didn't race with another list removal */
853 if (likely(idr_find(&dev->idr, watch->wd))) 520 if (likely(idr_find(&ih->idr, watch->wd))) {
854 remove_watch_no_event(watch, dev); 521 remove_watch_no_event(watch, ih);
522 put_inotify_watch(watch);
523 }
855 524
856 mutex_unlock(&dev->mutex); 525 mutex_unlock(&ih->mutex);
857 mutex_unlock(&inode->inotify_mutex); 526 mutex_unlock(&inode->inotify_mutex);
858 put_inotify_watch(watch); 527 put_inotify_watch(watch);
859 } 528 }
860 529
861 /* destroy all of the events on this device */ 530 /* free this handle: the put matching the get in inotify_init() */
862 mutex_lock(&dev->mutex); 531 put_inotify_handle(ih);
863 while (!list_empty(&dev->events))
864 inotify_dev_event_dequeue(dev);
865 mutex_unlock(&dev->mutex);
866
867 /* free this device: the put matching the get in inotify_init() */
868 put_inotify_dev(dev);
869
870 return 0;
871} 532}
533EXPORT_SYMBOL_GPL(inotify_destroy);
872 534
873/* 535/**
874 * inotify_ignore - remove a given wd from this inotify instance. 536 * inotify_find_watch - find an existing watch for an (ih,inode) pair
537 * @ih: inotify handle
538 * @inode: inode to watch
539 * @watchp: pointer to existing inotify_watch
875 * 540 *
876 * Can sleep. 541 * Caller must pin given inode (via nameidata).
877 */ 542 */
878static int inotify_ignore(struct inotify_device *dev, s32 wd) 543s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
544 struct inotify_watch **watchp)
879{ 545{
880 struct inotify_watch *watch; 546 struct inotify_watch *old;
881 struct inode *inode; 547 int ret = -ENOENT;
882
883 mutex_lock(&dev->mutex);
884 watch = idr_find(&dev->idr, wd);
885 if (unlikely(!watch)) {
886 mutex_unlock(&dev->mutex);
887 return -EINVAL;
888 }
889 get_inotify_watch(watch);
890 inode = watch->inode;
891 mutex_unlock(&dev->mutex);
892 548
893 mutex_lock(&inode->inotify_mutex); 549 mutex_lock(&inode->inotify_mutex);
894 mutex_lock(&dev->mutex); 550 mutex_lock(&ih->mutex);
895 551
896 /* make sure that we did not race */ 552 old = inode_find_handle(inode, ih);
897 if (likely(idr_find(&dev->idr, wd) == watch)) 553 if (unlikely(old)) {
898 remove_watch(watch, dev); 554 get_inotify_watch(old); /* caller must put watch */
555 *watchp = old;
556 ret = old->wd;
557 }
899 558
900 mutex_unlock(&dev->mutex); 559 mutex_unlock(&ih->mutex);
901 mutex_unlock(&inode->inotify_mutex); 560 mutex_unlock(&inode->inotify_mutex);
902 put_inotify_watch(watch);
903 561
904 return 0; 562 return ret;
905} 563}
564EXPORT_SYMBOL_GPL(inotify_find_watch);
906 565
907static long inotify_ioctl(struct file *file, unsigned int cmd, 566/**
908 unsigned long arg) 567 * inotify_find_update_watch - find and update the mask of an existing watch
568 * @ih: inotify handle
569 * @inode: inode's watch to update
570 * @mask: mask of events to watch
571 *
572 * Caller must pin given inode (via nameidata).
573 */
574s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
575 u32 mask)
909{ 576{
910 struct inotify_device *dev; 577 struct inotify_watch *old;
911 void __user *p; 578 int mask_add = 0;
912 int ret = -ENOTTY; 579 int ret;
913
914 dev = file->private_data;
915 p = (void __user *) arg;
916
917 switch (cmd) {
918 case FIONREAD:
919 ret = put_user(dev->queue_size, (int __user *) p);
920 break;
921 }
922
923 return ret;
924}
925 580
926static const struct file_operations inotify_fops = { 581 if (mask & IN_MASK_ADD)
927 .poll = inotify_poll, 582 mask_add = 1;
928 .read = inotify_read,
929 .release = inotify_release,
930 .unlocked_ioctl = inotify_ioctl,
931 .compat_ioctl = inotify_ioctl,
932};
933 583
934asmlinkage long sys_inotify_init(void) 584 /* don't allow invalid bits: we don't want flags set */
935{ 585 mask &= IN_ALL_EVENTS | IN_ONESHOT;
936 struct inotify_device *dev; 586 if (unlikely(!mask))
937 struct user_struct *user; 587 return -EINVAL;
938 struct file *filp;
939 int fd, ret;
940
941 fd = get_unused_fd();
942 if (fd < 0)
943 return fd;
944
945 filp = get_empty_filp();
946 if (!filp) {
947 ret = -ENFILE;
948 goto out_put_fd;
949 }
950 588
951 user = get_uid(current->user); 589 mutex_lock(&inode->inotify_mutex);
952 if (unlikely(atomic_read(&user->inotify_devs) >= 590 mutex_lock(&ih->mutex);
953 inotify_max_user_instances)) {
954 ret = -EMFILE;
955 goto out_free_uid;
956 }
957 591
958 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); 592 /*
959 if (unlikely(!dev)) { 593 * Handle the case of re-adding a watch on an (inode,ih) pair that we
960 ret = -ENOMEM; 594 * are already watching. We just update the mask and return its wd.
961 goto out_free_uid; 595 */
596 old = inode_find_handle(inode, ih);
597 if (unlikely(!old)) {
598 ret = -ENOENT;
599 goto out;
962 } 600 }
963 601
964 filp->f_op = &inotify_fops; 602 if (mask_add)
965 filp->f_vfsmnt = mntget(inotify_mnt); 603 old->mask |= mask;
966 filp->f_dentry = dget(inotify_mnt->mnt_root); 604 else
967 filp->f_mapping = filp->f_dentry->d_inode->i_mapping; 605 old->mask = mask;
968 filp->f_mode = FMODE_READ; 606 ret = old->wd;
969 filp->f_flags = O_RDONLY; 607out:
970 filp->private_data = dev; 608 mutex_unlock(&ih->mutex);
971 609 mutex_unlock(&inode->inotify_mutex);
972 idr_init(&dev->idr);
973 INIT_LIST_HEAD(&dev->events);
974 INIT_LIST_HEAD(&dev->watches);
975 init_waitqueue_head(&dev->wq);
976 mutex_init(&dev->mutex);
977 dev->event_count = 0;
978 dev->queue_size = 0;
979 dev->max_events = inotify_max_queued_events;
980 dev->user = user;
981 dev->last_wd = 0;
982 atomic_set(&dev->count, 0);
983
984 get_inotify_dev(dev);
985 atomic_inc(&user->inotify_devs);
986 fd_install(fd, filp);
987
988 return fd;
989out_free_uid:
990 free_uid(user);
991 put_filp(filp);
992out_put_fd:
993 put_unused_fd(fd);
994 return ret; 610 return ret;
995} 611}
612EXPORT_SYMBOL_GPL(inotify_find_update_watch);
996 613
997asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 614/**
615 * inotify_add_watch - add a watch to an inotify instance
616 * @ih: inotify handle
617 * @watch: caller allocated watch structure
618 * @inode: inode to watch
619 * @mask: mask of events to watch
620 *
621 * Caller must pin given inode (via nameidata).
622 * Caller must ensure it only calls inotify_add_watch() once per watch.
623 * Calls inotify_handle_get_wd() so may sleep.
624 */
625s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
626 struct inode *inode, u32 mask)
998{ 627{
999 struct inotify_watch *watch, *old; 628 int ret = 0;
1000 struct inode *inode;
1001 struct inotify_device *dev;
1002 struct nameidata nd;
1003 struct file *filp;
1004 int ret, fput_needed;
1005 int mask_add = 0;
1006 unsigned flags = 0;
1007
1008 filp = fget_light(fd, &fput_needed);
1009 if (unlikely(!filp))
1010 return -EBADF;
1011
1012 /* verify that this is indeed an inotify instance */
1013 if (unlikely(filp->f_op != &inotify_fops)) {
1014 ret = -EINVAL;
1015 goto fput_and_out;
1016 }
1017
1018 if (!(mask & IN_DONT_FOLLOW))
1019 flags |= LOOKUP_FOLLOW;
1020 if (mask & IN_ONLYDIR)
1021 flags |= LOOKUP_DIRECTORY;
1022
1023 ret = find_inode(path, &nd, flags);
1024 if (unlikely(ret))
1025 goto fput_and_out;
1026 629
1027 /* inode held in place by reference to nd; dev by fget on fd */ 630 /* don't allow invalid bits: we don't want flags set */
1028 inode = nd.dentry->d_inode; 631 mask &= IN_ALL_EVENTS | IN_ONESHOT;
1029 dev = filp->private_data; 632 if (unlikely(!mask))
633 return -EINVAL;
634 watch->mask = mask;
1030 635
1031 mutex_lock(&inode->inotify_mutex); 636 mutex_lock(&inode->inotify_mutex);
1032 mutex_lock(&dev->mutex); 637 mutex_lock(&ih->mutex);
1033
1034 if (mask & IN_MASK_ADD)
1035 mask_add = 1;
1036 638
1037 /* don't let user-space set invalid bits: we don't want flags set */ 639 /* Initialize a new watch */
1038 mask &= IN_ALL_EVENTS | IN_ONESHOT; 640 ret = inotify_handle_get_wd(ih, watch);
1039 if (unlikely(!mask)) { 641 if (unlikely(ret))
1040 ret = -EINVAL;
1041 goto out; 642 goto out;
1042 } 643 ret = watch->wd;
644
645 /* save a reference to handle and bump the count to make it official */
646 get_inotify_handle(ih);
647 watch->ih = ih;
1043 648
1044 /* 649 /*
1045 * Handle the case of re-adding a watch on an (inode,dev) pair that we 650 * Save a reference to the inode and bump the ref count to make it
1046 * are already watching. We just update the mask and return its wd. 651 * official. We hold a reference to nameidata, which makes this safe.
1047 */ 652 */
1048 old = inode_find_dev(inode, dev); 653 watch->inode = igrab(inode);
1049 if (unlikely(old)) {
1050 if (mask_add)
1051 old->mask |= mask;
1052 else
1053 old->mask = mask;
1054 ret = old->wd;
1055 goto out;
1056 }
1057
1058 watch = create_watch(dev, mask, inode);
1059 if (unlikely(IS_ERR(watch))) {
1060 ret = PTR_ERR(watch);
1061 goto out;
1062 }
1063 654
1064 if (!inotify_inode_watched(inode)) 655 if (!inotify_inode_watched(inode))
1065 set_dentry_child_flags(inode, 1); 656 set_dentry_child_flags(inode, 1);
1066 657
1067 /* Add the watch to the device's and the inode's list */ 658 /* Add the watch to the handle's and the inode's list */
1068 list_add(&watch->d_list, &dev->watches); 659 list_add(&watch->h_list, &ih->watches);
1069 list_add(&watch->i_list, &inode->inotify_watches); 660 list_add(&watch->i_list, &inode->inotify_watches);
1070 ret = watch->wd;
1071out: 661out:
1072 mutex_unlock(&dev->mutex); 662 mutex_unlock(&ih->mutex);
1073 mutex_unlock(&inode->inotify_mutex); 663 mutex_unlock(&inode->inotify_mutex);
1074 path_release(&nd);
1075fput_and_out:
1076 fput_light(filp, fput_needed);
1077 return ret; 664 return ret;
1078} 665}
666EXPORT_SYMBOL_GPL(inotify_add_watch);
1079 667
1080asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) 668/**
669 * inotify_rm_wd - remove a watch from an inotify instance
670 * @ih: inotify handle
671 * @wd: watch descriptor to remove
672 *
673 * Can sleep.
674 */
675int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
1081{ 676{
1082 struct file *filp; 677 struct inotify_watch *watch;
1083 struct inotify_device *dev; 678 struct inode *inode;
1084 int ret, fput_needed;
1085
1086 filp = fget_light(fd, &fput_needed);
1087 if (unlikely(!filp))
1088 return -EBADF;
1089 679
1090 /* verify that this is indeed an inotify instance */ 680 mutex_lock(&ih->mutex);
1091 if (unlikely(filp->f_op != &inotify_fops)) { 681 watch = idr_find(&ih->idr, wd);
1092 ret = -EINVAL; 682 if (unlikely(!watch)) {
1093 goto out; 683 mutex_unlock(&ih->mutex);
684 return -EINVAL;
1094 } 685 }
686 get_inotify_watch(watch);
687 inode = watch->inode;
688 mutex_unlock(&ih->mutex);
1095 689
1096 dev = filp->private_data; 690 mutex_lock(&inode->inotify_mutex);
1097 ret = inotify_ignore(dev, wd); 691 mutex_lock(&ih->mutex);
1098 692
1099out: 693 /* make sure that we did not race */
1100 fput_light(filp, fput_needed); 694 if (likely(idr_find(&ih->idr, wd) == watch))
1101 return ret; 695 inotify_remove_watch_locked(ih, watch);
696
697 mutex_unlock(&ih->mutex);
698 mutex_unlock(&inode->inotify_mutex);
699 put_inotify_watch(watch);
700
701 return 0;
1102} 702}
703EXPORT_SYMBOL_GPL(inotify_rm_wd);
1103 704
1104static struct super_block * 705/**
1105inotify_get_sb(struct file_system_type *fs_type, int flags, 706 * inotify_rm_watch - remove a watch from an inotify instance
1106 const char *dev_name, void *data) 707 * @ih: inotify handle
708 * @watch: watch to remove
709 *
710 * Can sleep.
711 */
712int inotify_rm_watch(struct inotify_handle *ih,
713 struct inotify_watch *watch)
1107{ 714{
1108 return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); 715 return inotify_rm_wd(ih, watch->wd);
1109} 716}
1110 717EXPORT_SYMBOL_GPL(inotify_rm_watch);
1111static struct file_system_type inotify_fs_type = {
1112 .name = "inotifyfs",
1113 .get_sb = inotify_get_sb,
1114 .kill_sb = kill_anon_super,
1115};
1116 718
1117/* 719/*
1118 * inotify_setup - Our initialization function. Note that we cannnot return 720 * inotify_setup - core initialization function
1119 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
1120 * must result in panic().
1121 */ 721 */
1122static int __init inotify_setup(void) 722static int __init inotify_setup(void)
1123{ 723{
1124 int ret;
1125
1126 ret = register_filesystem(&inotify_fs_type);
1127 if (unlikely(ret))
1128 panic("inotify: register_filesystem returned %d!\n", ret);
1129
1130 inotify_mnt = kern_mount(&inotify_fs_type);
1131 if (IS_ERR(inotify_mnt))
1132 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
1133
1134 inotify_max_queued_events = 16384;
1135 inotify_max_user_instances = 128;
1136 inotify_max_user_watches = 8192;
1137
1138 atomic_set(&inotify_cookie, 0); 724 atomic_set(&inotify_cookie, 0);
1139 725
1140 watch_cachep = kmem_cache_create("inotify_watch_cache",
1141 sizeof(struct inotify_watch),
1142 0, SLAB_PANIC, NULL, NULL);
1143 event_cachep = kmem_cache_create("inotify_event_cache",
1144 sizeof(struct inotify_kernel_event),
1145 0, SLAB_PANIC, NULL, NULL);
1146
1147 return 0; 726 return 0;
1148} 727}
1149 728
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
new file mode 100644
index 000000000000..f2386442adee
--- /dev/null
+++ b/fs/inotify_user.c
@@ -0,0 +1,719 @@
1/*
2 * fs/inotify_user.c - inotify support for userspace
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Copyright (C) 2005 John McCutchan
9 * Copyright 2006 Hewlett-Packard Development Company, L.P.
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2, or (at your option) any
14 * later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 */
21
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/slab.h>
25#include <linux/fs.h>
26#include <linux/file.h>
27#include <linux/mount.h>
28#include <linux/namei.h>
29#include <linux/poll.h>
30#include <linux/init.h>
31#include <linux/list.h>
32#include <linux/inotify.h>
33#include <linux/syscalls.h>
34
35#include <asm/ioctls.h>
36
/* slab caches backing inotify_user_watch and inotify_kernel_event objects */
37static kmem_cache_t *watch_cachep __read_mostly;
38static kmem_cache_t *event_cachep __read_mostly;
39
/* NOTE(review): presumably the internal mount behind inotify fds -- confirm */
40static struct vfsmount *inotify_mnt __read_mostly;
41
42/* these are configurable via /proc/sys/fs/inotify/ */
43int inotify_max_user_instances __read_mostly;
44int inotify_max_user_watches __read_mostly;
45int inotify_max_queued_events __read_mostly;
46
47/*
48 * Lock ordering:
49 *
50 * inotify_dev->up_mutex (ensures we don't re-add the same watch)
51 * inode->inotify_mutex (protects inode's watch list)
52 * inotify_handle->mutex (protects inotify_handle's watch list)
53 * inotify_dev->ev_mutex (protects device's event queue)
54 */
55
56/*
57 * Lifetimes of the main data structures:
58 *
59 * inotify_device: Lifetime is managed by reference count, from
60 * sys_inotify_init() until release. Additional references can bump the count
61 * via get_inotify_dev() and drop the count via put_inotify_dev().
62 *
63 * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
64 * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
65 * first event, or to inotify_destroy().
66 */
67
68/*
69 * struct inotify_device - represents an inotify instance
70 *
71 * The event queue is protected by 'ev_mutex'; 'up_mutex' serializes watch updates.
72 */
73struct inotify_device {
74 wait_queue_head_t wq; /* wait queue for i/o */
75 struct mutex ev_mutex; /* protects event queue */
76 struct mutex up_mutex; /* synchronizes watch updates */
77 struct list_head events; /* list of queued events */
78 atomic_t count; /* reference count */
79 struct user_struct *user; /* user who opened this dev */
80 struct inotify_handle *ih; /* inotify handle */
81 unsigned int queue_size; /* size of the queue (bytes) */
82 unsigned int event_count; /* number of pending events */
83 unsigned int max_events; /* maximum number of events */
84};
85
86/*
87 * struct inotify_kernel_event - An inotify event, originating from a watch and
88 * queued for user-space. A list of these is attached to each instance of the
89 * device. In read(), this list is walked and all events that can fit in the
90 * buffer are returned.
91 *
 * 'name' is a separately kmalloc()ed, zero-padded copy of the filename whose
 * length is recorded in event.len; it is NULL (and event.len 0) when the
 * event carries no name.
 *
92 * Protected by dev->ev_mutex of the device in which we are queued.
93 */
94struct inotify_kernel_event {
95 struct inotify_event event; /* the user-space event */
96 struct list_head list; /* entry in inotify_device's list */
97 char *name; /* padded filename (event.len bytes), NULL if none */
98};
99
100/*
101 * struct inotify_user_watch - our version of an inotify_watch, we add
102 * a reference to the associated inotify_device.
 *
 * The core only sees the embedded 'wdata'; the callbacks in this file get
 * back to the enclosing structure with container_of().
103 */
104struct inotify_user_watch {
105 struct inotify_device *dev; /* associated device */
106 struct inotify_watch wdata; /* inotify watch data */
107};
108
109#ifdef CONFIG_SYSCTL
110
111#include <linux/sysctl.h>
112
/* passed as .extra1 to every entry below; presumably the minimum accepted value -- confirm */
113static int zero;
114
/*
 * sysctl entries exposing the three inotify limits (max_user_instances,
 * max_user_watches, max_queued_events) as integers with mode 0644.
 */
115ctl_table inotify_table[] = {
116 {
117 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
118 .procname = "max_user_instances",
119 .data = &inotify_max_user_instances,
120 .maxlen = sizeof(int),
121 .mode = 0644,
122 .proc_handler = &proc_dointvec_minmax,
123 .strategy = &sysctl_intvec,
124 .extra1 = &zero,
125 },
126 {
127 .ctl_name = INOTIFY_MAX_USER_WATCHES,
128 .procname = "max_user_watches",
129 .data = &inotify_max_user_watches,
130 .maxlen = sizeof(int),
131 .mode = 0644,
132 .proc_handler = &proc_dointvec_minmax,
133 .strategy = &sysctl_intvec,
134 .extra1 = &zero,
135 },
136 {
137 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
138 .procname = "max_queued_events",
139 .data = &inotify_max_queued_events,
140 .maxlen = sizeof(int),
141 .mode = 0644,
142 .proc_handler = &proc_dointvec_minmax,
143 .strategy = &sysctl_intvec,
144 .extra1 = &zero
145 },
/* terminating entry */
146 { .ctl_name = 0 }
147};
148#endif /* CONFIG_SYSCTL */
149
/* get_inotify_dev - take an additional reference on @dev (bumps dev->count) */
150static inline void get_inotify_dev(struct inotify_device *dev)
151{
152 atomic_inc(&dev->count);
153}
154
155static inline void put_inotify_dev(struct inotify_device *dev)
156{
157 if (atomic_dec_and_test(&dev->count)) {
158 atomic_dec(&dev->user->inotify_devs);
159 free_uid(dev->user);
160 kfree(dev);
161 }
162}
163
164/*
165 * free_inotify_user_watch - cleans up the watch and its references
166 */
167static void free_inotify_user_watch(struct inotify_watch *w)
168{
169 struct inotify_user_watch *watch;
170 struct inotify_device *dev;
171
172 watch = container_of(w, struct inotify_user_watch, wdata);
173 dev = watch->dev;
174
175 atomic_dec(&dev->user->inotify_watches);
176 put_inotify_dev(dev);
177 kmem_cache_free(watch_cachep, watch);
178}
179
180/*
181 * kernel_event - create a new kernel event with the given parameters
182 *
183 * This function can sleep.
184 */
185static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
186 const char *name)
187{
188 struct inotify_kernel_event *kevent;
189
190 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
191 if (unlikely(!kevent))
192 return NULL;
193
194 /* we hand this out to user-space, so zero it just in case */
195 memset(&kevent->event, 0, sizeof(struct inotify_event));
196
197 kevent->event.wd = wd;
198 kevent->event.mask = mask;
199 kevent->event.cookie = cookie;
200
201 INIT_LIST_HEAD(&kevent->list);
202
203 if (name) {
204 size_t len, rem, event_size = sizeof(struct inotify_event);
205
206 /*
207 * We need to pad the filename so as to properly align an
208 * array of inotify_event structures. Because the structure is
209 * small and the common case is a small filename, we just round
210 * up to the next multiple of the structure's sizeof. This is
211 * simple and safe for all architectures.
212 */
213 len = strlen(name) + 1;
214 rem = event_size - len;
215 if (len > event_size) {
216 rem = event_size - (len % event_size);
217 if (len % event_size == 0)
218 rem = 0;
219 }
220
221 kevent->name = kmalloc(len + rem, GFP_KERNEL);
222 if (unlikely(!kevent->name)) {
223 kmem_cache_free(event_cachep, kevent);
224 return NULL;
225 }
226 memcpy(kevent->name, name, len);
227 if (rem)
228 memset(kevent->name + len, 0, rem);
229 kevent->event.len = len + rem;
230 } else {
231 kevent->event.len = 0;
232 kevent->name = NULL;
233 }
234
235 return kevent;
236}
237
238/*
239 * inotify_dev_get_event - return the next event in the given dev's queue
240 *
241 * Caller must hold dev->ev_mutex.
242 */
243static inline struct inotify_kernel_event *
244inotify_dev_get_event(struct inotify_device *dev)
245{
246 return list_entry(dev->events.next, struct inotify_kernel_event, list);
247}
248
249/*
250 * inotify_dev_queue_event - event handler registered with core inotify, adds
251 * a new event to the given device
252 *
253 * Can sleep (calls kernel_event()).
254 */
255static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
256 u32 cookie, const char *name,
257 struct inode *ignored)
258{
259 struct inotify_user_watch *watch;
260 struct inotify_device *dev;
261 struct inotify_kernel_event *kevent, *last;
262
263 watch = container_of(w, struct inotify_user_watch, wdata);
264 dev = watch->dev;
265
266 mutex_lock(&dev->ev_mutex);
267
268 /* we can safely put the watch as we don't reference it while
269 * generating the event
270 */
271 if (mask & IN_IGNORED || mask & IN_ONESHOT)
272 put_inotify_watch(w); /* final put */
273
274 /* coalescing: drop this event if it is a dupe of the previous */
275 last = inotify_dev_get_event(dev);
276 if (last && last->event.mask == mask && last->event.wd == wd &&
277 last->event.cookie == cookie) {
278 const char *lastname = last->name;
279
280 if (!name && !lastname)
281 goto out;
282 if (name && lastname && !strcmp(lastname, name))
283 goto out;
284 }
285
286 /* the queue overflowed and we already sent the Q_OVERFLOW event */
287 if (unlikely(dev->event_count > dev->max_events))
288 goto out;
289
290 /* if the queue overflows, we need to notify user space */
291 if (unlikely(dev->event_count == dev->max_events))
292 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
293 else
294 kevent = kernel_event(wd, mask, cookie, name);
295
296 if (unlikely(!kevent))
297 goto out;
298
299 /* queue the event and wake up anyone waiting */
300 dev->event_count++;
301 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
302 list_add_tail(&kevent->list, &dev->events);
303 wake_up_interruptible(&dev->wq);
304
305out:
306 mutex_unlock(&dev->ev_mutex);
307}
308
309/*
310 * remove_kevent - cleans up and ultimately frees the given kevent
311 *
312 * Caller must hold dev->ev_mutex.
313 */
314static void remove_kevent(struct inotify_device *dev,
315 struct inotify_kernel_event *kevent)
316{
317 list_del(&kevent->list);
318
319 dev->event_count--;
320 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
321
322 kfree(kevent->name);
323 kmem_cache_free(event_cachep, kevent);
324}
325
326/*
327 * inotify_dev_event_dequeue - destroy an event on the given device
328 *
329 * Caller must hold dev->ev_mutex.
330 */
331static void inotify_dev_event_dequeue(struct inotify_device *dev)
332{
333 if (!list_empty(&dev->events)) {
334 struct inotify_kernel_event *kevent;
335 kevent = inotify_dev_get_event(dev);
336 remove_kevent(dev, kevent);
337 }
338}
339
340/*
341 * find_inode - resolve a user-given path to a specific inode and return a nd
342 */
343static int find_inode(const char __user *dirname, struct nameidata *nd,
344 unsigned flags)
345{
346 int error;
347
348 error = __user_walk(dirname, flags, nd);
349 if (error)
350 return error;
351 /* you can only watch an inode if you have read permissions on it */
352 error = vfs_permission(nd, MAY_READ);
353 if (error)
354 path_release(nd);
355 return error;
356}
357
358/*
359 * create_watch - creates a watch on the given device.
360 *
361 * Callers must hold dev->up_mutex.
362 */
363static int create_watch(struct inotify_device *dev, struct inode *inode,
364 u32 mask)
365{
366 struct inotify_user_watch *watch;
367 int ret;
368
369 if (atomic_read(&dev->user->inotify_watches) >=
370 inotify_max_user_watches)
371 return -ENOSPC;
372
373 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
374 if (unlikely(!watch))
375 return -ENOMEM;
376
377 /* save a reference to device and bump the count to make it official */
378 get_inotify_dev(dev);
379 watch->dev = dev;
380
381 atomic_inc(&dev->user->inotify_watches);
382
383 inotify_init_watch(&watch->wdata);
384 ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
385 if (ret < 0)
386 free_inotify_user_watch(&watch->wdata);
387
388 return ret;
389}
390
391/* Device Interface */
392
393static unsigned int inotify_poll(struct file *file, poll_table *wait)
394{
395 struct inotify_device *dev = file->private_data;
396 int ret = 0;
397
398 poll_wait(file, &dev->wq, wait);
399 mutex_lock(&dev->ev_mutex);
400 if (!list_empty(&dev->events))
401 ret = POLLIN | POLLRDNORM;
402 mutex_unlock(&dev->ev_mutex);
403
404 return ret;
405}
406
/*
 * inotify_read - read() handler: copy queued events to user space
 *
 * Waits (interruptibly) until at least one event is queued, then copies as
 * many whole events as fit into @count bytes of @buf, removing each copied
 * event from the queue.
 *
 * Returns the number of bytes copied (0 if the first queued event does not
 * fit), -EAGAIN if nothing is queued and the file is O_NONBLOCK, -EINTR if a
 * signal is pending while waiting, or -EFAULT if a copy to user space fails.
 * @pos is not used.
 */
407static ssize_t inotify_read(struct file *file, char __user *buf,
408 size_t count, loff_t *pos)
409{
410 size_t event_size = sizeof (struct inotify_event);
411 struct inotify_device *dev;
412 char __user *start;
413 int ret;
414 DEFINE_WAIT(wait);
415
416 start = buf;
417 dev = file->private_data;
418
 /* phase 1: wait until the event queue is non-empty */
419 while (1) {
420 int events;
421
422 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
423
424 mutex_lock(&dev->ev_mutex);
425 events = !list_empty(&dev->events);
426 mutex_unlock(&dev->ev_mutex);
427 if (events) {
428 ret = 0;
429 break;
430 }
431
432 if (file->f_flags & O_NONBLOCK) {
433 ret = -EAGAIN;
434 break;
435 }
436
437 if (signal_pending(current)) {
438 ret = -EINTR;
439 break;
440 }
441
442 schedule();
443 }
444
445 finish_wait(&dev->wq, &wait);
446 if (ret)
447 return ret;
448
 /* phase 2: drain events from the head of the queue into the user buffer */
449 mutex_lock(&dev->ev_mutex);
450 while (1) {
451 struct inotify_kernel_event *kevent;
452
 /* bytes copied so far; this is the return value unless a copy faults */
453 ret = buf - start;
454 if (list_empty(&dev->events))
455 break;
456
457 kevent = inotify_dev_get_event(dev);
 /* stop at the first event (header plus padded name) that no longer fits */
458 if (event_size + kevent->event.len > count)
459 break;
460
461 if (copy_to_user(buf, &kevent->event, event_size)) {
462 ret = -EFAULT;
463 break;
464 }
465 buf += event_size;
466 count -= event_size;
467
468 if (kevent->name) {
469 if (copy_to_user(buf, kevent->name, kevent->event.len)){
470 ret = -EFAULT;
471 break;
472 }
473 buf += kevent->event.len;
474 count -= kevent->event.len;
475 }
476
 /* only fully copied events are removed; a faulting event stays queued */
477 remove_kevent(dev, kevent);
478 }
479 mutex_unlock(&dev->ev_mutex);
480
481 return ret;
482}
483
484static int inotify_release(struct inode *ignored, struct file *file)
485{
486 struct inotify_device *dev = file->private_data;
487
488 inotify_destroy(dev->ih);
489
490 /* destroy all of the events on this device */
491 mutex_lock(&dev->ev_mutex);
492 while (!list_empty(&dev->events))
493 inotify_dev_event_dequeue(dev);
494 mutex_unlock(&dev->ev_mutex);
495
496 /* free this device: the put matching the get in inotify_init() */
497 put_inotify_dev(dev);
498
499 return 0;
500}
501
502static long inotify_ioctl(struct file *file, unsigned int cmd,
503 unsigned long arg)
504{
505 struct inotify_device *dev;
506 void __user *p;
507 int ret = -ENOTTY;
508
509 dev = file->private_data;
510 p = (void __user *) arg;
511
512 switch (cmd) {
513 case FIONREAD:
514 ret = put_user(dev->queue_size, (int __user *) p);
515 break;
516 }
517
518 return ret;
519}
520
521static const struct file_operations inotify_fops = {
522 .poll = inotify_poll,
523 .read = inotify_read,
524 .release = inotify_release,
525 .unlocked_ioctl = inotify_ioctl,
526 .compat_ioctl = inotify_ioctl,
527};
528
529static const struct inotify_operations inotify_user_ops = {
530 .handle_event = inotify_dev_queue_event,
531 .destroy_watch = free_inotify_user_watch,
532};
533
534asmlinkage long sys_inotify_init(void)
535{
536 struct inotify_device *dev;
537 struct inotify_handle *ih;
538 struct user_struct *user;
539 struct file *filp;
540 int fd, ret;
541
542 fd = get_unused_fd();
543 if (fd < 0)
544 return fd;
545
546 filp = get_empty_filp();
547 if (!filp) {
548 ret = -ENFILE;
549 goto out_put_fd;
550 }
551
552 user = get_uid(current->user);
553 if (unlikely(atomic_read(&user->inotify_devs) >=
554 inotify_max_user_instances)) {
555 ret = -EMFILE;
556 goto out_free_uid;
557 }
558
559 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
560 if (unlikely(!dev)) {
561 ret = -ENOMEM;
562 goto out_free_uid;
563 }
564
565 ih = inotify_init(&inotify_user_ops);
566 if (unlikely(IS_ERR(ih))) {
567 ret = PTR_ERR(ih);
568 goto out_free_dev;
569 }
570 dev->ih = ih;
571
572 filp->f_op = &inotify_fops;
573 filp->f_vfsmnt = mntget(inotify_mnt);
574 filp->f_dentry = dget(inotify_mnt->mnt_root);
575 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
576 filp->f_mode = FMODE_READ;
577 filp->f_flags = O_RDONLY;
578 filp->private_data = dev;
579
580 INIT_LIST_HEAD(&dev->events);
581 init_waitqueue_head(&dev->wq);
582 mutex_init(&dev->ev_mutex);
583 mutex_init(&dev->up_mutex);
584 dev->event_count = 0;
585 dev->queue_size = 0;
586 dev->max_events = inotify_max_queued_events;
587 dev->user = user;
588 atomic_set(&dev->count, 0);
589
590 get_inotify_dev(dev);
591 atomic_inc(&user->inotify_devs);
592 fd_install(fd, filp);
593
594 return fd;
595out_free_dev:
596 kfree(dev);
597out_free_uid:
598 free_uid(user);
599 put_filp(filp);
600out_put_fd:
601 put_unused_fd(fd);
602 return ret;
603}
604
605asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
606{
607 struct inode *inode;
608 struct inotify_device *dev;
609 struct nameidata nd;
610 struct file *filp;
611 int ret, fput_needed;
612 unsigned flags = 0;
613
614 filp = fget_light(fd, &fput_needed);
615 if (unlikely(!filp))
616 return -EBADF;
617
618 /* verify that this is indeed an inotify instance */
619 if (unlikely(filp->f_op != &inotify_fops)) {
620 ret = -EINVAL;
621 goto fput_and_out;
622 }
623
624 if (!(mask & IN_DONT_FOLLOW))
625 flags |= LOOKUP_FOLLOW;
626 if (mask & IN_ONLYDIR)
627 flags |= LOOKUP_DIRECTORY;
628
629 ret = find_inode(path, &nd, flags);
630 if (unlikely(ret))
631 goto fput_and_out;
632
633 /* inode held in place by reference to nd; dev by fget on fd */
634 inode = nd.dentry->d_inode;
635 dev = filp->private_data;
636
637 mutex_lock(&dev->up_mutex);
638 ret = inotify_find_update_watch(dev->ih, inode, mask);
639 if (ret == -ENOENT)
640 ret = create_watch(dev, inode, mask);
641 mutex_unlock(&dev->up_mutex);
642
643 path_release(&nd);
644fput_and_out:
645 fput_light(filp, fput_needed);
646 return ret;
647}
648
649asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
650{
651 struct file *filp;
652 struct inotify_device *dev;
653 int ret, fput_needed;
654
655 filp = fget_light(fd, &fput_needed);
656 if (unlikely(!filp))
657 return -EBADF;
658
659 /* verify that this is indeed an inotify instance */
660 if (unlikely(filp->f_op != &inotify_fops)) {
661 ret = -EINVAL;
662 goto out;
663 }
664
665 dev = filp->private_data;
666
667 /* we free our watch data when we get IN_IGNORED */
668 ret = inotify_rm_wd(dev->ih, wd);
669
670out:
671 fput_light(filp, fput_needed);
672 return ret;
673}
674
675static int
676inotify_get_sb(struct file_system_type *fs_type, int flags,
677 const char *dev_name, void *data, struct vfsmount *mnt)
678{
679 return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt);
680}
681
682static struct file_system_type inotify_fs_type = {
683 .name = "inotifyfs",
684 .get_sb = inotify_get_sb,
685 .kill_sb = kill_anon_super,
686};
687
688/*
689 * inotify_user_setup - Our initialization function. Note that we cannnot return
690 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
691 * must result in panic().
692 */
693static int __init inotify_user_setup(void)
694{
695 int ret;
696
697 ret = register_filesystem(&inotify_fs_type);
698 if (unlikely(ret))
699 panic("inotify: register_filesystem returned %d!\n", ret);
700
701 inotify_mnt = kern_mount(&inotify_fs_type);
702 if (IS_ERR(inotify_mnt))
703 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
704
705 inotify_max_queued_events = 16384;
706 inotify_max_user_instances = 128;
707 inotify_max_user_watches = 8192;
708
709 watch_cachep = kmem_cache_create("inotify_watch_cache",
710 sizeof(struct inotify_user_watch),
711 0, SLAB_PANIC, NULL, NULL);
712 event_cachep = kmem_cache_create("inotify_event_cache",
713 sizeof(struct inotify_kernel_event),
714 0, SLAB_PANIC, NULL, NULL);
715
716 return 0;
717}
718
719module_init(inotify_user_setup);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f8aeec3ca10c..4b7660b09ac0 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/syscalls.h> 7#include <linux/syscalls.h>
9#include <linux/mm.h> 8#include <linux/mm.h>
10#include <linux/smp_lock.h> 9#include <linux/smp_lock.h>
diff --git a/fs/ioprio.c b/fs/ioprio.c
index ca77008146c0..93aa5715f224 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -24,15 +24,21 @@
24#include <linux/blkdev.h> 24#include <linux/blkdev.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/security.h>
27 28
28static int set_task_ioprio(struct task_struct *task, int ioprio) 29static int set_task_ioprio(struct task_struct *task, int ioprio)
29{ 30{
31 int err;
30 struct io_context *ioc; 32 struct io_context *ioc;
31 33
32 if (task->uid != current->euid && 34 if (task->uid != current->euid &&
33 task->uid != current->uid && !capable(CAP_SYS_NICE)) 35 task->uid != current->uid && !capable(CAP_SYS_NICE))
34 return -EPERM; 36 return -EPERM;
35 37
38 err = security_task_setioprio(task, ioprio);
39 if (err)
40 return err;
41
36 task_lock(task); 42 task_lock(task);
37 43
38 task->ioprio = ioprio; 44 task->ioprio = ioprio;
@@ -119,11 +125,24 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
119 return ret; 125 return ret;
120} 126}
121 127
128static int get_task_ioprio(struct task_struct *p)
129{
130 int ret;
131
132 ret = security_task_getioprio(p);
133 if (ret)
134 goto out;
135 ret = p->ioprio;
136out:
137 return ret;
138}
139
122asmlinkage long sys_ioprio_get(int which, int who) 140asmlinkage long sys_ioprio_get(int which, int who)
123{ 141{
124 struct task_struct *g, *p; 142 struct task_struct *g, *p;
125 struct user_struct *user; 143 struct user_struct *user;
126 int ret = -ESRCH; 144 int ret = -ESRCH;
145 int tmpio;
127 146
128 read_lock_irq(&tasklist_lock); 147 read_lock_irq(&tasklist_lock);
129 switch (which) { 148 switch (which) {
@@ -133,16 +152,19 @@ asmlinkage long sys_ioprio_get(int which, int who)
133 else 152 else
134 p = find_task_by_pid(who); 153 p = find_task_by_pid(who);
135 if (p) 154 if (p)
136 ret = p->ioprio; 155 ret = get_task_ioprio(p);
137 break; 156 break;
138 case IOPRIO_WHO_PGRP: 157 case IOPRIO_WHO_PGRP:
139 if (!who) 158 if (!who)
140 who = process_group(current); 159 who = process_group(current);
141 do_each_task_pid(who, PIDTYPE_PGID, p) { 160 do_each_task_pid(who, PIDTYPE_PGID, p) {
161 tmpio = get_task_ioprio(p);
162 if (tmpio < 0)
163 continue;
142 if (ret == -ESRCH) 164 if (ret == -ESRCH)
143 ret = p->ioprio; 165 ret = tmpio;
144 else 166 else
145 ret = ioprio_best(ret, p->ioprio); 167 ret = ioprio_best(ret, tmpio);
146 } while_each_task_pid(who, PIDTYPE_PGID, p); 168 } while_each_task_pid(who, PIDTYPE_PGID, p);
147 break; 169 break;
148 case IOPRIO_WHO_USER: 170 case IOPRIO_WHO_USER:
@@ -157,10 +179,13 @@ asmlinkage long sys_ioprio_get(int which, int who)
157 do_each_thread(g, p) { 179 do_each_thread(g, p) {
158 if (p->uid != user->uid) 180 if (p->uid != user->uid)
159 continue; 181 continue;
182 tmpio = get_task_ioprio(p);
183 if (tmpio < 0)
184 continue;
160 if (ret == -ESRCH) 185 if (ret == -ESRCH)
161 ret = p->ioprio; 186 ret = tmpio;
162 else 187 else
163 ret = ioprio_best(ret, p->ioprio); 188 ret = ioprio_best(ret, tmpio);
164 } while_each_thread(g, p); 189 } while_each_thread(g, p);
165 190
166 if (who) 191 if (who)
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 4917315db732..731816332b12 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -16,7 +16,6 @@
16 * Transparent decompression of files on an iso9660 filesystem 16 * Transparent decompression of files on an iso9660 filesystem
17 */ 17 */
18 18
19#include <linux/config.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/init.h> 20#include <linux/init.h>
22 21
@@ -312,7 +311,7 @@ eio:
312 return err; 311 return err;
313} 312}
314 313
315struct address_space_operations zisofs_aops = { 314const struct address_space_operations zisofs_aops = {
316 .readpage = zisofs_readpage, 315 .readpage = zisofs_readpage,
317 /* No sync_page operation supported? */ 316 /* No sync_page operation supported? */
318 /* No bmap operation supported */ 317 /* No bmap operation supported */
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 5440ea292c69..27e276987fd2 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -10,7 +10,6 @@
10 * 10 *
11 * isofs directory handling functions 11 * isofs directory handling functions
12 */ 12 */
13#include <linux/config.h>
14#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
15#include "isofs.h" 14#include "isofs.h"
16 15
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 70adbb98bad1..14391361c886 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -11,7 +11,6 @@
11 * 2004 Paul Serice - NFS Export Operations 11 * 2004 Paul Serice - NFS Export Operations
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/init.h> 14#include <linux/init.h>
16#include <linux/module.h> 15#include <linux/module.h>
17 16
@@ -56,7 +55,7 @@ static void isofs_put_super(struct super_block *sb)
56} 55}
57 56
58static void isofs_read_inode(struct inode *); 57static void isofs_read_inode(struct inode *);
59static int isofs_statfs (struct super_block *, struct kstatfs *); 58static int isofs_statfs (struct dentry *, struct kstatfs *);
60 59
61static kmem_cache_t *isofs_inode_cachep; 60static kmem_cache_t *isofs_inode_cachep;
62 61
@@ -901,8 +900,10 @@ out_freesbi:
901 return -EINVAL; 900 return -EINVAL;
902} 901}
903 902
904static int isofs_statfs (struct super_block *sb, struct kstatfs *buf) 903static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
905{ 904{
905 struct super_block *sb = dentry->d_sb;
906
906 buf->f_type = ISOFS_SUPER_MAGIC; 907 buf->f_type = ISOFS_SUPER_MAGIC;
907 buf->f_bsize = sb->s_blocksize; 908 buf->f_bsize = sb->s_blocksize;
908 buf->f_blocks = (ISOFS_SB(sb)->s_nzones 909 buf->f_blocks = (ISOFS_SB(sb)->s_nzones
@@ -1052,7 +1053,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1052 return generic_block_bmap(mapping,block,isofs_get_block); 1053 return generic_block_bmap(mapping,block,isofs_get_block);
1053} 1054}
1054 1055
1055static struct address_space_operations isofs_aops = { 1056static const struct address_space_operations isofs_aops = {
1056 .readpage = isofs_readpage, 1057 .readpage = isofs_readpage,
1057 .sync_page = block_sync_page, 1058 .sync_page = block_sync_page,
1058 .bmap = _isofs_bmap 1059 .bmap = _isofs_bmap
@@ -1399,10 +1400,11 @@ struct inode *isofs_iget(struct super_block *sb,
1399 return inode; 1400 return inode;
1400} 1401}
1401 1402
1402static struct super_block *isofs_get_sb(struct file_system_type *fs_type, 1403static int isofs_get_sb(struct file_system_type *fs_type,
1403 int flags, const char *dev_name, void *data) 1404 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1404{ 1405{
1405 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super); 1406 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
1407 mnt);
1406} 1408}
1407 1409
1408static struct file_system_type iso9660_fs_type = { 1410static struct file_system_type iso9660_fs_type = {
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index b87ba066f5e7..e6308c8b5735 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -176,5 +176,5 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
176 176
177extern struct inode_operations isofs_dir_inode_operations; 177extern struct inode_operations isofs_dir_inode_operations;
178extern const struct file_operations isofs_dir_operations; 178extern const struct file_operations isofs_dir_operations;
179extern struct address_space_operations isofs_symlink_aops; 179extern const struct address_space_operations isofs_symlink_aops;
180extern struct export_operations isofs_export_ops; 180extern struct export_operations isofs_export_ops;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 4326cb47f8fa..f3a1db3098de 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -754,6 +754,6 @@ error:
754 return -EIO; 754 return -EIO;
755} 755}
756 756
757struct address_space_operations isofs_symlink_aops = { 757const struct address_space_operations isofs_symlink_aops = {
758 .readpage = rock_ridge_symlink_readpage 758 .readpage = rock_ridge_symlink_readpage
759}; 759};
diff --git a/fs/isofs/zisofs.h b/fs/isofs/zisofs.h
index d78485d101c2..273795709155 100644
--- a/fs/isofs/zisofs.h
+++ b/fs/isofs/zisofs.h
@@ -15,7 +15,7 @@
15 */ 15 */
16 16
17#ifdef CONFIG_ZISOFS 17#ifdef CONFIG_ZISOFS
18extern struct address_space_operations zisofs_aops; 18extern const struct address_space_operations zisofs_aops;
19extern int __init zisofs_init(void); 19extern int __init zisofs_init(void);
20extern void zisofs_cleanup(void); 20extern void zisofs_cleanup(void);
21#endif 21#endif
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 3f5102b069db..47678a26c13b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,67 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction. 27 * Unlink a buffer from a transaction checkpoint list.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31static inline void __buffer_unlink_first(struct journal_head *jh)
32static inline void __buffer_unlink(struct journal_head *jh)
33{ 32{
34 transaction_t *transaction; 33 transaction_t *transaction = jh->b_cp_transaction;
35
36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
38 34
39 jh->b_cpnext->b_cpprev = jh->b_cpprev; 35 jh->b_cpnext->b_cpprev = jh->b_cpprev;
40 jh->b_cpprev->b_cpnext = jh->b_cpnext; 36 jh->b_cpprev->b_cpnext = jh->b_cpnext;
41 if (transaction->t_checkpoint_list == jh) 37 if (transaction->t_checkpoint_list == jh) {
42 transaction->t_checkpoint_list = jh->b_cpnext; 38 transaction->t_checkpoint_list = jh->b_cpnext;
43 if (transaction->t_checkpoint_list == jh) 39 if (transaction->t_checkpoint_list == jh)
44 transaction->t_checkpoint_list = NULL; 40 transaction->t_checkpoint_list = NULL;
41 }
42}
43
44/*
45 * Unlink a buffer from a transaction checkpoint(io) list.
46 *
47 * Called with j_list_lock held.
48 */
49static inline void __buffer_unlink(struct journal_head *jh)
50{
51 transaction_t *transaction = jh->b_cp_transaction;
52
53 __buffer_unlink_first(jh);
54 if (transaction->t_checkpoint_io_list == jh) {
55 transaction->t_checkpoint_io_list = jh->b_cpnext;
56 if (transaction->t_checkpoint_io_list == jh)
57 transaction->t_checkpoint_io_list = NULL;
58 }
59}
60
61/*
62 * Move a buffer from the checkpoint list to the checkpoint io list
63 *
64 * Called with j_list_lock held
65 */
66static inline void __buffer_relink_io(struct journal_head *jh)
67{
68 transaction_t *transaction = jh->b_cp_transaction;
69
70 __buffer_unlink_first(jh);
71
72 if (!transaction->t_checkpoint_io_list) {
73 jh->b_cpnext = jh->b_cpprev = jh;
74 } else {
75 jh->b_cpnext = transaction->t_checkpoint_io_list;
76 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
77 jh->b_cpprev->b_cpnext = jh;
78 jh->b_cpnext->b_cpprev = jh;
79 }
80 transaction->t_checkpoint_io_list = jh;
45} 81}
46 82
47/* 83/*
48 * Try to release a checkpointed buffer from its transaction. 84 * Try to release a checkpointed buffer from its transaction.
49 * Returns 1 if we released it. 85 * Returns 1 if we released it and 2 if we also released the
86 * whole transaction.
87 *
50 * Requires j_list_lock 88 * Requires j_list_lock
51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 89 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
52 */ 90 */
@@ -57,12 +95,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
57 95
58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
59 JBUFFER_TRACE(jh, "remove from checkpoint list"); 97 JBUFFER_TRACE(jh, "remove from checkpoint list");
60 __journal_remove_checkpoint(jh); 98 ret = __journal_remove_checkpoint(jh) + 1;
61 jbd_unlock_bh_state(bh); 99 jbd_unlock_bh_state(bh);
62 journal_remove_journal_head(bh); 100 journal_remove_journal_head(bh);
63 BUFFER_TRACE(bh, "release"); 101 BUFFER_TRACE(bh, "release");
64 __brelse(bh); 102 __brelse(bh);
65 ret = 1;
66 } else { 103 } else {
67 jbd_unlock_bh_state(bh); 104 jbd_unlock_bh_state(bh);
68 } 105 }
@@ -117,83 +154,54 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
117} 154}
118 155
119/* 156/*
120 * Clean up a transaction's checkpoint list. 157 * Clean up transaction's list of buffers submitted for io.
121 * 158 * We wait for any pending IO to complete and remove any clean
122 * We wait for any pending IO to complete and make sure any clean 159 * buffers. Note that we take the buffers in the opposite ordering
123 * buffers are removed from the transaction. 160 * from the one in which they were submitted for IO.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
128 * 161 *
129 * Called with j_list_lock held. 162 * Called with j_list_lock held.
130 */ 163 */
131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) 164static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
132{ 165{
133 struct journal_head *jh, *next_jh, *last_jh; 166 struct journal_head *jh;
134 struct buffer_head *bh; 167 struct buffer_head *bh;
135 int ret = 0; 168 tid_t this_tid;
136 169 int released = 0;
137 assert_spin_locked(&journal->j_list_lock); 170
138 jh = transaction->t_checkpoint_list; 171 this_tid = transaction->t_tid;
139 if (!jh) 172restart:
140 return 0; 173 /* Did somebody clean up the transaction in the meanwhile? */
141 174 if (journal->j_checkpoint_transactions != transaction ||
142 last_jh = jh->b_cpprev; 175 transaction->t_tid != this_tid)
143 next_jh = jh; 176 return;
144 do { 177 while (!released && transaction->t_checkpoint_io_list) {
145 jh = next_jh; 178 jh = transaction->t_checkpoint_io_list;
146 bh = jh2bh(jh); 179 bh = jh2bh(jh);
180 if (!jbd_trylock_bh_state(bh)) {
181 jbd_sync_bh(journal, bh);
182 spin_lock(&journal->j_list_lock);
183 goto restart;
184 }
147 if (buffer_locked(bh)) { 185 if (buffer_locked(bh)) {
148 atomic_inc(&bh->b_count); 186 atomic_inc(&bh->b_count);
149 spin_unlock(&journal->j_list_lock); 187 spin_unlock(&journal->j_list_lock);
188 jbd_unlock_bh_state(bh);
150 wait_on_buffer(bh); 189 wait_on_buffer(bh);
151 /* the journal_head may have gone by now */ 190 /* the journal_head may have gone by now */
152 BUFFER_TRACE(bh, "brelse"); 191 BUFFER_TRACE(bh, "brelse");
153 __brelse(bh); 192 __brelse(bh);
154 goto out_return_1; 193 spin_lock(&journal->j_list_lock);
194 goto restart;
155 } 195 }
156
157 /* 196 /*
158 * This is foul 197 * Now in whatever state the buffer currently is, we know that
198 * it has been written out and so we can drop it from the list
159 */ 199 */
160 if (!jbd_trylock_bh_state(bh)) { 200 released = __journal_remove_checkpoint(jh);
161 jbd_sync_bh(journal, bh); 201 jbd_unlock_bh_state(bh);
162 goto out_return_1; 202 journal_remove_journal_head(bh);
163 } 203 __brelse(bh);
164 204 }
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it
178 * shouldn't have NULL b_transaction?
179 */
180 next_jh = jh->b_cpnext;
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
182 BUFFER_TRACE(bh, "remove from checkpoint");
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
197} 205}
198 206
199#define NR_BATCH 64 207#define NR_BATCH 64
@@ -203,9 +211,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
203{ 211{
204 int i; 212 int i;
205 213
206 spin_unlock(&journal->j_list_lock);
207 ll_rw_block(SWRITE, *batch_count, bhs); 214 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
209 for (i = 0; i < *batch_count; i++) { 215 for (i = 0; i < *batch_count; i++) {
210 struct buffer_head *bh = bhs[i]; 216 struct buffer_head *bh = bhs[i];
211 clear_buffer_jwrite(bh); 217 clear_buffer_jwrite(bh);
@@ -221,19 +227,43 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
221 * Return 1 if something happened which requires us to abort the current 227 * Return 1 if something happened which requires us to abort the current
222 * scan of the checkpoint list. 228 * scan of the checkpoint list.
223 * 229 *
224 * Called with j_list_lock held. 230 * Called with j_list_lock held and drops it if 1 is returned
225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 231 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
226 */ 232 */
227static int __flush_buffer(journal_t *journal, struct journal_head *jh, 233static int __process_buffer(journal_t *journal, struct journal_head *jh,
228 struct buffer_head **bhs, int *batch_count, 234 struct buffer_head **bhs, int *batch_count)
229 int *drop_count)
230{ 235{
231 struct buffer_head *bh = jh2bh(jh); 236 struct buffer_head *bh = jh2bh(jh);
232 int ret = 0; 237 int ret = 0;
233 238
234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { 239 if (buffer_locked(bh)) {
235 J_ASSERT_JH(jh, jh->b_transaction == NULL); 240 atomic_inc(&bh->b_count);
241 spin_unlock(&journal->j_list_lock);
242 jbd_unlock_bh_state(bh);
243 wait_on_buffer(bh);
244 /* the journal_head may have gone by now */
245 BUFFER_TRACE(bh, "brelse");
246 __brelse(bh);
247 ret = 1;
248 } else if (jh->b_transaction != NULL) {
249 transaction_t *t = jh->b_transaction;
250 tid_t tid = t->t_tid;
236 251
252 spin_unlock(&journal->j_list_lock);
253 jbd_unlock_bh_state(bh);
254 log_start_commit(journal, tid);
255 log_wait_commit(journal, tid);
256 ret = 1;
257 } else if (!buffer_dirty(bh)) {
258 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
259 BUFFER_TRACE(bh, "remove from checkpoint");
260 __journal_remove_checkpoint(jh);
261 spin_unlock(&journal->j_list_lock);
262 jbd_unlock_bh_state(bh);
263 journal_remove_journal_head(bh);
264 __brelse(bh);
265 ret = 1;
266 } else {
237 /* 267 /*
238 * Important: we are about to write the buffer, and 268 * Important: we are about to write the buffer, and
239 * possibly block, while still holding the journal lock. 269 * possibly block, while still holding the journal lock.
@@ -246,45 +276,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
246 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 276 J_ASSERT_BH(bh, !buffer_jwrite(bh));
247 set_buffer_jwrite(bh); 277 set_buffer_jwrite(bh);
248 bhs[*batch_count] = bh; 278 bhs[*batch_count] = bh;
279 __buffer_relink_io(jh);
249 jbd_unlock_bh_state(bh); 280 jbd_unlock_bh_state(bh);
250 (*batch_count)++; 281 (*batch_count)++;
251 if (*batch_count == NR_BATCH) { 282 if (*batch_count == NR_BATCH) {
283 spin_unlock(&journal->j_list_lock);
252 __flush_batch(journal, bhs, batch_count); 284 __flush_batch(journal, bhs, batch_count);
253 ret = 1; 285 ret = 1;
254 } 286 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
267 } 287 }
268 return ret; 288 return ret;
269} 289}
270 290
271/* 291/*
272 * Perform an actual checkpoint. We don't write out only enough to 292 * Perform an actual checkpoint. We take the first transaction on the
273 * satisfy the current blocked requests: rather we submit a reasonably 293 * list of transactions to be checkpointed and send all its buffers
274 * sized chunk of the outstanding data to disk at once for 294 * to disk. We submit larger chunks of data at once.
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
276 * 295 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
281 * The journal should be locked before calling this function. 296 * The journal should be locked before calling this function.
282 */ 297 */
283int log_do_checkpoint(journal_t *journal) 298int log_do_checkpoint(journal_t *journal)
284{ 299{
300 transaction_t *transaction;
301 tid_t this_tid;
285 int result; 302 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
288 303
289 jbd_debug(1, "Start checkpoint\n"); 304 jbd_debug(1, "Start checkpoint\n");
290 305
@@ -299,79 +314,68 @@ int log_do_checkpoint(journal_t *journal)
299 return result; 314 return result;
300 315
301 /* 316 /*
302 * OK, we need to start writing disk blocks. Try to free up a 317 * OK, we need to start writing disk blocks. Take one transaction
303 * quarter of the log in a single checkpoint if we can. 318 * and write it.
304 */ 319 */
320 spin_lock(&journal->j_list_lock);
321 if (!journal->j_checkpoint_transactions)
322 goto out;
323 transaction = journal->j_checkpoint_transactions;
324 this_tid = transaction->t_tid;
325restart:
305 /* 326 /*
306 * AKPM: check this code. I had a feeling a while back that it 327 * If someone cleaned up this transaction while we slept, we're
307 * degenerates into a busy loop at unmount time. 328 * done (maybe it's a new transaction, but it fell at the same
329 * address).
308 */ 330 */
309 spin_lock(&journal->j_list_lock); 331 if (journal->j_checkpoint_transactions == transaction &&
310 while (journal->j_checkpoint_transactions) { 332 transaction->t_tid == this_tid) {
311 transaction_t *transaction; 333 int batch_count = 0;
312 struct journal_head *jh, *last_jh, *next_jh; 334 struct buffer_head *bhs[NR_BATCH];
313 int drop_count = 0; 335 struct journal_head *jh;
314 int cleanup_ret, retry = 0; 336 int retry = 0;
315 tid_t this_tid; 337
316 338 while (!retry && transaction->t_checkpoint_list) {
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
323 struct buffer_head *bh; 339 struct buffer_head *bh;
324 340
325 jh = next_jh; 341 jh = transaction->t_checkpoint_list;
326 next_jh = jh->b_cpnext;
327 bh = jh2bh(jh); 342 bh = jh2bh(jh);
328 if (!jbd_trylock_bh_state(bh)) { 343 if (!jbd_trylock_bh_state(bh)) {
329 jbd_sync_bh(journal, bh); 344 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
331 retry = 1; 345 retry = 1;
332 break; 346 break;
333 } 347 }
334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); 348 retry = __process_buffer(journal, jh, bhs,&batch_count);
335 if (cond_resched_lock(&journal->j_list_lock)) { 349 if (!retry && lock_need_resched(&journal->j_list_lock)){
350 spin_unlock(&journal->j_list_lock);
336 retry = 1; 351 retry = 1;
337 break; 352 break;
338 } 353 }
339 } while (jh != last_jh && !retry); 354 }
340 355
341 if (batch_count) { 356 if (batch_count) {
357 if (!retry) {
358 spin_unlock(&journal->j_list_lock);
359 retry = 1;
360 }
342 __flush_batch(journal, bhs, &batch_count); 361 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
344 } 362 }
345 363
364 if (retry) {
365 spin_lock(&journal->j_list_lock);
366 goto restart;
367 }
346 /* 368 /*
347 * If someone cleaned up this transaction while we slept, we're 369 * Now we have cleaned up the first transaction's checkpoint
348 * done 370 * list. Let's clean up the second one
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
357 */
358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */ 371 */
365 cleanup_ret = __cleanup_transaction(journal, transaction); 372 __wait_cp_io(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
369 } 373 }
374out:
370 spin_unlock(&journal->j_list_lock); 375 spin_unlock(&journal->j_list_lock);
371 result = cleanup_journal_tail(journal); 376 result = cleanup_journal_tail(journal);
372 if (result < 0) 377 if (result < 0)
373 return result; 378 return result;
374
375 return 0; 379 return 0;
376} 380}
377 381
@@ -456,52 +460,98 @@ int cleanup_journal_tail(journal_t *journal)
456/* Checkpoint list management */ 460/* Checkpoint list management */
457 461
458/* 462/*
463 * journal_clean_one_cp_list
464 *
465 * Find all the written-back checkpoint buffers in the given list and release them.
466 *
467 * Called with the journal locked.
468 * Called with j_list_lock held.
469 * Returns number of buffers reaped (for debug)
470 */
471
472static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
473{
474 struct journal_head *last_jh;
475 struct journal_head *next_jh = jh;
476 int ret, freed = 0;
477
478 *released = 0;
479 if (!jh)
480 return 0;
481
482 last_jh = jh->b_cpprev;
483 do {
484 jh = next_jh;
485 next_jh = jh->b_cpnext;
486 /* Use trylock because of the ranking */
487 if (jbd_trylock_bh_state(jh2bh(jh))) {
488 ret = __try_to_free_cp_buf(jh);
489 if (ret) {
490 freed++;
491 if (ret == 2) {
492 *released = 1;
493 return freed;
494 }
495 }
496 }
497 /*
498 * This function only frees up some memory
499 * if possible so we dont have an obligation
500 * to finish processing. Bail out if preemption
501 * requested:
502 */
503 if (need_resched())
504 return freed;
505 } while (jh != last_jh);
506
507 return freed;
508}
509
510/*
459 * journal_clean_checkpoint_list 511 * journal_clean_checkpoint_list
460 * 512 *
461 * Find all the written-back checkpoint buffers in the journal and release them. 513 * Find all the written-back checkpoint buffers in the journal and release them.
462 * 514 *
463 * Called with the journal locked. 515 * Called with the journal locked.
464 * Called with j_list_lock held. 516 * Called with j_list_lock held.
465 * Returns number of bufers reaped (for debug) 517 * Returns number of buffers reaped (for debug)
466 */ 518 */
467 519
468int __journal_clean_checkpoint_list(journal_t *journal) 520int __journal_clean_checkpoint_list(journal_t *journal)
469{ 521{
470 transaction_t *transaction, *last_transaction, *next_transaction; 522 transaction_t *transaction, *last_transaction, *next_transaction;
471 int ret = 0; 523 int ret = 0;
524 int released;
472 525
473 transaction = journal->j_checkpoint_transactions; 526 transaction = journal->j_checkpoint_transactions;
474 if (transaction == 0) 527 if (!transaction)
475 goto out; 528 goto out;
476 529
477 last_transaction = transaction->t_cpprev; 530 last_transaction = transaction->t_cpprev;
478 next_transaction = transaction; 531 next_transaction = transaction;
479 do { 532 do {
480 struct journal_head *jh;
481
482 transaction = next_transaction; 533 transaction = next_transaction;
483 next_transaction = transaction->t_cpnext; 534 next_transaction = transaction->t_cpnext;
484 jh = transaction->t_checkpoint_list; 535 ret += journal_clean_one_cp_list(transaction->
485 if (jh) { 536 t_checkpoint_list, &released);
486 struct journal_head *last_jh = jh->b_cpprev; 537 /*
487 struct journal_head *next_jh = jh; 538 * This function only frees up some memory if possible so we
488 539 * dont have an obligation to finish processing. Bail out if
489 do { 540 * preemption requested:
490 jh = next_jh; 541 */
491 next_jh = jh->b_cpnext; 542 if (need_resched())
492 /* Use trylock because of the ranknig */ 543 goto out;
493 if (jbd_trylock_bh_state(jh2bh(jh))) 544 if (released)
494 ret += __try_to_free_cp_buf(jh); 545 continue;
495 /* 546 /*
496 * This function only frees up some memory 547 * It is essential that we are as careful as in the case of
497 * if possible so we dont have an obligation 548 * t_checkpoint_list with removing the buffer from the list as
498 * to finish processing. Bail out if preemption 549 * we can possibly see not yet submitted buffers on io_list
499 * requested: 550 */
500 */ 551 ret += journal_clean_one_cp_list(transaction->
501 if (need_resched()) 552 t_checkpoint_io_list, &released);
502 goto out; 553 if (need_resched())
503 } while (jh != last_jh); 554 goto out;
504 }
505 } while (transaction != last_transaction); 555 } while (transaction != last_transaction);
506out: 556out:
507 return ret; 557 return ret;
@@ -516,18 +566,22 @@ out:
516 * buffer updates committed in that transaction have safely been stored 566 * buffer updates committed in that transaction have safely been stored
517 * elsewhere on disk. To achieve this, all of the buffers in a 567 * elsewhere on disk. To achieve this, all of the buffers in a
518 * transaction need to be maintained on the transaction's checkpoint 568 * transaction need to be maintained on the transaction's checkpoint
519 * list until they have been rewritten, at which point this function is 569 * lists until they have been rewritten, at which point this function is
520 * called to remove the buffer from the existing transaction's 570 * called to remove the buffer from the existing transaction's
521 * checkpoint list. 571 * checkpoint lists.
572 *
573 * The function returns 1 if it frees the transaction, 0 otherwise.
522 * 574 *
523 * This function is called with the journal locked. 575 * This function is called with the journal locked.
524 * This function is called with j_list_lock held. 576 * This function is called with j_list_lock held.
577 * This function is called with jbd_lock_bh_state(jh2bh(jh))
525 */ 578 */
526 579
527void __journal_remove_checkpoint(struct journal_head *jh) 580int __journal_remove_checkpoint(struct journal_head *jh)
528{ 581{
529 transaction_t *transaction; 582 transaction_t *transaction;
530 journal_t *journal; 583 journal_t *journal;
584 int ret = 0;
531 585
532 JBUFFER_TRACE(jh, "entry"); 586 JBUFFER_TRACE(jh, "entry");
533 587
@@ -538,8 +592,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
538 journal = transaction->t_journal; 592 journal = transaction->t_journal;
539 593
540 __buffer_unlink(jh); 594 __buffer_unlink(jh);
595 jh->b_cp_transaction = NULL;
541 596
542 if (transaction->t_checkpoint_list != NULL) 597 if (transaction->t_checkpoint_list != NULL ||
598 transaction->t_checkpoint_io_list != NULL)
543 goto out; 599 goto out;
544 JBUFFER_TRACE(jh, "transaction has no more buffers"); 600 JBUFFER_TRACE(jh, "transaction has no more buffers");
545 601
@@ -565,8 +621,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
565 /* Just in case anybody was waiting for more transactions to be 621 /* Just in case anybody was waiting for more transactions to be
566 checkpointed... */ 622 checkpointed... */
567 wake_up(&journal->j_wait_logspace); 623 wake_up(&journal->j_wait_logspace);
624 ret = 1;
568out: 625out:
569 JBUFFER_TRACE(jh, "exit"); 626 JBUFFER_TRACE(jh, "exit");
627 return ret;
570} 628}
571 629
572/* 630/*
@@ -628,6 +686,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
628 J_ASSERT(transaction->t_shadow_list == NULL); 686 J_ASSERT(transaction->t_shadow_list == NULL);
629 J_ASSERT(transaction->t_log_list == NULL); 687 J_ASSERT(transaction->t_log_list == NULL);
630 J_ASSERT(transaction->t_checkpoint_list == NULL); 688 J_ASSERT(transaction->t_checkpoint_list == NULL);
689 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
631 J_ASSERT(transaction->t_updates == 0); 690 J_ASSERT(transaction->t_updates == 0);
632 J_ASSERT(journal->j_committing_transaction != transaction); 691 J_ASSERT(journal->j_committing_transaction != transaction);
633 J_ASSERT(journal->j_running_transaction != transaction); 692 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 002ad2bbc769..0971814c38b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -790,11 +790,22 @@ restart_loop:
790 jbd_unlock_bh_state(bh); 790 jbd_unlock_bh_state(bh);
791 } else { 791 } else {
792 J_ASSERT_BH(bh, !buffer_dirty(bh)); 792 J_ASSERT_BH(bh, !buffer_dirty(bh));
793 J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 793 /* The buffer on BJ_Forget list and not jbddirty means
794 __journal_unfile_buffer(jh); 794 * it has been freed by this transaction and hence it
795 jbd_unlock_bh_state(bh); 795 * could not have been reallocated until this
796 journal_remove_journal_head(bh); /* needs a brelse */ 796 * transaction has committed. *BUT* it could be
797 release_buffer_page(bh); 797 * reallocated once we have written all the data to
798 * disk and before we process the buffer on BJ_Forget
799 * list. */
800 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
801 __journal_refile_buffer(jh);
802 if (!jh->b_transaction) {
803 jbd_unlock_bh_state(bh);
804 /* needs a brelse */
805 journal_remove_journal_head(bh);
806 release_buffer_page(bh);
807 } else
808 jbd_unlock_bh_state(bh);
798 } 809 }
799 cond_resched_lock(&journal->j_list_lock); 810 cond_resched_lock(&journal->j_list_lock);
800 } 811 }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7f96b5cb6781..8c9b28dff119 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -34,6 +34,7 @@
34#include <linux/suspend.h> 34#include <linux/suspend.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/kthread.h> 36#include <linux/kthread.h>
37#include <linux/poison.h>
37#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
@@ -1675,7 +1676,7 @@ static void journal_free_journal_head(struct journal_head *jh)
1675{ 1676{
1676#ifdef CONFIG_JBD_DEBUG 1677#ifdef CONFIG_JBD_DEBUG
1677 atomic_dec(&nr_journal_heads); 1678 atomic_dec(&nr_journal_heads);
1678 memset(jh, 0x5b, sizeof(*jh)); 1679 memset(jh, JBD_POISON_FREE, sizeof(*jh));
1679#endif 1680#endif
1680 kmem_cache_free(journal_head_cache, jh); 1681 kmem_cache_free(journal_head_cache, jh);
1681} 1682}
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 80d7f53fd0a7..de5bafb4e853 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -531,6 +531,7 @@ static int do_one_pass(journal_t *journal,
531 default: 531 default:
532 jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 532 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
533 blocktype); 533 blocktype);
534 brelse(bh);
534 goto done; 535 goto done;
535 } 536 }
536 } 537 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c609f5034fcd..508b2ea91f43 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -227,7 +227,8 @@ repeat_locked:
227 spin_unlock(&transaction->t_handle_lock); 227 spin_unlock(&transaction->t_handle_lock);
228 spin_unlock(&journal->j_state_lock); 228 spin_unlock(&journal->j_state_lock);
229out: 229out:
230 kfree(new_transaction); 230 if (unlikely(new_transaction)) /* It's usually NULL */
231 kfree(new_transaction);
231 return ret; 232 return ret;
232} 233}
233 234
@@ -724,7 +725,8 @@ done:
724 journal_cancel_revoke(handle, jh); 725 journal_cancel_revoke(handle, jh);
725 726
726out: 727out:
727 kfree(frozen_buffer); 728 if (unlikely(frozen_buffer)) /* It's usually NULL */
729 kfree(frozen_buffer);
728 730
729 JBUFFER_TRACE(jh, "exit"); 731 JBUFFER_TRACE(jh, "exit");
730 return error; 732 return error;
@@ -903,7 +905,8 @@ repeat:
903 jbd_unlock_bh_state(bh); 905 jbd_unlock_bh_state(bh);
904out: 906out:
905 journal_put_journal_head(jh); 907 journal_put_journal_head(jh);
906 kfree(committed_data); 908 if (unlikely(committed_data))
909 kfree(committed_data);
907 return err; 910 return err;
908} 911}
909 912
@@ -2038,7 +2041,8 @@ void __journal_refile_buffer(struct journal_head *jh)
2038 __journal_temp_unlink_buffer(jh); 2041 __journal_temp_unlink_buffer(jh);
2039 jh->b_transaction = jh->b_next_transaction; 2042 jh->b_transaction = jh->b_next_transaction;
2040 jh->b_next_transaction = NULL; 2043 jh->b_next_transaction = NULL;
2041 __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata); 2044 __journal_file_buffer(jh, jh->b_transaction,
2045 was_dirty ? BJ_Metadata : BJ_Reserved);
2042 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2046 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2043 2047
2044 if (was_dirty) 2048 if (was_dirty)
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 020cc097c539..93068697a9bf 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -59,7 +59,7 @@ static const struct file_operations jffs_file_operations;
59static struct inode_operations jffs_file_inode_operations; 59static struct inode_operations jffs_file_inode_operations;
60static const struct file_operations jffs_dir_operations; 60static const struct file_operations jffs_dir_operations;
61static struct inode_operations jffs_dir_inode_operations; 61static struct inode_operations jffs_dir_inode_operations;
62static struct address_space_operations jffs_address_operations; 62static const struct address_space_operations jffs_address_operations;
63 63
64kmem_cache_t *node_cache = NULL; 64kmem_cache_t *node_cache = NULL;
65kmem_cache_t *fm_cache = NULL; 65kmem_cache_t *fm_cache = NULL;
@@ -377,9 +377,9 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
377 377
378/* Get statistics of the file system. */ 378/* Get statistics of the file system. */
379static int 379static int
380jffs_statfs(struct super_block *sb, struct kstatfs *buf) 380jffs_statfs(struct dentry *dentry, struct kstatfs *buf)
381{ 381{
382 struct jffs_control *c = (struct jffs_control *) sb->s_fs_info; 382 struct jffs_control *c = (struct jffs_control *) dentry->d_sb->s_fs_info;
383 struct jffs_fmcontrol *fmc; 383 struct jffs_fmcontrol *fmc;
384 384
385 lock_kernel(); 385 lock_kernel();
@@ -1614,7 +1614,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
1614} /* jffs_ioctl() */ 1614} /* jffs_ioctl() */
1615 1615
1616 1616
1617static struct address_space_operations jffs_address_operations = { 1617static const struct address_space_operations jffs_address_operations = {
1618 .readpage = jffs_readpage, 1618 .readpage = jffs_readpage,
1619 .prepare_write = jffs_prepare_write, 1619 .prepare_write = jffs_prepare_write,
1620 .commit_write = jffs_commit_write, 1620 .commit_write = jffs_commit_write,
@@ -1785,10 +1785,11 @@ static struct super_operations jffs_ops =
1785 .remount_fs = jffs_remount, 1785 .remount_fs = jffs_remount,
1786}; 1786};
1787 1787
1788static struct super_block *jffs_get_sb(struct file_system_type *fs_type, 1788static int jffs_get_sb(struct file_system_type *fs_type,
1789 int flags, const char *dev_name, void *data) 1789 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1790{ 1790{
1791 return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super); 1791 return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super,
1792 mnt);
1792} 1793}
1793 1794
1794static struct file_system_type jffs_fs_type = { 1795static struct file_system_type jffs_fs_type = {
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 0ef207dfaf6f..9000f1effedf 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -55,7 +55,6 @@
55 * 55 *
56 */ 56 */
57 57
58#include <linux/config.h>
59#include <linux/types.h> 58#include <linux/types.h>
60#include <linux/slab.h> 59#include <linux/slab.h>
61#include <linux/jffs.h> 60#include <linux/jffs.h>
@@ -247,7 +246,7 @@ flash_safe_read(struct mtd_info *mtd, loff_t from,
247 D3(printk(KERN_NOTICE "flash_safe_read(%p, %08x, %p, %08x)\n", 246 D3(printk(KERN_NOTICE "flash_safe_read(%p, %08x, %p, %08x)\n",
248 mtd, (unsigned int) from, buf, count)); 247 mtd, (unsigned int) from, buf, count));
249 248
250 res = MTD_READ(mtd, from, count, &retlen, buf); 249 res = mtd->read(mtd, from, count, &retlen, buf);
251 if (retlen != count) { 250 if (retlen != count) {
252 panic("Didn't read all bytes in flash_safe_read(). Returned %d\n", res); 251 panic("Didn't read all bytes in flash_safe_read(). Returned %d\n", res);
253 } 252 }
@@ -262,7 +261,7 @@ flash_read_u32(struct mtd_info *mtd, loff_t from)
262 __u32 ret; 261 __u32 ret;
263 int res; 262 int res;
264 263
265 res = MTD_READ(mtd, from, 4, &retlen, (unsigned char *)&ret); 264 res = mtd->read(mtd, from, 4, &retlen, (unsigned char *)&ret);
266 if (retlen != 4) { 265 if (retlen != 4) {
267 printk("Didn't read all bytes in flash_read_u32(). Returned %d\n", res); 266 printk("Didn't read all bytes in flash_read_u32(). Returned %d\n", res);
268 return 0; 267 return 0;
@@ -282,7 +281,7 @@ flash_safe_write(struct mtd_info *mtd, loff_t to,
282 D3(printk(KERN_NOTICE "flash_safe_write(%p, %08x, %p, %08x)\n", 281 D3(printk(KERN_NOTICE "flash_safe_write(%p, %08x, %p, %08x)\n",
283 mtd, (unsigned int) to, buf, count)); 282 mtd, (unsigned int) to, buf, count));
284 283
285 res = MTD_WRITE(mtd, to, count, &retlen, buf); 284 res = mtd->write(mtd, to, count, &retlen, buf);
286 if (retlen != count) { 285 if (retlen != count) {
287 printk("Didn't write all bytes in flash_safe_write(). Returned %d\n", res); 286 printk("Didn't write all bytes in flash_safe_write(). Returned %d\n", res);
288 } 287 }
@@ -300,9 +299,9 @@ flash_safe_writev(struct mtd_info *mtd, const struct kvec *vecs,
300 299
301 D3(printk(KERN_NOTICE "flash_safe_writev(%p, %08x, %p)\n", 300 D3(printk(KERN_NOTICE "flash_safe_writev(%p, %08x, %p)\n",
302 mtd, (unsigned int) to, vecs)); 301 mtd, (unsigned int) to, vecs));
303 302
304 if (mtd->writev) { 303 if (mtd->writev) {
305 res = MTD_WRITEV(mtd, vecs, iovec_cnt, to, &retlen); 304 res = mtd->writev(mtd, vecs, iovec_cnt, to, &retlen);
306 return res ? res : retlen; 305 return res ? res : retlen;
307 } 306 }
308 /* Not implemented writev. Repeatedly use write - on the not so 307 /* Not implemented writev. Repeatedly use write - on the not so
@@ -312,7 +311,8 @@ flash_safe_writev(struct mtd_info *mtd, const struct kvec *vecs,
312 retlen=0; 311 retlen=0;
313 312
314 for (i=0; !res && i<iovec_cnt; i++) { 313 for (i=0; !res && i<iovec_cnt; i++) {
315 res = MTD_WRITE(mtd, to, vecs[i].iov_len, &retlen_a, vecs[i].iov_base); 314 res = mtd->write(mtd, to, vecs[i].iov_len, &retlen_a,
315 vecs[i].iov_base);
316 if (retlen_a != vecs[i].iov_len) { 316 if (retlen_a != vecs[i].iov_len) {
317 printk("Didn't write all bytes in flash_safe_writev(). Returned %d\n", res); 317 printk("Didn't write all bytes in flash_safe_writev(). Returned %d\n", res);
318 if (i != iovec_cnt-1) 318 if (i != iovec_cnt-1)
@@ -393,7 +393,7 @@ flash_erase_region(struct mtd_info *mtd, loff_t start,
393 set_current_state(TASK_UNINTERRUPTIBLE); 393 set_current_state(TASK_UNINTERRUPTIBLE);
394 add_wait_queue(&wait_q, &wait); 394 add_wait_queue(&wait_q, &wait);
395 395
396 if (MTD_ERASE(mtd, erase) < 0) { 396 if (mtd->erase(mtd, erase) < 0) {
397 set_current_state(TASK_RUNNING); 397 set_current_state(TASK_RUNNING);
398 remove_wait_queue(&wait_q, &wait); 398 remove_wait_queue(&wait_q, &wait);
399 kfree(erase); 399 kfree(erase);
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
index c794d923df2a..9ee6ad29eff5 100644
--- a/fs/jffs/jffs_fm.h
+++ b/fs/jffs/jffs_fm.h
@@ -20,7 +20,6 @@
20#ifndef __LINUX_JFFS_FM_H__ 20#ifndef __LINUX_JFFS_FM_H__
21#define __LINUX_JFFS_FM_H__ 21#define __LINUX_JFFS_FM_H__
22 22
23#include <linux/config.h>
24#include <linux/types.h> 23#include <linux/types.h>
25#include <linux/jffs.h> 24#include <linux/jffs.h>
26#include <linux/mtd/mtd.h> 25#include <linux/mtd/mtd.h>
diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile
index 77dc5561a04e..7f28ee0bd132 100644
--- a/fs/jffs2/Makefile
+++ b/fs/jffs2/Makefile
@@ -12,6 +12,9 @@ jffs2-y += symlink.o build.o erase.o background.o fs.o writev.o
12jffs2-y += super.o debug.o 12jffs2-y += super.o debug.o
13 13
14jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER) += wbuf.o 14jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER) += wbuf.o
15jffs2-$(CONFIG_JFFS2_FS_XATTR) += xattr.o xattr_trusted.o xattr_user.o
16jffs2-$(CONFIG_JFFS2_FS_SECURITY) += security.o
17jffs2-$(CONFIG_JFFS2_FS_POSIX_ACL) += acl.o
15jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o 18jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o
16jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o 19jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o
17jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o 20jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index b7943439b6ec..c8f0bd64e53e 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -150,3 +150,24 @@ the buffer.
150 150
151Ordering constraints: 151Ordering constraints:
152 Lock wbuf_sem last, after the alloc_sem or and f->sem. 152 Lock wbuf_sem last, after the alloc_sem or and f->sem.
153
154
155 c->xattr_sem
156 ------------
157
158This read/write semaphore protects against concurrent access to the
159xattr related objects, which include state in the superblock and ic->xref.
160On read-only paths, taking the semaphore for writing excludes more than
161necessary; holding it for reading is enough. But you must hold it for
162writing when updating, creating or deleting any xattr related object.
163
164Once xattr_sem is released, there is no assurance that those objects
165still exist. Thus, a sequence of operations often has to be retried when
166an object turns out to need updating while only the read semaphore is held.
167For example, do_jffs2_getxattr() first takes the semaphore for reading to
168scan xref and xdatum. If it then needs to load a name/value pair from the
169medium, it releases the read semaphore and retries the whole sequence
170while holding the semaphore for writing.
171
172Ordering constraints:
173 Lock xattr_sem last, after the alloc_sem.
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
new file mode 100644
index 000000000000..9c2077e7e081
--- /dev/null
+++ b/fs/jffs2/acl.c
@@ -0,0 +1,487 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/time.h>
15#include <linux/crc32.h>
16#include <linux/jffs2.h>
17#include <linux/xattr.h>
18#include <linux/posix_acl_xattr.h>
19#include <linux/mtd/mtd.h>
20#include "nodelist.h"
21
22static size_t jffs2_acl_size(int count)
23{
24 if (count <= 4) {
25 return sizeof(struct jffs2_acl_header)
26 + count * sizeof(struct jffs2_acl_entry_short);
27 } else {
28 return sizeof(struct jffs2_acl_header)
29 + 4 * sizeof(struct jffs2_acl_entry_short)
30 + (count - 4) * sizeof(struct jffs2_acl_entry);
31 }
32}
33
34static int jffs2_acl_count(size_t size)
35{
36 size_t s;
37
38 size -= sizeof(struct jffs2_acl_header);
39 s = size - 4 * sizeof(struct jffs2_acl_entry_short);
40 if (s < 0) {
41 if (size % sizeof(struct jffs2_acl_entry_short))
42 return -1;
43 return size / sizeof(struct jffs2_acl_entry_short);
44 } else {
45 if (s % sizeof(struct jffs2_acl_entry))
46 return -1;
47 return s / sizeof(struct jffs2_acl_entry) + 4;
48 }
49}
50
51static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size)
52{
53 void *end = value + size;
54 struct jffs2_acl_header *header = value;
55 struct jffs2_acl_entry *entry;
56 struct posix_acl *acl;
57 uint32_t ver;
58 int i, count;
59
60 if (!value)
61 return NULL;
62 if (size < sizeof(struct jffs2_acl_header))
63 return ERR_PTR(-EINVAL);
64 ver = je32_to_cpu(header->a_version);
65 if (ver != JFFS2_ACL_VERSION) {
66 JFFS2_WARNING("Invalid ACL version. (=%u)\n", ver);
67 return ERR_PTR(-EINVAL);
68 }
69
70 value += sizeof(struct jffs2_acl_header);
71 count = jffs2_acl_count(size);
72 if (count < 0)
73 return ERR_PTR(-EINVAL);
74 if (count == 0)
75 return NULL;
76
77 acl = posix_acl_alloc(count, GFP_KERNEL);
78 if (!acl)
79 return ERR_PTR(-ENOMEM);
80
81 for (i=0; i < count; i++) {
82 entry = value;
83 if (value + sizeof(struct jffs2_acl_entry_short) > end)
84 goto fail;
85 acl->a_entries[i].e_tag = je16_to_cpu(entry->e_tag);
86 acl->a_entries[i].e_perm = je16_to_cpu(entry->e_perm);
87 switch (acl->a_entries[i].e_tag) {
88 case ACL_USER_OBJ:
89 case ACL_GROUP_OBJ:
90 case ACL_MASK:
91 case ACL_OTHER:
92 value += sizeof(struct jffs2_acl_entry_short);
93 acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
94 break;
95
96 case ACL_USER:
97 case ACL_GROUP:
98 value += sizeof(struct jffs2_acl_entry);
99 if (value > end)
100 goto fail;
101 acl->a_entries[i].e_id = je32_to_cpu(entry->e_id);
102 break;
103
104 default:
105 goto fail;
106 }
107 }
108 if (value != end)
109 goto fail;
110 return acl;
111 fail:
112 posix_acl_release(acl);
113 return ERR_PTR(-EINVAL);
114}
115
116static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
117{
118 struct jffs2_acl_header *header;
119 struct jffs2_acl_entry *entry;
120 void *e;
121 size_t i;
122
123 *size = jffs2_acl_size(acl->a_count);
124 header = kmalloc(sizeof(*header) + acl->a_count * sizeof(*entry), GFP_KERNEL);
125 if (!header)
126 return ERR_PTR(-ENOMEM);
127 header->a_version = cpu_to_je32(JFFS2_ACL_VERSION);
128 e = header + 1;
129 for (i=0; i < acl->a_count; i++) {
130 entry = e;
131 entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag);
132 entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm);
133 switch(acl->a_entries[i].e_tag) {
134 case ACL_USER:
135 case ACL_GROUP:
136 entry->e_id = cpu_to_je32(acl->a_entries[i].e_id);
137 e += sizeof(struct jffs2_acl_entry);
138 break;
139
140 case ACL_USER_OBJ:
141 case ACL_GROUP_OBJ:
142 case ACL_MASK:
143 case ACL_OTHER:
144 e += sizeof(struct jffs2_acl_entry_short);
145 break;
146
147 default:
148 goto fail;
149 }
150 }
151 return header;
152 fail:
153 kfree(header);
154 return ERR_PTR(-EINVAL);
155}
156
157static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
158{
159 struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
160
161 spin_lock(&inode->i_lock);
162 if (*i_acl != JFFS2_ACL_NOT_CACHED)
163 acl = posix_acl_dup(*i_acl);
164 spin_unlock(&inode->i_lock);
165 return acl;
166}
167
168static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
169{
170 spin_lock(&inode->i_lock);
171 if (*i_acl != JFFS2_ACL_NOT_CACHED)
172 posix_acl_release(*i_acl);
173 *i_acl = posix_acl_dup(acl);
174 spin_unlock(&inode->i_lock);
175}
176
177static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
178{
179 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
180 struct posix_acl *acl;
181 char *value = NULL;
182 int rc, xprefix;
183
184 switch (type) {
185 case ACL_TYPE_ACCESS:
186 acl = jffs2_iget_acl(inode, &f->i_acl_access);
187 if (acl != JFFS2_ACL_NOT_CACHED)
188 return acl;
189 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
190 break;
191 case ACL_TYPE_DEFAULT:
192 acl = jffs2_iget_acl(inode, &f->i_acl_default);
193 if (acl != JFFS2_ACL_NOT_CACHED)
194 return acl;
195 xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
196 break;
197 default:
198 return ERR_PTR(-EINVAL);
199 }
200 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
201 if (rc > 0) {
202 value = kmalloc(rc, GFP_KERNEL);
203 if (!value)
204 return ERR_PTR(-ENOMEM);
205 rc = do_jffs2_getxattr(inode, xprefix, "", value, rc);
206 }
207 if (rc > 0) {
208 acl = jffs2_acl_from_medium(value, rc);
209 } else if (rc == -ENODATA || rc == -ENOSYS) {
210 acl = NULL;
211 } else {
212 acl = ERR_PTR(rc);
213 }
214 if (value)
215 kfree(value);
216 if (!IS_ERR(acl)) {
217 switch (type) {
218 case ACL_TYPE_ACCESS:
219 jffs2_iset_acl(inode, &f->i_acl_access, acl);
220 break;
221 case ACL_TYPE_DEFAULT:
222 jffs2_iset_acl(inode, &f->i_acl_default, acl);
223 break;
224 }
225 }
226 return acl;
227}
228
229static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
230{
231 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
232 size_t size = 0;
233 char *value = NULL;
234 int rc, xprefix;
235
236 if (S_ISLNK(inode->i_mode))
237 return -EOPNOTSUPP;
238
239 switch (type) {
240 case ACL_TYPE_ACCESS:
241 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
242 if (acl) {
243 mode_t mode = inode->i_mode;
244 rc = posix_acl_equiv_mode(acl, &mode);
245 if (rc < 0)
246 return rc;
247 if (inode->i_mode != mode) {
248 inode->i_mode = mode;
249 jffs2_dirty_inode(inode);
250 }
251 if (rc == 0)
252 acl = NULL;
253 }
254 break;
255 case ACL_TYPE_DEFAULT:
256 xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
257 if (!S_ISDIR(inode->i_mode))
258 return acl ? -EACCES : 0;
259 break;
260 default:
261 return -EINVAL;
262 }
263 if (acl) {
264 value = jffs2_acl_to_medium(acl, &size);
265 if (IS_ERR(value))
266 return PTR_ERR(value);
267 }
268
269 rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0);
270 if (!value && rc == -ENODATA)
271 rc = 0;
272 if (value)
273 kfree(value);
274 if (!rc) {
275 switch(type) {
276 case ACL_TYPE_ACCESS:
277 jffs2_iset_acl(inode, &f->i_acl_access, acl);
278 break;
279 case ACL_TYPE_DEFAULT:
280 jffs2_iset_acl(inode, &f->i_acl_default, acl);
281 break;
282 }
283 }
284 return rc;
285}
286
287static int jffs2_check_acl(struct inode *inode, int mask)
288{
289 struct posix_acl *acl;
290 int rc;
291
292 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
293 if (IS_ERR(acl))
294 return PTR_ERR(acl);
295 if (acl) {
296 rc = posix_acl_permission(inode, acl, mask);
297 posix_acl_release(acl);
298 return rc;
299 }
300 return -EAGAIN;
301}
302
303int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd)
304{
305 return generic_permission(inode, mask, jffs2_check_acl);
306}
307
308int jffs2_init_acl(struct inode *inode, struct inode *dir)
309{
310 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
311 struct posix_acl *acl = NULL, *clone;
312 mode_t mode;
313 int rc = 0;
314
315 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
316 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
317 if (!S_ISLNK(inode->i_mode)) {
318 acl = jffs2_get_acl(dir, ACL_TYPE_DEFAULT);
319 if (IS_ERR(acl))
320 return PTR_ERR(acl);
321 if (!acl)
322 inode->i_mode &= ~current->fs->umask;
323 }
324 if (acl) {
325 if (S_ISDIR(inode->i_mode)) {
326 rc = jffs2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
327 if (rc)
328 goto cleanup;
329 }
330 clone = posix_acl_clone(acl, GFP_KERNEL);
331 rc = -ENOMEM;
332 if (!clone)
333 goto cleanup;
334 mode = inode->i_mode;
335 rc = posix_acl_create_masq(clone, &mode);
336 if (rc >= 0) {
337 inode->i_mode = mode;
338 if (rc > 0)
339 rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone);
340 }
341 posix_acl_release(clone);
342 }
343 cleanup:
344 posix_acl_release(acl);
345 return rc;
346}
347
348void jffs2_clear_acl(struct inode *inode)
349{
350 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
351
352 if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
353 posix_acl_release(f->i_acl_access);
354 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
355 }
356 if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
357 posix_acl_release(f->i_acl_default);
358 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
359 }
360}
361
362int jffs2_acl_chmod(struct inode *inode)
363{
364 struct posix_acl *acl, *clone;
365 int rc;
366
367 if (S_ISLNK(inode->i_mode))
368 return -EOPNOTSUPP;
369 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
370 if (IS_ERR(acl) || !acl)
371 return PTR_ERR(acl);
372 clone = posix_acl_clone(acl, GFP_KERNEL);
373 posix_acl_release(acl);
374 if (!clone)
375 return -ENOMEM;
376 rc = posix_acl_chmod_masq(clone, inode->i_mode);
377 if (!rc)
378 rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone);
379 posix_acl_release(clone);
380 return rc;
381}
382
383static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size,
384 const char *name, size_t name_len)
385{
386 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
387
388 if (list && retlen <= list_size)
389 strcpy(list, POSIX_ACL_XATTR_ACCESS);
390 return retlen;
391}
392
393static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size,
394 const char *name, size_t name_len)
395{
396 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
397
398 if (list && retlen <= list_size)
399 strcpy(list, POSIX_ACL_XATTR_DEFAULT);
400 return retlen;
401}
402
403static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size)
404{
405 struct posix_acl *acl;
406 int rc;
407
408 acl = jffs2_get_acl(inode, type);
409 if (IS_ERR(acl))
410 return PTR_ERR(acl);
411 if (!acl)
412 return -ENODATA;
413 rc = posix_acl_to_xattr(acl, buffer, size);
414 posix_acl_release(acl);
415
416 return rc;
417}
418
419static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
420{
421 if (name[0] != '\0')
422 return -EINVAL;
423 return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
424}
425
426static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
427{
428 if (name[0] != '\0')
429 return -EINVAL;
430 return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
431}
432
433static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
434{
435 struct posix_acl *acl;
436 int rc;
437
438 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
439 return -EPERM;
440
441 if (value) {
442 acl = posix_acl_from_xattr(value, size);
443 if (IS_ERR(acl))
444 return PTR_ERR(acl);
445 if (acl) {
446 rc = posix_acl_valid(acl);
447 if (rc)
448 goto out;
449 }
450 } else {
451 acl = NULL;
452 }
453 rc = jffs2_set_acl(inode, type, acl);
454 out:
455 posix_acl_release(acl);
456 return rc;
457}
458
459static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
460 const void *buffer, size_t size, int flags)
461{
462 if (name[0] != '\0')
463 return -EINVAL;
464 return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
465}
466
467static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
468 const void *buffer, size_t size, int flags)
469{
470 if (name[0] != '\0')
471 return -EINVAL;
472 return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
473}
474
475struct xattr_handler jffs2_acl_access_xattr_handler = {
476 .prefix = POSIX_ACL_XATTR_ACCESS,
477 .list = jffs2_acl_access_listxattr,
478 .get = jffs2_acl_access_getxattr,
479 .set = jffs2_acl_access_setxattr,
480};
481
482struct xattr_handler jffs2_acl_default_xattr_handler = {
483 .prefix = POSIX_ACL_XATTR_DEFAULT,
484 .list = jffs2_acl_default_listxattr,
485 .get = jffs2_acl_default_getxattr,
486 .set = jffs2_acl_default_setxattr,
487};
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
new file mode 100644
index 000000000000..8893bd1a6ba7
--- /dev/null
+++ b/fs/jffs2/acl.h
@@ -0,0 +1,45 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
/*
 * Full-size ACL entry: a 16-bit tag, a 16-bit permission field and a 32-bit
 * id, all held in the jint*_t wrapper types.
 * NOTE(review): presumably the stored (on-medium) form of one ACL entry that
 * carries an id -- confirm against the conversion code in acl.c.
 */
11struct jffs2_acl_entry {
12 jint16_t e_tag;
13 jint16_t e_perm;
14 jint32_t e_id;
15};
16
/*
 * Short ACL entry: same tag and permission fields as struct jffs2_acl_entry
 * but without the e_id member.
 * NOTE(review): presumably used for entry kinds that need no id -- confirm
 * against the conversion code in acl.c.
 */
17struct jffs2_acl_entry_short {
18 jint16_t e_tag;
19 jint16_t e_perm;
20};
21
/*
 * ACL header holding a single 32-bit version field.
 * NOTE(review): presumably precedes the entry records in the stored ACL --
 * confirm against the conversion code in acl.c.
 */
22struct jffs2_acl_header {
23 jint32_t a_version;
24};
25
/*
 * ACL entry points. With CONFIG_JFFS2_FS_POSIX_ACL defined the real
 * functions and xattr handler tables are declared; otherwise the same
 * names are provided as no-op macros so callers need no #ifdef.
 */
26#ifdef CONFIG_JFFS2_FS_POSIX_ACL
27
/* Sentinel pointer value; presumably marks an ACL not yet loaded -- TODO confirm. */
28#define JFFS2_ACL_NOT_CACHED ((void *)-1)
29
30extern int jffs2_permission(struct inode *, int, struct nameidata *);
31extern int jffs2_acl_chmod(struct inode *);
32extern int jffs2_init_acl(struct inode *, struct inode *);
33extern void jffs2_clear_acl(struct inode *);
34
35extern struct xattr_handler jffs2_acl_access_xattr_handler;
36extern struct xattr_handler jffs2_acl_default_xattr_handler;
37
38#else
39
/*
 * Stubs for builds without ACL support: jffs2_permission becomes NULL,
 * jffs2_acl_chmod() and jffs2_init_acl() evaluate to 0, and
 * jffs2_clear_acl() expands to nothing.
 */
40#define jffs2_permission NULL
41#define jffs2_acl_chmod(inode) (0)
42#define jffs2_init_acl(inode,dir) (0)
43#define jffs2_clear_acl(inode)
44
45#endif /* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 70f7a896c04a..02826967ab58 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -160,6 +160,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
160 ic->scan_dents = NULL; 160 ic->scan_dents = NULL;
161 cond_resched(); 161 cond_resched();
162 } 162 }
163 jffs2_build_xattr_subsystem(c);
163 c->flags &= ~JFFS2_SB_FLAG_BUILDING; 164 c->flags &= ~JFFS2_SB_FLAG_BUILDING;
164 165
165 dbg_fsbuild("FS build complete\n"); 166 dbg_fsbuild("FS build complete\n");
@@ -178,6 +179,7 @@ exit:
178 jffs2_free_full_dirent(fd); 179 jffs2_free_full_dirent(fd);
179 } 180 }
180 } 181 }
182 jffs2_clear_xattr_subsystem(c);
181 } 183 }
182 184
183 return ret; 185 return ret;
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index e7944e665b9f..7001ba26c067 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -412,7 +412,7 @@ void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig)
412 kfree(comprbuf); 412 kfree(comprbuf);
413} 413}
414 414
415int jffs2_compressors_init(void) 415int __init jffs2_compressors_init(void)
416{ 416{
417/* Registering compressors */ 417/* Registering compressors */
418#ifdef CONFIG_JFFS2_ZLIB 418#ifdef CONFIG_JFFS2_ZLIB
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index a77e830d85c5..509b8b1c0811 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -23,8 +23,8 @@
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jffs2.h> 25#include <linux/jffs2.h>
26#include <linux/jffs2_fs_i.h> 26#include "jffs2_fs_i.h"
27#include <linux/jffs2_fs_sb.h> 27#include "jffs2_fs_sb.h"
28#include "nodelist.h" 28#include "nodelist.h"
29 29
30#define JFFS2_RUBINMIPS_PRIORITY 10 30#define JFFS2_RUBINMIPS_PRIORITY 10
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 5c63e0cdcf4c..3681d0728ac7 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -15,7 +15,6 @@
15#error "The userspace support got too messy and was removed. Update your mkfs.jffs2" 15#error "The userspace support got too messy and was removed. Update your mkfs.jffs2"
16#endif 16#endif
17 17
18#include <linux/config.h>
19#include <linux/kernel.h> 18#include <linux/kernel.h>
20#include <linux/sched.h> 19#include <linux/sched.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 1fe17de713e8..72b4fc13a106 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -192,13 +192,13 @@ __jffs2_dbg_acct_paranoia_check_nolock(struct jffs2_sb_info *c,
192 else 192 else
193 my_dirty_size += totlen; 193 my_dirty_size += totlen;
194 194
195 if ((!ref2->next_phys) != (ref2 == jeb->last_node)) { 195 if ((!ref_next(ref2)) != (ref2 == jeb->last_node)) {
196 JFFS2_ERROR("node_ref for node at %#08x (mem %p) has next_phys at %#08x (mem %p), last_node is at %#08x (mem %p).\n", 196 JFFS2_ERROR("node_ref for node at %#08x (mem %p) has next at %#08x (mem %p), last_node is at %#08x (mem %p).\n",
197 ref_offset(ref2), ref2, ref_offset(ref2->next_phys), ref2->next_phys, 197 ref_offset(ref2), ref2, ref_offset(ref_next(ref2)), ref_next(ref2),
198 ref_offset(jeb->last_node), jeb->last_node); 198 ref_offset(jeb->last_node), jeb->last_node);
199 goto error; 199 goto error;
200 } 200 }
201 ref2 = ref2->next_phys; 201 ref2 = ref_next(ref2);
202 } 202 }
203 203
204 if (my_used_size != jeb->used_size) { 204 if (my_used_size != jeb->used_size) {
@@ -268,9 +268,9 @@ __jffs2_dbg_dump_node_refs_nolock(struct jffs2_sb_info *c,
268 } 268 }
269 269
270 printk(JFFS2_DBG); 270 printk(JFFS2_DBG);
271 for (ref = jeb->first_node; ; ref = ref->next_phys) { 271 for (ref = jeb->first_node; ; ref = ref_next(ref)) {
272 printk("%#08x(%#x)", ref_offset(ref), ref->__totlen); 272 printk("%#08x(%#x)", ref_offset(ref), ref->__totlen);
273 if (ref->next_phys) 273 if (ref_next(ref))
274 printk("->"); 274 printk("->");
275 else 275 else
276 break; 276 break;
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 162af6dfe292..3daf3bca0376 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -13,7 +13,6 @@
13#ifndef _JFFS2_DEBUG_H_ 13#ifndef _JFFS2_DEBUG_H_
14#define _JFFS2_DEBUG_H_ 14#define _JFFS2_DEBUG_H_
15 15
16#include <linux/config.h>
17 16
18#ifndef CONFIG_JFFS2_FS_DEBUG 17#ifndef CONFIG_JFFS2_FS_DEBUG
19#define CONFIG_JFFS2_FS_DEBUG 0 18#define CONFIG_JFFS2_FS_DEBUG 0
@@ -171,6 +170,12 @@
171#define dbg_memalloc(fmt, ...) 170#define dbg_memalloc(fmt, ...)
172#endif 171#endif
173 172
173/* Watch the XATTR subsystem */
174#ifdef JFFS2_DBG_XATTR_MESSAGES
175#define dbg_xattr(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
176#else
177#define dbg_xattr(fmt, ...)
178#endif
174 179
175/* "Sanity" checks */ 180/* "Sanity" checks */
176void 181void
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 8bc7a5018e40..edd8371fc6a5 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -17,8 +17,8 @@
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/crc32.h> 18#include <linux/crc32.h>
19#include <linux/jffs2.h> 19#include <linux/jffs2.h>
20#include <linux/jffs2_fs_i.h> 20#include "jffs2_fs_i.h"
21#include <linux/jffs2_fs_sb.h> 21#include "jffs2_fs_sb.h"
22#include <linux/time.h> 22#include <linux/time.h>
23#include "nodelist.h" 23#include "nodelist.h"
24 24
@@ -57,7 +57,12 @@ struct inode_operations jffs2_dir_inode_operations =
57 .rmdir = jffs2_rmdir, 57 .rmdir = jffs2_rmdir,
58 .mknod = jffs2_mknod, 58 .mknod = jffs2_mknod,
59 .rename = jffs2_rename, 59 .rename = jffs2_rename,
60 .permission = jffs2_permission,
60 .setattr = jffs2_setattr, 61 .setattr = jffs2_setattr,
62 .setxattr = jffs2_setxattr,
63 .getxattr = jffs2_getxattr,
64 .listxattr = jffs2_listxattr,
65 .removexattr = jffs2_removexattr
61}; 66};
62 67
63/***********************************************************************/ 68/***********************************************************************/
@@ -78,6 +83,9 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
78 83
79 D1(printk(KERN_DEBUG "jffs2_lookup()\n")); 84 D1(printk(KERN_DEBUG "jffs2_lookup()\n"));
80 85
86 if (target->d_name.len > JFFS2_MAX_NAME_LEN)
87 return ERR_PTR(-ENAMETOOLONG);
88
81 dir_f = JFFS2_INODE_INFO(dir_i); 89 dir_f = JFFS2_INODE_INFO(dir_i);
82 c = JFFS2_SB_INFO(dir_i->i_sb); 90 c = JFFS2_SB_INFO(dir_i->i_sb);
83 91
@@ -206,12 +214,15 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
206 ret = jffs2_do_create(c, dir_f, f, ri, 214 ret = jffs2_do_create(c, dir_f, f, ri,
207 dentry->d_name.name, dentry->d_name.len); 215 dentry->d_name.name, dentry->d_name.len);
208 216
209 if (ret) { 217 if (ret)
210 make_bad_inode(inode); 218 goto fail;
211 iput(inode); 219
212 jffs2_free_raw_inode(ri); 220 ret = jffs2_init_security(inode, dir_i);
213 return ret; 221 if (ret)
214 } 222 goto fail;
223 ret = jffs2_init_acl(inode, dir_i);
224 if (ret)
225 goto fail;
215 226
216 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); 227 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
217 228
@@ -221,6 +232,12 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
221 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 232 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
222 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 233 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages));
223 return 0; 234 return 0;
235
236 fail:
237 make_bad_inode(inode);
238 iput(inode);
239 jffs2_free_raw_inode(ri);
240 return ret;
224} 241}
225 242
226/***********************************************************************/ 243/***********************************************************************/
@@ -291,7 +308,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
291 struct jffs2_full_dnode *fn; 308 struct jffs2_full_dnode *fn;
292 struct jffs2_full_dirent *fd; 309 struct jffs2_full_dirent *fd;
293 int namelen; 310 int namelen;
294 uint32_t alloclen, phys_ofs; 311 uint32_t alloclen;
295 int ret, targetlen = strlen(target); 312 int ret, targetlen = strlen(target);
296 313
297 /* FIXME: If you care. We'd need to use frags for the target 314 /* FIXME: If you care. We'd need to use frags for the target
@@ -310,8 +327,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
310 * Just the node will do for now, though 327 * Just the node will do for now, though
311 */ 328 */
312 namelen = dentry->d_name.len; 329 namelen = dentry->d_name.len;
313 ret = jffs2_reserve_space(c, sizeof(*ri) + targetlen, &phys_ofs, &alloclen, 330 ret = jffs2_reserve_space(c, sizeof(*ri) + targetlen, &alloclen,
314 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 331 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
315 332
316 if (ret) { 333 if (ret) {
317 jffs2_free_raw_inode(ri); 334 jffs2_free_raw_inode(ri);
@@ -339,7 +356,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
339 ri->data_crc = cpu_to_je32(crc32(0, target, targetlen)); 356 ri->data_crc = cpu_to_je32(crc32(0, target, targetlen));
340 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 357 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
341 358
342 fn = jffs2_write_dnode(c, f, ri, target, targetlen, phys_ofs, ALLOC_NORMAL); 359 fn = jffs2_write_dnode(c, f, ri, target, targetlen, ALLOC_NORMAL);
343 360
344 jffs2_free_raw_inode(ri); 361 jffs2_free_raw_inode(ri);
345 362
@@ -371,8 +388,20 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
371 up(&f->sem); 388 up(&f->sem);
372 389
373 jffs2_complete_reservation(c); 390 jffs2_complete_reservation(c);
374 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 391
375 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 392 ret = jffs2_init_security(inode, dir_i);
393 if (ret) {
394 jffs2_clear_inode(inode);
395 return ret;
396 }
397 ret = jffs2_init_acl(inode, dir_i);
398 if (ret) {
399 jffs2_clear_inode(inode);
400 return ret;
401 }
402
403 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
404 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
376 if (ret) { 405 if (ret) {
377 /* Eep. */ 406 /* Eep. */
378 jffs2_clear_inode(inode); 407 jffs2_clear_inode(inode);
@@ -404,7 +433,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
404 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 433 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
405 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); 434 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen));
406 435
407 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); 436 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL);
408 437
409 if (IS_ERR(fd)) { 438 if (IS_ERR(fd)) {
410 /* dirent failed to write. Delete the inode normally 439 /* dirent failed to write. Delete the inode normally
@@ -442,7 +471,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
442 struct jffs2_full_dnode *fn; 471 struct jffs2_full_dnode *fn;
443 struct jffs2_full_dirent *fd; 472 struct jffs2_full_dirent *fd;
444 int namelen; 473 int namelen;
445 uint32_t alloclen, phys_ofs; 474 uint32_t alloclen;
446 int ret; 475 int ret;
447 476
448 mode |= S_IFDIR; 477 mode |= S_IFDIR;
@@ -457,8 +486,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
457 * Just the node will do for now, though 486 * Just the node will do for now, though
458 */ 487 */
459 namelen = dentry->d_name.len; 488 namelen = dentry->d_name.len;
460 ret = jffs2_reserve_space(c, sizeof(*ri), &phys_ofs, &alloclen, ALLOC_NORMAL, 489 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
461 JFFS2_SUMMARY_INODE_SIZE); 490 JFFS2_SUMMARY_INODE_SIZE);
462 491
463 if (ret) { 492 if (ret) {
464 jffs2_free_raw_inode(ri); 493 jffs2_free_raw_inode(ri);
@@ -483,7 +512,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
483 ri->data_crc = cpu_to_je32(0); 512 ri->data_crc = cpu_to_je32(0);
484 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 513 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
485 514
486 fn = jffs2_write_dnode(c, f, ri, NULL, 0, phys_ofs, ALLOC_NORMAL); 515 fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL);
487 516
488 jffs2_free_raw_inode(ri); 517 jffs2_free_raw_inode(ri);
489 518
@@ -501,8 +530,20 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
501 up(&f->sem); 530 up(&f->sem);
502 531
503 jffs2_complete_reservation(c); 532 jffs2_complete_reservation(c);
504 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 533
505 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 534 ret = jffs2_init_security(inode, dir_i);
535 if (ret) {
536 jffs2_clear_inode(inode);
537 return ret;
538 }
539 ret = jffs2_init_acl(inode, dir_i);
540 if (ret) {
541 jffs2_clear_inode(inode);
542 return ret;
543 }
544
545 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
546 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
506 if (ret) { 547 if (ret) {
507 /* Eep. */ 548 /* Eep. */
508 jffs2_clear_inode(inode); 549 jffs2_clear_inode(inode);
@@ -534,7 +575,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
534 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 575 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
535 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); 576 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen));
536 577
537 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); 578 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL);
538 579
539 if (IS_ERR(fd)) { 580 if (IS_ERR(fd)) {
540 /* dirent failed to write. Delete the inode normally 581 /* dirent failed to write. Delete the inode normally
@@ -588,12 +629,12 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
588 struct jffs2_full_dnode *fn; 629 struct jffs2_full_dnode *fn;
589 struct jffs2_full_dirent *fd; 630 struct jffs2_full_dirent *fd;
590 int namelen; 631 int namelen;
591 jint16_t dev; 632 union jffs2_device_node dev;
592 int devlen = 0; 633 int devlen = 0;
593 uint32_t alloclen, phys_ofs; 634 uint32_t alloclen;
594 int ret; 635 int ret;
595 636
596 if (!old_valid_dev(rdev)) 637 if (!new_valid_dev(rdev))
597 return -EINVAL; 638 return -EINVAL;
598 639
599 ri = jffs2_alloc_raw_inode(); 640 ri = jffs2_alloc_raw_inode();
@@ -602,17 +643,15 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
602 643
603 c = JFFS2_SB_INFO(dir_i->i_sb); 644 c = JFFS2_SB_INFO(dir_i->i_sb);
604 645
605 if (S_ISBLK(mode) || S_ISCHR(mode)) { 646 if (S_ISBLK(mode) || S_ISCHR(mode))
606 dev = cpu_to_je16(old_encode_dev(rdev)); 647 devlen = jffs2_encode_dev(&dev, rdev);
607 devlen = sizeof(dev);
608 }
609 648
610 /* Try to reserve enough space for both node and dirent. 649 /* Try to reserve enough space for both node and dirent.
611 * Just the node will do for now, though 650 * Just the node will do for now, though
612 */ 651 */
613 namelen = dentry->d_name.len; 652 namelen = dentry->d_name.len;
614 ret = jffs2_reserve_space(c, sizeof(*ri) + devlen, &phys_ofs, &alloclen, 653 ret = jffs2_reserve_space(c, sizeof(*ri) + devlen, &alloclen,
615 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 654 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
616 655
617 if (ret) { 656 if (ret) {
618 jffs2_free_raw_inode(ri); 657 jffs2_free_raw_inode(ri);
@@ -639,7 +678,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
639 ri->data_crc = cpu_to_je32(crc32(0, &dev, devlen)); 678 ri->data_crc = cpu_to_je32(crc32(0, &dev, devlen));
640 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 679 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
641 680
642 fn = jffs2_write_dnode(c, f, ri, (char *)&dev, devlen, phys_ofs, ALLOC_NORMAL); 681 fn = jffs2_write_dnode(c, f, ri, (char *)&dev, devlen, ALLOC_NORMAL);
643 682
644 jffs2_free_raw_inode(ri); 683 jffs2_free_raw_inode(ri);
645 684
@@ -657,8 +696,20 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
657 up(&f->sem); 696 up(&f->sem);
658 697
659 jffs2_complete_reservation(c); 698 jffs2_complete_reservation(c);
660 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 699
661 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 700 ret = jffs2_init_security(inode, dir_i);
701 if (ret) {
702 jffs2_clear_inode(inode);
703 return ret;
704 }
705 ret = jffs2_init_acl(inode, dir_i);
706 if (ret) {
707 jffs2_clear_inode(inode);
708 return ret;
709 }
710
711 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
712 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
662 if (ret) { 713 if (ret) {
663 /* Eep. */ 714 /* Eep. */
664 jffs2_clear_inode(inode); 715 jffs2_clear_inode(inode);
@@ -693,7 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
693 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 744 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
694 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); 745 rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen));
695 746
696 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); 747 fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL);
697 748
698 if (IS_ERR(fd)) { 749 if (IS_ERR(fd)) {
699 /* dirent failed to write. Delete the inode normally 750 /* dirent failed to write. Delete the inode normally
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dad68fdffe9e..ad0121088dde 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -30,7 +30,6 @@ static void jffs2_erase_callback(struct erase_info *);
30#endif 30#endif
31static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset); 31static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset);
32static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); 32static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
33static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
34static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); 33static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
35 34
36static void jffs2_erase_block(struct jffs2_sb_info *c, 35static void jffs2_erase_block(struct jffs2_sb_info *c,
@@ -54,8 +53,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
54 if (!instr) { 53 if (!instr) {
55 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
56 spin_lock(&c->erase_completion_lock); 55 spin_lock(&c->erase_completion_lock);
57 list_del(&jeb->list); 56 list_move(&jeb->list, &c->erase_pending_list);
58 list_add(&jeb->list, &c->erase_pending_list);
59 c->erasing_size -= c->sector_size; 57 c->erasing_size -= c->sector_size;
60 c->dirty_size += c->sector_size; 58 c->dirty_size += c->sector_size;
61 jeb->dirty_size = c->sector_size; 59 jeb->dirty_size = c->sector_size;
@@ -87,8 +85,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
87 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
88 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
89 spin_lock(&c->erase_completion_lock); 87 spin_lock(&c->erase_completion_lock);
90 list_del(&jeb->list); 88 list_move(&jeb->list, &c->erase_pending_list);
91 list_add(&jeb->list, &c->erase_pending_list);
92 c->erasing_size -= c->sector_size; 89 c->erasing_size -= c->sector_size;
93 c->dirty_size += c->sector_size; 90 c->dirty_size += c->sector_size;
94 jeb->dirty_size = c->sector_size; 91 jeb->dirty_size = c->sector_size;
@@ -136,7 +133,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
136 c->used_size -= jeb->used_size; 133 c->used_size -= jeb->used_size;
137 c->dirty_size -= jeb->dirty_size; 134 c->dirty_size -= jeb->dirty_size;
138 jeb->wasted_size = jeb->used_size = jeb->dirty_size = jeb->free_size = 0; 135 jeb->wasted_size = jeb->used_size = jeb->dirty_size = jeb->free_size = 0;
139 jffs2_free_all_node_refs(c, jeb); 136 jffs2_free_jeb_node_refs(c, jeb);
140 list_add(&jeb->list, &c->erasing_list); 137 list_add(&jeb->list, &c->erasing_list);
141 spin_unlock(&c->erase_completion_lock); 138 spin_unlock(&c->erase_completion_lock);
142 139
@@ -162,8 +159,7 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
162{ 159{
163 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 160 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
164 spin_lock(&c->erase_completion_lock); 161 spin_lock(&c->erase_completion_lock);
165 list_del(&jeb->list); 162 list_move_tail(&jeb->list, &c->erase_complete_list);
166 list_add_tail(&jeb->list, &c->erase_complete_list);
167 spin_unlock(&c->erase_completion_lock); 163 spin_unlock(&c->erase_completion_lock);
168 /* Ensure that kupdated calls us again to mark them clean */ 164 /* Ensure that kupdated calls us again to mark them clean */
169 jffs2_erase_pending_trigger(c); 165 jffs2_erase_pending_trigger(c);
@@ -179,8 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
179 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 175 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
180 /* We'd like to give this block another try. */ 176 /* We'd like to give this block another try. */
181 spin_lock(&c->erase_completion_lock); 177 spin_lock(&c->erase_completion_lock);
182 list_del(&jeb->list); 178 list_move(&jeb->list, &c->erase_pending_list);
183 list_add(&jeb->list, &c->erase_pending_list);
184 c->erasing_size -= c->sector_size; 179 c->erasing_size -= c->sector_size;
185 c->dirty_size += c->sector_size; 180 c->dirty_size += c->sector_size;
186 jeb->dirty_size = c->sector_size; 181 jeb->dirty_size = c->sector_size;
@@ -192,8 +187,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
192 spin_lock(&c->erase_completion_lock); 187 spin_lock(&c->erase_completion_lock);
193 c->erasing_size -= c->sector_size; 188 c->erasing_size -= c->sector_size;
194 c->bad_size += c->sector_size; 189 c->bad_size += c->sector_size;
195 list_del(&jeb->list); 190 list_move(&jeb->list, &c->bad_list);
196 list_add(&jeb->list, &c->bad_list);
197 c->nr_erasing_blocks--; 191 c->nr_erasing_blocks--;
198 spin_unlock(&c->erase_completion_lock); 192 spin_unlock(&c->erase_completion_lock);
199 wake_up(&c->erase_wait); 193 wake_up(&c->erase_wait);
@@ -254,7 +248,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
254 248
255 /* PARANOIA */ 249 /* PARANOIA */
256 if (!ic) { 250 if (!ic) {
257 printk(KERN_WARNING "inode_cache not found in remove_node_refs()!!\n"); 251 JFFS2_WARNING("inode_cache/xattr_datum/xattr_ref"
252 " not found in remove_node_refs()!!\n");
258 return; 253 return;
259 } 254 }
260 255
@@ -279,26 +274,42 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
279 printk("\n"); 274 printk("\n");
280 }); 275 });
281 276
282 if (ic->nodes == (void *)ic && ic->nlink == 0) 277 switch (ic->class) {
283 jffs2_del_ino_cache(c, ic); 278#ifdef CONFIG_JFFS2_FS_XATTR
279 case RAWNODE_CLASS_XATTR_DATUM:
280 jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
281 break;
282 case RAWNODE_CLASS_XATTR_REF:
283 jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
284 break;
285#endif
286 default:
287 if (ic->nodes == (void *)ic && ic->nlink == 0)
288 jffs2_del_ino_cache(c, ic);
289 }
284} 290}
285 291
286static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 292void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
287{ 293{
288 struct jffs2_raw_node_ref *ref; 294 struct jffs2_raw_node_ref *block, *ref;
289 D1(printk(KERN_DEBUG "Freeing all node refs for eraseblock offset 0x%08x\n", jeb->offset)); 295 D1(printk(KERN_DEBUG "Freeing all node refs for eraseblock offset 0x%08x\n", jeb->offset));
290 while(jeb->first_node) {
291 ref = jeb->first_node;
292 jeb->first_node = ref->next_phys;
293 296
294 /* Remove from the inode-list */ 297 block = ref = jeb->first_node;
295 if (ref->next_in_ino) 298
299 while (ref) {
300 if (ref->flash_offset == REF_LINK_NODE) {
301 ref = ref->next_in_ino;
302 jffs2_free_refblock(block);
303 block = ref;
304 continue;
305 }
306 if (ref->flash_offset != REF_EMPTY_NODE && ref->next_in_ino)
296 jffs2_remove_node_refs_from_ino_list(c, ref, jeb); 307 jffs2_remove_node_refs_from_ino_list(c, ref, jeb);
297 /* else it was a non-inode node or already removed, so don't bother */ 308 /* else it was a non-inode node or already removed, so don't bother */
298 309
299 jffs2_free_raw_node_ref(ref); 310 ref++;
300 } 311 }
301 jeb->last_node = NULL; 312 jeb->first_node = jeb->last_node = NULL;
302} 313}
303 314
304static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset) 315static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset)
@@ -351,7 +362,6 @@ fail:
351 362
352static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 363static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
353{ 364{
354 struct jffs2_raw_node_ref *marker_ref = NULL;
355 size_t retlen; 365 size_t retlen;
356 int ret; 366 int ret;
357 uint32_t bad_offset; 367 uint32_t bad_offset;
@@ -373,12 +383,8 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
373 goto filebad; 383 goto filebad;
374 } 384 }
375 385
376 jeb->first_node = jeb->last_node = NULL; 386 /* Everything else got zeroed before the erase */
377 jeb->free_size = c->sector_size; 387 jeb->free_size = c->sector_size;
378 jeb->used_size = 0;
379 jeb->dirty_size = 0;
380 jeb->wasted_size = 0;
381
382 } else { 388 } else {
383 389
384 struct kvec vecs[1]; 390 struct kvec vecs[1];
@@ -388,11 +394,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
388 .totlen = cpu_to_je32(c->cleanmarker_size) 394 .totlen = cpu_to_je32(c->cleanmarker_size)
389 }; 395 };
390 396
391 marker_ref = jffs2_alloc_raw_node_ref(); 397 jffs2_prealloc_raw_node_refs(c, jeb, 1);
392 if (!marker_ref) {
393 printk(KERN_WARNING "Failed to allocate raw node ref for clean marker. Refiling\n");
394 goto refile;
395 }
396 398
397 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4)); 399 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4));
398 400
@@ -408,21 +410,13 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
408 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n", 410 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
409 jeb->offset, sizeof(marker), retlen); 411 jeb->offset, sizeof(marker), retlen);
410 412
411 jffs2_free_raw_node_ref(marker_ref);
412 goto filebad; 413 goto filebad;
413 } 414 }
414 415
415 marker_ref->next_in_ino = NULL; 416 /* Everything else got zeroed before the erase */
416 marker_ref->next_phys = NULL; 417 jeb->free_size = c->sector_size;
417 marker_ref->flash_offset = jeb->offset | REF_NORMAL; 418 /* FIXME Special case for cleanmarker in empty block */
418 marker_ref->__totlen = c->cleanmarker_size; 419 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL);
419
420 jeb->first_node = jeb->last_node = marker_ref;
421
422 jeb->free_size = c->sector_size - c->cleanmarker_size;
423 jeb->used_size = c->cleanmarker_size;
424 jeb->dirty_size = 0;
425 jeb->wasted_size = 0;
426 } 420 }
427 421
428 spin_lock(&c->erase_completion_lock); 422 spin_lock(&c->erase_completion_lock);
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 9f4171213e58..3ed6e3e120b6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -54,10 +54,15 @@ const struct file_operations jffs2_file_operations =
54 54
55struct inode_operations jffs2_file_inode_operations = 55struct inode_operations jffs2_file_inode_operations =
56{ 56{
57 .setattr = jffs2_setattr 57 .permission = jffs2_permission,
58 .setattr = jffs2_setattr,
59 .setxattr = jffs2_setxattr,
60 .getxattr = jffs2_getxattr,
61 .listxattr = jffs2_listxattr,
62 .removexattr = jffs2_removexattr
58}; 63};
59 64
60struct address_space_operations jffs2_file_address_operations = 65const struct address_space_operations jffs2_file_address_operations =
61{ 66{
62 .readpage = jffs2_readpage, 67 .readpage = jffs2_readpage,
63 .prepare_write =jffs2_prepare_write, 68 .prepare_write =jffs2_prepare_write,
@@ -129,13 +134,13 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
129 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 134 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
130 struct jffs2_raw_inode ri; 135 struct jffs2_raw_inode ri;
131 struct jffs2_full_dnode *fn; 136 struct jffs2_full_dnode *fn;
132 uint32_t phys_ofs, alloc_len; 137 uint32_t alloc_len;
133 138
134 D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", 139 D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
135 (unsigned int)inode->i_size, pageofs)); 140 (unsigned int)inode->i_size, pageofs));
136 141
137 ret = jffs2_reserve_space(c, sizeof(ri), &phys_ofs, &alloc_len, 142 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
138 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 143 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
139 if (ret) 144 if (ret)
140 return ret; 145 return ret;
141 146
@@ -161,7 +166,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
161 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); 166 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
162 ri.data_crc = cpu_to_je32(0); 167 ri.data_crc = cpu_to_je32(0);
163 168
164 fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_NORMAL); 169 fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_NORMAL);
165 170
166 if (IS_ERR(fn)) { 171 if (IS_ERR(fn)) {
167 ret = PTR_ERR(fn); 172 ret = PTR_ERR(fn);
@@ -215,12 +220,20 @@ static int jffs2_commit_write (struct file *filp, struct page *pg,
215 D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", 220 D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
216 inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); 221 inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));
217 222
218 if (!start && end == PAGE_CACHE_SIZE) { 223 if (end == PAGE_CACHE_SIZE) {
219 /* We need to avoid deadlock with page_cache_read() in 224 if (!start) {
220 jffs2_garbage_collect_pass(). So we have to mark the 225 /* We need to avoid deadlock with page_cache_read() in
221 page up to date, to prevent page_cache_read() from 226 jffs2_garbage_collect_pass(). So we have to mark the
222 trying to re-lock it. */ 227 page up to date, to prevent page_cache_read() from
223 SetPageUptodate(pg); 228 trying to re-lock it. */
229 SetPageUptodate(pg);
230 } else {
231 /* When writing out the end of a page, write out the
232 _whole_ page. This helps to reduce the number of
233 nodes in files which have many short writes, like
234 syslog files. */
235 start = aligned_start = 0;
236 }
224 } 237 }
225 238
226 ri = jffs2_alloc_raw_inode(); 239 ri = jffs2_alloc_raw_inode();
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 09e5d10b8840..4780f82825d6 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -12,7 +12,6 @@
12 */ 12 */
13 13
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/config.h>
16#include <linux/kernel.h> 15#include <linux/kernel.h>
17#include <linux/sched.h> 16#include <linux/sched.h>
18#include <linux/fs.h> 17#include <linux/fs.h>
@@ -33,11 +32,11 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
33 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 32 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
34 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 33 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
35 struct jffs2_raw_inode *ri; 34 struct jffs2_raw_inode *ri;
36 unsigned short dev; 35 union jffs2_device_node dev;
37 unsigned char *mdata = NULL; 36 unsigned char *mdata = NULL;
38 int mdatalen = 0; 37 int mdatalen = 0;
39 unsigned int ivalid; 38 unsigned int ivalid;
40 uint32_t phys_ofs, alloclen; 39 uint32_t alloclen;
41 int ret; 40 int ret;
42 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino)); 41 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino));
43 ret = inode_change_ok(inode, iattr); 42 ret = inode_change_ok(inode, iattr);
@@ -51,20 +50,24 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
51 it out again with the appropriate data attached */ 50 it out again with the appropriate data attached */
52 if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 51 if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
53 /* For these, we don't actually need to read the old node */ 52 /* For these, we don't actually need to read the old node */
54 dev = old_encode_dev(inode->i_rdev); 53 mdatalen = jffs2_encode_dev(&dev, inode->i_rdev);
55 mdata = (char *)&dev; 54 mdata = (char *)&dev;
56 mdatalen = sizeof(dev);
57 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen)); 55 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen));
58 } else if (S_ISLNK(inode->i_mode)) { 56 } else if (S_ISLNK(inode->i_mode)) {
57 down(&f->sem);
59 mdatalen = f->metadata->size; 58 mdatalen = f->metadata->size;
60 mdata = kmalloc(f->metadata->size, GFP_USER); 59 mdata = kmalloc(f->metadata->size, GFP_USER);
61 if (!mdata) 60 if (!mdata) {
61 up(&f->sem);
62 return -ENOMEM; 62 return -ENOMEM;
63 }
63 ret = jffs2_read_dnode(c, f, f->metadata, mdata, 0, mdatalen); 64 ret = jffs2_read_dnode(c, f, f->metadata, mdata, 0, mdatalen);
64 if (ret) { 65 if (ret) {
66 up(&f->sem);
65 kfree(mdata); 67 kfree(mdata);
66 return ret; 68 return ret;
67 } 69 }
70 up(&f->sem);
68 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen)); 71 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen));
69 } 72 }
70 73
@@ -75,8 +78,8 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
75 return -ENOMEM; 78 return -ENOMEM;
76 } 79 }
77 80
78 ret = jffs2_reserve_space(c, sizeof(*ri) + mdatalen, &phys_ofs, &alloclen, 81 ret = jffs2_reserve_space(c, sizeof(*ri) + mdatalen, &alloclen,
79 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 82 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
80 if (ret) { 83 if (ret) {
81 jffs2_free_raw_inode(ri); 84 jffs2_free_raw_inode(ri);
82 if (S_ISLNK(inode->i_mode & S_IFMT)) 85 if (S_ISLNK(inode->i_mode & S_IFMT))
@@ -127,7 +130,7 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
127 else 130 else
128 ri->data_crc = cpu_to_je32(0); 131 ri->data_crc = cpu_to_je32(0);
129 132
130 new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, phys_ofs, ALLOC_NORMAL); 133 new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, ALLOC_NORMAL);
131 if (S_ISLNK(inode->i_mode)) 134 if (S_ISLNK(inode->i_mode))
132 kfree(mdata); 135 kfree(mdata);
133 136
@@ -180,12 +183,17 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
180 183
181int jffs2_setattr(struct dentry *dentry, struct iattr *iattr) 184int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
182{ 185{
183 return jffs2_do_setattr(dentry->d_inode, iattr); 186 int rc;
187
188 rc = jffs2_do_setattr(dentry->d_inode, iattr);
189 if (!rc && (iattr->ia_valid & ATTR_MODE))
190 rc = jffs2_acl_chmod(dentry->d_inode);
191 return rc;
184} 192}
185 193
186int jffs2_statfs(struct super_block *sb, struct kstatfs *buf) 194int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
187{ 195{
188 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 196 struct jffs2_sb_info *c = JFFS2_SB_INFO(dentry->d_sb);
189 unsigned long avail; 197 unsigned long avail;
190 198
191 buf->f_type = JFFS2_SUPER_MAGIC; 199 buf->f_type = JFFS2_SUPER_MAGIC;
@@ -218,7 +226,6 @@ void jffs2_clear_inode (struct inode *inode)
218 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 226 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
219 227
220 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); 228 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
221
222 jffs2_do_clear_inode(c, f); 229 jffs2_do_clear_inode(c, f);
223} 230}
224 231
@@ -227,6 +234,8 @@ void jffs2_read_inode (struct inode *inode)
227 struct jffs2_inode_info *f; 234 struct jffs2_inode_info *f;
228 struct jffs2_sb_info *c; 235 struct jffs2_sb_info *c;
229 struct jffs2_raw_inode latest_node; 236 struct jffs2_raw_inode latest_node;
237 union jffs2_device_node jdev;
238 dev_t rdev = 0;
230 int ret; 239 int ret;
231 240
232 D1(printk(KERN_DEBUG "jffs2_read_inode(): inode->i_ino == %lu\n", inode->i_ino)); 241 D1(printk(KERN_DEBUG "jffs2_read_inode(): inode->i_ino == %lu\n", inode->i_ino));
@@ -258,7 +267,6 @@ void jffs2_read_inode (struct inode *inode)
258 inode->i_blocks = (inode->i_size + 511) >> 9; 267 inode->i_blocks = (inode->i_size + 511) >> 9;
259 268
260 switch (inode->i_mode & S_IFMT) { 269 switch (inode->i_mode & S_IFMT) {
261 jint16_t rdev;
262 270
263 case S_IFLNK: 271 case S_IFLNK:
264 inode->i_op = &jffs2_symlink_inode_operations; 272 inode->i_op = &jffs2_symlink_inode_operations;
@@ -292,8 +300,16 @@ void jffs2_read_inode (struct inode *inode)
292 case S_IFBLK: 300 case S_IFBLK:
293 case S_IFCHR: 301 case S_IFCHR:
294 /* Read the device numbers from the media */ 302 /* Read the device numbers from the media */
303 if (f->metadata->size != sizeof(jdev.old) &&
304 f->metadata->size != sizeof(jdev.new)) {
305 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size);
306 up(&f->sem);
307 jffs2_do_clear_inode(c, f);
308 make_bad_inode(inode);
309 return;
310 }
295 D1(printk(KERN_DEBUG "Reading device numbers from flash\n")); 311 D1(printk(KERN_DEBUG "Reading device numbers from flash\n"));
296 if (jffs2_read_dnode(c, f, f->metadata, (char *)&rdev, 0, sizeof(rdev)) < 0) { 312 if (jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size) < 0) {
297 /* Eep */ 313 /* Eep */
298 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); 314 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino);
299 up(&f->sem); 315 up(&f->sem);
@@ -301,12 +317,15 @@ void jffs2_read_inode (struct inode *inode)
301 make_bad_inode(inode); 317 make_bad_inode(inode);
302 return; 318 return;
303 } 319 }
320 if (f->metadata->size == sizeof(jdev.old))
321 rdev = old_decode_dev(je16_to_cpu(jdev.old));
322 else
323 rdev = new_decode_dev(je32_to_cpu(jdev.new));
304 324
305 case S_IFSOCK: 325 case S_IFSOCK:
306 case S_IFIFO: 326 case S_IFIFO:
307 inode->i_op = &jffs2_file_inode_operations; 327 inode->i_op = &jffs2_file_inode_operations;
308 init_special_inode(inode, inode->i_mode, 328 init_special_inode(inode, inode->i_mode, rdev);
309 old_decode_dev((je16_to_cpu(rdev))));
310 break; 329 break;
311 330
312 default: 331 default:
@@ -492,6 +511,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
492 } 511 }
493 memset(c->inocache_list, 0, INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *)); 512 memset(c->inocache_list, 0, INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *));
494 513
514 jffs2_init_xattr_subsystem(c);
515
495 if ((ret = jffs2_do_mount_fs(c))) 516 if ((ret = jffs2_do_mount_fs(c)))
496 goto out_inohash; 517 goto out_inohash;
497 518
@@ -526,6 +547,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
526 else 547 else
527 kfree(c->blocks); 548 kfree(c->blocks);
528 out_inohash: 549 out_inohash:
550 jffs2_clear_xattr_subsystem(c);
529 kfree(c->inocache_list); 551 kfree(c->inocache_list);
530 out_wbuf: 552 out_wbuf:
531 jffs2_flash_cleanup(c); 553 jffs2_flash_cleanup(c);
@@ -639,13 +661,6 @@ static int jffs2_flash_setup(struct jffs2_sb_info *c) {
639 return ret; 661 return ret;
640 } 662 }
641 663
642 /* add setups for other bizarre flashes here... */
643 if (jffs2_nor_ecc(c)) {
644 ret = jffs2_nor_ecc_flash_setup(c);
645 if (ret)
646 return ret;
647 }
648
649 /* and Dataflash */ 664 /* and Dataflash */
650 if (jffs2_dataflash(c)) { 665 if (jffs2_dataflash(c)) {
651 ret = jffs2_dataflash_setup(c); 666 ret = jffs2_dataflash_setup(c);
@@ -669,11 +684,6 @@ void jffs2_flash_cleanup(struct jffs2_sb_info *c) {
669 jffs2_nand_flash_cleanup(c); 684 jffs2_nand_flash_cleanup(c);
670 } 685 }
671 686
672 /* add cleanups for other bizarre flashes here... */
673 if (jffs2_nor_ecc(c)) {
674 jffs2_nor_ecc_flash_cleanup(c);
675 }
676
677 /* and DataFlash */ 687 /* and DataFlash */
678 if (jffs2_dataflash(c)) { 688 if (jffs2_dataflash(c)) {
679 jffs2_dataflash_cleanup(c); 689 jffs2_dataflash_cleanup(c);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index f9ffece453a3..daff3341ff92 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -125,6 +125,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 struct jffs2_eraseblock *jeb; 125 struct jffs2_eraseblock *jeb;
126 struct jffs2_raw_node_ref *raw; 126 struct jffs2_raw_node_ref *raw;
127 int ret = 0, inum, nlink; 127 int ret = 0, inum, nlink;
128 int xattr = 0;
128 129
129 if (down_interruptible(&c->alloc_sem)) 130 if (down_interruptible(&c->alloc_sem))
130 return -EINTR; 131 return -EINTR;
@@ -138,7 +139,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
138 the node CRCs etc. Do it now. */ 139 the node CRCs etc. Do it now. */
139 140
140 /* checked_ino is protected by the alloc_sem */ 141 /* checked_ino is protected by the alloc_sem */
141 if (c->checked_ino > c->highest_ino) { 142 if (c->checked_ino > c->highest_ino && xattr) {
142 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n", 143 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 c->unchecked_size); 144 c->unchecked_size);
144 jffs2_dbg_dump_block_lists_nolock(c); 145 jffs2_dbg_dump_block_lists_nolock(c);
@@ -148,6 +149,9 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
148 149
149 spin_unlock(&c->erase_completion_lock); 150 spin_unlock(&c->erase_completion_lock);
150 151
152 if (!xattr)
153 xattr = jffs2_verify_xattr(c);
154
151 spin_lock(&c->inocache_lock); 155 spin_lock(&c->inocache_lock);
152 156
153 ic = jffs2_get_ino_cache(c, c->checked_ino++); 157 ic = jffs2_get_ino_cache(c, c->checked_ino++);
@@ -161,6 +165,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
161 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
162 ic->ino)); 166 ic->ino));
163 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic);
164 continue; 169 continue;
165 } 170 }
166 switch(ic->state) { 171 switch(ic->state) {
@@ -181,6 +186,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
181 and trigger the BUG() above while we haven't yet 186 and trigger the BUG() above while we haven't yet
182 finished checking all its nodes */ 187 finished checking all its nodes */
183 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino)); 188 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 /* We need to come back again for the _same_ inode. We've
190 made no progress in this case, but that should be OK */
191 c->checked_ino--;
192
184 up(&c->alloc_sem); 193 up(&c->alloc_sem);
185 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); 194 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
186 return 0; 195 return 0;
@@ -231,7 +240,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
231 240
232 while(ref_obsolete(raw)) { 241 while(ref_obsolete(raw)) {
233 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw))); 242 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
234 raw = raw->next_phys; 243 raw = ref_next(raw);
235 if (unlikely(!raw)) { 244 if (unlikely(!raw)) {
236 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n"); 245 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
237 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 246 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
@@ -248,16 +257,36 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
248 257
249 if (!raw->next_in_ino) { 258 if (!raw->next_in_ino) {
250 /* Inode-less node. Clean marker, snapshot or something like that */ 259 /* Inode-less node. Clean marker, snapshot or something like that */
251 /* FIXME: If it's something that needs to be copied, including something
252 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
253 spin_unlock(&c->erase_completion_lock); 260 spin_unlock(&c->erase_completion_lock);
254 jffs2_mark_node_obsolete(c, raw); 261 if (ref_flags(raw) == REF_PRISTINE) {
262 /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
263 jffs2_garbage_collect_pristine(c, NULL, raw);
264 } else {
265 /* Just mark it obsolete */
266 jffs2_mark_node_obsolete(c, raw);
267 }
255 up(&c->alloc_sem); 268 up(&c->alloc_sem);
256 goto eraseit_lock; 269 goto eraseit_lock;
257 } 270 }
258 271
259 ic = jffs2_raw_ref_to_ic(raw); 272 ic = jffs2_raw_ref_to_ic(raw);
260 273
274#ifdef CONFIG_JFFS2_FS_XATTR
275 /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
276 * We can decide whether this node is inode or xattr by ic->class. */
277 if (ic->class == RAWNODE_CLASS_XATTR_DATUM
278 || ic->class == RAWNODE_CLASS_XATTR_REF) {
279 spin_unlock(&c->erase_completion_lock);
280
281 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
282 ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
283 } else {
284 ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
285 }
286 goto release_sem;
287 }
288#endif
289
261 /* We need to hold the inocache. Either the erase_completion_lock or 290 /* We need to hold the inocache. Either the erase_completion_lock or
262 the inocache_lock are sufficient; we trade down since the inocache_lock 291 the inocache_lock are sufficient; we trade down since the inocache_lock
263 causes less contention. */ 292 causes less contention. */
@@ -499,7 +528,6 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
499 struct jffs2_raw_node_ref *raw) 528 struct jffs2_raw_node_ref *raw)
500{ 529{
501 union jffs2_node_union *node; 530 union jffs2_node_union *node;
502 struct jffs2_raw_node_ref *nraw;
503 size_t retlen; 531 size_t retlen;
504 int ret; 532 int ret;
505 uint32_t phys_ofs, alloclen; 533 uint32_t phys_ofs, alloclen;
@@ -508,15 +536,16 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
508 536
509 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw))); 537 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
510 538
511 rawlen = ref_totlen(c, c->gcblock, raw); 539 alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
512 540
513 /* Ask for a small amount of space (or the totlen if smaller) because we 541 /* Ask for a small amount of space (or the totlen if smaller) because we
514 don't want to force wastage of the end of a block if splitting would 542 don't want to force wastage of the end of a block if splitting would
515 work. */ 543 work. */
516 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + 544 if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
517 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen); 545 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
518 /* this is not the exact summary size of it, 546
519 it is only an upper estimation */ 547 ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
548 /* 'rawlen' is not the exact summary size; it is only an upper estimation */
520 549
521 if (ret) 550 if (ret)
522 return ret; 551 return ret;
@@ -580,22 +609,17 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
580 } 609 }
581 break; 610 break;
582 default: 611 default:
583 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", 612 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
584 ref_offset(raw), je16_to_cpu(node->u.nodetype)); 613 if (ic) {
585 goto bail; 614 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
586 } 615 ref_offset(raw), je16_to_cpu(node->u.nodetype));
587 616 goto bail;
588 nraw = jffs2_alloc_raw_node_ref(); 617 }
589 if (!nraw) {
590 ret = -ENOMEM;
591 goto out_node;
592 } 618 }
593 619
594 /* OK, all the CRCs are good; this node can just be copied as-is. */ 620 /* OK, all the CRCs are good; this node can just be copied as-is. */
595 retry: 621 retry:
596 nraw->flash_offset = phys_ofs; 622 phys_ofs = write_ofs(c);
597 nraw->__totlen = rawlen;
598 nraw->next_phys = NULL;
599 623
600 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node); 624 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
601 625
@@ -603,17 +627,11 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
603 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n", 627 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
604 rawlen, phys_ofs, ret, retlen); 628 rawlen, phys_ofs, ret, retlen);
605 if (retlen) { 629 if (retlen) {
606 /* Doesn't belong to any inode */ 630 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
607 nraw->next_in_ino = NULL;
608
609 nraw->flash_offset |= REF_OBSOLETE;
610 jffs2_add_physical_node_ref(c, nraw);
611 jffs2_mark_node_obsolete(c, nraw);
612 } else { 631 } else {
613 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset); 632 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
614 jffs2_free_raw_node_ref(nraw);
615 } 633 }
616 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) { 634 if (!retried) {
617 /* Try to reallocate space and retry */ 635 /* Try to reallocate space and retry */
618 uint32_t dummy; 636 uint32_t dummy;
619 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size]; 637 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
@@ -625,7 +643,7 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
625 jffs2_dbg_acct_sanity_check(c,jeb); 643 jffs2_dbg_acct_sanity_check(c,jeb);
626 jffs2_dbg_acct_paranoia_check(c, jeb); 644 jffs2_dbg_acct_paranoia_check(c, jeb);
627 645
628 ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen); 646 ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
629 /* this is not the exact summary size of it, 647 /* this is not the exact summary size of it,
630 it is only an upper estimation */ 648 it is only an upper estimation */
631 649
@@ -638,25 +656,13 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
638 goto retry; 656 goto retry;
639 } 657 }
640 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 658 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
641 jffs2_free_raw_node_ref(nraw);
642 } 659 }
643 660
644 jffs2_free_raw_node_ref(nraw);
645 if (!ret) 661 if (!ret)
646 ret = -EIO; 662 ret = -EIO;
647 goto out_node; 663 goto out_node;
648 } 664 }
649 nraw->flash_offset |= REF_PRISTINE; 665 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
650 jffs2_add_physical_node_ref(c, nraw);
651
652 /* Link into per-inode list. This is safe because of the ic
653 state being INO_STATE_GC. Note that if we're doing this
654 for an inode which is in-core, the 'nraw' pointer is then
655 going to be fetched from ic->nodes by our caller. */
656 spin_lock(&c->erase_completion_lock);
657 nraw->next_in_ino = ic->nodes;
658 ic->nodes = nraw;
659 spin_unlock(&c->erase_completion_lock);
660 666
661 jffs2_mark_node_obsolete(c, raw); 667 jffs2_mark_node_obsolete(c, raw);
662 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw))); 668 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
@@ -675,19 +681,16 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
675 struct jffs2_full_dnode *new_fn; 681 struct jffs2_full_dnode *new_fn;
676 struct jffs2_raw_inode ri; 682 struct jffs2_raw_inode ri;
677 struct jffs2_node_frag *last_frag; 683 struct jffs2_node_frag *last_frag;
678 jint16_t dev; 684 union jffs2_device_node dev;
679 char *mdata = NULL, mdatalen = 0; 685 char *mdata = NULL, mdatalen = 0;
680 uint32_t alloclen, phys_ofs, ilen; 686 uint32_t alloclen, ilen;
681 int ret; 687 int ret;
682 688
683 if (S_ISBLK(JFFS2_F_I_MODE(f)) || 689 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
684 S_ISCHR(JFFS2_F_I_MODE(f)) ) { 690 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
685 /* For these, we don't actually need to read the old node */ 691 /* For these, we don't actually need to read the old node */
686 /* FIXME: for minor or major > 255. */ 692 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
687 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
688 JFFS2_F_I_RDEV_MIN(f)));
689 mdata = (char *)&dev; 693 mdata = (char *)&dev;
690 mdatalen = sizeof(dev);
691 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen)); 694 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
692 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) { 695 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
693 mdatalen = fn->size; 696 mdatalen = fn->size;
@@ -706,7 +709,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
706 709
707 } 710 }
708 711
709 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen, 712 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
710 JFFS2_SUMMARY_INODE_SIZE); 713 JFFS2_SUMMARY_INODE_SIZE);
711 if (ret) { 714 if (ret) {
712 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n", 715 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
@@ -744,7 +747,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
744 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); 747 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
745 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen)); 748 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
746 749
747 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC); 750 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
748 751
749 if (IS_ERR(new_fn)) { 752 if (IS_ERR(new_fn)) {
750 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn)); 753 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
@@ -765,7 +768,7 @@ static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_er
765{ 768{
766 struct jffs2_full_dirent *new_fd; 769 struct jffs2_full_dirent *new_fd;
767 struct jffs2_raw_dirent rd; 770 struct jffs2_raw_dirent rd;
768 uint32_t alloclen, phys_ofs; 771 uint32_t alloclen;
769 int ret; 772 int ret;
770 773
771 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 774 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -787,14 +790,14 @@ static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_er
787 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8)); 790 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
788 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize)); 791 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
789 792
790 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen, 793 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
791 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize)); 794 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
792 if (ret) { 795 if (ret) {
793 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n", 796 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
794 sizeof(rd)+rd.nsize, ret); 797 sizeof(rd)+rd.nsize, ret);
795 return ret; 798 return ret;
796 } 799 }
797 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC); 800 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
798 801
799 if (IS_ERR(new_fd)) { 802 if (IS_ERR(new_fd)) {
800 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd)); 803 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
@@ -922,7 +925,7 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
922 struct jffs2_raw_inode ri; 925 struct jffs2_raw_inode ri;
923 struct jffs2_node_frag *frag; 926 struct jffs2_node_frag *frag;
924 struct jffs2_full_dnode *new_fn; 927 struct jffs2_full_dnode *new_fn;
925 uint32_t alloclen, phys_ofs, ilen; 928 uint32_t alloclen, ilen;
926 int ret; 929 int ret;
927 930
928 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n", 931 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
@@ -1001,14 +1004,14 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
1001 ri.data_crc = cpu_to_je32(0); 1004 ri.data_crc = cpu_to_je32(0);
1002 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); 1005 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1003 1006
1004 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen, 1007 ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1005 JFFS2_SUMMARY_INODE_SIZE); 1008 JFFS2_SUMMARY_INODE_SIZE);
1006 if (ret) { 1009 if (ret) {
1007 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n", 1010 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1008 sizeof(ri), ret); 1011 sizeof(ri), ret);
1009 return ret; 1012 return ret;
1010 } 1013 }
1011 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC); 1014 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1012 1015
1013 if (IS_ERR(new_fn)) { 1016 if (IS_ERR(new_fn)) {
1014 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn)); 1017 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
@@ -1070,7 +1073,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1070{ 1073{
1071 struct jffs2_full_dnode *new_fn; 1074 struct jffs2_full_dnode *new_fn;
1072 struct jffs2_raw_inode ri; 1075 struct jffs2_raw_inode ri;
1073 uint32_t alloclen, phys_ofs, offset, orig_end, orig_start; 1076 uint32_t alloclen, offset, orig_end, orig_start;
1074 int ret = 0; 1077 int ret = 0;
1075 unsigned char *comprbuf = NULL, *writebuf; 1078 unsigned char *comprbuf = NULL, *writebuf;
1076 unsigned long pg; 1079 unsigned long pg;
@@ -1227,7 +1230,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1227 uint32_t cdatalen; 1230 uint32_t cdatalen;
1228 uint16_t comprtype = JFFS2_COMPR_NONE; 1231 uint16_t comprtype = JFFS2_COMPR_NONE;
1229 1232
1230 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, 1233 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1231 &alloclen, JFFS2_SUMMARY_INODE_SIZE); 1234 &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1232 1235
1233 if (ret) { 1236 if (ret) {
@@ -1264,7 +1267,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1264 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); 1267 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1265 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen)); 1268 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1266 1269
1267 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC); 1270 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1268 1271
1269 jffs2_free_comprbuf(comprbuf, writebuf); 1272 jffs2_free_comprbuf(comprbuf, writebuf);
1270 1273
diff --git a/fs/jffs2/histo.h b/fs/jffs2/histo.h
deleted file mode 100644
index 22a93a08210c..000000000000
--- a/fs/jffs2/histo.h
+++ /dev/null
@@ -1,3 +0,0 @@
1/* This file provides the bit-probabilities for the input file */
2#define BIT_DIVIDER 629
3static int bits[9] = { 179,167,183,165,159,198,178,119,}; /* ia32 .so files */
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
new file mode 100644
index 000000000000..2e0cc8e00b85
--- /dev/null
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -0,0 +1,55 @@
1/* $Id: jffs2_fs_i.h,v 1.19 2005/11/07 11:14:52 gleixner Exp $ */
2
3#ifndef _JFFS2_FS_I
4#define _JFFS2_FS_I
5
6#include <linux/version.h>
7#include <linux/rbtree.h>
8#include <linux/posix_acl.h>
9#include <asm/semaphore.h>
10
/* JFFS2-private per-inode state. */
11struct jffs2_inode_info {
12 /* We need an internal mutex similar to inode->i_mutex.
13 Unfortunately, we can't use the existing one, because
14 either the GC would deadlock, or we'd have to release it
15 before letting GC proceed. Or we'd have to put ugliness
16 into the GC code so it didn't attempt to obtain the i_mutex
17 for the inode(s) which are already locked */
18 struct semaphore sem;
19
20 /* The highest (datanode) version number used for this ino */
21 uint32_t highest_version;
22
23 /* List of data fragments which make up the file */
24 struct rb_root fragtree;
25
26 /* There may be one datanode which isn't referenced by any of the
27 above fragments, if it contains a metadata update but no actual
28 data - or if this is a directory inode */
29 /* This also holds the _only_ dnode for symlinks/device nodes,
30 etc. */
31 struct jffs2_full_dnode *metadata;
32
33 /* Directory entries */
34 struct jffs2_full_dirent *dents;
35
36 /* The target path if this is the inode of a symlink */
37 unsigned char *target;
38
39 /* Some stuff we just have to keep in-core at all times, for each inode. */
40 struct jffs2_inode_cache *inocache;
41
42 uint16_t flags;
43 uint8_t usercompr;
/* The VFS inode is embedded here only on non-eCos builds for kernels
   newer than 2.5.2 */
44#if !defined (__ECOS)
45#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
46 struct inode vfs_inode;
47#endif
48#endif
/* ACL pointers exist only when CONFIG_JFFS2_FS_POSIX_ACL is set */
49#ifdef CONFIG_JFFS2_FS_POSIX_ACL
50 struct posix_acl *i_acl_access;
51 struct posix_acl *i_acl_default;
52#endif
53};
54
55#endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
new file mode 100644
index 000000000000..b98594992eed
--- /dev/null
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -0,0 +1,136 @@
1/* $Id: jffs2_fs_sb.h,v 1.54 2005/09/21 13:37:34 dedekind Exp $ */
2
3#ifndef _JFFS2_FS_SB
4#define _JFFS2_FS_SB
5
6#include <linux/types.h>
7#include <linux/spinlock.h>
8#include <linux/workqueue.h>
9#include <linux/completion.h>
10#include <asm/semaphore.h>
11#include <linux/timer.h>
12#include <linux/wait.h>
13#include <linux/list.h>
14#include <linux/rwsem.h>
15
16#define JFFS2_SB_FLAG_RO 1
17#define JFFS2_SB_FLAG_SCANNING 2 /* Flash scanning is in progress */
18#define JFFS2_SB_FLAG_BUILDING 4 /* File system building is in progress */
19
20struct jffs2_inodirty;
21
22/* A struct for the overall file system control. Pointers to
23 jffs2_sb_info structs are named `c' in the source code.
24 Nee jffs_control
25*/
26struct jffs2_sb_info {
27 struct mtd_info *mtd;
28
29 uint32_t highest_ino;
30 uint32_t checked_ino;
31
32 unsigned int flags;
33
34 struct task_struct *gc_task; /* GC task struct */
35 struct completion gc_thread_start; /* GC thread start completion */
36 struct completion gc_thread_exit; /* GC thread exit completion port */
37
38 struct semaphore alloc_sem; /* Used to protect all the following
39 fields, and also to protect against
40 out-of-order writing of nodes. And GC. */
41 uint32_t cleanmarker_size; /* Size of an _inline_ CLEANMARKER
42 (i.e. zero for OOB CLEANMARKER) */
43
44 uint32_t flash_size;
45 uint32_t used_size;
46 uint32_t dirty_size;
47 uint32_t wasted_size;
48 uint32_t free_size;
49 uint32_t erasing_size;
50 uint32_t bad_size;
51 uint32_t sector_size;
52 uint32_t unchecked_size;
53
54 uint32_t nr_free_blocks;
55 uint32_t nr_erasing_blocks;
56
57 /* Number of free blocks there must be before we... */
58 uint8_t resv_blocks_write; /* ... allow a normal filesystem write */
59 uint8_t resv_blocks_deletion; /* ... allow a normal filesystem deletion */
60 uint8_t resv_blocks_gctrigger; /* ... wake up the GC thread */
61 uint8_t resv_blocks_gcbad; /* ... pick a block from the bad_list to GC */
62 uint8_t resv_blocks_gcmerge; /* ... merge pages when garbage collecting */
63
64 uint32_t nospc_dirty_size;
65
66 uint32_t nr_blocks;
67 struct jffs2_eraseblock *blocks; /* The whole array of blocks. Used for getting blocks
68 * from the offset (blocks[ofs / sector_size]) */
69 struct jffs2_eraseblock *nextblock; /* The block we're currently filling */
70
71 struct jffs2_eraseblock *gcblock; /* The block we're currently garbage-collecting */
72
73 struct list_head clean_list; /* Blocks 100% full of clean data */
74 struct list_head very_dirty_list; /* Blocks with lots of dirty space */
75 struct list_head dirty_list; /* Blocks with some dirty space */
76 struct list_head erasable_list; /* Blocks which are completely dirty, and need erasing */
77 struct list_head erasable_pending_wbuf_list; /* Blocks which need erasing but only after the current wbuf is flushed */
78 struct list_head erasing_list; /* Blocks which are currently erasing */
79 struct list_head erase_pending_list; /* Blocks which need erasing now */
80 struct list_head erase_complete_list; /* Blocks which are erased and need the clean marker written to them */
81 struct list_head free_list; /* Blocks which are free and ready to be used */
82 struct list_head bad_list; /* Bad blocks. */
83 struct list_head bad_used_list; /* Bad blocks with valid data in. */
84
85 spinlock_t erase_completion_lock; /* Protect free_list and erasing_list
86 against erase completion handler */
87 wait_queue_head_t erase_wait; /* For waiting for erases to complete */
88
89 wait_queue_head_t inocache_wq;
90 struct jffs2_inode_cache **inocache_list;
91 spinlock_t inocache_lock;
92
93 /* Sem to allow jffs2_garbage_collect_deletion_dirent to
94 drop the erase_completion_lock while it's holding a pointer
95 to an obsoleted node. I don't like this. Alternatives welcomed. */
96 struct semaphore erase_free_sem;
97
98 uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */
99
/* Write-buffer state below exists only when CONFIG_JFFS2_FS_WRITEBUFFER is set */
100#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
101 /* Write-behind buffer for NAND flash */
102 unsigned char *wbuf;
103 unsigned char *oobbuf;
104 uint32_t wbuf_ofs;
105 uint32_t wbuf_len;
106 struct jffs2_inodirty *wbuf_inodes;
107
108 struct rw_semaphore wbuf_sem; /* Protects the write buffer */
109
110 /* Information about out-of-band area usage... */
111 struct nand_ecclayout *ecclayout;
112 uint32_t badblock_pos;
113 uint32_t fsdata_pos;
114 uint32_t fsdata_len;
115#endif
116
117 struct jffs2_summary *summary; /* Summary information */
118
/* Extended-attribute bookkeeping; present only when CONFIG_JFFS2_FS_XATTR is set.
   xattrindex[] is an array of XATTRINDEX_HASHSIZE list heads. */
119#ifdef CONFIG_JFFS2_FS_XATTR
120#define XATTRINDEX_HASHSIZE (57)
121 uint32_t highest_xid;
122 uint32_t highest_xseqno;
123 struct list_head xattrindex[XATTRINDEX_HASHSIZE];
124 struct list_head xattr_unchecked;
125 struct list_head xattr_dead_list;
126 struct jffs2_xattr_ref *xref_dead_list;
127 struct jffs2_xattr_ref *xref_temp;
128 struct rw_semaphore xattr_sem;
129 uint32_t xdatum_mem_usage;
130 uint32_t xdatum_mem_threshold;
131#endif
132 /* OS-private pointer for getting back to master superblock info */
133 void *os_priv;
134};
135
136#endif /* _JFFS2_FS_SB */
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 036cbd11c004..8310c95478e9 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -26,6 +26,10 @@ static kmem_cache_t *tmp_dnode_info_slab;
26static kmem_cache_t *raw_node_ref_slab; 26static kmem_cache_t *raw_node_ref_slab;
27static kmem_cache_t *node_frag_slab; 27static kmem_cache_t *node_frag_slab;
28static kmem_cache_t *inode_cache_slab; 28static kmem_cache_t *inode_cache_slab;
29#ifdef CONFIG_JFFS2_FS_XATTR
30static kmem_cache_t *xattr_datum_cache;
31static kmem_cache_t *xattr_ref_cache;
32#endif
29 33
30int __init jffs2_create_slab_caches(void) 34int __init jffs2_create_slab_caches(void)
31{ 35{
@@ -53,8 +57,8 @@ int __init jffs2_create_slab_caches(void)
53 if (!tmp_dnode_info_slab) 57 if (!tmp_dnode_info_slab)
54 goto err; 58 goto err;
55 59
56 raw_node_ref_slab = kmem_cache_create("jffs2_raw_node_ref", 60 raw_node_ref_slab = kmem_cache_create("jffs2_refblock",
57 sizeof(struct jffs2_raw_node_ref), 61 sizeof(struct jffs2_raw_node_ref) * (REFS_PER_BLOCK + 1),
58 0, 0, NULL, NULL); 62 0, 0, NULL, NULL);
59 if (!raw_node_ref_slab) 63 if (!raw_node_ref_slab)
60 goto err; 64 goto err;
@@ -68,8 +72,24 @@ int __init jffs2_create_slab_caches(void)
68 inode_cache_slab = kmem_cache_create("jffs2_inode_cache", 72 inode_cache_slab = kmem_cache_create("jffs2_inode_cache",
69 sizeof(struct jffs2_inode_cache), 73 sizeof(struct jffs2_inode_cache),
70 0, 0, NULL, NULL); 74 0, 0, NULL, NULL);
71 if (inode_cache_slab) 75 if (!inode_cache_slab)
72 return 0; 76 goto err;
77
78#ifdef CONFIG_JFFS2_FS_XATTR
79 xattr_datum_cache = kmem_cache_create("jffs2_xattr_datum",
80 sizeof(struct jffs2_xattr_datum),
81 0, 0, NULL, NULL);
82 if (!xattr_datum_cache)
83 goto err;
84
85 xattr_ref_cache = kmem_cache_create("jffs2_xattr_ref",
86 sizeof(struct jffs2_xattr_ref),
87 0, 0, NULL, NULL);
88 if (!xattr_ref_cache)
89 goto err;
90#endif
91
92 return 0;
73 err: 93 err:
74 jffs2_destroy_slab_caches(); 94 jffs2_destroy_slab_caches();
75 return -ENOMEM; 95 return -ENOMEM;
@@ -91,6 +111,12 @@ void jffs2_destroy_slab_caches(void)
91 kmem_cache_destroy(node_frag_slab); 111 kmem_cache_destroy(node_frag_slab);
92 if(inode_cache_slab) 112 if(inode_cache_slab)
93 kmem_cache_destroy(inode_cache_slab); 113 kmem_cache_destroy(inode_cache_slab);
114#ifdef CONFIG_JFFS2_FS_XATTR
115 if (xattr_datum_cache)
116 kmem_cache_destroy(xattr_datum_cache);
117 if (xattr_ref_cache)
118 kmem_cache_destroy(xattr_ref_cache);
119#endif
94} 120}
95 121
96struct jffs2_full_dirent *jffs2_alloc_full_dirent(int namesize) 122struct jffs2_full_dirent *jffs2_alloc_full_dirent(int namesize)
@@ -164,15 +190,65 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x)
164 kmem_cache_free(tmp_dnode_info_slab, x); 190 kmem_cache_free(tmp_dnode_info_slab, x);
165} 191}
166 192
167struct jffs2_raw_node_ref *jffs2_alloc_raw_node_ref(void) 193struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
168{ 194{
169 struct jffs2_raw_node_ref *ret; 195 struct jffs2_raw_node_ref *ret;
196
170 ret = kmem_cache_alloc(raw_node_ref_slab, GFP_KERNEL); 197 ret = kmem_cache_alloc(raw_node_ref_slab, GFP_KERNEL);
171 dbg_memalloc("%p\n", ret); 198 if (ret) {
199 int i = 0;
200 for (i=0; i < REFS_PER_BLOCK; i++) {
201 ret[i].flash_offset = REF_EMPTY_NODE;
202 ret[i].next_in_ino = NULL;
203 }
204 ret[i].flash_offset = REF_LINK_NODE;
205 ret[i].next_in_ino = NULL;
206 }
172 return ret; 207 return ret;
173} 208}
174 209
175void jffs2_free_raw_node_ref(struct jffs2_raw_node_ref *x) 210int jffs2_prealloc_raw_node_refs(struct jffs2_sb_info *c,
211 struct jffs2_eraseblock *jeb, int nr)
212{
213 struct jffs2_raw_node_ref **p, *ref;
214 int i = nr;
215
216 dbg_memalloc("%d\n", nr);
217
218 p = &jeb->last_node;
219 ref = *p;
220
221 dbg_memalloc("Reserving %d refs for block @0x%08x\n", nr, jeb->offset);
222
223 /* If jeb->last_node is really a valid node then skip over it */
224 if (ref && ref->flash_offset != REF_EMPTY_NODE)
225 ref++;
226
227 while (i) {
228 if (!ref) {
229 dbg_memalloc("Allocating new refblock linked from %p\n", p);
230 ref = *p = jffs2_alloc_refblock();
231 if (!ref)
232 return -ENOMEM;
233 }
234 if (ref->flash_offset == REF_LINK_NODE) {
235 p = &ref->next_in_ino;
236 ref = *p;
237 continue;
238 }
239 i--;
240 ref++;
241 }
242 jeb->allocated_refs = nr;
243
244 dbg_memalloc("Reserved %d refs for block @0x%08x, last_node is %p (%08x,%p)\n",
245 nr, jeb->offset, jeb->last_node, jeb->last_node->flash_offset,
246 jeb->last_node->next_in_ino);
247
248 return 0;
249}
250
251void jffs2_free_refblock(struct jffs2_raw_node_ref *x)
176{ 252{
177 dbg_memalloc("%p\n", x); 253 dbg_memalloc("%p\n", x);
178 kmem_cache_free(raw_node_ref_slab, x); 254 kmem_cache_free(raw_node_ref_slab, x);
@@ -205,3 +281,42 @@ void jffs2_free_inode_cache(struct jffs2_inode_cache *x)
205 dbg_memalloc("%p\n", x); 281 dbg_memalloc("%p\n", x);
206 kmem_cache_free(inode_cache_slab, x); 282 kmem_cache_free(inode_cache_slab, x);
207} 283}
284
285#ifdef CONFIG_JFFS2_FS_XATTR
286struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void)
287{
288 struct jffs2_xattr_datum *xd;
289 xd = kmem_cache_alloc(xattr_datum_cache, GFP_KERNEL);
290 dbg_memalloc("%p\n", xd);
291
292 memset(xd, 0, sizeof(struct jffs2_xattr_datum));
293 xd->class = RAWNODE_CLASS_XATTR_DATUM;
294 xd->node = (void *)xd;
295 INIT_LIST_HEAD(&xd->xindex);
296 return xd;
297}
298
299void jffs2_free_xattr_datum(struct jffs2_xattr_datum *xd)
300{
301 dbg_memalloc("%p\n", xd);
302 kmem_cache_free(xattr_datum_cache, xd);
303}
304
305struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void)
306{
307 struct jffs2_xattr_ref *ref;
308 ref = kmem_cache_alloc(xattr_ref_cache, GFP_KERNEL);
309 dbg_memalloc("%p\n", ref);
310
311 memset(ref, 0, sizeof(struct jffs2_xattr_ref));
312 ref->class = RAWNODE_CLASS_XATTR_REF;
313 ref->node = (void *)ref;
314 return ref;
315}
316
317void jffs2_free_xattr_ref(struct jffs2_xattr_ref *ref)
318{
319 dbg_memalloc("%p\n", ref);
320 kmem_cache_free(xattr_ref_cache, ref);
321}
322#endif
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 1d46677afd17..7675b33396c7 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -438,8 +438,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
438 if (c->mtd->point) { 438 if (c->mtd->point) {
439 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 439 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
440 if (!err && retlen < tn->csize) { 440 if (!err && retlen < tn->csize) {
441 JFFS2_WARNING("MTD point returned len too short: %zu " 441 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
442 "instead of %u.\n", retlen, tn->csize);
443 c->mtd->unpoint(c->mtd, buffer, ofs, len); 442 c->mtd->unpoint(c->mtd, buffer, ofs, len);
444 } else if (err) 443 } else if (err)
445 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 444 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
@@ -462,8 +461,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
462 } 461 }
463 462
464 if (retlen != len) { 463 if (retlen != len) {
465 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", 464 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len);
466 ofs, retlen, len);
467 err = -EIO; 465 err = -EIO;
468 goto free_out; 466 goto free_out;
469 } 467 }
@@ -908,6 +906,9 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
908{ 906{
909 struct jffs2_inode_cache **prev; 907 struct jffs2_inode_cache **prev;
910 908
909#ifdef CONFIG_JFFS2_FS_XATTR
910 BUG_ON(old->xref);
911#endif
911 dbg_inocache("del %p (ino #%u)\n", old, old->ino); 912 dbg_inocache("del %p (ino #%u)\n", old, old->ino);
912 spin_lock(&c->inocache_lock); 913 spin_lock(&c->inocache_lock);
913 914
@@ -940,6 +941,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
940 this = c->inocache_list[i]; 941 this = c->inocache_list[i];
941 while (this) { 942 while (this) {
942 next = this->next; 943 next = this->next;
944 jffs2_xattr_free_inode(c, this);
943 jffs2_free_inode_cache(this); 945 jffs2_free_inode_cache(this);
944 this = next; 946 this = next;
945 } 947 }
@@ -954,9 +956,13 @@ void jffs2_free_raw_node_refs(struct jffs2_sb_info *c)
954 956
955 for (i=0; i<c->nr_blocks; i++) { 957 for (i=0; i<c->nr_blocks; i++) {
956 this = c->blocks[i].first_node; 958 this = c->blocks[i].first_node;
957 while(this) { 959 while (this) {
958 next = this->next_phys; 960 if (this[REFS_PER_BLOCK].flash_offset == REF_LINK_NODE)
959 jffs2_free_raw_node_ref(this); 961 next = this[REFS_PER_BLOCK].next_in_ino;
962 else
963 next = NULL;
964
965 jffs2_free_refblock(this);
960 this = next; 966 this = next;
961 } 967 }
962 c->blocks[i].first_node = c->blocks[i].last_node = NULL; 968 c->blocks[i].first_node = c->blocks[i].last_node = NULL;
@@ -1047,3 +1053,169 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c)
1047 cond_resched(); 1053 cond_resched();
1048 } 1054 }
1049} 1055}
1056
/*
 * Consume one previously reserved raw node ref of eraseblock 'jeb' for a
 * node of length 'len' whose flash_offset value is 'ofs', optionally link
 * it at the head of ic->nodes, and move 'len' bytes from free space into
 * the unchecked/used/dirty counters of both 'c' and 'jeb' according to
 * ref_flags() of the new ref.
 *
 * BUGs if jeb->allocated_refs is zero, or if the new ref does not start
 * at the block's offset (first node) or at the block's current first
 * free byte (subsequent nodes).
 *
 * Returns the ref that was filled in.
 */
1057struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c,
1058 struct jffs2_eraseblock *jeb,
1059 uint32_t ofs, uint32_t len,
1060 struct jffs2_inode_cache *ic)
1061{
1062 struct jffs2_raw_node_ref *ref;
1063
/* A ref slot must have been reserved beforehand; take one */
1064 BUG_ON(!jeb->allocated_refs);
1065 jeb->allocated_refs--;
1066
1067 ref = jeb->last_node;
1068
1069 dbg_noderef("Last node at %p is (%08x,%p)\n", ref, ref->flash_offset,
1070 ref->next_in_ino);
1071
/* Walk forward from last_node to the first empty slot, following
   REF_LINK_NODE entries through their next_in_ino pointer */
1072 while (ref->flash_offset != REF_EMPTY_NODE) {
1073 if (ref->flash_offset == REF_LINK_NODE)
1074 ref = ref->next_in_ino;
1075 else
1076 ref++;
1077 }
1078
1079 dbg_noderef("New ref is %p (%08x becomes %08x,%p) len 0x%x\n", ref,
1080 ref->flash_offset, ofs, ref->next_in_ino, len);
1081
1082 ref->flash_offset = ofs;
1083
/* The new node must sit exactly where the block's free space begins;
   this check reads jeb->free_size before it is reduced below */
1084 if (!jeb->first_node) {
1085 jeb->first_node = ref;
1086 BUG_ON(ref_offset(ref) != jeb->offset);
1087 } else if (unlikely(ref_offset(ref) != jeb->offset + c->sector_size - jeb->free_size)) {
1088 uint32_t last_len = ref_totlen(c, jeb, jeb->last_node);
1089
1090 JFFS2_ERROR("Adding new ref %p at (0x%08x-0x%08x) not immediately after previous (0x%08x-0x%08x)\n",
1091 ref, ref_offset(ref), ref_offset(ref)+len,
1092 ref_offset(jeb->last_node),
1093 ref_offset(jeb->last_node)+last_len);
1094 BUG();
1095 }
1096 jeb->last_node = ref;
1097
/* With an owner, push the ref onto the front of its node list */
1098 if (ic) {
1099 ref->next_in_ino = ic->nodes;
1100 ic->nodes = ref;
1101 } else {
1102 ref->next_in_ino = NULL;
1103 }
1104
/* Charge 'len' to the counter matching the ref's state flags */
1105 switch(ref_flags(ref)) {
1106 case REF_UNCHECKED:
1107 c->unchecked_size += len;
1108 jeb->unchecked_size += len;
1109 break;
1110
1111 case REF_NORMAL:
1112 case REF_PRISTINE:
1113 c->used_size += len;
1114 jeb->used_size += len;
1115 break;
1116
1117 case REF_OBSOLETE:
1118 c->dirty_size += len;
1119 jeb->dirty_size += len;
1120 break;
1121 }
1122 c->free_size -= len;
1123 jeb->free_size -= len;
1124
1125#ifdef TEST_TOTLEN
1126 /* Set (and test) __totlen field... for now */
1127 ref->__totlen = len;
1128 ref_totlen(c, jeb, ref);
1129#endif
1130 return ref;
1131}
1132
1133/* No locking, no reservation of 'ref'. Do not use on a live file system */
1134int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1135 uint32_t size)
1136{
1137 if (!size)
1138 return 0;
1139 if (unlikely(size > jeb->free_size)) {
1140 printk(KERN_CRIT "Dirty space 0x%x larger then free_size 0x%x (wasted 0x%x)\n",
1141 size, jeb->free_size, jeb->wasted_size);
1142 BUG();
1143 }
1144 /* REF_EMPTY_NODE is !obsolete, so that works OK */
1145 if (jeb->last_node && ref_obsolete(jeb->last_node)) {
1146#ifdef TEST_TOTLEN
1147 jeb->last_node->__totlen += size;
1148#endif
1149 c->dirty_size += size;
1150 c->free_size -= size;
1151 jeb->dirty_size += size;
1152 jeb->free_size -= size;
1153 } else {
1154 uint32_t ofs = jeb->offset + c->sector_size - jeb->free_size;
1155 ofs |= REF_OBSOLETE;
1156
1157 jffs2_link_node_ref(c, jeb, ofs, size, NULL);
1158 }
1159
1160 return 0;
1161}
1162
1163/* Calculate totlen from surrounding nodes or eraseblock */
1164static inline uint32_t __ref_totlen(struct jffs2_sb_info *c,
1165 struct jffs2_eraseblock *jeb,
1166 struct jffs2_raw_node_ref *ref)
1167{
1168 uint32_t ref_end;
1169 struct jffs2_raw_node_ref *next_ref = ref_next(ref);
1170
1171 if (next_ref)
1172 ref_end = ref_offset(next_ref);
1173 else {
1174 if (!jeb)
1175 jeb = &c->blocks[ref->flash_offset / c->sector_size];
1176
1177 /* Last node in block. Use free_space */
1178 if (unlikely(ref != jeb->last_node)) {
1179 printk(KERN_CRIT "ref %p @0x%08x is not jeb->last_node (%p @0x%08x)\n",
1180 ref, ref_offset(ref), jeb->last_node, jeb->last_node?ref_offset(jeb->last_node):0);
1181 BUG();
1182 }
1183 ref_end = jeb->offset + c->sector_size - jeb->free_size;
1184 }
1185 return ref_end - ref_offset(ref);
1186}
1187
1188uint32_t __jffs2_ref_totlen(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1189 struct jffs2_raw_node_ref *ref)
1190{
1191 uint32_t ret;
1192
1193 ret = __ref_totlen(c, jeb, ref);
1194
1195#ifdef TEST_TOTLEN
1196 if (unlikely(ret != ref->__totlen)) {
1197 if (!jeb)
1198 jeb = &c->blocks[ref->flash_offset / c->sector_size];
1199
1200 printk(KERN_CRIT "Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n",
1201 ref, ref_offset(ref), ref_offset(ref)+ref->__totlen,
1202 ret, ref->__totlen);
1203 if (ref_next(ref)) {
1204 printk(KERN_CRIT "next %p (0x%08x-0x%08x)\n", ref_next(ref), ref_offset(ref_next(ref)),
1205 ref_offset(ref_next(ref))+ref->__totlen);
1206 } else
1207 printk(KERN_CRIT "No next ref. jeb->last_node is %p\n", jeb->last_node);
1208
1209 printk(KERN_CRIT "jeb->wasted_size %x, dirty_size %x, used_size %x, free_size %x\n", jeb->wasted_size, jeb->dirty_size, jeb->used_size, jeb->free_size);
1210
1211#if defined(JFFS2_DBG_DUMPS) || defined(JFFS2_DBG_PARANOIA_CHECKS)
1212 __jffs2_dbg_dump_node_refs_nolock(c, jeb);
1213#endif
1214
1215 WARN_ON(1);
1216
1217 ret = ref->__totlen;
1218 }
1219#endif /* TEST_TOTLEN */
1220 return ret;
1221}
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 23a67bb3052f..f752baa8d399 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -14,12 +14,13 @@
14#ifndef __JFFS2_NODELIST_H__ 14#ifndef __JFFS2_NODELIST_H__
15#define __JFFS2_NODELIST_H__ 15#define __JFFS2_NODELIST_H__
16 16
17#include <linux/config.h>
18#include <linux/fs.h> 17#include <linux/fs.h>
19#include <linux/types.h> 18#include <linux/types.h>
20#include <linux/jffs2.h> 19#include <linux/jffs2.h>
21#include <linux/jffs2_fs_sb.h> 20#include "jffs2_fs_sb.h"
22#include <linux/jffs2_fs_i.h> 21#include "jffs2_fs_i.h"
22#include "xattr.h"
23#include "acl.h"
23#include "summary.h" 24#include "summary.h"
24 25
25#ifdef __ECOS 26#ifdef __ECOS
@@ -75,14 +76,50 @@
75struct jffs2_raw_node_ref 76struct jffs2_raw_node_ref
76{ 77{
77 struct jffs2_raw_node_ref *next_in_ino; /* Points to the next raw_node_ref 78 struct jffs2_raw_node_ref *next_in_ino; /* Points to the next raw_node_ref
78 for this inode. If this is the last, it points to the inode_cache 79 for this object. If this _is_ the last, it points to the inode_cache,
79 for this inode instead. The inode_cache will have NULL in the first 80 xattr_ref or xattr_datum instead. The common part of those structures
80 word so you know when you've got there :) */ 81 has NULL in the first word. See jffs2_raw_ref_to_ic() below */
81 struct jffs2_raw_node_ref *next_phys;
82 uint32_t flash_offset; 82 uint32_t flash_offset;
83#define TEST_TOTLEN
84#ifdef TEST_TOTLEN
83 uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */ 85 uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */
86#endif
84}; 87};
85 88
/* Special flash_offset values used inside a block of refs:
   REF_LINK_NODE marks the slot whose next_in_ino points to the next
   block of refs (see ref_next()); REF_EMPTY_NODE marks a slot that
   does not hold a node yet. */
89#define REF_LINK_NODE ((int32_t)-1)
90#define REF_EMPTY_NODE ((int32_t)-2)
91
92/* Use blocks of about 256 bytes */
/* Derived from sizeof(struct jffs2_raw_node_ref); this is the number of
   ordinary ref slots per block */
93#define REFS_PER_BLOCK ((255/sizeof(struct jffs2_raw_node_ref))-1)
94
95static inline struct jffs2_raw_node_ref *ref_next(struct jffs2_raw_node_ref *ref)
96{
97 ref++;
98
99 /* Link to another block of refs */
100 if (ref->flash_offset == REF_LINK_NODE) {
101 ref = ref->next_in_ino;
102 if (!ref)
103 return ref;
104 }
105
106 /* End of chain */
107 if (ref->flash_offset == REF_EMPTY_NODE)
108 return NULL;
109
110 return ref;
111}
112
113static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw)
114{
115 while(raw->next_in_ino)
116 raw = raw->next_in_ino;
117
118 /* NB. This can be a jffs2_xattr_datum or jffs2_xattr_ref and
119 not actually a jffs2_inode_cache. Check ->class */
120 return ((struct jffs2_inode_cache *)raw);
121}
122
86 /* flash_offset & 3 always has to be zero, because nodes are 123 /* flash_offset & 3 always has to be zero, because nodes are
87 always aligned at 4 bytes. So we have a couple of extra bits 124 always aligned at 4 bytes. So we have a couple of extra bits
88 to play with, which indicate the node's status; see below: */ 125 to play with, which indicate the node's status; see below: */
@@ -95,6 +132,11 @@ struct jffs2_raw_node_ref
95#define ref_obsolete(ref) (((ref)->flash_offset & 3) == REF_OBSOLETE) 132#define ref_obsolete(ref) (((ref)->flash_offset & 3) == REF_OBSOLETE)
96#define mark_ref_normal(ref) do { (ref)->flash_offset = ref_offset(ref) | REF_NORMAL; } while(0) 133#define mark_ref_normal(ref) do { (ref)->flash_offset = ref_offset(ref) | REF_NORMAL; } while(0)
97 134
135/* NB: REF_PRISTINE for an inode-less node (ref->next_in_ino == NULL) indicates
136 it is an unknown node of type JFFS2_NODETYPE_RWCOMPAT_COPY, so it'll get
137 copied. If you need to do anything different to GC inode-less nodes, then
138 you need to modify gc.c accordingly. */
139
98/* For each inode in the filesystem, we need to keep a record of 140/* For each inode in the filesystem, we need to keep a record of
99 nlink, because it would be a PITA to scan the whole directory tree 141 nlink, because it would be a PITA to scan the whole directory tree
100 at read_inode() time to calculate it, and to keep sufficient information 142 at read_inode() time to calculate it, and to keep sufficient information
@@ -103,15 +145,27 @@ struct jffs2_raw_node_ref
103 a pointer to the first physical node which is part of this inode, too. 145 a pointer to the first physical node which is part of this inode, too.
104*/ 146*/
105struct jffs2_inode_cache { 147struct jffs2_inode_cache {
148 /* First part of structure is shared with other objects which
149 can terminate the raw node refs' next_in_ino list -- which are
150 currently struct jffs2_xattr_datum and struct jffs2_xattr_ref. */
151
106 struct jffs2_full_dirent *scan_dents; /* Used during scan to hold 152 struct jffs2_full_dirent *scan_dents; /* Used during scan to hold
107 temporary lists of dirents, and later must be set to 153 temporary lists of dirents, and later must be set to
108 NULL to mark the end of the raw_node_ref->next_in_ino 154 NULL to mark the end of the raw_node_ref->next_in_ino
109 chain. */ 155 chain. */
110 struct jffs2_inode_cache *next;
111 struct jffs2_raw_node_ref *nodes; 156 struct jffs2_raw_node_ref *nodes;
157 uint8_t class; /* It's used for identification */
158
159 /* end of shared structure */
160
161 uint8_t flags;
162 uint16_t state;
112 uint32_t ino; 163 uint32_t ino;
164 struct jffs2_inode_cache *next;
165#ifdef CONFIG_JFFS2_FS_XATTR
166 struct jffs2_xattr_ref *xref;
167#endif
113 int nlink; 168 int nlink;
114 int state;
115}; 169};
116 170
117/* Inode states for 'state' above. We need the 'GC' state to prevent 171/* Inode states for 'state' above. We need the 'GC' state to prevent
@@ -125,8 +179,16 @@ struct jffs2_inode_cache {
125#define INO_STATE_READING 5 /* In read_inode() */ 179#define INO_STATE_READING 5 /* In read_inode() */
126#define INO_STATE_CLEARING 6 /* In clear_inode() */ 180#define INO_STATE_CLEARING 6 /* In clear_inode() */
127 181
182#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */
183
184#define RAWNODE_CLASS_INODE_CACHE 0
185#define RAWNODE_CLASS_XATTR_DATUM 1
186#define RAWNODE_CLASS_XATTR_REF 2
187
128#define INOCACHE_HASHSIZE 128 188#define INOCACHE_HASHSIZE 128
129 189
190#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
191
130/* 192/*
131 Larger representation of a raw node, kept in-core only when the 193 Larger representation of a raw node, kept in-core only when the
132 struct inode for this particular ino is instantiated. 194 struct inode for this particular ino is instantiated.
@@ -192,6 +254,7 @@ struct jffs2_eraseblock
192 uint32_t wasted_size; 254 uint32_t wasted_size;
193 uint32_t free_size; /* Note that sector_size - free_size 255 uint32_t free_size; /* Note that sector_size - free_size
194 is the address of the first free space */ 256 is the address of the first free space */
257 uint32_t allocated_refs;
195 struct jffs2_raw_node_ref *first_node; 258 struct jffs2_raw_node_ref *first_node;
196 struct jffs2_raw_node_ref *last_node; 259 struct jffs2_raw_node_ref *last_node;
197 260
@@ -203,57 +266,7 @@ static inline int jffs2_blocks_use_vmalloc(struct jffs2_sb_info *c)
203 return ((c->flash_size / c->sector_size) * sizeof (struct jffs2_eraseblock)) > (128 * 1024); 266 return ((c->flash_size / c->sector_size) * sizeof (struct jffs2_eraseblock)) > (128 * 1024);
204} 267}
205 268
206/* Calculate totlen from surrounding nodes or eraseblock */ 269#define ref_totlen(a, b, c) __jffs2_ref_totlen((a), (b), (c))
207static inline uint32_t __ref_totlen(struct jffs2_sb_info *c,
208 struct jffs2_eraseblock *jeb,
209 struct jffs2_raw_node_ref *ref)
210{
211 uint32_t ref_end;
212
213 if (ref->next_phys)
214 ref_end = ref_offset(ref->next_phys);
215 else {
216 if (!jeb)
217 jeb = &c->blocks[ref->flash_offset / c->sector_size];
218
219 /* Last node in block. Use free_space */
220 BUG_ON(ref != jeb->last_node);
221 ref_end = jeb->offset + c->sector_size - jeb->free_size;
222 }
223 return ref_end - ref_offset(ref);
224}
225
226static inline uint32_t ref_totlen(struct jffs2_sb_info *c,
227 struct jffs2_eraseblock *jeb,
228 struct jffs2_raw_node_ref *ref)
229{
230 uint32_t ret;
231
232#if CONFIG_JFFS2_FS_DEBUG > 0
233 if (jeb && jeb != &c->blocks[ref->flash_offset / c->sector_size]) {
234 printk(KERN_CRIT "ref_totlen called with wrong block -- at 0x%08x instead of 0x%08x; ref 0x%08x\n",
235 jeb->offset, c->blocks[ref->flash_offset / c->sector_size].offset, ref_offset(ref));
236 BUG();
237 }
238#endif
239
240#if 1
241 ret = ref->__totlen;
242#else
243 /* This doesn't actually work yet */
244 ret = __ref_totlen(c, jeb, ref);
245 if (ret != ref->__totlen) {
246 printk(KERN_CRIT "Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n",
247 ref, ref_offset(ref), ref_offset(ref)+ref->__totlen,
248 ret, ref->__totlen);
249 if (!jeb)
250 jeb = &c->blocks[ref->flash_offset / c->sector_size];
251 jffs2_dbg_dump_node_refs_nolock(c, jeb);
252 BUG();
253 }
254#endif
255 return ret;
256}
257 270
258#define ALLOC_NORMAL 0 /* Normal allocation */ 271#define ALLOC_NORMAL 0 /* Normal allocation */
259#define ALLOC_DELETION 1 /* Deletion node. Best to allow it */ 272#define ALLOC_DELETION 1 /* Deletion node. Best to allow it */
@@ -268,13 +281,15 @@ static inline uint32_t ref_totlen(struct jffs2_sb_info *c,
268 281
269#define PAD(x) (((x)+3)&~3) 282#define PAD(x) (((x)+3)&~3)
270 283
271static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw) 284static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev)
272{ 285{
273 while(raw->next_in_ino) { 286 if (old_valid_dev(rdev)) {
274 raw = raw->next_in_ino; 287 jdev->old = cpu_to_je16(old_encode_dev(rdev));
288 return sizeof(jdev->old);
289 } else {
290 jdev->new = cpu_to_je32(new_encode_dev(rdev));
291 return sizeof(jdev->new);
275 } 292 }
276
277 return ((struct jffs2_inode_cache *)raw);
278} 293}
279 294
280static inline struct jffs2_node_frag *frag_first(struct rb_root *root) 295static inline struct jffs2_node_frag *frag_first(struct rb_root *root)
@@ -299,7 +314,6 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
299 return rb_entry(node, struct jffs2_node_frag, rb); 314 return rb_entry(node, struct jffs2_node_frag, rb);
300} 315}
301 316
302#define rb_parent(rb) ((rb)->rb_parent)
303#define frag_next(frag) rb_entry(rb_next(&(frag)->rb), struct jffs2_node_frag, rb) 317#define frag_next(frag) rb_entry(rb_next(&(frag)->rb), struct jffs2_node_frag, rb)
304#define frag_prev(frag) rb_entry(rb_prev(&(frag)->rb), struct jffs2_node_frag, rb) 318#define frag_prev(frag) rb_entry(rb_prev(&(frag)->rb), struct jffs2_node_frag, rb)
305#define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb) 319#define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb)
@@ -324,28 +338,44 @@ void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *t
324int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); 338int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
325void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); 339void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
326int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn); 340int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
341struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c,
342 struct jffs2_eraseblock *jeb,
343 uint32_t ofs, uint32_t len,
344 struct jffs2_inode_cache *ic);
345extern uint32_t __jffs2_ref_totlen(struct jffs2_sb_info *c,
346 struct jffs2_eraseblock *jeb,
347 struct jffs2_raw_node_ref *ref);
327 348
328/* nodemgmt.c */ 349/* nodemgmt.c */
329int jffs2_thread_should_wake(struct jffs2_sb_info *c); 350int jffs2_thread_should_wake(struct jffs2_sb_info *c);
330int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, 351int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
331 uint32_t *len, int prio, uint32_t sumsize); 352 uint32_t *len, int prio, uint32_t sumsize);
332int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, 353int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
333 uint32_t *len, uint32_t sumsize); 354 uint32_t *len, uint32_t sumsize);
334int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *new); 355struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
356 uint32_t ofs, uint32_t len,
357 struct jffs2_inode_cache *ic);
335void jffs2_complete_reservation(struct jffs2_sb_info *c); 358void jffs2_complete_reservation(struct jffs2_sb_info *c);
336void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *raw); 359void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *raw);
337 360
338/* write.c */ 361/* write.c */
339int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri); 362int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri);
340 363
341struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const unsigned char *data, uint32_t datalen, uint32_t flash_ofs, int alloc_mode); 364struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
342struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_dirent *rd, const unsigned char *name, uint32_t namelen, uint32_t flash_ofs, int alloc_mode); 365 struct jffs2_raw_inode *ri, const unsigned char *data,
366 uint32_t datalen, int alloc_mode);
367struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
368 struct jffs2_raw_dirent *rd, const unsigned char *name,
369 uint32_t namelen, int alloc_mode);
343int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 370int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
344 struct jffs2_raw_inode *ri, unsigned char *buf, 371 struct jffs2_raw_inode *ri, unsigned char *buf,
345 uint32_t offset, uint32_t writelen, uint32_t *retlen); 372 uint32_t offset, uint32_t writelen, uint32_t *retlen);
346int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen); 373int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
347int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, int namelen, struct jffs2_inode_info *dead_f, uint32_t time); 374 struct jffs2_raw_inode *ri, const char *name, int namelen);
348int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, uint8_t type, const char *name, int namelen, uint32_t time); 375int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
376 int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
377int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
378 uint8_t type, const char *name, int namelen, uint32_t time);
349 379
350 380
351/* readinode.c */ 381/* readinode.c */
@@ -368,12 +398,19 @@ struct jffs2_raw_inode *jffs2_alloc_raw_inode(void);
368void jffs2_free_raw_inode(struct jffs2_raw_inode *); 398void jffs2_free_raw_inode(struct jffs2_raw_inode *);
369struct jffs2_tmp_dnode_info *jffs2_alloc_tmp_dnode_info(void); 399struct jffs2_tmp_dnode_info *jffs2_alloc_tmp_dnode_info(void);
370void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *); 400void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *);
371struct jffs2_raw_node_ref *jffs2_alloc_raw_node_ref(void); 401int jffs2_prealloc_raw_node_refs(struct jffs2_sb_info *c,
372void jffs2_free_raw_node_ref(struct jffs2_raw_node_ref *); 402 struct jffs2_eraseblock *jeb, int nr);
403void jffs2_free_refblock(struct jffs2_raw_node_ref *);
373struct jffs2_node_frag *jffs2_alloc_node_frag(void); 404struct jffs2_node_frag *jffs2_alloc_node_frag(void);
374void jffs2_free_node_frag(struct jffs2_node_frag *); 405void jffs2_free_node_frag(struct jffs2_node_frag *);
375struct jffs2_inode_cache *jffs2_alloc_inode_cache(void); 406struct jffs2_inode_cache *jffs2_alloc_inode_cache(void);
376void jffs2_free_inode_cache(struct jffs2_inode_cache *); 407void jffs2_free_inode_cache(struct jffs2_inode_cache *);
408#ifdef CONFIG_JFFS2_FS_XATTR
409struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void);
410void jffs2_free_xattr_datum(struct jffs2_xattr_datum *);
411struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void);
412void jffs2_free_xattr_ref(struct jffs2_xattr_ref *);
413#endif
377 414
378/* gc.c */ 415/* gc.c */
379int jffs2_garbage_collect_pass(struct jffs2_sb_info *c); 416int jffs2_garbage_collect_pass(struct jffs2_sb_info *c);
@@ -393,12 +430,14 @@ int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
393 uint32_t ofs, uint32_t len); 430 uint32_t ofs, uint32_t len);
394struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); 431struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino);
395int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); 432int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
433int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size);
396 434
397/* build.c */ 435/* build.c */
398int jffs2_do_mount_fs(struct jffs2_sb_info *c); 436int jffs2_do_mount_fs(struct jffs2_sb_info *c);
399 437
400/* erase.c */ 438/* erase.c */
401void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); 439void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count);
440void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
402 441
403#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 442#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
404/* wbuf.c */ 443/* wbuf.c */
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 49127a1f0458..d88376992ed9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -23,13 +23,12 @@
23 * jffs2_reserve_space - request physical space to write nodes to flash 23 * jffs2_reserve_space - request physical space to write nodes to flash
24 * @c: superblock info 24 * @c: superblock info
25 * @minsize: Minimum acceptable size of allocation 25 * @minsize: Minimum acceptable size of allocation
26 * @ofs: Returned value of node offset
27 * @len: Returned value of allocation length 26 * @len: Returned value of allocation length
28 * @prio: Allocation type - ALLOC_{NORMAL,DELETION} 27 * @prio: Allocation type - ALLOC_{NORMAL,DELETION}
29 * 28 *
30 * Requests a block of physical space on the flash. Returns zero for success 29 * Requests a block of physical space on the flash. Returns zero for success
31 * and puts 'ofs' and 'len' into the appriopriate place, or returns -ENOSPC 30 * and puts 'len' into the appropriate place, or returns -ENOSPC or other
32 * or other error if appropriate. 31 * error if appropriate. Doesn't return len since that's
33 * 32 *
34 * If it returns zero, jffs2_reserve_space() also downs the per-filesystem 33 * If it returns zero, jffs2_reserve_space() also downs the per-filesystem
35 * allocation semaphore, to prevent more than one allocation from being 34 * allocation semaphore, to prevent more than one allocation from being
@@ -40,9 +39,9 @@
40 */ 39 */
41 40
42static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, 41static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
43 uint32_t *ofs, uint32_t *len, uint32_t sumsize); 42 uint32_t *len, uint32_t sumsize);
44 43
45int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, 44int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
46 uint32_t *len, int prio, uint32_t sumsize) 45 uint32_t *len, int prio, uint32_t sumsize)
47{ 46{
48 int ret = -EAGAIN; 47 int ret = -EAGAIN;
@@ -132,19 +131,21 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs
132 spin_lock(&c->erase_completion_lock); 131 spin_lock(&c->erase_completion_lock);
133 } 132 }
134 133
135 ret = jffs2_do_reserve_space(c, minsize, ofs, len, sumsize); 134 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
136 if (ret) { 135 if (ret) {
137 D1(printk(KERN_DEBUG "jffs2_reserve_space: ret is %d\n", ret)); 136 D1(printk(KERN_DEBUG "jffs2_reserve_space: ret is %d\n", ret));
138 } 137 }
139 } 138 }
140 spin_unlock(&c->erase_completion_lock); 139 spin_unlock(&c->erase_completion_lock);
140 if (!ret)
141 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
141 if (ret) 142 if (ret)
142 up(&c->alloc_sem); 143 up(&c->alloc_sem);
143 return ret; 144 return ret;
144} 145}
145 146
146int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, 147int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
147 uint32_t *len, uint32_t sumsize) 148 uint32_t *len, uint32_t sumsize)
148{ 149{
149 int ret = -EAGAIN; 150 int ret = -EAGAIN;
150 minsize = PAD(minsize); 151 minsize = PAD(minsize);
@@ -153,12 +154,15 @@ int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *
153 154
154 spin_lock(&c->erase_completion_lock); 155 spin_lock(&c->erase_completion_lock);
155 while(ret == -EAGAIN) { 156 while(ret == -EAGAIN) {
156 ret = jffs2_do_reserve_space(c, minsize, ofs, len, sumsize); 157 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
157 if (ret) { 158 if (ret) {
158 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret)); 159 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret));
159 } 160 }
160 } 161 }
161 spin_unlock(&c->erase_completion_lock); 162 spin_unlock(&c->erase_completion_lock);
163 if (!ret)
164 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
165
162 return ret; 166 return ret;
163} 167}
164 168
@@ -207,8 +211,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
207 struct jffs2_eraseblock *ejeb; 211 struct jffs2_eraseblock *ejeb;
208 212
209 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
210 list_del(&ejeb->list); 214 list_move_tail(&ejeb->list, &c->erase_pending_list);
211 list_add_tail(&ejeb->list, &c->erase_pending_list);
212 c->nr_erasing_blocks++; 215 c->nr_erasing_blocks++;
213 jffs2_erase_pending_trigger(c); 216 jffs2_erase_pending_trigger(c);
214 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 217 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
@@ -259,10 +262,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
259} 262}
260 263
261/* Called with alloc sem _and_ erase_completion_lock */ 264/* Called with alloc sem _and_ erase_completion_lock */
262static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, uint32_t *len, uint32_t sumsize) 265static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
266 uint32_t *len, uint32_t sumsize)
263{ 267{
264 struct jffs2_eraseblock *jeb = c->nextblock; 268 struct jffs2_eraseblock *jeb = c->nextblock;
265 uint32_t reserved_size; /* for summary information at the end of the jeb */ 269 uint32_t reserved_size; /* for summary information at the end of the jeb */
266 int ret; 270 int ret;
267 271
268 restart: 272 restart:
@@ -312,6 +316,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
312 } 316 }
313 } else { 317 } else {
314 if (jeb && minsize > jeb->free_size) { 318 if (jeb && minsize > jeb->free_size) {
319 uint32_t waste;
320
315 /* Skip the end of this block and file it as having some dirty space */ 321 /* Skip the end of this block and file it as having some dirty space */
316 /* If there's a pending write to it, flush now */ 322 /* If there's a pending write to it, flush now */
317 323
@@ -324,10 +330,26 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
324 goto restart; 330 goto restart;
325 } 331 }
326 332
327 c->wasted_size += jeb->free_size; 333 spin_unlock(&c->erase_completion_lock);
328 c->free_size -= jeb->free_size; 334
329 jeb->wasted_size += jeb->free_size; 335 ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
330 jeb->free_size = 0; 336 if (ret)
337 return ret;
338 /* Just lock it again and continue. Nothing much can change because
339 we hold c->alloc_sem anyway. In fact, it's not entirely clear why
340 we hold c->erase_completion_lock in the majority of this function...
341 but that's a question for another (more caffeine-rich) day. */
342 spin_lock(&c->erase_completion_lock);
343
344 waste = jeb->free_size;
345 jffs2_link_node_ref(c, jeb,
346 (jeb->offset + c->sector_size - waste) | REF_OBSOLETE,
347 waste, NULL);
348 /* FIXME: that made it count as dirty. Convert to wasted */
349 jeb->dirty_size -= waste;
350 c->dirty_size -= waste;
351 jeb->wasted_size += waste;
352 c->wasted_size += waste;
331 353
332 jffs2_close_nextblock(c, jeb); 354 jffs2_close_nextblock(c, jeb);
333 jeb = NULL; 355 jeb = NULL;
@@ -349,7 +371,6 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
349 } 371 }
350 /* OK, jeb (==c->nextblock) is now pointing at a block which definitely has 372 /* OK, jeb (==c->nextblock) is now pointing at a block which definitely has
351 enough space */ 373 enough space */
352 *ofs = jeb->offset + (c->sector_size - jeb->free_size);
353 *len = jeb->free_size - reserved_size; 374 *len = jeb->free_size - reserved_size;
354 375
355 if (c->cleanmarker_size && jeb->used_size == c->cleanmarker_size && 376 if (c->cleanmarker_size && jeb->used_size == c->cleanmarker_size &&
@@ -365,7 +386,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
365 spin_lock(&c->erase_completion_lock); 386 spin_lock(&c->erase_completion_lock);
366 } 387 }
367 388
368 D1(printk(KERN_DEBUG "jffs2_do_reserve_space(): Giving 0x%x bytes at 0x%x\n", *len, *ofs)); 389 D1(printk(KERN_DEBUG "jffs2_do_reserve_space(): Giving 0x%x bytes at 0x%x\n",
390 *len, jeb->offset + (c->sector_size - jeb->free_size)));
369 return 0; 391 return 0;
370} 392}
371 393
@@ -374,7 +396,6 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
374 * @c: superblock info 396 * @c: superblock info
375 * @new: new node reference to add 397 * @new: new node reference to add
376 * @len: length of this physical node 398 * @len: length of this physical node
377 * @dirty: dirty flag for new node
378 * 399 *
379 * Should only be used to report nodes for which space has been allocated 400 * Should only be used to report nodes for which space has been allocated
380 * by jffs2_reserve_space. 401 * by jffs2_reserve_space.
@@ -382,42 +403,30 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin
382 * Must be called with the alloc_sem held. 403 * Must be called with the alloc_sem held.
383 */ 404 */
384 405
385int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *new) 406struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
407 uint32_t ofs, uint32_t len,
408 struct jffs2_inode_cache *ic)
386{ 409{
387 struct jffs2_eraseblock *jeb; 410 struct jffs2_eraseblock *jeb;
388 uint32_t len; 411 struct jffs2_raw_node_ref *new;
389 412
390 jeb = &c->blocks[new->flash_offset / c->sector_size]; 413 jeb = &c->blocks[ofs / c->sector_size];
391 len = ref_totlen(c, jeb, new);
392 414
393 D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", ref_offset(new), ref_flags(new), len)); 415 D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n",
416 ofs & ~3, ofs & 3, len));
394#if 1 417#if 1
395 /* we could get some obsolete nodes after nextblock was refiled 418 /* Allow non-obsolete nodes only to be added at the end of c->nextblock,
396 in wbuf.c */ 419 if c->nextblock is set. Note that wbuf.c will file obsolete nodes
397 if ((c->nextblock || !ref_obsolete(new)) 420 even after refiling c->nextblock */
398 &&(jeb != c->nextblock || ref_offset(new) != jeb->offset + (c->sector_size - jeb->free_size))) { 421 if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE))
422 && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) {
399 printk(KERN_WARNING "argh. node added in wrong place\n"); 423 printk(KERN_WARNING "argh. node added in wrong place\n");
400 jffs2_free_raw_node_ref(new); 424 return ERR_PTR(-EINVAL);
401 return -EINVAL;
402 } 425 }
403#endif 426#endif
404 spin_lock(&c->erase_completion_lock); 427 spin_lock(&c->erase_completion_lock);
405 428
406 if (!jeb->first_node) 429 new = jffs2_link_node_ref(c, jeb, ofs, len, ic);
407 jeb->first_node = new;
408 if (jeb->last_node)
409 jeb->last_node->next_phys = new;
410 jeb->last_node = new;
411
412 jeb->free_size -= len;
413 c->free_size -= len;
414 if (ref_obsolete(new)) {
415 jeb->dirty_size += len;
416 c->dirty_size += len;
417 } else {
418 jeb->used_size += len;
419 c->used_size += len;
420 }
421 430
422 if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) { 431 if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) {
423 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */ 432 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */
@@ -438,7 +447,7 @@ int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_r
438 447
439 spin_unlock(&c->erase_completion_lock); 448 spin_unlock(&c->erase_completion_lock);
440 449
441 return 0; 450 return new;
442} 451}
443 452
444 453
@@ -470,8 +479,9 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
470 struct jffs2_unknown_node n; 479 struct jffs2_unknown_node n;
471 int ret, addedsize; 480 int ret, addedsize;
472 size_t retlen; 481 size_t retlen;
482 uint32_t freed_len;
473 483
474 if(!ref) { 484 if(unlikely(!ref)) {
475 printk(KERN_NOTICE "EEEEEK. jffs2_mark_node_obsolete called with NULL node\n"); 485 printk(KERN_NOTICE "EEEEEK. jffs2_mark_node_obsolete called with NULL node\n");
476 return; 486 return;
477 } 487 }
@@ -499,32 +509,34 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
499 509
500 spin_lock(&c->erase_completion_lock); 510 spin_lock(&c->erase_completion_lock);
501 511
512 freed_len = ref_totlen(c, jeb, ref);
513
502 if (ref_flags(ref) == REF_UNCHECKED) { 514 if (ref_flags(ref) == REF_UNCHECKED) {
503 D1(if (unlikely(jeb->unchecked_size < ref_totlen(c, jeb, ref))) { 515 D1(if (unlikely(jeb->unchecked_size < freed_len)) {
504 printk(KERN_NOTICE "raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n", 516 printk(KERN_NOTICE "raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n",
505 ref_totlen(c, jeb, ref), blocknr, ref->flash_offset, jeb->used_size); 517 freed_len, blocknr, ref->flash_offset, jeb->used_size);
506 BUG(); 518 BUG();
507 }) 519 })
508 D1(printk(KERN_DEBUG "Obsoleting previously unchecked node at 0x%08x of len %x: ", ref_offset(ref), ref_totlen(c, jeb, ref))); 520 D1(printk(KERN_DEBUG "Obsoleting previously unchecked node at 0x%08x of len %x: ", ref_offset(ref), freed_len));
509 jeb->unchecked_size -= ref_totlen(c, jeb, ref); 521 jeb->unchecked_size -= freed_len;
510 c->unchecked_size -= ref_totlen(c, jeb, ref); 522 c->unchecked_size -= freed_len;
511 } else { 523 } else {
512 D1(if (unlikely(jeb->used_size < ref_totlen(c, jeb, ref))) { 524 D1(if (unlikely(jeb->used_size < freed_len)) {
513 printk(KERN_NOTICE "raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n", 525 printk(KERN_NOTICE "raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n",
514 ref_totlen(c, jeb, ref), blocknr, ref->flash_offset, jeb->used_size); 526 freed_len, blocknr, ref->flash_offset, jeb->used_size);
515 BUG(); 527 BUG();
516 }) 528 })
517 D1(printk(KERN_DEBUG "Obsoleting node at 0x%08x of len %#x: ", ref_offset(ref), ref_totlen(c, jeb, ref))); 529 D1(printk(KERN_DEBUG "Obsoleting node at 0x%08x of len %#x: ", ref_offset(ref), freed_len));
518 jeb->used_size -= ref_totlen(c, jeb, ref); 530 jeb->used_size -= freed_len;
519 c->used_size -= ref_totlen(c, jeb, ref); 531 c->used_size -= freed_len;
520 } 532 }
521 533
522 // Take care, that wasted size is taken into concern 534 // Take care, that wasted size is taken into concern
523 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + ref_totlen(c, jeb, ref))) && jeb != c->nextblock) { 535 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + freed_len)) && jeb != c->nextblock) {
524 D1(printk(KERN_DEBUG "Dirtying\n")); 536 D1(printk("Dirtying\n"));
525 addedsize = ref_totlen(c, jeb, ref); 537 addedsize = freed_len;
526 jeb->dirty_size += ref_totlen(c, jeb, ref); 538 jeb->dirty_size += freed_len;
527 c->dirty_size += ref_totlen(c, jeb, ref); 539 c->dirty_size += freed_len;
528 540
529 /* Convert wasted space to dirty, if not a bad block */ 541 /* Convert wasted space to dirty, if not a bad block */
530 if (jeb->wasted_size) { 542 if (jeb->wasted_size) {
@@ -543,10 +555,10 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
543 } 555 }
544 } 556 }
545 } else { 557 } else {
546 D1(printk(KERN_DEBUG "Wasting\n")); 558 D1(printk("Wasting\n"));
547 addedsize = 0; 559 addedsize = 0;
548 jeb->wasted_size += ref_totlen(c, jeb, ref); 560 jeb->wasted_size += freed_len;
549 c->wasted_size += ref_totlen(c, jeb, ref); 561 c->wasted_size += freed_len;
550 } 562 }
551 ref->flash_offset = ref_offset(ref) | REF_OBSOLETE; 563 ref->flash_offset = ref_offset(ref) | REF_OBSOLETE;
552 564
@@ -622,7 +634,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
622 /* The erase_free_sem is locked, and has been since before we marked the node obsolete 634 /* The erase_free_sem is locked, and has been since before we marked the node obsolete
623 and potentially put its eraseblock onto the erase_pending_list. Thus, we know that 635 and potentially put its eraseblock onto the erase_pending_list. Thus, we know that
624 the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet 636 the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet
625 by jffs2_free_all_node_refs() in erase.c. Which is nice. */ 637 by jffs2_free_jeb_node_refs() in erase.c. Which is nice. */
626 638
627 D1(printk(KERN_DEBUG "obliterating obsoleted node at 0x%08x\n", ref_offset(ref))); 639 D1(printk(KERN_DEBUG "obliterating obsoleted node at 0x%08x\n", ref_offset(ref)));
628 ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n); 640 ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n);
@@ -634,8 +646,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
634 printk(KERN_WARNING "Short read from obsoleted node at 0x%08x: %zd\n", ref_offset(ref), retlen); 646 printk(KERN_WARNING "Short read from obsoleted node at 0x%08x: %zd\n", ref_offset(ref), retlen);
635 goto out_erase_sem; 647 goto out_erase_sem;
636 } 648 }
637 if (PAD(je32_to_cpu(n.totlen)) != PAD(ref_totlen(c, jeb, ref))) { 649 if (PAD(je32_to_cpu(n.totlen)) != PAD(freed_len)) {
638 printk(KERN_WARNING "Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n", je32_to_cpu(n.totlen), ref_totlen(c, jeb, ref)); 650 printk(KERN_WARNING "Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n", je32_to_cpu(n.totlen), freed_len);
639 goto out_erase_sem; 651 goto out_erase_sem;
640 } 652 }
641 if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) { 653 if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) {
@@ -677,57 +689,23 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
677 *p = ref->next_in_ino; 689 *p = ref->next_in_ino;
678 ref->next_in_ino = NULL; 690 ref->next_in_ino = NULL;
679 691
680 if (ic->nodes == (void *)ic && ic->nlink == 0) 692 switch (ic->class) {
681 jffs2_del_ino_cache(c, ic); 693#ifdef CONFIG_JFFS2_FS_XATTR
682 694 case RAWNODE_CLASS_XATTR_DATUM:
683 spin_unlock(&c->erase_completion_lock); 695 jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
684 } 696 break;
685 697 case RAWNODE_CLASS_XATTR_REF:
686 698 jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
687 /* Merge with the next node in the physical list, if there is one 699 break;
688 and if it's also obsolete and if it doesn't belong to any inode */ 700#endif
689 if (ref->next_phys && ref_obsolete(ref->next_phys) && 701 default:
690 !ref->next_phys->next_in_ino) { 702 if (ic->nodes == (void *)ic && ic->nlink == 0)
691 struct jffs2_raw_node_ref *n = ref->next_phys; 703 jffs2_del_ino_cache(c, ic);
692 704 break;
693 spin_lock(&c->erase_completion_lock);
694
695 ref->__totlen += n->__totlen;
696 ref->next_phys = n->next_phys;
697 if (jeb->last_node == n) jeb->last_node = ref;
698 if (jeb->gc_node == n) {
699 /* gc will be happy continuing gc on this node */
700 jeb->gc_node=ref;
701 } 705 }
702 spin_unlock(&c->erase_completion_lock); 706 spin_unlock(&c->erase_completion_lock);
703
704 jffs2_free_raw_node_ref(n);
705 } 707 }
706 708
707 /* Also merge with the previous node in the list, if there is one
708 and that one is obsolete */
709 if (ref != jeb->first_node ) {
710 struct jffs2_raw_node_ref *p = jeb->first_node;
711
712 spin_lock(&c->erase_completion_lock);
713
714 while (p->next_phys != ref)
715 p = p->next_phys;
716
717 if (ref_obsolete(p) && !ref->next_in_ino) {
718 p->__totlen += ref->__totlen;
719 if (jeb->last_node == ref) {
720 jeb->last_node = p;
721 }
722 if (jeb->gc_node == ref) {
723 /* gc will be happy continuing gc on this node */
724 jeb->gc_node=p;
725 }
726 p->next_phys = ref->next_phys;
727 jffs2_free_raw_node_ref(ref);
728 }
729 spin_unlock(&c->erase_completion_lock);
730 }
731 out_erase_sem: 709 out_erase_sem:
732 up(&c->erase_free_sem); 710 up(&c->erase_free_sem);
733} 711}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index d307cf548625..9f41fc01a371 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -31,9 +31,7 @@ struct kvec;
31#define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) 31#define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode)
32#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) 32#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid)
33#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) 33#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid)
34 34#define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev)
35#define JFFS2_F_I_RDEV_MIN(f) (iminor(OFNI_EDONI_2SFFJ(f)))
36#define JFFS2_F_I_RDEV_MAJ(f) (imajor(OFNI_EDONI_2SFFJ(f)))
37 35
38#define ITIME(sec) ((struct timespec){sec, 0}) 36#define ITIME(sec) ((struct timespec){sec, 0})
39#define I_SEC(tv) ((tv).tv_sec) 37#define I_SEC(tv) ((tv).tv_sec)
@@ -60,6 +58,10 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
60 f->target = NULL; 58 f->target = NULL;
61 f->flags = 0; 59 f->flags = 0;
62 f->usercompr = 0; 60 f->usercompr = 0;
61#ifdef CONFIG_JFFS2_FS_POSIX_ACL
62 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
63 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
64#endif
63} 65}
64 66
65 67
@@ -90,13 +92,10 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
90#define jffs2_flash_writev(a,b,c,d,e,f) jffs2_flash_direct_writev(a,b,c,d,e) 92#define jffs2_flash_writev(a,b,c,d,e,f) jffs2_flash_direct_writev(a,b,c,d,e)
91#define jffs2_wbuf_timeout NULL 93#define jffs2_wbuf_timeout NULL
92#define jffs2_wbuf_process NULL 94#define jffs2_wbuf_process NULL
93#define jffs2_nor_ecc(c) (0)
94#define jffs2_dataflash(c) (0) 95#define jffs2_dataflash(c) (0)
95#define jffs2_nor_wbuf_flash(c) (0)
96#define jffs2_nor_ecc_flash_setup(c) (0)
97#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0)
98#define jffs2_dataflash_setup(c) (0) 96#define jffs2_dataflash_setup(c) (0)
99#define jffs2_dataflash_cleanup(c) do {} while (0) 97#define jffs2_dataflash_cleanup(c) do {} while (0)
98#define jffs2_nor_wbuf_flash(c) (0)
100#define jffs2_nor_wbuf_flash_setup(c) (0) 99#define jffs2_nor_wbuf_flash_setup(c) (0)
101#define jffs2_nor_wbuf_flash_cleanup(c) do {} while (0) 100#define jffs2_nor_wbuf_flash_cleanup(c) do {} while (0)
102 101
@@ -107,9 +106,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
107#ifdef CONFIG_JFFS2_SUMMARY 106#ifdef CONFIG_JFFS2_SUMMARY
108#define jffs2_can_mark_obsolete(c) (0) 107#define jffs2_can_mark_obsolete(c) (0)
109#else 108#else
110#define jffs2_can_mark_obsolete(c) \ 109#define jffs2_can_mark_obsolete(c) (c->mtd->flags & (MTD_BIT_WRITEABLE))
111 ((c->mtd->type == MTD_NORFLASH && !(c->mtd->flags & (MTD_ECC|MTD_PROGRAM_REGIONS))) || \
112 c->mtd->type == MTD_RAM)
113#endif 110#endif
114 111
115#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH) 112#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH)
@@ -133,15 +130,11 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c);
133int jffs2_nand_flash_setup(struct jffs2_sb_info *c); 130int jffs2_nand_flash_setup(struct jffs2_sb_info *c);
134void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c); 131void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c);
135 132
136#define jffs2_nor_ecc(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_ECC))
137int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c);
138void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c);
139
140#define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH) 133#define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH)
141int jffs2_dataflash_setup(struct jffs2_sb_info *c); 134int jffs2_dataflash_setup(struct jffs2_sb_info *c);
142void jffs2_dataflash_cleanup(struct jffs2_sb_info *c); 135void jffs2_dataflash_cleanup(struct jffs2_sb_info *c);
143 136
144#define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_PROGRAM_REGIONS)) 137#define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE))
145int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); 138int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c);
146void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); 139void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c);
147 140
@@ -165,7 +158,7 @@ extern struct inode_operations jffs2_dir_inode_operations;
165/* file.c */ 158/* file.c */
166extern const struct file_operations jffs2_file_operations; 159extern const struct file_operations jffs2_file_operations;
167extern struct inode_operations jffs2_file_inode_operations; 160extern struct inode_operations jffs2_file_inode_operations;
168extern struct address_space_operations jffs2_file_address_operations; 161extern const struct address_space_operations jffs2_file_address_operations;
169int jffs2_fsync(struct file *, struct dentry *, int); 162int jffs2_fsync(struct file *, struct dentry *, int);
170int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 163int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
171 164
@@ -182,7 +175,7 @@ void jffs2_clear_inode (struct inode *);
182void jffs2_dirty_inode(struct inode *inode); 175void jffs2_dirty_inode(struct inode *inode);
183struct inode *jffs2_new_inode (struct inode *dir_i, int mode, 176struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
184 struct jffs2_raw_inode *ri); 177 struct jffs2_raw_inode *ri);
185int jffs2_statfs (struct super_block *, struct kstatfs *); 178int jffs2_statfs (struct dentry *, struct kstatfs *);
186void jffs2_write_super (struct super_block *); 179void jffs2_write_super (struct super_block *);
187int jffs2_remount_fs (struct super_block *, int *, char *); 180int jffs2_remount_fs (struct super_block *, int *, char *);
188int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); 181int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index f1695642d0f7..cc1899268c43 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -66,7 +66,7 @@ static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
66 jffs2_free_full_dnode(tn->fn); 66 jffs2_free_full_dnode(tn->fn);
67 jffs2_free_tmp_dnode_info(tn); 67 jffs2_free_tmp_dnode_info(tn);
68 68
69 this = this->rb_parent; 69 this = rb_parent(this);
70 if (!this) 70 if (!this)
71 break; 71 break;
72 72
@@ -116,19 +116,42 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
116 uint32_t *latest_mctime, uint32_t *mctime_ver) 116 uint32_t *latest_mctime, uint32_t *mctime_ver)
117{ 117{
118 struct jffs2_full_dirent *fd; 118 struct jffs2_full_dirent *fd;
119 uint32_t crc;
119 120
120 /* The direntry nodes are checked during the flash scanning */
121 BUG_ON(ref_flags(ref) == REF_UNCHECKED);
122 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ 121 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
123 BUG_ON(ref_obsolete(ref)); 122 BUG_ON(ref_obsolete(ref));
124 123
125 /* Sanity check */ 124 crc = crc32(0, rd, sizeof(*rd) - 8);
126 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) { 125 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
127 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n", 126 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
128 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen)); 127 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
129 return 1; 128 return 1;
130 } 129 }
131 130
131 /* If we've never checked the CRCs on this node, check them now */
132 if (ref_flags(ref) == REF_UNCHECKED) {
133 struct jffs2_eraseblock *jeb;
134 int len;
135
136 /* Sanity check */
137 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140 return 1;
141 }
142
143 jeb = &c->blocks[ref->flash_offset / c->sector_size];
144 len = ref_totlen(c, jeb, ref);
145
146 spin_lock(&c->erase_completion_lock);
147 jeb->used_size += len;
148 jeb->unchecked_size -= len;
149 c->used_size += len;
150 c->unchecked_size -= len;
151 ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
152 spin_unlock(&c->erase_completion_lock);
153 }
154
132 fd = jffs2_alloc_full_dirent(rd->nsize + 1); 155 fd = jffs2_alloc_full_dirent(rd->nsize + 1);
133 if (unlikely(!fd)) 156 if (unlikely(!fd))
134 return -ENOMEM; 157 return -ENOMEM;
@@ -198,13 +221,21 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
198 struct jffs2_tmp_dnode_info *tn; 221 struct jffs2_tmp_dnode_info *tn;
199 uint32_t len, csize; 222 uint32_t len, csize;
200 int ret = 1; 223 int ret = 1;
224 uint32_t crc;
201 225
202 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ 226 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
203 BUG_ON(ref_obsolete(ref)); 227 BUG_ON(ref_obsolete(ref));
204 228
229 crc = crc32(0, rd, sizeof(*rd) - 8);
230 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233 return 1;
234 }
235
205 tn = jffs2_alloc_tmp_dnode_info(); 236 tn = jffs2_alloc_tmp_dnode_info();
206 if (!tn) { 237 if (!tn) {
207 JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn)); 238 JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
208 return -ENOMEM; 239 return -ENOMEM;
209 } 240 }
210 241
@@ -213,14 +244,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
213 244
214 /* If we've never checked the CRCs on this node, check them now */ 245 /* If we've never checked the CRCs on this node, check them now */
215 if (ref_flags(ref) == REF_UNCHECKED) { 246 if (ref_flags(ref) == REF_UNCHECKED) {
216 uint32_t crc;
217
218 crc = crc32(0, rd, sizeof(*rd) - 8);
219 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
220 JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
221 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
222 goto free_out;
223 }
224 247
225 /* Sanity checks */ 248 /* Sanity checks */
226 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) || 249 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
@@ -343,7 +366,7 @@ free_out:
343 * Helper function for jffs2_get_inode_nodes(). 366 * Helper function for jffs2_get_inode_nodes().
344 * It is called every time an unknown node is found. 367 * It is called every time an unknown node is found.
345 * 368 *
346 * Returns: 0 on succes; 369 * Returns: 0 on success;
347 * 1 if the node should be marked obsolete; 370 * 1 if the node should be marked obsolete;
348 * negative error code on failure. 371 * negative error code on failure.
349 */ 372 */
@@ -354,37 +377,30 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
354 377
355 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype)); 378 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
356 379
357 if (crc32(0, un, sizeof(struct jffs2_unknown_node) - 4) != je32_to_cpu(un->hdr_crc)) { 380 switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
358 /* Hmmm. This should have been caught at scan time. */
359 JFFS2_NOTICE("node header CRC failed at %#08x. But it must have been OK earlier.\n", ref_offset(ref));
360 jffs2_dbg_dump_node(c, ref_offset(ref));
361 return 1;
362 } else {
363 switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
364 381
365 case JFFS2_FEATURE_INCOMPAT: 382 case JFFS2_FEATURE_INCOMPAT:
366 JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n", 383 JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
367 je16_to_cpu(un->nodetype), ref_offset(ref)); 384 je16_to_cpu(un->nodetype), ref_offset(ref));
368 /* EEP */ 385 /* EEP */
369 BUG(); 386 BUG();
370 break; 387 break;
371 388
372 case JFFS2_FEATURE_ROCOMPAT: 389 case JFFS2_FEATURE_ROCOMPAT:
373 JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n", 390 JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
374 je16_to_cpu(un->nodetype), ref_offset(ref)); 391 je16_to_cpu(un->nodetype), ref_offset(ref));
375 BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO)); 392 BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
376 break; 393 break;
377 394
378 case JFFS2_FEATURE_RWCOMPAT_COPY: 395 case JFFS2_FEATURE_RWCOMPAT_COPY:
379 JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n", 396 JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
380 je16_to_cpu(un->nodetype), ref_offset(ref)); 397 je16_to_cpu(un->nodetype), ref_offset(ref));
381 break; 398 break;
382 399
383 case JFFS2_FEATURE_RWCOMPAT_DELETE: 400 case JFFS2_FEATURE_RWCOMPAT_DELETE:
384 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n", 401 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
385 je16_to_cpu(un->nodetype), ref_offset(ref)); 402 je16_to_cpu(un->nodetype), ref_offset(ref));
386 return 1; 403 return 1;
387 }
388 } 404 }
389 405
390 return 0; 406 return 0;
@@ -434,7 +450,7 @@ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
434 } 450 }
435 451
436 if (retlen < len) { 452 if (retlen < len) {
437 JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", 453 JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
438 offs, retlen, len); 454 offs, retlen, len);
439 return -EIO; 455 return -EIO;
440 } 456 }
@@ -542,13 +558,25 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
542 } 558 }
543 559
544 if (retlen < len) { 560 if (retlen < len) {
545 JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len); 561 JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
546 err = -EIO; 562 err = -EIO;
547 goto free_out; 563 goto free_out;
548 } 564 }
549 565
550 node = (union jffs2_node_union *)bufstart; 566 node = (union jffs2_node_union *)bufstart;
551 567
568 /* No need to mask in the valid bit; it shouldn't be invalid */
569 if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
570 JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
571 ref_offset(ref), je16_to_cpu(node->u.magic),
572 je16_to_cpu(node->u.nodetype),
573 je32_to_cpu(node->u.totlen),
574 je32_to_cpu(node->u.hdr_crc));
575 jffs2_dbg_dump_node(c, ref_offset(ref));
576 jffs2_mark_node_obsolete(c, ref);
577 goto cont;
578 }
579
552 switch (je16_to_cpu(node->u.nodetype)) { 580 switch (je16_to_cpu(node->u.nodetype)) {
553 581
554 case JFFS2_NODETYPE_DIRENT: 582 case JFFS2_NODETYPE_DIRENT:
@@ -606,6 +634,7 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
606 goto free_out; 634 goto free_out;
607 635
608 } 636 }
637 cont:
609 spin_lock(&c->erase_completion_lock); 638 spin_lock(&c->erase_completion_lock);
610 } 639 }
611 640
@@ -679,12 +708,12 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
679 jffs2_mark_node_obsolete(c, fn->raw); 708 jffs2_mark_node_obsolete(c, fn->raw);
680 709
681 BUG_ON(rb->rb_left); 710 BUG_ON(rb->rb_left);
682 if (rb->rb_parent && rb->rb_parent->rb_left == rb) { 711 if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
683 /* We were then left-hand child of our parent. We need 712 /* We were then left-hand child of our parent. We need
684 * to move our own right-hand child into our place. */ 713 * to move our own right-hand child into our place. */
685 repl_rb = rb->rb_right; 714 repl_rb = rb->rb_right;
686 if (repl_rb) 715 if (repl_rb)
687 repl_rb->rb_parent = rb->rb_parent; 716 rb_set_parent(repl_rb, rb_parent(rb));
688 } else 717 } else
689 repl_rb = NULL; 718 repl_rb = NULL;
690 719
@@ -692,14 +721,14 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
692 721
693 /* Remove the spent tn from the tree; don't bother rebalancing 722 /* Remove the spent tn from the tree; don't bother rebalancing
694 * but put our right-hand child in our own place. */ 723 * but put our right-hand child in our own place. */
695 if (tn->rb.rb_parent) { 724 if (rb_parent(&tn->rb)) {
696 if (tn->rb.rb_parent->rb_left == &tn->rb) 725 if (rb_parent(&tn->rb)->rb_left == &tn->rb)
697 tn->rb.rb_parent->rb_left = repl_rb; 726 rb_parent(&tn->rb)->rb_left = repl_rb;
698 else if (tn->rb.rb_parent->rb_right == &tn->rb) 727 else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
699 tn->rb.rb_parent->rb_right = repl_rb; 728 rb_parent(&tn->rb)->rb_right = repl_rb;
700 else BUG(); 729 else BUG();
701 } else if (tn->rb.rb_right) 730 } else if (tn->rb.rb_right)
702 tn->rb.rb_right->rb_parent = NULL; 731 rb_set_parent(tn->rb.rb_right, NULL);
703 732
704 jffs2_free_tmp_dnode_info(tn); 733 jffs2_free_tmp_dnode_info(tn);
705 if (ret) { 734 if (ret) {
@@ -939,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
939 struct jffs2_full_dirent *fd, *fds; 968 struct jffs2_full_dirent *fd, *fds;
940 int deleted; 969 int deleted;
941 970
971 jffs2_xattr_delete_inode(c, f->inocache);
942 down(&f->sem); 972 down(&f->sem);
943 deleted = f->inocache && !f->inocache->nlink; 973 deleted = f->inocache && !f->inocache->nlink;
944 974
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index cf55b221fc2b..2bfdc33752d3 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -65,6 +65,28 @@ static inline uint32_t EMPTY_SCAN_SIZE(uint32_t sector_size) {
65 return DEFAULT_EMPTY_SCAN_SIZE; 65 return DEFAULT_EMPTY_SCAN_SIZE;
66} 66}
67 67
68static int file_dirty(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
69{
70 int ret;
71
72 if ((ret = jffs2_prealloc_raw_node_refs(c, jeb, 1)))
73 return ret;
74 if ((ret = jffs2_scan_dirty_space(c, jeb, jeb->free_size)))
75 return ret;
76 /* Turned wasted size into dirty, since we apparently
77 think it's recoverable now. */
78 jeb->dirty_size += jeb->wasted_size;
79 c->dirty_size += jeb->wasted_size;
80 c->wasted_size -= jeb->wasted_size;
81 jeb->wasted_size = 0;
82 if (VERYDIRTY(c, jeb->dirty_size)) {
83 list_add(&jeb->list, &c->very_dirty_list);
84 } else {
85 list_add(&jeb->list, &c->dirty_list);
86 }
87 return 0;
88}
89
68int jffs2_scan_medium(struct jffs2_sb_info *c) 90int jffs2_scan_medium(struct jffs2_sb_info *c)
69{ 91{
70 int i, ret; 92 int i, ret;
@@ -170,34 +192,20 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
170 (!c->nextblock || c->nextblock->free_size < jeb->free_size)) { 192 (!c->nextblock || c->nextblock->free_size < jeb->free_size)) {
171 /* Better candidate for the next writes to go to */ 193 /* Better candidate for the next writes to go to */
172 if (c->nextblock) { 194 if (c->nextblock) {
173 c->nextblock->dirty_size += c->nextblock->free_size + c->nextblock->wasted_size; 195 ret = file_dirty(c, c->nextblock);
174 c->dirty_size += c->nextblock->free_size + c->nextblock->wasted_size; 196 if (ret)
175 c->free_size -= c->nextblock->free_size; 197 return ret;
176 c->wasted_size -= c->nextblock->wasted_size;
177 c->nextblock->free_size = c->nextblock->wasted_size = 0;
178 if (VERYDIRTY(c, c->nextblock->dirty_size)) {
179 list_add(&c->nextblock->list, &c->very_dirty_list);
180 } else {
181 list_add(&c->nextblock->list, &c->dirty_list);
182 }
183 /* deleting summary information of the old nextblock */ 198 /* deleting summary information of the old nextblock */
184 jffs2_sum_reset_collected(c->summary); 199 jffs2_sum_reset_collected(c->summary);
185 } 200 }
186 /* update collected summary infromation for the current nextblock */ 201 /* update collected summary information for the current nextblock */
187 jffs2_sum_move_collected(c, s); 202 jffs2_sum_move_collected(c, s);
188 D1(printk(KERN_DEBUG "jffs2_scan_medium(): new nextblock = 0x%08x\n", jeb->offset)); 203 D1(printk(KERN_DEBUG "jffs2_scan_medium(): new nextblock = 0x%08x\n", jeb->offset));
189 c->nextblock = jeb; 204 c->nextblock = jeb;
190 } else { 205 } else {
191 jeb->dirty_size += jeb->free_size + jeb->wasted_size; 206 ret = file_dirty(c, jeb);
192 c->dirty_size += jeb->free_size + jeb->wasted_size; 207 if (ret)
193 c->free_size -= jeb->free_size; 208 return ret;
194 c->wasted_size -= jeb->wasted_size;
195 jeb->free_size = jeb->wasted_size = 0;
196 if (VERYDIRTY(c, jeb->dirty_size)) {
197 list_add(&jeb->list, &c->very_dirty_list);
198 } else {
199 list_add(&jeb->list, &c->dirty_list);
200 }
201 } 209 }
202 break; 210 break;
203 211
@@ -222,9 +230,6 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
222 } 230 }
223 } 231 }
224 232
225 if (jffs2_sum_active() && s)
226 kfree(s);
227
228 /* Nextblock dirty is always seen as wasted, because we cannot recycle it now */ 233 /* Nextblock dirty is always seen as wasted, because we cannot recycle it now */
229 if (c->nextblock && (c->nextblock->dirty_size)) { 234 if (c->nextblock && (c->nextblock->dirty_size)) {
230 c->nextblock->wasted_size += c->nextblock->dirty_size; 235 c->nextblock->wasted_size += c->nextblock->dirty_size;
@@ -242,11 +247,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
242 247
243 D1(printk(KERN_DEBUG "jffs2_scan_medium(): Skipping %d bytes in nextblock to ensure page alignment\n", 248 D1(printk(KERN_DEBUG "jffs2_scan_medium(): Skipping %d bytes in nextblock to ensure page alignment\n",
244 skip)); 249 skip));
245 c->nextblock->wasted_size += skip; 250 jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
246 c->wasted_size += skip; 251 jffs2_scan_dirty_space(c, c->nextblock, skip);
247
248 c->nextblock->free_size -= skip;
249 c->free_size -= skip;
250 } 252 }
251#endif 253#endif
252 if (c->nr_erasing_blocks) { 254 if (c->nr_erasing_blocks) {
@@ -266,6 +268,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
266 else 268 else
267 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 269 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size);
268#endif 270#endif
271 if (s)
272 kfree(s);
273
269 return ret; 274 return ret;
270} 275}
271 276
@@ -290,7 +295,7 @@ int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf,
290int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 295int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
291{ 296{
292 if ((jeb->used_size + jeb->unchecked_size) == PAD(c->cleanmarker_size) && !jeb->dirty_size 297 if ((jeb->used_size + jeb->unchecked_size) == PAD(c->cleanmarker_size) && !jeb->dirty_size
293 && (!jeb->first_node || !jeb->first_node->next_phys) ) 298 && (!jeb->first_node || !ref_next(jeb->first_node)) )
294 return BLK_STATE_CLEANMARKER; 299 return BLK_STATE_CLEANMARKER;
295 300
296 /* move blocks with max 4 byte dirty space to cleanlist */ 301 /* move blocks with max 4 byte dirty space to cleanlist */
@@ -306,11 +311,126 @@ int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *je
306 return BLK_STATE_ALLDIRTY; 311 return BLK_STATE_ALLDIRTY;
307} 312}
308 313
314#ifdef CONFIG_JFFS2_FS_XATTR
315static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
316 struct jffs2_raw_xattr *rx, uint32_t ofs,
317 struct jffs2_summary *s)
318{
319 struct jffs2_xattr_datum *xd;
320 uint32_t xid, version, totlen, crc;
321 int err;
322
323 crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4);
324 if (crc != je32_to_cpu(rx->node_crc)) {
325 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
326 ofs, je32_to_cpu(rx->node_crc), crc);
327 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen))))
328 return err;
329 return 0;
330 }
331
332 xid = je32_to_cpu(rx->xid);
333 version = je32_to_cpu(rx->version);
334
335 totlen = PAD(sizeof(struct jffs2_raw_xattr)
336 + rx->name_len + 1 + je16_to_cpu(rx->value_len));
337 if (totlen != je32_to_cpu(rx->totlen)) {
338 JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n",
339 ofs, je32_to_cpu(rx->totlen), totlen);
340 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen))))
341 return err;
342 return 0;
343 }
344
345 xd = jffs2_setup_xattr_datum(c, xid, version);
346 if (IS_ERR(xd))
347 return PTR_ERR(xd);
348
349 if (xd->version > version) {
350 struct jffs2_raw_node_ref *raw
351 = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL);
352 raw->next_in_ino = xd->node->next_in_ino;
353 xd->node->next_in_ino = raw;
354 } else {
355 xd->version = version;
356 xd->xprefix = rx->xprefix;
357 xd->name_len = rx->name_len;
358 xd->value_len = je16_to_cpu(rx->value_len);
359 xd->data_crc = je32_to_cpu(rx->data_crc);
360
361 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, (void *)xd);
362 }
363
364 if (jffs2_sum_active())
365 jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset);
366 dbg_xattr("scaning xdatum at %#08x (xid=%u, version=%u)\n",
367 ofs, xd->xid, xd->version);
368 return 0;
369}
370
371static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
372 struct jffs2_raw_xref *rr, uint32_t ofs,
373 struct jffs2_summary *s)
374{
375 struct jffs2_xattr_ref *ref;
376 uint32_t crc;
377 int err;
378
379 crc = crc32(0, rr, sizeof(*rr) - 4);
380 if (crc != je32_to_cpu(rr->node_crc)) {
381 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
382 ofs, je32_to_cpu(rr->node_crc), crc);
383 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen)))))
384 return err;
385 return 0;
386 }
387
388 if (PAD(sizeof(struct jffs2_raw_xref)) != je32_to_cpu(rr->totlen)) {
389 JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%zd\n",
390 ofs, je32_to_cpu(rr->totlen),
391 PAD(sizeof(struct jffs2_raw_xref)));
392 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rr->totlen))))
393 return err;
394 return 0;
395 }
396
397 ref = jffs2_alloc_xattr_ref();
398 if (!ref)
399 return -ENOMEM;
400
401 /* BEFORE jffs2_build_xattr_subsystem() called,
402 * and AFTER xattr_ref is marked as a dead xref,
403 * ref->xid is used to store 32bit xid, xd is not used
404 * ref->ino is used to store 32bit inode-number, ic is not used
405 * Thoes variables are declared as union, thus using those
406 * are exclusive. In a similar way, ref->next is temporarily
407 * used to chain all xattr_ref object. It's re-chained to
408 * jffs2_inode_cache in jffs2_build_xattr_subsystem() correctly.
409 */
410 ref->ino = je32_to_cpu(rr->ino);
411 ref->xid = je32_to_cpu(rr->xid);
412 ref->xseqno = je32_to_cpu(rr->xseqno);
413 if (ref->xseqno > c->highest_xseqno)
414 c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
415 ref->next = c->xref_temp;
416 c->xref_temp = ref;
417
418 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), (void *)ref);
419
420 if (jffs2_sum_active())
421 jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset);
422 dbg_xattr("scan xref at %#08x (xid=%u, ino=%u)\n",
423 ofs, ref->xid, ref->ino);
424 return 0;
425}
426#endif
427
428/* Called with 'buf_size == 0' if buf is in fact a pointer _directly_ into
429 the flash, XIP-style */
309static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 430static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
310 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) { 431 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
311 struct jffs2_unknown_node *node; 432 struct jffs2_unknown_node *node;
312 struct jffs2_unknown_node crcnode; 433 struct jffs2_unknown_node crcnode;
313 struct jffs2_sum_marker *sm;
314 uint32_t ofs, prevofs; 434 uint32_t ofs, prevofs;
315 uint32_t hdr_crc, buf_ofs, buf_len; 435 uint32_t hdr_crc, buf_ofs, buf_len;
316 int err; 436 int err;
@@ -344,44 +464,75 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
344#endif 464#endif
345 465
346 if (jffs2_sum_active()) { 466 if (jffs2_sum_active()) {
347 sm = kmalloc(sizeof(struct jffs2_sum_marker), GFP_KERNEL); 467 struct jffs2_sum_marker *sm;
348 if (!sm) { 468 void *sumptr = NULL;
349 return -ENOMEM; 469 uint32_t sumlen;
350 } 470
351 471 if (!buf_size) {
352 err = jffs2_fill_scan_buf(c, (unsigned char *) sm, jeb->offset + c->sector_size - 472 /* XIP case. Just look, point at the summary if it's there */
353 sizeof(struct jffs2_sum_marker), sizeof(struct jffs2_sum_marker)); 473 sm = (void *)buf + c->sector_size - sizeof(*sm);
354 if (err) { 474 if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC) {
355 kfree(sm); 475 sumptr = buf + je32_to_cpu(sm->offset);
356 return err; 476 sumlen = c->sector_size - je32_to_cpu(sm->offset);
357 } 477 }
358 478 } else {
359 if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC ) { 479 /* If NAND flash, read a whole page of it. Else just the end */
360 err = jffs2_sum_scan_sumnode(c, jeb, je32_to_cpu(sm->offset), &pseudo_random); 480 if (c->wbuf_pagesize)
361 if (err) { 481 buf_len = c->wbuf_pagesize;
362 kfree(sm); 482 else
483 buf_len = sizeof(*sm);
484
485 /* Read as much as we want into the _end_ of the preallocated buffer */
486 err = jffs2_fill_scan_buf(c, buf + buf_size - buf_len,
487 jeb->offset + c->sector_size - buf_len,
488 buf_len);
489 if (err)
363 return err; 490 return err;
491
492 sm = (void *)buf + buf_size - sizeof(*sm);
493 if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC) {
494 sumlen = c->sector_size - je32_to_cpu(sm->offset);
495 sumptr = buf + buf_size - sumlen;
496
497 /* Now, make sure the summary itself is available */
498 if (sumlen > buf_size) {
499 /* Need to kmalloc for this. */
500 sumptr = kmalloc(sumlen, GFP_KERNEL);
501 if (!sumptr)
502 return -ENOMEM;
503 memcpy(sumptr + sumlen - buf_len, buf + buf_size - buf_len, buf_len);
504 }
505 if (buf_len < sumlen) {
506 /* Need to read more so that the entire summary node is present */
507 err = jffs2_fill_scan_buf(c, sumptr,
508 jeb->offset + c->sector_size - sumlen,
509 sumlen - buf_len);
510 if (err)
511 return err;
512 }
364 } 513 }
514
365 } 515 }
366 516
367 kfree(sm); 517 if (sumptr) {
518 err = jffs2_sum_scan_sumnode(c, jeb, sumptr, sumlen, &pseudo_random);
368 519
369 ofs = jeb->offset; 520 if (buf_size && sumlen > buf_size)
370 prevofs = jeb->offset - 1; 521 kfree(sumptr);
522 /* If it returns with a real error, bail.
523 If it returns positive, that's a block classification
524 (i.e. BLK_STATE_xxx) so return that too.
525 If it returns zero, fall through to full scan. */
526 if (err)
527 return err;
528 }
371 } 529 }
372 530
373 buf_ofs = jeb->offset; 531 buf_ofs = jeb->offset;
374 532
375 if (!buf_size) { 533 if (!buf_size) {
534 /* This is the XIP case -- we're reading _directly_ from the flash chip */
376 buf_len = c->sector_size; 535 buf_len = c->sector_size;
377
378 if (jffs2_sum_active()) {
379 /* must reread because of summary test */
380 err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len);
381 if (err)
382 return err;
383 }
384
385 } else { 536 } else {
386 buf_len = EMPTY_SCAN_SIZE(c->sector_size); 537 buf_len = EMPTY_SCAN_SIZE(c->sector_size);
387 err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len); 538 err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len);
@@ -418,7 +569,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
418 if (ofs) { 569 if (ofs) {
419 D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset, 570 D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset,
420 jeb->offset + ofs)); 571 jeb->offset + ofs));
421 DIRTY_SPACE(ofs); 572 if ((err = jffs2_prealloc_raw_node_refs(c, jeb, 1)))
573 return err;
574 if ((err = jffs2_scan_dirty_space(c, jeb, ofs)))
575 return err;
422 } 576 }
423 577
424 /* Now ofs is a complete physical flash offset as it always was... */ 578 /* Now ofs is a complete physical flash offset as it always was... */
@@ -433,6 +587,11 @@ scan_more:
433 587
434 jffs2_dbg_acct_paranoia_check_nolock(c, jeb); 588 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
435 589
590 /* Make sure there are node refs available for use */
591 err = jffs2_prealloc_raw_node_refs(c, jeb, 2);
592 if (err)
593 return err;
594
436 cond_resched(); 595 cond_resched();
437 596
438 if (ofs & 3) { 597 if (ofs & 3) {
@@ -442,7 +601,8 @@ scan_more:
442 } 601 }
443 if (ofs == prevofs) { 602 if (ofs == prevofs) {
444 printk(KERN_WARNING "ofs 0x%08x has already been seen. Skipping\n", ofs); 603 printk(KERN_WARNING "ofs 0x%08x has already been seen. Skipping\n", ofs);
445 DIRTY_SPACE(4); 604 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
605 return err;
446 ofs += 4; 606 ofs += 4;
447 continue; 607 continue;
448 } 608 }
@@ -451,7 +611,8 @@ scan_more:
451 if (jeb->offset + c->sector_size < ofs + sizeof(*node)) { 611 if (jeb->offset + c->sector_size < ofs + sizeof(*node)) {
452 D1(printk(KERN_DEBUG "Fewer than %zd bytes left to end of block. (%x+%x<%x+%zx) Not reading\n", sizeof(struct jffs2_unknown_node), 612 D1(printk(KERN_DEBUG "Fewer than %zd bytes left to end of block. (%x+%x<%x+%zx) Not reading\n", sizeof(struct jffs2_unknown_node),
453 jeb->offset, c->sector_size, ofs, sizeof(*node))); 613 jeb->offset, c->sector_size, ofs, sizeof(*node)));
454 DIRTY_SPACE((jeb->offset + c->sector_size)-ofs); 614 if ((err = jffs2_scan_dirty_space(c, jeb, (jeb->offset + c->sector_size)-ofs)))
615 return err;
455 break; 616 break;
456 } 617 }
457 618
@@ -481,7 +642,8 @@ scan_more:
481 if (*(uint32_t *)(&buf[inbuf_ofs]) != 0xffffffff) { 642 if (*(uint32_t *)(&buf[inbuf_ofs]) != 0xffffffff) {
482 printk(KERN_WARNING "Empty flash at 0x%08x ends at 0x%08x\n", 643 printk(KERN_WARNING "Empty flash at 0x%08x ends at 0x%08x\n",
483 empty_start, ofs); 644 empty_start, ofs);
484 DIRTY_SPACE(ofs-empty_start); 645 if ((err = jffs2_scan_dirty_space(c, jeb, ofs-empty_start)))
646 return err;
485 goto scan_more; 647 goto scan_more;
486 } 648 }
487 649
@@ -494,7 +656,7 @@ scan_more:
494 /* If we're only checking the beginning of a block with a cleanmarker, 656 /* If we're only checking the beginning of a block with a cleanmarker,
495 bail now */ 657 bail now */
496 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) && 658 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) &&
497 c->cleanmarker_size && !jeb->dirty_size && !jeb->first_node->next_phys) { 659 c->cleanmarker_size && !jeb->dirty_size && !ref_next(jeb->first_node)) {
498 D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE(c->sector_size))); 660 D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE(c->sector_size)));
499 return BLK_STATE_CLEANMARKER; 661 return BLK_STATE_CLEANMARKER;
500 } 662 }
@@ -518,20 +680,23 @@ scan_more:
518 680
519 if (ofs == jeb->offset && je16_to_cpu(node->magic) == KSAMTIB_CIGAM_2SFFJ) { 681 if (ofs == jeb->offset && je16_to_cpu(node->magic) == KSAMTIB_CIGAM_2SFFJ) {
520 printk(KERN_WARNING "Magic bitmask is backwards at offset 0x%08x. Wrong endian filesystem?\n", ofs); 682 printk(KERN_WARNING "Magic bitmask is backwards at offset 0x%08x. Wrong endian filesystem?\n", ofs);
521 DIRTY_SPACE(4); 683 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
684 return err;
522 ofs += 4; 685 ofs += 4;
523 continue; 686 continue;
524 } 687 }
525 if (je16_to_cpu(node->magic) == JFFS2_DIRTY_BITMASK) { 688 if (je16_to_cpu(node->magic) == JFFS2_DIRTY_BITMASK) {
526 D1(printk(KERN_DEBUG "Dirty bitmask at 0x%08x\n", ofs)); 689 D1(printk(KERN_DEBUG "Dirty bitmask at 0x%08x\n", ofs));
527 DIRTY_SPACE(4); 690 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
691 return err;
528 ofs += 4; 692 ofs += 4;
529 continue; 693 continue;
530 } 694 }
531 if (je16_to_cpu(node->magic) == JFFS2_OLD_MAGIC_BITMASK) { 695 if (je16_to_cpu(node->magic) == JFFS2_OLD_MAGIC_BITMASK) {
532 printk(KERN_WARNING "Old JFFS2 bitmask found at 0x%08x\n", ofs); 696 printk(KERN_WARNING "Old JFFS2 bitmask found at 0x%08x\n", ofs);
533 printk(KERN_WARNING "You cannot use older JFFS2 filesystems with newer kernels\n"); 697 printk(KERN_WARNING "You cannot use older JFFS2 filesystems with newer kernels\n");
534 DIRTY_SPACE(4); 698 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
699 return err;
535 ofs += 4; 700 ofs += 4;
536 continue; 701 continue;
537 } 702 }
@@ -540,7 +705,8 @@ scan_more:
540 noisy_printk(&noise, "jffs2_scan_eraseblock(): Magic bitmask 0x%04x not found at 0x%08x: 0x%04x instead\n", 705 noisy_printk(&noise, "jffs2_scan_eraseblock(): Magic bitmask 0x%04x not found at 0x%08x: 0x%04x instead\n",
541 JFFS2_MAGIC_BITMASK, ofs, 706 JFFS2_MAGIC_BITMASK, ofs,
542 je16_to_cpu(node->magic)); 707 je16_to_cpu(node->magic));
543 DIRTY_SPACE(4); 708 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
709 return err;
544 ofs += 4; 710 ofs += 4;
545 continue; 711 continue;
546 } 712 }
@@ -557,7 +723,8 @@ scan_more:
557 je32_to_cpu(node->totlen), 723 je32_to_cpu(node->totlen),
558 je32_to_cpu(node->hdr_crc), 724 je32_to_cpu(node->hdr_crc),
559 hdr_crc); 725 hdr_crc);
560 DIRTY_SPACE(4); 726 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
727 return err;
561 ofs += 4; 728 ofs += 4;
562 continue; 729 continue;
563 } 730 }
@@ -568,7 +735,8 @@ scan_more:
568 printk(KERN_WARNING "Node at 0x%08x with length 0x%08x would run over the end of the erase block\n", 735 printk(KERN_WARNING "Node at 0x%08x with length 0x%08x would run over the end of the erase block\n",
569 ofs, je32_to_cpu(node->totlen)); 736 ofs, je32_to_cpu(node->totlen));
570 printk(KERN_WARNING "Perhaps the file system was created with the wrong erase size?\n"); 737 printk(KERN_WARNING "Perhaps the file system was created with the wrong erase size?\n");
571 DIRTY_SPACE(4); 738 if ((err = jffs2_scan_dirty_space(c, jeb, 4)))
739 return err;
572 ofs += 4; 740 ofs += 4;
573 continue; 741 continue;
574 } 742 }
@@ -576,7 +744,8 @@ scan_more:
576 if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) { 744 if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) {
577 /* Wheee. This is an obsoleted node */ 745 /* Wheee. This is an obsoleted node */
578 D2(printk(KERN_DEBUG "Node at 0x%08x is obsolete. Skipping\n", ofs)); 746 D2(printk(KERN_DEBUG "Node at 0x%08x is obsolete. Skipping\n", ofs));
579 DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); 747 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
748 return err;
580 ofs += PAD(je32_to_cpu(node->totlen)); 749 ofs += PAD(je32_to_cpu(node->totlen));
581 continue; 750 continue;
582 } 751 }
@@ -614,30 +783,59 @@ scan_more:
614 ofs += PAD(je32_to_cpu(node->totlen)); 783 ofs += PAD(je32_to_cpu(node->totlen));
615 break; 784 break;
616 785
786#ifdef CONFIG_JFFS2_FS_XATTR
787 case JFFS2_NODETYPE_XATTR:
788 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) {
789 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
790 D1(printk(KERN_DEBUG "Fewer than %d bytes (xattr node)"
791 " left to end of buf. Reading 0x%x at 0x%08x\n",
792 je32_to_cpu(node->totlen), buf_len, ofs));
793 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
794 if (err)
795 return err;
796 buf_ofs = ofs;
797 node = (void *)buf;
798 }
799 err = jffs2_scan_xattr_node(c, jeb, (void *)node, ofs, s);
800 if (err)
801 return err;
802 ofs += PAD(je32_to_cpu(node->totlen));
803 break;
804 case JFFS2_NODETYPE_XREF:
805 if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) {
806 buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs);
807 D1(printk(KERN_DEBUG "Fewer than %d bytes (xref node)"
808 " left to end of buf. Reading 0x%x at 0x%08x\n",
809 je32_to_cpu(node->totlen), buf_len, ofs));
810 err = jffs2_fill_scan_buf(c, buf, ofs, buf_len);
811 if (err)
812 return err;
813 buf_ofs = ofs;
814 node = (void *)buf;
815 }
816 err = jffs2_scan_xref_node(c, jeb, (void *)node, ofs, s);
817 if (err)
818 return err;
819 ofs += PAD(je32_to_cpu(node->totlen));
820 break;
821#endif /* CONFIG_JFFS2_FS_XATTR */
822
617 case JFFS2_NODETYPE_CLEANMARKER: 823 case JFFS2_NODETYPE_CLEANMARKER:
618 D1(printk(KERN_DEBUG "CLEANMARKER node found at 0x%08x\n", ofs)); 824 D1(printk(KERN_DEBUG "CLEANMARKER node found at 0x%08x\n", ofs));
619 if (je32_to_cpu(node->totlen) != c->cleanmarker_size) { 825 if (je32_to_cpu(node->totlen) != c->cleanmarker_size) {
620 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x has totlen 0x%x != normal 0x%x\n", 826 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x has totlen 0x%x != normal 0x%x\n",
621 ofs, je32_to_cpu(node->totlen), c->cleanmarker_size); 827 ofs, je32_to_cpu(node->totlen), c->cleanmarker_size);
622 DIRTY_SPACE(PAD(sizeof(struct jffs2_unknown_node))); 828 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node)))))
829 return err;
623 ofs += PAD(sizeof(struct jffs2_unknown_node)); 830 ofs += PAD(sizeof(struct jffs2_unknown_node));
624 } else if (jeb->first_node) { 831 } else if (jeb->first_node) {
625 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x, not first node in block (0x%08x)\n", ofs, jeb->offset); 832 printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x, not first node in block (0x%08x)\n", ofs, jeb->offset);
626 DIRTY_SPACE(PAD(sizeof(struct jffs2_unknown_node))); 833 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node)))))
834 return err;
627 ofs += PAD(sizeof(struct jffs2_unknown_node)); 835 ofs += PAD(sizeof(struct jffs2_unknown_node));
628 } else { 836 } else {
629 struct jffs2_raw_node_ref *marker_ref = jffs2_alloc_raw_node_ref(); 837 jffs2_link_node_ref(c, jeb, ofs | REF_NORMAL, c->cleanmarker_size, NULL);
630 if (!marker_ref) {
631 printk(KERN_NOTICE "Failed to allocate node ref for clean marker\n");
632 return -ENOMEM;
633 }
634 marker_ref->next_in_ino = NULL;
635 marker_ref->next_phys = NULL;
636 marker_ref->flash_offset = ofs | REF_NORMAL;
637 marker_ref->__totlen = c->cleanmarker_size;
638 jeb->first_node = jeb->last_node = marker_ref;
639 838
640 USED_SPACE(PAD(c->cleanmarker_size));
641 ofs += PAD(c->cleanmarker_size); 839 ofs += PAD(c->cleanmarker_size);
642 } 840 }
643 break; 841 break;
@@ -645,7 +843,8 @@ scan_more:
645 case JFFS2_NODETYPE_PADDING: 843 case JFFS2_NODETYPE_PADDING:
646 if (jffs2_sum_active()) 844 if (jffs2_sum_active())
647 jffs2_sum_add_padding_mem(s, je32_to_cpu(node->totlen)); 845 jffs2_sum_add_padding_mem(s, je32_to_cpu(node->totlen));
648 DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); 846 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
847 return err;
649 ofs += PAD(je32_to_cpu(node->totlen)); 848 ofs += PAD(je32_to_cpu(node->totlen));
650 break; 849 break;
651 850
@@ -656,7 +855,8 @@ scan_more:
656 c->flags |= JFFS2_SB_FLAG_RO; 855 c->flags |= JFFS2_SB_FLAG_RO;
657 if (!(jffs2_is_readonly(c))) 856 if (!(jffs2_is_readonly(c)))
658 return -EROFS; 857 return -EROFS;
659 DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); 858 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
859 return err;
660 ofs += PAD(je32_to_cpu(node->totlen)); 860 ofs += PAD(je32_to_cpu(node->totlen));
661 break; 861 break;
662 862
@@ -666,15 +866,21 @@ scan_more:
666 866
667 case JFFS2_FEATURE_RWCOMPAT_DELETE: 867 case JFFS2_FEATURE_RWCOMPAT_DELETE:
668 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); 868 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs));
669 DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); 869 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
870 return err;
670 ofs += PAD(je32_to_cpu(node->totlen)); 871 ofs += PAD(je32_to_cpu(node->totlen));
671 break; 872 break;
672 873
673 case JFFS2_FEATURE_RWCOMPAT_COPY: 874 case JFFS2_FEATURE_RWCOMPAT_COPY: {
674 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); 875 D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs));
675 USED_SPACE(PAD(je32_to_cpu(node->totlen))); 876
877 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(node->totlen)), NULL);
878
879 /* We can't summarise nodes we don't grok */
880 jffs2_sum_disable_collecting(s);
676 ofs += PAD(je32_to_cpu(node->totlen)); 881 ofs += PAD(je32_to_cpu(node->totlen));
677 break; 882 break;
883 }
678 } 884 }
679 } 885 }
680 } 886 }
@@ -687,9 +893,9 @@ scan_more:
687 } 893 }
688 } 894 }
689 895
690 D1(printk(KERN_DEBUG "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x\n", jeb->offset, 896 D1(printk(KERN_DEBUG "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x, wasted 0x%08x\n",
691 jeb->free_size, jeb->dirty_size, jeb->unchecked_size, jeb->used_size)); 897 jeb->offset,jeb->free_size, jeb->dirty_size, jeb->unchecked_size, jeb->used_size, jeb->wasted_size));
692 898
693 /* mark_node_obsolete can add to wasted !! */ 899 /* mark_node_obsolete can add to wasted !! */
694 if (jeb->wasted_size) { 900 if (jeb->wasted_size) {
695 jeb->dirty_size += jeb->wasted_size; 901 jeb->dirty_size += jeb->wasted_size;
@@ -730,9 +936,9 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
730static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 936static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
731 struct jffs2_raw_inode *ri, uint32_t ofs, struct jffs2_summary *s) 937 struct jffs2_raw_inode *ri, uint32_t ofs, struct jffs2_summary *s)
732{ 938{
733 struct jffs2_raw_node_ref *raw;
734 struct jffs2_inode_cache *ic; 939 struct jffs2_inode_cache *ic;
735 uint32_t ino = je32_to_cpu(ri->ino); 940 uint32_t ino = je32_to_cpu(ri->ino);
941 int err;
736 942
737 D1(printk(KERN_DEBUG "jffs2_scan_inode_node(): Node at 0x%08x\n", ofs)); 943 D1(printk(KERN_DEBUG "jffs2_scan_inode_node(): Node at 0x%08x\n", ofs));
738 944
@@ -745,12 +951,6 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
745 Which means that the _full_ amount of time to get to proper write mode with GC 951 Which means that the _full_ amount of time to get to proper write mode with GC
746 operational may actually be _longer_ than before. Sucks to be me. */ 952 operational may actually be _longer_ than before. Sucks to be me. */
747 953
748 raw = jffs2_alloc_raw_node_ref();
749 if (!raw) {
750 printk(KERN_NOTICE "jffs2_scan_inode_node(): allocation of node reference failed\n");
751 return -ENOMEM;
752 }
753
754 ic = jffs2_get_ino_cache(c, ino); 954 ic = jffs2_get_ino_cache(c, ino);
755 if (!ic) { 955 if (!ic) {
756 /* Inocache get failed. Either we read a bogus ino# or it's just genuinely the 956 /* Inocache get failed. Either we read a bogus ino# or it's just genuinely the
@@ -762,30 +962,17 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
762 printk(KERN_NOTICE "jffs2_scan_inode_node(): CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 962 printk(KERN_NOTICE "jffs2_scan_inode_node(): CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
763 ofs, je32_to_cpu(ri->node_crc), crc); 963 ofs, je32_to_cpu(ri->node_crc), crc);
764 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */ 964 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */
765 DIRTY_SPACE(PAD(je32_to_cpu(ri->totlen))); 965 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(ri->totlen)))))
766 jffs2_free_raw_node_ref(raw); 966 return err;
767 return 0; 967 return 0;
768 } 968 }
769 ic = jffs2_scan_make_ino_cache(c, ino); 969 ic = jffs2_scan_make_ino_cache(c, ino);
770 if (!ic) { 970 if (!ic)
771 jffs2_free_raw_node_ref(raw);
772 return -ENOMEM; 971 return -ENOMEM;
773 }
774 } 972 }
775 973
776 /* Wheee. It worked */ 974 /* Wheee. It worked */
777 975 jffs2_link_node_ref(c, jeb, ofs | REF_UNCHECKED, PAD(je32_to_cpu(ri->totlen)), ic);
778 raw->flash_offset = ofs | REF_UNCHECKED;
779 raw->__totlen = PAD(je32_to_cpu(ri->totlen));
780 raw->next_phys = NULL;
781 raw->next_in_ino = ic->nodes;
782
783 ic->nodes = raw;
784 if (!jeb->first_node)
785 jeb->first_node = raw;
786 if (jeb->last_node)
787 jeb->last_node->next_phys = raw;
788 jeb->last_node = raw;
789 976
790 D1(printk(KERN_DEBUG "Node is ino #%u, version %d. Range 0x%x-0x%x\n", 977 D1(printk(KERN_DEBUG "Node is ino #%u, version %d. Range 0x%x-0x%x\n",
791 je32_to_cpu(ri->ino), je32_to_cpu(ri->version), 978 je32_to_cpu(ri->ino), je32_to_cpu(ri->version),
@@ -794,8 +981,6 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
794 981
795 pseudo_random += je32_to_cpu(ri->version); 982 pseudo_random += je32_to_cpu(ri->version);
796 983
797 UNCHECKED_SPACE(PAD(je32_to_cpu(ri->totlen)));
798
799 if (jffs2_sum_active()) { 984 if (jffs2_sum_active()) {
800 jffs2_sum_add_inode_mem(s, ri, ofs - jeb->offset); 985 jffs2_sum_add_inode_mem(s, ri, ofs - jeb->offset);
801 } 986 }
@@ -806,10 +991,10 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
806static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 991static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
807 struct jffs2_raw_dirent *rd, uint32_t ofs, struct jffs2_summary *s) 992 struct jffs2_raw_dirent *rd, uint32_t ofs, struct jffs2_summary *s)
808{ 993{
809 struct jffs2_raw_node_ref *raw;
810 struct jffs2_full_dirent *fd; 994 struct jffs2_full_dirent *fd;
811 struct jffs2_inode_cache *ic; 995 struct jffs2_inode_cache *ic;
812 uint32_t crc; 996 uint32_t crc;
997 int err;
813 998
814 D1(printk(KERN_DEBUG "jffs2_scan_dirent_node(): Node at 0x%08x\n", ofs)); 999 D1(printk(KERN_DEBUG "jffs2_scan_dirent_node(): Node at 0x%08x\n", ofs));
815 1000
@@ -821,7 +1006,8 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
821 printk(KERN_NOTICE "jffs2_scan_dirent_node(): Node CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 1006 printk(KERN_NOTICE "jffs2_scan_dirent_node(): Node CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
822 ofs, je32_to_cpu(rd->node_crc), crc); 1007 ofs, je32_to_cpu(rd->node_crc), crc);
823 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */ 1008 /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */
824 DIRTY_SPACE(PAD(je32_to_cpu(rd->totlen))); 1009 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen)))))
1010 return err;
825 return 0; 1011 return 0;
826 } 1012 }
827 1013
@@ -842,40 +1028,23 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
842 jffs2_free_full_dirent(fd); 1028 jffs2_free_full_dirent(fd);
843 /* FIXME: Why do we believe totlen? */ 1029 /* FIXME: Why do we believe totlen? */
844 /* We believe totlen because the CRC on the node _header_ was OK, just the name failed. */ 1030 /* We believe totlen because the CRC on the node _header_ was OK, just the name failed. */
845 DIRTY_SPACE(PAD(je32_to_cpu(rd->totlen))); 1031 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen)))))
1032 return err;
846 return 0; 1033 return 0;
847 } 1034 }
848 raw = jffs2_alloc_raw_node_ref();
849 if (!raw) {
850 jffs2_free_full_dirent(fd);
851 printk(KERN_NOTICE "jffs2_scan_dirent_node(): allocation of node reference failed\n");
852 return -ENOMEM;
853 }
854 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(rd->pino)); 1035 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(rd->pino));
855 if (!ic) { 1036 if (!ic) {
856 jffs2_free_full_dirent(fd); 1037 jffs2_free_full_dirent(fd);
857 jffs2_free_raw_node_ref(raw);
858 return -ENOMEM; 1038 return -ENOMEM;
859 } 1039 }
860 1040
861 raw->__totlen = PAD(je32_to_cpu(rd->totlen)); 1041 fd->raw = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rd->totlen)), ic);
862 raw->flash_offset = ofs | REF_PRISTINE;
863 raw->next_phys = NULL;
864 raw->next_in_ino = ic->nodes;
865 ic->nodes = raw;
866 if (!jeb->first_node)
867 jeb->first_node = raw;
868 if (jeb->last_node)
869 jeb->last_node->next_phys = raw;
870 jeb->last_node = raw;
871 1042
872 fd->raw = raw;
873 fd->next = NULL; 1043 fd->next = NULL;
874 fd->version = je32_to_cpu(rd->version); 1044 fd->version = je32_to_cpu(rd->version);
875 fd->ino = je32_to_cpu(rd->ino); 1045 fd->ino = je32_to_cpu(rd->ino);
876 fd->nhash = full_name_hash(fd->name, rd->nsize); 1046 fd->nhash = full_name_hash(fd->name, rd->nsize);
877 fd->type = rd->type; 1047 fd->type = rd->type;
878 USED_SPACE(PAD(je32_to_cpu(rd->totlen)));
879 jffs2_add_fd_to_list(c, fd, &ic->scan_dents); 1048 jffs2_add_fd_to_list(c, fd, &ic->scan_dents);
880 1049
881 if (jffs2_sum_active()) { 1050 if (jffs2_sum_active()) {
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
new file mode 100644
index 000000000000..52a9894a6364
--- /dev/null
+++ b/fs/jffs2/security.c
@@ -0,0 +1,82 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/time.h>
15#include <linux/pagemap.h>
16#include <linux/highmem.h>
17#include <linux/crc32.h>
18#include <linux/jffs2.h>
19#include <linux/xattr.h>
20#include <linux/mtd/mtd.h>
21#include <linux/security.h>
22#include "nodelist.h"
23
24/* ---- Initial Security Label Attachment -------------- */
25int jffs2_init_security(struct inode *inode, struct inode *dir)
26{
27 int rc;
28 size_t len;
29 void *value;
30 char *name;
31
32 rc = security_inode_init_security(inode, dir, &name, &value, &len);
33 if (rc) {
34 if (rc == -EOPNOTSUPP)
35 return 0;
36 return rc;
37 }
38 rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0);
39
40 kfree(name);
41 kfree(value);
42 return rc;
43}
44
45/* ---- XATTR Handler for "security.*" ----------------- */
46static int jffs2_security_getxattr(struct inode *inode, const char *name,
47 void *buffer, size_t size)
48{
49 if (!strcmp(name, ""))
50 return -EINVAL;
51
52 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size);
53}
54
55static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer,
56 size_t size, int flags)
57{
58 if (!strcmp(name, ""))
59 return -EINVAL;
60
61 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags);
62}
63
64static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size,
65 const char *name, size_t name_len)
66{
67 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
68
69 if (list && retlen <= list_size) {
70 strcpy(list, XATTR_SECURITY_PREFIX);
71 strcpy(list + XATTR_SECURITY_PREFIX_LEN, name);
72 }
73
74 return retlen;
75}
76
77struct xattr_handler jffs2_security_xattr_handler = {
78 .prefix = XATTR_SECURITY_PREFIX,
79 .list = jffs2_security_listxattr,
80 .set = jffs2_security_setxattr,
81 .get = jffs2_security_getxattr
82};
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index fb9cec61fcf2..c19bd476e8ec 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -5,6 +5,7 @@
5 * Zoltan Sogor <weth@inf.u-szeged.hu>, 5 * Zoltan Sogor <weth@inf.u-szeged.hu>,
6 * Patrik Kluba <pajko@halom.u-szeged.hu>, 6 * Patrik Kluba <pajko@halom.u-szeged.hu>,
7 * University of Szeged, Hungary 7 * University of Szeged, Hungary
8 * 2006 KaiGai Kohei <kaigai@ak.jp.nec.com>
8 * 9 *
9 * For licensing information, see the file 'LICENCE' in this directory. 10 * For licensing information, see the file 'LICENCE' in this directory.
10 * 11 *
@@ -42,7 +43,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
42 return -ENOMEM; 43 return -ENOMEM;
43 } 44 }
44 45
45 dbg_summary("returned succesfully\n"); 46 dbg_summary("returned successfully\n");
46 47
47 return 0; 48 return 0;
48} 49}
@@ -81,6 +82,19 @@ static int jffs2_sum_add_mem(struct jffs2_summary *s, union jffs2_sum_mem *item)
81 dbg_summary("dirent (%u) added to summary\n", 82 dbg_summary("dirent (%u) added to summary\n",
82 je32_to_cpu(item->d.ino)); 83 je32_to_cpu(item->d.ino));
83 break; 84 break;
85#ifdef CONFIG_JFFS2_FS_XATTR
86 case JFFS2_NODETYPE_XATTR:
87 s->sum_size += JFFS2_SUMMARY_XATTR_SIZE;
88 s->sum_num++;
89 dbg_summary("xattr (xid=%u, version=%u) added to summary\n",
90 je32_to_cpu(item->x.xid), je32_to_cpu(item->x.version));
91 break;
92 case JFFS2_NODETYPE_XREF:
93 s->sum_size += JFFS2_SUMMARY_XREF_SIZE;
94 s->sum_num++;
95 dbg_summary("xref added to summary\n");
96 break;
97#endif
84 default: 98 default:
85 JFFS2_WARNING("UNKNOWN node type %u\n", 99 JFFS2_WARNING("UNKNOWN node type %u\n",
86 je16_to_cpu(item->u.nodetype)); 100 je16_to_cpu(item->u.nodetype));
@@ -141,6 +155,40 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r
141 return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp); 155 return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp);
142} 156}
143 157
158#ifdef CONFIG_JFFS2_FS_XATTR
159int jffs2_sum_add_xattr_mem(struct jffs2_summary *s, struct jffs2_raw_xattr *rx, uint32_t ofs)
160{
161 struct jffs2_sum_xattr_mem *temp;
162
163 temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL);
164 if (!temp)
165 return -ENOMEM;
166
167 temp->nodetype = rx->nodetype;
168 temp->xid = rx->xid;
169 temp->version = rx->version;
170 temp->offset = cpu_to_je32(ofs);
171 temp->totlen = rx->totlen;
172 temp->next = NULL;
173
174 return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp);
175}
176
177int jffs2_sum_add_xref_mem(struct jffs2_summary *s, struct jffs2_raw_xref *rr, uint32_t ofs)
178{
179 struct jffs2_sum_xref_mem *temp;
180
181 temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL);
182 if (!temp)
183 return -ENOMEM;
184
185 temp->nodetype = rr->nodetype;
186 temp->offset = cpu_to_je32(ofs);
187 temp->next = NULL;
188
189 return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp);
190}
191#endif
144/* Cleanup every collected summary information */ 192/* Cleanup every collected summary information */
145 193
146static void jffs2_sum_clean_collected(struct jffs2_summary *s) 194static void jffs2_sum_clean_collected(struct jffs2_summary *s)
@@ -259,7 +307,34 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
259 307
260 return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp); 308 return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp);
261 } 309 }
310#ifdef CONFIG_JFFS2_FS_XATTR
311 case JFFS2_NODETYPE_XATTR: {
312 struct jffs2_sum_xattr_mem *temp;
313 temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL);
314 if (!temp)
315 goto no_mem;
316
317 temp->nodetype = node->x.nodetype;
318 temp->xid = node->x.xid;
319 temp->version = node->x.version;
320 temp->totlen = node->x.totlen;
321 temp->offset = cpu_to_je32(ofs);
322 temp->next = NULL;
262 323
324 return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp);
325 }
326 case JFFS2_NODETYPE_XREF: {
327 struct jffs2_sum_xref_mem *temp;
328 temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL);
329 if (!temp)
330 goto no_mem;
331 temp->nodetype = node->r.nodetype;
332 temp->offset = cpu_to_je32(ofs);
333 temp->next = NULL;
334
335 return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp);
336 }
337#endif
263 case JFFS2_NODETYPE_PADDING: 338 case JFFS2_NODETYPE_PADDING:
264 dbg_summary("node PADDING\n"); 339 dbg_summary("node PADDING\n");
265 c->summary->sum_padded += je32_to_cpu(node->u.totlen); 340 c->summary->sum_padded += je32_to_cpu(node->u.totlen);
@@ -288,23 +363,41 @@ no_mem:
288 return -ENOMEM; 363 return -ENOMEM;
289} 364}
290 365
366static struct jffs2_raw_node_ref *sum_link_node_ref(struct jffs2_sb_info *c,
367 struct jffs2_eraseblock *jeb,
368 uint32_t ofs, uint32_t len,
369 struct jffs2_inode_cache *ic)
370{
371 /* If there was a gap, mark it dirty */
372 if ((ofs & ~3) > c->sector_size - jeb->free_size) {
373 /* Ew. Summary doesn't actually tell us explicitly about dirty space */
374 jffs2_scan_dirty_space(c, jeb, (ofs & ~3) - (c->sector_size - jeb->free_size));
375 }
376
377 return jffs2_link_node_ref(c, jeb, jeb->offset + ofs, len, ic);
378}
291 379
292/* Process the stored summary information - helper function for jffs2_sum_scan_sumnode() */ 380/* Process the stored summary information - helper function for jffs2_sum_scan_sumnode() */
293 381
294static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 382static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
295 struct jffs2_raw_summary *summary, uint32_t *pseudo_random) 383 struct jffs2_raw_summary *summary, uint32_t *pseudo_random)
296{ 384{
297 struct jffs2_raw_node_ref *raw;
298 struct jffs2_inode_cache *ic; 385 struct jffs2_inode_cache *ic;
299 struct jffs2_full_dirent *fd; 386 struct jffs2_full_dirent *fd;
300 void *sp; 387 void *sp;
301 int i, ino; 388 int i, ino;
389 int err;
302 390
303 sp = summary->sum; 391 sp = summary->sum;
304 392
305 for (i=0; i<je32_to_cpu(summary->sum_num); i++) { 393 for (i=0; i<je32_to_cpu(summary->sum_num); i++) {
306 dbg_summary("processing summary index %d\n", i); 394 dbg_summary("processing summary index %d\n", i);
307 395
396 /* Make sure there's a spare ref for dirty space */
397 err = jffs2_prealloc_raw_node_refs(c, jeb, 2);
398 if (err)
399 return err;
400
308 switch (je16_to_cpu(((struct jffs2_sum_unknown_flash *)sp)->nodetype)) { 401 switch (je16_to_cpu(((struct jffs2_sum_unknown_flash *)sp)->nodetype)) {
309 case JFFS2_NODETYPE_INODE: { 402 case JFFS2_NODETYPE_INODE: {
310 struct jffs2_sum_inode_flash *spi; 403 struct jffs2_sum_inode_flash *spi;
@@ -312,38 +405,20 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
312 405
313 ino = je32_to_cpu(spi->inode); 406 ino = je32_to_cpu(spi->inode);
314 407
315 dbg_summary("Inode at 0x%08x\n", 408 dbg_summary("Inode at 0x%08x-0x%08x\n",
316 jeb->offset + je32_to_cpu(spi->offset)); 409 jeb->offset + je32_to_cpu(spi->offset),
317 410 jeb->offset + je32_to_cpu(spi->offset) + je32_to_cpu(spi->totlen));
318 raw = jffs2_alloc_raw_node_ref();
319 if (!raw) {
320 JFFS2_NOTICE("allocation of node reference failed\n");
321 kfree(summary);
322 return -ENOMEM;
323 }
324 411
325 ic = jffs2_scan_make_ino_cache(c, ino); 412 ic = jffs2_scan_make_ino_cache(c, ino);
326 if (!ic) { 413 if (!ic) {
327 JFFS2_NOTICE("scan_make_ino_cache failed\n"); 414 JFFS2_NOTICE("scan_make_ino_cache failed\n");
328 jffs2_free_raw_node_ref(raw);
329 kfree(summary);
330 return -ENOMEM; 415 return -ENOMEM;
331 } 416 }
332 417
333 raw->flash_offset = (jeb->offset + je32_to_cpu(spi->offset)) | REF_UNCHECKED; 418 sum_link_node_ref(c, jeb, je32_to_cpu(spi->offset) | REF_UNCHECKED,
334 raw->__totlen = PAD(je32_to_cpu(spi->totlen)); 419 PAD(je32_to_cpu(spi->totlen)), ic);
335 raw->next_phys = NULL;
336 raw->next_in_ino = ic->nodes;
337
338 ic->nodes = raw;
339 if (!jeb->first_node)
340 jeb->first_node = raw;
341 if (jeb->last_node)
342 jeb->last_node->next_phys = raw;
343 jeb->last_node = raw;
344 *pseudo_random += je32_to_cpu(spi->version);
345 420
346 UNCHECKED_SPACE(PAD(je32_to_cpu(spi->totlen))); 421 *pseudo_random += je32_to_cpu(spi->version);
347 422
348 sp += JFFS2_SUMMARY_INODE_SIZE; 423 sp += JFFS2_SUMMARY_INODE_SIZE;
349 424
@@ -354,52 +429,33 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
354 struct jffs2_sum_dirent_flash *spd; 429 struct jffs2_sum_dirent_flash *spd;
355 spd = sp; 430 spd = sp;
356 431
357 dbg_summary("Dirent at 0x%08x\n", 432 dbg_summary("Dirent at 0x%08x-0x%08x\n",
358 jeb->offset + je32_to_cpu(spd->offset)); 433 jeb->offset + je32_to_cpu(spd->offset),
434 jeb->offset + je32_to_cpu(spd->offset) + je32_to_cpu(spd->totlen));
435
359 436
360 fd = jffs2_alloc_full_dirent(spd->nsize+1); 437 fd = jffs2_alloc_full_dirent(spd->nsize+1);
361 if (!fd) { 438 if (!fd)
362 kfree(summary);
363 return -ENOMEM; 439 return -ENOMEM;
364 }
365 440
366 memcpy(&fd->name, spd->name, spd->nsize); 441 memcpy(&fd->name, spd->name, spd->nsize);
367 fd->name[spd->nsize] = 0; 442 fd->name[spd->nsize] = 0;
368 443
369 raw = jffs2_alloc_raw_node_ref();
370 if (!raw) {
371 jffs2_free_full_dirent(fd);
372 JFFS2_NOTICE("allocation of node reference failed\n");
373 kfree(summary);
374 return -ENOMEM;
375 }
376
377 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(spd->pino)); 444 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(spd->pino));
378 if (!ic) { 445 if (!ic) {
379 jffs2_free_full_dirent(fd); 446 jffs2_free_full_dirent(fd);
380 jffs2_free_raw_node_ref(raw);
381 kfree(summary);
382 return -ENOMEM; 447 return -ENOMEM;
383 } 448 }
384 449
385 raw->__totlen = PAD(je32_to_cpu(spd->totlen)); 450 fd->raw = sum_link_node_ref(c, jeb, je32_to_cpu(spd->offset) | REF_UNCHECKED,
386 raw->flash_offset = (jeb->offset + je32_to_cpu(spd->offset)) | REF_PRISTINE; 451 PAD(je32_to_cpu(spd->totlen)), ic);
387 raw->next_phys = NULL; 452
388 raw->next_in_ino = ic->nodes;
389 ic->nodes = raw;
390 if (!jeb->first_node)
391 jeb->first_node = raw;
392 if (jeb->last_node)
393 jeb->last_node->next_phys = raw;
394 jeb->last_node = raw;
395
396 fd->raw = raw;
397 fd->next = NULL; 453 fd->next = NULL;
398 fd->version = je32_to_cpu(spd->version); 454 fd->version = je32_to_cpu(spd->version);
399 fd->ino = je32_to_cpu(spd->ino); 455 fd->ino = je32_to_cpu(spd->ino);
400 fd->nhash = full_name_hash(fd->name, spd->nsize); 456 fd->nhash = full_name_hash(fd->name, spd->nsize);
401 fd->type = spd->type; 457 fd->type = spd->type;
402 USED_SPACE(PAD(je32_to_cpu(spd->totlen))); 458
403 jffs2_add_fd_to_list(c, fd, &ic->scan_dents); 459 jffs2_add_fd_to_list(c, fd, &ic->scan_dents);
404 460
405 *pseudo_random += je32_to_cpu(spd->version); 461 *pseudo_random += je32_to_cpu(spd->version);
@@ -408,48 +464,100 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
408 464
409 break; 465 break;
410 } 466 }
467#ifdef CONFIG_JFFS2_FS_XATTR
468 case JFFS2_NODETYPE_XATTR: {
469 struct jffs2_xattr_datum *xd;
470 struct jffs2_sum_xattr_flash *spx;
471
472 spx = (struct jffs2_sum_xattr_flash *)sp;
473 dbg_summary("xattr at %#08x-%#08x (xid=%u, version=%u)\n",
474 jeb->offset + je32_to_cpu(spx->offset),
475 jeb->offset + je32_to_cpu(spx->offset) + je32_to_cpu(spx->totlen),
476 je32_to_cpu(spx->xid), je32_to_cpu(spx->version));
477
478 xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid),
479 je32_to_cpu(spx->version));
480 if (IS_ERR(xd))
481 return PTR_ERR(xd);
482 if (xd->version > je32_to_cpu(spx->version)) {
483 /* node is not the newest one */
484 struct jffs2_raw_node_ref *raw
485 = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
486 PAD(je32_to_cpu(spx->totlen)), NULL);
487 raw->next_in_ino = xd->node->next_in_ino;
488 xd->node->next_in_ino = raw;
489 } else {
490 xd->version = je32_to_cpu(spx->version);
491 sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
492 PAD(je32_to_cpu(spx->totlen)), (void *)xd);
493 }
494 *pseudo_random += je32_to_cpu(spx->xid);
495 sp += JFFS2_SUMMARY_XATTR_SIZE;
496
497 break;
498 }
499 case JFFS2_NODETYPE_XREF: {
500 struct jffs2_xattr_ref *ref;
501 struct jffs2_sum_xref_flash *spr;
502
503 spr = (struct jffs2_sum_xref_flash *)sp;
504 dbg_summary("xref at %#08x-%#08x\n",
505 jeb->offset + je32_to_cpu(spr->offset),
506 jeb->offset + je32_to_cpu(spr->offset) +
507 (uint32_t)PAD(sizeof(struct jffs2_raw_xref)));
508
509 ref = jffs2_alloc_xattr_ref();
510 if (!ref) {
511 JFFS2_NOTICE("allocation of xattr_datum failed\n");
512 return -ENOMEM;
513 }
514 ref->next = c->xref_temp;
515 c->xref_temp = ref;
516
517 sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED,
518 PAD(sizeof(struct jffs2_raw_xref)), (void *)ref);
519
520 *pseudo_random += ref->node->flash_offset;
521 sp += JFFS2_SUMMARY_XREF_SIZE;
411 522
523 break;
524 }
525#endif
412 default : { 526 default : {
413 JFFS2_WARNING("Unsupported node type found in summary! Exiting..."); 527 uint16_t nodetype = je16_to_cpu(((struct jffs2_sum_unknown_flash *)sp)->nodetype);
414 kfree(summary); 528 JFFS2_WARNING("Unsupported node type %x found in summary! Exiting...\n", nodetype);
415 return -EIO; 529 if ((nodetype & JFFS2_COMPAT_MASK) == JFFS2_FEATURE_INCOMPAT)
530 return -EIO;
531
532 /* For compatible node types, just fall back to the full scan */
533 c->wasted_size -= jeb->wasted_size;
534 c->free_size += c->sector_size - jeb->free_size;
535 c->used_size -= jeb->used_size;
536 c->dirty_size -= jeb->dirty_size;
537 jeb->wasted_size = jeb->used_size = jeb->dirty_size = 0;
538 jeb->free_size = c->sector_size;
539
540 jffs2_free_jeb_node_refs(c, jeb);
541 return -ENOTRECOVERABLE;
416 } 542 }
417 } 543 }
418 } 544 }
419
420 kfree(summary);
421 return 0; 545 return 0;
422} 546}
423 547
424/* Process the summary node - called from jffs2_scan_eraseblock() */ 548/* Process the summary node - called from jffs2_scan_eraseblock() */
425
426int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 549int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
427 uint32_t ofs, uint32_t *pseudo_random) 550 struct jffs2_raw_summary *summary, uint32_t sumsize,
551 uint32_t *pseudo_random)
428{ 552{
429 struct jffs2_unknown_node crcnode; 553 struct jffs2_unknown_node crcnode;
430 struct jffs2_raw_node_ref *cache_ref; 554 int ret, ofs;
431 struct jffs2_raw_summary *summary;
432 int ret, sumsize;
433 uint32_t crc; 555 uint32_t crc;
434 556
435 sumsize = c->sector_size - ofs; 557 ofs = c->sector_size - sumsize;
436 ofs += jeb->offset;
437 558
438 dbg_summary("summary found for 0x%08x at 0x%08x (0x%x bytes)\n", 559 dbg_summary("summary found for 0x%08x at 0x%08x (0x%x bytes)\n",
439 jeb->offset, ofs, sumsize); 560 jeb->offset, jeb->offset + ofs, sumsize);
440
441 summary = kmalloc(sumsize, GFP_KERNEL);
442
443 if (!summary) {
444 return -ENOMEM;
445 }
446
447 ret = jffs2_fill_scan_buf(c, (unsigned char *)summary, ofs, sumsize);
448
449 if (ret) {
450 kfree(summary);
451 return ret;
452 }
453 561
454 /* OK, now check for node validity and CRC */ 562 /* OK, now check for node validity and CRC */
455 crcnode.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 563 crcnode.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -486,66 +594,49 @@ int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb
486 594
487 dbg_summary("Summary : CLEANMARKER node \n"); 595 dbg_summary("Summary : CLEANMARKER node \n");
488 596
597 ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
598 if (ret)
599 return ret;
600
489 if (je32_to_cpu(summary->cln_mkr) != c->cleanmarker_size) { 601 if (je32_to_cpu(summary->cln_mkr) != c->cleanmarker_size) {
490 dbg_summary("CLEANMARKER node has totlen 0x%x != normal 0x%x\n", 602 dbg_summary("CLEANMARKER node has totlen 0x%x != normal 0x%x\n",
491 je32_to_cpu(summary->cln_mkr), c->cleanmarker_size); 603 je32_to_cpu(summary->cln_mkr), c->cleanmarker_size);
492 UNCHECKED_SPACE(PAD(je32_to_cpu(summary->cln_mkr))); 604 if ((ret = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(summary->cln_mkr)))))
605 return ret;
493 } else if (jeb->first_node) { 606 } else if (jeb->first_node) {
494 dbg_summary("CLEANMARKER node not first node in block " 607 dbg_summary("CLEANMARKER node not first node in block "
495 "(0x%08x)\n", jeb->offset); 608 "(0x%08x)\n", jeb->offset);
496 UNCHECKED_SPACE(PAD(je32_to_cpu(summary->cln_mkr))); 609 if ((ret = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(summary->cln_mkr)))))
610 return ret;
497 } else { 611 } else {
498 struct jffs2_raw_node_ref *marker_ref = jffs2_alloc_raw_node_ref(); 612 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL,
499 613 je32_to_cpu(summary->cln_mkr), NULL);
500 if (!marker_ref) {
501 JFFS2_NOTICE("Failed to allocate node ref for clean marker\n");
502 kfree(summary);
503 return -ENOMEM;
504 }
505
506 marker_ref->next_in_ino = NULL;
507 marker_ref->next_phys = NULL;
508 marker_ref->flash_offset = jeb->offset | REF_NORMAL;
509 marker_ref->__totlen = je32_to_cpu(summary->cln_mkr);
510 jeb->first_node = jeb->last_node = marker_ref;
511
512 USED_SPACE( PAD(je32_to_cpu(summary->cln_mkr)) );
513 } 614 }
514 } 615 }
515 616
516 if (je32_to_cpu(summary->padded)) {
517 DIRTY_SPACE(je32_to_cpu(summary->padded));
518 }
519
520 ret = jffs2_sum_process_sum_data(c, jeb, summary, pseudo_random); 617 ret = jffs2_sum_process_sum_data(c, jeb, summary, pseudo_random);
618 /* -ENOTRECOVERABLE isn't a fatal error -- it means we should do a full
619 scan of this eraseblock. So return zero */
620 if (ret == -ENOTRECOVERABLE)
621 return 0;
521 if (ret) 622 if (ret)
522 return ret; 623 return ret; /* real error */
523 624
524 /* for PARANOIA_CHECK */ 625 /* for PARANOIA_CHECK */
525 cache_ref = jffs2_alloc_raw_node_ref(); 626 ret = jffs2_prealloc_raw_node_refs(c, jeb, 2);
526 627 if (ret)
527 if (!cache_ref) { 628 return ret;
528 JFFS2_NOTICE("Failed to allocate node ref for cache\n");
529 return -ENOMEM;
530 }
531
532 cache_ref->next_in_ino = NULL;
533 cache_ref->next_phys = NULL;
534 cache_ref->flash_offset = ofs | REF_NORMAL;
535 cache_ref->__totlen = sumsize;
536
537 if (!jeb->first_node)
538 jeb->first_node = cache_ref;
539 if (jeb->last_node)
540 jeb->last_node->next_phys = cache_ref;
541 jeb->last_node = cache_ref;
542 629
543 USED_SPACE(sumsize); 630 sum_link_node_ref(c, jeb, ofs | REF_NORMAL, sumsize, NULL);
544 631
545 jeb->wasted_size += jeb->free_size; 632 if (unlikely(jeb->free_size)) {
546 c->wasted_size += jeb->free_size; 633 JFFS2_WARNING("Free size 0x%x bytes in eraseblock @0x%08x with summary?\n",
547 c->free_size -= jeb->free_size; 634 jeb->free_size, jeb->offset);
548 jeb->free_size = 0; 635 jeb->wasted_size += jeb->free_size;
636 c->wasted_size += jeb->free_size;
637 c->free_size -= jeb->free_size;
638 jeb->free_size = 0;
639 }
549 640
550 return jffs2_scan_classify_jeb(c, jeb); 641 return jffs2_scan_classify_jeb(c, jeb);
551 642
@@ -564,6 +655,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
564 union jffs2_sum_mem *temp; 655 union jffs2_sum_mem *temp;
565 struct jffs2_sum_marker *sm; 656 struct jffs2_sum_marker *sm;
566 struct kvec vecs[2]; 657 struct kvec vecs[2];
658 uint32_t sum_ofs;
567 void *wpage; 659 void *wpage;
568 int ret; 660 int ret;
569 size_t retlen; 661 size_t retlen;
@@ -581,16 +673,17 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
581 wpage = c->summary->sum_buf; 673 wpage = c->summary->sum_buf;
582 674
583 while (c->summary->sum_num) { 675 while (c->summary->sum_num) {
676 temp = c->summary->sum_list_head;
584 677
585 switch (je16_to_cpu(c->summary->sum_list_head->u.nodetype)) { 678 switch (je16_to_cpu(temp->u.nodetype)) {
586 case JFFS2_NODETYPE_INODE: { 679 case JFFS2_NODETYPE_INODE: {
587 struct jffs2_sum_inode_flash *sino_ptr = wpage; 680 struct jffs2_sum_inode_flash *sino_ptr = wpage;
588 681
589 sino_ptr->nodetype = c->summary->sum_list_head->i.nodetype; 682 sino_ptr->nodetype = temp->i.nodetype;
590 sino_ptr->inode = c->summary->sum_list_head->i.inode; 683 sino_ptr->inode = temp->i.inode;
591 sino_ptr->version = c->summary->sum_list_head->i.version; 684 sino_ptr->version = temp->i.version;
592 sino_ptr->offset = c->summary->sum_list_head->i.offset; 685 sino_ptr->offset = temp->i.offset;
593 sino_ptr->totlen = c->summary->sum_list_head->i.totlen; 686 sino_ptr->totlen = temp->i.totlen;
594 687
595 wpage += JFFS2_SUMMARY_INODE_SIZE; 688 wpage += JFFS2_SUMMARY_INODE_SIZE;
596 689
@@ -600,30 +693,60 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
600 case JFFS2_NODETYPE_DIRENT: { 693 case JFFS2_NODETYPE_DIRENT: {
601 struct jffs2_sum_dirent_flash *sdrnt_ptr = wpage; 694 struct jffs2_sum_dirent_flash *sdrnt_ptr = wpage;
602 695
603 sdrnt_ptr->nodetype = c->summary->sum_list_head->d.nodetype; 696 sdrnt_ptr->nodetype = temp->d.nodetype;
604 sdrnt_ptr->totlen = c->summary->sum_list_head->d.totlen; 697 sdrnt_ptr->totlen = temp->d.totlen;
605 sdrnt_ptr->offset = c->summary->sum_list_head->d.offset; 698 sdrnt_ptr->offset = temp->d.offset;
606 sdrnt_ptr->pino = c->summary->sum_list_head->d.pino; 699 sdrnt_ptr->pino = temp->d.pino;
607 sdrnt_ptr->version = c->summary->sum_list_head->d.version; 700 sdrnt_ptr->version = temp->d.version;
608 sdrnt_ptr->ino = c->summary->sum_list_head->d.ino; 701 sdrnt_ptr->ino = temp->d.ino;
609 sdrnt_ptr->nsize = c->summary->sum_list_head->d.nsize; 702 sdrnt_ptr->nsize = temp->d.nsize;
610 sdrnt_ptr->type = c->summary->sum_list_head->d.type; 703 sdrnt_ptr->type = temp->d.type;
611 704
612 memcpy(sdrnt_ptr->name, c->summary->sum_list_head->d.name, 705 memcpy(sdrnt_ptr->name, temp->d.name,
613 c->summary->sum_list_head->d.nsize); 706 temp->d.nsize);
614 707
615 wpage += JFFS2_SUMMARY_DIRENT_SIZE(c->summary->sum_list_head->d.nsize); 708 wpage += JFFS2_SUMMARY_DIRENT_SIZE(temp->d.nsize);
616 709
617 break; 710 break;
618 } 711 }
712#ifdef CONFIG_JFFS2_FS_XATTR
713 case JFFS2_NODETYPE_XATTR: {
714 struct jffs2_sum_xattr_flash *sxattr_ptr = wpage;
715
716 temp = c->summary->sum_list_head;
717 sxattr_ptr->nodetype = temp->x.nodetype;
718 sxattr_ptr->xid = temp->x.xid;
719 sxattr_ptr->version = temp->x.version;
720 sxattr_ptr->offset = temp->x.offset;
721 sxattr_ptr->totlen = temp->x.totlen;
722
723 wpage += JFFS2_SUMMARY_XATTR_SIZE;
724 break;
725 }
726 case JFFS2_NODETYPE_XREF: {
727 struct jffs2_sum_xref_flash *sxref_ptr = wpage;
728
729 temp = c->summary->sum_list_head;
730 sxref_ptr->nodetype = temp->r.nodetype;
731 sxref_ptr->offset = temp->r.offset;
619 732
733 wpage += JFFS2_SUMMARY_XREF_SIZE;
734 break;
735 }
736#endif
620 default : { 737 default : {
621 BUG(); /* unknown node in summary information */ 738 if ((je16_to_cpu(temp->u.nodetype) & JFFS2_COMPAT_MASK)
739 == JFFS2_FEATURE_RWCOMPAT_COPY) {
740 dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n",
741 je16_to_cpu(temp->u.nodetype));
742 jffs2_sum_disable_collecting(c->summary);
743 } else {
744 BUG(); /* unknown node in summary information */
745 }
622 } 746 }
623 } 747 }
624 748
625 temp = c->summary->sum_list_head; 749 c->summary->sum_list_head = temp->u.next;
626 c->summary->sum_list_head = c->summary->sum_list_head->u.next;
627 kfree(temp); 750 kfree(temp);
628 751
629 c->summary->sum_num--; 752 c->summary->sum_num--;
@@ -645,25 +768,34 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
645 vecs[1].iov_base = c->summary->sum_buf; 768 vecs[1].iov_base = c->summary->sum_buf;
646 vecs[1].iov_len = datasize; 769 vecs[1].iov_len = datasize;
647 770
648 dbg_summary("JFFS2: writing out data to flash to pos : 0x%08x\n", 771 sum_ofs = jeb->offset + c->sector_size - jeb->free_size;
649 jeb->offset + c->sector_size - jeb->free_size);
650 772
651 spin_unlock(&c->erase_completion_lock); 773 dbg_summary("JFFS2: writing out data to flash to pos : 0x%08x\n",
652 ret = jffs2_flash_writev(c, vecs, 2, jeb->offset + c->sector_size - 774 sum_ofs);
653 jeb->free_size, &retlen, 0);
654 spin_lock(&c->erase_completion_lock);
655 775
776 ret = jffs2_flash_writev(c, vecs, 2, sum_ofs, &retlen, 0);
656 777
657 if (ret || (retlen != infosize)) { 778 if (ret || (retlen != infosize)) {
658 JFFS2_WARNING("Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n", 779
659 infosize, jeb->offset + c->sector_size - jeb->free_size, ret, retlen); 780 JFFS2_WARNING("Write of %u bytes at 0x%08x failed. returned %d, retlen %zd\n",
781 infosize, sum_ofs, ret, retlen);
782
783 if (retlen) {
784 /* Waste remaining space */
785 spin_lock(&c->erase_completion_lock);
786 jffs2_link_node_ref(c, jeb, sum_ofs | REF_OBSOLETE, infosize, NULL);
787 spin_unlock(&c->erase_completion_lock);
788 }
660 789
661 c->summary->sum_size = JFFS2_SUMMARY_NOSUM_SIZE; 790 c->summary->sum_size = JFFS2_SUMMARY_NOSUM_SIZE;
662 WASTED_SPACE(infosize);
663 791
664 return 1; 792 return 0;
665 } 793 }
666 794
795 spin_lock(&c->erase_completion_lock);
796 jffs2_link_node_ref(c, jeb, sum_ofs | REF_NORMAL, infosize, NULL);
797 spin_unlock(&c->erase_completion_lock);
798
667 return 0; 799 return 0;
668} 800}
669 801
@@ -671,13 +803,16 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
671 803
672int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) 804int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
673{ 805{
674 struct jffs2_raw_node_ref *summary_ref; 806 int datasize, infosize, padsize;
675 int datasize, infosize, padsize, ret;
676 struct jffs2_eraseblock *jeb; 807 struct jffs2_eraseblock *jeb;
808 int ret;
677 809
678 dbg_summary("called\n"); 810 dbg_summary("called\n");
679 811
812 spin_unlock(&c->erase_completion_lock);
813
680 jeb = c->nextblock; 814 jeb = c->nextblock;
815 jffs2_prealloc_raw_node_refs(c, jeb, 1);
681 816
682 if (!c->summary->sum_num || !c->summary->sum_list_head) { 817 if (!c->summary->sum_num || !c->summary->sum_list_head) {
683 JFFS2_WARNING("Empty summary info!!!\n"); 818 JFFS2_WARNING("Empty summary info!!!\n");
@@ -696,35 +831,11 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
696 jffs2_sum_disable_collecting(c->summary); 831 jffs2_sum_disable_collecting(c->summary);
697 832
698 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize); 833 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
834 spin_lock(&c->erase_completion_lock);
699 return 0; 835 return 0;
700 } 836 }
701 837
702 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); 838 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
703 if (ret)
704 return 0; /* can't write out summary, block is marked as NOSUM_SIZE */
705
706 /* for ACCT_PARANOIA_CHECK */
707 spin_unlock(&c->erase_completion_lock);
708 summary_ref = jffs2_alloc_raw_node_ref();
709 spin_lock(&c->erase_completion_lock); 839 spin_lock(&c->erase_completion_lock);
710 840 return ret;
711 if (!summary_ref) {
712 JFFS2_NOTICE("Failed to allocate node ref for summary\n");
713 return -ENOMEM;
714 }
715
716 summary_ref->next_in_ino = NULL;
717 summary_ref->next_phys = NULL;
718 summary_ref->flash_offset = (jeb->offset + c->sector_size - jeb->free_size) | REF_NORMAL;
719 summary_ref->__totlen = infosize;
720
721 if (!jeb->first_node)
722 jeb->first_node = summary_ref;
723 if (jeb->last_node)
724 jeb->last_node->next_phys = summary_ref;
725 jeb->last_node = summary_ref;
726
727 USED_SPACE(infosize);
728
729 return 0;
730} 841}
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index b7a678be1709..6bf1f6aa4552 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -18,23 +18,6 @@
18#include <linux/uio.h> 18#include <linux/uio.h>
19#include <linux/jffs2.h> 19#include <linux/jffs2.h>
20 20
21#define DIRTY_SPACE(x) do { typeof(x) _x = (x); \
22 c->free_size -= _x; c->dirty_size += _x; \
23 jeb->free_size -= _x ; jeb->dirty_size += _x; \
24 }while(0)
25#define USED_SPACE(x) do { typeof(x) _x = (x); \
26 c->free_size -= _x; c->used_size += _x; \
27 jeb->free_size -= _x ; jeb->used_size += _x; \
28 }while(0)
29#define WASTED_SPACE(x) do { typeof(x) _x = (x); \
30 c->free_size -= _x; c->wasted_size += _x; \
31 jeb->free_size -= _x ; jeb->wasted_size += _x; \
32 }while(0)
33#define UNCHECKED_SPACE(x) do { typeof(x) _x = (x); \
34 c->free_size -= _x; c->unchecked_size += _x; \
35 jeb->free_size -= _x ; jeb->unchecked_size += _x; \
36 }while(0)
37
38#define BLK_STATE_ALLFF 0 21#define BLK_STATE_ALLFF 0
39#define BLK_STATE_CLEAN 1 22#define BLK_STATE_CLEAN 1
40#define BLK_STATE_PARTDIRTY 2 23#define BLK_STATE_PARTDIRTY 2
@@ -45,6 +28,8 @@
45#define JFFS2_SUMMARY_NOSUM_SIZE 0xffffffff 28#define JFFS2_SUMMARY_NOSUM_SIZE 0xffffffff
46#define JFFS2_SUMMARY_INODE_SIZE (sizeof(struct jffs2_sum_inode_flash)) 29#define JFFS2_SUMMARY_INODE_SIZE (sizeof(struct jffs2_sum_inode_flash))
47#define JFFS2_SUMMARY_DIRENT_SIZE(x) (sizeof(struct jffs2_sum_dirent_flash) + (x)) 30#define JFFS2_SUMMARY_DIRENT_SIZE(x) (sizeof(struct jffs2_sum_dirent_flash) + (x))
31#define JFFS2_SUMMARY_XATTR_SIZE (sizeof(struct jffs2_sum_xattr_flash))
32#define JFFS2_SUMMARY_XREF_SIZE (sizeof(struct jffs2_sum_xref_flash))
48 33
49/* Summary structures used on flash */ 34/* Summary structures used on flash */
50 35
@@ -75,11 +60,28 @@ struct jffs2_sum_dirent_flash
75 uint8_t name[0]; /* dirent name */ 60 uint8_t name[0]; /* dirent name */
76} __attribute__((packed)); 61} __attribute__((packed));
77 62
63struct jffs2_sum_xattr_flash
64{
65 jint16_t nodetype; /* == JFFS2_NODETYPE_XATR */
66 jint32_t xid; /* xattr identifier */
67 jint32_t version; /* version number */
68 jint32_t offset; /* offset on jeb */
69 jint32_t totlen; /* node length */
70} __attribute__((packed));
71
72struct jffs2_sum_xref_flash
73{
74 jint16_t nodetype; /* == JFFS2_NODETYPE_XREF */
75 jint32_t offset; /* offset on jeb */
76} __attribute__((packed));
77
78union jffs2_sum_flash 78union jffs2_sum_flash
79{ 79{
80 struct jffs2_sum_unknown_flash u; 80 struct jffs2_sum_unknown_flash u;
81 struct jffs2_sum_inode_flash i; 81 struct jffs2_sum_inode_flash i;
82 struct jffs2_sum_dirent_flash d; 82 struct jffs2_sum_dirent_flash d;
83 struct jffs2_sum_xattr_flash x;
84 struct jffs2_sum_xref_flash r;
83}; 85};
84 86
85/* Summary structures used in the memory */ 87/* Summary structures used in the memory */
@@ -114,11 +116,30 @@ struct jffs2_sum_dirent_mem
114 uint8_t name[0]; /* dirent name */ 116 uint8_t name[0]; /* dirent name */
115} __attribute__((packed)); 117} __attribute__((packed));
116 118
119struct jffs2_sum_xattr_mem
120{
121 union jffs2_sum_mem *next;
122 jint16_t nodetype;
123 jint32_t xid;
124 jint32_t version;
125 jint32_t offset;
126 jint32_t totlen;
127} __attribute__((packed));
128
129struct jffs2_sum_xref_mem
130{
131 union jffs2_sum_mem *next;
132 jint16_t nodetype;
133 jint32_t offset;
134} __attribute__((packed));
135
117union jffs2_sum_mem 136union jffs2_sum_mem
118{ 137{
119 struct jffs2_sum_unknown_mem u; 138 struct jffs2_sum_unknown_mem u;
120 struct jffs2_sum_inode_mem i; 139 struct jffs2_sum_inode_mem i;
121 struct jffs2_sum_dirent_mem d; 140 struct jffs2_sum_dirent_mem d;
141 struct jffs2_sum_xattr_mem x;
142 struct jffs2_sum_xref_mem r;
122}; 143};
123 144
124/* Summary related information stored in superblock */ 145/* Summary related information stored in superblock */
@@ -159,8 +180,11 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c);
159int jffs2_sum_add_padding_mem(struct jffs2_summary *s, uint32_t size); 180int jffs2_sum_add_padding_mem(struct jffs2_summary *s, uint32_t size);
160int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri, uint32_t ofs); 181int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri, uint32_t ofs);
161int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *rd, uint32_t ofs); 182int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *rd, uint32_t ofs);
183int jffs2_sum_add_xattr_mem(struct jffs2_summary *s, struct jffs2_raw_xattr *rx, uint32_t ofs);
184int jffs2_sum_add_xref_mem(struct jffs2_summary *s, struct jffs2_raw_xref *rr, uint32_t ofs);
162int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 185int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
163 uint32_t ofs, uint32_t *pseudo_random); 186 struct jffs2_raw_summary *summary, uint32_t sumlen,
187 uint32_t *pseudo_random);
164 188
165#else /* SUMMARY DISABLED */ 189#else /* SUMMARY DISABLED */
166 190
@@ -176,7 +200,9 @@ int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb
176#define jffs2_sum_add_padding_mem(a,b) 200#define jffs2_sum_add_padding_mem(a,b)
177#define jffs2_sum_add_inode_mem(a,b,c) 201#define jffs2_sum_add_inode_mem(a,b,c)
178#define jffs2_sum_add_dirent_mem(a,b,c) 202#define jffs2_sum_add_dirent_mem(a,b,c)
179#define jffs2_sum_scan_sumnode(a,b,c,d) (0) 203#define jffs2_sum_add_xattr_mem(a,b,c)
204#define jffs2_sum_add_xref_mem(a,b,c)
205#define jffs2_sum_scan_sumnode(a,b,c,d,e) (0)
180 206
181#endif /* CONFIG_JFFS2_SUMMARY */ 207#endif /* CONFIG_JFFS2_SUMMARY */
182 208
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index ffd8e84b22cc..68e3953419b4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -11,7 +11,6 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/kernel.h> 14#include <linux/kernel.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/slab.h> 16#include <linux/slab.h>
@@ -111,9 +110,10 @@ static int jffs2_sb_set(struct super_block *sb, void *data)
111 return 0; 110 return 0;
112} 111}
113 112
114static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, 113static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
115 int flags, const char *dev_name, 114 int flags, const char *dev_name,
116 void *data, struct mtd_info *mtd) 115 void *data, struct mtd_info *mtd,
116 struct vfsmount *mnt)
117{ 117{
118 struct super_block *sb; 118 struct super_block *sb;
119 struct jffs2_sb_info *c; 119 struct jffs2_sb_info *c;
@@ -121,19 +121,20 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
121 121
122 c = kmalloc(sizeof(*c), GFP_KERNEL); 122 c = kmalloc(sizeof(*c), GFP_KERNEL);
123 if (!c) 123 if (!c)
124 return ERR_PTR(-ENOMEM); 124 return -ENOMEM;
125 memset(c, 0, sizeof(*c)); 125 memset(c, 0, sizeof(*c));
126 c->mtd = mtd; 126 c->mtd = mtd;
127 127
128 sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); 128 sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
129 129
130 if (IS_ERR(sb)) 130 if (IS_ERR(sb))
131 goto out_put; 131 goto out_error;
132 132
133 if (sb->s_root) { 133 if (sb->s_root) {
134 /* New mountpoint for JFFS2 which is already mounted */ 134 /* New mountpoint for JFFS2 which is already mounted */
135 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n", 135 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n",
136 mtd->index, mtd->name)); 136 mtd->index, mtd->name));
137 ret = simple_set_mnt(mnt, sb);
137 goto out_put; 138 goto out_put;
138 } 139 }
139 140
@@ -151,51 +152,57 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
151 152
152 sb->s_op = &jffs2_super_operations; 153 sb->s_op = &jffs2_super_operations;
153 sb->s_flags = flags | MS_NOATIME; 154 sb->s_flags = flags | MS_NOATIME;
154 155 sb->s_xattr = jffs2_xattr_handlers;
156#ifdef CONFIG_JFFS2_FS_POSIX_ACL
157 sb->s_flags |= MS_POSIXACL;
158#endif
155 ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 159 ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
156 160
157 if (ret) { 161 if (ret) {
158 /* Failure case... */ 162 /* Failure case... */
159 up_write(&sb->s_umount); 163 up_write(&sb->s_umount);
160 deactivate_super(sb); 164 deactivate_super(sb);
161 return ERR_PTR(ret); 165 return ret;
162 } 166 }
163 167
164 sb->s_flags |= MS_ACTIVE; 168 sb->s_flags |= MS_ACTIVE;
165 return sb; 169 return simple_set_mnt(mnt, sb);
166 170
171out_error:
172 ret = PTR_ERR(sb);
167 out_put: 173 out_put:
168 kfree(c); 174 kfree(c);
169 put_mtd_device(mtd); 175 put_mtd_device(mtd);
170 176
171 return sb; 177 return ret;
172} 178}
173 179
174static struct super_block *jffs2_get_sb_mtdnr(struct file_system_type *fs_type, 180static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
175 int flags, const char *dev_name, 181 int flags, const char *dev_name,
176 void *data, int mtdnr) 182 void *data, int mtdnr,
183 struct vfsmount *mnt)
177{ 184{
178 struct mtd_info *mtd; 185 struct mtd_info *mtd;
179 186
180 mtd = get_mtd_device(NULL, mtdnr); 187 mtd = get_mtd_device(NULL, mtdnr);
181 if (!mtd) { 188 if (!mtd) {
182 D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); 189 D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
183 return ERR_PTR(-EINVAL); 190 return -EINVAL;
184 } 191 }
185 192
186 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); 193 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
187} 194}
188 195
189static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, 196static int jffs2_get_sb(struct file_system_type *fs_type,
190 int flags, const char *dev_name, 197 int flags, const char *dev_name,
191 void *data) 198 void *data, struct vfsmount *mnt)
192{ 199{
193 int err; 200 int err;
194 struct nameidata nd; 201 struct nameidata nd;
195 int mtdnr; 202 int mtdnr;
196 203
197 if (!dev_name) 204 if (!dev_name)
198 return ERR_PTR(-EINVAL); 205 return -EINVAL;
199 206
200 D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name)); 207 D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name));
201 208
@@ -217,7 +224,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
217 mtd = get_mtd_device(NULL, mtdnr); 224 mtd = get_mtd_device(NULL, mtdnr);
218 if (mtd) { 225 if (mtd) {
219 if (!strcmp(mtd->name, dev_name+4)) 226 if (!strcmp(mtd->name, dev_name+4))
220 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); 227 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
221 put_mtd_device(mtd); 228 put_mtd_device(mtd);
222 } 229 }
223 } 230 }
@@ -230,7 +237,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
230 if (!*endptr) { 237 if (!*endptr) {
231 /* It was a valid number */ 238 /* It was a valid number */
232 D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr)); 239 D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr));
233 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); 240 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
234 } 241 }
235 } 242 }
236 } 243 }
@@ -244,7 +251,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
244 err, nd.dentry->d_inode)); 251 err, nd.dentry->d_inode));
245 252
246 if (err) 253 if (err)
247 return ERR_PTR(err); 254 return err;
248 255
249 err = -EINVAL; 256 err = -EINVAL;
250 257
@@ -266,11 +273,11 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
266 mtdnr = iminor(nd.dentry->d_inode); 273 mtdnr = iminor(nd.dentry->d_inode);
267 path_release(&nd); 274 path_release(&nd);
268 275
269 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); 276 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
270 277
271out: 278out:
272 path_release(&nd); 279 path_release(&nd);
273 return ERR_PTR(err); 280 return err;
274} 281}
275 282
276static void jffs2_put_super (struct super_block *sb) 283static void jffs2_put_super (struct super_block *sb)
@@ -293,6 +300,7 @@ static void jffs2_put_super (struct super_block *sb)
293 kfree(c->blocks); 300 kfree(c->blocks);
294 jffs2_flash_cleanup(c); 301 jffs2_flash_cleanup(c);
295 kfree(c->inocache_list); 302 kfree(c->inocache_list);
303 jffs2_clear_xattr_subsystem(c);
296 if (c->mtd->sync) 304 if (c->mtd->sync)
297 c->mtd->sync(c->mtd); 305 c->mtd->sync(c->mtd);
298 306
@@ -320,6 +328,18 @@ static int __init init_jffs2_fs(void)
320{ 328{
321 int ret; 329 int ret;
322 330
331 /* Paranoia checks for on-medium structures. If we ask GCC
332 to pack them with __attribute__((packed)) then it _also_
333 assumes that they're not aligned -- so it emits crappy
334 code on some architectures. Ideally we want an attribute
335 which means just 'no padding', without the alignment
336 thing. But GCC doesn't have that -- we have to just
337 hope the structs are the right sizes, instead. */
338 BUG_ON(sizeof(struct jffs2_unknown_node) != 12);
339 BUG_ON(sizeof(struct jffs2_raw_dirent) != 40);
340 BUG_ON(sizeof(struct jffs2_raw_inode) != 68);
341 BUG_ON(sizeof(struct jffs2_raw_summary) != 32);
342
323 printk(KERN_INFO "JFFS2 version 2.2." 343 printk(KERN_INFO "JFFS2 version 2.2."
324#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 344#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
325 " (NAND)" 345 " (NAND)"
@@ -327,7 +347,7 @@ static int __init init_jffs2_fs(void)
327#ifdef CONFIG_JFFS2_SUMMARY 347#ifdef CONFIG_JFFS2_SUMMARY
328 " (SUMMARY) " 348 " (SUMMARY) "
329#endif 349#endif
330 " (C) 2001-2003 Red Hat, Inc.\n"); 350 " (C) 2001-2006 Red Hat, Inc.\n");
331 351
332 jffs2_inode_cachep = kmem_cache_create("jffs2_i", 352 jffs2_inode_cachep = kmem_cache_create("jffs2_i",
333 sizeof(struct jffs2_inode_info), 353 sizeof(struct jffs2_inode_info),
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index d55754fe8925..fc211b6e9b03 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -24,7 +24,12 @@ struct inode_operations jffs2_symlink_inode_operations =
24{ 24{
25 .readlink = generic_readlink, 25 .readlink = generic_readlink,
26 .follow_link = jffs2_follow_link, 26 .follow_link = jffs2_follow_link,
27 .setattr = jffs2_setattr 27 .permission = jffs2_permission,
28 .setattr = jffs2_setattr,
29 .setxattr = jffs2_setxattr,
30 .getxattr = jffs2_getxattr,
31 .listxattr = jffs2_listxattr,
32 .removexattr = jffs2_removexattr
28}; 33};
29 34
30static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) 35static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 4cebf0e57c46..b9b700730dfe 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -156,69 +156,130 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock
156 jffs2_erase_pending_trigger(c); 156 jffs2_erase_pending_trigger(c);
157 } 157 }
158 158
159 /* Adjust its size counts accordingly */ 159 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) {
160 c->wasted_size += jeb->free_size; 160 uint32_t oldfree = jeb->free_size;
161 c->free_size -= jeb->free_size; 161
162 jeb->wasted_size += jeb->free_size; 162 jffs2_link_node_ref(c, jeb,
163 jeb->free_size = 0; 163 (jeb->offset+c->sector_size-oldfree) | REF_OBSOLETE,
164 oldfree, NULL);
165 /* convert to wasted */
166 c->wasted_size += oldfree;
167 jeb->wasted_size += oldfree;
168 c->dirty_size -= oldfree;
169 jeb->dirty_size -= oldfree;
170 }
164 171
165 jffs2_dbg_dump_block_lists_nolock(c); 172 jffs2_dbg_dump_block_lists_nolock(c);
166 jffs2_dbg_acct_sanity_check_nolock(c,jeb); 173 jffs2_dbg_acct_sanity_check_nolock(c,jeb);
167 jffs2_dbg_acct_paranoia_check_nolock(c, jeb); 174 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
168} 175}
169 176
177static struct jffs2_raw_node_ref **jffs2_incore_replace_raw(struct jffs2_sb_info *c,
178 struct jffs2_inode_info *f,
179 struct jffs2_raw_node_ref *raw,
180 union jffs2_node_union *node)
181{
182 struct jffs2_node_frag *frag;
183 struct jffs2_full_dirent *fd;
184
185 dbg_noderef("incore_replace_raw: node at %p is {%04x,%04x}\n",
186 node, je16_to_cpu(node->u.magic), je16_to_cpu(node->u.nodetype));
187
188 BUG_ON(je16_to_cpu(node->u.magic) != 0x1985 &&
189 je16_to_cpu(node->u.magic) != 0);
190
191 switch (je16_to_cpu(node->u.nodetype)) {
192 case JFFS2_NODETYPE_INODE:
193 if (f->metadata && f->metadata->raw == raw) {
194 dbg_noderef("Will replace ->raw in f->metadata at %p\n", f->metadata);
195 return &f->metadata->raw;
196 }
197 frag = jffs2_lookup_node_frag(&f->fragtree, je32_to_cpu(node->i.offset));
198 BUG_ON(!frag);
199 /* Find a frag which refers to the full_dnode we want to modify */
200 while (!frag->node || frag->node->raw != raw) {
201 frag = frag_next(frag);
202 BUG_ON(!frag);
203 }
204 dbg_noderef("Will replace ->raw in full_dnode at %p\n", frag->node);
205 return &frag->node->raw;
206
207 case JFFS2_NODETYPE_DIRENT:
208 for (fd = f->dents; fd; fd = fd->next) {
209 if (fd->raw == raw) {
210 dbg_noderef("Will replace ->raw in full_dirent at %p\n", fd);
211 return &fd->raw;
212 }
213 }
214 BUG();
215
216 default:
217 dbg_noderef("Don't care about replacing raw for nodetype %x\n",
218 je16_to_cpu(node->u.nodetype));
219 break;
220 }
221 return NULL;
222}
223
170/* Recover from failure to write wbuf. Recover the nodes up to the 224/* Recover from failure to write wbuf. Recover the nodes up to the
171 * wbuf, not the one which we were starting to try to write. */ 225 * wbuf, not the one which we were starting to try to write. */
172 226
173static void jffs2_wbuf_recover(struct jffs2_sb_info *c) 227static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
174{ 228{
175 struct jffs2_eraseblock *jeb, *new_jeb; 229 struct jffs2_eraseblock *jeb, *new_jeb;
176 struct jffs2_raw_node_ref **first_raw, **raw; 230 struct jffs2_raw_node_ref *raw, *next, *first_raw = NULL;
177 size_t retlen; 231 size_t retlen;
178 int ret; 232 int ret;
233 int nr_refile = 0;
179 unsigned char *buf; 234 unsigned char *buf;
180 uint32_t start, end, ofs, len; 235 uint32_t start, end, ofs, len;
181 236
182 spin_lock(&c->erase_completion_lock);
183
184 jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; 237 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
185 238
239 spin_lock(&c->erase_completion_lock);
186 jffs2_block_refile(c, jeb, REFILE_NOTEMPTY); 240 jffs2_block_refile(c, jeb, REFILE_NOTEMPTY);
241 spin_unlock(&c->erase_completion_lock);
242
243 BUG_ON(!ref_obsolete(jeb->last_node));
187 244
188 /* Find the first node to be recovered, by skipping over every 245 /* Find the first node to be recovered, by skipping over every
189 node which ends before the wbuf starts, or which is obsolete. */ 246 node which ends before the wbuf starts, or which is obsolete. */
190 first_raw = &jeb->first_node; 247 for (next = raw = jeb->first_node; next; raw = next) {
191 while (*first_raw && 248 next = ref_next(raw);
192 (ref_obsolete(*first_raw) || 249
193 (ref_offset(*first_raw)+ref_totlen(c, jeb, *first_raw)) < c->wbuf_ofs)) { 250 if (ref_obsolete(raw) ||
194 D1(printk(KERN_DEBUG "Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n", 251 (next && ref_offset(next) <= c->wbuf_ofs)) {
195 ref_offset(*first_raw), ref_flags(*first_raw), 252 dbg_noderef("Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n",
196 (ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw)), 253 ref_offset(raw), ref_flags(raw),
197 c->wbuf_ofs)); 254 (ref_offset(raw) + ref_totlen(c, jeb, raw)),
198 first_raw = &(*first_raw)->next_phys; 255 c->wbuf_ofs);
256 continue;
257 }
258 dbg_noderef("First node to be recovered is at 0x%08x(%d)-0x%08x\n",
259 ref_offset(raw), ref_flags(raw),
260 (ref_offset(raw) + ref_totlen(c, jeb, raw)));
261
262 first_raw = raw;
263 break;
199 } 264 }
200 265
201 if (!*first_raw) { 266 if (!first_raw) {
202 /* All nodes were obsolete. Nothing to recover. */ 267 /* All nodes were obsolete. Nothing to recover. */
203 D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n")); 268 D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n"));
204 spin_unlock(&c->erase_completion_lock); 269 c->wbuf_len = 0;
205 return; 270 return;
206 } 271 }
207 272
208 start = ref_offset(*first_raw); 273 start = ref_offset(first_raw);
209 end = ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw); 274 end = ref_offset(jeb->last_node);
210 275 nr_refile = 1;
211 /* Find the last node to be recovered */
212 raw = first_raw;
213 while ((*raw)) {
214 if (!ref_obsolete(*raw))
215 end = ref_offset(*raw) + ref_totlen(c, jeb, *raw);
216 276
217 raw = &(*raw)->next_phys; 277 /* Count the number of refs which need to be copied */
218 } 278 while ((raw = ref_next(raw)) != jeb->last_node)
219 spin_unlock(&c->erase_completion_lock); 279 nr_refile++;
220 280
221 D1(printk(KERN_DEBUG "wbuf recover %08x-%08x\n", start, end)); 281 dbg_noderef("wbuf recover %08x-%08x (%d bytes in %d nodes)\n",
282 start, end, end - start, nr_refile);
222 283
223 buf = NULL; 284 buf = NULL;
224 if (start < c->wbuf_ofs) { 285 if (start < c->wbuf_ofs) {
@@ -233,28 +294,37 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
233 } 294 }
234 295
235 /* Do the read... */ 296 /* Do the read... */
236 if (jffs2_cleanmarker_oob(c)) 297 ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf);
237 ret = c->mtd->read_ecc(c->mtd, start, c->wbuf_ofs - start, &retlen, buf, NULL, c->oobinfo);
238 else
239 ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf);
240 298
241 if (ret == -EBADMSG && retlen == c->wbuf_ofs - start) { 299 /* ECC recovered ? */
242 /* ECC recovered */ 300 if ((ret == -EUCLEAN || ret == -EBADMSG) &&
301 (retlen == c->wbuf_ofs - start))
243 ret = 0; 302 ret = 0;
244 } 303
245 if (ret || retlen != c->wbuf_ofs - start) { 304 if (ret || retlen != c->wbuf_ofs - start) {
246 printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n"); 305 printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n");
247 306
248 kfree(buf); 307 kfree(buf);
249 buf = NULL; 308 buf = NULL;
250 read_failed: 309 read_failed:
251 first_raw = &(*first_raw)->next_phys; 310 first_raw = ref_next(first_raw);
311 nr_refile--;
312 while (first_raw && ref_obsolete(first_raw)) {
313 first_raw = ref_next(first_raw);
314 nr_refile--;
315 }
316
252 /* If this was the only node to be recovered, give up */ 317 /* If this was the only node to be recovered, give up */
253 if (!(*first_raw)) 318 if (!first_raw) {
319 c->wbuf_len = 0;
254 return; 320 return;
321 }
255 322
256 /* It wasn't. Go on and try to recover nodes complete in the wbuf */ 323 /* It wasn't. Go on and try to recover nodes complete in the wbuf */
257 start = ref_offset(*first_raw); 324 start = ref_offset(first_raw);
325 dbg_noderef("wbuf now recover %08x-%08x (%d bytes in %d nodes)\n",
326 start, end, end - start, nr_refile);
327
258 } else { 328 } else {
259 /* Read succeeded. Copy the remaining data from the wbuf */ 329 /* Read succeeded. Copy the remaining data from the wbuf */
260 memcpy(buf + (c->wbuf_ofs - start), c->wbuf, end - c->wbuf_ofs); 330 memcpy(buf + (c->wbuf_ofs - start), c->wbuf, end - c->wbuf_ofs);
@@ -263,14 +333,23 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
263 /* OK... we're to rewrite (end-start) bytes of data from first_raw onwards. 333 /* OK... we're to rewrite (end-start) bytes of data from first_raw onwards.
264 Either 'buf' contains the data, or we find it in the wbuf */ 334 Either 'buf' contains the data, or we find it in the wbuf */
265 335
266
267 /* ... and get an allocation of space from a shiny new block instead */ 336 /* ... and get an allocation of space from a shiny new block instead */
268 ret = jffs2_reserve_space_gc(c, end-start, &ofs, &len, JFFS2_SUMMARY_NOSUM_SIZE); 337 ret = jffs2_reserve_space_gc(c, end-start, &len, JFFS2_SUMMARY_NOSUM_SIZE);
269 if (ret) { 338 if (ret) {
270 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n"); 339 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n");
271 kfree(buf); 340 kfree(buf);
272 return; 341 return;
273 } 342 }
343
344 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, nr_refile);
345 if (ret) {
346 printk(KERN_WARNING "Failed to allocate node refs for wbuf recovery. Data loss ensues.\n");
347 kfree(buf);
348 return;
349 }
350
351 ofs = write_ofs(c);
352
274 if (end-start >= c->wbuf_pagesize) { 353 if (end-start >= c->wbuf_pagesize) {
275 /* Need to do another write immediately, but it's possible 354 /* Need to do another write immediately, but it's possible
276 that this is just because the wbuf itself is completely 355 that this is just because the wbuf itself is completely
@@ -288,36 +367,22 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
288 if (breakme++ == 20) { 367 if (breakme++ == 20) {
289 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); 368 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
290 breakme = 0; 369 breakme = 0;
291 c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen, 370 c->mtd->write(c->mtd, ofs, towrite, &retlen,
292 brokenbuf, NULL, c->oobinfo); 371 brokenbuf);
293 ret = -EIO; 372 ret = -EIO;
294 } else 373 } else
295#endif 374#endif
296 if (jffs2_cleanmarker_oob(c)) 375 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen,
297 ret = c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen, 376 rewrite_buf);
298 rewrite_buf, NULL, c->oobinfo);
299 else
300 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, rewrite_buf);
301 377
302 if (ret || retlen != towrite) { 378 if (ret || retlen != towrite) {
303 /* Argh. We tried. Really we did. */ 379 /* Argh. We tried. Really we did. */
304 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n"); 380 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n");
305 kfree(buf); 381 kfree(buf);
306 382
307 if (retlen) { 383 if (retlen)
308 struct jffs2_raw_node_ref *raw2; 384 jffs2_add_physical_node_ref(c, ofs | REF_OBSOLETE, ref_totlen(c, jeb, first_raw), NULL);
309
310 raw2 = jffs2_alloc_raw_node_ref();
311 if (!raw2)
312 return;
313 385
314 raw2->flash_offset = ofs | REF_OBSOLETE;
315 raw2->__totlen = ref_totlen(c, jeb, *first_raw);
316 raw2->next_phys = NULL;
317 raw2->next_in_ino = NULL;
318
319 jffs2_add_physical_node_ref(c, raw2);
320 }
321 return; 386 return;
322 } 387 }
323 printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs); 388 printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs);
@@ -326,12 +391,10 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
326 c->wbuf_ofs = ofs + towrite; 391 c->wbuf_ofs = ofs + towrite;
327 memmove(c->wbuf, rewrite_buf + towrite, c->wbuf_len); 392 memmove(c->wbuf, rewrite_buf + towrite, c->wbuf_len);
328 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */ 393 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */
329 kfree(buf);
330 } else { 394 } else {
331 /* OK, now we're left with the dregs in whichever buffer we're using */ 395 /* OK, now we're left with the dregs in whichever buffer we're using */
332 if (buf) { 396 if (buf) {
333 memcpy(c->wbuf, buf, end-start); 397 memcpy(c->wbuf, buf, end-start);
334 kfree(buf);
335 } else { 398 } else {
336 memmove(c->wbuf, c->wbuf + (start - c->wbuf_ofs), end - start); 399 memmove(c->wbuf, c->wbuf + (start - c->wbuf_ofs), end - start);
337 } 400 }
@@ -343,62 +406,110 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
343 new_jeb = &c->blocks[ofs / c->sector_size]; 406 new_jeb = &c->blocks[ofs / c->sector_size];
344 407
345 spin_lock(&c->erase_completion_lock); 408 spin_lock(&c->erase_completion_lock);
346 if (new_jeb->first_node) { 409 for (raw = first_raw; raw != jeb->last_node; raw = ref_next(raw)) {
347 /* Odd, but possible with ST flash later maybe */ 410 uint32_t rawlen = ref_totlen(c, jeb, raw);
348 new_jeb->last_node->next_phys = *first_raw; 411 struct jffs2_inode_cache *ic;
349 } else { 412 struct jffs2_raw_node_ref *new_ref;
350 new_jeb->first_node = *first_raw; 413 struct jffs2_raw_node_ref **adjust_ref = NULL;
351 } 414 struct jffs2_inode_info *f = NULL;
352
353 raw = first_raw;
354 while (*raw) {
355 uint32_t rawlen = ref_totlen(c, jeb, *raw);
356 415
357 D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n", 416 D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n",
358 rawlen, ref_offset(*raw), ref_flags(*raw), ofs)); 417 rawlen, ref_offset(raw), ref_flags(raw), ofs));
418
419 ic = jffs2_raw_ref_to_ic(raw);
420
421 /* Ick. This XATTR mess should be fixed shortly... */
422 if (ic && ic->class == RAWNODE_CLASS_XATTR_DATUM) {
423 struct jffs2_xattr_datum *xd = (void *)ic;
424 BUG_ON(xd->node != raw);
425 adjust_ref = &xd->node;
426 raw->next_in_ino = NULL;
427 ic = NULL;
428 } else if (ic && ic->class == RAWNODE_CLASS_XATTR_REF) {
429 struct jffs2_xattr_datum *xr = (void *)ic;
430 BUG_ON(xr->node != raw);
431 adjust_ref = &xr->node;
432 raw->next_in_ino = NULL;
433 ic = NULL;
434 } else if (ic && ic->class == RAWNODE_CLASS_INODE_CACHE) {
435 struct jffs2_raw_node_ref **p = &ic->nodes;
436
437 /* Remove the old node from the per-inode list */
438 while (*p && *p != (void *)ic) {
439 if (*p == raw) {
440 (*p) = (raw->next_in_ino);
441 raw->next_in_ino = NULL;
442 break;
443 }
444 p = &((*p)->next_in_ino);
445 }
359 446
360 if (ref_obsolete(*raw)) { 447 if (ic->state == INO_STATE_PRESENT && !ref_obsolete(raw)) {
361 /* Shouldn't really happen much */ 448 /* If it's an in-core inode, then we have to adjust any
362 new_jeb->dirty_size += rawlen; 449 full_dirent or full_dnode structure to point to the
363 new_jeb->free_size -= rawlen; 450 new version instead of the old */
364 c->dirty_size += rawlen; 451 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink);
365 } else { 452 if (IS_ERR(f)) {
366 new_jeb->used_size += rawlen; 453 /* Should never happen; it _must_ be present */
367 new_jeb->free_size -= rawlen; 454 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
455 ic->ino, PTR_ERR(f));
456 BUG();
457 }
458 /* We don't lock f->sem. There's a number of ways we could
459 end up in here with it already being locked, and nobody's
460 going to modify it on us anyway because we hold the
461 alloc_sem. We're only changing one ->raw pointer too,
462 which we can get away with without upsetting readers. */
463 adjust_ref = jffs2_incore_replace_raw(c, f, raw,
464 (void *)(buf?:c->wbuf) + (ref_offset(raw) - start));
465 } else if (unlikely(ic->state != INO_STATE_PRESENT &&
466 ic->state != INO_STATE_CHECKEDABSENT &&
467 ic->state != INO_STATE_GC)) {
468 JFFS2_ERROR("Inode #%u is in strange state %d!\n", ic->ino, ic->state);
469 BUG();
470 }
471 }
472
473 new_ref = jffs2_link_node_ref(c, new_jeb, ofs | ref_flags(raw), rawlen, ic);
474
475 if (adjust_ref) {
476 BUG_ON(*adjust_ref != raw);
477 *adjust_ref = new_ref;
478 }
479 if (f)
480 jffs2_gc_release_inode(c, f);
481
482 if (!ref_obsolete(raw)) {
368 jeb->dirty_size += rawlen; 483 jeb->dirty_size += rawlen;
369 jeb->used_size -= rawlen; 484 jeb->used_size -= rawlen;
370 c->dirty_size += rawlen; 485 c->dirty_size += rawlen;
486 c->used_size -= rawlen;
487 raw->flash_offset = ref_offset(raw) | REF_OBSOLETE;
488 BUG_ON(raw->next_in_ino);
371 } 489 }
372 c->free_size -= rawlen;
373 (*raw)->flash_offset = ofs | ref_flags(*raw);
374 ofs += rawlen; 490 ofs += rawlen;
375 new_jeb->last_node = *raw;
376
377 raw = &(*raw)->next_phys;
378 } 491 }
379 492
493 kfree(buf);
494
380 /* Fix up the original jeb now it's on the bad_list */ 495 /* Fix up the original jeb now it's on the bad_list */
381 *first_raw = NULL; 496 if (first_raw == jeb->first_node) {
382 if (first_raw == &jeb->first_node) {
383 jeb->last_node = NULL;
384 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
385 list_del(&jeb->list); 498 list_move(&jeb->list, &c->erase_pending_list);
386 list_add(&jeb->list, &c->erase_pending_list);
387 c->nr_erasing_blocks++; 499 c->nr_erasing_blocks++;
388 jffs2_erase_pending_trigger(c); 500 jffs2_erase_pending_trigger(c);
389 } 501 }
390 else
391 jeb->last_node = container_of(first_raw, struct jffs2_raw_node_ref, next_phys);
392 502
393 jffs2_dbg_acct_sanity_check_nolock(c, jeb); 503 jffs2_dbg_acct_sanity_check_nolock(c, jeb);
394 jffs2_dbg_acct_paranoia_check_nolock(c, jeb); 504 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
395 505
396 jffs2_dbg_acct_sanity_check_nolock(c, new_jeb); 506 jffs2_dbg_acct_sanity_check_nolock(c, new_jeb);
397 jffs2_dbg_acct_paranoia_check_nolock(c, new_jeb); 507 jffs2_dbg_acct_paranoia_check_nolock(c, new_jeb);
398 508
399 spin_unlock(&c->erase_completion_lock); 509 spin_unlock(&c->erase_completion_lock);
400 510
401 D1(printk(KERN_DEBUG "wbuf recovery completed OK\n")); 511 D1(printk(KERN_DEBUG "wbuf recovery completed OK. wbuf_ofs 0x%08x, len 0x%x\n", c->wbuf_ofs, c->wbuf_len));
512
402} 513}
403 514
404/* Meaning of pad argument: 515/* Meaning of pad argument:
@@ -412,6 +523,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
412 523
413static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) 524static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
414{ 525{
526 struct jffs2_eraseblock *wbuf_jeb;
415 int ret; 527 int ret;
416 size_t retlen; 528 size_t retlen;
417 529
@@ -429,6 +541,10 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
429 if (!c->wbuf_len) /* already checked c->wbuf above */ 541 if (!c->wbuf_len) /* already checked c->wbuf above */
430 return 0; 542 return 0;
431 543
544 wbuf_jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
545 if (jffs2_prealloc_raw_node_refs(c, wbuf_jeb, c->nextblock->allocated_refs + 1))
546 return -ENOMEM;
547
432 /* claim remaining space on the page 548 /* claim remaining space on the page
433 this happens, if we have a change to a new block, 549 this happens, if we have a change to a new block,
434 or if fsync forces us to flush the writebuffer. 550 or if fsync forces us to flush the writebuffer.
@@ -458,15 +574,12 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
458 if (breakme++ == 20) { 574 if (breakme++ == 20) {
459 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); 575 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
460 breakme = 0; 576 breakme = 0;
461 c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, 577 c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
462 &retlen, brokenbuf, NULL, c->oobinfo); 578 brokenbuf);
463 ret = -EIO; 579 ret = -EIO;
464 } else 580 } else
465#endif 581#endif
466 582
467 if (jffs2_cleanmarker_oob(c))
468 ret = c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf, NULL, c->oobinfo);
469 else
470 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); 583 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf);
471 584
472 if (ret || retlen != c->wbuf_pagesize) { 585 if (ret || retlen != c->wbuf_pagesize) {
@@ -483,32 +596,34 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
483 return ret; 596 return ret;
484 } 597 }
485 598
486 spin_lock(&c->erase_completion_lock);
487
488 /* Adjust free size of the block if we padded. */ 599 /* Adjust free size of the block if we padded. */
489 if (pad) { 600 if (pad) {
490 struct jffs2_eraseblock *jeb; 601 uint32_t waste = c->wbuf_pagesize - c->wbuf_len;
491
492 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
493 602
494 D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n", 603 D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n",
495 (jeb==c->nextblock)?"next":"", jeb->offset)); 604 (wbuf_jeb==c->nextblock)?"next":"", wbuf_jeb->offset));
496 605
497 /* wbuf_pagesize - wbuf_len is the amount of space that's to be 606 /* wbuf_pagesize - wbuf_len is the amount of space that's to be
498 padded. If there is less free space in the block than that, 607 padded. If there is less free space in the block than that,
499 something screwed up */ 608 something screwed up */
500 if (jeb->free_size < (c->wbuf_pagesize - c->wbuf_len)) { 609 if (wbuf_jeb->free_size < waste) {
501 printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n", 610 printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n",
502 c->wbuf_ofs, c->wbuf_len, c->wbuf_pagesize-c->wbuf_len); 611 c->wbuf_ofs, c->wbuf_len, waste);
503 printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n", 612 printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n",
504 jeb->offset, jeb->free_size); 613 wbuf_jeb->offset, wbuf_jeb->free_size);
505 BUG(); 614 BUG();
506 } 615 }
507 jeb->free_size -= (c->wbuf_pagesize - c->wbuf_len); 616
508 c->free_size -= (c->wbuf_pagesize - c->wbuf_len); 617 spin_lock(&c->erase_completion_lock);
509 jeb->wasted_size += (c->wbuf_pagesize - c->wbuf_len); 618
510 c->wasted_size += (c->wbuf_pagesize - c->wbuf_len); 619 jffs2_link_node_ref(c, wbuf_jeb, (c->wbuf_ofs + c->wbuf_len) | REF_OBSOLETE, waste, NULL);
511 } 620 /* FIXME: that made it count as dirty. Convert to wasted */
621 wbuf_jeb->dirty_size -= waste;
622 c->dirty_size -= waste;
623 wbuf_jeb->wasted_size += waste;
624 c->wasted_size += waste;
625 } else
626 spin_lock(&c->erase_completion_lock);
512 627
513 /* Stick any now-obsoleted blocks on the erase_pending_list */ 628 /* Stick any now-obsoleted blocks on the erase_pending_list */
514 jffs2_refile_wbuf_blocks(c); 629 jffs2_refile_wbuf_blocks(c);
@@ -603,20 +718,30 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c)
603 718
604 return ret; 719 return ret;
605} 720}
606int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino) 721
722static size_t jffs2_fill_wbuf(struct jffs2_sb_info *c, const uint8_t *buf,
723 size_t len)
724{
725 if (len && !c->wbuf_len && (len >= c->wbuf_pagesize))
726 return 0;
727
728 if (len > (c->wbuf_pagesize - c->wbuf_len))
729 len = c->wbuf_pagesize - c->wbuf_len;
730 memcpy(c->wbuf + c->wbuf_len, buf, len);
731 c->wbuf_len += (uint32_t) len;
732 return len;
733}
734
735int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
736 unsigned long count, loff_t to, size_t *retlen,
737 uint32_t ino)
607{ 738{
608 struct kvec outvecs[3]; 739 struct jffs2_eraseblock *jeb;
609 uint32_t totlen = 0; 740 size_t wbuf_retlen, donelen = 0;
610 uint32_t split_ofs = 0;
611 uint32_t old_totlen;
612 int ret, splitvec = -1;
613 int invec, outvec;
614 size_t wbuf_retlen;
615 unsigned char *wbuf_ptr;
616 size_t donelen = 0;
617 uint32_t outvec_to = to; 741 uint32_t outvec_to = to;
742 int ret, invec;
618 743
619 /* If not NAND flash, don't bother */ 744 /* If not writebuffered flash, don't bother */
620 if (!jffs2_is_writebuffered(c)) 745 if (!jffs2_is_writebuffered(c))
621 return jffs2_flash_direct_writev(c, invecs, count, to, retlen); 746 return jffs2_flash_direct_writev(c, invecs, count, to, retlen);
622 747
@@ -629,34 +754,22 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig
629 memset(c->wbuf,0xff,c->wbuf_pagesize); 754 memset(c->wbuf,0xff,c->wbuf_pagesize);
630 } 755 }
631 756
632 /* Fixup the wbuf if we are moving to a new eraseblock. The checks below 757 /*
633 fail for ECC'd NOR because cleanmarker == 16, so a block starts at 758 * Sanity checks on target address. It's permitted to write
634 xxx0010. */ 759 * at PAD(c->wbuf_len+c->wbuf_ofs), and it's permitted to
635 if (jffs2_nor_ecc(c)) { 760 * write at the beginning of a new erase block. Anything else,
636 if (((c->wbuf_ofs % c->sector_size) == 0) && !c->wbuf_len) { 761 * and you die. New block starts at xxx000c (0-b = block
637 c->wbuf_ofs = PAGE_DIV(to); 762 * header)
638 c->wbuf_len = PAGE_MOD(to); 763 */
639 memset(c->wbuf,0xff,c->wbuf_pagesize);
640 }
641 }
642
643 /* Sanity checks on target address.
644 It's permitted to write at PAD(c->wbuf_len+c->wbuf_ofs),
645 and it's permitted to write at the beginning of a new
646 erase block. Anything else, and you die.
647 New block starts at xxx000c (0-b = block header)
648 */
649 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) { 764 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) {
650 /* It's a write to a new block */ 765 /* It's a write to a new block */
651 if (c->wbuf_len) { 766 if (c->wbuf_len) {
652 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs)); 767 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx "
768 "causes flush of wbuf at 0x%08x\n",
769 (unsigned long)to, c->wbuf_ofs));
653 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT); 770 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
654 if (ret) { 771 if (ret)
655 /* the underlying layer has to check wbuf_len to do the cleanup */ 772 goto outerr;
656 D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
657 *retlen = 0;
658 goto exit;
659 }
660 } 773 }
661 /* set pointer to new block */ 774 /* set pointer to new block */
662 c->wbuf_ofs = PAGE_DIV(to); 775 c->wbuf_ofs = PAGE_DIV(to);
@@ -665,165 +778,70 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig
665 778
666 if (to != PAD(c->wbuf_ofs + c->wbuf_len)) { 779 if (to != PAD(c->wbuf_ofs + c->wbuf_len)) {
667 /* We're not writing immediately after the writebuffer. Bad. */ 780 /* We're not writing immediately after the writebuffer. Bad. */
668 printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write to %08lx\n", (unsigned long)to); 781 printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write "
782 "to %08lx\n", (unsigned long)to);
669 if (c->wbuf_len) 783 if (c->wbuf_len)
670 printk(KERN_CRIT "wbuf was previously %08x-%08x\n", 784 printk(KERN_CRIT "wbuf was previously %08x-%08x\n",
671 c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len); 785 c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len);
672 BUG(); 786 BUG();
673 } 787 }
674 788
675 /* Note outvecs[3] above. We know count is never greater than 2 */ 789 /* adjust alignment offset */
676 if (count > 2) { 790 if (c->wbuf_len != PAGE_MOD(to)) {
677 printk(KERN_CRIT "jffs2_flash_writev(): count is %ld\n", count); 791 c->wbuf_len = PAGE_MOD(to);
678 BUG(); 792 /* take care of alignment to next page */
679 } 793 if (!c->wbuf_len) {
680 794 c->wbuf_len = c->wbuf_pagesize;
681 invec = 0; 795 ret = __jffs2_flush_wbuf(c, NOPAD);
682 outvec = 0; 796 if (ret)
683 797 goto outerr;
684 /* Fill writebuffer first, if already in use */
685 if (c->wbuf_len) {
686 uint32_t invec_ofs = 0;
687
688 /* adjust alignment offset */
689 if (c->wbuf_len != PAGE_MOD(to)) {
690 c->wbuf_len = PAGE_MOD(to);
691 /* take care of alignment to next page */
692 if (!c->wbuf_len)
693 c->wbuf_len = c->wbuf_pagesize;
694 }
695
696 while(c->wbuf_len < c->wbuf_pagesize) {
697 uint32_t thislen;
698
699 if (invec == count)
700 goto alldone;
701
702 thislen = c->wbuf_pagesize - c->wbuf_len;
703
704 if (thislen >= invecs[invec].iov_len)
705 thislen = invecs[invec].iov_len;
706
707 invec_ofs = thislen;
708
709 memcpy(c->wbuf + c->wbuf_len, invecs[invec].iov_base, thislen);
710 c->wbuf_len += thislen;
711 donelen += thislen;
712 /* Get next invec, if actual did not fill the buffer */
713 if (c->wbuf_len < c->wbuf_pagesize)
714 invec++;
715 }
716
717 /* write buffer is full, flush buffer */
718 ret = __jffs2_flush_wbuf(c, NOPAD);
719 if (ret) {
720 /* the underlying layer has to check wbuf_len to do the cleanup */
721 D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
722 /* Retlen zero to make sure our caller doesn't mark the space dirty.
723 We've already done everything that's necessary */
724 *retlen = 0;
725 goto exit;
726 }
727 outvec_to += donelen;
728 c->wbuf_ofs = outvec_to;
729
730 /* All invecs done ? */
731 if (invec == count)
732 goto alldone;
733
734 /* Set up the first outvec, containing the remainder of the
735 invec we partially used */
736 if (invecs[invec].iov_len > invec_ofs) {
737 outvecs[0].iov_base = invecs[invec].iov_base+invec_ofs;
738 totlen = outvecs[0].iov_len = invecs[invec].iov_len-invec_ofs;
739 if (totlen > c->wbuf_pagesize) {
740 splitvec = outvec;
741 split_ofs = outvecs[0].iov_len - PAGE_MOD(totlen);
742 }
743 outvec++;
744 }
745 invec++;
746 }
747
748 /* OK, now we've flushed the wbuf and the start of the bits
749 we have been asked to write, now to write the rest.... */
750
751 /* totlen holds the amount of data still to be written */
752 old_totlen = totlen;
753 for ( ; invec < count; invec++,outvec++ ) {
754 outvecs[outvec].iov_base = invecs[invec].iov_base;
755 totlen += outvecs[outvec].iov_len = invecs[invec].iov_len;
756 if (PAGE_DIV(totlen) != PAGE_DIV(old_totlen)) {
757 splitvec = outvec;
758 split_ofs = outvecs[outvec].iov_len - PAGE_MOD(totlen);
759 old_totlen = totlen;
760 } 798 }
761 } 799 }
762 800
763 /* Now the outvecs array holds all the remaining data to write */ 801 for (invec = 0; invec < count; invec++) {
764 /* Up to splitvec,split_ofs is to be written immediately. The rest 802 int vlen = invecs[invec].iov_len;
765 goes into the (now-empty) wbuf */ 803 uint8_t *v = invecs[invec].iov_base;
766
767 if (splitvec != -1) {
768 uint32_t remainder;
769
770 remainder = outvecs[splitvec].iov_len - split_ofs;
771 outvecs[splitvec].iov_len = split_ofs;
772
773 /* We did cross a page boundary, so we write some now */
774 if (jffs2_cleanmarker_oob(c))
775 ret = c->mtd->writev_ecc(c->mtd, outvecs, splitvec+1, outvec_to, &wbuf_retlen, NULL, c->oobinfo);
776 else
777 ret = jffs2_flash_direct_writev(c, outvecs, splitvec+1, outvec_to, &wbuf_retlen);
778 804
779 if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) { 805 wbuf_retlen = jffs2_fill_wbuf(c, v, vlen);
780 /* At this point we have no problem,
781 c->wbuf is empty. However refile nextblock to avoid
782 writing again to same address.
783 */
784 struct jffs2_eraseblock *jeb;
785 806
786 spin_lock(&c->erase_completion_lock); 807 if (c->wbuf_len == c->wbuf_pagesize) {
787 808 ret = __jffs2_flush_wbuf(c, NOPAD);
788 jeb = &c->blocks[outvec_to / c->sector_size]; 809 if (ret)
789 jffs2_block_refile(c, jeb, REFILE_ANYWAY); 810 goto outerr;
790
791 *retlen = 0;
792 spin_unlock(&c->erase_completion_lock);
793 goto exit;
794 } 811 }
795 812 vlen -= wbuf_retlen;
813 outvec_to += wbuf_retlen;
796 donelen += wbuf_retlen; 814 donelen += wbuf_retlen;
797 c->wbuf_ofs = PAGE_DIV(outvec_to) + PAGE_DIV(totlen); 815 v += wbuf_retlen;
798 816
799 if (remainder) { 817 if (vlen >= c->wbuf_pagesize) {
800 outvecs[splitvec].iov_base += split_ofs; 818 ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen),
801 outvecs[splitvec].iov_len = remainder; 819 &wbuf_retlen, v);
802 } else { 820 if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen))
803 splitvec++; 821 goto outfile;
822
823 vlen -= wbuf_retlen;
824 outvec_to += wbuf_retlen;
825 c->wbuf_ofs = outvec_to;
826 donelen += wbuf_retlen;
827 v += wbuf_retlen;
804 } 828 }
805 829
806 } else { 830 wbuf_retlen = jffs2_fill_wbuf(c, v, vlen);
807 splitvec = 0; 831 if (c->wbuf_len == c->wbuf_pagesize) {
808 } 832 ret = __jffs2_flush_wbuf(c, NOPAD);
809 833 if (ret)
810 /* Now splitvec points to the start of the bits we have to copy 834 goto outerr;
811 into the wbuf */ 835 }
812 wbuf_ptr = c->wbuf;
813 836
814 for ( ; splitvec < outvec; splitvec++) { 837 outvec_to += wbuf_retlen;
815 /* Don't copy the wbuf into itself */ 838 donelen += wbuf_retlen;
816 if (outvecs[splitvec].iov_base == c->wbuf)
817 continue;
818 memcpy(wbuf_ptr, outvecs[splitvec].iov_base, outvecs[splitvec].iov_len);
819 wbuf_ptr += outvecs[splitvec].iov_len;
820 donelen += outvecs[splitvec].iov_len;
821 } 839 }
822 c->wbuf_len = wbuf_ptr - c->wbuf;
823 840
824 /* If there's a remainder in the wbuf and it's a non-GC write, 841 /*
825 remember that the wbuf affects this ino */ 842 * If there's a remainder in the wbuf and it's a non-GC write,
826alldone: 843 * remember that the wbuf affects this ino
844 */
827 *retlen = donelen; 845 *retlen = donelen;
828 846
829 if (jffs2_sum_active()) { 847 if (jffs2_sum_active()) {
@@ -836,8 +854,24 @@ alldone:
836 jffs2_wbuf_dirties_inode(c, ino); 854 jffs2_wbuf_dirties_inode(c, ino);
837 855
838 ret = 0; 856 ret = 0;
857 up_write(&c->wbuf_sem);
858 return ret;
839 859
840exit: 860outfile:
861 /*
862 * At this point we have no problem, c->wbuf is empty. However
863 * refile nextblock to avoid writing again to same address.
864 */
865
866 spin_lock(&c->erase_completion_lock);
867
868 jeb = &c->blocks[outvec_to / c->sector_size];
869 jffs2_block_refile(c, jeb, REFILE_ANYWAY);
870
871 spin_unlock(&c->erase_completion_lock);
872
873outerr:
874 *retlen = 0;
841 up_write(&c->wbuf_sem); 875 up_write(&c->wbuf_sem);
842 return ret; 876 return ret;
843} 877}
@@ -846,7 +880,8 @@ exit:
846 * This is the entry for flash write. 880 * This is the entry for flash write.
847 * Check, if we work on NAND FLASH, if so build an kvec and write it via vritev 881 * Check, if we work on NAND FLASH, if so build an kvec and write it via vritev
848*/ 882*/
849int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, const u_char *buf) 883int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
884 size_t *retlen, const u_char *buf)
850{ 885{
851 struct kvec vecs[1]; 886 struct kvec vecs[1];
852 887
@@ -871,25 +906,23 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
871 906
872 /* Read flash */ 907 /* Read flash */
873 down_read(&c->wbuf_sem); 908 down_read(&c->wbuf_sem);
874 if (jffs2_cleanmarker_oob(c)) 909 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
875 ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, c->oobinfo); 910
876 else 911 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
877 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); 912 if (ret == -EBADMSG)
878 913 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx)"
879 if ( (ret == -EBADMSG) && (*retlen == len) ) { 914 " returned ECC error\n", len, ofs);
880 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
881 len, ofs);
882 /* 915 /*
883 * We have the raw data without ECC correction in the buffer, maybe 916 * We have the raw data without ECC correction in the buffer,
884 * we are lucky and all data or parts are correct. We check the node. 917 * maybe we are lucky and all data or parts are correct. We
885 * If data are corrupted node check will sort it out. 918 * check the node. If data are corrupted node check will sort
886 * We keep this block, it will fail on write or erase and the we 919 * it out. We keep this block, it will fail on write or erase
887 * mark it bad. Or should we do that now? But we should give him a chance. 920 * and the we mark it bad. Or should we do that now? But we
888 * Maybe we had a system crash or power loss before the ecc write or 921 * should give him a chance. Maybe we had a system crash or
889 * a erase was completed. 922 * power loss before the ecc write or a erase was completed.
890 * So we return success. :) 923 * So we return success. :)
891 */ 924 */
892 ret = 0; 925 ret = 0;
893 } 926 }
894 927
895 /* if no writebuffer available or write buffer empty, return */ 928 /* if no writebuffer available or write buffer empty, return */
@@ -911,7 +944,7 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
911 orbf = (c->wbuf_ofs - ofs); /* offset in read buffer */ 944 orbf = (c->wbuf_ofs - ofs); /* offset in read buffer */
912 if (orbf > len) /* is write beyond write buffer ? */ 945 if (orbf > len) /* is write beyond write buffer ? */
913 goto exit; 946 goto exit;
914 lwbf = len - orbf; /* number of bytes to copy */ 947 lwbf = len - orbf; /* number of bytes to copy */
915 if (lwbf > c->wbuf_len) 948 if (lwbf > c->wbuf_len)
916 lwbf = c->wbuf_len; 949 lwbf = c->wbuf_len;
917 } 950 }
@@ -923,158 +956,159 @@ exit:
923 return ret; 956 return ret;
924} 957}
925 958
959#define NR_OOB_SCAN_PAGES 4
960
926/* 961/*
927 * Check, if the out of band area is empty 962 * Check, if the out of band area is empty
928 */ 963 */
929int jffs2_check_oob_empty( struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int mode) 964int jffs2_check_oob_empty(struct jffs2_sb_info *c,
965 struct jffs2_eraseblock *jeb, int mode)
930{ 966{
931 unsigned char *buf; 967 int i, page, ret;
932 int ret = 0; 968 int oobsize = c->mtd->oobsize;
933 int i,len,page; 969 struct mtd_oob_ops ops;
934 size_t retlen; 970
935 int oob_size; 971 ops.len = NR_OOB_SCAN_PAGES * oobsize;
936 972 ops.ooblen = oobsize;
937 /* allocate a buffer for all oob data in this sector */ 973 ops.oobbuf = c->oobbuf;
938 oob_size = c->mtd->oobsize; 974 ops.ooboffs = 0;
939 len = 4 * oob_size; 975 ops.datbuf = NULL;
940 buf = kmalloc(len, GFP_KERNEL); 976 ops.mode = MTD_OOB_PLACE;
941 if (!buf) { 977
942 printk(KERN_NOTICE "jffs2_check_oob_empty(): allocation of temporary data buffer for oob check failed\n"); 978 ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
943 return -ENOMEM;
944 }
945 /*
946 * if mode = 0, we scan for a total empty oob area, else we have
947 * to take care of the cleanmarker in the first page of the block
948 */
949 ret = jffs2_flash_read_oob(c, jeb->offset, len , &retlen, buf);
950 if (ret) { 979 if (ret) {
951 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB failed %d for block at %08x\n", ret, jeb->offset)); 980 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB "
952 goto out; 981 "failed %d for block at %08x\n", ret, jeb->offset));
982 return ret;
953 } 983 }
954 984
955 if (retlen < len) { 985 if (ops.retlen < ops.len) {
956 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB return short read " 986 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB "
957 "(%zd bytes not %d) for block at %08x\n", retlen, len, jeb->offset)); 987 "returned short read (%zd bytes not %d) for block "
958 ret = -EIO; 988 "at %08x\n", ops.retlen, ops.len, jeb->offset));
959 goto out; 989 return -EIO;
960 } 990 }
961 991
962 /* Special check for first page */ 992 /* Special check for first page */
963 for(i = 0; i < oob_size ; i++) { 993 for(i = 0; i < oobsize ; i++) {
964 /* Yeah, we know about the cleanmarker. */ 994 /* Yeah, we know about the cleanmarker. */
965 if (mode && i >= c->fsdata_pos && 995 if (mode && i >= c->fsdata_pos &&
966 i < c->fsdata_pos + c->fsdata_len) 996 i < c->fsdata_pos + c->fsdata_len)
967 continue; 997 continue;
968 998
969 if (buf[i] != 0xFF) { 999 if (ops.oobbuf[i] != 0xFF) {
970 D2(printk(KERN_DEBUG "Found %02x at %x in OOB for %08x\n", 1000 D2(printk(KERN_DEBUG "Found %02x at %x in OOB for "
971 buf[i], i, jeb->offset)); 1001 "%08x\n", ops.oobbuf[i], i, jeb->offset));
972 ret = 1; 1002 return 1;
973 goto out;
974 } 1003 }
975 } 1004 }
976 1005
977 /* we know, we are aligned :) */ 1006 /* we know, we are aligned :) */
978 for (page = oob_size; page < len; page += sizeof(long)) { 1007 for (page = oobsize; page < ops.len; page += sizeof(long)) {
979 unsigned long dat = *(unsigned long *)(&buf[page]); 1008 long dat = *(long *)(&ops.oobbuf[page]);
980 if(dat != -1) { 1009 if(dat != -1)
981 ret = 1; 1010 return 1;
982 goto out;
983 }
984 } 1011 }
985 1012 return 0;
986out:
987 kfree(buf);
988
989 return ret;
990} 1013}
991 1014
992/* 1015/*
993* Scan for a valid cleanmarker and for bad blocks 1016 * Scan for a valid cleanmarker and for bad blocks
994* For virtual blocks (concatenated physical blocks) check the cleanmarker 1017 */
995* only in the first page of the first physical block, but scan for bad blocks in all 1018int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c,
996* physical blocks 1019 struct jffs2_eraseblock *jeb)
997*/
998int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
999{ 1020{
1000 struct jffs2_unknown_node n; 1021 struct jffs2_unknown_node n;
1001 unsigned char buf[2 * NAND_MAX_OOBSIZE]; 1022 struct mtd_oob_ops ops;
1002 unsigned char *p; 1023 int oobsize = c->mtd->oobsize;
1003 int ret, i, cnt, retval = 0; 1024 unsigned char *p,*b;
1004 size_t retlen, offset; 1025 int i, ret;
1005 int oob_size; 1026 size_t offset = jeb->offset;
1006 1027
1007 offset = jeb->offset; 1028 /* Check first if the block is bad. */
1008 oob_size = c->mtd->oobsize; 1029 if (c->mtd->block_isbad(c->mtd, offset)) {
1009 1030 D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker()"
1010 /* Loop through the physical blocks */ 1031 ": Bad block at %08x\n", jeb->offset));
1011 for (cnt = 0; cnt < (c->sector_size / c->mtd->erasesize); cnt++) { 1032 return 2;
1012 /* Check first if the block is bad. */ 1033 }
1013 if (c->mtd->block_isbad (c->mtd, offset)) {
1014 D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Bad block at %08x\n", jeb->offset));
1015 return 2;
1016 }
1017 /*
1018 * We read oob data from page 0 and 1 of the block.
1019 * page 0 contains cleanmarker and badblock info
1020 * page 1 contains failure count of this block
1021 */
1022 ret = c->mtd->read_oob (c->mtd, offset, oob_size << 1, &retlen, buf);
1023 1034
1024 if (ret) { 1035 ops.len = oobsize;
1025 D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB failed %d for block at %08x\n", ret, jeb->offset)); 1036 ops.ooblen = oobsize;
1026 return ret; 1037 ops.oobbuf = c->oobbuf;
1027 } 1038 ops.ooboffs = 0;
1028 if (retlen < (oob_size << 1)) { 1039 ops.datbuf = NULL;
1029 D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB return short read (%zd bytes not %d) for block at %08x\n", retlen, oob_size << 1, jeb->offset)); 1040 ops.mode = MTD_OOB_PLACE;
1030 return -EIO;
1031 }
1032 1041
1033 /* Check cleanmarker only on the first physical block */ 1042 ret = c->mtd->read_oob(c->mtd, offset, &ops);
1034 if (!cnt) { 1043 if (ret) {
1035 n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK); 1044 D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): "
1036 n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER); 1045 "Read OOB failed %d for block at %08x\n",
1037 n.totlen = cpu_to_je32 (8); 1046 ret, jeb->offset));
1038 p = (unsigned char *) &n; 1047 return ret;
1048 }
1039 1049
1040 for (i = 0; i < c->fsdata_len; i++) { 1050 if (ops.retlen < ops.len) {
1041 if (buf[c->fsdata_pos + i] != p[i]) { 1051 D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): "
1042 retval = 1; 1052 "Read OOB return short read (%zd bytes not %d) "
1043 } 1053 "for block at %08x\n", ops.retlen, ops.len,
1044 } 1054 jeb->offset));
1045 D1(if (retval == 1) { 1055 return -EIO;
1046 printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): Cleanmarker node not detected in block at %08x\n", jeb->offset);
1047 printk(KERN_WARNING "OOB at %08x was ", offset);
1048 for (i=0; i < oob_size; i++) {
1049 printk("%02x ", buf[i]);
1050 }
1051 printk("\n");
1052 })
1053 }
1054 offset += c->mtd->erasesize;
1055 } 1056 }
1056 return retval; 1057
1058 n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK);
1059 n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER);
1060 n.totlen = cpu_to_je32 (8);
1061 p = (unsigned char *) &n;
1062 b = c->oobbuf + c->fsdata_pos;
1063
1064 for (i = c->fsdata_len; i; i--) {
1065 if (*b++ != *p++)
1066 ret = 1;
1067 }
1068
1069 D1(if (ret == 1) {
1070 printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): "
1071 "Cleanmarker node not detected in block at %08x\n",
1072 offset);
1073 printk(KERN_WARNING "OOB at %08zx was ", offset);
1074 for (i=0; i < oobsize; i++)
1075 printk("%02x ", c->oobbuf[i]);
1076 printk("\n");
1077 });
1078 return ret;
1057} 1079}
1058 1080
1059int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 1081int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
1082 struct jffs2_eraseblock *jeb)
1060{ 1083{
1061 struct jffs2_unknown_node n; 1084 struct jffs2_unknown_node n;
1062 int ret; 1085 int ret;
1063 size_t retlen; 1086 struct mtd_oob_ops ops;
1064 1087
1065 n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 1088 n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1066 n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER); 1089 n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER);
1067 n.totlen = cpu_to_je32(8); 1090 n.totlen = cpu_to_je32(8);
1068 1091
1069 ret = jffs2_flash_write_oob(c, jeb->offset + c->fsdata_pos, c->fsdata_len, &retlen, (unsigned char *)&n); 1092 ops.len = c->fsdata_len;
1093 ops.ooblen = c->fsdata_len;;
1094 ops.oobbuf = (uint8_t *)&n;
1095 ops.ooboffs = c->fsdata_pos;
1096 ops.datbuf = NULL;
1097 ops.mode = MTD_OOB_PLACE;
1098
1099 ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops);
1070 1100
1071 if (ret) { 1101 if (ret) {
1072 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); 1102 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): "
1103 "Write failed for block at %08x: error %d\n",
1104 jeb->offset, ret));
1073 return ret; 1105 return ret;
1074 } 1106 }
1075 if (retlen != c->fsdata_len) { 1107 if (ops.retlen != ops.len) {
1076 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Short write for block at %08x: %zd not %d\n", jeb->offset, retlen, c->fsdata_len)); 1108 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): "
1077 return ret; 1109 "Short write for block at %08x: %zd not %d\n",
1110 jeb->offset, ops.retlen, ops.len));
1111 return -EIO;
1078 } 1112 }
1079 return 0; 1113 return 0;
1080} 1114}
@@ -1108,18 +1142,9 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
1108 return 1; 1142 return 1;
1109} 1143}
1110 1144
1111#define NAND_JFFS2_OOB16_FSDALEN 8
1112
1113static struct nand_oobinfo jffs2_oobinfo_docecc = {
1114 .useecc = MTD_NANDECC_PLACE,
1115 .eccbytes = 6,
1116 .eccpos = {0,1,2,3,4,5}
1117};
1118
1119
1120static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c) 1145static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c)
1121{ 1146{
1122 struct nand_oobinfo *oinfo = &c->mtd->oobinfo; 1147 struct nand_ecclayout *oinfo = c->mtd->ecclayout;
1123 1148
1124 /* Do this only, if we have an oob buffer */ 1149 /* Do this only, if we have an oob buffer */
1125 if (!c->mtd->oobsize) 1150 if (!c->mtd->oobsize)
@@ -1129,33 +1154,23 @@ static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c)
1129 c->cleanmarker_size = 0; 1154 c->cleanmarker_size = 0;
1130 1155
1131 /* Should we use autoplacement ? */ 1156 /* Should we use autoplacement ? */
1132 if (oinfo && oinfo->useecc == MTD_NANDECC_AUTOPLACE) { 1157 if (!oinfo) {
1133 D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n")); 1158 D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n"));
1134 /* Get the position of the free bytes */ 1159 return -EINVAL;
1135 if (!oinfo->oobfree[0][1]) { 1160 }
1136 printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep. Autoplacement selected and no empty space in oob\n");
1137 return -ENOSPC;
1138 }
1139 c->fsdata_pos = oinfo->oobfree[0][0];
1140 c->fsdata_len = oinfo->oobfree[0][1];
1141 if (c->fsdata_len > 8)
1142 c->fsdata_len = 8;
1143 } else {
1144 /* This is just a legacy fallback and should go away soon */
1145 switch(c->mtd->ecctype) {
1146 case MTD_ECC_RS_DiskOnChip:
1147 printk(KERN_WARNING "JFFS2 using DiskOnChip hardware ECC without autoplacement. Fix it!\n");
1148 c->oobinfo = &jffs2_oobinfo_docecc;
1149 c->fsdata_pos = 6;
1150 c->fsdata_len = NAND_JFFS2_OOB16_FSDALEN;
1151 c->badblock_pos = 15;
1152 break;
1153 1161
1154 default: 1162 D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n"));
1155 D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n")); 1163 /* Get the position of the free bytes */
1156 return -EINVAL; 1164 if (!oinfo->oobfree[0].length) {
1157 } 1165 printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep."
1166 " Autoplacement selected and no empty space in oob\n");
1167 return -ENOSPC;
1158 } 1168 }
1169 c->fsdata_pos = oinfo->oobfree[0].offset;
1170 c->fsdata_len = oinfo->oobfree[0].length;
1171 if (c->fsdata_len > 8)
1172 c->fsdata_len = 8;
1173
1159 return 0; 1174 return 0;
1160} 1175}
1161 1176
@@ -1165,13 +1180,17 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1165 1180
1166 /* Initialise write buffer */ 1181 /* Initialise write buffer */
1167 init_rwsem(&c->wbuf_sem); 1182 init_rwsem(&c->wbuf_sem);
1168 c->wbuf_pagesize = c->mtd->oobblock; 1183 c->wbuf_pagesize = c->mtd->writesize;
1169 c->wbuf_ofs = 0xFFFFFFFF; 1184 c->wbuf_ofs = 0xFFFFFFFF;
1170 1185
1171 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); 1186 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1172 if (!c->wbuf) 1187 if (!c->wbuf)
1173 return -ENOMEM; 1188 return -ENOMEM;
1174 1189
1190 c->oobbuf = kmalloc(NR_OOB_SCAN_PAGES * c->mtd->oobsize, GFP_KERNEL);
1191 if (!c->oobbuf)
1192 return -ENOMEM;
1193
1175 res = jffs2_nand_set_oobinfo(c); 1194 res = jffs2_nand_set_oobinfo(c);
1176 1195
1177#ifdef BREAKME 1196#ifdef BREAKME
@@ -1189,6 +1208,7 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1189void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c) 1208void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
1190{ 1209{
1191 kfree(c->wbuf); 1210 kfree(c->wbuf);
1211 kfree(c->oobbuf);
1192} 1212}
1193 1213
1194int jffs2_dataflash_setup(struct jffs2_sb_info *c) { 1214int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
@@ -1236,33 +1256,14 @@ void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) {
1236 kfree(c->wbuf); 1256 kfree(c->wbuf);
1237} 1257}
1238 1258
1239int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) {
1240 /* Cleanmarker is actually larger on the flashes */
1241 c->cleanmarker_size = 16;
1242
1243 /* Initialize write buffer */
1244 init_rwsem(&c->wbuf_sem);
1245 c->wbuf_pagesize = c->mtd->eccsize;
1246 c->wbuf_ofs = 0xFFFFFFFF;
1247
1248 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1249 if (!c->wbuf)
1250 return -ENOMEM;
1251
1252 return 0;
1253}
1254
1255void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c) {
1256 kfree(c->wbuf);
1257}
1258
1259int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { 1259int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
1260 /* Cleanmarker currently occupies a whole programming region */ 1260 /* Cleanmarker currently occupies whole programming regions,
1261 c->cleanmarker_size = MTD_PROGREGION_SIZE(c->mtd); 1261 * either one or 2 for 8Byte STMicro flashes. */
1262 c->cleanmarker_size = max(16u, c->mtd->writesize);
1262 1263
1263 /* Initialize write buffer */ 1264 /* Initialize write buffer */
1264 init_rwsem(&c->wbuf_sem); 1265 init_rwsem(&c->wbuf_sem);
1265 c->wbuf_pagesize = MTD_PROGREGION_SIZE(c->mtd); 1266 c->wbuf_pagesize = c->mtd->writesize;
1266 c->wbuf_ofs = 0xFFFFFFFF; 1267 c->wbuf_ofs = 0xFFFFFFFF;
1267 1268
1268 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); 1269 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 1342f0158e9b..67176792e138 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -37,7 +37,6 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
37 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 37 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
38 f->inocache->state = INO_STATE_PRESENT; 38 f->inocache->state = INO_STATE_PRESENT;
39 39
40
41 jffs2_add_ino_cache(c, f->inocache); 40 jffs2_add_ino_cache(c, f->inocache);
42 D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino)); 41 D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino));
43 ri->ino = cpu_to_je32(f->inocache->ino); 42 ri->ino = cpu_to_je32(f->inocache->ino);
@@ -57,12 +56,14 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
57/* jffs2_write_dnode - given a raw_inode, allocate a full_dnode for it, 56/* jffs2_write_dnode - given a raw_inode, allocate a full_dnode for it,
58 write it to the flash, link it into the existing inode/fragment list */ 57 write it to the flash, link it into the existing inode/fragment list */
59 58
60struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const unsigned char *data, uint32_t datalen, uint32_t flash_ofs, int alloc_mode) 59struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
60 struct jffs2_raw_inode *ri, const unsigned char *data,
61 uint32_t datalen, int alloc_mode)
61 62
62{ 63{
63 struct jffs2_raw_node_ref *raw;
64 struct jffs2_full_dnode *fn; 64 struct jffs2_full_dnode *fn;
65 size_t retlen; 65 size_t retlen;
66 uint32_t flash_ofs;
66 struct kvec vecs[2]; 67 struct kvec vecs[2];
67 int ret; 68 int ret;
68 int retried = 0; 69 int retried = 0;
@@ -78,34 +79,21 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
78 vecs[1].iov_base = (unsigned char *)data; 79 vecs[1].iov_base = (unsigned char *)data;
79 vecs[1].iov_len = datalen; 80 vecs[1].iov_len = datalen;
80 81
81 jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len);
82
83 if (je32_to_cpu(ri->totlen) != sizeof(*ri) + datalen) { 82 if (je32_to_cpu(ri->totlen) != sizeof(*ri) + datalen) {
84 printk(KERN_WARNING "jffs2_write_dnode: ri->totlen (0x%08x) != sizeof(*ri) (0x%08zx) + datalen (0x%08x)\n", je32_to_cpu(ri->totlen), sizeof(*ri), datalen); 83 printk(KERN_WARNING "jffs2_write_dnode: ri->totlen (0x%08x) != sizeof(*ri) (0x%08zx) + datalen (0x%08x)\n", je32_to_cpu(ri->totlen), sizeof(*ri), datalen);
85 } 84 }
86 raw = jffs2_alloc_raw_node_ref();
87 if (!raw)
88 return ERR_PTR(-ENOMEM);
89 85
90 fn = jffs2_alloc_full_dnode(); 86 fn = jffs2_alloc_full_dnode();
91 if (!fn) { 87 if (!fn)
92 jffs2_free_raw_node_ref(raw);
93 return ERR_PTR(-ENOMEM); 88 return ERR_PTR(-ENOMEM);
94 }
95
96 fn->ofs = je32_to_cpu(ri->offset);
97 fn->size = je32_to_cpu(ri->dsize);
98 fn->frags = 0;
99 89
100 /* check number of valid vecs */ 90 /* check number of valid vecs */
101 if (!datalen || !data) 91 if (!datalen || !data)
102 cnt = 1; 92 cnt = 1;
103 retry: 93 retry:
104 fn->raw = raw; 94 flash_ofs = write_ofs(c);
105 95
106 raw->flash_offset = flash_ofs; 96 jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len);
107 raw->__totlen = PAD(sizeof(*ri)+datalen);
108 raw->next_phys = NULL;
109 97
110 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) { 98 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) {
111 BUG_ON(!retried); 99 BUG_ON(!retried);
@@ -125,22 +113,16 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
125 113
126 /* Mark the space as dirtied */ 114 /* Mark the space as dirtied */
127 if (retlen) { 115 if (retlen) {
128 /* Doesn't belong to any inode */
129 raw->next_in_ino = NULL;
130
131 /* Don't change raw->size to match retlen. We may have 116 /* Don't change raw->size to match retlen. We may have
132 written the node header already, and only the data will 117 written the node header already, and only the data will
133 seem corrupted, in which case the scan would skip over 118 seem corrupted, in which case the scan would skip over
134 any node we write before the original intended end of 119 any node we write before the original intended end of
135 this node */ 120 this node */
136 raw->flash_offset |= REF_OBSOLETE; 121 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*ri)+datalen), NULL);
137 jffs2_add_physical_node_ref(c, raw);
138 jffs2_mark_node_obsolete(c, raw);
139 } else { 122 } else {
140 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", raw->flash_offset); 123 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs);
141 jffs2_free_raw_node_ref(raw);
142 } 124 }
143 if (!retried && alloc_mode != ALLOC_NORETRY && (raw = jffs2_alloc_raw_node_ref())) { 125 if (!retried && alloc_mode != ALLOC_NORETRY) {
144 /* Try to reallocate space and retry */ 126 /* Try to reallocate space and retry */
145 uint32_t dummy; 127 uint32_t dummy;
146 struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size]; 128 struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size];
@@ -153,19 +135,20 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
153 jffs2_dbg_acct_paranoia_check(c, jeb); 135 jffs2_dbg_acct_paranoia_check(c, jeb);
154 136
155 if (alloc_mode == ALLOC_GC) { 137 if (alloc_mode == ALLOC_GC) {
156 ret = jffs2_reserve_space_gc(c, sizeof(*ri) + datalen, &flash_ofs, 138 ret = jffs2_reserve_space_gc(c, sizeof(*ri) + datalen, &dummy,
157 &dummy, JFFS2_SUMMARY_INODE_SIZE); 139 JFFS2_SUMMARY_INODE_SIZE);
158 } else { 140 } else {
159 /* Locking pain */ 141 /* Locking pain */
160 up(&f->sem); 142 up(&f->sem);
161 jffs2_complete_reservation(c); 143 jffs2_complete_reservation(c);
162 144
163 ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &flash_ofs, 145 ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &dummy,
164 &dummy, alloc_mode, JFFS2_SUMMARY_INODE_SIZE); 146 alloc_mode, JFFS2_SUMMARY_INODE_SIZE);
165 down(&f->sem); 147 down(&f->sem);
166 } 148 }
167 149
168 if (!ret) { 150 if (!ret) {
151 flash_ofs = write_ofs(c);
169 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); 152 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs));
170 153
171 jffs2_dbg_acct_sanity_check(c,jeb); 154 jffs2_dbg_acct_sanity_check(c,jeb);
@@ -174,7 +157,6 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
174 goto retry; 157 goto retry;
175 } 158 }
176 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 159 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
177 jffs2_free_raw_node_ref(raw);
178 } 160 }
179 /* Release the full_dnode which is now useless, and return */ 161 /* Release the full_dnode which is now useless, and return */
180 jffs2_free_full_dnode(fn); 162 jffs2_free_full_dnode(fn);
@@ -188,20 +170,17 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
188 if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) || 170 if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) ||
189 ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) && 171 ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) &&
190 (je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) == je32_to_cpu(ri->isize)))) { 172 (je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) == je32_to_cpu(ri->isize)))) {
191 raw->flash_offset |= REF_PRISTINE; 173 flash_ofs |= REF_PRISTINE;
192 } else { 174 } else {
193 raw->flash_offset |= REF_NORMAL; 175 flash_ofs |= REF_NORMAL;
194 } 176 }
195 jffs2_add_physical_node_ref(c, raw); 177 fn->raw = jffs2_add_physical_node_ref(c, flash_ofs, PAD(sizeof(*ri)+datalen), f->inocache);
196 178 fn->ofs = je32_to_cpu(ri->offset);
197 /* Link into per-inode list */ 179 fn->size = je32_to_cpu(ri->dsize);
198 spin_lock(&c->erase_completion_lock); 180 fn->frags = 0;
199 raw->next_in_ino = f->inocache->nodes;
200 f->inocache->nodes = raw;
201 spin_unlock(&c->erase_completion_lock);
202 181
203 D1(printk(KERN_DEBUG "jffs2_write_dnode wrote node at 0x%08x(%d) with dsize 0x%x, csize 0x%x, node_crc 0x%08x, data_crc 0x%08x, totlen 0x%08x\n", 182 D1(printk(KERN_DEBUG "jffs2_write_dnode wrote node at 0x%08x(%d) with dsize 0x%x, csize 0x%x, node_crc 0x%08x, data_crc 0x%08x, totlen 0x%08x\n",
204 flash_ofs, ref_flags(raw), je32_to_cpu(ri->dsize), 183 flash_ofs & ~3, flash_ofs & 3, je32_to_cpu(ri->dsize),
205 je32_to_cpu(ri->csize), je32_to_cpu(ri->node_crc), 184 je32_to_cpu(ri->csize), je32_to_cpu(ri->node_crc),
206 je32_to_cpu(ri->data_crc), je32_to_cpu(ri->totlen))); 185 je32_to_cpu(ri->data_crc), je32_to_cpu(ri->totlen)));
207 186
@@ -212,12 +191,14 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
212 return fn; 191 return fn;
213} 192}
214 193
215struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_dirent *rd, const unsigned char *name, uint32_t namelen, uint32_t flash_ofs, int alloc_mode) 194struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
195 struct jffs2_raw_dirent *rd, const unsigned char *name,
196 uint32_t namelen, int alloc_mode)
216{ 197{
217 struct jffs2_raw_node_ref *raw;
218 struct jffs2_full_dirent *fd; 198 struct jffs2_full_dirent *fd;
219 size_t retlen; 199 size_t retlen;
220 struct kvec vecs[2]; 200 struct kvec vecs[2];
201 uint32_t flash_ofs;
221 int retried = 0; 202 int retried = 0;
222 int ret; 203 int ret;
223 204
@@ -228,26 +209,16 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
228 D1(if(je32_to_cpu(rd->hdr_crc) != crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)) { 209 D1(if(je32_to_cpu(rd->hdr_crc) != crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)) {
229 printk(KERN_CRIT "Eep. CRC not correct in jffs2_write_dirent()\n"); 210 printk(KERN_CRIT "Eep. CRC not correct in jffs2_write_dirent()\n");
230 BUG(); 211 BUG();
231 } 212 });
232 );
233 213
234 vecs[0].iov_base = rd; 214 vecs[0].iov_base = rd;
235 vecs[0].iov_len = sizeof(*rd); 215 vecs[0].iov_len = sizeof(*rd);
236 vecs[1].iov_base = (unsigned char *)name; 216 vecs[1].iov_base = (unsigned char *)name;
237 vecs[1].iov_len = namelen; 217 vecs[1].iov_len = namelen;
238 218
239 jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len);
240
241 raw = jffs2_alloc_raw_node_ref();
242
243 if (!raw)
244 return ERR_PTR(-ENOMEM);
245
246 fd = jffs2_alloc_full_dirent(namelen+1); 219 fd = jffs2_alloc_full_dirent(namelen+1);
247 if (!fd) { 220 if (!fd)
248 jffs2_free_raw_node_ref(raw);
249 return ERR_PTR(-ENOMEM); 221 return ERR_PTR(-ENOMEM);
250 }
251 222
252 fd->version = je32_to_cpu(rd->version); 223 fd->version = je32_to_cpu(rd->version);
253 fd->ino = je32_to_cpu(rd->ino); 224 fd->ino = je32_to_cpu(rd->ino);
@@ -257,11 +228,9 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
257 fd->name[namelen]=0; 228 fd->name[namelen]=0;
258 229
259 retry: 230 retry:
260 fd->raw = raw; 231 flash_ofs = write_ofs(c);
261 232
262 raw->flash_offset = flash_ofs; 233 jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len);
263 raw->__totlen = PAD(sizeof(*rd)+namelen);
264 raw->next_phys = NULL;
265 234
266 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) { 235 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) {
267 BUG_ON(!retried); 236 BUG_ON(!retried);
@@ -280,15 +249,11 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
280 sizeof(*rd)+namelen, flash_ofs, ret, retlen); 249 sizeof(*rd)+namelen, flash_ofs, ret, retlen);
281 /* Mark the space as dirtied */ 250 /* Mark the space as dirtied */
282 if (retlen) { 251 if (retlen) {
283 raw->next_in_ino = NULL; 252 jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*rd)+namelen), NULL);
284 raw->flash_offset |= REF_OBSOLETE;
285 jffs2_add_physical_node_ref(c, raw);
286 jffs2_mark_node_obsolete(c, raw);
287 } else { 253 } else {
288 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", raw->flash_offset); 254 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs);
289 jffs2_free_raw_node_ref(raw);
290 } 255 }
291 if (!retried && (raw = jffs2_alloc_raw_node_ref())) { 256 if (!retried) {
292 /* Try to reallocate space and retry */ 257 /* Try to reallocate space and retry */
293 uint32_t dummy; 258 uint32_t dummy;
294 struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size]; 259 struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size];
@@ -301,39 +266,33 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
301 jffs2_dbg_acct_paranoia_check(c, jeb); 266 jffs2_dbg_acct_paranoia_check(c, jeb);
302 267
303 if (alloc_mode == ALLOC_GC) { 268 if (alloc_mode == ALLOC_GC) {
304 ret = jffs2_reserve_space_gc(c, sizeof(*rd) + namelen, &flash_ofs, 269 ret = jffs2_reserve_space_gc(c, sizeof(*rd) + namelen, &dummy,
305 &dummy, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 270 JFFS2_SUMMARY_DIRENT_SIZE(namelen));
306 } else { 271 } else {
307 /* Locking pain */ 272 /* Locking pain */
308 up(&f->sem); 273 up(&f->sem);
309 jffs2_complete_reservation(c); 274 jffs2_complete_reservation(c);
310 275
311 ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &flash_ofs, 276 ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &dummy,
312 &dummy, alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 277 alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
313 down(&f->sem); 278 down(&f->sem);
314 } 279 }
315 280
316 if (!ret) { 281 if (!ret) {
282 flash_ofs = write_ofs(c);
317 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); 283 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs));
318 jffs2_dbg_acct_sanity_check(c,jeb); 284 jffs2_dbg_acct_sanity_check(c,jeb);
319 jffs2_dbg_acct_paranoia_check(c, jeb); 285 jffs2_dbg_acct_paranoia_check(c, jeb);
320 goto retry; 286 goto retry;
321 } 287 }
322 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); 288 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
323 jffs2_free_raw_node_ref(raw);
324 } 289 }
325 /* Release the full_dnode which is now useless, and return */ 290 /* Release the full_dnode which is now useless, and return */
326 jffs2_free_full_dirent(fd); 291 jffs2_free_full_dirent(fd);
327 return ERR_PTR(ret?ret:-EIO); 292 return ERR_PTR(ret?ret:-EIO);
328 } 293 }
329 /* Mark the space used */ 294 /* Mark the space used */
330 raw->flash_offset |= REF_PRISTINE; 295 fd->raw = jffs2_add_physical_node_ref(c, flash_ofs | REF_PRISTINE, PAD(sizeof(*rd)+namelen), f->inocache);
331 jffs2_add_physical_node_ref(c, raw);
332
333 spin_lock(&c->erase_completion_lock);
334 raw->next_in_ino = f->inocache->nodes;
335 f->inocache->nodes = raw;
336 spin_unlock(&c->erase_completion_lock);
337 296
338 if (retried) { 297 if (retried) {
339 jffs2_dbg_acct_sanity_check(c,NULL); 298 jffs2_dbg_acct_sanity_check(c,NULL);
@@ -359,14 +318,14 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
359 struct jffs2_full_dnode *fn; 318 struct jffs2_full_dnode *fn;
360 unsigned char *comprbuf = NULL; 319 unsigned char *comprbuf = NULL;
361 uint16_t comprtype = JFFS2_COMPR_NONE; 320 uint16_t comprtype = JFFS2_COMPR_NONE;
362 uint32_t phys_ofs, alloclen; 321 uint32_t alloclen;
363 uint32_t datalen, cdatalen; 322 uint32_t datalen, cdatalen;
364 int retried = 0; 323 int retried = 0;
365 324
366 retry: 325 retry:
367 D2(printk(KERN_DEBUG "jffs2_commit_write() loop: 0x%x to write to 0x%x\n", writelen, offset)); 326 D2(printk(KERN_DEBUG "jffs2_commit_write() loop: 0x%x to write to 0x%x\n", writelen, offset));
368 327
369 ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, 328 ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN,
370 &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 329 &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
371 if (ret) { 330 if (ret) {
372 D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret)); 331 D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret));
@@ -394,7 +353,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
394 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 353 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
395 ri->data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen)); 354 ri->data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
396 355
397 fn = jffs2_write_dnode(c, f, ri, comprbuf, cdatalen, phys_ofs, ALLOC_NORETRY); 356 fn = jffs2_write_dnode(c, f, ri, comprbuf, cdatalen, ALLOC_NORETRY);
398 357
399 jffs2_free_comprbuf(comprbuf, buf); 358 jffs2_free_comprbuf(comprbuf, buf);
400 359
@@ -448,13 +407,13 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
448 struct jffs2_raw_dirent *rd; 407 struct jffs2_raw_dirent *rd;
449 struct jffs2_full_dnode *fn; 408 struct jffs2_full_dnode *fn;
450 struct jffs2_full_dirent *fd; 409 struct jffs2_full_dirent *fd;
451 uint32_t alloclen, phys_ofs; 410 uint32_t alloclen;
452 int ret; 411 int ret;
453 412
454 /* Try to reserve enough space for both node and dirent. 413 /* Try to reserve enough space for both node and dirent.
455 * Just the node will do for now, though 414 * Just the node will do for now, though
456 */ 415 */
457 ret = jffs2_reserve_space(c, sizeof(*ri), &phys_ofs, &alloclen, ALLOC_NORMAL, 416 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
458 JFFS2_SUMMARY_INODE_SIZE); 417 JFFS2_SUMMARY_INODE_SIZE);
459 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 418 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
460 if (ret) { 419 if (ret) {
@@ -465,7 +424,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
465 ri->data_crc = cpu_to_je32(0); 424 ri->data_crc = cpu_to_je32(0);
466 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 425 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
467 426
468 fn = jffs2_write_dnode(c, f, ri, NULL, 0, phys_ofs, ALLOC_NORMAL); 427 fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL);
469 428
470 D1(printk(KERN_DEBUG "jffs2_do_create created file with mode 0x%x\n", 429 D1(printk(KERN_DEBUG "jffs2_do_create created file with mode 0x%x\n",
471 jemode_to_cpu(ri->mode))); 430 jemode_to_cpu(ri->mode)));
@@ -484,7 +443,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
484 443
485 up(&f->sem); 444 up(&f->sem);
486 jffs2_complete_reservation(c); 445 jffs2_complete_reservation(c);
487 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 446 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
488 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 447 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
489 448
490 if (ret) { 449 if (ret) {
@@ -516,7 +475,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
516 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 475 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
517 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 476 rd->name_crc = cpu_to_je32(crc32(0, name, namelen));
518 477
519 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_NORMAL); 478 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL);
520 479
521 jffs2_free_raw_dirent(rd); 480 jffs2_free_raw_dirent(rd);
522 481
@@ -545,7 +504,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
545{ 504{
546 struct jffs2_raw_dirent *rd; 505 struct jffs2_raw_dirent *rd;
547 struct jffs2_full_dirent *fd; 506 struct jffs2_full_dirent *fd;
548 uint32_t alloclen, phys_ofs; 507 uint32_t alloclen;
549 int ret; 508 int ret;
550 509
551 if (1 /* alternative branch needs testing */ || 510 if (1 /* alternative branch needs testing */ ||
@@ -556,7 +515,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
556 if (!rd) 515 if (!rd)
557 return -ENOMEM; 516 return -ENOMEM;
558 517
559 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 518 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
560 ALLOC_DELETION, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 519 ALLOC_DELETION, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
561 if (ret) { 520 if (ret) {
562 jffs2_free_raw_dirent(rd); 521 jffs2_free_raw_dirent(rd);
@@ -580,7 +539,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
580 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 539 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
581 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 540 rd->name_crc = cpu_to_je32(crc32(0, name, namelen));
582 541
583 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_DELETION); 542 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_DELETION);
584 543
585 jffs2_free_raw_dirent(rd); 544 jffs2_free_raw_dirent(rd);
586 545
@@ -659,14 +618,14 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint
659{ 618{
660 struct jffs2_raw_dirent *rd; 619 struct jffs2_raw_dirent *rd;
661 struct jffs2_full_dirent *fd; 620 struct jffs2_full_dirent *fd;
662 uint32_t alloclen, phys_ofs; 621 uint32_t alloclen;
663 int ret; 622 int ret;
664 623
665 rd = jffs2_alloc_raw_dirent(); 624 rd = jffs2_alloc_raw_dirent();
666 if (!rd) 625 if (!rd)
667 return -ENOMEM; 626 return -ENOMEM;
668 627
669 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, 628 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
670 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 629 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
671 if (ret) { 630 if (ret) {
672 jffs2_free_raw_dirent(rd); 631 jffs2_free_raw_dirent(rd);
@@ -692,7 +651,7 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint
692 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 651 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
693 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 652 rd->name_crc = cpu_to_je32(crc32(0, name, namelen));
694 653
695 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_NORMAL); 654 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL);
696 655
697 jffs2_free_raw_dirent(rd); 656 jffs2_free_raw_dirent(rd);
698 657
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
new file mode 100644
index 000000000000..18e66dbf23b4
--- /dev/null
+++ b/fs/jffs2/xattr.c
@@ -0,0 +1,1326 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/time.h>
15#include <linux/pagemap.h>
16#include <linux/highmem.h>
17#include <linux/crc32.h>
18#include <linux/jffs2.h>
19#include <linux/xattr.h>
20#include <linux/mtd/mtd.h>
21#include "nodelist.h"
22/* -------- xdatum related functions ----------------
23 * xattr_datum_hashkey(xprefix, xname, xvalue, xsize)
24 * is used to calculate the xdatum hashkey. The remainder of the hashkey modulo XATTRINDEX_HASHSIZE is
25 * the index of the xattr name/value pair cache (c->xattrindex).
26 * is_xattr_datum_unchecked(c, xd)
27 * returns 1, if xdatum contains any unchecked raw nodes. if all raw nodes are not
28 * unchecked, it returns 0.
29 * unload_xattr_datum(c, xd)
30 * is used to release xattr name/value pair and detach from c->xattrindex.
31 * reclaim_xattr_datum(c)
32 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
33 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
34 * is hard coded as 32KiB.
35 * do_verify_xattr_datum(c, xd)
36 * is used to load the xdatum information without name/value pair from the medium.
37 * It's necessary once, because that information is not collected during the mounting
38 * process when EBS is enabled.
39 * 0 will be returned on success. A negative return value means a recoverable error, and a
40 * positive return value means unrecoverable error. Thus, caller must remove this xdatum
41 * and xref when it returned positive value.
42 * do_load_xattr_datum(c, xd)
43 * is used to load name/value pair from the medium.
44 * The meanings of return value is same as do_verify_xattr_datum().
45 * load_xattr_datum(c, xd)
46 * is used to be as a wrapper of do_verify_xattr_datum() and do_load_xattr_datum().
47 * If xd need to call do_verify_xattr_datum() at first, it's called before calling
48 * do_load_xattr_datum(). The meanings of return value is same as do_verify_xattr_datum().
49 * save_xattr_datum(c, xd)
50 * is used to write xdatum to medium. xd->version will be incremented.
51 * create_xattr_datum(c, xprefix, xname, xvalue, xsize)
52 * is used to create new xdatum and write to medium.
53 * delete_xattr_datum(c, xd)
54 * is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows
55 * GC to reclaim those physical nodes.
56 * -------------------------------------------------- */
/*
 * Hash key of an xattr name/value pair: the CRC32 of the name XORed with
 * the CRC32 of the value, both seeded with the prefix index.
 */
static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
{
	int name_len = strlen(xname);
	uint32_t name_crc = crc32(xprefix, xname, name_len);
	uint32_t value_crc = crc32(xprefix, xvalue, xsize);

	return name_crc ^ value_crc;
}
63
64static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
65{
66 struct jffs2_raw_node_ref *raw;
67 int rc = 0;
68
69 spin_lock(&c->erase_completion_lock);
70 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
71 if (ref_flags(raw) == REF_UNCHECKED) {
72 rc = 1;
73 break;
74 }
75 }
76 spin_unlock(&c->erase_completion_lock);
77 return rc;
78}
79
80static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
81{
82 /* must be called under down_write(xattr_sem) */
83 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version));
84 if (xd->xname) {
85 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
86 kfree(xd->xname);
87 }
88
89 list_del_init(&xd->xindex);
90 xd->hashkey = 0;
91 xd->xname = NULL;
92 xd->xvalue = NULL;
93}
94
95static void reclaim_xattr_datum(struct jffs2_sb_info *c)
96{
97 /* must be called under down_write(xattr_sem) */
98 struct jffs2_xattr_datum *xd, *_xd;
99 uint32_t target, before;
100 static int index = 0;
101 int count;
102
103 if (c->xdatum_mem_threshold > c->xdatum_mem_usage)
104 return;
105
106 before = c->xdatum_mem_usage;
107 target = c->xdatum_mem_usage * 4 / 5; /* 20% reduction */
108 for (count = 0; count < XATTRINDEX_HASHSIZE; count++) {
109 list_for_each_entry_safe(xd, _xd, &c->xattrindex[index], xindex) {
110 if (xd->flags & JFFS2_XFLAGS_HOT) {
111 xd->flags &= ~JFFS2_XFLAGS_HOT;
112 } else if (!(xd->flags & JFFS2_XFLAGS_BIND)) {
113 unload_xattr_datum(c, xd);
114 }
115 if (c->xdatum_mem_usage <= target)
116 goto out;
117 }
118 index = (index+1) % XATTRINDEX_HASHSIZE;
119 }
120 out:
121 JFFS2_NOTICE("xdatum_mem_usage from %u byte to %u byte (%u byte reclaimed)\n",
122 before, c->xdatum_mem_usage, before - c->xdatum_mem_usage);
123}
124
125static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
126{
127 /* must be called under down_write(xattr_sem) */
128 struct jffs2_eraseblock *jeb;
129 struct jffs2_raw_node_ref *raw;
130 struct jffs2_raw_xattr rx;
131 size_t readlen;
132 uint32_t crc, offset, totlen;
133 int rc;
134
135 spin_lock(&c->erase_completion_lock);
136 offset = ref_offset(xd->node);
137 if (ref_flags(xd->node) == REF_PRISTINE)
138 goto complete;
139 spin_unlock(&c->erase_completion_lock);
140
141 rc = jffs2_flash_read(c, offset, sizeof(rx), &readlen, (char *)&rx);
142 if (rc || readlen != sizeof(rx)) {
143 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
144 rc, sizeof(rx), readlen, offset);
145 return rc ? rc : -EIO;
146 }
147 crc = crc32(0, &rx, sizeof(rx) - 4);
148 if (crc != je32_to_cpu(rx.node_crc)) {
149 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
150 offset, je32_to_cpu(rx.hdr_crc), crc);
151 xd->flags |= JFFS2_XFLAGS_INVALID;
152 return EIO;
153 }
154 totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
155 if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK
156 || je16_to_cpu(rx.nodetype) != JFFS2_NODETYPE_XATTR
157 || je32_to_cpu(rx.totlen) != totlen
158 || je32_to_cpu(rx.xid) != xd->xid
159 || je32_to_cpu(rx.version) != xd->version) {
160 JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, "
161 "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n",
162 offset, je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK,
163 je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR,
164 je32_to_cpu(rx.totlen), totlen,
165 je32_to_cpu(rx.xid), xd->xid,
166 je32_to_cpu(rx.version), xd->version);
167 xd->flags |= JFFS2_XFLAGS_INVALID;
168 return EIO;
169 }
170 xd->xprefix = rx.xprefix;
171 xd->name_len = rx.name_len;
172 xd->value_len = je16_to_cpu(rx.value_len);
173 xd->data_crc = je32_to_cpu(rx.data_crc);
174
175 spin_lock(&c->erase_completion_lock);
176 complete:
177 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
178 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
179 totlen = PAD(ref_totlen(c, jeb, raw));
180 if (ref_flags(raw) == REF_UNCHECKED) {
181 c->unchecked_size -= totlen; c->used_size += totlen;
182 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
183 }
184 raw->flash_offset = ref_offset(raw) | ((xd->node==raw) ? REF_PRISTINE : REF_NORMAL);
185 }
186 spin_unlock(&c->erase_completion_lock);
187
188 /* unchecked xdatum is chained with c->xattr_unchecked */
189 list_del_init(&xd->xindex);
190
191 dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n",
192 xd->xid, xd->version);
193
194 return 0;
195}
196
197static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
198{
199 /* must be called under down_write(xattr_sem) */
200 char *data;
201 size_t readlen;
202 uint32_t crc, length;
203 int i, ret, retry = 0;
204
205 BUG_ON(ref_flags(xd->node) != REF_PRISTINE);
206 BUG_ON(!list_empty(&xd->xindex));
207 retry:
208 length = xd->name_len + 1 + xd->value_len;
209 data = kmalloc(length, GFP_KERNEL);
210 if (!data)
211 return -ENOMEM;
212
213 ret = jffs2_flash_read(c, ref_offset(xd->node)+sizeof(struct jffs2_raw_xattr),
214 length, &readlen, data);
215
216 if (ret || length!=readlen) {
217 JFFS2_WARNING("jffs2_flash_read() returned %d, request=%d, readlen=%zu, at %#08x\n",
218 ret, length, readlen, ref_offset(xd->node));
219 kfree(data);
220 return ret ? ret : -EIO;
221 }
222
223 data[xd->name_len] = '\0';
224 crc = crc32(0, data, length);
225 if (crc != xd->data_crc) {
226 JFFS2_WARNING("node CRC failed (JFFS2_NODETYPE_XREF)"
227 " at %#08x, read: 0x%08x calculated: 0x%08x\n",
228 ref_offset(xd->node), xd->data_crc, crc);
229 kfree(data);
230 xd->flags |= JFFS2_XFLAGS_INVALID;
231 return EIO;
232 }
233
234 xd->flags |= JFFS2_XFLAGS_HOT;
235 xd->xname = data;
236 xd->xvalue = data + xd->name_len+1;
237
238 c->xdatum_mem_usage += length;
239
240 xd->hashkey = xattr_datum_hashkey(xd->xprefix, xd->xname, xd->xvalue, xd->value_len);
241 i = xd->hashkey % XATTRINDEX_HASHSIZE;
242 list_add(&xd->xindex, &c->xattrindex[i]);
243 if (!retry) {
244 retry = 1;
245 reclaim_xattr_datum(c);
246 if (!xd->xname)
247 goto retry;
248 }
249
250 dbg_xattr("success on loading xdatum (xid=%u, xprefix=%u, xname='%s')\n",
251 xd->xid, xd->xprefix, xd->xname);
252
253 return 0;
254}
255
256static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
257{
258 /* must be called under down_write(xattr_sem);
259 * rc < 0 : recoverable error, try again
260 * rc = 0 : success
261 * rc > 0 : Unrecoverable error, this node should be deleted.
262 */
263 int rc = 0;
264
265 BUG_ON(xd->flags & JFFS2_XFLAGS_DEAD);
266 if (xd->xname)
267 return 0;
268 if (xd->flags & JFFS2_XFLAGS_INVALID)
269 return EIO;
270 if (unlikely(is_xattr_datum_unchecked(c, xd)))
271 rc = do_verify_xattr_datum(c, xd);
272 if (!rc)
273 rc = do_load_xattr_datum(c, xd);
274 return rc;
275}
276
277static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
278{
279 /* must be called under down_write(xattr_sem) */
280 struct jffs2_raw_xattr rx;
281 struct kvec vecs[2];
282 size_t length;
283 int rc, totlen;
284 uint32_t phys_ofs = write_ofs(c);
285
286 BUG_ON(!xd->xname);
287 BUG_ON(xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID));
288
289 vecs[0].iov_base = &rx;
290 vecs[0].iov_len = sizeof(rx);
291 vecs[1].iov_base = xd->xname;
292 vecs[1].iov_len = xd->name_len + 1 + xd->value_len;
293 totlen = vecs[0].iov_len + vecs[1].iov_len;
294
295 /* Setup raw-xattr */
296 memset(&rx, 0, sizeof(rx));
297 rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
298 rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR);
299 rx.totlen = cpu_to_je32(PAD(totlen));
300 rx.hdr_crc = cpu_to_je32(crc32(0, &rx, sizeof(struct jffs2_unknown_node) - 4));
301
302 rx.xid = cpu_to_je32(xd->xid);
303 rx.version = cpu_to_je32(++xd->version);
304 rx.xprefix = xd->xprefix;
305 rx.name_len = xd->name_len;
306 rx.value_len = cpu_to_je16(xd->value_len);
307 rx.data_crc = cpu_to_je32(crc32(0, vecs[1].iov_base, vecs[1].iov_len));
308 rx.node_crc = cpu_to_je32(crc32(0, &rx, sizeof(struct jffs2_raw_xattr) - 4));
309
310 rc = jffs2_flash_writev(c, vecs, 2, phys_ofs, &length, 0);
311 if (rc || totlen != length) {
312 JFFS2_WARNING("jffs2_flash_writev()=%d, req=%u, wrote=%zu, at %#08x\n",
313 rc, totlen, length, phys_ofs);
314 rc = rc ? rc : -EIO;
315 if (length)
316 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, PAD(totlen), NULL);
317
318 return rc;
319 }
320 /* success */
321 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), (void *)xd);
322
323 dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n",
324 xd->xid, xd->version, xd->xprefix, xd->xname);
325
326 return 0;
327}
328
329static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
330 int xprefix, const char *xname,
331 const char *xvalue, int xsize)
332{
333 /* must be called under down_write(xattr_sem) */
334 struct jffs2_xattr_datum *xd;
335 uint32_t hashkey, name_len;
336 char *data;
337 int i, rc;
338
339 /* Search xattr_datum has same xname/xvalue by index */
340 hashkey = xattr_datum_hashkey(xprefix, xname, xvalue, xsize);
341 i = hashkey % XATTRINDEX_HASHSIZE;
342 list_for_each_entry(xd, &c->xattrindex[i], xindex) {
343 if (xd->hashkey==hashkey
344 && xd->xprefix==xprefix
345 && xd->value_len==xsize
346 && !strcmp(xd->xname, xname)
347 && !memcmp(xd->xvalue, xvalue, xsize)) {
348 atomic_inc(&xd->refcnt);
349 return xd;
350 }
351 }
352
353 /* Not found, Create NEW XATTR-Cache */
354 name_len = strlen(xname);
355
356 xd = jffs2_alloc_xattr_datum();
357 if (!xd)
358 return ERR_PTR(-ENOMEM);
359
360 data = kmalloc(name_len + 1 + xsize, GFP_KERNEL);
361 if (!data) {
362 jffs2_free_xattr_datum(xd);
363 return ERR_PTR(-ENOMEM);
364 }
365 strcpy(data, xname);
366 memcpy(data + name_len + 1, xvalue, xsize);
367
368 atomic_set(&xd->refcnt, 1);
369 xd->xid = ++c->highest_xid;
370 xd->flags |= JFFS2_XFLAGS_HOT;
371 xd->xprefix = xprefix;
372
373 xd->hashkey = hashkey;
374 xd->xname = data;
375 xd->xvalue = data + name_len + 1;
376 xd->name_len = name_len;
377 xd->value_len = xsize;
378 xd->data_crc = crc32(0, data, xd->name_len + 1 + xd->value_len);
379
380 rc = save_xattr_datum(c, xd);
381 if (rc) {
382 kfree(xd->xname);
383 jffs2_free_xattr_datum(xd);
384 return ERR_PTR(rc);
385 }
386
387 /* Insert Hash Index */
388 i = hashkey % XATTRINDEX_HASHSIZE;
389 list_add(&xd->xindex, &c->xattrindex[i]);
390
391 c->xdatum_mem_usage += (xd->name_len + 1 + xd->value_len);
392 reclaim_xattr_datum(c);
393
394 return xd;
395}
396
397static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
398{
399 /* must be called under down_write(xattr_sem) */
400 BUG_ON(atomic_read(&xd->refcnt));
401
402 unload_xattr_datum(c, xd);
403 xd->flags |= JFFS2_XFLAGS_DEAD;
404 spin_lock(&c->erase_completion_lock);
405 if (xd->node == (void *)xd) {
406 BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
407 jffs2_free_xattr_datum(xd);
408 } else {
409 list_add(&xd->xindex, &c->xattr_dead_list);
410 }
411 spin_unlock(&c->erase_completion_lock);
412 dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
413}
414
415/* -------- xref related functions ------------------
416 * verify_xattr_ref(c, ref)
417 * is used to load xref information from medium. Because summary data does not
418 * contain xid/ino, it's necessary to verify once while mounting process.
419 * save_xattr_ref(c, ref)
420 * is used to write xref to medium. If delete marker is marked, it writes
421 * a delete marker of xref into medium.
422 * create_xattr_ref(c, ic, xd)
423 * is used to create a new xref and write to medium.
424 * delete_xattr_ref(c, ref)
425 * is used to delete jffs2_xattr_ref. It marks xref XREF_DELETE_MARKER,
426 * and allows GC to reclaim those physical nodes.
427 * jffs2_xattr_delete_inode(c, ic)
428 * is called to remove xrefs related to obsolete inode when inode is unlinked.
429 * jffs2_xattr_free_inode(c, ic)
430 * is called to release xattr related objects when unmounting.
431 * check_xattr_ref_inode(c, ic)
432 * is used to confirm inode does not have duplicate xattr name/value pair.
433 * -------------------------------------------------- */
434static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
435{
436 struct jffs2_eraseblock *jeb;
437 struct jffs2_raw_node_ref *raw;
438 struct jffs2_raw_xref rr;
439 size_t readlen;
440 uint32_t crc, offset, totlen;
441 int rc;
442
443 spin_lock(&c->erase_completion_lock);
444 if (ref_flags(ref->node) != REF_UNCHECKED)
445 goto complete;
446 offset = ref_offset(ref->node);
447 spin_unlock(&c->erase_completion_lock);
448
449 rc = jffs2_flash_read(c, offset, sizeof(rr), &readlen, (char *)&rr);
450 if (rc || sizeof(rr) != readlen) {
451 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n",
452 rc, sizeof(rr), readlen, offset);
453 return rc ? rc : -EIO;
454 }
455 /* obsolete node */
456 crc = crc32(0, &rr, sizeof(rr) - 4);
457 if (crc != je32_to_cpu(rr.node_crc)) {
458 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
459 offset, je32_to_cpu(rr.node_crc), crc);
460 return EIO;
461 }
462 if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
463 || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF
464 || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) {
465 JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, "
466 "nodetype=%#04x/%#04x, totlen=%u/%zu\n",
467 offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
468 je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
469 je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
470 return EIO;
471 }
472 ref->ino = je32_to_cpu(rr.ino);
473 ref->xid = je32_to_cpu(rr.xid);
474 ref->xseqno = je32_to_cpu(rr.xseqno);
475 if (ref->xseqno > c->highest_xseqno)
476 c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
477
478 spin_lock(&c->erase_completion_lock);
479 complete:
480 for (raw=ref->node; raw != (void *)ref; raw=raw->next_in_ino) {
481 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
482 totlen = PAD(ref_totlen(c, jeb, raw));
483 if (ref_flags(raw) == REF_UNCHECKED) {
484 c->unchecked_size -= totlen; c->used_size += totlen;
485 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
486 }
487 raw->flash_offset = ref_offset(raw) | ((ref->node==raw) ? REF_PRISTINE : REF_NORMAL);
488 }
489 spin_unlock(&c->erase_completion_lock);
490
491 dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n",
492 ref->ino, ref->xid, ref_offset(ref->node));
493 return 0;
494}
495
496static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
497{
498 /* must be called under down_write(xattr_sem) */
499 struct jffs2_raw_xref rr;
500 size_t length;
501 uint32_t xseqno, phys_ofs = write_ofs(c);
502 int ret;
503
504 rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
505 rr.nodetype = cpu_to_je16(JFFS2_NODETYPE_XREF);
506 rr.totlen = cpu_to_je32(PAD(sizeof(rr)));
507 rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4));
508
509 xseqno = (c->highest_xseqno += 2);
510 if (is_xattr_ref_dead(ref)) {
511 xseqno |= XREF_DELETE_MARKER;
512 rr.ino = cpu_to_je32(ref->ino);
513 rr.xid = cpu_to_je32(ref->xid);
514 } else {
515 rr.ino = cpu_to_je32(ref->ic->ino);
516 rr.xid = cpu_to_je32(ref->xd->xid);
517 }
518 rr.xseqno = cpu_to_je32(xseqno);
519 rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4));
520
521 ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr);
522 if (ret || sizeof(rr) != length) {
523 JFFS2_WARNING("jffs2_flash_write() returned %d, request=%zu, retlen=%zu, at %#08x\n",
524 ret, sizeof(rr), length, phys_ofs);
525 ret = ret ? ret : -EIO;
526 if (length)
527 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, PAD(sizeof(rr)), NULL);
528
529 return ret;
530 }
531 /* success */
532 ref->xseqno = xseqno;
533 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), (void *)ref);
534
535 dbg_xattr("success on saving xref (ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid);
536
537 return 0;
538}
539
540static struct jffs2_xattr_ref *create_xattr_ref(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic,
541 struct jffs2_xattr_datum *xd)
542{
543 /* must be called under down_write(xattr_sem) */
544 struct jffs2_xattr_ref *ref;
545 int ret;
546
547 ref = jffs2_alloc_xattr_ref();
548 if (!ref)
549 return ERR_PTR(-ENOMEM);
550 ref->ic = ic;
551 ref->xd = xd;
552
553 ret = save_xattr_ref(c, ref);
554 if (ret) {
555 jffs2_free_xattr_ref(ref);
556 return ERR_PTR(ret);
557 }
558
559 /* Chain to inode */
560 ref->next = ic->xref;
561 ic->xref = ref;
562
563 return ref; /* success */
564}
565
566static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
567{
568 /* must be called under down_write(xattr_sem) */
569 struct jffs2_xattr_datum *xd;
570
571 xd = ref->xd;
572 ref->xseqno |= XREF_DELETE_MARKER;
573 ref->ino = ref->ic->ino;
574 ref->xid = ref->xd->xid;
575 spin_lock(&c->erase_completion_lock);
576 ref->next = c->xref_dead_list;
577 c->xref_dead_list = ref;
578 spin_unlock(&c->erase_completion_lock);
579
580 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
581 ref->ino, ref->xid, ref->xseqno);
582
583 if (atomic_dec_and_test(&xd->refcnt))
584 delete_xattr_datum(c, xd);
585}
586
587void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
588{
589 /* It's called from jffs2_clear_inode() on inode removing.
590 When an inode with XATTR is removed, those XATTRs must be removed. */
591 struct jffs2_xattr_ref *ref, *_ref;
592
593 if (!ic || ic->nlink > 0)
594 return;
595
596 down_write(&c->xattr_sem);
597 for (ref = ic->xref; ref; ref = _ref) {
598 _ref = ref->next;
599 delete_xattr_ref(c, ref);
600 }
601 ic->xref = NULL;
602 up_write(&c->xattr_sem);
603}
604
605void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
606{
607 /* It's called from jffs2_free_ino_caches() until unmounting FS. */
608 struct jffs2_xattr_datum *xd;
609 struct jffs2_xattr_ref *ref, *_ref;
610
611 down_write(&c->xattr_sem);
612 for (ref = ic->xref; ref; ref = _ref) {
613 _ref = ref->next;
614 xd = ref->xd;
615 if (atomic_dec_and_test(&xd->refcnt)) {
616 unload_xattr_datum(c, xd);
617 jffs2_free_xattr_datum(xd);
618 }
619 jffs2_free_xattr_ref(ref);
620 }
621 ic->xref = NULL;
622 up_write(&c->xattr_sem);
623}
624
625static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
626{
627 /* Success of check_xattr_ref_inode() means that the inode (ic) does not have
628 * two xrefs whose xdatums share the same prefix and name. If such a pair is
629 * found, the xref with the lower xseqno is removed and the scan restarts.
630 * Returns 0 on success or when ic was already checked, and the negative
 * error from load_xattr_datum() otherwise. xattr_sem is taken for writing
 * inside this function and released before returning.
 */
631 struct jffs2_xattr_ref *ref, *cmp, **pref, **pcmp;
632 int rc = 0;
633
634 if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED))
635 return 0;
636 down_write(&c->xattr_sem);
637 retry: /* restart the whole scan after any xref has been unlinked */
638 rc = 0;
639 for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) { /* pref: link that points at ref */
640 if (!ref->xd->xname) {
641 rc = load_xattr_datum(c, ref->xd);
642 if (unlikely(rc > 0)) { /* unrecoverable: unlink and delete this xref */
643 *pref = ref->next;
644 delete_xattr_ref(c, ref);
645 goto retry;
646 } else if (unlikely(rc < 0)) /* recoverable error: give up, ic stays unchecked */
647 goto out;
648 }
649 for (cmp=ref->next, pcmp=&ref->next; cmp; pcmp=&cmp->next, cmp=cmp->next) { /* compare ref with every later xref */
650 if (!cmp->xd->xname) {
651 ref->xd->flags |= JFFS2_XFLAGS_BIND; /* reclaim_xattr_datum() does not unload BIND xdatums */
652 rc = load_xattr_datum(c, cmp->xd);
653 ref->xd->flags &= ~JFFS2_XFLAGS_BIND;
654 if (unlikely(rc > 0)) {
655 *pcmp = cmp->next;
656 delete_xattr_ref(c, cmp);
657 goto retry;
658 } else if (unlikely(rc < 0))
659 goto out;
660 }
661 if (ref->xd->xprefix == cmp->xd->xprefix
662 && !strcmp(ref->xd->xname, cmp->xd->xname)) { /* duplicate: same prefix and name */
663 if (ref->xseqno > cmp->xseqno) { /* keep the xref with the higher xseqno */
664 *pcmp = cmp->next;
665 delete_xattr_ref(c, cmp);
666 } else {
667 *pref = ref->next;
668 delete_xattr_ref(c, ref);
669 }
670 goto retry;
671 }
672 }
673 }
674 ic->flags |= INO_FLAGS_XATTR_CHECKED; /* reached only when a full pass found no duplicate and no error */
675 out:
676 up_write(&c->xattr_sem);
677
678 return rc;
679}
680
681/* -------- xattr subsystem functions ---------------
682 * jffs2_init_xattr_subsystem(c)
683 * is used to initialize semaphore and list_head, and some variables.
684 * jffs2_find_xattr_datum(c, xid)
685 * is used to lookup xdatum while scanning process.
686 * jffs2_clear_xattr_subsystem(c)
687 * is used to release any xattr related objects.
688 * jffs2_build_xattr_subsystem(c)
689 * is used to associate xdatum and xref while super block building process.
690 * jffs2_setup_xattr_datum(c, xid, version)
691 * is used to insert xdatum while scanning process.
692 * -------------------------------------------------- */
693void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c)
694{
695 int i;
696
697 for (i=0; i < XATTRINDEX_HASHSIZE; i++)
698 INIT_LIST_HEAD(&c->xattrindex[i]);
699 INIT_LIST_HEAD(&c->xattr_unchecked);
700 INIT_LIST_HEAD(&c->xattr_dead_list);
701 c->xref_dead_list = NULL;
702 c->xref_temp = NULL;
703
704 init_rwsem(&c->xattr_sem);
705 c->highest_xid = 0;
706 c->highest_xseqno = 0;
707 c->xdatum_mem_usage = 0;
708 c->xdatum_mem_threshold = 32 * 1024; /* Default 32KB */
709}
710
711static struct jffs2_xattr_datum *jffs2_find_xattr_datum(struct jffs2_sb_info *c, uint32_t xid)
712{
713 struct jffs2_xattr_datum *xd;
714 int i = xid % XATTRINDEX_HASHSIZE;
715
716 /* It's only used in scanning/building process. */
717 BUG_ON(!(c->flags & (JFFS2_SB_FLAG_SCANNING|JFFS2_SB_FLAG_BUILDING)));
718
719 list_for_each_entry(xd, &c->xattrindex[i], xindex) {
720 if (xd->xid==xid)
721 return xd;
722 }
723 return NULL;
724}
725
726void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
727{
728 struct jffs2_xattr_datum *xd, *_xd;
729 struct jffs2_xattr_ref *ref, *_ref;
730 int i;
731
732 for (ref=c->xref_temp; ref; ref = _ref) {
733 _ref = ref->next;
734 jffs2_free_xattr_ref(ref);
735 }
736
737 for (ref=c->xref_dead_list; ref; ref = _ref) {
738 _ref = ref->next;
739 jffs2_free_xattr_ref(ref);
740 }
741
742 for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
743 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
744 list_del(&xd->xindex);
745 if (xd->xname)
746 kfree(xd->xname);
747 jffs2_free_xattr_datum(xd);
748 }
749 }
750
751 list_for_each_entry_safe(xd, _xd, &c->xattr_dead_list, xindex) {
752 list_del(&xd->xindex);
753 jffs2_free_xattr_datum(xd);
754 }
755}
756
/*
 * jffs2_build_xattr_subsystem(c)
 *   turns the xrefs collected on c->xref_temp and the xdatums collected in
 *   c->xattrindex[] into the bound in-memory form.  It must run while
 *   JFFS2_SB_FLAG_BUILDING is set and works in three phases:
 *     1. merge xrefs that name the same (ino, xid) pair,
 *     2. bind each surviving xref to its inode_cache and xattr_datum,
 *     3. move orphan and unchecked xdatums onto c->xattr_unchecked.
 *   XREF_TMPHASH_SIZE is the bucket count of the on-stack hash used by
 *   phases 1 and 2.
 */
757#define XREF_TMPHASH_SIZE (128)
758void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
759{
760 struct jffs2_xattr_ref *ref, *_ref;
761 struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
762 struct jffs2_xattr_datum *xd, *_xd;
763 struct jffs2_inode_cache *ic;
764 struct jffs2_raw_node_ref *raw;
765 int i, xdatum_count = 0, xdatum_unchecked_count = 0, xref_count = 0;
766 int xdatum_orphan_count = 0, xref_orphan_count = 0, xref_dead_count = 0;
767
768 BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
769
770 /* Phase.1 : Merge same xref */
771 for (i=0; i < XREF_TMPHASH_SIZE; i++)
772 xref_tmphash[i] = NULL;
773 for (ref=c->xref_temp; ref; ref=_ref) {
774 struct jffs2_xattr_ref *tmp;
775
776 _ref = ref->next;
 /* A node that is not REF_PRISTINE is verified first; when
    verify_xattr_ref() returns non-zero the node is obsoleted and
    the xref is dropped. */
777 if (ref_flags(ref->node) != REF_PRISTINE) {
778 if (verify_xattr_ref(c, ref)) {
779 BUG_ON(ref->node->next_in_ino != (void *)ref);
780 ref->node->next_in_ino = NULL;
781 jffs2_mark_node_obsolete(c, ref->node);
782 jffs2_free_xattr_ref(ref);
783 continue;
784 }
785 }
786
 /* Look for an already hashed xref with the same (ino, xid). */
787 i = (ref->ino ^ ref->xid) % XREF_TMPHASH_SIZE;
788 for (tmp=xref_tmphash[i]; tmp; tmp=tmp->next) {
789 if (tmp->ino == ref->ino && tmp->xid == ref->xid)
790 break;
791 }
792 if (tmp) {
 /* Duplicate: chain this raw node onto the hashed xref.  The
    node with the higher xseqno becomes the head (tmp->node);
    otherwise it is linked in right behind the head.  The
    duplicate xref object itself is freed. */
793 raw = ref->node;
794 if (ref->xseqno > tmp->xseqno) {
795 tmp->xseqno = ref->xseqno;
796 raw->next_in_ino = tmp->node;
797 tmp->node = raw;
798 } else {
799 raw->next_in_ino = tmp->node->next_in_ino;
800 tmp->node->next_in_ino = raw;
801 }
802 jffs2_free_xattr_ref(ref);
803 continue;
804 } else {
 /* First xref for this (ino, xid): push it on the bucket. */
805 ref->next = xref_tmphash[i];
806 xref_tmphash[i] = ref;
807 }
808 }
 /* Every xref is now either freed or owned by xref_tmphash[]. */
809 c->xref_temp = NULL;
810
811 /* Phase.2 : Bind xref with inode_cache and xattr_datum */
812 for (i=0; i < XREF_TMPHASH_SIZE; i++) {
813 for (ref=xref_tmphash[i]; ref; ref=_ref) {
814 xref_count++;
815 _ref = ref->next;
 /* An xref carrying the delete marker goes to the dead list. */
816 if (is_xattr_ref_dead(ref)) {
817 ref->next = c->xref_dead_list;
818 c->xref_dead_list = ref;
819 xref_dead_count++;
820 continue;
821 }
822 /* At this point, ref->xid and ref->ino contain XID and inode number.
823 ref->xd and ref->ic are not valid yet. */
824 xd = jffs2_find_xattr_datum(c, ref->xid);
825 ic = jffs2_get_ino_cache(c, ref->ino);
 /* Without both a datum and an inode cache the xref is an
    orphan: it gets the delete marker and joins the dead list. */
826 if (!xd || !ic) {
827 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
828 ref->ino, ref->xid, ref->xseqno);
829 ref->xseqno |= XREF_DELETE_MARKER;
830 ref->next = c->xref_dead_list;
831 c->xref_dead_list = ref;
832 xref_orphan_count++;
833 continue;
834 }
 /* Bind: ino/xid are replaced by the ic/xd pointers, the datum
    gains a reference, and the xref is pushed on ic->xref. */
835 ref->xd = xd;
836 ref->ic = ic;
837 atomic_inc(&xd->refcnt);
838 ref->next = ic->xref;
839 ic->xref = ref;
840 }
841 }
842
843 /* Phase.3 : Link unchecked xdatum to xattr_unchecked list */
844 for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
845 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
846 xdatum_count++;
 /* Each xdatum is unlinked from its hash bucket here. */
847 list_del_init(&xd->xindex);
 /* No xref refers to it: flag it dead and queue it. */
848 if (!atomic_read(&xd->refcnt)) {
849 dbg_xattr("xdatum(xid=%u, version=%u) is orphan.\n",
850 xd->xid, xd->version);
851 xd->flags |= JFFS2_XFLAGS_DEAD;
852 list_add(&xd->xindex, &c->xattr_unchecked);
853 xdatum_orphan_count++;
854 continue;
855 }
856 if (is_xattr_datum_unchecked(c, xd)) {
857 dbg_xattr("unchecked xdatum(xid=%u, version=%u)\n",
858 xd->xid, xd->version);
859 list_add(&xd->xindex, &c->xattr_unchecked);
860 xdatum_unchecked_count++;
861 }
862 }
863 }
864 /* build complete */
865 JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum"
866 " (%u unchecked, %u orphan) and "
867 "%u of xref (%u dead, %u orphan) found.\n",
868 xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
869 xref_count, xref_dead_count, xref_orphan_count);
870}
871
872struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
873 uint32_t xid, uint32_t version)
874{
875 struct jffs2_xattr_datum *xd;
876
877 xd = jffs2_find_xattr_datum(c, xid);
878 if (!xd) {
879 xd = jffs2_alloc_xattr_datum();
880 if (!xd)
881 return ERR_PTR(-ENOMEM);
882 xd->xid = xid;
883 xd->version = version;
884 if (xd->xid > c->highest_xid)
885 c->highest_xid = xd->xid;
886 list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]);
887 }
888 return xd;
889}
890
891/* -------- xattr subsystem functions ---------------
892 * xprefix_to_handler(xprefix)
893 * is used to translate xprefix into xattr_handler.
894 * jffs2_listxattr(dentry, buffer, size)
895 * is an implementation of listxattr handler on jffs2.
896 * do_jffs2_getxattr(inode, xprefix, xname, buffer, size)
897 * is an implementation of getxattr handler on jffs2.
898 * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags)
899 * is an implementation of setxattr handler on jffs2.
900 * -------------------------------------------------- */
/*
 * NULL-terminated table of the xattr handlers compiled into this build.
 * The security and POSIX ACL handlers are present only when the matching
 * CONFIG_JFFS2_FS_* option is enabled.
 */
901struct xattr_handler *jffs2_xattr_handlers[] = {
902 &jffs2_user_xattr_handler,
903#ifdef CONFIG_JFFS2_FS_SECURITY
904 &jffs2_security_xattr_handler,
905#endif
906#ifdef CONFIG_JFFS2_FS_POSIX_ACL
907 &jffs2_acl_access_xattr_handler,
908 &jffs2_acl_default_xattr_handler,
909#endif
910 &jffs2_trusted_xattr_handler,
 /* end-of-table sentinel */
911 NULL
912};
913
914static struct xattr_handler *xprefix_to_handler(int xprefix) {
915 struct xattr_handler *ret;
916
917 switch (xprefix) {
918 case JFFS2_XPREFIX_USER:
919 ret = &jffs2_user_xattr_handler;
920 break;
921#ifdef CONFIG_JFFS2_FS_SECURITY
922 case JFFS2_XPREFIX_SECURITY:
923 ret = &jffs2_security_xattr_handler;
924 break;
925#endif
926#ifdef CONFIG_JFFS2_FS_POSIX_ACL
927 case JFFS2_XPREFIX_ACL_ACCESS:
928 ret = &jffs2_acl_access_xattr_handler;
929 break;
930 case JFFS2_XPREFIX_ACL_DEFAULT:
931 ret = &jffs2_acl_default_xattr_handler;
932 break;
933#endif
934 case JFFS2_XPREFIX_TRUSTED:
935 ret = &jffs2_trusted_xattr_handler;
936 break;
937 default:
938 ret = NULL;
939 break;
940 }
941 return ret;
942}
943
944ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
945{
946 struct inode *inode = dentry->d_inode;
947 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
948 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
949 struct jffs2_inode_cache *ic = f->inocache;
950 struct jffs2_xattr_ref *ref, **pref;
951 struct jffs2_xattr_datum *xd;
952 struct xattr_handler *xhandle;
953 ssize_t len, rc;
954 int retry = 0;
955
956 rc = check_xattr_ref_inode(c, ic);
957 if (unlikely(rc))
958 return rc;
959
960 down_read(&c->xattr_sem);
961 retry:
962 len = 0;
963 for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) {
964 BUG_ON(ref->ic != ic);
965 xd = ref->xd;
966 if (!xd->xname) {
967 /* xdatum is unchached */
968 if (!retry) {
969 retry = 1;
970 up_read(&c->xattr_sem);
971 down_write(&c->xattr_sem);
972 goto retry;
973 } else {
974 rc = load_xattr_datum(c, xd);
975 if (unlikely(rc > 0)) {
976 *pref = ref->next;
977 delete_xattr_ref(c, ref);
978 goto retry;
979 } else if (unlikely(rc < 0))
980 goto out;
981 }
982 }
983 xhandle = xprefix_to_handler(xd->xprefix);
984 if (!xhandle)
985 continue;
986 if (buffer) {
987 rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len);
988 } else {
989 rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len);
990 }
991 if (rc < 0)
992 goto out;
993 len += rc;
994 }
995 rc = len;
996 out:
997 if (!retry) {
998 up_read(&c->xattr_sem);
999 } else {
1000 up_write(&c->xattr_sem);
1001 }
1002 return rc;
1003}
1004
1005int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname,
1006 char *buffer, size_t size)
1007{
1008 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
1009 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
1010 struct jffs2_inode_cache *ic = f->inocache;
1011 struct jffs2_xattr_datum *xd;
1012 struct jffs2_xattr_ref *ref, **pref;
1013 int rc, retry = 0;
1014
1015 rc = check_xattr_ref_inode(c, ic);
1016 if (unlikely(rc))
1017 return rc;
1018
1019 down_read(&c->xattr_sem);
1020 retry:
1021 for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) {
1022 BUG_ON(ref->ic!=ic);
1023
1024 xd = ref->xd;
1025 if (xd->xprefix != xprefix)
1026 continue;
1027 if (!xd->xname) {
1028 /* xdatum is unchached */
1029 if (!retry) {
1030 retry = 1;
1031 up_read(&c->xattr_sem);
1032 down_write(&c->xattr_sem);
1033 goto retry;
1034 } else {
1035 rc = load_xattr_datum(c, xd);
1036 if (unlikely(rc > 0)) {
1037 *pref = ref->next;
1038 delete_xattr_ref(c, ref);
1039 goto retry;
1040 } else if (unlikely(rc < 0)) {
1041 goto out;
1042 }
1043 }
1044 }
1045 if (!strcmp(xname, xd->xname)) {
1046 rc = xd->value_len;
1047 if (buffer) {
1048 if (size < rc) {
1049 rc = -ERANGE;
1050 } else {
1051 memcpy(buffer, xd->xvalue, rc);
1052 }
1053 }
1054 goto out;
1055 }
1056 }
1057 rc = -ENODATA;
1058 out:
1059 if (!retry) {
1060 up_read(&c->xattr_sem);
1061 } else {
1062 up_write(&c->xattr_sem);
1063 }
1064 return rc;
1065}
1066
/*
 * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags)
 *   setxattr/removexattr implementation shared by the per-prefix handlers.
 *
 *   buffer != NULL : create or replace the attribute (xprefix, xname).
 *   buffer == NULL : remove the attribute.
 *   flags          : XATTR_CREATE yields -EEXIST if the attribute exists;
 *                    XATTR_REPLACE yields -ENODATA if it does not.
 *
 *   Returns 0 on success or a negative error code.
 */
1067int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1068 const char *buffer, size_t size, int flags)
1069{
1070 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
1071 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
1072 struct jffs2_inode_cache *ic = f->inocache;
1073 struct jffs2_xattr_datum *xd;
1074 struct jffs2_xattr_ref *ref, *newref, **pref;
1075 uint32_t length, request;
1076 int rc;
1077
1078 rc = check_xattr_ref_inode(c, ic);
1079 if (unlikely(rc))
1080 return rc;
1081
 /* First reservation: sized for an xattr node holding name and value. */
1082 request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size);
1083 rc = jffs2_reserve_space(c, request, &length,
1084 ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE);
1085 if (rc) {
1086 JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
1087 return rc;
1088 }
1089
1090 /* Find existing xattr */
1091 down_write(&c->xattr_sem);
1092 retry:
1093 for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) {
1094 xd = ref->xd;
1095 if (xd->xprefix != xprefix)
1096 continue;
1097 if (!xd->xname) {
 /* Name not loaded yet.  A positive return from
    load_xattr_datum() makes us unlink and delete this xref
    and restart the scan. */
1098 rc = load_xattr_datum(c, xd);
1099 if (unlikely(rc > 0)) {
1100 *pref = ref->next;
1101 delete_xattr_ref(c, ref);
1102 goto retry;
1103 } else if (unlikely(rc < 0))
1104 goto out;
1105 }
1106 if (!strcmp(xd->xname, xname)) {
1107 if (flags & XATTR_CREATE) {
1108 rc = -EEXIST;
1109 goto out;
1110 }
1111 if (!buffer) {
 /* Removal: ino/xid share storage with ic/xd, so put the
    numeric ids back, set the delete marker and write the
    xref out. */
1112 ref->ino = ic->ino;
1113 ref->xid = xd->xid;
1114 ref->xseqno |= XREF_DELETE_MARKER;
1115 rc = save_xattr_ref(c, ref);
1116 if (!rc) {
 /* Written: unlink from the inode, move to the dead
    list, and drop the datum reference. */
1117 *pref = ref->next;
1118 spin_lock(&c->erase_completion_lock);
1119 ref->next = c->xref_dead_list;
1120 c->xref_dead_list = ref;
1121 spin_unlock(&c->erase_completion_lock);
1122 if (atomic_dec_and_test(&xd->refcnt))
1123 delete_xattr_datum(c, xd);
1124 } else {
 /* Write failed: restore the pointers and clear the
    marker so the xref stays live. */
1125 ref->ic = ic;
1126 ref->xd = xd;
1127 ref->xseqno &= ~XREF_DELETE_MARKER;
1128 }
1129 goto out;
1130 }
 /* Replacement: ref/pref keep pointing at the old xref. */
1131 goto found;
1132 }
1133 }
1134 /* not found */
1135 if (flags & XATTR_REPLACE) {
1136 rc = -ENODATA;
1137 goto out;
1138 }
1139 if (!buffer) {
1140 rc = -ENODATA;
1141 goto out;
1142 }
 /* Reached by the goto above (ref != NULL) and by falling through when
    the attribute does not exist yet (ref == NULL). */
1143 found:
1144 xd = create_xattr_datum(c, xprefix, xname, buffer, size);
1145 if (IS_ERR(xd)) {
1146 rc = PTR_ERR(xd);
1147 goto out;
1148 }
 /* Finish the first reservation with the lock dropped, then reserve
    space for the xref node and take the lock again. */
1149 up_write(&c->xattr_sem);
1150 jffs2_complete_reservation(c);
1151
1152 /* create xattr_ref */
1153 request = PAD(sizeof(struct jffs2_raw_xref));
1154 rc = jffs2_reserve_space(c, request, &length,
1155 ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE);
1156 down_write(&c->xattr_sem);
1157 if (rc) {
 /* Nothing was reserved, so return without calling
    jffs2_complete_reservation(); only the datum reference
    is dropped. */
1158 JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
1159 if (atomic_dec_and_test(&xd->refcnt))
1160 delete_xattr_datum(c, xd);
1161 up_write(&c->xattr_sem);
1162 return rc;
1163 }
 /* When replacing, unlink the old xref before creating the new one. */
1164 if (ref)
1165 *pref = ref->next;
1166 newref = create_xattr_ref(c, ic, xd);
1167 if (IS_ERR(newref)) {
 /* Failure: put the old xref back at the head of ic->xref and
    drop the reference on the new datum. */
1168 if (ref) {
1169 ref->next = ic->xref;
1170 ic->xref = ref;
1171 }
1172 rc = PTR_ERR(newref);
1173 if (atomic_dec_and_test(&xd->refcnt))
1174 delete_xattr_datum(c, xd);
1175 } else if (ref) {
 /* Success: the old xref is no longer needed. */
1176 delete_xattr_ref(c, ref);
1177 }
1178 out:
1179 up_write(&c->xattr_sem);
1180 jffs2_complete_reservation(c);
1181 return rc;
1182}
1183
1184/* -------- garbage collector functions -------------
1185 * jffs2_garbage_collect_xattr_datum(c, xd, raw)
1186 * is used to move xdatum into new node.
1187 * jffs2_garbage_collect_xattr_ref(c, ref, raw)
1188 * is used to move xref into new node.
1189 * jffs2_verify_xattr(c)
1190 * is used to call do_verify_xattr_datum() before garbage collecting.
1191 * jffs2_release_xattr_datum(c, xd)
1192 * is used to release an in-memory object of xdatum.
1193 * jffs2_release_xattr_ref(c, ref)
1194 * is used to release an in-memory object of xref.
1195 * -------------------------------------------------- */
1196int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
1197 struct jffs2_raw_node_ref *raw)
1198{
1199 uint32_t totlen, length, old_ofs;
1200 int rc = 0;
1201
1202 down_write(&c->xattr_sem);
1203 if (xd->node != raw)
1204 goto out;
1205 if (xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID))
1206 goto out;
1207
1208 rc = load_xattr_datum(c, xd);
1209 if (unlikely(rc)) {
1210 rc = (rc > 0) ? 0 : rc;
1211 goto out;
1212 }
1213 old_ofs = ref_offset(xd->node);
1214 totlen = PAD(sizeof(struct jffs2_raw_xattr)
1215 + xd->name_len + 1 + xd->value_len);
1216 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
1217 if (rc) {
1218 JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
1219 rc = rc ? rc : -EBADFD;
1220 goto out;
1221 }
1222 rc = save_xattr_datum(c, xd);
1223 if (!rc)
1224 dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n",
1225 xd->xid, xd->version, old_ofs, ref_offset(xd->node));
1226 out:
1227 if (!rc)
1228 jffs2_mark_node_obsolete(c, raw);
1229 up_write(&c->xattr_sem);
1230 return rc;
1231}
1232
1233int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
1234 struct jffs2_raw_node_ref *raw)
1235{
1236 uint32_t totlen, length, old_ofs;
1237 int rc = 0;
1238
1239 down_write(&c->xattr_sem);
1240 BUG_ON(!ref->node);
1241
1242 if (ref->node != raw)
1243 goto out;
1244 if (is_xattr_ref_dead(ref) && (raw->next_in_ino == (void *)ref))
1245 goto out;
1246
1247 old_ofs = ref_offset(ref->node);
1248 totlen = ref_totlen(c, c->gcblock, ref->node);
1249
1250 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1251 if (rc) {
1252 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1253 __FUNCTION__, rc, totlen);
1254 rc = rc ? rc : -EBADFD;
1255 goto out;
1256 }
1257 rc = save_xattr_ref(c, ref);
1258 if (!rc)
1259 dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n",
1260 ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node));
1261 out:
1262 if (!rc)
1263 jffs2_mark_node_obsolete(c, raw);
1264 up_write(&c->xattr_sem);
1265 return rc;
1266}
1267
1268int jffs2_verify_xattr(struct jffs2_sb_info *c)
1269{
1270 struct jffs2_xattr_datum *xd, *_xd;
1271 struct jffs2_eraseblock *jeb;
1272 struct jffs2_raw_node_ref *raw;
1273 uint32_t totlen;
1274 int rc;
1275
1276 down_write(&c->xattr_sem);
1277 list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) {
1278 rc = do_verify_xattr_datum(c, xd);
1279 if (rc < 0)
1280 continue;
1281 list_del_init(&xd->xindex);
1282 spin_lock(&c->erase_completion_lock);
1283 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
1284 if (ref_flags(raw) != REF_UNCHECKED)
1285 continue;
1286 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
1287 totlen = PAD(ref_totlen(c, jeb, raw));
1288 c->unchecked_size -= totlen; c->used_size += totlen;
1289 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
1290 raw->flash_offset = ref_offset(raw)
1291 | ((xd->node == (void *)raw) ? REF_PRISTINE : REF_NORMAL);
1292 }
1293 if (xd->flags & JFFS2_XFLAGS_DEAD)
1294 list_add(&xd->xindex, &c->xattr_dead_list);
1295 spin_unlock(&c->erase_completion_lock);
1296 }
1297 up_write(&c->xattr_sem);
1298 return list_empty(&c->xattr_unchecked) ? 1 : 0;
1299}
1300
1301void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
1302{
1303 /* must be called under spin_lock(&c->erase_completion_lock) */
1304 if (atomic_read(&xd->refcnt) || xd->node != (void *)xd)
1305 return;
1306
1307 list_del(&xd->xindex);
1308 jffs2_free_xattr_datum(xd);
1309}
1310
1311void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
1312{
1313 /* must be called under spin_lock(&c->erase_completion_lock) */
1314 struct jffs2_xattr_ref *tmp, **ptmp;
1315
1316 if (ref->node != (void *)ref)
1317 return;
1318
1319 for (tmp=c->xref_dead_list, ptmp=&c->xref_dead_list; tmp; ptmp=&tmp->next, tmp=tmp->next) {
1320 if (ref == tmp) {
1321 *ptmp = tmp->next;
1322 break;
1323 }
1324 }
1325 jffs2_free_xattr_ref(ref);
1326}
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
new file mode 100644
index 000000000000..06a5c69dcf8b
--- /dev/null
+++ b/fs/jffs2/xattr.h
@@ -0,0 +1,129 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#ifndef _JFFS2_FS_XATTR_H_
12#define _JFFS2_FS_XATTR_H_
13
14#include <linux/xattr.h>
15#include <linux/list.h>
16
17#define JFFS2_XFLAGS_HOT (0x01) /* This datum is HOT */
18#define JFFS2_XFLAGS_BIND (0x02) /* This datum is not reclaimed */
19#define JFFS2_XFLAGS_DEAD (0x40) /* This datum is already dead */
20#define JFFS2_XFLAGS_INVALID (0x80) /* This datum contains crc error */
21
/*
 * In-memory representation of one extended attribute (name/value pair),
 * identified by xid and shared by every xref that points at it.
 */
22struct jffs2_xattr_datum
23{
24 void *always_null;
25 struct jffs2_raw_node_ref *node; /* head of the raw node chain; users stop walking at a pointer back to this object */
26 uint8_t class;
27 uint8_t flags; /* JFFS2_XFLAGS_* */
28 uint16_t xprefix; /* see JFFS2_XPREFIX_* */
29
30 struct list_head xindex; /* chained from c->xattrindex[n] */
31 atomic_t refcnt; /* # of xattr_ref refers this */
32 uint32_t xid;
33 uint32_t version;
34
35 uint32_t data_crc;
36 uint32_t hashkey;
37 char *xname; /* XATTR name without prefix; NULL while the datum is not loaded */
38 uint32_t name_len; /* length of xname */
39 char *xvalue; /* XATTR value */
40 uint32_t value_len; /* length of xvalue */
41};
42
43struct jffs2_inode_cache;
/*
 * In-memory representation of one (inode, xdatum) association.  While
 * scanning/building, and again once the delete marker is set, the unions
 * hold the numeric ino/xid instead of the ic/xd pointers.
 */
44struct jffs2_xattr_ref
45{
46 void *always_null;
47 struct jffs2_raw_node_ref *node; /* head of the raw node chain for this xref */
48 uint8_t class;
49 uint8_t flags; /* Currently unused */
50 u16 unused;
51
52 uint32_t xseqno; /* sequence number; lowest bit is XREF_DELETE_MARKER */
53 union {
54 struct jffs2_inode_cache *ic; /* reference to jffs2_inode_cache */
55 uint32_t ino; /* only used in scanning/building */
56 };
57 union {
58 struct jffs2_xattr_datum *xd; /* reference to jffs2_xattr_datum */
59 uint32_t xid; /* only used in scanning/building */
60 };
61 struct jffs2_xattr_ref *next; /* chained from ic->xref, c->xref_temp or c->xref_dead_list */
62};
63
64#define XREF_DELETE_MARKER (0x00000001)
65static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
66{
67 return ((ref->xseqno & XREF_DELETE_MARKER) != 0);
68}
69
70#ifdef CONFIG_JFFS2_FS_XATTR
71
72extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
73extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
74extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
75
76extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
77 uint32_t xid, uint32_t version);
78
79extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
80extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
81
82extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
83 struct jffs2_raw_node_ref *raw);
84extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
85 struct jffs2_raw_node_ref *raw);
86extern int jffs2_verify_xattr(struct jffs2_sb_info *c);
87extern void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd);
88extern void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref);
89
90extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname,
91 char *buffer, size_t size);
92extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
93 const char *buffer, size_t size, int flags);
94
95extern struct xattr_handler *jffs2_xattr_handlers[];
96extern struct xattr_handler jffs2_user_xattr_handler;
97extern struct xattr_handler jffs2_trusted_xattr_handler;
98
99extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
100#define jffs2_getxattr generic_getxattr
101#define jffs2_setxattr generic_setxattr
102#define jffs2_removexattr generic_removexattr
103
104#else
105
106#define jffs2_init_xattr_subsystem(c)
107#define jffs2_build_xattr_subsystem(c)
108#define jffs2_clear_xattr_subsystem(c)
109
110#define jffs2_xattr_delete_inode(c, ic)
111#define jffs2_xattr_free_inode(c, ic)
112#define jffs2_verify_xattr(c) (1)
113
114#define jffs2_xattr_handlers NULL
115#define jffs2_listxattr NULL
116#define jffs2_getxattr NULL
117#define jffs2_setxattr NULL
118#define jffs2_removexattr NULL
119
120#endif /* CONFIG_JFFS2_FS_XATTR */
121
122#ifdef CONFIG_JFFS2_FS_SECURITY
123extern int jffs2_init_security(struct inode *inode, struct inode *dir);
124extern struct xattr_handler jffs2_security_xattr_handler;
125#else
126#define jffs2_init_security(inode,dir) (0)
127#endif /* CONFIG_JFFS2_FS_SECURITY */
128
129#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
new file mode 100644
index 000000000000..ed046e19dbfa
--- /dev/null
+++ b/fs/jffs2/xattr_trusted.c
@@ -0,0 +1,52 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/fs.h>
13#include <linux/jffs2.h>
14#include <linux/xattr.h>
15#include <linux/mtd/mtd.h>
16#include "nodelist.h"
17
18static int jffs2_trusted_getxattr(struct inode *inode, const char *name,
19 void *buffer, size_t size)
20{
21 if (!strcmp(name, ""))
22 return -EINVAL;
23 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size);
24}
25
26static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer,
27 size_t size, int flags)
28{
29 if (!strcmp(name, ""))
30 return -EINVAL;
31 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags);
32}
33
34static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size,
35 const char *name, size_t name_len)
36{
37 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
38
39 if (list && retlen<=list_size) {
40 strcpy(list, XATTR_TRUSTED_PREFIX);
41 strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name);
42 }
43
44 return retlen;
45}
46
47struct xattr_handler jffs2_trusted_xattr_handler = {
48 .prefix = XATTR_TRUSTED_PREFIX,
49 .list = jffs2_trusted_listxattr,
50 .set = jffs2_trusted_setxattr,
51 .get = jffs2_trusted_getxattr
52};
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
new file mode 100644
index 000000000000..2f8e9aa01ea0
--- /dev/null
+++ b/fs/jffs2/xattr_user.c
@@ -0,0 +1,52 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2006 NEC Corporation
5 *
6 * Created by KaiGai Kohei <kaigai@ak.jp.nec.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/fs.h>
13#include <linux/jffs2.h>
14#include <linux/xattr.h>
15#include <linux/mtd/mtd.h>
16#include "nodelist.h"
17
18static int jffs2_user_getxattr(struct inode *inode, const char *name,
19 void *buffer, size_t size)
20{
21 if (!strcmp(name, ""))
22 return -EINVAL;
23 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size);
24}
25
26static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer,
27 size_t size, int flags)
28{
29 if (!strcmp(name, ""))
30 return -EINVAL;
31 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags);
32}
33
34static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size,
35 const char *name, size_t name_len)
36{
37 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
38
39 if (list && retlen <= list_size) {
40 strcpy(list, XATTR_USER_PREFIX);
41 strcpy(list + XATTR_USER_PREFIX_LEN, name);
42 }
43
44 return retlen;
45}
46
47struct xattr_handler jffs2_user_xattr_handler = {
48 .prefix = XATTR_USER_PREFIX,
49 .list = jffs2_user_listxattr,
50 .set = jffs2_user_setxattr,
51 .get = jffs2_user_getxattr
52};
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 04eb78f1252e..43e3f566aad6 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -305,7 +305,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
305 offset, nr_segs, jfs_get_block, NULL); 305 offset, nr_segs, jfs_get_block, NULL);
306} 306}
307 307
308struct address_space_operations jfs_aops = { 308const struct address_space_operations jfs_aops = {
309 .readpage = jfs_readpage, 309 .readpage = jfs_readpage,
310 .readpages = jfs_readpages, 310 .readpages = jfs_readpages,
311 .writepage = jfs_writepage, 311 .writepage = jfs_writepage,
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5549378358bf..4d52593a5fc6 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
126 126
127 /* allocate the disk blocks for the extent. initially, extBalloc() 127 /* allocate the disk blocks for the extent. initially, extBalloc()
128 * will try to allocate disk blocks for the requested size (xlen). 128 * will try to allocate disk blocks for the requested size (xlen).
129 * if this fails (xlen contigious free blocks not avaliable), it'll 129 * if this fails (xlen contiguous free blocks not avaliable), it'll
130 * try to allocate a smaller number of blocks (producing a smaller 130 * try to allocate a smaller number of blocks (producing a smaller
131 * extent), with this smaller number of blocks consisting of the 131 * extent), with this smaller number of blocks consisting of the
132 * requested number of blocks rounded down to the next smaller 132 * requested number of blocks rounded down to the next smaller
@@ -493,7 +493,7 @@ int extFill(struct inode *ip, xad_t * xp)
493 * 493 *
494 * initially, we will try to allocate disk blocks for the 494 * initially, we will try to allocate disk blocks for the
495 * requested size (nblocks). if this fails (nblocks 495 * requested size (nblocks). if this fails (nblocks
496 * contigious free blocks not avaliable), we'll try to allocate 496 * contiguous free blocks not avaliable), we'll try to allocate
497 * a smaller number of blocks (producing a smaller extent), with 497 * a smaller number of blocks (producing a smaller extent), with
498 * this smaller number of blocks consisting of the requested 498 * this smaller number of blocks consisting of the requested
499 * number of blocks rounded down to the next smaller power of 2 499 * number of blocks rounded down to the next smaller power of 2
@@ -529,7 +529,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
529 529
530 /* get the number of blocks to initially attempt to allocate. 530 /* get the number of blocks to initially attempt to allocate.
531 * we'll first try the number of blocks requested unless this 531 * we'll first try the number of blocks requested unless this
532 * number is greater than the maximum number of contigious free 532 * number is greater than the maximum number of contiguous free
533 * blocks in the map. in that case, we'll start off with the 533 * blocks in the map. in that case, we'll start off with the
534 * maximum free. 534 * maximum free.
535 */ 535 */
@@ -586,7 +586,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
586 * in place. if this fails, we'll try to move the extent 586 * in place. if this fails, we'll try to move the extent
587 * to a new set of blocks. if moving the extent, we initially 587 * to a new set of blocks. if moving the extent, we initially
588 * will try to allocate disk blocks for the requested size 588 * will try to allocate disk blocks for the requested size
589 * (nnew). if this fails (nnew contigious free blocks not 589 * (nnew). if this fails (new contiguous free blocks not
590 * avaliable), we'll try to allocate a smaller number of 590 * avaliable), we'll try to allocate a smaller number of
591 * blocks (producing a smaller extent), with this smaller 591 * blocks (producing a smaller extent), with this smaller
592 * number of blocks consisting of the requested number of 592 * number of blocks consisting of the requested number of
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index c30072674464..b5c7da6190dc 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -33,7 +33,7 @@ extern void jfs_free_zero_link(struct inode *);
33extern struct dentry *jfs_get_parent(struct dentry *dentry); 33extern struct dentry *jfs_get_parent(struct dentry *dentry);
34extern void jfs_set_inode_flags(struct inode *); 34extern void jfs_set_inode_flags(struct inode *);
35 35
36extern struct address_space_operations jfs_aops; 36extern const struct address_space_operations jfs_aops;
37extern struct inode_operations jfs_dir_inode_operations; 37extern struct inode_operations jfs_dir_inode_operations;
38extern const struct file_operations jfs_dir_operations; 38extern const struct file_operations jfs_dir_operations;
39extern struct inode_operations jfs_file_inode_operations; 39extern struct inode_operations jfs_file_inode_operations;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 2b220dd6b4e7..e1e0a6e6ebdf 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -577,7 +577,7 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
577 metapage_releasepage(page, 0); 577 metapage_releasepage(page, 0);
578} 578}
579 579
580struct address_space_operations jfs_metapage_aops = { 580const struct address_space_operations jfs_metapage_aops = {
581 .readpage = metapage_readpage, 581 .readpage = metapage_readpage,
582 .writepage = metapage_writepage, 582 .writepage = metapage_writepage,
583 .sync_page = block_sync_page, 583 .sync_page = block_sync_page,
@@ -632,10 +632,9 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
632 } 632 }
633 SetPageUptodate(page); 633 SetPageUptodate(page);
634 } else { 634 } else {
635 page = read_cache_page(mapping, page_index, 635 page = read_mapping_page(mapping, page_index, NULL);
636 (filler_t *)mapping->a_ops->readpage, NULL);
637 if (IS_ERR(page) || !PageUptodate(page)) { 636 if (IS_ERR(page) || !PageUptodate(page)) {
638 jfs_err("read_cache_page failed!"); 637 jfs_err("read_mapping_page failed!");
639 return NULL; 638 return NULL;
640 } 639 }
641 lock_page(page); 640 lock_page(page);
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index f0b7d3282b07..d17a3290f5aa 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -139,7 +139,7 @@ static inline void metapage_homeok(struct metapage *mp)
139 put_metapage(mp); 139 put_metapage(mp);
140} 140}
141 141
142extern struct address_space_operations jfs_metapage_aops; 142extern const struct address_space_operations jfs_metapage_aops;
143 143
144/* 144/*
145 * This routines invalidate all pages for an extent. 145 * This routines invalidate all pages for an extent.
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index ac3d66948e8c..10c46231ce15 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -842,7 +842,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
842 TXN_UNLOCK(); 842 TXN_UNLOCK();
843 release_metapage(mp); 843 release_metapage(mp);
844 TXN_LOCK(); 844 TXN_LOCK();
845 xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ 845 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */
846 846
847 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", 847 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
848 tid, xtid, lid); 848 tid, xtid, lid);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index db6f41d6dd60..4f6cfebc82db 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -18,7 +18,6 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/config.h>
22#include <linux/module.h> 21#include <linux/module.h>
23#include <linux/parser.h> 22#include <linux/parser.h>
24#include <linux/completion.h> 23#include <linux/completion.h>
@@ -139,9 +138,9 @@ static void jfs_destroy_inode(struct inode *inode)
139 kmem_cache_free(jfs_inode_cachep, ji); 138 kmem_cache_free(jfs_inode_cachep, ji);
140} 139}
141 140
142static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) 141static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
143{ 142{
144 struct jfs_sb_info *sbi = JFS_SBI(sb); 143 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
145 s64 maxinodes; 144 s64 maxinodes;
146 struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; 145 struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
147 146
@@ -565,10 +564,11 @@ static void jfs_unlockfs(struct super_block *sb)
565 } 564 }
566} 565}
567 566
568static struct super_block *jfs_get_sb(struct file_system_type *fs_type, 567static int jfs_get_sb(struct file_system_type *fs_type,
569 int flags, const char *dev_name, void *data) 568 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
570{ 569{
571 return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super); 570 return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
571 mnt);
572} 572}
573 573
574static int jfs_sync_fs(struct super_block *sb, int wait) 574static int jfs_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/libfs.c b/fs/libfs.c
index 7145ba7a48d0..ac02ea602c3d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,9 +20,9 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
20 return 0; 20 return 0;
21} 21}
22 22
23int simple_statfs(struct super_block *sb, struct kstatfs *buf) 23int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
24{ 24{
25 buf->f_type = sb->s_magic; 25 buf->f_type = dentry->d_sb->s_magic;
26 buf->f_bsize = PAGE_CACHE_SIZE; 26 buf->f_bsize = PAGE_CACHE_SIZE;
27 buf->f_namelen = NAME_MAX; 27 buf->f_namelen = NAME_MAX;
28 return 0; 28 return 0;
@@ -149,10 +149,9 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
149 /* fallthrough */ 149 /* fallthrough */
150 default: 150 default:
151 spin_lock(&dcache_lock); 151 spin_lock(&dcache_lock);
152 if (filp->f_pos == 2) { 152 if (filp->f_pos == 2)
153 list_del(q); 153 list_move(q, &dentry->d_subdirs);
154 list_add(q, &dentry->d_subdirs); 154
155 }
156 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
157 struct dentry *next; 156 struct dentry *next;
158 next = list_entry(p, struct dentry, d_u.d_child); 157 next = list_entry(p, struct dentry, d_u.d_child);
@@ -164,8 +163,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
164 return 0; 163 return 0;
165 spin_lock(&dcache_lock); 164 spin_lock(&dcache_lock);
166 /* next is still alive */ 165 /* next is still alive */
167 list_del(q); 166 list_move(q, p);
168 list_add(q, p);
169 p = q; 167 p = q;
170 filp->f_pos++; 168 filp->f_pos++;
171 } 169 }
@@ -196,9 +194,9 @@ struct inode_operations simple_dir_inode_operations = {
196 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 194 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
197 * will never be mountable) 195 * will never be mountable)
198 */ 196 */
199struct super_block * 197int get_sb_pseudo(struct file_system_type *fs_type, char *name,
200get_sb_pseudo(struct file_system_type *fs_type, char *name, 198 struct super_operations *ops, unsigned long magic,
201 struct super_operations *ops, unsigned long magic) 199 struct vfsmount *mnt)
202{ 200{
203 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 201 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
204 static struct super_operations default_ops = {.statfs = simple_statfs}; 202 static struct super_operations default_ops = {.statfs = simple_statfs};
@@ -207,7 +205,7 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name,
207 struct qstr d_name = {.name = name, .len = strlen(name)}; 205 struct qstr d_name = {.name = name, .len = strlen(name)};
208 206
209 if (IS_ERR(s)) 207 if (IS_ERR(s))
210 return s; 208 return PTR_ERR(s);
211 209
212 s->s_flags = MS_NOUSER; 210 s->s_flags = MS_NOUSER;
213 s->s_maxbytes = ~0ULL; 211 s->s_maxbytes = ~0ULL;
@@ -232,12 +230,12 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name,
232 d_instantiate(dentry, root); 230 d_instantiate(dentry, root);
233 s->s_root = dentry; 231 s->s_root = dentry;
234 s->s_flags |= MS_ACTIVE; 232 s->s_flags |= MS_ACTIVE;
235 return s; 233 return simple_set_mnt(mnt, s);
236 234
237Enomem: 235Enomem:
238 up_write(&s->s_umount); 236 up_write(&s->s_umount);
239 deactivate_super(s); 237 deactivate_super(s);
240 return ERR_PTR(-ENOMEM); 238 return -ENOMEM;
241} 239}
242 240
243int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 241int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -424,13 +422,13 @@ out:
424 422
425static DEFINE_SPINLOCK(pin_fs_lock); 423static DEFINE_SPINLOCK(pin_fs_lock);
426 424
427int simple_pin_fs(char *name, struct vfsmount **mount, int *count) 425int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
428{ 426{
429 struct vfsmount *mnt = NULL; 427 struct vfsmount *mnt = NULL;
430 spin_lock(&pin_fs_lock); 428 spin_lock(&pin_fs_lock);
431 if (unlikely(!*mount)) { 429 if (unlikely(!*mount)) {
432 spin_unlock(&pin_fs_lock); 430 spin_unlock(&pin_fs_lock);
433 mnt = do_kern_mount(name, 0, name, NULL); 431 mnt = vfs_kern_mount(type, 0, type->name, NULL);
434 if (IS_ERR(mnt)) 432 if (IS_ERR(mnt))
435 return PTR_ERR(mnt); 433 return PTR_ERR(mnt);
436 spin_lock(&pin_fs_lock); 434 spin_lock(&pin_fs_lock);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index bce744468708..52774feab93f 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -147,11 +147,10 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
147 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, 147 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
148 * that we mark locks for reclaiming, and that we bump the pseudo NSM state. 148 * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
149 */ 149 */
150static inline 150static void nlmclnt_prepare_reclaim(struct nlm_host *host)
151void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
152{ 151{
152 down_write(&host->h_rwsem);
153 host->h_monitored = 0; 153 host->h_monitored = 0;
154 host->h_nsmstate = newstate;
155 host->h_state++; 154 host->h_state++;
156 host->h_nextrebind = 0; 155 host->h_nextrebind = 0;
157 nlm_rebind_host(host); 156 nlm_rebind_host(host);
@@ -164,6 +163,13 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
164 dprintk("NLM: reclaiming locks for host %s", host->h_name); 163 dprintk("NLM: reclaiming locks for host %s", host->h_name);
165} 164}
166 165
166static void nlmclnt_finish_reclaim(struct nlm_host *host)
167{
168 host->h_reclaiming = 0;
169 up_write(&host->h_rwsem);
170 dprintk("NLM: done reclaiming locks for host %s", host->h_name);
171}
172
167/* 173/*
168 * Reclaim all locks on server host. We do this by spawning a separate 174 * Reclaim all locks on server host. We do this by spawning a separate
169 * reclaimer thread. 175 * reclaimer thread.
@@ -171,12 +177,10 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
171void 177void
172nlmclnt_recovery(struct nlm_host *host, u32 newstate) 178nlmclnt_recovery(struct nlm_host *host, u32 newstate)
173{ 179{
174 if (host->h_reclaiming++) { 180 if (host->h_nsmstate == newstate)
175 if (host->h_nsmstate == newstate) 181 return;
176 return; 182 host->h_nsmstate = newstate;
177 nlmclnt_prepare_reclaim(host, newstate); 183 if (!host->h_reclaiming++) {
178 } else {
179 nlmclnt_prepare_reclaim(host, newstate);
180 nlm_get_host(host); 184 nlm_get_host(host);
181 __module_get(THIS_MODULE); 185 __module_get(THIS_MODULE);
182 if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) 186 if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0)
@@ -190,6 +194,7 @@ reclaimer(void *ptr)
190 struct nlm_host *host = (struct nlm_host *) ptr; 194 struct nlm_host *host = (struct nlm_host *) ptr;
191 struct nlm_wait *block; 195 struct nlm_wait *block;
192 struct file_lock *fl, *next; 196 struct file_lock *fl, *next;
197 u32 nsmstate;
193 198
194 daemonize("%s-reclaim", host->h_name); 199 daemonize("%s-reclaim", host->h_name);
195 allow_signal(SIGKILL); 200 allow_signal(SIGKILL);
@@ -199,19 +204,25 @@ reclaimer(void *ptr)
199 lock_kernel(); 204 lock_kernel();
200 lockd_up(); 205 lockd_up();
201 206
207 nlmclnt_prepare_reclaim(host);
202 /* First, reclaim all locks that have been marked. */ 208 /* First, reclaim all locks that have been marked. */
203restart: 209restart:
210 nsmstate = host->h_nsmstate;
204 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { 211 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
205 list_del_init(&fl->fl_u.nfs_fl.list); 212 list_del_init(&fl->fl_u.nfs_fl.list);
206 213
207 if (signalled()) 214 if (signalled())
208 continue; 215 continue;
209 if (nlmclnt_reclaim(host, fl) == 0) 216 if (nlmclnt_reclaim(host, fl) != 0)
210 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); 217 continue;
211 goto restart; 218 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
219 if (host->h_nsmstate != nsmstate) {
220 /* Argh! The server rebooted again! */
221 list_splice_init(&host->h_granted, &host->h_reclaim);
222 goto restart;
223 }
212 } 224 }
213 225 nlmclnt_finish_reclaim(host);
214 host->h_reclaiming = 0;
215 226
216 /* Now, wake up all processes that sleep on a blocked lock */ 227 /* Now, wake up all processes that sleep on a blocked lock */
217 list_for_each_entry(block, &nlm_blocked, b_list) { 228 list_for_each_entry(block, &nlm_blocked, b_list) {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index f96e38155b5c..5980c45998cc 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -6,7 +6,6 @@
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/module.h> 9#include <linux/module.h>
11#include <linux/types.h> 10#include <linux/types.h>
12#include <linux/errno.h> 11#include <linux/errno.h>
@@ -508,7 +507,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
508 } 507 }
509 508
510 block = nlmclnt_prepare_block(host, fl); 509 block = nlmclnt_prepare_block(host, fl);
510again:
511 for(;;) { 511 for(;;) {
512 /* Reboot protection */
513 fl->fl_u.nfs_fl.state = host->h_state;
512 status = nlmclnt_call(req, NLMPROC_LOCK); 514 status = nlmclnt_call(req, NLMPROC_LOCK);
513 if (status < 0) 515 if (status < 0)
514 goto out_unblock; 516 goto out_unblock;
@@ -531,10 +533,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
531 } 533 }
532 534
533 if (resp->status == NLM_LCK_GRANTED) { 535 if (resp->status == NLM_LCK_GRANTED) {
534 fl->fl_u.nfs_fl.state = host->h_state; 536 down_read(&host->h_rwsem);
537 /* Check whether or not the server has rebooted */
538 if (fl->fl_u.nfs_fl.state != host->h_state) {
539 up_read(&host->h_rwsem);
540 goto again;
541 }
535 fl->fl_flags |= FL_SLEEP; 542 fl->fl_flags |= FL_SLEEP;
536 /* Ensure the resulting lock will get added to granted list */ 543 /* Ensure the resulting lock will get added to granted list */
537 do_vfs_lock(fl); 544 do_vfs_lock(fl);
545 up_read(&host->h_rwsem);
538 } 546 }
539 status = nlm_stat_to_errno(resp->status); 547 status = nlm_stat_to_errno(resp->status);
540out_unblock: 548out_unblock:
@@ -596,6 +604,7 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
596static int 604static int
597nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) 605nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
598{ 606{
607 struct nlm_host *host = req->a_host;
599 struct nlm_res *resp = &req->a_res; 608 struct nlm_res *resp = &req->a_res;
600 int status; 609 int status;
601 610
@@ -604,7 +613,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
604 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either 613 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
605 * case, we want to unlock. 614 * case, we want to unlock.
606 */ 615 */
616 down_read(&host->h_rwsem);
607 do_vfs_lock(fl); 617 do_vfs_lock(fl);
618 up_read(&host->h_rwsem);
608 619
609 if (req->a_flags & RPC_TASK_ASYNC) 620 if (req->a_flags & RPC_TASK_ASYNC)
610 return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); 621 return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 729ac427d359..38b0e8a1aec0 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -112,11 +112,12 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
112 host->h_version = version; 112 host->h_version = version;
113 host->h_proto = proto; 113 host->h_proto = proto;
114 host->h_rpcclnt = NULL; 114 host->h_rpcclnt = NULL;
115 init_MUTEX(&host->h_sema); 115 mutex_init(&host->h_mutex);
116 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 116 host->h_nextrebind = jiffies + NLM_HOST_REBIND;
117 host->h_expires = jiffies + NLM_HOST_EXPIRE; 117 host->h_expires = jiffies + NLM_HOST_EXPIRE;
118 atomic_set(&host->h_count, 1); 118 atomic_set(&host->h_count, 1);
119 init_waitqueue_head(&host->h_gracewait); 119 init_waitqueue_head(&host->h_gracewait);
120 init_rwsem(&host->h_rwsem);
120 host->h_state = 0; /* pseudo NSM state */ 121 host->h_state = 0; /* pseudo NSM state */
121 host->h_nsmstate = 0; /* real NSM state */ 122 host->h_nsmstate = 0; /* real NSM state */
122 host->h_server = server; 123 host->h_server = server;
@@ -172,7 +173,7 @@ nlm_bind_host(struct nlm_host *host)
172 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 173 (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
173 174
174 /* Lock host handle */ 175 /* Lock host handle */
175 down(&host->h_sema); 176 mutex_lock(&host->h_mutex);
176 177
177 /* If we've already created an RPC client, check whether 178 /* If we've already created an RPC client, check whether
178 * RPC rebind is required 179 * RPC rebind is required
@@ -204,12 +205,12 @@ nlm_bind_host(struct nlm_host *host)
204 host->h_rpcclnt = clnt; 205 host->h_rpcclnt = clnt;
205 } 206 }
206 207
207 up(&host->h_sema); 208 mutex_unlock(&host->h_mutex);
208 return clnt; 209 return clnt;
209 210
210forgetit: 211forgetit:
211 printk("lockd: couldn't create RPC handle for %s\n", host->h_name); 212 printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
212 up(&host->h_sema); 213 mutex_unlock(&host->h_mutex);
213 return NULL; 214 return NULL;
214} 215}
215 216
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fd56c8872f34..9a991b52c647 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -12,7 +12,6 @@
12 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 12 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sysctl.h> 17#include <linux/sysctl.h>
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3ef739120dff..baf5ae513481 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -20,7 +20,6 @@
20 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 20 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
21 */ 21 */
22 22
23#include <linux/config.h>
24#include <linux/types.h> 23#include <linux/types.h>
25#include <linux/errno.h> 24#include <linux/errno.h>
26#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index d210cf304e92..dbb66a3b5cd9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -7,7 +7,6 @@
7 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 7 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/types.h> 10#include <linux/types.h>
12#include <linux/time.h> 11#include <linux/time.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index a570e5c8a930..2a4df9b3779a 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -6,7 +6,6 @@
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/types.h> 9#include <linux/types.h>
11#include <linux/string.h> 10#include <linux/string.h>
12#include <linux/time.h> 11#include <linux/time.h>
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index f22a3764461a..033ea4ac2c30 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -6,7 +6,6 @@
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/types.h> 9#include <linux/types.h>
11#include <linux/sched.h> 10#include <linux/sched.h>
12#include <linux/utsname.h> 11#include <linux/utsname.h>
diff --git a/fs/locks.c b/fs/locks.c
index ab61a8b54829..1ad29c9b6252 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -703,7 +703,7 @@ EXPORT_SYMBOL(posix_test_lock);
703 * from a broken NFS client. But broken NFS clients have a lot more to 703 * from a broken NFS client. But broken NFS clients have a lot more to
704 * worry about than proper deadlock detection anyway... --okir 704 * worry about than proper deadlock detection anyway... --okir
705 */ 705 */
706int posix_locks_deadlock(struct file_lock *caller_fl, 706static int posix_locks_deadlock(struct file_lock *caller_fl,
707 struct file_lock *block_fl) 707 struct file_lock *block_fl)
708{ 708{
709 struct list_head *tmp; 709 struct list_head *tmp;
@@ -722,8 +722,6 @@ next_task:
722 return 0; 722 return 0;
723} 723}
724 724
725EXPORT_SYMBOL(posix_locks_deadlock);
726
727/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks 725/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
728 * at the head of the list, but that's secret knowledge known only to 726 * at the head of the list, but that's secret knowledge known only to
729 * flock_lock_file and posix_lock_file. 727 * flock_lock_file and posix_lock_file.
@@ -794,7 +792,8 @@ out:
794static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 792static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
795{ 793{
796 struct file_lock *fl; 794 struct file_lock *fl;
797 struct file_lock *new_fl, *new_fl2; 795 struct file_lock *new_fl = NULL;
796 struct file_lock *new_fl2 = NULL;
798 struct file_lock *left = NULL; 797 struct file_lock *left = NULL;
799 struct file_lock *right = NULL; 798 struct file_lock *right = NULL;
800 struct file_lock **before; 799 struct file_lock **before;
@@ -803,9 +802,15 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
803 /* 802 /*
804 * We may need two file_lock structures for this operation, 803 * We may need two file_lock structures for this operation,
805 * so we get them in advance to avoid races. 804 * so we get them in advance to avoid races.
805 *
806 * In some cases we can be sure, that no new locks will be needed
806 */ 807 */
807 new_fl = locks_alloc_lock(); 808 if (!(request->fl_flags & FL_ACCESS) &&
808 new_fl2 = locks_alloc_lock(); 809 (request->fl_type != F_UNLCK ||
810 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
811 new_fl = locks_alloc_lock();
812 new_fl2 = locks_alloc_lock();
813 }
809 814
810 lock_kernel(); 815 lock_kernel();
811 if (request->fl_type != F_UNLCK) { 816 if (request->fl_type != F_UNLCK) {
@@ -834,14 +839,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
834 if (request->fl_flags & FL_ACCESS) 839 if (request->fl_flags & FL_ACCESS)
835 goto out; 840 goto out;
836 841
837 error = -ENOLCK; /* "no luck" */
838 if (!(new_fl && new_fl2))
839 goto out;
840
841 /* 842 /*
842 * We've allocated the new locks in advance, so there are no
843 * errors possible (and no blocking operations) from here on.
844 *
845 * Find the first old lock with the same owner as the new lock. 843 * Find the first old lock with the same owner as the new lock.
846 */ 844 */
847 845
@@ -938,10 +936,25 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
938 before = &fl->fl_next; 936 before = &fl->fl_next;
939 } 937 }
940 938
939 /*
940 * The above code only modifies existing locks in case of
941 * merging or replacing. If new lock(s) need to be inserted
942 * all modifications are done bellow this, so it's safe yet to
943 * bail out.
944 */
945 error = -ENOLCK; /* "no luck" */
946 if (right && left == right && !new_fl2)
947 goto out;
948
941 error = 0; 949 error = 0;
942 if (!added) { 950 if (!added) {
943 if (request->fl_type == F_UNLCK) 951 if (request->fl_type == F_UNLCK)
944 goto out; 952 goto out;
953
954 if (!new_fl) {
955 error = -ENOLCK;
956 goto out;
957 }
945 locks_copy_lock(new_fl, request); 958 locks_copy_lock(new_fl, request);
946 locks_insert_lock(before, new_fl); 959 locks_insert_lock(before, new_fl);
947 new_fl = NULL; 960 new_fl = NULL;
@@ -1881,19 +1894,18 @@ out:
1881 */ 1894 */
1882void locks_remove_posix(struct file *filp, fl_owner_t owner) 1895void locks_remove_posix(struct file *filp, fl_owner_t owner)
1883{ 1896{
1884 struct file_lock lock, **before; 1897 struct file_lock lock;
1885 1898
1886 /* 1899 /*
1887 * If there are no locks held on this file, we don't need to call 1900 * If there are no locks held on this file, we don't need to call
1888 * posix_lock_file(). Another process could be setting a lock on this 1901 * posix_lock_file(). Another process could be setting a lock on this
1889 * file at the same time, but we wouldn't remove that lock anyway. 1902 * file at the same time, but we wouldn't remove that lock anyway.
1890 */ 1903 */
1891 before = &filp->f_dentry->d_inode->i_flock; 1904 if (!filp->f_dentry->d_inode->i_flock)
1892 if (*before == NULL)
1893 return; 1905 return;
1894 1906
1895 lock.fl_type = F_UNLCK; 1907 lock.fl_type = F_UNLCK;
1896 lock.fl_flags = FL_POSIX; 1908 lock.fl_flags = FL_POSIX | FL_CLOSE;
1897 lock.fl_start = 0; 1909 lock.fl_start = 0;
1898 lock.fl_end = OFFSET_MAX; 1910 lock.fl_end = OFFSET_MAX;
1899 lock.fl_owner = owner; 1911 lock.fl_owner = owner;
@@ -1902,25 +1914,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
1902 lock.fl_ops = NULL; 1914 lock.fl_ops = NULL;
1903 lock.fl_lmops = NULL; 1915 lock.fl_lmops = NULL;
1904 1916
1905 if (filp->f_op && filp->f_op->lock != NULL) { 1917 if (filp->f_op && filp->f_op->lock != NULL)
1906 filp->f_op->lock(filp, F_SETLK, &lock); 1918 filp->f_op->lock(filp, F_SETLK, &lock);
1907 goto out; 1919 else
1908 } 1920 posix_lock_file(filp, &lock);
1909 1921
1910 /* Can't use posix_lock_file here; we need to remove it no matter
1911 * which pid we have.
1912 */
1913 lock_kernel();
1914 while (*before != NULL) {
1915 struct file_lock *fl = *before;
1916 if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) {
1917 locks_delete_lock(before);
1918 continue;
1919 }
1920 before = &fl->fl_next;
1921 }
1922 unlock_kernel();
1923out:
1924 if (lock.fl_ops && lock.fl_ops->fl_release_private) 1922 if (lock.fl_ops && lock.fl_ops->fl_release_private)
1925 lock.fl_ops->fl_release_private(&lock); 1923 lock.fl_ops->fl_release_private(&lock);
1926} 1924}
@@ -2206,63 +2204,6 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
2206 2204
2207EXPORT_SYMBOL(lock_may_write); 2205EXPORT_SYMBOL(lock_may_write);
2208 2206
2209static inline void __steal_locks(struct file *file, fl_owner_t from)
2210{
2211 struct inode *inode = file->f_dentry->d_inode;
2212 struct file_lock *fl = inode->i_flock;
2213
2214 while (fl) {
2215 if (fl->fl_file == file && fl->fl_owner == from)
2216 fl->fl_owner = current->files;
2217 fl = fl->fl_next;
2218 }
2219}
2220
2221/* When getting ready for executing a binary, we make sure that current
2222 * has a files_struct on its own. Before dropping the old files_struct,
2223 * we take over ownership of all locks for all file descriptors we own.
2224 * Note that we may accidentally steal a lock for a file that a sibling
2225 * has created since the unshare_files() call.
2226 */
2227void steal_locks(fl_owner_t from)
2228{
2229 struct files_struct *files = current->files;
2230 int i, j;
2231 struct fdtable *fdt;
2232
2233 if (from == files)
2234 return;
2235
2236 lock_kernel();
2237 j = 0;
2238
2239 /*
2240 * We are not taking a ref to the file structures, so
2241 * we need to acquire ->file_lock.
2242 */
2243 spin_lock(&files->file_lock);
2244 fdt = files_fdtable(files);
2245 for (;;) {
2246 unsigned long set;
2247 i = j * __NFDBITS;
2248 if (i >= fdt->max_fdset || i >= fdt->max_fds)
2249 break;
2250 set = fdt->open_fds->fds_bits[j++];
2251 while (set) {
2252 if (set & 1) {
2253 struct file *file = fdt->fd[i];
2254 if (file)
2255 __steal_locks(file, from);
2256 }
2257 i++;
2258 set >>= 1;
2259 }
2260 }
2261 spin_unlock(&files->file_lock);
2262 unlock_kernel();
2263}
2264EXPORT_SYMBOL(steal_locks);
2265
2266static int __init filelock_init(void) 2207static int __init filelock_init(void)
2267{ 2208{
2268 filelock_cache = kmem_cache_create("file_lock_cache", 2209 filelock_cache = kmem_cache_create("file_lock_cache",
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 69224d1fe043..2b0a389d1987 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -60,8 +60,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
60static struct page * dir_get_page(struct inode *dir, unsigned long n) 60static struct page * dir_get_page(struct inode *dir, unsigned long n)
61{ 61{
62 struct address_space *mapping = dir->i_mapping; 62 struct address_space *mapping = dir->i_mapping;
63 struct page *page = read_cache_page(mapping, n, 63 struct page *page = read_mapping_page(mapping, n, NULL);
64 (filler_t*)mapping->a_ops->readpage, NULL);
65 if (!IS_ERR(page)) { 64 if (!IS_ERR(page)) {
66 wait_on_page_locked(page); 65 wait_on_page_locked(page);
67 kmap(page); 66 kmap(page);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2dcccf1d1b7f..9ea91c5eeb7b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -19,7 +19,7 @@
19 19
20static void minix_read_inode(struct inode * inode); 20static void minix_read_inode(struct inode * inode);
21static int minix_write_inode(struct inode * inode, int wait); 21static int minix_write_inode(struct inode * inode, int wait);
22static int minix_statfs(struct super_block *sb, struct kstatfs *buf); 22static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
23static int minix_remount (struct super_block * sb, int * flags, char * data); 23static int minix_remount (struct super_block * sb, int * flags, char * data);
24 24
25static void minix_delete_inode(struct inode *inode) 25static void minix_delete_inode(struct inode *inode)
@@ -296,11 +296,11 @@ out_bad_sb:
296 return -EINVAL; 296 return -EINVAL;
297} 297}
298 298
299static int minix_statfs(struct super_block *sb, struct kstatfs *buf) 299static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
300{ 300{
301 struct minix_sb_info *sbi = minix_sb(sb); 301 struct minix_sb_info *sbi = minix_sb(dentry->d_sb);
302 buf->f_type = sb->s_magic; 302 buf->f_type = dentry->d_sb->s_magic;
303 buf->f_bsize = sb->s_blocksize; 303 buf->f_bsize = dentry->d_sb->s_blocksize;
304 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; 304 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
305 buf->f_bfree = minix_count_free_blocks(sbi); 305 buf->f_bfree = minix_count_free_blocks(sbi);
306 buf->f_bavail = buf->f_bfree; 306 buf->f_bavail = buf->f_bfree;
@@ -335,7 +335,7 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
335{ 335{
336 return generic_block_bmap(mapping,block,minix_get_block); 336 return generic_block_bmap(mapping,block,minix_get_block);
337} 337}
338static struct address_space_operations minix_aops = { 338static const struct address_space_operations minix_aops = {
339 .readpage = minix_readpage, 339 .readpage = minix_readpage,
340 .writepage = minix_writepage, 340 .writepage = minix_writepage,
341 .sync_page = block_sync_page, 341 .sync_page = block_sync_page,
@@ -559,10 +559,11 @@ void minix_truncate(struct inode * inode)
559 V2_minix_truncate(inode); 559 V2_minix_truncate(inode);
560} 560}
561 561
562static struct super_block *minix_get_sb(struct file_system_type *fs_type, 562static int minix_get_sb(struct file_system_type *fs_type,
563 int flags, const char *dev_name, void *data) 563 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
564{ 564{
565 return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super); 565 return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
566 mnt);
566} 567}
567 568
568static struct file_system_type minix_fs_type = { 569static struct file_system_type minix_fs_type = {
diff --git a/fs/mpage.c b/fs/mpage.c
index 9bf2eb30e6f4..1e4598247d0b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -707,9 +707,9 @@ mpage_writepages(struct address_space *mapping,
707 struct pagevec pvec; 707 struct pagevec pvec;
708 int nr_pages; 708 int nr_pages;
709 pgoff_t index; 709 pgoff_t index;
710 pgoff_t end = -1; /* Inclusive */ 710 pgoff_t end; /* Inclusive */
711 int scanned = 0; 711 int scanned = 0;
712 int is_range = 0; 712 int range_whole = 0;
713 713
714 if (wbc->nonblocking && bdi_write_congested(bdi)) { 714 if (wbc->nonblocking && bdi_write_congested(bdi)) {
715 wbc->encountered_congestion = 1; 715 wbc->encountered_congestion = 1;
@@ -721,16 +721,14 @@ mpage_writepages(struct address_space *mapping,
721 writepage = mapping->a_ops->writepage; 721 writepage = mapping->a_ops->writepage;
722 722
723 pagevec_init(&pvec, 0); 723 pagevec_init(&pvec, 0);
724 if (wbc->sync_mode == WB_SYNC_NONE) { 724 if (wbc->range_cyclic) {
725 index = mapping->writeback_index; /* Start from prev offset */ 725 index = mapping->writeback_index; /* Start from prev offset */
726 end = -1;
726 } else { 727 } else {
727 index = 0; /* whole-file sweep */ 728 index = wbc->range_start >> PAGE_CACHE_SHIFT;
728 scanned = 1; 729 end = wbc->range_end >> PAGE_CACHE_SHIFT;
729 } 730 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
730 if (wbc->start || wbc->end) { 731 range_whole = 1;
731 index = wbc->start >> PAGE_CACHE_SHIFT;
732 end = wbc->end >> PAGE_CACHE_SHIFT;
733 is_range = 1;
734 scanned = 1; 732 scanned = 1;
735 } 733 }
736retry: 734retry:
@@ -759,7 +757,7 @@ retry:
759 continue; 757 continue;
760 } 758 }
761 759
762 if (unlikely(is_range) && page->index > end) { 760 if (!wbc->range_cyclic && page->index > end) {
763 done = 1; 761 done = 1;
764 unlock_page(page); 762 unlock_page(page);
765 continue; 763 continue;
@@ -810,7 +808,7 @@ retry:
810 index = 0; 808 index = 0;
811 goto retry; 809 goto retry;
812 } 810 }
813 if (!is_range) 811 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
814 mapping->writeback_index = index; 812 mapping->writeback_index = index;
815 if (bio) 813 if (bio)
816 mpage_bio_submit(WRITE, bio); 814 mpage_bio_submit(WRITE, bio);
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 5b76ccd19e3f..9e44158a7540 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -661,11 +661,12 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
661 return 0; 661 return 0;
662} 662}
663 663
664static struct super_block *msdos_get_sb(struct file_system_type *fs_type, 664static int msdos_get_sb(struct file_system_type *fs_type,
665 int flags, const char *dev_name, 665 int flags, const char *dev_name,
666 void *data) 666 void *data, struct vfsmount *mnt)
667{ 667{
668 return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super); 668 return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
669 mnt);
669} 670}
670 671
671static struct file_system_type msdos_fs_type = { 672static struct file_system_type msdos_fs_type = {
diff --git a/fs/namei.c b/fs/namei.c
index d6e2ee251736..c784e8bb57a3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1127,7 +1127,7 @@ out:
1127 if (likely(retval == 0)) { 1127 if (likely(retval == 0)) {
1128 if (unlikely(current->audit_context && nd && nd->dentry && 1128 if (unlikely(current->audit_context && nd && nd->dentry &&
1129 nd->dentry->d_inode)) 1129 nd->dentry->d_inode))
1130 audit_inode(name, nd->dentry->d_inode, flags); 1130 audit_inode(name, nd->dentry->d_inode);
1131 } 1131 }
1132out_fail: 1132out_fail:
1133 return retval; 1133 return retval;
@@ -2243,14 +2243,16 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2243 int error; 2243 int error;
2244 char * to; 2244 char * to;
2245 2245
2246 if (flags != 0) 2246 if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
2247 return -EINVAL; 2247 return -EINVAL;
2248 2248
2249 to = getname(newname); 2249 to = getname(newname);
2250 if (IS_ERR(to)) 2250 if (IS_ERR(to))
2251 return PTR_ERR(to); 2251 return PTR_ERR(to);
2252 2252
2253 error = __user_walk_fd(olddfd, oldname, 0, &old_nd); 2253 error = __user_walk_fd(olddfd, oldname,
2254 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
2255 &old_nd);
2254 if (error) 2256 if (error)
2255 goto exit; 2257 goto exit;
2256 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2258 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
@@ -2577,8 +2579,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
2577{ 2579{
2578 struct page * page; 2580 struct page * page;
2579 struct address_space *mapping = dentry->d_inode->i_mapping; 2581 struct address_space *mapping = dentry->d_inode->i_mapping;
2580 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, 2582 page = read_mapping_page(mapping, 0, NULL);
2581 NULL);
2582 if (IS_ERR(page)) 2583 if (IS_ERR(page))
2583 goto sync_fail; 2584 goto sync_fail;
2584 wait_on_page_locked(page); 2585 wait_on_page_locked(page);
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb852..fa7ed6a9fc2d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -8,7 +8,6 @@
8 * Heavily rewritten. 8 * Heavily rewritten.
9 */ 9 */
10 10
11#include <linux/config.h>
12#include <linux/syscalls.h> 11#include <linux/syscalls.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/sched.h> 13#include <linux/sched.h>
@@ -86,6 +85,15 @@ struct vfsmount *alloc_vfsmnt(const char *name)
86 return mnt; 85 return mnt;
87} 86}
88 87
88int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
89{
90 mnt->mnt_sb = sb;
91 mnt->mnt_root = dget(sb->s_root);
92 return 0;
93}
94
95EXPORT_SYMBOL(simple_set_mnt);
96
89void free_vfsmnt(struct vfsmount *mnt) 97void free_vfsmnt(struct vfsmount *mnt)
90{ 98{
91 kfree(mnt->mnt_devname); 99 kfree(mnt->mnt_devname);
@@ -517,10 +525,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
517{ 525{
518 struct vfsmount *p; 526 struct vfsmount *p;
519 527
520 for (p = mnt; p; p = next_mnt(p, mnt)) { 528 for (p = mnt; p; p = next_mnt(p, mnt))
521 list_del(&p->mnt_hash); 529 list_move(&p->mnt_hash, kill);
522 list_add(&p->mnt_hash, kill);
523 }
524 530
525 if (propagate) 531 if (propagate)
526 propagate_umount(kill); 532 propagate_umount(kill);
@@ -576,8 +582,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
576 */ 582 */
577 583
578 lock_kernel(); 584 lock_kernel();
579 if ((flags & MNT_FORCE) && sb->s_op->umount_begin) 585 if (sb->s_op->umount_begin)
580 sb->s_op->umount_begin(sb); 586 sb->s_op->umount_begin(mnt, flags);
581 unlock_kernel(); 587 unlock_kernel();
582 588
583 /* 589 /*
@@ -1163,13 +1169,46 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1163} 1169}
1164 1170
1165/* 1171/*
1172 * go through the vfsmounts we've just consigned to the graveyard to
1173 * - check that they're still dead
1174 * - delete the vfsmount from the appropriate namespace under lock
1175 * - dispose of the corpse
1176 */
1177static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
1178{
1179 struct namespace *namespace;
1180 struct vfsmount *mnt;
1181
1182 while (!list_empty(graveyard)) {
1183 LIST_HEAD(umounts);
1184 mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire);
1185 list_del_init(&mnt->mnt_expire);
1186
1187 /* don't do anything if the namespace is dead - all the
1188 * vfsmounts from it are going away anyway */
1189 namespace = mnt->mnt_namespace;
1190 if (!namespace || !namespace->root)
1191 continue;
1192 get_namespace(namespace);
1193
1194 spin_unlock(&vfsmount_lock);
1195 down_write(&namespace_sem);
1196 expire_mount(mnt, mounts, &umounts);
1197 up_write(&namespace_sem);
1198 release_mounts(&umounts);
1199 mntput(mnt);
1200 put_namespace(namespace);
1201 spin_lock(&vfsmount_lock);
1202 }
1203}
1204
1205/*
1166 * process a list of expirable mountpoints with the intent of discarding any 1206 * process a list of expirable mountpoints with the intent of discarding any
1167 * mountpoints that aren't in use and haven't been touched since last we came 1207 * mountpoints that aren't in use and haven't been touched since last we came
1168 * here 1208 * here
1169 */ 1209 */
1170void mark_mounts_for_expiry(struct list_head *mounts) 1210void mark_mounts_for_expiry(struct list_head *mounts)
1171{ 1211{
1172 struct namespace *namespace;
1173 struct vfsmount *mnt, *next; 1212 struct vfsmount *mnt, *next;
1174 LIST_HEAD(graveyard); 1213 LIST_HEAD(graveyard);
1175 1214
@@ -1193,38 +1232,79 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1193 list_move(&mnt->mnt_expire, &graveyard); 1232 list_move(&mnt->mnt_expire, &graveyard);
1194 } 1233 }
1195 1234
1196 /* 1235 expire_mount_list(&graveyard, mounts);
1197 * go through the vfsmounts we've just consigned to the graveyard to
1198 * - check that they're still dead
1199 * - delete the vfsmount from the appropriate namespace under lock
1200 * - dispose of the corpse
1201 */
1202 while (!list_empty(&graveyard)) {
1203 LIST_HEAD(umounts);
1204 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
1205 list_del_init(&mnt->mnt_expire);
1206 1236
1207 /* don't do anything if the namespace is dead - all the 1237 spin_unlock(&vfsmount_lock);
1208 * vfsmounts from it are going away anyway */ 1238}
1209 namespace = mnt->mnt_namespace; 1239
1210 if (!namespace || !namespace->root) 1240EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
1241
1242/*
1243 * Ripoff of 'select_parent()'
1244 *
1245 * search the list of submounts for a given mountpoint, and move any
1246 * shrinkable submounts to the 'graveyard' list.
1247 */
1248static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
1249{
1250 struct vfsmount *this_parent = parent;
1251 struct list_head *next;
1252 int found = 0;
1253
1254repeat:
1255 next = this_parent->mnt_mounts.next;
1256resume:
1257 while (next != &this_parent->mnt_mounts) {
1258 struct list_head *tmp = next;
1259 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);
1260
1261 next = tmp->next;
1262 if (!(mnt->mnt_flags & MNT_SHRINKABLE))
1211 continue; 1263 continue;
1212 get_namespace(namespace); 1264 /*
1265 * Descend a level if the d_mounts list is non-empty.
1266 */
1267 if (!list_empty(&mnt->mnt_mounts)) {
1268 this_parent = mnt;
1269 goto repeat;
1270 }
1213 1271
1214 spin_unlock(&vfsmount_lock); 1272 if (!propagate_mount_busy(mnt, 1)) {
1215 down_write(&namespace_sem); 1273 mntget(mnt);
1216 expire_mount(mnt, mounts, &umounts); 1274 list_move_tail(&mnt->mnt_expire, graveyard);
1217 up_write(&namespace_sem); 1275 found++;
1218 release_mounts(&umounts); 1276 }
1219 mntput(mnt);
1220 put_namespace(namespace);
1221 spin_lock(&vfsmount_lock);
1222 } 1277 }
1278 /*
1279 * All done at this level ... ascend and resume the search
1280 */
1281 if (this_parent != parent) {
1282 next = this_parent->mnt_child.next;
1283 this_parent = this_parent->mnt_parent;
1284 goto resume;
1285 }
1286 return found;
1287}
1288
1289/*
1290 * process a list of expirable mountpoints with the intent of discarding any
1291 * submounts of a specific parent mountpoint
1292 */
1293void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts)
1294{
1295 LIST_HEAD(graveyard);
1296 int found;
1297
1298 spin_lock(&vfsmount_lock);
1299
1300 /* extract submounts of 'mountpoint' from the expiration list */
1301 while ((found = select_submounts(mountpoint, &graveyard)) != 0)
1302 expire_mount_list(&graveyard, mounts);
1223 1303
1224 spin_unlock(&vfsmount_lock); 1304 spin_unlock(&vfsmount_lock);
1225} 1305}
1226 1306
1227EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); 1307EXPORT_SYMBOL_GPL(shrink_submounts);
1228 1308
1229/* 1309/*
1230 * Some copy_from_user() implementations do not return the exact number of 1310 * Some copy_from_user() implementations do not return the exact number of
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f0860c602d8b..b4ee89250e95 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -10,7 +10,6 @@
10 * 10 *
11 */ 11 */
12 12
13#include <linux/config.h>
14 13
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/errno.h> 15#include <linux/errno.h>
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index a1f3e972c6ef..1ddf77b0b825 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -9,7 +9,6 @@
9 * 9 *
10 */ 10 */
11 11
12#include <linux/config.h>
13#include <linux/module.h> 12#include <linux/module.h>
14 13
15#include <asm/system.h> 14#include <asm/system.h>
@@ -39,7 +38,7 @@
39 38
40static void ncp_delete_inode(struct inode *); 39static void ncp_delete_inode(struct inode *);
41static void ncp_put_super(struct super_block *); 40static void ncp_put_super(struct super_block *);
42static int ncp_statfs(struct super_block *, struct kstatfs *); 41static int ncp_statfs(struct dentry *, struct kstatfs *);
43 42
44static kmem_cache_t * ncp_inode_cachep; 43static kmem_cache_t * ncp_inode_cachep;
45 44
@@ -105,7 +104,7 @@ static struct super_operations ncp_sops =
105 104
106extern struct dentry_operations ncp_root_dentry_operations; 105extern struct dentry_operations ncp_root_dentry_operations;
107#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) 106#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
108extern struct address_space_operations ncp_symlink_aops; 107extern const struct address_space_operations ncp_symlink_aops;
109extern int ncp_symlink(struct inode*, struct dentry*, const char*); 108extern int ncp_symlink(struct inode*, struct dentry*, const char*);
110#endif 109#endif
111 110
@@ -724,13 +723,14 @@ static void ncp_put_super(struct super_block *sb)
724 kfree(server); 723 kfree(server);
725} 724}
726 725
727static int ncp_statfs(struct super_block *sb, struct kstatfs *buf) 726static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
728{ 727{
729 struct dentry* d; 728 struct dentry* d;
730 struct inode* i; 729 struct inode* i;
731 struct ncp_inode_info* ni; 730 struct ncp_inode_info* ni;
732 struct ncp_server* s; 731 struct ncp_server* s;
733 struct ncp_volume_info vi; 732 struct ncp_volume_info vi;
733 struct super_block *sb = dentry->d_sb;
734 int err; 734 int err;
735 __u8 dh; 735 __u8 dh;
736 736
@@ -957,10 +957,10 @@ out:
957 return result; 957 return result;
958} 958}
959 959
960static struct super_block *ncp_get_sb(struct file_system_type *fs_type, 960static int ncp_get_sb(struct file_system_type *fs_type,
961 int flags, const char *dev_name, void *data) 961 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
962{ 962{
963 return get_sb_nodev(fs_type, flags, data, ncp_fill_super); 963 return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
964} 964}
965 965
966static struct file_system_type ncp_fs_type = { 966static struct file_system_type ncp_fs_type = {
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index eb3813ad136f..42039fe0653c 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -7,7 +7,6 @@
7 * 7 *
8 */ 8 */
9 9
10#include <linux/config.h>
11 10
12#include <asm/uaccess.h> 11#include <asm/uaccess.h>
13#include <linux/capability.h> 12#include <linux/capability.h>
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 52d60c3d8996..e7d5a3097fe6 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -93,7 +93,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
93 */ 93 */
94 if (type) 94 if (type)
95 *type = VM_FAULT_MAJOR; 95 *type = VM_FAULT_MAJOR;
96 inc_page_state(pgmajfault); 96 count_vm_event(PGMAJFAULT);
97 return page; 97 return page;
98} 98}
99 99
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index d9ebf6439f59..551e0bac7aac 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12 12
13#include <linux/config.h>
14 13
15#include "ncplib_kernel.h" 14#include "ncplib_kernel.h"
16 15
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 799e5c2bec55..2441d1ab57dc 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -12,7 +12,6 @@
12#ifndef _NCPLIB_H 12#ifndef _NCPLIB_H
13#define _NCPLIB_H 13#define _NCPLIB_H
14 14
15#include <linux/config.h>
16 15
17#include <linux/fs.h> 16#include <linux/fs.h>
18#include <linux/types.h> 17#include <linux/types.h>
diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c
index a6ec90cd8894..749a18d33599 100644
--- a/fs/ncpfs/ncpsign_kernel.c
+++ b/fs/ncpfs/ncpsign_kernel.c
@@ -5,7 +5,6 @@
5 * 5 *
6 */ 6 */
7 7
8#include <linux/config.h>
9 8
10#ifdef CONFIG_NCPFS_PACKET_SIGNING 9#ifdef CONFIG_NCPFS_PACKET_SIGNING
11 10
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 8783eb7ec641..11c2b252ebed 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -8,7 +8,6 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/config.h>
12 11
13#include <linux/time.h> 12#include <linux/time.h>
14#include <linux/errno.h> 13#include <linux/errno.h>
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index e935f1b34bc2..ca92c2406635 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -20,7 +20,6 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/config.h>
24 23
25#include <asm/uaccess.h> 24#include <asm/uaccess.h>
26 25
@@ -99,7 +98,7 @@ fail:
99/* 98/*
100 * symlinks can't do much... 99 * symlinks can't do much...
101 */ 100 */
102struct address_space_operations ncp_symlink_aops = { 101const struct address_space_operations ncp_symlink_aops = {
103 .readpage = ncp_symlink_readpage, 102 .readpage = ncp_symlink_readpage,
104}; 103};
105 104
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index ec61fd56a1a9..0b572a0c1967 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,14 +4,16 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ 7nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
8 proc.o read.o symlink.o unlink.o write.o 8 proc.o read.o symlink.o unlink.o write.o \
9 namespace.o
9nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
10nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
11nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
12nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
13 delegation.o idmap.o \ 14 delegation.o idmap.o \
14 callback.o callback_xdr.o callback_proc.o 15 callback.o callback_xdr.o callback_proc.o \
16 nfs4namespace.o
15nfs-$(CONFIG_NFS_DIRECTIO) += direct.o 17nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
16nfs-$(CONFIG_SYSCTL) += sysctl.o 18nfs-$(CONFIG_SYSCTL) += sysctl.o
17nfs-objs := $(nfs-y) 19nfs-objs := $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 90c95adc8c1b..fe0a6b8ac149 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -6,7 +6,6 @@
6 * NFSv4 callback handling 6 * NFSv4 callback handling
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/completion.h> 9#include <linux/completion.h>
11#include <linux/ip.h> 10#include <linux/ip.h>
12#include <linux/module.h> 11#include <linux/module.h>
@@ -182,8 +181,6 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
182/* 181/*
183 * Define NFS4 callback program 182 * Define NFS4 callback program
184 */ 183 */
185extern struct svc_version nfs4_callback_version1;
186
187static struct svc_version *nfs4_callback_version[] = { 184static struct svc_version *nfs4_callback_version[] = {
188 [1] = &nfs4_callback_version1, 185 [1] = &nfs4_callback_version1,
189}; 186};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 462cfceb50c5..7719483ecdfc 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -5,7 +5,6 @@
5 * 5 *
6 * NFSv4 callback procedures 6 * NFSv4 callback procedures
7 */ 7 */
8#include <linux/config.h>
9#include <linux/nfs4.h> 8#include <linux/nfs4.h>
10#include <linux/nfs_fs.h> 9#include <linux/nfs_fs.h>
11#include "nfs4_fs.h" 10#include "nfs4_fs.h"
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05c38cf40b69..29f932192054 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -5,7 +5,6 @@
5 * 5 *
6 * NFSv4 callback encode/decode procedures 6 * NFSv4 callback encode/decode procedures
7 */ 7 */
8#include <linux/config.h>
9#include <linux/kernel.h> 8#include <linux/kernel.h>
10#include <linux/sunrpc/svc.h> 9#include <linux/sunrpc/svc.h>
11#include <linux/nfs4.h> 10#include <linux/nfs4.h>
@@ -202,7 +201,7 @@ static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xd
202 status = decode_fh(xdr, &args->fh); 201 status = decode_fh(xdr, &args->fh);
203out: 202out:
204 dprintk("%s: exit with status = %d\n", __FUNCTION__, status); 203 dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
205 return 0; 204 return status;
206} 205}
207 206
208static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 207static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d3be923d4e43..9540a316c05e 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -6,7 +6,6 @@
6 * NFS file delegation management 6 * NFS file delegation management
7 * 7 *
8 */ 8 */
9#include <linux/config.h>
10#include <linux/completion.h> 9#include <linux/completion.h>
11#include <linux/kthread.h> 10#include <linux/kthread.h>
12#include <linux/module.h> 11#include <linux/module.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cae74dd4c7f5..3ddda6f7ecc2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -528,7 +528,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
528 528
529 lock_kernel(); 529 lock_kernel();
530 530
531 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 531 res = nfs_revalidate_mapping(inode, filp->f_mapping);
532 if (res < 0) { 532 if (res < 0) {
533 unlock_kernel(); 533 unlock_kernel();
534 return res; 534 return res;
@@ -868,6 +868,17 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
868 return (nd->intent.open.flags & O_EXCL) != 0; 868 return (nd->intent.open.flags & O_EXCL) != 0;
869} 869}
870 870
871static inline int nfs_reval_fsid(struct inode *dir,
872 struct nfs_fh *fh, struct nfs_fattr *fattr)
873{
874 struct nfs_server *server = NFS_SERVER(dir);
875
876 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
877 /* Revalidate fsid on root dir */
878 return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
879 return 0;
880}
881
871static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 882static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
872{ 883{
873 struct dentry *res; 884 struct dentry *res;
@@ -900,6 +911,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
900 res = ERR_PTR(error); 911 res = ERR_PTR(error);
901 goto out_unlock; 912 goto out_unlock;
902 } 913 }
914 error = nfs_reval_fsid(dir, &fhandle, &fattr);
915 if (error < 0) {
916 res = ERR_PTR(error);
917 goto out_unlock;
918 }
903 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); 919 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
904 res = (struct dentry *)inode; 920 res = (struct dentry *)inode;
905 if (IS_ERR(res)) 921 if (IS_ERR(res))
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3c72b0c07283..4cdd1b499e35 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -38,7 +38,6 @@
38 * 38 *
39 */ 39 */
40 40
41#include <linux/config.h>
42#include <linux/errno.h> 41#include <linux/errno.h>
43#include <linux/sched.h> 42#include <linux/sched.h>
44#include <linux/kernel.h> 43#include <linux/kernel.h>
@@ -892,7 +891,7 @@ out:
892 * nfs_init_directcache - create a slab cache for nfs_direct_req structures 891 * nfs_init_directcache - create a slab cache for nfs_direct_req structures
893 * 892 *
894 */ 893 */
895int nfs_init_directcache(void) 894int __init nfs_init_directcache(void)
896{ 895{
897 nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", 896 nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
898 sizeof(struct nfs_direct_req), 897 sizeof(struct nfs_direct_req),
@@ -906,7 +905,7 @@ int nfs_init_directcache(void)
906} 905}
907 906
908/** 907/**
909 * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures 908 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
910 * 909 *
911 */ 910 */
912void nfs_destroy_directcache(void) 911void nfs_destroy_directcache(void)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index fade02c15e6e..cc2b874ad5a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -43,7 +43,7 @@ static int nfs_file_mmap(struct file *, struct vm_area_struct *);
43static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); 43static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
44static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); 44static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
45static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); 45static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
46static int nfs_file_flush(struct file *); 46static int nfs_file_flush(struct file *, fl_owner_t id);
47static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); 47static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
48static int nfs_check_flags(int flags); 48static int nfs_check_flags(int flags);
49static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); 49static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
@@ -127,23 +127,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
127} 127}
128 128
129/** 129/**
130 * nfs_revalidate_file - Revalidate the page cache & related metadata
131 * @inode - pointer to inode struct
132 * @file - pointer to file
133 */
134static int nfs_revalidate_file(struct inode *inode, struct file *filp)
135{
136 struct nfs_inode *nfsi = NFS_I(inode);
137 int retval = 0;
138
139 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))
140 || nfs_attribute_timeout(inode))
141 retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
142 nfs_revalidate_mapping(inode, filp->f_mapping);
143 return 0;
144}
145
146/**
147 * nfs_revalidate_size - Revalidate the file size 130 * nfs_revalidate_size - Revalidate the file size
148 * @inode - pointer to inode struct 131 * @inode - pointer to inode struct
149 * @file - pointer to struct file 132 * @file - pointer to struct file
@@ -188,7 +171,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
188 * 171 *
189 */ 172 */
190static int 173static int
191nfs_file_flush(struct file *file) 174nfs_file_flush(struct file *file, fl_owner_t id)
192{ 175{
193 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 176 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
194 struct inode *inode = file->f_dentry->d_inode; 177 struct inode *inode = file->f_dentry->d_inode;
@@ -228,7 +211,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
228 dentry->d_parent->d_name.name, dentry->d_name.name, 211 dentry->d_parent->d_name.name, dentry->d_name.name,
229 (unsigned long) count, (unsigned long) pos); 212 (unsigned long) count, (unsigned long) pos);
230 213
231 result = nfs_revalidate_file(inode, iocb->ki_filp); 214 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
232 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); 215 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
233 if (!result) 216 if (!result)
234 result = generic_file_aio_read(iocb, buf, count, pos); 217 result = generic_file_aio_read(iocb, buf, count, pos);
@@ -247,7 +230,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
247 dentry->d_parent->d_name.name, dentry->d_name.name, 230 dentry->d_parent->d_name.name, dentry->d_name.name,
248 (unsigned long) count, (unsigned long long) *ppos); 231 (unsigned long) count, (unsigned long long) *ppos);
249 232
250 res = nfs_revalidate_file(inode, filp); 233 res = nfs_revalidate_mapping(inode, filp->f_mapping);
251 if (!res) 234 if (!res)
252 res = generic_file_sendfile(filp, ppos, count, actor, target); 235 res = generic_file_sendfile(filp, ppos, count, actor, target);
253 return res; 236 return res;
@@ -263,7 +246,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
263 dfprintk(VFS, "nfs: mmap(%s/%s)\n", 246 dfprintk(VFS, "nfs: mmap(%s/%s)\n",
264 dentry->d_parent->d_name.name, dentry->d_name.name); 247 dentry->d_parent->d_name.name, dentry->d_name.name);
265 248
266 status = nfs_revalidate_file(inode, file); 249 status = nfs_revalidate_mapping(inode, file->f_mapping);
267 if (!status) 250 if (!status)
268 status = generic_file_mmap(file, vma); 251 status = generic_file_mmap(file, vma);
269 return status; 252 return status;
@@ -320,7 +303,11 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
320 303
321static void nfs_invalidate_page(struct page *page, unsigned long offset) 304static void nfs_invalidate_page(struct page *page, unsigned long offset)
322{ 305{
323 /* FIXME: we really should cancel any unstarted writes on this page */ 306 struct inode *inode = page->mapping->host;
307
308 /* Cancel any unstarted writes on this page */
309 if (offset == 0)
310 nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
324} 311}
325 312
326static int nfs_release_page(struct page *page, gfp_t gfp) 313static int nfs_release_page(struct page *page, gfp_t gfp)
@@ -328,7 +315,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
328 return !nfs_wb_page(page->mapping->host, page); 315 return !nfs_wb_page(page->mapping->host, page);
329} 316}
330 317
331struct address_space_operations nfs_file_aops = { 318const struct address_space_operations nfs_file_aops = {
332 .readpage = nfs_readpage, 319 .readpage = nfs_readpage,
333 .readpages = nfs_readpages, 320 .readpages = nfs_readpages,
334 .set_page_dirty = __set_page_dirty_nobuffers, 321 .set_page_dirty = __set_page_dirty_nobuffers,
@@ -373,7 +360,6 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
373 if (result) 360 if (result)
374 goto out; 361 goto out;
375 } 362 }
376 nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
377 363
378 result = count; 364 result = count;
379 if (!count) 365 if (!count)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 3fab5b0cfc5a..b81e7ed3c902 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -47,7 +47,6 @@
47#include <linux/workqueue.h> 47#include <linux/workqueue.h>
48#include <linux/sunrpc/rpc_pipe_fs.h> 48#include <linux/sunrpc/rpc_pipe_fs.h>
49 49
50#include <linux/nfs_fs_sb.h>
51#include <linux/nfs_fs.h> 50#include <linux/nfs_fs.h>
52 51
53#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d0b991a92327..d349fb2245da 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -13,7 +13,6 @@
13 * 13 *
14 */ 14 */
15 15
16#include <linux/config.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/init.h> 17#include <linux/init.h>
19 18
@@ -36,6 +35,8 @@
36#include <linux/mount.h> 35#include <linux/mount.h>
37#include <linux/nfs_idmap.h> 36#include <linux/nfs_idmap.h>
38#include <linux/vfs.h> 37#include <linux/vfs.h>
38#include <linux/inet.h>
39#include <linux/nfs_xdr.h>
39 40
40#include <asm/system.h> 41#include <asm/system.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -44,89 +45,17 @@
44#include "callback.h" 45#include "callback.h"
45#include "delegation.h" 46#include "delegation.h"
46#include "iostat.h" 47#include "iostat.h"
48#include "internal.h"
47 49
48#define NFSDBG_FACILITY NFSDBG_VFS 50#define NFSDBG_FACILITY NFSDBG_VFS
49#define NFS_PARANOIA 1 51#define NFS_PARANOIA 1
50 52
51/* Maximum number of readahead requests
52 * FIXME: this should really be a sysctl so that users may tune it to suit
53 * their needs. People that do NFS over a slow network, might for
54 * instance want to reduce it to something closer to 1 for improved
55 * interactive response.
56 */
57#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
58
59static void nfs_invalidate_inode(struct inode *); 53static void nfs_invalidate_inode(struct inode *);
60static int nfs_update_inode(struct inode *, struct nfs_fattr *); 54static int nfs_update_inode(struct inode *, struct nfs_fattr *);
61 55
62static struct inode *nfs_alloc_inode(struct super_block *sb);
63static void nfs_destroy_inode(struct inode *);
64static int nfs_write_inode(struct inode *,int);
65static void nfs_delete_inode(struct inode *);
66static void nfs_clear_inode(struct inode *);
67static void nfs_umount_begin(struct super_block *);
68static int nfs_statfs(struct super_block *, struct kstatfs *);
69static int nfs_show_options(struct seq_file *, struct vfsmount *);
70static int nfs_show_stats(struct seq_file *, struct vfsmount *);
71static void nfs_zap_acl_cache(struct inode *); 56static void nfs_zap_acl_cache(struct inode *);
72 57
73static struct rpc_program nfs_program; 58static kmem_cache_t * nfs_inode_cachep;
74
75static struct super_operations nfs_sops = {
76 .alloc_inode = nfs_alloc_inode,
77 .destroy_inode = nfs_destroy_inode,
78 .write_inode = nfs_write_inode,
79 .delete_inode = nfs_delete_inode,
80 .statfs = nfs_statfs,
81 .clear_inode = nfs_clear_inode,
82 .umount_begin = nfs_umount_begin,
83 .show_options = nfs_show_options,
84 .show_stats = nfs_show_stats,
85};
86
87/*
88 * RPC cruft for NFS
89 */
90static struct rpc_stat nfs_rpcstat = {
91 .program = &nfs_program
92};
93static struct rpc_version * nfs_version[] = {
94 NULL,
95 NULL,
96 &nfs_version2,
97#if defined(CONFIG_NFS_V3)
98 &nfs_version3,
99#elif defined(CONFIG_NFS_V4)
100 NULL,
101#endif
102#if defined(CONFIG_NFS_V4)
103 &nfs_version4,
104#endif
105};
106
107static struct rpc_program nfs_program = {
108 .name = "nfs",
109 .number = NFS_PROGRAM,
110 .nrvers = ARRAY_SIZE(nfs_version),
111 .version = nfs_version,
112 .stats = &nfs_rpcstat,
113 .pipe_dir_name = "/nfs",
114};
115
116#ifdef CONFIG_NFS_V3_ACL
117static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
118static struct rpc_version * nfsacl_version[] = {
119 [3] = &nfsacl_version3,
120};
121
122struct rpc_program nfsacl_program = {
123 .name = "nfsacl",
124 .number = NFS_ACL_PROGRAM,
125 .nrvers = ARRAY_SIZE(nfsacl_version),
126 .version = nfsacl_version,
127 .stats = &nfsacl_rpcstat,
128};
129#endif /* CONFIG_NFS_V3_ACL */
130 59
131static inline unsigned long 60static inline unsigned long
132nfs_fattr_to_ino_t(struct nfs_fattr *fattr) 61nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
@@ -134,8 +63,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
134 return nfs_fileid_to_ino_t(fattr->fileid); 63 return nfs_fileid_to_ino_t(fattr->fileid);
135} 64}
136 65
137static int 66int nfs_write_inode(struct inode *inode, int sync)
138nfs_write_inode(struct inode *inode, int sync)
139{ 67{
140 int flags = sync ? FLUSH_SYNC : 0; 68 int flags = sync ? FLUSH_SYNC : 0;
141 int ret; 69 int ret;
@@ -146,31 +74,15 @@ nfs_write_inode(struct inode *inode, int sync)
146 return 0; 74 return 0;
147} 75}
148 76
149static void 77void nfs_clear_inode(struct inode *inode)
150nfs_delete_inode(struct inode * inode)
151{ 78{
152 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 79 struct nfs_inode *nfsi = NFS_I(inode);
153 80 struct rpc_cred *cred;
154 truncate_inode_pages(&inode->i_data, 0);
155 81
156 nfs_wb_all(inode);
157 /* 82 /*
158 * The following should never happen... 83 * The following should never happen...
159 */ 84 */
160 if (nfs_have_writebacks(inode)) { 85 BUG_ON(nfs_have_writebacks(inode));
161 printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
162 }
163
164 clear_inode(inode);
165}
166
167static void
168nfs_clear_inode(struct inode *inode)
169{
170 struct nfs_inode *nfsi = NFS_I(inode);
171 struct rpc_cred *cred;
172
173 nfs_wb_all(inode);
174 BUG_ON (!list_empty(&nfsi->open_files)); 86 BUG_ON (!list_empty(&nfsi->open_files));
175 nfs_zap_acl_cache(inode); 87 nfs_zap_acl_cache(inode);
176 cred = nfsi->cache_access.cred; 88 cred = nfsi->cache_access.cred;
@@ -179,554 +91,6 @@ nfs_clear_inode(struct inode *inode)
179 BUG_ON(atomic_read(&nfsi->data_updates) != 0); 91 BUG_ON(atomic_read(&nfsi->data_updates) != 0);
180} 92}
181 93
182void
183nfs_umount_begin(struct super_block *sb)
184{
185 struct rpc_clnt *rpc = NFS_SB(sb)->client;
186
187 /* -EIO all pending I/O */
188 if (!IS_ERR(rpc))
189 rpc_killall_tasks(rpc);
190 rpc = NFS_SB(sb)->client_acl;
191 if (!IS_ERR(rpc))
192 rpc_killall_tasks(rpc);
193}
194
195
196static inline unsigned long
197nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
198{
199 /* make sure blocksize is a power of two */
200 if ((bsize & (bsize - 1)) || nrbitsp) {
201 unsigned char nrbits;
202
203 for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
204 ;
205 bsize = 1 << nrbits;
206 if (nrbitsp)
207 *nrbitsp = nrbits;
208 }
209
210 return bsize;
211}
212
213/*
214 * Calculate the number of 512byte blocks used.
215 */
216static inline unsigned long
217nfs_calc_block_size(u64 tsize)
218{
219 loff_t used = (tsize + 511) >> 9;
220 return (used > ULONG_MAX) ? ULONG_MAX : used;
221}
222
223/*
224 * Compute and set NFS server blocksize
225 */
226static inline unsigned long
227nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
228{
229 if (bsize < NFS_MIN_FILE_IO_SIZE)
230 bsize = NFS_DEF_FILE_IO_SIZE;
231 else if (bsize >= NFS_MAX_FILE_IO_SIZE)
232 bsize = NFS_MAX_FILE_IO_SIZE;
233
234 return nfs_block_bits(bsize, nrbitsp);
235}
236
237/*
238 * Obtain the root inode of the file system.
239 */
240static struct inode *
241nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
242{
243 struct nfs_server *server = NFS_SB(sb);
244 int error;
245
246 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
247 if (error < 0) {
248 dprintk("nfs_get_root: getattr error = %d\n", -error);
249 return ERR_PTR(error);
250 }
251
252 return nfs_fhget(sb, rootfh, fsinfo->fattr);
253}
254
255/*
256 * Do NFS version-independent mount processing, and sanity checking
257 */
258static int
259nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
260{
261 struct nfs_server *server;
262 struct inode *root_inode;
263 struct nfs_fattr fattr;
264 struct nfs_fsinfo fsinfo = {
265 .fattr = &fattr,
266 };
267 struct nfs_pathconf pathinfo = {
268 .fattr = &fattr,
269 };
270 int no_root_error = 0;
271 unsigned long max_rpc_payload;
272
273 /* We probably want something more informative here */
274 snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
275
276 server = NFS_SB(sb);
277
278 sb->s_magic = NFS_SUPER_MAGIC;
279
280 server->io_stats = nfs_alloc_iostats();
281 if (server->io_stats == NULL)
282 return -ENOMEM;
283
284 root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
285 /* Did getting the root inode fail? */
286 if (IS_ERR(root_inode)) {
287 no_root_error = PTR_ERR(root_inode);
288 goto out_no_root;
289 }
290 sb->s_root = d_alloc_root(root_inode);
291 if (!sb->s_root) {
292 no_root_error = -ENOMEM;
293 goto out_no_root;
294 }
295 sb->s_root->d_op = server->rpc_ops->dentry_ops;
296
297 /* mount time stamp, in seconds */
298 server->mount_time = jiffies;
299
300 /* Get some general file system info */
301 if (server->namelen == 0 &&
302 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
303 server->namelen = pathinfo.max_namelen;
304 /* Work out a lot of parameters */
305 if (server->rsize == 0)
306 server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
307 if (server->wsize == 0)
308 server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
309
310 if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
311 server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
312 if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
313 server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
314
315 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
316 if (server->rsize > max_rpc_payload)
317 server->rsize = max_rpc_payload;
318 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
319 server->rsize = NFS_MAX_FILE_IO_SIZE;
320 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
321
322 if (server->wsize > max_rpc_payload)
323 server->wsize = max_rpc_payload;
324 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
325 server->wsize = NFS_MAX_FILE_IO_SIZE;
326 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
327
328 if (sb->s_blocksize == 0)
329 sb->s_blocksize = nfs_block_bits(server->wsize,
330 &sb->s_blocksize_bits);
331 server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
332
333 server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
334 if (server->dtsize > PAGE_CACHE_SIZE)
335 server->dtsize = PAGE_CACHE_SIZE;
336 if (server->dtsize > server->rsize)
337 server->dtsize = server->rsize;
338
339 if (server->flags & NFS_MOUNT_NOAC) {
340 server->acregmin = server->acregmax = 0;
341 server->acdirmin = server->acdirmax = 0;
342 sb->s_flags |= MS_SYNCHRONOUS;
343 }
344 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
345
346 sb->s_maxbytes = fsinfo.maxfilesize;
347 if (sb->s_maxbytes > MAX_LFS_FILESIZE)
348 sb->s_maxbytes = MAX_LFS_FILESIZE;
349
350 server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
351 server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
352
353 /* We're airborne Set socket buffersize */
354 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
355 return 0;
356 /* Yargs. It didn't work out. */
357out_no_root:
358 dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
359 if (!IS_ERR(root_inode))
360 iput(root_inode);
361 return no_root_error;
362}
363
364static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
365{
366 to->to_initval = timeo * HZ / 10;
367 to->to_retries = retrans;
368 if (!to->to_retries)
369 to->to_retries = 2;
370
371 switch (proto) {
372 case IPPROTO_TCP:
373 if (!to->to_initval)
374 to->to_initval = 60 * HZ;
375 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
376 to->to_initval = NFS_MAX_TCP_TIMEOUT;
377 to->to_increment = to->to_initval;
378 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
379 to->to_exponential = 0;
380 break;
381 case IPPROTO_UDP:
382 default:
383 if (!to->to_initval)
384 to->to_initval = 11 * HZ / 10;
385 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
386 to->to_initval = NFS_MAX_UDP_TIMEOUT;
387 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
388 to->to_exponential = 1;
389 break;
390 }
391}
392
393/*
394 * Create an RPC client handle.
395 */
396static struct rpc_clnt *
397nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
398{
399 struct rpc_timeout timeparms;
400 struct rpc_xprt *xprt = NULL;
401 struct rpc_clnt *clnt = NULL;
402 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
403
404 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
405
406 server->retrans_timeo = timeparms.to_initval;
407 server->retrans_count = timeparms.to_retries;
408
409 /* create transport and client */
410 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
411 if (IS_ERR(xprt)) {
412 dprintk("%s: cannot create RPC transport. Error = %ld\n",
413 __FUNCTION__, PTR_ERR(xprt));
414 return (struct rpc_clnt *)xprt;
415 }
416 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
417 server->rpc_ops->version, data->pseudoflavor);
418 if (IS_ERR(clnt)) {
419 dprintk("%s: cannot create RPC client. Error = %ld\n",
420 __FUNCTION__, PTR_ERR(xprt));
421 goto out_fail;
422 }
423
424 clnt->cl_intr = 1;
425 clnt->cl_softrtry = 1;
426
427 return clnt;
428
429out_fail:
430 return clnt;
431}
432
433/*
434 * The way this works is that the mount process passes a structure
435 * in the data argument which contains the server's IP address
436 * and the root file handle obtained from the server's mount
437 * daemon. We stash these away in the private superblock fields.
438 */
439static int
440nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
441{
442 struct nfs_server *server;
443 rpc_authflavor_t authflavor;
444
445 server = NFS_SB(sb);
446 sb->s_blocksize_bits = 0;
447 sb->s_blocksize = 0;
448 if (data->bsize)
449 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
450 if (data->rsize)
451 server->rsize = nfs_block_size(data->rsize, NULL);
452 if (data->wsize)
453 server->wsize = nfs_block_size(data->wsize, NULL);
454 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
455
456 server->acregmin = data->acregmin*HZ;
457 server->acregmax = data->acregmax*HZ;
458 server->acdirmin = data->acdirmin*HZ;
459 server->acdirmax = data->acdirmax*HZ;
460
461 /* Start lockd here, before we might error out */
462 if (!(server->flags & NFS_MOUNT_NONLM))
463 lockd_up();
464
465 server->namelen = data->namlen;
466 server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
467 if (!server->hostname)
468 return -ENOMEM;
469 strcpy(server->hostname, data->hostname);
470
471 /* Check NFS protocol revision and initialize RPC op vector
472 * and file handle pool. */
473#ifdef CONFIG_NFS_V3
474 if (server->flags & NFS_MOUNT_VER3) {
475 server->rpc_ops = &nfs_v3_clientops;
476 server->caps |= NFS_CAP_READDIRPLUS;
477 } else {
478 server->rpc_ops = &nfs_v2_clientops;
479 }
480#else
481 server->rpc_ops = &nfs_v2_clientops;
482#endif
483
484 /* Fill in pseudoflavor for mount version < 5 */
485 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
486 data->pseudoflavor = RPC_AUTH_UNIX;
487 authflavor = data->pseudoflavor; /* save for sb_init() */
488 /* XXX maybe we want to add a server->pseudoflavor field */
489
490 /* Create RPC client handles */
491 server->client = nfs_create_client(server, data);
492 if (IS_ERR(server->client))
493 return PTR_ERR(server->client);
494 /* RFC 2623, sec 2.3.2 */
495 if (authflavor != RPC_AUTH_UNIX) {
496 struct rpc_auth *auth;
497
498 server->client_sys = rpc_clone_client(server->client);
499 if (IS_ERR(server->client_sys))
500 return PTR_ERR(server->client_sys);
501 auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
502 if (IS_ERR(auth))
503 return PTR_ERR(auth);
504 } else {
505 atomic_inc(&server->client->cl_count);
506 server->client_sys = server->client;
507 }
508 if (server->flags & NFS_MOUNT_VER3) {
509#ifdef CONFIG_NFS_V3_ACL
510 if (!(server->flags & NFS_MOUNT_NOACL)) {
511 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
512 /* No errors! Assume that Sun nfsacls are supported */
513 if (!IS_ERR(server->client_acl))
514 server->caps |= NFS_CAP_ACLS;
515 }
516#else
517 server->flags &= ~NFS_MOUNT_NOACL;
518#endif /* CONFIG_NFS_V3_ACL */
519 /*
520 * The VFS shouldn't apply the umask to mode bits. We will
521 * do so ourselves when necessary.
522 */
523 sb->s_flags |= MS_POSIXACL;
524 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
525 server->namelen = NFS3_MAXNAMLEN;
526 sb->s_time_gran = 1;
527 } else {
528 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
529 server->namelen = NFS2_MAXNAMLEN;
530 }
531
532 sb->s_op = &nfs_sops;
533 return nfs_sb_init(sb, authflavor);
534}
535
536static int
537nfs_statfs(struct super_block *sb, struct kstatfs *buf)
538{
539 struct nfs_server *server = NFS_SB(sb);
540 unsigned char blockbits;
541 unsigned long blockres;
542 struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
543 struct nfs_fattr fattr;
544 struct nfs_fsstat res = {
545 .fattr = &fattr,
546 };
547 int error;
548
549 lock_kernel();
550
551 error = server->rpc_ops->statfs(server, rootfh, &res);
552 buf->f_type = NFS_SUPER_MAGIC;
553 if (error < 0)
554 goto out_err;
555
556 /*
557 * Current versions of glibc do not correctly handle the
558 * case where f_frsize != f_bsize. Eventually we want to
559 * report the value of wtmult in this field.
560 */
561 buf->f_frsize = sb->s_blocksize;
562
563 /*
564 * On most *nix systems, f_blocks, f_bfree, and f_bavail
565 * are reported in units of f_frsize. Linux hasn't had
566 * an f_frsize field in its statfs struct until recently,
567 * thus historically Linux's sys_statfs reports these
568 * fields in units of f_bsize.
569 */
570 buf->f_bsize = sb->s_blocksize;
571 blockbits = sb->s_blocksize_bits;
572 blockres = (1 << blockbits) - 1;
573 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
574 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
575 buf->f_bavail = (res.abytes + blockres) >> blockbits;
576
577 buf->f_files = res.tfiles;
578 buf->f_ffree = res.afiles;
579
580 buf->f_namelen = server->namelen;
581 out:
582 unlock_kernel();
583 return 0;
584
585 out_err:
586 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error);
587 buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
588 goto out;
589
590}
591
592static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
593{
594 static struct proc_nfs_info {
595 int flag;
596 char *str;
597 char *nostr;
598 } nfs_info[] = {
599 { NFS_MOUNT_SOFT, ",soft", ",hard" },
600 { NFS_MOUNT_INTR, ",intr", "" },
601 { NFS_MOUNT_NOCTO, ",nocto", "" },
602 { NFS_MOUNT_NOAC, ",noac", "" },
603 { NFS_MOUNT_NONLM, ",nolock", "" },
604 { NFS_MOUNT_NOACL, ",noacl", "" },
605 { 0, NULL, NULL }
606 };
607 struct proc_nfs_info *nfs_infop;
608 char buf[12];
609 char *proto;
610
611 seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
612 seq_printf(m, ",rsize=%d", nfss->rsize);
613 seq_printf(m, ",wsize=%d", nfss->wsize);
614 if (nfss->acregmin != 3*HZ || showdefaults)
615 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
616 if (nfss->acregmax != 60*HZ || showdefaults)
617 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
618 if (nfss->acdirmin != 30*HZ || showdefaults)
619 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
620 if (nfss->acdirmax != 60*HZ || showdefaults)
621 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
622 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
623 if (nfss->flags & nfs_infop->flag)
624 seq_puts(m, nfs_infop->str);
625 else
626 seq_puts(m, nfs_infop->nostr);
627 }
628 switch (nfss->client->cl_xprt->prot) {
629 case IPPROTO_TCP:
630 proto = "tcp";
631 break;
632 case IPPROTO_UDP:
633 proto = "udp";
634 break;
635 default:
636 snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
637 proto = buf;
638 }
639 seq_printf(m, ",proto=%s", proto);
640 seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
641 seq_printf(m, ",retrans=%u", nfss->retrans_count);
642}
643
644static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
645{
646 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
647
648 nfs_show_mount_options(m, nfss, 0);
649
650 seq_puts(m, ",addr=");
651 seq_escape(m, nfss->hostname, " \t\n\\");
652
653 return 0;
654}
655
656static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
657{
658 int i, cpu;
659 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
660 struct rpc_auth *auth = nfss->client->cl_auth;
661 struct nfs_iostats totals = { };
662
663 seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
664
665 /*
666 * Display all mount option settings
667 */
668 seq_printf(m, "\n\topts:\t");
669 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
670 seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
671 seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
672 seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
673 nfs_show_mount_options(m, nfss, 1);
674
675 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
676
677 seq_printf(m, "\n\tcaps:\t");
678 seq_printf(m, "caps=0x%x", nfss->caps);
679 seq_printf(m, ",wtmult=%d", nfss->wtmult);
680 seq_printf(m, ",dtsize=%d", nfss->dtsize);
681 seq_printf(m, ",bsize=%d", nfss->bsize);
682 seq_printf(m, ",namelen=%d", nfss->namelen);
683
684#ifdef CONFIG_NFS_V4
685 if (nfss->rpc_ops->version == 4) {
686 seq_printf(m, "\n\tnfsv4:\t");
687 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
688 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
689 seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
690 }
691#endif
692
693 /*
694 * Display security flavor in effect for this mount
695 */
696 seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
697 if (auth->au_flavor)
698 seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
699
700 /*
701 * Display superblock I/O counters
702 */
703 for_each_possible_cpu(cpu) {
704 struct nfs_iostats *stats;
705
706 preempt_disable();
707 stats = per_cpu_ptr(nfss->io_stats, cpu);
708
709 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
710 totals.events[i] += stats->events[i];
711 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
712 totals.bytes[i] += stats->bytes[i];
713
714 preempt_enable();
715 }
716
717 seq_printf(m, "\n\tevents:\t");
718 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
719 seq_printf(m, "%lu ", totals.events[i]);
720 seq_printf(m, "\n\tbytes:\t");
721 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
722 seq_printf(m, "%Lu ", totals.bytes[i]);
723 seq_printf(m, "\n");
724
725 rpc_print_iostats(m, nfss->client);
726
727 return 0;
728}
729
730/** 94/**
731 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk 95 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
732 */ 96 */
@@ -889,6 +253,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
889 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 253 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
890 && fattr->size <= NFS_LIMIT_READDIRPLUS) 254 && fattr->size <= NFS_LIMIT_READDIRPLUS)
891 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); 255 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
256 /* Deal with crossing mountpoints */
257 if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
258 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
259 inode->i_op = &nfs_referral_inode_operations;
260 else
261 inode->i_op = &nfs_mountpoint_inode_operations;
262 inode->i_fop = NULL;
263 }
892 } else if (S_ISLNK(inode->i_mode)) 264 } else if (S_ISLNK(inode->i_mode))
893 inode->i_op = &nfs_symlink_inode_operations; 265 inode->i_op = &nfs_symlink_inode_operations;
894 else 266 else
@@ -1207,6 +579,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1207 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", 579 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
1208 inode->i_sb->s_id, (long long)NFS_FILEID(inode)); 580 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
1209 581
582 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
1210 lock_kernel(); 583 lock_kernel();
1211 if (!inode || is_bad_inode(inode)) 584 if (!inode || is_bad_inode(inode))
1212 goto out_nowait; 585 goto out_nowait;
@@ -1220,7 +593,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1220 status = -ESTALE; 593 status = -ESTALE;
1221 /* Do we trust the cached ESTALE? */ 594 /* Do we trust the cached ESTALE? */
1222 if (NFS_ATTRTIMEO(inode) != 0) { 595 if (NFS_ATTRTIMEO(inode) != 0) {
1223 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { 596 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) {
1224 /* no */ 597 /* no */
1225 } else 598 } else
1226 goto out; 599 goto out;
@@ -1251,8 +624,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1251 } 624 }
1252 spin_unlock(&inode->i_lock); 625 spin_unlock(&inode->i_lock);
1253 626
1254 nfs_revalidate_mapping(inode, inode->i_mapping);
1255
1256 if (nfsi->cache_validity & NFS_INO_INVALID_ACL) 627 if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
1257 nfs_zap_acl_cache(inode); 628 nfs_zap_acl_cache(inode);
1258 629
@@ -1286,8 +657,7 @@ int nfs_attribute_timeout(struct inode *inode)
1286 */ 657 */
1287int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 658int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1288{ 659{
1289 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); 660 if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
1290 if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
1291 && !nfs_attribute_timeout(inode)) 661 && !nfs_attribute_timeout(inode))
1292 return NFS_STALE(inode) ? -ESTALE : 0; 662 return NFS_STALE(inode) ? -ESTALE : 0;
1293 return __nfs_revalidate_inode(server, inode); 663 return __nfs_revalidate_inode(server, inode);
@@ -1298,9 +668,16 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1298 * @inode - pointer to host inode 668 * @inode - pointer to host inode
1299 * @mapping - pointer to mapping 669 * @mapping - pointer to mapping
1300 */ 670 */
1301void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) 671int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1302{ 672{
1303 struct nfs_inode *nfsi = NFS_I(inode); 673 struct nfs_inode *nfsi = NFS_I(inode);
674 int ret = 0;
675
676 if (NFS_STALE(inode))
677 ret = -ESTALE;
678 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
679 || nfs_attribute_timeout(inode))
680 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
1304 681
1305 if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { 682 if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
1306 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); 683 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
@@ -1321,6 +698,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1321 inode->i_sb->s_id, 698 inode->i_sb->s_id,
1322 (long long)NFS_FILEID(inode)); 699 (long long)NFS_FILEID(inode));
1323 } 700 }
701 return ret;
1324} 702}
1325 703
1326/** 704/**
@@ -1360,12 +738,6 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1360{ 738{
1361 struct nfs_inode *nfsi = NFS_I(inode); 739 struct nfs_inode *nfsi = NFS_I(inode);
1362 740
1363 if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
1364 && nfsi->change_attr == fattr->pre_change_attr) {
1365 nfsi->change_attr = fattr->change_attr;
1366 nfsi->cache_change_attribute = jiffies;
1367 }
1368
1369 /* If we have atomic WCC data, we may update some attributes */ 741 /* If we have atomic WCC data, we may update some attributes */
1370 if ((fattr->valid & NFS_ATTR_WCC) != 0) { 742 if ((fattr->valid & NFS_ATTR_WCC) != 0) {
1371 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { 743 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
@@ -1399,9 +771,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1399 int data_unstable; 771 int data_unstable;
1400 772
1401 773
1402 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1403 return 0;
1404
1405 /* Has the inode gone and changed behind our back? */ 774 /* Has the inode gone and changed behind our back? */
1406 if (nfsi->fileid != fattr->fileid 775 if (nfsi->fileid != fattr->fileid
1407 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { 776 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
@@ -1414,20 +783,13 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1414 /* Do atomic weak cache consistency updates */ 783 /* Do atomic weak cache consistency updates */
1415 nfs_wcc_update_inode(inode, fattr); 784 nfs_wcc_update_inode(inode, fattr);
1416 785
1417 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { 786 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
1418 if (nfsi->change_attr == fattr->change_attr) 787 nfsi->change_attr != fattr->change_attr)
1419 goto out; 788 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1420 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1421 if (!data_unstable)
1422 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1423 }
1424 789
1425 /* Verify a few of the more important attributes */ 790 /* Verify a few of the more important attributes */
1426 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { 791 if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
1427 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 792 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1428 if (!data_unstable)
1429 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1430 }
1431 793
1432 cur_size = i_size_read(inode); 794 cur_size = i_size_read(inode);
1433 new_isize = nfs_size_to_loff_t(fattr->size); 795 new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1444,7 +806,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1444 if (inode->i_nlink != fattr->nlink) 806 if (inode->i_nlink != fattr->nlink)
1445 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 807 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1446 808
1447out:
1448 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 809 if (!timespec_equal(&inode->i_atime, &fattr->atime))
1449 nfsi->cache_validity |= NFS_INO_INVALID_ATIME; 810 nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
1450 811
@@ -1470,7 +831,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1470 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 831 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1471 return 0; 832 return 0;
1472 spin_lock(&inode->i_lock); 833 spin_lock(&inode->i_lock);
1473 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
1474 if (time_after(fattr->time_start, nfsi->last_updated)) 834 if (time_after(fattr->time_start, nfsi->last_updated))
1475 status = nfs_update_inode(inode, fattr); 835 status = nfs_update_inode(inode, fattr);
1476 else 836 else
@@ -1495,7 +855,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1495 855
1496 spin_lock(&inode->i_lock); 856 spin_lock(&inode->i_lock);
1497 if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { 857 if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
1498 nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; 858 nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1499 goto out; 859 goto out;
1500 } 860 }
1501 status = nfs_update_inode(inode, fattr); 861 status = nfs_update_inode(inode, fattr);
@@ -1518,6 +878,7 @@ out:
1518 */ 878 */
1519static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) 879static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1520{ 880{
881 struct nfs_server *server;
1521 struct nfs_inode *nfsi = NFS_I(inode); 882 struct nfs_inode *nfsi = NFS_I(inode);
1522 loff_t cur_isize, new_isize; 883 loff_t cur_isize, new_isize;
1523 unsigned int invalid = 0; 884 unsigned int invalid = 0;
@@ -1527,9 +888,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1527 __FUNCTION__, inode->i_sb->s_id, inode->i_ino, 888 __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
1528 atomic_read(&inode->i_count), fattr->valid); 889 atomic_read(&inode->i_count), fattr->valid);
1529 890
1530 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1531 return 0;
1532
1533 if (nfsi->fileid != fattr->fileid) 891 if (nfsi->fileid != fattr->fileid)
1534 goto out_fileid; 892 goto out_fileid;
1535 893
@@ -1539,6 +897,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1539 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 897 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1540 goto out_changed; 898 goto out_changed;
1541 899
900 server = NFS_SERVER(inode);
901 /* Update the fsid if and only if this is the root directory */
902 if (inode == inode->i_sb->s_root->d_inode
903 && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
904 server->fsid = fattr->fsid;
905
1542 /* 906 /*
1543 * Update the read time so we don't revalidate too often. 907 * Update the read time so we don't revalidate too often.
1544 */ 908 */
@@ -1548,7 +912,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1548 /* Are we racing with known updates of the metadata on the server? */ 912 /* Are we racing with known updates of the metadata on the server? */
1549 data_stable = nfs_verify_change_attribute(inode, fattr->time_start); 913 data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
1550 if (data_stable) 914 if (data_stable)
1551 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); 915 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME);
1552 916
1553 /* Do atomic weak cache consistency updates */ 917 /* Do atomic weak cache consistency updates */
1554 nfs_wcc_update_inode(inode, fattr); 918 nfs_wcc_update_inode(inode, fattr);
@@ -1612,15 +976,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1612 inode->i_blksize = fattr->du.nfs2.blocksize; 976 inode->i_blksize = fattr->du.nfs2.blocksize;
1613 } 977 }
1614 978
1615 if ((fattr->valid & NFS_ATTR_FATTR_V4)) { 979 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
1616 if (nfsi->change_attr != fattr->change_attr) { 980 nfsi->change_attr != fattr->change_attr) {
1617 dprintk("NFS: change_attr change on server for file %s/%ld\n", 981 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1618 inode->i_sb->s_id, inode->i_ino); 982 inode->i_sb->s_id, inode->i_ino);
1619 nfsi->change_attr = fattr->change_attr; 983 nfsi->change_attr = fattr->change_attr;
1620 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 984 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1621 nfsi->cache_change_attribute = jiffies; 985 nfsi->cache_change_attribute = jiffies;
1622 } else
1623 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
1624 } 986 }
1625 987
1626 /* Update attrtimeo value if we're out of the unstable period */ 988 /* Update attrtimeo value if we're out of the unstable period */
@@ -1668,190 +1030,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1668 goto out_err; 1030 goto out_err;
1669} 1031}
1670 1032
1671/*
1672 * File system information
1673 */
1674
1675static int nfs_set_super(struct super_block *s, void *data)
1676{
1677 s->s_fs_info = data;
1678 return set_anon_super(s, data);
1679}
1680
1681static int nfs_compare_super(struct super_block *sb, void *data)
1682{
1683 struct nfs_server *server = data;
1684 struct nfs_server *old = NFS_SB(sb);
1685
1686 if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
1687 return 0;
1688 if (old->addr.sin_port != server->addr.sin_port)
1689 return 0;
1690 return !nfs_compare_fh(&old->fh, &server->fh);
1691}
1692
1693static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
1694 int flags, const char *dev_name, void *raw_data)
1695{
1696 int error;
1697 struct nfs_server *server = NULL;
1698 struct super_block *s;
1699 struct nfs_fh *root;
1700 struct nfs_mount_data *data = raw_data;
1701
1702 s = ERR_PTR(-EINVAL);
1703 if (data == NULL) {
1704 dprintk("%s: missing data argument\n", __FUNCTION__);
1705 goto out_err;
1706 }
1707 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
1708 dprintk("%s: bad mount version\n", __FUNCTION__);
1709 goto out_err;
1710 }
1711 switch (data->version) {
1712 case 1:
1713 data->namlen = 0;
1714 case 2:
1715 data->bsize = 0;
1716 case 3:
1717 if (data->flags & NFS_MOUNT_VER3) {
1718 dprintk("%s: mount structure version %d does not support NFSv3\n",
1719 __FUNCTION__,
1720 data->version);
1721 goto out_err;
1722 }
1723 data->root.size = NFS2_FHSIZE;
1724 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
1725 case 4:
1726 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
1727 dprintk("%s: mount structure version %d does not support strong security\n",
1728 __FUNCTION__,
1729 data->version);
1730 goto out_err;
1731 }
1732 case 5:
1733 memset(data->context, 0, sizeof(data->context));
1734 }
1735#ifndef CONFIG_NFS_V3
1736 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
1737 s = ERR_PTR(-EPROTONOSUPPORT);
1738 if (data->flags & NFS_MOUNT_VER3) {
1739 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
1740 goto out_err;
1741 }
1742#endif /* CONFIG_NFS_V3 */
1743
1744 s = ERR_PTR(-ENOMEM);
1745 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
1746 if (!server)
1747 goto out_err;
1748 /* Zero out the NFS state stuff */
1749 init_nfsv4_state(server);
1750 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
1751
1752 root = &server->fh;
1753 if (data->flags & NFS_MOUNT_VER3)
1754 root->size = data->root.size;
1755 else
1756 root->size = NFS2_FHSIZE;
1757 s = ERR_PTR(-EINVAL);
1758 if (root->size > sizeof(root->data)) {
1759 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
1760 goto out_err;
1761 }
1762 memcpy(root->data, data->root.data, root->size);
1763
1764 /* We now require that the mount process passes the remote address */
1765 memcpy(&server->addr, &data->addr, sizeof(server->addr));
1766 if (server->addr.sin_addr.s_addr == INADDR_ANY) {
1767 dprintk("%s: mount program didn't pass remote address!\n",
1768 __FUNCTION__);
1769 goto out_err;
1770 }
1771
1772 /* Fire up rpciod if not yet running */
1773 s = ERR_PTR(rpciod_up());
1774 if (IS_ERR(s)) {
1775 dprintk("%s: couldn't start rpciod! Error = %ld\n",
1776 __FUNCTION__, PTR_ERR(s));
1777 goto out_err;
1778 }
1779
1780 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1781 if (IS_ERR(s) || s->s_root)
1782 goto out_rpciod_down;
1783
1784 s->s_flags = flags;
1785
1786 error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
1787 if (error) {
1788 up_write(&s->s_umount);
1789 deactivate_super(s);
1790 return ERR_PTR(error);
1791 }
1792 s->s_flags |= MS_ACTIVE;
1793 return s;
1794out_rpciod_down:
1795 rpciod_down();
1796out_err:
1797 kfree(server);
1798 return s;
1799}
1800
1801static void nfs_kill_super(struct super_block *s)
1802{
1803 struct nfs_server *server = NFS_SB(s);
1804
1805 kill_anon_super(s);
1806
1807 if (!IS_ERR(server->client))
1808 rpc_shutdown_client(server->client);
1809 if (!IS_ERR(server->client_sys))
1810 rpc_shutdown_client(server->client_sys);
1811 if (!IS_ERR(server->client_acl))
1812 rpc_shutdown_client(server->client_acl);
1813
1814 if (!(server->flags & NFS_MOUNT_NONLM))
1815 lockd_down(); /* release rpc.lockd */
1816
1817 rpciod_down(); /* release rpciod */
1818
1819 nfs_free_iostats(server->io_stats);
1820 kfree(server->hostname);
1821 kfree(server);
1822}
1823
1824static struct file_system_type nfs_fs_type = {
1825 .owner = THIS_MODULE,
1826 .name = "nfs",
1827 .get_sb = nfs_get_sb,
1828 .kill_sb = nfs_kill_super,
1829 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
1830};
1831 1033
1832#ifdef CONFIG_NFS_V4 1034#ifdef CONFIG_NFS_V4
1833 1035
1834static void nfs4_clear_inode(struct inode *);
1835
1836
1837static struct super_operations nfs4_sops = {
1838 .alloc_inode = nfs_alloc_inode,
1839 .destroy_inode = nfs_destroy_inode,
1840 .write_inode = nfs_write_inode,
1841 .delete_inode = nfs_delete_inode,
1842 .statfs = nfs_statfs,
1843 .clear_inode = nfs4_clear_inode,
1844 .umount_begin = nfs_umount_begin,
1845 .show_options = nfs_show_options,
1846 .show_stats = nfs_show_stats,
1847};
1848
1849/* 1036/*
1850 * Clean out any remaining NFSv4 state that might be left over due 1037 * Clean out any remaining NFSv4 state that might be left over due
1851 * to open() calls that passed nfs_atomic_lookup, but failed to call 1038 * to open() calls that passed nfs_atomic_lookup, but failed to call
1852 * nfs_open(). 1039 * nfs_open().
1853 */ 1040 */
1854static void nfs4_clear_inode(struct inode *inode) 1041void nfs4_clear_inode(struct inode *inode)
1855{ 1042{
1856 struct nfs_inode *nfsi = NFS_I(inode); 1043 struct nfs_inode *nfsi = NFS_I(inode);
1857 1044
@@ -1875,357 +1062,9 @@ static void nfs4_clear_inode(struct inode *inode)
1875 nfs4_close_state(state, state->state); 1062 nfs4_close_state(state, state->state);
1876 } 1063 }
1877} 1064}
1878
1879
1880static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
1881{
1882 struct nfs_server *server;
1883 struct nfs4_client *clp = NULL;
1884 struct rpc_xprt *xprt = NULL;
1885 struct rpc_clnt *clnt = NULL;
1886 struct rpc_timeout timeparms;
1887 rpc_authflavor_t authflavour;
1888 int err = -EIO;
1889
1890 sb->s_blocksize_bits = 0;
1891 sb->s_blocksize = 0;
1892 server = NFS_SB(sb);
1893 if (data->rsize != 0)
1894 server->rsize = nfs_block_size(data->rsize, NULL);
1895 if (data->wsize != 0)
1896 server->wsize = nfs_block_size(data->wsize, NULL);
1897 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
1898 server->caps = NFS_CAP_ATOMIC_OPEN;
1899
1900 server->acregmin = data->acregmin*HZ;
1901 server->acregmax = data->acregmax*HZ;
1902 server->acdirmin = data->acdirmin*HZ;
1903 server->acdirmax = data->acdirmax*HZ;
1904
1905 server->rpc_ops = &nfs_v4_clientops;
1906
1907 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
1908
1909 server->retrans_timeo = timeparms.to_initval;
1910 server->retrans_count = timeparms.to_retries;
1911
1912 clp = nfs4_get_client(&server->addr.sin_addr);
1913 if (!clp) {
1914 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
1915 return -EIO;
1916 }
1917
1918 /* Now create transport and client */
1919 authflavour = RPC_AUTH_UNIX;
1920 if (data->auth_flavourlen != 0) {
1921 if (data->auth_flavourlen != 1) {
1922 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
1923 __FUNCTION__, data->auth_flavourlen);
1924 err = -EINVAL;
1925 goto out_fail;
1926 }
1927 if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
1928 err = -EFAULT;
1929 goto out_fail;
1930 }
1931 }
1932
1933 down_write(&clp->cl_sem);
1934 if (IS_ERR(clp->cl_rpcclient)) {
1935 xprt = xprt_create_proto(data->proto, &server->addr, &timeparms);
1936 if (IS_ERR(xprt)) {
1937 up_write(&clp->cl_sem);
1938 err = PTR_ERR(xprt);
1939 dprintk("%s: cannot create RPC transport. Error = %d\n",
1940 __FUNCTION__, err);
1941 goto out_fail;
1942 }
1943 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
1944 server->rpc_ops->version, authflavour);
1945 if (IS_ERR(clnt)) {
1946 up_write(&clp->cl_sem);
1947 err = PTR_ERR(clnt);
1948 dprintk("%s: cannot create RPC client. Error = %d\n",
1949 __FUNCTION__, err);
1950 goto out_fail;
1951 }
1952 clnt->cl_intr = 1;
1953 clnt->cl_softrtry = 1;
1954 clp->cl_rpcclient = clnt;
1955 memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
1956 nfs_idmap_new(clp);
1957 }
1958 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1959 clnt = rpc_clone_client(clp->cl_rpcclient);
1960 if (!IS_ERR(clnt))
1961 server->nfs4_state = clp;
1962 up_write(&clp->cl_sem);
1963 clp = NULL;
1964
1965 if (IS_ERR(clnt)) {
1966 err = PTR_ERR(clnt);
1967 dprintk("%s: cannot create RPC client. Error = %d\n",
1968 __FUNCTION__, err);
1969 return err;
1970 }
1971
1972 server->client = clnt;
1973
1974 if (server->nfs4_state->cl_idmap == NULL) {
1975 dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
1976 return -ENOMEM;
1977 }
1978
1979 if (clnt->cl_auth->au_flavor != authflavour) {
1980 struct rpc_auth *auth;
1981
1982 auth = rpcauth_create(authflavour, clnt);
1983 if (IS_ERR(auth)) {
1984 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
1985 return PTR_ERR(auth);
1986 }
1987 }
1988
1989 sb->s_time_gran = 1;
1990
1991 sb->s_op = &nfs4_sops;
1992 err = nfs_sb_init(sb, authflavour);
1993 if (err == 0)
1994 return 0;
1995out_fail:
1996 if (clp)
1997 nfs4_put_client(clp);
1998 return err;
1999}
2000
2001static int nfs4_compare_super(struct super_block *sb, void *data)
2002{
2003 struct nfs_server *server = data;
2004 struct nfs_server *old = NFS_SB(sb);
2005
2006 if (strcmp(server->hostname, old->hostname) != 0)
2007 return 0;
2008 if (strcmp(server->mnt_path, old->mnt_path) != 0)
2009 return 0;
2010 return 1;
2011}
2012
2013static void *
2014nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
2015{
2016 void *p = NULL;
2017
2018 if (!src->len)
2019 return ERR_PTR(-EINVAL);
2020 if (src->len < maxlen)
2021 maxlen = src->len;
2022 if (dst == NULL) {
2023 p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
2024 if (p == NULL)
2025 return ERR_PTR(-ENOMEM);
2026 }
2027 if (copy_from_user(dst, src->data, maxlen)) {
2028 kfree(p);
2029 return ERR_PTR(-EFAULT);
2030 }
2031 dst[maxlen] = '\0';
2032 return dst;
2033}
2034
2035static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
2036 int flags, const char *dev_name, void *raw_data)
2037{
2038 int error;
2039 struct nfs_server *server;
2040 struct super_block *s;
2041 struct nfs4_mount_data *data = raw_data;
2042 void *p;
2043
2044 if (data == NULL) {
2045 dprintk("%s: missing data argument\n", __FUNCTION__);
2046 return ERR_PTR(-EINVAL);
2047 }
2048 if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
2049 dprintk("%s: bad mount version\n", __FUNCTION__);
2050 return ERR_PTR(-EINVAL);
2051 }
2052
2053 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
2054 if (!server)
2055 return ERR_PTR(-ENOMEM);
2056 /* Zero out the NFS state stuff */
2057 init_nfsv4_state(server);
2058 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
2059
2060 p = nfs_copy_user_string(NULL, &data->hostname, 256);
2061 if (IS_ERR(p))
2062 goto out_err;
2063 server->hostname = p;
2064
2065 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
2066 if (IS_ERR(p))
2067 goto out_err;
2068 server->mnt_path = p;
2069
2070 p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
2071 sizeof(server->ip_addr) - 1);
2072 if (IS_ERR(p))
2073 goto out_err;
2074
2075 /* We now require that the mount process passes the remote address */
2076 if (data->host_addrlen != sizeof(server->addr)) {
2077 s = ERR_PTR(-EINVAL);
2078 goto out_free;
2079 }
2080 if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
2081 s = ERR_PTR(-EFAULT);
2082 goto out_free;
2083 }
2084 if (server->addr.sin_family != AF_INET ||
2085 server->addr.sin_addr.s_addr == INADDR_ANY) {
2086 dprintk("%s: mount program didn't pass remote IP address!\n",
2087 __FUNCTION__);
2088 s = ERR_PTR(-EINVAL);
2089 goto out_free;
2090 }
2091
2092 /* Fire up rpciod if not yet running */
2093 s = ERR_PTR(rpciod_up());
2094 if (IS_ERR(s)) {
2095 dprintk("%s: couldn't start rpciod! Error = %ld\n",
2096 __FUNCTION__, PTR_ERR(s));
2097 goto out_free;
2098 }
2099
2100 s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
2101
2102 if (IS_ERR(s) || s->s_root)
2103 goto out_free;
2104
2105 s->s_flags = flags;
2106
2107 error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
2108 if (error) {
2109 up_write(&s->s_umount);
2110 deactivate_super(s);
2111 return ERR_PTR(error);
2112 }
2113 s->s_flags |= MS_ACTIVE;
2114 return s;
2115out_err:
2116 s = (struct super_block *)p;
2117out_free:
2118 kfree(server->mnt_path);
2119 kfree(server->hostname);
2120 kfree(server);
2121 return s;
2122}
2123
2124static void nfs4_kill_super(struct super_block *sb)
2125{
2126 struct nfs_server *server = NFS_SB(sb);
2127
2128 nfs_return_all_delegations(sb);
2129 kill_anon_super(sb);
2130
2131 nfs4_renewd_prepare_shutdown(server);
2132
2133 if (server->client != NULL && !IS_ERR(server->client))
2134 rpc_shutdown_client(server->client);
2135
2136 destroy_nfsv4_state(server);
2137
2138 rpciod_down();
2139
2140 nfs_free_iostats(server->io_stats);
2141 kfree(server->hostname);
2142 kfree(server);
2143}
2144
2145static struct file_system_type nfs4_fs_type = {
2146 .owner = THIS_MODULE,
2147 .name = "nfs4",
2148 .get_sb = nfs4_get_sb,
2149 .kill_sb = nfs4_kill_super,
2150 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
2151};
2152
2153static const int nfs_set_port_min = 0;
2154static const int nfs_set_port_max = 65535;
2155static int param_set_port(const char *val, struct kernel_param *kp)
2156{
2157 char *endp;
2158 int num = simple_strtol(val, &endp, 0);
2159 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
2160 return -EINVAL;
2161 *((int *)kp->arg) = num;
2162 return 0;
2163}
2164
2165module_param_call(callback_tcpport, param_set_port, param_get_int,
2166 &nfs_callback_set_tcpport, 0644);
2167
2168static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
2169{
2170 char *endp;
2171 int num = simple_strtol(val, &endp, 0);
2172 int jif = num * HZ;
2173 if (endp == val || *endp || num < 0 || jif < num)
2174 return -EINVAL;
2175 *((int *)kp->arg) = jif;
2176 return 0;
2177}
2178
2179module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
2180 &nfs_idmap_cache_timeout, 0644);
2181
2182#define nfs4_init_once(nfsi) \
2183 do { \
2184 INIT_LIST_HEAD(&(nfsi)->open_states); \
2185 nfsi->delegation = NULL; \
2186 nfsi->delegation_state = 0; \
2187 init_rwsem(&nfsi->rwsem); \
2188 } while(0)
2189
2190static inline int register_nfs4fs(void)
2191{
2192 int ret;
2193
2194 ret = nfs_register_sysctl();
2195 if (ret != 0)
2196 return ret;
2197 ret = register_filesystem(&nfs4_fs_type);
2198 if (ret != 0)
2199 nfs_unregister_sysctl();
2200 return ret;
2201}
2202
2203static inline void unregister_nfs4fs(void)
2204{
2205 unregister_filesystem(&nfs4_fs_type);
2206 nfs_unregister_sysctl();
2207}
2208#else
2209#define nfs4_init_once(nfsi) \
2210 do { } while (0)
2211#define register_nfs4fs() (0)
2212#define unregister_nfs4fs()
2213#endif 1065#endif
2214 1066
2215extern int nfs_init_nfspagecache(void); 1067struct inode *nfs_alloc_inode(struct super_block *sb)
2216extern void nfs_destroy_nfspagecache(void);
2217extern int nfs_init_readpagecache(void);
2218extern void nfs_destroy_readpagecache(void);
2219extern int nfs_init_writepagecache(void);
2220extern void nfs_destroy_writepagecache(void);
2221#ifdef CONFIG_NFS_DIRECTIO
2222extern int nfs_init_directcache(void);
2223extern void nfs_destroy_directcache(void);
2224#endif
2225
2226static kmem_cache_t * nfs_inode_cachep;
2227
2228static struct inode *nfs_alloc_inode(struct super_block *sb)
2229{ 1068{
2230 struct nfs_inode *nfsi; 1069 struct nfs_inode *nfsi;
2231 nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); 1070 nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
@@ -2244,11 +1083,21 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
2244 return &nfsi->vfs_inode; 1083 return &nfsi->vfs_inode;
2245} 1084}
2246 1085
2247static void nfs_destroy_inode(struct inode *inode) 1086void nfs_destroy_inode(struct inode *inode)
2248{ 1087{
2249 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1088 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
2250} 1089}
2251 1090
1091static inline void nfs4_init_once(struct nfs_inode *nfsi)
1092{
1093#ifdef CONFIG_NFS_V4
1094 INIT_LIST_HEAD(&nfsi->open_states);
1095 nfsi->delegation = NULL;
1096 nfsi->delegation_state = 0;
1097 init_rwsem(&nfsi->rwsem);
1098#endif
1099}
1100
2252static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) 1101static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
2253{ 1102{
2254 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1103 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
@@ -2269,7 +1118,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
2269 } 1118 }
2270} 1119}
2271 1120
2272static int nfs_init_inodecache(void) 1121static int __init nfs_init_inodecache(void)
2273{ 1122{
2274 nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", 1123 nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
2275 sizeof(struct nfs_inode), 1124 sizeof(struct nfs_inode),
@@ -2311,29 +1160,22 @@ static int __init init_nfs_fs(void)
2311 if (err) 1160 if (err)
2312 goto out1; 1161 goto out1;
2313 1162
2314#ifdef CONFIG_NFS_DIRECTIO
2315 err = nfs_init_directcache(); 1163 err = nfs_init_directcache();
2316 if (err) 1164 if (err)
2317 goto out0; 1165 goto out0;
2318#endif
2319 1166
2320#ifdef CONFIG_PROC_FS 1167#ifdef CONFIG_PROC_FS
2321 rpc_proc_register(&nfs_rpcstat); 1168 rpc_proc_register(&nfs_rpcstat);
2322#endif 1169#endif
2323 err = register_filesystem(&nfs_fs_type); 1170 if ((err = register_nfs_fs()) != 0)
2324 if (err)
2325 goto out;
2326 if ((err = register_nfs4fs()) != 0)
2327 goto out; 1171 goto out;
2328 return 0; 1172 return 0;
2329out: 1173out:
2330#ifdef CONFIG_PROC_FS 1174#ifdef CONFIG_PROC_FS
2331 rpc_proc_unregister("nfs"); 1175 rpc_proc_unregister("nfs");
2332#endif 1176#endif
2333#ifdef CONFIG_NFS_DIRECTIO
2334 nfs_destroy_directcache(); 1177 nfs_destroy_directcache();
2335out0: 1178out0:
2336#endif
2337 nfs_destroy_writepagecache(); 1179 nfs_destroy_writepagecache();
2338out1: 1180out1:
2339 nfs_destroy_readpagecache(); 1181 nfs_destroy_readpagecache();
@@ -2347,9 +1189,7 @@ out4:
2347 1189
2348static void __exit exit_nfs_fs(void) 1190static void __exit exit_nfs_fs(void)
2349{ 1191{
2350#ifdef CONFIG_NFS_DIRECTIO
2351 nfs_destroy_directcache(); 1192 nfs_destroy_directcache();
2352#endif
2353 nfs_destroy_writepagecache(); 1193 nfs_destroy_writepagecache();
2354 nfs_destroy_readpagecache(); 1194 nfs_destroy_readpagecache();
2355 nfs_destroy_inodecache(); 1195 nfs_destroy_inodecache();
@@ -2357,8 +1197,7 @@ static void __exit exit_nfs_fs(void)
2357#ifdef CONFIG_PROC_FS 1197#ifdef CONFIG_PROC_FS
2358 rpc_proc_unregister("nfs"); 1198 rpc_proc_unregister("nfs");
2359#endif 1199#endif
2360 unregister_filesystem(&nfs_fs_type); 1200 unregister_nfs_fs();
2361 unregister_nfs4fs();
2362} 1201}
2363 1202
2364/* Not quite true; I just maintain it */ 1203/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
new file mode 100644
index 000000000000..e4f4e5def0fc
--- /dev/null
+++ b/fs/nfs/internal.h
@@ -0,0 +1,186 @@
1/*
2 * NFS internal definitions
3 */
4
5#include <linux/mount.h>
6
/*
 * Parameter bundle passed as the mount data argument of vfs_kern_mount()
 * when a clone/sub mount is created (see nfs_do_clone_mount() in namespace.c).
 */
7struct nfs_clone_mount {
8 const struct super_block *sb; /* superblock of the parent mount */
9 const struct dentry *dentry; /* dentry the new mount is created on */
10 struct nfs_fh *fh; /* filehandle for the new root dentry */
11 struct nfs_fattr *fattr; /* attributes for the new root inode */
/* NOTE(review): the four fields below are not filled in by nfs_do_submount();
 * presumably they serve the NFSv4 referral path - confirm against super.c. */
12 char *hostname;
13 char *mnt_path;
14 struct sockaddr_in *addr;
15 rpc_authflavor_t authflavor;
16};
17
18/* namespace-nfs4.c */
19#ifdef CONFIG_NFS_V4
20extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
21#else
/*
 * Stub compiled when CONFIG_NFS_V4 is not set: ignores both arguments and
 * always returns ERR_PTR(-ENOENT).
 */
22static inline
23struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
24{
25 return ERR_PTR(-ENOENT);
26}
27#endif
28
29/* callback_xdr.c */
30extern struct svc_version nfs4_callback_version1;
31
32/* pagelist.c */
33extern int __init nfs_init_nfspagecache(void);
34extern void nfs_destroy_nfspagecache(void);
35extern int __init nfs_init_readpagecache(void);
36extern void nfs_destroy_readpagecache(void);
37extern int __init nfs_init_writepagecache(void);
38extern void nfs_destroy_writepagecache(void);
39
40#ifdef CONFIG_NFS_DIRECTIO
41extern int __init nfs_init_directcache(void);
42extern void nfs_destroy_directcache(void);
43#else
44#define nfs_init_directcache() (0)
45#define nfs_destroy_directcache() do {} while(0)
46#endif
47
48/* nfs2xdr.c */
49extern struct rpc_procinfo nfs_procedures[];
50extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
51
52/* nfs3xdr.c */
53extern struct rpc_procinfo nfs3_procedures[];
54extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
55
56/* nfs4xdr.c */
57extern int nfs_stat_to_errno(int);
58extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
59
60/* nfs4proc.c */
61#ifdef CONFIG_NFS_V4
62extern struct rpc_procinfo nfs4_procedures[];
63
64extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
65 struct nfs4_fs_locations *fs_locations,
66 struct page *page);
67#endif
68
69/* inode.c */
70extern struct inode *nfs_alloc_inode(struct super_block *sb);
71extern void nfs_destroy_inode(struct inode *);
72extern int nfs_write_inode(struct inode *,int);
73extern void nfs_clear_inode(struct inode *);
74#ifdef CONFIG_NFS_V4
75extern void nfs4_clear_inode(struct inode *);
76#endif
77
78/* super.c */
79extern struct file_system_type nfs_referral_nfs4_fs_type;
80extern struct file_system_type clone_nfs_fs_type;
81#ifdef CONFIG_NFS_V4
82extern struct file_system_type clone_nfs4_fs_type;
83#endif
84
85extern struct rpc_stat nfs_rpcstat;
86
87extern int __init register_nfs_fs(void);
88extern void __exit unregister_nfs_fs(void);
89
90/* namespace.c */
91extern char *nfs_path(const char *base, const struct dentry *dentry,
92 char *buffer, ssize_t buflen);
93
/*
 * Build the mount path for @dentry as a string.
 *
 * With CONFIG_NFS_V4 the path is produced by nfs_path(), using the
 * mnt_path recorded in the dentry's NFS server structure as the prefix.
 * Without CONFIG_NFS_V4 the function always returns NULL.
 */
static inline char *
nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
{
#ifndef CONFIG_NFS_V4
	return NULL;
#else
	const char *prefix = NFS_SB(dentry->d_sb)->mnt_path;

	return nfs_path(prefix, dentry, buffer, buflen);
#endif
}
106
107/*
108 * Determine the device name as a string
109 */
110static inline char *nfs_devname(const struct vfsmount *mnt_parent,
111 const struct dentry *dentry,
112 char *buffer, ssize_t buflen)
113{
114 return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
115}
116
/*
 * Determine the actual block size (and log2 thereof)
 *
 * @bsize:   requested block size in bytes
 * @nrbitsp: optional; when non-NULL it receives log2 of the returned size
 *
 * A size that is already a power of two is returned unchanged when
 * @nrbitsp is NULL.  In every other case the size is rounded down to the
 * nearest power of two, capped at 2^31; a size of 0 yields 1 (0 bits).
 */
static inline
unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
{
	/* make sure blocksize is a power of two */
	if ((bsize & (bsize - 1)) || nrbitsp) {
		unsigned char nrbits;

		/*
		 * Find the highest set bit, treating everything at or above
		 * bit 31 as bit 31.  The arithmetic is done on unsigned long:
		 * "1 << 31" on a signed int is undefined and, on 64-bit,
		 * sign-extends into a bogus mask and result.
		 */
		for (nrbits = 31; nrbits && !(bsize >> nrbits); nrbits--)
			;
		bsize = 1UL << nrbits;
		if (nrbitsp)
			*nrbitsp = nrbits;
	}

	return bsize;
}
136
137/*
138 * Calculate the number of 512byte blocks used.
139 */
140static inline unsigned long nfs_calc_block_size(u64 tsize)
141{
142 loff_t used = (tsize + 511) >> 9;
143 return (used > ULONG_MAX) ? ULONG_MAX : used;
144}
145
146/*
147 * Compute and set NFS server blocksize
148 */
149static inline
150unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
151{
152 if (bsize < NFS_MIN_FILE_IO_SIZE)
153 bsize = NFS_DEF_FILE_IO_SIZE;
154 else if (bsize >= NFS_MAX_FILE_IO_SIZE)
155 bsize = NFS_MAX_FILE_IO_SIZE;
156
157 return nfs_block_bits(bsize, nrbitsp);
158}
159
160/*
161 * Determine the maximum file size for a superblock
162 */
/*
 * Store @maxfilesize (cast to loff_t) in sb->s_maxbytes, then replace it
 * with MAX_LFS_FILESIZE if the stored value is above MAX_LFS_FILESIZE or
 * is <= 0.
 */
163static inline
164void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
165{
166 sb->s_maxbytes = (loff_t)maxfilesize;
/* NOTE(review): whether "<= 0" can catch negative values depends on the
 * signedness of s_maxbytes, which is not visible here - confirm. */
167 if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
168 sb->s_maxbytes = MAX_LFS_FILESIZE;
169}
170
/*
 * Check if the string represents a "valid" IPv4 address
 *
 * @buf: NUL-terminated dotted-quad candidate
 *
 * Returns 0 when four decimal fields separated by dots are parsed and each
 * lies in the range 0..255, -EINVAL otherwise.  Text following the fourth
 * field is not examined.
 */
static inline int valid_ipaddr4(const char *buf)
{
	int rc, count, in[4];

	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
	if (rc != 4)
		return -EINVAL;
	for (count = 0; count < 4; count++) {
		/* %d accepts a sign, so negative fields must be rejected too */
		if (in[count] < 0 || in[count] > 255)
			return -EINVAL;
	}
	return 0;
}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
new file mode 100644
index 000000000000..19b98ca468eb
--- /dev/null
+++ b/fs/nfs/namespace.c
@@ -0,0 +1,229 @@
1/*
2 * linux/fs/nfs/namespace.c
3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 *
6 * NFS namespace
7 */
8
9#include <linux/config.h>
10
11#include <linux/dcache.h>
12#include <linux/mount.h>
13#include <linux/namei.h>
14#include <linux/nfs_fs.h>
15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h>
18#include "internal.h"
19
20#define NFSDBG_FACILITY NFSDBG_VFS
21
22static void nfs_expire_automounts(void *list);
23
24LIST_HEAD(nfs_automount_list);
25static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
26int nfs_mountpoint_expiry_timeout = 500 * HZ;
27
28/*
29 * nfs_path - reconstruct the path given an arbitrary dentry
30 * @base - arbitrary string to prepend to the path
31 * @dentry - pointer to dentry
32 * @buffer - result buffer
33 * @buflen - length of buffer
34 *
35 * Helper function for constructing the path from the
36 * root dentry to an arbitrary hashed dentry.
37 *
38 * This is mainly for use in figuring out the path on the
39 * server side when automounting on top of an existing partition.
40 */
41char *nfs_path(const char *base, const struct dentry *dentry,
42 char *buffer, ssize_t buflen)
43{
44 char *end = buffer+buflen;
45 int namelen;
46
47 *--end = '\0';
48 buflen--;
49 spin_lock(&dcache_lock);
50 while (!IS_ROOT(dentry)) {
51 namelen = dentry->d_name.len;
52 buflen -= namelen + 1;
53 if (buflen < 0)
54 goto Elong;
55 end -= namelen;
56 memcpy(end, dentry->d_name.name, namelen);
57 *--end = '/';
58 dentry = dentry->d_parent;
59 }
60 spin_unlock(&dcache_lock);
61 namelen = strlen(base);
62 /* Strip off excess slashes in base string */
63 while (namelen > 0 && base[namelen - 1] == '/')
64 namelen--;
65 buflen -= namelen;
66 if (buflen < 0)
67 goto Elong;
68 end -= namelen;
69 memcpy(end, base, namelen);
70 return end;
71Elong:
72 return ERR_PTR(-ENAMETOOLONG);
73}
74
75/*
76 * nfs_follow_mountpoint - handle crossing a mountpoint on the server
77 * @dentry - dentry of mountpoint
78 * @nd - nameidata info
79 *
80 * When we encounter a mountpoint on the server, we want to set up
81 * a mountpoint on the client too, to prevent inode numbers from
82 * colliding, and to allow "df" to work properly.
83 * On NFSv4, we also want to allow for the fact that different
84 * filesystems may be migrated to different servers in a failover
85 * situation, and that different filesystems may want to use
86 * different security flavours.
87 */
/*
 * follow_link handler for NFS mountpoint/referral inodes.
 *
 * Points nd->dentry at @dentry, looks the name up again on the server and
 * mounts either a referral or a submount on top of it, then moves @nd to
 * the root of the new mount.  Returns ERR_PTR(err); err is 0 on the
 * out_follow path.  On the out_err path the nameidata path is released.
 */
88static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
89{
90 struct vfsmount *mnt;
91 struct nfs_server *server = NFS_SERVER(dentry->d_inode);
92 struct dentry *parent;
93 struct nfs_fh fh;
94 struct nfs_fattr fattr;
95 int err;
96
97 BUG_ON(IS_ROOT(dentry));
98 dprintk("%s: enter\n", __FUNCTION__);
/* swap the dentry reference held by nd for one on @dentry */
99 dput(nd->dentry);
100 nd->dentry = dget(dentry);
/* something is already mounted here: just walk down into it */
101 if (d_mountpoint(nd->dentry))
102 goto out_follow;
103 /* Look it up again */
104 parent = dget_parent(nd->dentry);
105 err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
106 dput(parent);
107 if (err != 0)
108 goto out_err;
109
/* the attributes returned by the lookup select referral vs. plain submount */
110 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL)
111 mnt = nfs_do_refmount(nd->mnt, nd->dentry);
112 else
113 mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr);
/* err is only meaningful here if mnt is an error pointer; it is
 * overwritten by do_add_mount() below otherwise */
114 err = PTR_ERR(mnt);
115 if (IS_ERR(mnt))
116 goto out_err;
117
/* extra reference taken before do_add_mount(); dropped again if it fails */
118 mntget(mnt);
119 err = do_add_mount(mnt, nd, nd->mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list);
120 if (err < 0) {
121 mntput(mnt);
/* -EBUSY is not treated as a failure: follow whatever is mounted there */
122 if (err == -EBUSY)
123 goto out_follow;
124 goto out_err;
125 }
/* move nd from the parent mount to the root of the new mount */
126 mntput(nd->mnt);
127 dput(nd->dentry);
128 nd->mnt = mnt;
129 nd->dentry = dget(mnt->mnt_root);
/* arm the expiry work that runs nfs_expire_automounts() */
130 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
131out:
132 dprintk("%s: done, returned %d\n", __FUNCTION__, err);
133 return ERR_PTR(err);
134out_err:
135 path_release(nd);
136 goto out;
137out_follow:
138 while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
139 ;
140 err = 0;
141 goto out;
142}
143
/*
 * Inode operations for server-side mountpoints: follow_link is routed to
 * nfs_follow_mountpoint() and getattr to nfs_getattr().
 */
144struct inode_operations nfs_mountpoint_inode_operations = {
145 .follow_link = nfs_follow_mountpoint,
146 .getattr = nfs_getattr,
147};
148
/*
 * Inode operations for referrals: same follow_link handler, but no
 * getattr method is set.
 */
149struct inode_operations nfs_referral_inode_operations = {
150 .follow_link = nfs_follow_mountpoint,
151};
152
153static void nfs_expire_automounts(void *data)
154{
155 struct list_head *list = (struct list_head *)data;
156
157 mark_mounts_for_expiry(list);
158 if (!list_empty(list))
159 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
160}
161
162void nfs_release_automount_timer(void)
163{
164 if (list_empty(&nfs_automount_list)) {
165 cancel_delayed_work(&nfs_automount_task);
166 flush_scheduled_work();
167 }
168}
169
170/*
171 * Clone a mountpoint of the appropriate type
172 */
173static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
174 struct nfs_clone_mount *mountdata)
175{
176#ifdef CONFIG_NFS_V4
177 struct vfsmount *mnt = NULL;
178 switch (server->rpc_ops->version) {
179 case 2:
180 case 3:
181 mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
182 break;
183 case 4:
184 mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
185 }
186 return mnt;
187#else
188 return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
189#endif
190}
191
192/**
193 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
194 * @mnt_parent - mountpoint of parent directory
195 * @dentry - parent directory
196 * @fh - filehandle for new root dentry
197 * @fattr - attributes for new root inode
198 *
199 */
/*
 * Returns the new vfsmount from nfs_do_clone_mount(), ERR_PTR(-ENOMEM) if
 * no scratch page could be allocated, or the error pointer produced while
 * building the device name.
 * NOTE(review): nfs_follow_mountpoint() passes the dentry being mounted on
 * as @dentry, not its parent directory - confirm the kerneldoc wording.
 */
200struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
201 const struct dentry *dentry, struct nfs_fh *fh,
202 struct nfs_fattr *fattr)
203{
/* hostname, mnt_path, addr and authflavor are left zero-initialised */
204 struct nfs_clone_mount mountdata = {
205 .sb = mnt_parent->mnt_sb,
206 .dentry = dentry,
207 .fh = fh,
208 .fattr = fattr,
209 };
/* preset to the value returned when the page allocation fails */
210 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
/* one page of scratch space for building the device name */
211 char *page = (char *) __get_free_page(GFP_USER);
212 char *devname;
213
214 dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
215 dentry->d_parent->d_name.name,
216 dentry->d_name.name);
217 if (page == NULL)
218 goto out;
219 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
/* if devname is an error pointer, it is returned to the caller as-is */
220 mnt = (struct vfsmount *)devname;
221 if (IS_ERR(devname))
222 goto free_page;
223 mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata);
224free_page:
225 free_page((unsigned long)page);
226out:
227 dprintk("%s: done\n", __FUNCTION__);
228 return mnt;
229}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index f0015fa876e1..67391eef6b93 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -23,12 +23,11 @@
23#include <linux/nfs.h> 23#include <linux/nfs.h>
24#include <linux/nfs2.h> 24#include <linux/nfs2.h>
25#include <linux/nfs_fs.h> 25#include <linux/nfs_fs.h>
26#include "internal.h"
26 27
27#define NFSDBG_FACILITY NFSDBG_XDR 28#define NFSDBG_FACILITY NFSDBG_XDR
28/* #define NFS_PARANOIA 1 */ 29/* #define NFS_PARANOIA 1 */
29 30
30extern int nfs_stat_to_errno(int stat);
31
32/* Mapping from NFS error code to "errno" error code. */ 31/* Mapping from NFS error code to "errno" error code. */
33#define errno_NFSERR_IO EIO 32#define errno_NFSERR_IO EIO
34 33
@@ -131,7 +130,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
131 fattr->du.nfs2.blocksize = ntohl(*p++); 130 fattr->du.nfs2.blocksize = ntohl(*p++);
132 rdev = ntohl(*p++); 131 rdev = ntohl(*p++);
133 fattr->du.nfs2.blocks = ntohl(*p++); 132 fattr->du.nfs2.blocks = ntohl(*p++);
134 fattr->fsid_u.nfs3 = ntohl(*p++); 133 fattr->fsid.major = ntohl(*p++);
134 fattr->fsid.minor = 0;
135 fattr->fileid = ntohl(*p++); 135 fattr->fileid = ntohl(*p++);
136 p = xdr_decode_time(p, &fattr->atime); 136 p = xdr_decode_time(p, &fattr->atime);
137 p = xdr_decode_time(p, &fattr->mtime); 137 p = xdr_decode_time(p, &fattr->mtime);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 33287879bd23..7322da4d2055 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -172,8 +172,10 @@ static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
172 inode->i_ino, acl, dfacl); 172 inode->i_ino, acl, dfacl);
173 spin_lock(&inode->i_lock); 173 spin_lock(&inode->i_lock);
174 __nfs3_forget_cached_acls(NFS_I(inode)); 174 __nfs3_forget_cached_acls(NFS_I(inode));
175 nfsi->acl_access = posix_acl_dup(acl); 175 if (!IS_ERR(acl))
176 nfsi->acl_default = posix_acl_dup(dfacl); 176 nfsi->acl_access = posix_acl_dup(acl);
177 if (!IS_ERR(dfacl))
178 nfsi->acl_default = posix_acl_dup(dfacl);
177 spin_unlock(&inode->i_lock); 179 spin_unlock(&inode->i_lock);
178} 180}
179 181
@@ -254,7 +256,9 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
254 res.acl_access = NULL; 256 res.acl_access = NULL;
255 } 257 }
256 } 258 }
257 nfs3_cache_acls(inode, res.acl_access, res.acl_default); 259 nfs3_cache_acls(inode,
260 (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL),
261 (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL));
258 262
259 switch(type) { 263 switch(type) {
260 case ACL_TYPE_ACCESS: 264 case ACL_TYPE_ACCESS:
@@ -329,6 +333,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
329 switch (status) { 333 switch (status) {
330 case 0: 334 case 0:
331 status = nfs_refresh_inode(inode, &fattr); 335 status = nfs_refresh_inode(inode, &fattr);
336 nfs3_cache_acls(inode, acl, dfacl);
332 break; 337 break;
333 case -EPFNOSUPPORT: 338 case -EPFNOSUPPORT:
334 case -EPROTONOSUPPORT: 339 case -EPROTONOSUPPORT:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index cf186f0d2b3b..7143b1f82cea 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -20,11 +20,10 @@
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "iostat.h" 22#include "iostat.h"
23#include "internal.h"
23 24
24#define NFSDBG_FACILITY NFSDBG_PROC 25#define NFSDBG_FACILITY NFSDBG_PROC
25 26
26extern struct rpc_procinfo nfs3_procedures[];
27
28/* A wrapper to handle the EJUKEBOX error message */ 27/* A wrapper to handle the EJUKEBOX error message */
29static int 28static int
30nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) 29nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
@@ -809,8 +808,6 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
809 return status; 808 return status;
810} 809}
811 810
812extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
813
814static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
815{ 812{
816 if (nfs3_async_handle_jukebox(task, data->inode)) 813 if (nfs3_async_handle_jukebox(task, data->inode))
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index ec233619687e..0250269e9753 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -22,14 +22,13 @@
22#include <linux/nfs3.h> 22#include <linux/nfs3.h>
23#include <linux/nfs_fs.h> 23#include <linux/nfs_fs.h>
24#include <linux/nfsacl.h> 24#include <linux/nfsacl.h>
25#include "internal.h"
25 26
26#define NFSDBG_FACILITY NFSDBG_XDR 27#define NFSDBG_FACILITY NFSDBG_XDR
27 28
28/* Mapping from NFS error code to "errno" error code. */ 29/* Mapping from NFS error code to "errno" error code. */
29#define errno_NFSERR_IO EIO 30#define errno_NFSERR_IO EIO
30 31
31extern int nfs_stat_to_errno(int);
32
33/* 32/*
34 * Declare the space requirements for NFS arguments and replies as 33 * Declare the space requirements for NFS arguments and replies as
35 * number of 32bit-words 34 * number of 32bit-words
@@ -166,7 +165,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
166 if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) 165 if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
167 fattr->rdev = 0; 166 fattr->rdev = 0;
168 167
169 p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); 168 p = xdr_decode_hyper(p, &fattr->fsid.major);
169 fattr->fsid.minor = 0;
170 p = xdr_decode_hyper(p, &fattr->fileid); 170 p = xdr_decode_hyper(p, &fattr->fileid);
171 p = xdr_decode_time3(p, &fattr->atime); 171 p = xdr_decode_time3(p, &fattr->atime);
172 p = xdr_decode_time3(p, &fattr->mtime); 172 p = xdr_decode_time3(p, &fattr->mtime);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0f5e4e7cddec..9a102860df37 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -217,6 +217,9 @@ extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 217extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
218extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 218extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
219extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 219extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
220extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
221extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
222 struct nfs4_fs_locations *fs_locations, struct page *page);
220 223
221extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; 224extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
222extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; 225extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
@@ -225,6 +228,7 @@ extern const u32 nfs4_fattr_bitmap[2];
225extern const u32 nfs4_statfs_bitmap[2]; 228extern const u32 nfs4_statfs_bitmap[2];
226extern const u32 nfs4_pathconf_bitmap[2]; 229extern const u32 nfs4_pathconf_bitmap[2];
227extern const u32 nfs4_fsinfo_bitmap[2]; 230extern const u32 nfs4_fsinfo_bitmap[2];
231extern const u32 nfs4_fs_locations_bitmap[2];
228 232
229/* nfs4renewd.c */ 233/* nfs4renewd.c */
230extern void nfs4_schedule_state_renewal(struct nfs4_client *); 234extern void nfs4_schedule_state_renewal(struct nfs4_client *);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
new file mode 100644
index 000000000000..ea38d27b74e6
--- /dev/null
+++ b/fs/nfs/nfs4namespace.c
@@ -0,0 +1,201 @@
1/*
2 * linux/fs/nfs/nfs4namespace.c
3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 *
6 * NFSv4 namespace
7 */
8
9#include <linux/config.h>
10
11#include <linux/dcache.h>
12#include <linux/mount.h>
13#include <linux/namei.h>
14#include <linux/nfs_fs.h>
15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h>
18#include <linux/inet.h>
19#include "internal.h"
20
21#define NFSDBG_FACILITY NFSDBG_VFS
22
23/*
24 * Check if fs_root is valid
25 */
26static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
27 char *buffer, ssize_t buflen)
28{
29 char *end = buffer + buflen;
30 int n;
31
32 *--end = '\0';
33 buflen--;
34
35 n = pathname->ncomponents;
36 while (--n >= 0) {
37 struct nfs4_string *component = &pathname->components[n];
38 buflen -= component->len + 1;
39 if (buflen < 0)
40 goto Elong;
41 end -= component->len;
42 memcpy(end, component->data, component->len);
43 *--end = '/';
44 }
45 return end;
46Elong:
47 return ERR_PTR(-ENAMETOOLONG);
48}
49
50
51/**
52 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
53 * @mnt_parent - mountpoint of parent directory
54 * @dentry - parent directory
55 * @fspath - fs path returned in fs_locations
56 * @mntpath - mount path to new server
57 * @hostname - hostname of new server
58 * @addr - host addr of new server
59 *
60 */
61static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
62 const struct dentry *dentry,
63 struct nfs4_fs_locations *locations)
64{
65 struct vfsmount *mnt = ERR_PTR(-ENOENT);
66 struct nfs_clone_mount mountdata = {
67 .sb = mnt_parent->mnt_sb,
68 .dentry = dentry,
69 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
70 };
71 char *page, *page2;
72 char *path, *fs_path;
73 char *devname;
74 int loc, s;
75
76 if (locations == NULL || locations->nlocations <= 0)
77 goto out;
78
79 dprintk("%s: referral at %s/%s\n", __FUNCTION__,
80 dentry->d_parent->d_name.name, dentry->d_name.name);
81
82 /* Ensure fs path is a prefix of current dentry path */
83 page = (char *) __get_free_page(GFP_USER);
84 if (page == NULL)
85 goto out;
86 page2 = (char *) __get_free_page(GFP_USER);
87 if (page2 == NULL)
88 goto out;
89
90 path = nfs4_path(dentry, page, PAGE_SIZE);
91 if (IS_ERR(path))
92 goto out_free;
93
94 fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
95 if (IS_ERR(fs_path))
96 goto out_free;
97
98 if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
99 dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
100 goto out_free;
101 }
102
103 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
104 if (IS_ERR(devname)) {
105 mnt = (struct vfsmount *)devname;
106 goto out_free;
107 }
108
109 loc = 0;
110 while (loc < locations->nlocations && IS_ERR(mnt)) {
111 struct nfs4_fs_location *location = &locations->locations[loc];
112 char *mnt_path;
113
114 if (location == NULL || location->nservers <= 0 ||
115 location->rootpath.ncomponents == 0) {
116 loc++;
117 continue;
118 }
119
120 mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
121 if (IS_ERR(mnt_path)) {
122 loc++;
123 continue;
124 }
125 mountdata.mnt_path = mnt_path;
126
127 s = 0;
128 while (s < location->nservers) {
129 struct sockaddr_in addr = {};
130
131 if (location->servers[s].len <= 0 ||
132 valid_ipaddr4(location->servers[s].data) < 0) {
133 s++;
134 continue;
135 }
136
137 mountdata.hostname = location->servers[s].data;
138 addr.sin_addr.s_addr = in_aton(mountdata.hostname);
139 addr.sin_family = AF_INET;
140 addr.sin_port = htons(NFS_PORT);
141 mountdata.addr = &addr;
142
143 mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
144 if (!IS_ERR(mnt)) {
145 break;
146 }
147 s++;
148 }
149 loc++;
150 }
151
152out_free:
153 free_page((unsigned long)page);
154 free_page((unsigned long)page2);
155out:
156 dprintk("%s: done\n", __FUNCTION__);
157 return mnt;
158}
159
160/*
161 * nfs_do_refmount - handle crossing a referral on server
162 * @dentry - dentry of referral
163 * @nd - nameidata info
164 *
165 */
166struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
167{
168 struct vfsmount *mnt = ERR_PTR(-ENOENT);
169 struct dentry *parent;
170 struct nfs4_fs_locations *fs_locations = NULL;
171 struct page *page;
172 int err;
173
174 /* BUG_ON(IS_ROOT(dentry)); */
175 dprintk("%s: enter\n", __FUNCTION__);
176
177 page = alloc_page(GFP_KERNEL);
178 if (page == NULL)
179 goto out;
180
181 fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
182 if (fs_locations == NULL)
183 goto out_free;
184
185 /* Get locations */
186 parent = dget_parent(dentry);
187 dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
188 err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
189 dput(parent);
190 if (err != 0 || fs_locations->nlocations <= 0 ||
191 fs_locations->fs_path.ncomponents <= 0)
192 goto out_free;
193
194 mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations);
195out_free:
196 __free_page(page);
197 kfree(fs_locations);
198out:
199 dprintk("%s: done\n", __FUNCTION__);
200 return mnt;
201}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d86c0db7b1e8..b4916b092194 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,8 +65,6 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
68extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
69extern struct rpc_procinfo nfs4_procedures[];
70 68
71/* Prevent leaks of NFSv4 errors into userland */ 69/* Prevent leaks of NFSv4 errors into userland */
72int nfs4_map_errors(int err) 70int nfs4_map_errors(int err)
@@ -121,6 +119,25 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
121 0 119 0
122}; 120};
123 121
122const u32 nfs4_fs_locations_bitmap[2] = {
123 FATTR4_WORD0_TYPE
124 | FATTR4_WORD0_CHANGE
125 | FATTR4_WORD0_SIZE
126 | FATTR4_WORD0_FSID
127 | FATTR4_WORD0_FILEID
128 | FATTR4_WORD0_FS_LOCATIONS,
129 FATTR4_WORD1_MODE
130 | FATTR4_WORD1_NUMLINKS
131 | FATTR4_WORD1_OWNER
132 | FATTR4_WORD1_OWNER_GROUP
133 | FATTR4_WORD1_RAWDEV
134 | FATTR4_WORD1_SPACE_USED
135 | FATTR4_WORD1_TIME_ACCESS
136 | FATTR4_WORD1_TIME_METADATA
137 | FATTR4_WORD1_TIME_MODIFY
138 | FATTR4_WORD1_MOUNTED_ON_FILEID
139};
140
124static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, 141static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
125 struct nfs4_readdir_arg *readdir) 142 struct nfs4_readdir_arg *readdir)
126{ 143{
@@ -185,15 +202,15 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
185 spin_unlock(&clp->cl_lock); 202 spin_unlock(&clp->cl_lock);
186} 203}
187 204
188static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo) 205static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
189{ 206{
190 struct nfs_inode *nfsi = NFS_I(inode); 207 struct nfs_inode *nfsi = NFS_I(dir);
191 208
192 spin_lock(&inode->i_lock); 209 spin_lock(&dir->i_lock);
193 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 210 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
194 if (cinfo->before == nfsi->change_attr && cinfo->atomic) 211 if (cinfo->before == nfsi->change_attr && cinfo->atomic)
195 nfsi->change_attr = cinfo->after; 212 nfsi->change_attr = cinfo->after;
196 spin_unlock(&inode->i_lock); 213 spin_unlock(&dir->i_lock);
197} 214}
198 215
199struct nfs4_opendata { 216struct nfs4_opendata {
@@ -1331,7 +1348,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1331 return status; 1348 return status;
1332} 1349}
1333 1350
1334static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 1351int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
1335{ 1352{
1336 struct nfs4_exception exception = { }; 1353 struct nfs4_exception exception = { };
1337 int err; 1354 int err;
@@ -1443,6 +1460,50 @@ out:
1443 return nfs4_map_errors(status); 1460 return nfs4_map_errors(status);
1444} 1461}
1445 1462
1463/*
1464 * Get locations and (maybe) other attributes of a referral.
1465 * Note that we'll actually follow the referral later when
1466 * we detect fsid mismatch in inode revalidation
1467 */
1468static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle)
1469{
1470 int status = -ENOMEM;
1471 struct page *page = NULL;
1472 struct nfs4_fs_locations *locations = NULL;
1473 struct dentry dentry = {};
1474
1475 page = alloc_page(GFP_KERNEL);
1476 if (page == NULL)
1477 goto out;
1478 locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
1479 if (locations == NULL)
1480 goto out;
1481
1482 dentry.d_name.name = name->name;
1483 dentry.d_name.len = name->len;
1484 status = nfs4_proc_fs_locations(dir, &dentry, locations, page);
1485 if (status != 0)
1486 goto out;
1487 /* Make sure server returned a different fsid for the referral */
1488 if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
1489 dprintk("%s: server did not return a different fsid for a referral at %s\n", __FUNCTION__, name->name);
1490 status = -EIO;
1491 goto out;
1492 }
1493
1494 memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
1495 fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL;
1496 if (!fattr->mode)
1497 fattr->mode = S_IFDIR;
1498 memset(fhandle, 0, sizeof(struct nfs_fh));
1499out:
1500 if (page)
1501 __free_page(page);
1502 if (locations)
1503 kfree(locations);
1504 return status;
1505}
1506
1446static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1507static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1447{ 1508{
1448 struct nfs4_getattr_arg args = { 1509 struct nfs4_getattr_arg args = {
@@ -1547,6 +1608,8 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1547 1608
1548 dprintk("NFS call lookup %s\n", name->name); 1609 dprintk("NFS call lookup %s\n", name->name);
1549 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1610 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1611 if (status == -NFS4ERR_MOVED)
1612 status = nfs4_get_referral(dir, name, fattr, fhandle);
1550 dprintk("NFS reply lookup: %d\n", status); 1613 dprintk("NFS reply lookup: %d\n", status);
1551 return status; 1614 return status;
1552} 1615}
@@ -2008,7 +2071,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2008 if (!status) { 2071 if (!status) {
2009 update_changeattr(dir, &res.cinfo); 2072 update_changeattr(dir, &res.cinfo);
2010 nfs_post_op_update_inode(dir, res.dir_attr); 2073 nfs_post_op_update_inode(dir, res.dir_attr);
2011 nfs_refresh_inode(inode, res.fattr); 2074 nfs_post_op_update_inode(inode, res.fattr);
2012 } 2075 }
2013 2076
2014 return status; 2077 return status;
@@ -3570,6 +3633,36 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
3570 return len; 3633 return len;
3571} 3634}
3572 3635
3636int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
3637 struct nfs4_fs_locations *fs_locations, struct page *page)
3638{
3639 struct nfs_server *server = NFS_SERVER(dir);
3640 u32 bitmask[2] = {
3641 [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
3642 [1] = FATTR4_WORD1_MOUNTED_ON_FILEID,
3643 };
3644 struct nfs4_fs_locations_arg args = {
3645 .dir_fh = NFS_FH(dir),
3646 .name = &dentry->d_name,
3647 .page = page,
3648 .bitmask = bitmask,
3649 };
3650 struct rpc_message msg = {
3651 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
3652 .rpc_argp = &args,
3653 .rpc_resp = fs_locations,
3654 };
3655 int status;
3656
3657 dprintk("%s: start\n", __FUNCTION__);
3658 fs_locations->fattr.valid = 0;
3659 fs_locations->server = server;
3660 fs_locations->nlocations = 0;
3661 status = rpc_call_sync(server->client, &msg, 0);
3662 dprintk("%s: returned status = %d\n", __FUNCTION__, status);
3663 return status;
3664}
3665
3573struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { 3666struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
3574 .recover_open = nfs4_open_reclaim, 3667 .recover_open = nfs4_open_reclaim,
3575 .recover_lock = nfs4_lock_reclaim, 3668 .recover_lock = nfs4_lock_reclaim,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 96e5b82c153b..090a36b07a22 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,7 +38,6 @@
38 * subsequent patch. 38 * subsequent patch.
39 */ 39 */
40 40
41#include <linux/config.h>
42#include <linux/slab.h> 41#include <linux/slab.h>
43#include <linux/smp_lock.h> 42#include <linux/smp_lock.h>
44#include <linux/nfs_fs.h> 43#include <linux/nfs_fs.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 7c5d70efe720..1750d996f49f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -411,6 +411,15 @@ static int nfs_stat_to_errno(int);
411#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ 411#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \
412 decode_putfh_maxsz + \ 412 decode_putfh_maxsz + \
413 op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) 413 op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
414#define NFS4_enc_fs_locations_sz \
415 (compound_encode_hdr_maxsz + \
416 encode_putfh_maxsz + \
417 encode_getattr_maxsz)
418#define NFS4_dec_fs_locations_sz \
419 (compound_decode_hdr_maxsz + \
420 decode_putfh_maxsz + \
421 op_decode_hdr_maxsz + \
422 nfs4_fattr_bitmap_maxsz)
414 423
415static struct { 424static struct {
416 unsigned int mode; 425 unsigned int mode;
@@ -722,6 +731,13 @@ static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
722 bitmask[1] & nfs4_fsinfo_bitmap[1]); 731 bitmask[1] & nfs4_fsinfo_bitmap[1]);
723} 732}
724 733
734static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask)
735{
736 return encode_getattr_two(xdr,
737 bitmask[0] & nfs4_fs_locations_bitmap[0],
738 bitmask[1] & nfs4_fs_locations_bitmap[1]);
739}
740
725static int encode_getfh(struct xdr_stream *xdr) 741static int encode_getfh(struct xdr_stream *xdr)
726{ 742{
727 uint32_t *p; 743 uint32_t *p;
@@ -2003,6 +2019,38 @@ out:
2003} 2019}
2004 2020
2005/* 2021/*
2022 * Encode FS_LOCATIONS request
2023 */
2024static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args)
2025{
2026 struct xdr_stream xdr;
2027 struct compound_hdr hdr = {
2028 .nops = 3,
2029 };
2030 struct rpc_auth *auth = req->rq_task->tk_auth;
2031 int replen;
2032 int status;
2033
2034 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2035 encode_compound_hdr(&xdr, &hdr);
2036 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
2037 goto out;
2038 if ((status = encode_lookup(&xdr, args->name)) != 0)
2039 goto out;
2040 if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0)
2041 goto out;
2042 /* set up reply
2043 * toplevel_status + OP_PUTFH + status
2044 * + OP_LOOKUP + status + OP_GETATTR + status = 7
2045 */
2046 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
2047 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
2048 0, PAGE_SIZE);
2049out:
2050 return status;
2051}
2052
2053/*
2006 * START OF "GENERIC" DECODE ROUTINES. 2054 * START OF "GENERIC" DECODE ROUTINES.
2007 * These may look a little ugly since they are imported from a "generic" 2055 * These may look a little ugly since they are imported from a "generic"
2008 * set of XDR encode/decode routines which are intended to be shared by 2056 * set of XDR encode/decode routines which are intended to be shared by
@@ -2036,7 +2084,7 @@ out:
2036 } \ 2084 } \
2037} while (0) 2085} while (0)
2038 2086
2039static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) 2087static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
2040{ 2088{
2041 uint32_t *p; 2089 uint32_t *p;
2042 2090
@@ -2087,7 +2135,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2087static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) 2135static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
2088{ 2136{
2089 uint32_t *p; 2137 uint32_t *p;
2090 uint32_t strlen; 2138 unsigned int strlen;
2091 char *str; 2139 char *str;
2092 2140
2093 READ_BUF(12); 2141 READ_BUF(12);
@@ -2217,7 +2265,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2217 return 0; 2265 return 0;
2218} 2266}
2219 2267
2220static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid) 2268static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
2221{ 2269{
2222 uint32_t *p; 2270 uint32_t *p;
2223 2271
@@ -2285,6 +2333,22 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2285 return 0; 2333 return 0;
2286} 2334}
2287 2335
2336static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2337{
2338 uint32_t *p;
2339
2340 *fileid = 0;
2341 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
2342 return -EIO;
2343 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
2344 READ_BUF(8);
2345 READ64(*fileid);
2346 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2347 }
2348 dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid);
2349 return 0;
2350}
2351
2288static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2352static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2289{ 2353{
2290 uint32_t *p; 2354 uint32_t *p;
@@ -2336,6 +2400,116 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2336 return status; 2400 return status;
2337} 2401}
2338 2402
2403static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
2404{
2405 int n;
2406 uint32_t *p;
2407 int status = 0;
2408
2409 READ_BUF(4);
2410 READ32(n);
2411 if (n < 0)
2412 goto out_eio;
2413 if (n == 0)
2414 goto root_path;
2415 dprintk("path ");
2416 path->ncomponents = 0;
2417 while (path->ncomponents < n) {
2418 struct nfs4_string *component = &path->components[path->ncomponents];
2419 status = decode_opaque_inline(xdr, &component->len, &component->data);
2420 if (unlikely(status != 0))
2421 goto out_eio;
2422 if (path->ncomponents != n)
2423 dprintk("/");
2424 dprintk("%s", component->data);
2425 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
2426 path->ncomponents++;
2427 else {
2428 dprintk("cannot parse %d components in path\n", n);
2429 goto out_eio;
2430 }
2431 }
2432out:
2433 dprintk("\n");
2434 return status;
2435root_path:
2436/* a root pathname is sent as a zero component4 */
2437 path->ncomponents = 1;
2438 path->components[0].len=0;
2439 path->components[0].data=NULL;
2440 dprintk("path /\n");
2441 goto out;
2442out_eio:
2443 dprintk(" status %d", status);
2444 status = -EIO;
2445 goto out;
2446}
2447
2448static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
2449{
2450 int n;
2451 uint32_t *p;
2452 int status = -EIO;
2453
2454 if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U)))
2455 goto out;
2456 status = 0;
2457 if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
2458 goto out;
2459 dprintk("%s: fsroot ", __FUNCTION__);
2460 status = decode_pathname(xdr, &res->fs_path);
2461 if (unlikely(status != 0))
2462 goto out;
2463 READ_BUF(4);
2464 READ32(n);
2465 if (n <= 0)
2466 goto out_eio;
2467 res->nlocations = 0;
2468 while (res->nlocations < n) {
2469 int m;
2470 struct nfs4_fs_location *loc = &res->locations[res->nlocations];
2471
2472 READ_BUF(4);
2473 READ32(m);
2474 if (m <= 0)
2475 goto out_eio;
2476
2477 loc->nservers = 0;
2478 dprintk("%s: servers ", __FUNCTION__);
2479 while (loc->nservers < m) {
2480 struct nfs4_string *server = &loc->servers[loc->nservers];
2481 status = decode_opaque_inline(xdr, &server->len, &server->data);
2482 if (unlikely(status != 0))
2483 goto out_eio;
2484 dprintk("%s ", server->data);
2485 if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
2486 loc->nservers++;
2487 else {
2488 int i;
2489 dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations);
2490 for (i = loc->nservers; i < m; i++) {
2491 int len;
2492 char *data;
2493 status = decode_opaque_inline(xdr, &len, &data);
2494 if (unlikely(status != 0))
2495 goto out_eio;
2496 }
2497 }
2498 }
2499 status = decode_pathname(xdr, &loc->rootpath);
2500 if (unlikely(status != 0))
2501 goto out_eio;
2502 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
2503 res->nlocations++;
2504 }
2505out:
2506 dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status);
2507 return status;
2508out_eio:
2509 status = -EIO;
2510 goto out;
2511}
2512
2339static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2513static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
2340{ 2514{
2341 uint32_t *p; 2515 uint32_t *p;
@@ -2841,6 +3015,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2841 bitmap[2] = {0}, 3015 bitmap[2] = {0},
2842 type; 3016 type;
2843 int status, fmode = 0; 3017 int status, fmode = 0;
3018 uint64_t fileid;
2844 3019
2845 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 3020 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
2846 goto xdr_error; 3021 goto xdr_error;
@@ -2863,10 +3038,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2863 goto xdr_error; 3038 goto xdr_error;
2864 if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) 3039 if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
2865 goto xdr_error; 3040 goto xdr_error;
2866 if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) 3041 if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0)
2867 goto xdr_error; 3042 goto xdr_error;
2868 if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) 3043 if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
2869 goto xdr_error; 3044 goto xdr_error;
3045 if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
3046 struct nfs4_fs_locations,
3047 fattr))) != 0)
3048 goto xdr_error;
2870 if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) 3049 if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
2871 goto xdr_error; 3050 goto xdr_error;
2872 fattr->mode |= fmode; 3051 fattr->mode |= fmode;
@@ -2886,6 +3065,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2886 goto xdr_error; 3065 goto xdr_error;
2887 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) 3066 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
2888 goto xdr_error; 3067 goto xdr_error;
3068 if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0)
3069 goto xdr_error;
3070 if (fattr->fileid == 0 && fileid != 0)
3071 fattr->fileid = fileid;
2889 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) 3072 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
2890 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; 3073 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
2891xdr_error: 3074xdr_error:
@@ -3350,8 +3533,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
3350 attrlen, recvd); 3533 attrlen, recvd);
3351 return -EINVAL; 3534 return -EINVAL;
3352 } 3535 }
3353 if (attrlen <= *acl_len) 3536 xdr_read_pages(xdr, attrlen);
3354 xdr_read_pages(xdr, attrlen);
3355 *acl_len = attrlen; 3537 *acl_len = attrlen;
3356 } else 3538 } else
3357 status = -EOPNOTSUPP; 3539 status = -EOPNOTSUPP;
@@ -4211,6 +4393,29 @@ out:
4211 return status; 4393 return status;
4212} 4394}
4213 4395
4396/*
4397 * FS_LOCATIONS request
4398 */
4399static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res)
4400{
4401 struct xdr_stream xdr;
4402 struct compound_hdr hdr;
4403 int status;
4404
4405 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4406 status = decode_compound_hdr(&xdr, &hdr);
4407 if (status != 0)
4408 goto out;
4409 if ((status = decode_putfh(&xdr)) != 0)
4410 goto out;
4411 if ((status = decode_lookup(&xdr)) != 0)
4412 goto out;
4413 xdr_enter_page(&xdr, PAGE_SIZE);
4414 status = decode_getfattr(&xdr, &res->fattr, res->server);
4415out:
4416 return status;
4417}
4418
4214uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) 4419uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
4215{ 4420{
4216 uint32_t bitmap[2] = {0}; 4421 uint32_t bitmap[2] = {0};
@@ -4382,6 +4587,7 @@ struct rpc_procinfo nfs4_procedures[] = {
4382 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), 4587 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
4383 PROC(GETACL, enc_getacl, dec_getacl), 4588 PROC(GETACL, enc_getacl, dec_getacl),
4384 PROC(SETACL, enc_setacl, dec_setacl), 4589 PROC(SETACL, enc_setacl, dec_setacl),
4590 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
4385}; 4591};
4386 4592
4387struct rpc_version nfs_version4 = { 4593struct rpc_version nfs_version4 = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 106aca388ebc..36e902a88ca1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -9,7 +9,6 @@
9 * 9 *
10 */ 10 */
11 11
12#include <linux/config.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/file.h> 13#include <linux/file.h>
15#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
@@ -315,6 +314,7 @@ nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
315 req->wb_index, NFS_PAGE_TAG_DIRTY); 314 req->wb_index, NFS_PAGE_TAG_DIRTY);
316 nfs_list_remove_request(req); 315 nfs_list_remove_request(req);
317 nfs_list_add_request(req, dst); 316 nfs_list_add_request(req, dst);
317 dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
318 res++; 318 res++;
319 } 319 }
320 } 320 }
@@ -325,6 +325,7 @@ out:
325 325
326/** 326/**
327 * nfs_scan_list - Scan a list for matching requests 327 * nfs_scan_list - Scan a list for matching requests
328 * @nfsi: NFS inode
328 * @head: One of the NFS inode request lists 329 * @head: One of the NFS inode request lists
329 * @dst: Destination list 330 * @dst: Destination list
330 * @idx_start: lower bound of page->index to scan 331 * @idx_start: lower bound of page->index to scan
@@ -336,14 +337,15 @@ out:
336 * The requests are *not* checked to ensure that they form a contiguous set. 337 * The requests are *not* checked to ensure that they form a contiguous set.
337 * You must be holding the inode's req_lock when calling this function 338 * You must be holding the inode's req_lock when calling this function
338 */ 339 */
339int 340int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
340nfs_scan_list(struct list_head *head, struct list_head *dst, 341 struct list_head *dst, unsigned long idx_start,
341 unsigned long idx_start, unsigned int npages) 342 unsigned int npages)
342{ 343{
343 struct list_head *pos, *tmp; 344 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
344 struct nfs_page *req; 345 struct nfs_page *req;
345 unsigned long idx_end; 346 unsigned long idx_end;
346 int res; 347 int found, i;
348 int res;
347 349
348 res = 0; 350 res = 0;
349 if (npages == 0) 351 if (npages == 0)
@@ -351,25 +353,32 @@ nfs_scan_list(struct list_head *head, struct list_head *dst,
351 else 353 else
352 idx_end = idx_start + npages - 1; 354 idx_end = idx_start + npages - 1;
353 355
354 list_for_each_safe(pos, tmp, head) { 356 for (;;) {
355 357 found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
356 req = nfs_list_entry(pos); 358 (void **)&pgvec[0], idx_start,
357 359 NFS_SCAN_MAXENTRIES);
358 if (req->wb_index < idx_start) 360 if (found <= 0)
359 continue;
360 if (req->wb_index > idx_end)
361 break; 361 break;
362 for (i = 0; i < found; i++) {
363 req = pgvec[i];
364 if (req->wb_index > idx_end)
365 goto out;
366 idx_start = req->wb_index + 1;
367 if (req->wb_list_head != head)
368 continue;
369 if (nfs_set_page_writeback_locked(req)) {
370 nfs_list_remove_request(req);
371 nfs_list_add_request(req, dst);
372 res++;
373 }
374 }
362 375
363 if (!nfs_set_page_writeback_locked(req))
364 continue;
365 nfs_list_remove_request(req);
366 nfs_list_add_request(req, dst);
367 res++;
368 } 376 }
377out:
369 return res; 378 return res;
370} 379}
371 380
372int nfs_init_nfspagecache(void) 381int __init nfs_init_nfspagecache(void)
373{ 382{
374 nfs_page_cachep = kmem_cache_create("nfs_page", 383 nfs_page_cachep = kmem_cache_create("nfs_page",
375 sizeof(struct nfs_page), 384 sizeof(struct nfs_page),
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 9dd85cac2df0..b3899ea3229e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -44,11 +44,10 @@
44#include <linux/nfs_page.h> 44#include <linux/nfs_page.h>
45#include <linux/lockd/bind.h> 45#include <linux/lockd/bind.h>
46#include <linux/smp_lock.h> 46#include <linux/smp_lock.h>
47#include "internal.h"
47 48
48#define NFSDBG_FACILITY NFSDBG_PROC 49#define NFSDBG_FACILITY NFSDBG_PROC
49 50
50extern struct rpc_procinfo nfs_procedures[];
51
52/* 51/*
53 * Bare-bones access to getattr: this is for nfs_read_super. 52 * Bare-bones access to getattr: this is for nfs_read_super.
54 */ 53 */
@@ -611,8 +610,6 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
611 return 0; 610 return 0;
612} 611}
613 612
614extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
615
616static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 613static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
617{ 614{
618 if (task->tk_status >= 0) { 615 if (task->tk_status >= 0) {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 624ca7146b6b..52bf634260a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -15,7 +15,6 @@
15 * within the RPC code when root squashing is suspected. 15 * within the RPC code when root squashing is suspected.
16 */ 16 */
17 17
18#include <linux/config.h>
19#include <linux/time.h> 18#include <linux/time.h>
20#include <linux/kernel.h> 19#include <linux/kernel.h>
21#include <linux/errno.h> 20#include <linux/errno.h>
@@ -51,14 +50,11 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
51 if (p) { 50 if (p) {
52 memset(p, 0, sizeof(*p)); 51 memset(p, 0, sizeof(*p));
53 INIT_LIST_HEAD(&p->pages); 52 INIT_LIST_HEAD(&p->pages);
54 if (pagecount < NFS_PAGEVEC_SIZE) 53 if (pagecount <= ARRAY_SIZE(p->page_array))
55 p->pagevec = &p->page_array[0]; 54 p->pagevec = p->page_array;
56 else { 55 else {
57 size_t size = ++pagecount * sizeof(struct page *); 56 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
58 p->pagevec = kmalloc(size, GFP_NOFS); 57 if (!p->pagevec) {
59 if (p->pagevec) {
60 memset(p->pagevec, 0, size);
61 } else {
62 mempool_free(p, nfs_rdata_mempool); 58 mempool_free(p, nfs_rdata_mempool);
63 p = NULL; 59 p = NULL;
64 } 60 }
@@ -104,6 +100,28 @@ int nfs_return_empty_page(struct page *page)
104 return 0; 100 return 0;
105} 101}
106 102
103static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
104{
105 unsigned int remainder = data->args.count - data->res.count;
106 unsigned int base = data->args.pgbase + data->res.count;
107 unsigned int pglen;
108 struct page **pages;
109
110 if (data->res.eof == 0 || remainder == 0)
111 return;
112 /*
113 * Note: "remainder" can never be negative, since we check for
114 * this in the XDR code.
115 */
116 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
117 base &= ~PAGE_CACHE_MASK;
118 pglen = PAGE_CACHE_SIZE - base;
119 if (pglen < remainder)
120 memclear_highpage_flush(*pages, base, pglen);
121 else
122 memclear_highpage_flush(*pages, base, remainder);
123}
124
107/* 125/*
108 * Read a page synchronously. 126 * Read a page synchronously.
109 */ 127 */
@@ -177,11 +195,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
177 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 195 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
178 spin_unlock(&inode->i_lock); 196 spin_unlock(&inode->i_lock);
179 197
180 if (count) 198 nfs_readpage_truncate_uninitialised_page(rdata);
181 memclear_highpage_flush(page, rdata->args.pgbase, count); 199 if (rdata->res.eof || rdata->res.count == rdata->args.count)
182 SetPageUptodate(page); 200 SetPageUptodate(page);
183 if (PageError(page))
184 ClearPageError(page);
185 result = 0; 201 result = 0;
186 202
187io_error: 203io_error:
@@ -436,20 +452,12 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
436 struct nfs_page *req = data->req; 452 struct nfs_page *req = data->req;
437 struct page *page = req->wb_page; 453 struct page *page = req->wb_page;
438 454
455 if (likely(task->tk_status >= 0))
456 nfs_readpage_truncate_uninitialised_page(data);
457 else
458 SetPageError(page);
439 if (nfs_readpage_result(task, data) != 0) 459 if (nfs_readpage_result(task, data) != 0)
440 return; 460 return;
441 if (task->tk_status >= 0) {
442 unsigned int request = data->args.count;
443 unsigned int result = data->res.count;
444
445 if (result < request) {
446 memclear_highpage_flush(page,
447 data->args.pgbase + result,
448 request - result);
449 }
450 } else
451 SetPageError(page);
452
453 if (atomic_dec_and_test(&req->wb_complete)) { 461 if (atomic_dec_and_test(&req->wb_complete)) {
454 if (!PageError(page)) 462 if (!PageError(page))
455 SetPageUptodate(page); 463 SetPageUptodate(page);
@@ -462,6 +470,40 @@ static const struct rpc_call_ops nfs_read_partial_ops = {
462 .rpc_release = nfs_readdata_release, 470 .rpc_release = nfs_readdata_release,
463}; 471};
464 472
473static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
474{
475 unsigned int count = data->res.count;
476 unsigned int base = data->args.pgbase;
477 struct page **pages;
478
479 if (unlikely(count == 0))
480 return;
481 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
482 base &= ~PAGE_CACHE_MASK;
483 count += base;
484 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
485 SetPageUptodate(*pages);
486 /*
487 * Was this an eof or a short read? If the latter, don't mark the page
488 * as uptodate yet.
489 */
490 if (count > 0 && (data->res.eof || data->args.count == data->res.count))
491 SetPageUptodate(*pages);
492}
493
494static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
495{
496 unsigned int count = data->args.count;
497 unsigned int base = data->args.pgbase;
498 struct page **pages;
499
500 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
501 base &= ~PAGE_CACHE_MASK;
502 count += base;
503 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
504 SetPageError(*pages);
505}
506
465/* 507/*
466 * This is the callback from RPC telling us whether a reply was 508 * This is the callback from RPC telling us whether a reply was
467 * received or some error occurred (timeout or socket shutdown). 509 * received or some error occurred (timeout or socket shutdown).
@@ -469,27 +511,24 @@ static const struct rpc_call_ops nfs_read_partial_ops = {
469static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) 511static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
470{ 512{
471 struct nfs_read_data *data = calldata; 513 struct nfs_read_data *data = calldata;
472 unsigned int count = data->res.count;
473 514
515 /*
516 * Note: nfs_readpage_result may change the values of
517 * data->args. In the multi-page case, we therefore need
518 * to ensure that we call the next nfs_readpage_set_page_uptodate()
519 * first in the multi-page case.
520 */
521 if (likely(task->tk_status >= 0)) {
522 nfs_readpage_truncate_uninitialised_page(data);
523 nfs_readpage_set_pages_uptodate(data);
524 } else
525 nfs_readpage_set_pages_error(data);
474 if (nfs_readpage_result(task, data) != 0) 526 if (nfs_readpage_result(task, data) != 0)
475 return; 527 return;
476 while (!list_empty(&data->pages)) { 528 while (!list_empty(&data->pages)) {
477 struct nfs_page *req = nfs_list_entry(data->pages.next); 529 struct nfs_page *req = nfs_list_entry(data->pages.next);
478 struct page *page = req->wb_page;
479 nfs_list_remove_request(req);
480 530
481 if (task->tk_status >= 0) { 531 nfs_list_remove_request(req);
482 if (count < PAGE_CACHE_SIZE) {
483 if (count < req->wb_bytes)
484 memclear_highpage_flush(page,
485 req->wb_pgbase + count,
486 req->wb_bytes - count);
487 count = 0;
488 } else
489 count -= PAGE_CACHE_SIZE;
490 SetPageUptodate(page);
491 } else
492 SetPageError(page);
493 nfs_readpage_release(req); 532 nfs_readpage_release(req);
494 } 533 }
495} 534}
@@ -654,7 +693,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
654 return ret; 693 return ret;
655} 694}
656 695
657int nfs_init_readpagecache(void) 696int __init nfs_init_readpagecache(void)
658{ 697{
659 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 698 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
660 sizeof(struct nfs_read_data), 699 sizeof(struct nfs_read_data),
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
new file mode 100644
index 000000000000..e8a9bee74d9d
--- /dev/null
+++ b/fs/nfs/super.c
@@ -0,0 +1,1537 @@
1/*
2 * linux/fs/nfs/super.c
3 *
4 * Copyright (C) 1992 Rick Sladkey
5 *
6 * nfs superblock handling functions
7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
12 * J.S.Peatfield@damtp.cam.ac.uk
13 *
14 * Split from inode.c by David Howells <dhowells@redhat.com>
15 *
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <linux/init.h>
21
22#include <linux/time.h>
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/stat.h>
27#include <linux/errno.h>
28#include <linux/unistd.h>
29#include <linux/sunrpc/clnt.h>
30#include <linux/sunrpc/stats.h>
31#include <linux/sunrpc/metrics.h>
32#include <linux/nfs_fs.h>
33#include <linux/nfs_mount.h>
34#include <linux/nfs4_mount.h>
35#include <linux/lockd/bind.h>
36#include <linux/smp_lock.h>
37#include <linux/seq_file.h>
38#include <linux/mount.h>
39#include <linux/nfs_idmap.h>
40#include <linux/vfs.h>
41#include <linux/inet.h>
42#include <linux/nfs_xdr.h>
43
44#include <asm/system.h>
45#include <asm/uaccess.h>
46
47#include "nfs4_fs.h"
48#include "callback.h"
49#include "delegation.h"
50#include "iostat.h"
51#include "internal.h"
52
53#define NFSDBG_FACILITY NFSDBG_VFS
54
55/* Maximum number of readahead requests
56 * FIXME: this should really be a sysctl so that users may tune it to suit
57 * their needs. People that do NFS over a slow network, might for
58 * instance want to reduce it to something closer to 1 for improved
59 * interactive response.
60 */
61#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
62
63/*
64 * RPC cruft for NFS
65 */
66static struct rpc_version * nfs_version[] = {
67 NULL,
68 NULL,
69 &nfs_version2,
70#if defined(CONFIG_NFS_V3)
71 &nfs_version3,
72#elif defined(CONFIG_NFS_V4)
73 NULL,
74#endif
75#if defined(CONFIG_NFS_V4)
76 &nfs_version4,
77#endif
78};
79
80static struct rpc_program nfs_program = {
81 .name = "nfs",
82 .number = NFS_PROGRAM,
83 .nrvers = ARRAY_SIZE(nfs_version),
84 .version = nfs_version,
85 .stats = &nfs_rpcstat,
86 .pipe_dir_name = "/nfs",
87};
88
89struct rpc_stat nfs_rpcstat = {
90 .program = &nfs_program
91};
92
93
94#ifdef CONFIG_NFS_V3_ACL
95static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
96static struct rpc_version * nfsacl_version[] = {
97 [3] = &nfsacl_version3,
98};
99
100struct rpc_program nfsacl_program = {
101 .name = "nfsacl",
102 .number = NFS_ACL_PROGRAM,
103 .nrvers = ARRAY_SIZE(nfsacl_version),
104 .version = nfsacl_version,
105 .stats = &nfsacl_rpcstat,
106};
107#endif /* CONFIG_NFS_V3_ACL */
108
109static void nfs_umount_begin(struct vfsmount *, int);
110static int nfs_statfs(struct dentry *, struct kstatfs *);
111static int nfs_show_options(struct seq_file *, struct vfsmount *);
112static int nfs_show_stats(struct seq_file *, struct vfsmount *);
113static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
114static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
115 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
116static void nfs_kill_super(struct super_block *);
117
118static struct file_system_type nfs_fs_type = {
119 .owner = THIS_MODULE,
120 .name = "nfs",
121 .get_sb = nfs_get_sb,
122 .kill_sb = nfs_kill_super,
123 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
124};
125
126struct file_system_type clone_nfs_fs_type = {
127 .owner = THIS_MODULE,
128 .name = "nfs",
129 .get_sb = nfs_clone_nfs_sb,
130 .kill_sb = nfs_kill_super,
131 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
132};
133
134static struct super_operations nfs_sops = {
135 .alloc_inode = nfs_alloc_inode,
136 .destroy_inode = nfs_destroy_inode,
137 .write_inode = nfs_write_inode,
138 .statfs = nfs_statfs,
139 .clear_inode = nfs_clear_inode,
140 .umount_begin = nfs_umount_begin,
141 .show_options = nfs_show_options,
142 .show_stats = nfs_show_stats,
143};
144
145#ifdef CONFIG_NFS_V4
146static int nfs4_get_sb(struct file_system_type *fs_type,
147 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
148static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
149 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
150static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
151 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
152static void nfs4_kill_super(struct super_block *sb);
153
154static struct file_system_type nfs4_fs_type = {
155 .owner = THIS_MODULE,
156 .name = "nfs4",
157 .get_sb = nfs4_get_sb,
158 .kill_sb = nfs4_kill_super,
159 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
160};
161
162struct file_system_type clone_nfs4_fs_type = {
163 .owner = THIS_MODULE,
164 .name = "nfs4",
165 .get_sb = nfs_clone_nfs4_sb,
166 .kill_sb = nfs4_kill_super,
167 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
168};
169
170struct file_system_type nfs_referral_nfs4_fs_type = {
171 .owner = THIS_MODULE,
172 .name = "nfs4",
173 .get_sb = nfs_referral_nfs4_sb,
174 .kill_sb = nfs4_kill_super,
175 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
176};
177
178static struct super_operations nfs4_sops = {
179 .alloc_inode = nfs_alloc_inode,
180 .destroy_inode = nfs_destroy_inode,
181 .write_inode = nfs_write_inode,
182 .statfs = nfs_statfs,
183 .clear_inode = nfs4_clear_inode,
184 .umount_begin = nfs_umount_begin,
185 .show_options = nfs_show_options,
186 .show_stats = nfs_show_stats,
187};
188#endif
189
190#ifdef CONFIG_NFS_V4
191static const int nfs_set_port_min = 0;
192static const int nfs_set_port_max = 65535;
193
194static int param_set_port(const char *val, struct kernel_param *kp)
195{
196 char *endp;
197 int num = simple_strtol(val, &endp, 0);
198 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
199 return -EINVAL;
200 *((int *)kp->arg) = num;
201 return 0;
202}
203
204module_param_call(callback_tcpport, param_set_port, param_get_int,
205 &nfs_callback_set_tcpport, 0644);
206#endif
207
208#ifdef CONFIG_NFS_V4
209static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
210{
211 char *endp;
212 int num = simple_strtol(val, &endp, 0);
213 int jif = num * HZ;
214 if (endp == val || *endp || num < 0 || jif < num)
215 return -EINVAL;
216 *((int *)kp->arg) = jif;
217 return 0;
218}
219
220module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
221 &nfs_idmap_cache_timeout, 0644);
222#endif
223
224/*
225 * Register the NFS filesystems
226 */
227int __init register_nfs_fs(void)
228{
229 int ret;
230
231 ret = register_filesystem(&nfs_fs_type);
232 if (ret < 0)
233 goto error_0;
234
235#ifdef CONFIG_NFS_V4
236 ret = nfs_register_sysctl();
237 if (ret < 0)
238 goto error_1;
239 ret = register_filesystem(&nfs4_fs_type);
240 if (ret < 0)
241 goto error_2;
242#endif
243 return 0;
244
245#ifdef CONFIG_NFS_V4
246error_2:
247 nfs_unregister_sysctl();
248error_1:
249 unregister_filesystem(&nfs_fs_type);
250#endif
251error_0:
252 return ret;
253}
254
255/*
256 * Unregister the NFS filesystems
257 */
258void __exit unregister_nfs_fs(void)
259{
260#ifdef CONFIG_NFS_V4
261 unregister_filesystem(&nfs4_fs_type);
262 nfs_unregister_sysctl();
263#endif
264 unregister_filesystem(&nfs_fs_type);
265}
266
267/*
268 * Deliver file system statistics to userspace
269 */
270static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
271{
272 struct super_block *sb = dentry->d_sb;
273 struct nfs_server *server = NFS_SB(sb);
274 unsigned char blockbits;
275 unsigned long blockres;
276 struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
277 struct nfs_fattr fattr;
278 struct nfs_fsstat res = {
279 .fattr = &fattr,
280 };
281 int error;
282
283 lock_kernel();
284
285 error = server->rpc_ops->statfs(server, rootfh, &res);
286 buf->f_type = NFS_SUPER_MAGIC;
287 if (error < 0)
288 goto out_err;
289
290 /*
291 * Current versions of glibc do not correctly handle the
292 * case where f_frsize != f_bsize. Eventually we want to
293 * report the value of wtmult in this field.
294 */
295 buf->f_frsize = sb->s_blocksize;
296
297 /*
298 * On most *nix systems, f_blocks, f_bfree, and f_bavail
299 * are reported in units of f_frsize. Linux hasn't had
300 * an f_frsize field in its statfs struct until recently,
301 * thus historically Linux's sys_statfs reports these
302 * fields in units of f_bsize.
303 */
304 buf->f_bsize = sb->s_blocksize;
305 blockbits = sb->s_blocksize_bits;
306 blockres = (1 << blockbits) - 1;
307 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
308 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
309 buf->f_bavail = (res.abytes + blockres) >> blockbits;
310
311 buf->f_files = res.tfiles;
312 buf->f_ffree = res.afiles;
313
314 buf->f_namelen = server->namelen;
315 out:
316 unlock_kernel();
317 return 0;
318
319 out_err:
320 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error);
321 buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
322 goto out;
323
324}
325
326static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
327{
328 static struct {
329 rpc_authflavor_t flavour;
330 const char *str;
331 } sec_flavours[] = {
332 { RPC_AUTH_NULL, "null" },
333 { RPC_AUTH_UNIX, "sys" },
334 { RPC_AUTH_GSS_KRB5, "krb5" },
335 { RPC_AUTH_GSS_KRB5I, "krb5i" },
336 { RPC_AUTH_GSS_KRB5P, "krb5p" },
337 { RPC_AUTH_GSS_LKEY, "lkey" },
338 { RPC_AUTH_GSS_LKEYI, "lkeyi" },
339 { RPC_AUTH_GSS_LKEYP, "lkeyp" },
340 { RPC_AUTH_GSS_SPKM, "spkm" },
341 { RPC_AUTH_GSS_SPKMI, "spkmi" },
342 { RPC_AUTH_GSS_SPKMP, "spkmp" },
343 { -1, "unknown" }
344 };
345 int i;
346
347 for (i=0; sec_flavours[i].flavour != -1; i++) {
348 if (sec_flavours[i].flavour == flavour)
349 break;
350 }
351 return sec_flavours[i].str;
352}
353
354/*
355 * Describe the mount options in force on this server representation
356 */
357static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
358{
359 static struct proc_nfs_info {
360 int flag;
361 char *str;
362 char *nostr;
363 } nfs_info[] = {
364 { NFS_MOUNT_SOFT, ",soft", ",hard" },
365 { NFS_MOUNT_INTR, ",intr", "" },
366 { NFS_MOUNT_NOCTO, ",nocto", "" },
367 { NFS_MOUNT_NOAC, ",noac", "" },
368 { NFS_MOUNT_NONLM, ",nolock", "" },
369 { NFS_MOUNT_NOACL, ",noacl", "" },
370 { 0, NULL, NULL }
371 };
372 struct proc_nfs_info *nfs_infop;
373 char buf[12];
374 char *proto;
375
376 seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
377 seq_printf(m, ",rsize=%d", nfss->rsize);
378 seq_printf(m, ",wsize=%d", nfss->wsize);
379 if (nfss->acregmin != 3*HZ || showdefaults)
380 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
381 if (nfss->acregmax != 60*HZ || showdefaults)
382 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
383 if (nfss->acdirmin != 30*HZ || showdefaults)
384 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
385 if (nfss->acdirmax != 60*HZ || showdefaults)
386 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
387 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
388 if (nfss->flags & nfs_infop->flag)
389 seq_puts(m, nfs_infop->str);
390 else
391 seq_puts(m, nfs_infop->nostr);
392 }
393 switch (nfss->client->cl_xprt->prot) {
394 case IPPROTO_TCP:
395 proto = "tcp";
396 break;
397 case IPPROTO_UDP:
398 proto = "udp";
399 break;
400 default:
401 snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
402 proto = buf;
403 }
404 seq_printf(m, ",proto=%s", proto);
405 seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
406 seq_printf(m, ",retrans=%u", nfss->retrans_count);
407 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
408}
409
410/*
411 * Describe the mount options on this VFS mountpoint
412 */
413static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
414{
415 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
416
417 nfs_show_mount_options(m, nfss, 0);
418
419 seq_puts(m, ",addr=");
420 seq_escape(m, nfss->hostname, " \t\n\\");
421
422 return 0;
423}
424
425/*
426 * Present statistical information for this VFS mountpoint
427 */
428static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
429{
430 int i, cpu;
431 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
432 struct rpc_auth *auth = nfss->client->cl_auth;
433 struct nfs_iostats totals = { };
434
435 seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
436
437 /*
438 * Display all mount option settings
439 */
440 seq_printf(m, "\n\topts:\t");
441 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
442 seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
443 seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
444 seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
445 nfs_show_mount_options(m, nfss, 1);
446
447 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
448
449 seq_printf(m, "\n\tcaps:\t");
450 seq_printf(m, "caps=0x%x", nfss->caps);
451 seq_printf(m, ",wtmult=%d", nfss->wtmult);
452 seq_printf(m, ",dtsize=%d", nfss->dtsize);
453 seq_printf(m, ",bsize=%d", nfss->bsize);
454 seq_printf(m, ",namelen=%d", nfss->namelen);
455
456#ifdef CONFIG_NFS_V4
457 if (nfss->rpc_ops->version == 4) {
458 seq_printf(m, "\n\tnfsv4:\t");
459 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
460 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
461 seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
462 }
463#endif
464
465 /*
466 * Display security flavor in effect for this mount
467 */
468 seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
469 if (auth->au_flavor)
470 seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
471
472 /*
473 * Display superblock I/O counters
474 */
475 for_each_possible_cpu(cpu) {
476 struct nfs_iostats *stats;
477
478 preempt_disable();
479 stats = per_cpu_ptr(nfss->io_stats, cpu);
480
481 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
482 totals.events[i] += stats->events[i];
483 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
484 totals.bytes[i] += stats->bytes[i];
485
486 preempt_enable();
487 }
488
489 seq_printf(m, "\n\tevents:\t");
490 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
491 seq_printf(m, "%lu ", totals.events[i]);
492 seq_printf(m, "\n\tbytes:\t");
493 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
494 seq_printf(m, "%Lu ", totals.bytes[i]);
495 seq_printf(m, "\n");
496
497 rpc_print_iostats(m, nfss->client);
498
499 return 0;
500}
501
502/*
503 * Begin unmount by attempting to remove all automounted mountpoints we added
504 * in response to traversals
505 */
506static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
507{
508 struct nfs_server *server;
509 struct rpc_clnt *rpc;
510
511 shrink_submounts(vfsmnt, &nfs_automount_list);
512 if (!(flags & MNT_FORCE))
513 return;
514 /* -EIO all pending I/O */
515 server = NFS_SB(vfsmnt->mnt_sb);
516 rpc = server->client;
517 if (!IS_ERR(rpc))
518 rpc_killall_tasks(rpc);
519 rpc = server->client_acl;
520 if (!IS_ERR(rpc))
521 rpc_killall_tasks(rpc);
522}
523
524/*
525 * Obtain the root inode of the file system.
526 */
527static struct inode *
528nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
529{
530 struct nfs_server *server = NFS_SB(sb);
531 int error;
532
533 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
534 if (error < 0) {
535 dprintk("nfs_get_root: getattr error = %d\n", -error);
536 return ERR_PTR(error);
537 }
538
539 server->fsid = fsinfo->fattr->fsid;
540 return nfs_fhget(sb, rootfh, fsinfo->fattr);
541}
542
543/*
544 * Do NFS version-independent mount processing, and sanity checking
545 */
546static int
547nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
548{
549 struct nfs_server *server;
550 struct inode *root_inode;
551 struct nfs_fattr fattr;
552 struct nfs_fsinfo fsinfo = {
553 .fattr = &fattr,
554 };
555 struct nfs_pathconf pathinfo = {
556 .fattr = &fattr,
557 };
558 int no_root_error = 0;
559 unsigned long max_rpc_payload;
560
561 /* We probably want something more informative here */
562 snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
563
564 server = NFS_SB(sb);
565
566 sb->s_magic = NFS_SUPER_MAGIC;
567
568 server->io_stats = nfs_alloc_iostats();
569 if (server->io_stats == NULL)
570 return -ENOMEM;
571
572 root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
573 /* Did getting the root inode fail? */
574 if (IS_ERR(root_inode)) {
575 no_root_error = PTR_ERR(root_inode);
576 goto out_no_root;
577 }
578 sb->s_root = d_alloc_root(root_inode);
579 if (!sb->s_root) {
580 no_root_error = -ENOMEM;
581 goto out_no_root;
582 }
583 sb->s_root->d_op = server->rpc_ops->dentry_ops;
584
585 /* mount time stamp, in seconds */
586 server->mount_time = jiffies;
587
588 /* Get some general file system info */
589 if (server->namelen == 0 &&
590 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
591 server->namelen = pathinfo.max_namelen;
592 /* Work out a lot of parameters */
593 if (server->rsize == 0)
594 server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
595 if (server->wsize == 0)
596 server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
597
598 if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
599 server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
600 if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
601 server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
602
603 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
604 if (server->rsize > max_rpc_payload)
605 server->rsize = max_rpc_payload;
606 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
607 server->rsize = NFS_MAX_FILE_IO_SIZE;
608 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
609
610 if (server->wsize > max_rpc_payload)
611 server->wsize = max_rpc_payload;
612 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
613 server->wsize = NFS_MAX_FILE_IO_SIZE;
614 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
615
616 if (sb->s_blocksize == 0)
617 sb->s_blocksize = nfs_block_bits(server->wsize,
618 &sb->s_blocksize_bits);
619 server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
620
621 server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
622 if (server->dtsize > PAGE_CACHE_SIZE)
623 server->dtsize = PAGE_CACHE_SIZE;
624 if (server->dtsize > server->rsize)
625 server->dtsize = server->rsize;
626
627 if (server->flags & NFS_MOUNT_NOAC) {
628 server->acregmin = server->acregmax = 0;
629 server->acdirmin = server->acdirmax = 0;
630 sb->s_flags |= MS_SYNCHRONOUS;
631 }
632 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
633
634 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
635
636 server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
637 server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
638
639 /* We're airborne Set socket buffersize */
640 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
641 return 0;
642 /* Yargs. It didn't work out. */
643out_no_root:
644 dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
645 if (!IS_ERR(root_inode))
646 iput(root_inode);
647 return no_root_error;
648}
649
650/*
651 * Initialise the timeout values for a connection
652 */
653static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
654{
655 to->to_initval = timeo * HZ / 10;
656 to->to_retries = retrans;
657 if (!to->to_retries)
658 to->to_retries = 2;
659
660 switch (proto) {
661 case IPPROTO_TCP:
662 if (!to->to_initval)
663 to->to_initval = 60 * HZ;
664 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
665 to->to_initval = NFS_MAX_TCP_TIMEOUT;
666 to->to_increment = to->to_initval;
667 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
668 to->to_exponential = 0;
669 break;
670 case IPPROTO_UDP:
671 default:
672 if (!to->to_initval)
673 to->to_initval = 11 * HZ / 10;
674 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
675 to->to_initval = NFS_MAX_UDP_TIMEOUT;
676 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
677 to->to_exponential = 1;
678 break;
679 }
680}
681
682/*
683 * Create an RPC client handle.
684 */
685static struct rpc_clnt *
686nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
687{
688 struct rpc_timeout timeparms;
689 struct rpc_xprt *xprt = NULL;
690 struct rpc_clnt *clnt = NULL;
691 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
692
693 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
694
695 server->retrans_timeo = timeparms.to_initval;
696 server->retrans_count = timeparms.to_retries;
697
698 /* create transport and client */
699 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
700 if (IS_ERR(xprt)) {
701 dprintk("%s: cannot create RPC transport. Error = %ld\n",
702 __FUNCTION__, PTR_ERR(xprt));
703 return (struct rpc_clnt *)xprt;
704 }
705 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
706 server->rpc_ops->version, data->pseudoflavor);
707 if (IS_ERR(clnt)) {
708 dprintk("%s: cannot create RPC client. Error = %ld\n",
709 __FUNCTION__, PTR_ERR(xprt));
710 goto out_fail;
711 }
712
713 clnt->cl_intr = 1;
714 clnt->cl_softrtry = 1;
715
716 return clnt;
717
718out_fail:
719 return clnt;
720}
721
722/*
723 * Clone a server record
724 */
725static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
726{
727 struct nfs_server *server = NFS_SB(sb);
728 struct nfs_server *parent = NFS_SB(data->sb);
729 struct inode *root_inode;
730 struct nfs_fsinfo fsinfo;
731 void *err = ERR_PTR(-ENOMEM);
732
733 sb->s_op = data->sb->s_op;
734 sb->s_blocksize = data->sb->s_blocksize;
735 sb->s_blocksize_bits = data->sb->s_blocksize_bits;
736 sb->s_maxbytes = data->sb->s_maxbytes;
737
738 server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
739 server->io_stats = nfs_alloc_iostats();
740 if (server->io_stats == NULL)
741 goto out;
742
743 server->client = rpc_clone_client(parent->client);
744 if (IS_ERR((err = server->client)))
745 goto out;
746
747 if (!IS_ERR(parent->client_sys)) {
748 server->client_sys = rpc_clone_client(parent->client_sys);
749 if (IS_ERR((err = server->client_sys)))
750 goto out;
751 }
752 if (!IS_ERR(parent->client_acl)) {
753 server->client_acl = rpc_clone_client(parent->client_acl);
754 if (IS_ERR((err = server->client_acl)))
755 goto out;
756 }
757 root_inode = nfs_fhget(sb, data->fh, data->fattr);
758 if (!root_inode)
759 goto out;
760 sb->s_root = d_alloc_root(root_inode);
761 if (!sb->s_root)
762 goto out_put_root;
763 fsinfo.fattr = data->fattr;
764 if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
765 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
766 sb->s_root->d_op = server->rpc_ops->dentry_ops;
767 sb->s_flags |= MS_ACTIVE;
768 return server;
769out_put_root:
770 iput(root_inode);
771out:
772 return err;
773}
774
775/*
776 * Copy an existing superblock and attach revised data
777 */
778static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
779 struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
780 struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
781 struct vfsmount *mnt)
782{
783 struct nfs_server *server;
784 struct nfs_server *parent = NFS_SB(data->sb);
785 struct super_block *sb = ERR_PTR(-EINVAL);
786 char *hostname;
787 int error = -ENOMEM;
788 int len;
789
790 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
791 if (server == NULL)
792 goto out_err;
793 memcpy(server, parent, sizeof(*server));
794 hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
795 len = strlen(hostname) + 1;
796 server->hostname = kmalloc(len, GFP_KERNEL);
797 if (server->hostname == NULL)
798 goto free_server;
799 memcpy(server->hostname, hostname, len);
800 error = rpciod_up();
801 if (error != 0)
802 goto free_hostname;
803
804 sb = fill_sb(server, data);
805 if (IS_ERR(sb)) {
806 error = PTR_ERR(sb);
807 goto kill_rpciod;
808 }
809
810 if (sb->s_root)
811 goto out_rpciod_down;
812
813 server = fill_server(sb, data);
814 if (IS_ERR(server)) {
815 error = PTR_ERR(server);
816 goto out_deactivate;
817 }
818 return simple_set_mnt(mnt, sb);
819out_deactivate:
820 up_write(&sb->s_umount);
821 deactivate_super(sb);
822 return error;
823out_rpciod_down:
824 rpciod_down();
825 kfree(server->hostname);
826 kfree(server);
827 return simple_set_mnt(mnt, sb);
828kill_rpciod:
829 rpciod_down();
830free_hostname:
831 kfree(server->hostname);
832free_server:
833 kfree(server);
834out_err:
835 return error;
836}
837
838/*
839 * Set up an NFS2/3 superblock
840 *
841 * The way this works is that the mount process passes a structure
842 * in the data argument which contains the server's IP address
843 * and the root file handle obtained from the server's mount
844 * daemon. We stash these away in the private superblock fields.
845 */
846static int
847nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
848{
849 struct nfs_server *server;
850 rpc_authflavor_t authflavor;
851
852 server = NFS_SB(sb);
853 sb->s_blocksize_bits = 0;
854 sb->s_blocksize = 0;
855 if (data->bsize)
856 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
857 if (data->rsize)
858 server->rsize = nfs_block_size(data->rsize, NULL);
859 if (data->wsize)
860 server->wsize = nfs_block_size(data->wsize, NULL);
861 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
862
863 server->acregmin = data->acregmin*HZ;
864 server->acregmax = data->acregmax*HZ;
865 server->acdirmin = data->acdirmin*HZ;
866 server->acdirmax = data->acdirmax*HZ;
867
868 /* Start lockd here, before we might error out */
869 if (!(server->flags & NFS_MOUNT_NONLM))
870 lockd_up();
871
872 server->namelen = data->namlen;
873 server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
874 if (!server->hostname)
875 return -ENOMEM;
876 strcpy(server->hostname, data->hostname);
877
878 /* Check NFS protocol revision and initialize RPC op vector
879 * and file handle pool. */
880#ifdef CONFIG_NFS_V3
881 if (server->flags & NFS_MOUNT_VER3) {
882 server->rpc_ops = &nfs_v3_clientops;
883 server->caps |= NFS_CAP_READDIRPLUS;
884 } else {
885 server->rpc_ops = &nfs_v2_clientops;
886 }
887#else
888 server->rpc_ops = &nfs_v2_clientops;
889#endif
890
891 /* Fill in pseudoflavor for mount version < 5 */
892 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
893 data->pseudoflavor = RPC_AUTH_UNIX;
894 authflavor = data->pseudoflavor; /* save for sb_init() */
895 /* XXX maybe we want to add a server->pseudoflavor field */
896
897 /* Create RPC client handles */
898 server->client = nfs_create_client(server, data);
899 if (IS_ERR(server->client))
900 return PTR_ERR(server->client);
901 /* RFC 2623, sec 2.3.2 */
902 if (authflavor != RPC_AUTH_UNIX) {
903 struct rpc_auth *auth;
904
905 server->client_sys = rpc_clone_client(server->client);
906 if (IS_ERR(server->client_sys))
907 return PTR_ERR(server->client_sys);
908 auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
909 if (IS_ERR(auth))
910 return PTR_ERR(auth);
911 } else {
912 atomic_inc(&server->client->cl_count);
913 server->client_sys = server->client;
914 }
915 if (server->flags & NFS_MOUNT_VER3) {
916#ifdef CONFIG_NFS_V3_ACL
917 if (!(server->flags & NFS_MOUNT_NOACL)) {
918 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
919 /* No errors! Assume that Sun nfsacls are supported */
920 if (!IS_ERR(server->client_acl))
921 server->caps |= NFS_CAP_ACLS;
922 }
923#else
924 server->flags &= ~NFS_MOUNT_NOACL;
925#endif /* CONFIG_NFS_V3_ACL */
926 /*
927 * The VFS shouldn't apply the umask to mode bits. We will
928 * do so ourselves when necessary.
929 */
930 sb->s_flags |= MS_POSIXACL;
931 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
932 server->namelen = NFS3_MAXNAMLEN;
933 sb->s_time_gran = 1;
934 } else {
935 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
936 server->namelen = NFS2_MAXNAMLEN;
937 }
938
939 sb->s_op = &nfs_sops;
940 return nfs_sb_init(sb, authflavor);
941}
942
943static int nfs_set_super(struct super_block *s, void *data)
944{
945 s->s_fs_info = data;
946 return set_anon_super(s, data);
947}
948
949static int nfs_compare_super(struct super_block *sb, void *data)
950{
951 struct nfs_server *server = data;
952 struct nfs_server *old = NFS_SB(sb);
953
954 if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
955 return 0;
956 if (old->addr.sin_port != server->addr.sin_port)
957 return 0;
958 return !nfs_compare_fh(&old->fh, &server->fh);
959}
960
961static int nfs_get_sb(struct file_system_type *fs_type,
962 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
963{
964 int error;
965 struct nfs_server *server = NULL;
966 struct super_block *s;
967 struct nfs_fh *root;
968 struct nfs_mount_data *data = raw_data;
969
970 error = -EINVAL;
971 if (data == NULL) {
972 dprintk("%s: missing data argument\n", __FUNCTION__);
973 goto out_err_noserver;
974 }
975 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
976 dprintk("%s: bad mount version\n", __FUNCTION__);
977 goto out_err_noserver;
978 }
979 switch (data->version) {
980 case 1:
981 data->namlen = 0;
982 case 2:
983 data->bsize = 0;
984 case 3:
985 if (data->flags & NFS_MOUNT_VER3) {
986 dprintk("%s: mount structure version %d does not support NFSv3\n",
987 __FUNCTION__,
988 data->version);
989 goto out_err_noserver;
990 }
991 data->root.size = NFS2_FHSIZE;
992 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
993 case 4:
994 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
995 dprintk("%s: mount structure version %d does not support strong security\n",
996 __FUNCTION__,
997 data->version);
998 goto out_err_noserver;
999 }
1000 case 5:
1001 memset(data->context, 0, sizeof(data->context));
1002 }
1003#ifndef CONFIG_NFS_V3
1004 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
1005 error = -EPROTONOSUPPORT;
1006 if (data->flags & NFS_MOUNT_VER3) {
1007 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
1008 goto out_err_noserver;
1009 }
1010#endif /* CONFIG_NFS_V3 */
1011
1012 error = -ENOMEM;
1013 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
1014 if (!server)
1015 goto out_err_noserver;
1016 /* Zero out the NFS state stuff */
1017 init_nfsv4_state(server);
1018 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
1019
1020 root = &server->fh;
1021 if (data->flags & NFS_MOUNT_VER3)
1022 root->size = data->root.size;
1023 else
1024 root->size = NFS2_FHSIZE;
1025 error = -EINVAL;
1026 if (root->size > sizeof(root->data)) {
1027 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
1028 goto out_err;
1029 }
1030 memcpy(root->data, data->root.data, root->size);
1031
1032 /* We now require that the mount process passes the remote address */
1033 memcpy(&server->addr, &data->addr, sizeof(server->addr));
1034 if (server->addr.sin_addr.s_addr == INADDR_ANY) {
1035 dprintk("%s: mount program didn't pass remote address!\n",
1036 __FUNCTION__);
1037 goto out_err;
1038 }
1039
1040 /* Fire up rpciod if not yet running */
1041 error = rpciod_up();
1042 if (error < 0) {
1043 dprintk("%s: couldn't start rpciod! Error = %d\n",
1044 __FUNCTION__, error);
1045 goto out_err;
1046 }
1047
1048 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1049 if (IS_ERR(s)) {
1050 error = PTR_ERR(s);
1051 goto out_err_rpciod;
1052 }
1053
1054 if (s->s_root)
1055 goto out_rpciod_down;
1056
1057 s->s_flags = flags;
1058
1059 error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
1060 if (error) {
1061 up_write(&s->s_umount);
1062 deactivate_super(s);
1063 return error;
1064 }
1065 s->s_flags |= MS_ACTIVE;
1066 return simple_set_mnt(mnt, s);
1067
1068out_rpciod_down:
1069 rpciod_down();
1070 kfree(server);
1071 return simple_set_mnt(mnt, s);
1072
1073out_err_rpciod:
1074 rpciod_down();
1075out_err:
1076 kfree(server);
1077out_err_noserver:
1078 return error;
1079}
1080
1081static void nfs_kill_super(struct super_block *s)
1082{
1083 struct nfs_server *server = NFS_SB(s);
1084
1085 kill_anon_super(s);
1086
1087 if (!IS_ERR(server->client))
1088 rpc_shutdown_client(server->client);
1089 if (!IS_ERR(server->client_sys))
1090 rpc_shutdown_client(server->client_sys);
1091 if (!IS_ERR(server->client_acl))
1092 rpc_shutdown_client(server->client_acl);
1093
1094 if (!(server->flags & NFS_MOUNT_NONLM))
1095 lockd_down(); /* release rpc.lockd */
1096
1097 rpciod_down(); /* release rpciod */
1098
1099 nfs_free_iostats(server->io_stats);
1100 kfree(server->hostname);
1101 kfree(server);
1102 nfs_release_automount_timer();
1103}
1104
1105static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
1106{
1107 struct super_block *sb;
1108
1109 server->fsid = data->fattr->fsid;
1110 nfs_copy_fh(&server->fh, data->fh);
1111 sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1112 if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
1113 lockd_up();
1114 return sb;
1115}
1116
1117static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
1118 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1119{
1120 struct nfs_clone_mount *data = raw_data;
1121 return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
1122}
1123
1124#ifdef CONFIG_NFS_V4
1125static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
1126 struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
1127{
1128 struct nfs4_client *clp;
1129 struct rpc_xprt *xprt = NULL;
1130 struct rpc_clnt *clnt = NULL;
1131 int err = -EIO;
1132
1133 clp = nfs4_get_client(&server->addr.sin_addr);
1134 if (!clp) {
1135 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
1136 return ERR_PTR(err);
1137 }
1138
1139 /* Now create transport and client */
1140 down_write(&clp->cl_sem);
1141 if (IS_ERR(clp->cl_rpcclient)) {
1142 xprt = xprt_create_proto(proto, &server->addr, timeparms);
1143 if (IS_ERR(xprt)) {
1144 up_write(&clp->cl_sem);
1145 err = PTR_ERR(xprt);
1146 dprintk("%s: cannot create RPC transport. Error = %d\n",
1147 __FUNCTION__, err);
1148 goto out_fail;
1149 }
1150 /* Bind to a reserved port! */
1151 xprt->resvport = 1;
1152 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
1153 server->rpc_ops->version, flavor);
1154 if (IS_ERR(clnt)) {
1155 up_write(&clp->cl_sem);
1156 err = PTR_ERR(clnt);
1157 dprintk("%s: cannot create RPC client. Error = %d\n",
1158 __FUNCTION__, err);
1159 goto out_fail;
1160 }
1161 clnt->cl_intr = 1;
1162 clnt->cl_softrtry = 1;
1163 clp->cl_rpcclient = clnt;
1164 memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
1165 nfs_idmap_new(clp);
1166 }
1167 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1168 clnt = rpc_clone_client(clp->cl_rpcclient);
1169 if (!IS_ERR(clnt))
1170 server->nfs4_state = clp;
1171 up_write(&clp->cl_sem);
1172 clp = NULL;
1173
1174 if (IS_ERR(clnt)) {
1175 dprintk("%s: cannot create RPC client. Error = %d\n",
1176 __FUNCTION__, err);
1177 return clnt;
1178 }
1179
1180 if (server->nfs4_state->cl_idmap == NULL) {
1181 dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
1182 return ERR_PTR(-ENOMEM);
1183 }
1184
1185 if (clnt->cl_auth->au_flavor != flavor) {
1186 struct rpc_auth *auth;
1187
1188 auth = rpcauth_create(flavor, clnt);
1189 if (IS_ERR(auth)) {
1190 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
1191 return (struct rpc_clnt *)auth;
1192 }
1193 }
1194 return clnt;
1195
1196 out_fail:
1197 if (clp)
1198 nfs4_put_client(clp);
1199 return ERR_PTR(err);
1200}
1201
1202/*
1203 * Set up an NFS4 superblock
1204 */
1205static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
1206{
1207 struct nfs_server *server;
1208 struct rpc_timeout timeparms;
1209 rpc_authflavor_t authflavour;
1210 int err = -EIO;
1211
1212 sb->s_blocksize_bits = 0;
1213 sb->s_blocksize = 0;
1214 server = NFS_SB(sb);
1215 if (data->rsize != 0)
1216 server->rsize = nfs_block_size(data->rsize, NULL);
1217 if (data->wsize != 0)
1218 server->wsize = nfs_block_size(data->wsize, NULL);
1219 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
1220 server->caps = NFS_CAP_ATOMIC_OPEN;
1221
1222 server->acregmin = data->acregmin*HZ;
1223 server->acregmax = data->acregmax*HZ;
1224 server->acdirmin = data->acdirmin*HZ;
1225 server->acdirmax = data->acdirmax*HZ;
1226
1227 server->rpc_ops = &nfs_v4_clientops;
1228
1229 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
1230
1231 server->retrans_timeo = timeparms.to_initval;
1232 server->retrans_count = timeparms.to_retries;
1233
1234 /* Now create transport and client */
1235 authflavour = RPC_AUTH_UNIX;
1236 if (data->auth_flavourlen != 0) {
1237 if (data->auth_flavourlen != 1) {
1238 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
1239 __FUNCTION__, data->auth_flavourlen);
1240 err = -EINVAL;
1241 goto out_fail;
1242 }
1243 if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
1244 err = -EFAULT;
1245 goto out_fail;
1246 }
1247 }
1248
1249 server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
1250 if (IS_ERR(server->client)) {
1251 err = PTR_ERR(server->client);
1252 dprintk("%s: cannot create RPC client. Error = %d\n",
1253 __FUNCTION__, err);
1254 goto out_fail;
1255 }
1256
1257 sb->s_time_gran = 1;
1258
1259 sb->s_op = &nfs4_sops;
1260 err = nfs_sb_init(sb, authflavour);
1261
1262 out_fail:
1263 return err;
1264}
1265
1266static int nfs4_compare_super(struct super_block *sb, void *data)
1267{
1268 struct nfs_server *server = data;
1269 struct nfs_server *old = NFS_SB(sb);
1270
1271 if (strcmp(server->hostname, old->hostname) != 0)
1272 return 0;
1273 if (strcmp(server->mnt_path, old->mnt_path) != 0)
1274 return 0;
1275 return 1;
1276}
1277
1278static void *
1279nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1280{
1281 void *p = NULL;
1282
1283 if (!src->len)
1284 return ERR_PTR(-EINVAL);
1285 if (src->len < maxlen)
1286 maxlen = src->len;
1287 if (dst == NULL) {
1288 p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
1289 if (p == NULL)
1290 return ERR_PTR(-ENOMEM);
1291 }
1292 if (copy_from_user(dst, src->data, maxlen)) {
1293 kfree(p);
1294 return ERR_PTR(-EFAULT);
1295 }
1296 dst[maxlen] = '\0';
1297 return dst;
1298}
1299
1300static int nfs4_get_sb(struct file_system_type *fs_type,
1301 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1302{
1303 int error;
1304 struct nfs_server *server;
1305 struct super_block *s;
1306 struct nfs4_mount_data *data = raw_data;
1307 void *p;
1308
1309 if (data == NULL) {
1310 dprintk("%s: missing data argument\n", __FUNCTION__);
1311 return -EINVAL;
1312 }
1313 if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
1314 dprintk("%s: bad mount version\n", __FUNCTION__);
1315 return -EINVAL;
1316 }
1317
1318 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
1319 if (!server)
1320 return -ENOMEM;
1321 /* Zero out the NFS state stuff */
1322 init_nfsv4_state(server);
1323 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
1324
1325 p = nfs_copy_user_string(NULL, &data->hostname, 256);
1326 if (IS_ERR(p))
1327 goto out_err;
1328 server->hostname = p;
1329
1330 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
1331 if (IS_ERR(p))
1332 goto out_err;
1333 server->mnt_path = p;
1334
1335 p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
1336 sizeof(server->ip_addr) - 1);
1337 if (IS_ERR(p))
1338 goto out_err;
1339
1340 /* We now require that the mount process passes the remote address */
1341 if (data->host_addrlen != sizeof(server->addr)) {
1342 error = -EINVAL;
1343 goto out_free;
1344 }
1345 if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
1346 error = -EFAULT;
1347 goto out_free;
1348 }
1349 if (server->addr.sin_family != AF_INET ||
1350 server->addr.sin_addr.s_addr == INADDR_ANY) {
1351 dprintk("%s: mount program didn't pass remote IP address!\n",
1352 __FUNCTION__);
1353 error = -EINVAL;
1354 goto out_free;
1355 }
1356
1357 /* Fire up rpciod if not yet running */
1358 error = rpciod_up();
1359 if (error < 0) {
1360 dprintk("%s: couldn't start rpciod! Error = %d\n",
1361 __FUNCTION__, error);
1362 goto out_free;
1363 }
1364
1365 s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
1366
1367 if (IS_ERR(s)) {
1368 error = PTR_ERR(s);
1369 goto out_free;
1370 }
1371
1372 if (s->s_root) {
1373 kfree(server->mnt_path);
1374 kfree(server->hostname);
1375 kfree(server);
1376 return simple_set_mnt(mnt, s);
1377 }
1378
1379 s->s_flags = flags;
1380
1381 error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
1382 if (error) {
1383 up_write(&s->s_umount);
1384 deactivate_super(s);
1385 return error;
1386 }
1387 s->s_flags |= MS_ACTIVE;
1388 return simple_set_mnt(mnt, s);
1389out_err:
1390 error = PTR_ERR(p);
1391out_free:
1392 kfree(server->mnt_path);
1393 kfree(server->hostname);
1394 kfree(server);
1395 return error;
1396}
1397
1398static void nfs4_kill_super(struct super_block *sb)
1399{
1400 struct nfs_server *server = NFS_SB(sb);
1401
1402 nfs_return_all_delegations(sb);
1403 kill_anon_super(sb);
1404
1405 nfs4_renewd_prepare_shutdown(server);
1406
1407 if (server->client != NULL && !IS_ERR(server->client))
1408 rpc_shutdown_client(server->client);
1409
1410 destroy_nfsv4_state(server);
1411
1412 rpciod_down();
1413
1414 nfs_free_iostats(server->io_stats);
1415 kfree(server->hostname);
1416 kfree(server);
1417 nfs_release_automount_timer();
1418}
1419
1420/*
1421 * Constructs the SERVER-side path
1422 */
1423static inline char *nfs4_dup_path(const struct dentry *dentry)
1424{
1425 char *page = (char *) __get_free_page(GFP_USER);
1426 char *path;
1427
1428 path = nfs4_path(dentry, page, PAGE_SIZE);
1429 if (!IS_ERR(path)) {
1430 int len = PAGE_SIZE + page - path;
1431 char *tmp = path;
1432
1433 path = kmalloc(len, GFP_KERNEL);
1434 if (path)
1435 memcpy(path, tmp, len);
1436 else
1437 path = ERR_PTR(-ENOMEM);
1438 }
1439 free_page((unsigned long)page);
1440 return path;
1441}
1442
1443static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
1444{
1445 const struct dentry *dentry = data->dentry;
1446 struct nfs4_client *clp = server->nfs4_state;
1447 struct super_block *sb;
1448
1449 server->fsid = data->fattr->fsid;
1450 nfs_copy_fh(&server->fh, data->fh);
1451 server->mnt_path = nfs4_dup_path(dentry);
1452 if (IS_ERR(server->mnt_path)) {
1453 sb = (struct super_block *)server->mnt_path;
1454 goto err;
1455 }
1456 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1457 if (IS_ERR(sb) || sb->s_root)
1458 goto free_path;
1459 nfs4_server_capabilities(server, &server->fh);
1460
1461 down_write(&clp->cl_sem);
1462 atomic_inc(&clp->cl_count);
1463 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1464 up_write(&clp->cl_sem);
1465 return sb;
1466free_path:
1467 kfree(server->mnt_path);
1468err:
1469 server->mnt_path = NULL;
1470 return sb;
1471}
1472
1473static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
1474 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1475{
1476 struct nfs_clone_mount *data = raw_data;
1477 return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
1478}
1479
1480static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
1481{
1482 struct super_block *sb = ERR_PTR(-ENOMEM);
1483 int len;
1484
1485 len = strlen(data->mnt_path) + 1;
1486 server->mnt_path = kmalloc(len, GFP_KERNEL);
1487 if (server->mnt_path == NULL)
1488 goto err;
1489 memcpy(server->mnt_path, data->mnt_path, len);
1490 memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
1491
1492 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1493 if (IS_ERR(sb) || sb->s_root)
1494 goto free_path;
1495 return sb;
1496free_path:
1497 kfree(server->mnt_path);
1498err:
1499 server->mnt_path = NULL;
1500 return sb;
1501}
1502
1503static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
1504{
1505 struct nfs_server *server = NFS_SB(sb);
1506 struct rpc_timeout timeparms;
1507 int proto, timeo, retrans;
1508 void *err;
1509
1510 proto = IPPROTO_TCP;
1511 /* Since we are following a referral and there may be alternatives,
1512 set the timeouts and retries to low values */
1513 timeo = 2;
1514 retrans = 1;
1515 nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
1516
1517 server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
1518 if (IS_ERR((err = server->client)))
1519 goto out_err;
1520
1521 sb->s_time_gran = 1;
1522 sb->s_op = &nfs4_sops;
1523 err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
1524 if (!IS_ERR(err))
1525 return server;
1526out_err:
1527 return (struct nfs_server *)err;
1528}
1529
1530static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
1531 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1532{
1533 struct nfs_clone_mount *data = raw_data;
1534 return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
1535}
1536
1537#endif
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 18dc95b0b646..600bbe630abd 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -52,7 +52,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct page *page; 54 struct page *page;
55 void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); 55 void *err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
56 if (err) 56 if (err)
57 goto read_failed; 57 goto read_failed;
58 page = read_cache_page(&inode->i_data, 0, 58 page = read_cache_page(&inode->i_data, 0,
@@ -75,22 +75,13 @@ read_failed:
75 return NULL; 75 return NULL;
76} 76}
77 77
78static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
79{
80 if (cookie) {
81 struct page *page = cookie;
82 kunmap(page);
83 page_cache_release(page);
84 }
85}
86
87/* 78/*
88 * symlinks can't do much... 79 * symlinks can't do much...
89 */ 80 */
90struct inode_operations nfs_symlink_inode_operations = { 81struct inode_operations nfs_symlink_inode_operations = {
91 .readlink = generic_readlink, 82 .readlink = generic_readlink,
92 .follow_link = nfs_follow_link, 83 .follow_link = nfs_follow_link,
93 .put_link = nfs_put_link, 84 .put_link = page_put_link,
94 .getattr = nfs_getattr, 85 .getattr = nfs_getattr,
95 .setattr = nfs_setattr, 86 .setattr = nfs_setattr,
96}; 87};
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 4c486eb867ca..2fe3403c2409 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * Sysctl interface to NFS parameters 4 * Sysctl interface to NFS parameters
5 */ 5 */
6#include <linux/config.h>
7#include <linux/types.h> 6#include <linux/types.h>
8#include <linux/linkage.h> 7#include <linux/linkage.h>
9#include <linux/ctype.h> 8#include <linux/ctype.h>
@@ -12,6 +11,7 @@
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/nfs4.h> 12#include <linux/nfs4.h>
14#include <linux/nfs_idmap.h> 13#include <linux/nfs_idmap.h>
14#include <linux/nfs_fs.h>
15 15
16#include "callback.h" 16#include "callback.h"
17 17
@@ -46,6 +46,15 @@ static ctl_table nfs_cb_sysctls[] = {
46 .strategy = &sysctl_jiffies, 46 .strategy = &sysctl_jiffies,
47 }, 47 },
48#endif 48#endif
49 {
50 .ctl_name = CTL_UNNUMBERED,
51 .procname = "nfs_mountpoint_timeout",
52 .data = &nfs_mountpoint_expiry_timeout,
53 .maxlen = sizeof(nfs_mountpoint_expiry_timeout),
54 .mode = 0644,
55 .proc_handler = &proc_dointvec_jiffies,
56 .strategy = &sysctl_jiffies,
57 },
49 { .ctl_name = 0 } 58 { .ctl_name = 0 }
50}; 59};
51 60
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4cfada2cc09f..bca5734ca9fb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,7 +46,6 @@
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> 46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
47 */ 47 */
48 48
49#include <linux/config.h>
50#include <linux/types.h> 49#include <linux/types.h>
51#include <linux/slab.h> 50#include <linux/slab.h>
52#include <linux/mm.h> 51#include <linux/mm.h>
@@ -98,11 +97,10 @@ struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
98 if (p) { 97 if (p) {
99 memset(p, 0, sizeof(*p)); 98 memset(p, 0, sizeof(*p));
100 INIT_LIST_HEAD(&p->pages); 99 INIT_LIST_HEAD(&p->pages);
101 if (pagecount < NFS_PAGEVEC_SIZE) 100 if (pagecount <= ARRAY_SIZE(p->page_array))
102 p->pagevec = &p->page_array[0]; 101 p->pagevec = p->page_array;
103 else { 102 else {
104 size_t size = ++pagecount * sizeof(struct page *); 103 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
105 p->pagevec = kzalloc(size, GFP_NOFS);
106 if (!p->pagevec) { 104 if (!p->pagevec) {
107 mempool_free(p, nfs_commit_mempool); 105 mempool_free(p, nfs_commit_mempool);
108 p = NULL; 106 p = NULL;
@@ -126,14 +124,11 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
126 if (p) { 124 if (p) {
127 memset(p, 0, sizeof(*p)); 125 memset(p, 0, sizeof(*p));
128 INIT_LIST_HEAD(&p->pages); 126 INIT_LIST_HEAD(&p->pages);
129 if (pagecount < NFS_PAGEVEC_SIZE) 127 if (pagecount <= ARRAY_SIZE(p->page_array))
130 p->pagevec = &p->page_array[0]; 128 p->pagevec = p->page_array;
131 else { 129 else {
132 size_t size = ++pagecount * sizeof(struct page *); 130 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
133 p->pagevec = kmalloc(size, GFP_NOFS); 131 if (!p->pagevec) {
134 if (p->pagevec) {
135 memset(p->pagevec, 0, size);
136 } else {
137 mempool_free(p, nfs_wdata_mempool); 132 mempool_free(p, nfs_wdata_mempool);
138 p = NULL; 133 p = NULL;
139 } 134 }
@@ -501,7 +496,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
501 nfs_list_add_request(req, &nfsi->dirty); 496 nfs_list_add_request(req, &nfsi->dirty);
502 nfsi->ndirty++; 497 nfsi->ndirty++;
503 spin_unlock(&nfsi->req_lock); 498 spin_unlock(&nfsi->req_lock);
504 inc_page_state(nr_dirty); 499 inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
505 mark_inode_dirty(inode); 500 mark_inode_dirty(inode);
506} 501}
507 502
@@ -529,7 +524,7 @@ nfs_mark_request_commit(struct nfs_page *req)
529 nfs_list_add_request(req, &nfsi->commit); 524 nfs_list_add_request(req, &nfsi->commit);
530 nfsi->ncommit++; 525 nfsi->ncommit++;
531 spin_unlock(&nfsi->req_lock); 526 spin_unlock(&nfsi->req_lock);
532 inc_page_state(nr_unstable); 527 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
533 mark_inode_dirty(inode); 528 mark_inode_dirty(inode);
534} 529}
535#endif 530#endif
@@ -583,6 +578,17 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un
583 return ret; 578 return ret;
584} 579}
585 580
581static void nfs_cancel_requests(struct list_head *head)
582{
583 struct nfs_page *req;
584 while(!list_empty(head)) {
585 req = nfs_list_entry(head->next);
586 nfs_list_remove_request(req);
587 nfs_inode_remove_request(req);
588 nfs_clear_page_writeback(req);
589 }
590}
591
586/* 592/*
587 * nfs_scan_dirty - Scan an inode for dirty requests 593 * nfs_scan_dirty - Scan an inode for dirty requests
588 * @inode: NFS inode to scan 594 * @inode: NFS inode to scan
@@ -602,7 +608,6 @@ nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_sta
602 if (nfsi->ndirty != 0) { 608 if (nfsi->ndirty != 0) {
603 res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); 609 res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
604 nfsi->ndirty -= res; 610 nfsi->ndirty -= res;
605 sub_page_state(nr_dirty,res);
606 if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) 611 if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
607 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); 612 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
608 } 613 }
@@ -627,7 +632,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
627 int res = 0; 632 int res = 0;
628 633
629 if (nfsi->ncommit != 0) { 634 if (nfsi->ncommit != 0) {
630 res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); 635 res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
631 nfsi->ncommit -= res; 636 nfsi->ncommit -= res;
632 if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) 637 if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
633 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); 638 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
@@ -1387,7 +1392,6 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1387{ 1392{
1388 struct nfs_write_data *data = calldata; 1393 struct nfs_write_data *data = calldata;
1389 struct nfs_page *req; 1394 struct nfs_page *req;
1390 int res = 0;
1391 1395
1392 dprintk("NFS: %4d nfs_commit_done (status %d)\n", 1396 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1393 task->tk_pid, task->tk_status); 1397 task->tk_pid, task->tk_status);
@@ -1399,6 +1403,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1399 while (!list_empty(&data->pages)) { 1403 while (!list_empty(&data->pages)) {
1400 req = nfs_list_entry(data->pages.next); 1404 req = nfs_list_entry(data->pages.next);
1401 nfs_list_remove_request(req); 1405 nfs_list_remove_request(req);
1406 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1402 1407
1403 dprintk("NFS: commit (%s/%Ld %d@%Ld)", 1408 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1404 req->wb_context->dentry->d_inode->i_sb->s_id, 1409 req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -1425,9 +1430,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1425 nfs_mark_request_dirty(req); 1430 nfs_mark_request_dirty(req);
1426 next: 1431 next:
1427 nfs_clear_page_writeback(req); 1432 nfs_clear_page_writeback(req);
1428 res++;
1429 } 1433 }
1430 sub_page_state(nr_unstable,res);
1431} 1434}
1432 1435
1433static const struct rpc_call_ops nfs_commit_ops = { 1436static const struct rpc_call_ops nfs_commit_ops = {
@@ -1495,15 +1498,25 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1495 pages = nfs_scan_dirty(inode, &head, idx_start, npages); 1498 pages = nfs_scan_dirty(inode, &head, idx_start, npages);
1496 if (pages != 0) { 1499 if (pages != 0) {
1497 spin_unlock(&nfsi->req_lock); 1500 spin_unlock(&nfsi->req_lock);
1498 ret = nfs_flush_list(inode, &head, pages, how); 1501 if (how & FLUSH_INVALIDATE)
1502 nfs_cancel_requests(&head);
1503 else
1504 ret = nfs_flush_list(inode, &head, pages, how);
1499 spin_lock(&nfsi->req_lock); 1505 spin_lock(&nfsi->req_lock);
1500 continue; 1506 continue;
1501 } 1507 }
1502 if (nocommit) 1508 if (nocommit)
1503 break; 1509 break;
1504 pages = nfs_scan_commit(inode, &head, 0, 0); 1510 pages = nfs_scan_commit(inode, &head, idx_start, npages);
1505 if (pages == 0) 1511 if (pages == 0)
1506 break; 1512 break;
1513 if (how & FLUSH_INVALIDATE) {
1514 spin_unlock(&nfsi->req_lock);
1515 nfs_cancel_requests(&head);
1516 spin_lock(&nfsi->req_lock);
1517 continue;
1518 }
1519 pages += nfs_scan_commit(inode, &head, 0, 0);
1507 spin_unlock(&nfsi->req_lock); 1520 spin_unlock(&nfsi->req_lock);
1508 ret = nfs_commit_list(inode, &head, how); 1521 ret = nfs_commit_list(inode, &head, how);
1509 spin_lock(&nfsi->req_lock); 1522 spin_lock(&nfsi->req_lock);
@@ -1512,7 +1525,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1512 return ret; 1525 return ret;
1513} 1526}
1514 1527
1515int nfs_init_writepagecache(void) 1528int __init nfs_init_writepagecache(void)
1516{ 1529{
1517 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1530 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1518 sizeof(struct nfs_write_data), 1531 sizeof(struct nfs_write_data),
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index a5a18d4aca40..c043136a82ca 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -4,7 +4,6 @@
4 * This should eventually move to userland. 4 * This should eventually move to userland.
5 * 5 *
6 */ 6 */
7#include <linux/config.h>
8#include <linux/types.h> 7#include <linux/types.h>
9#include <linux/file.h> 8#include <linux/file.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 3eec30000f3f..01bc68c628ad 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -126,7 +126,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
126 if (*ep) 126 if (*ep)
127 goto out; 127 goto out;
128 dprintk("found fsidtype %d\n", fsidtype); 128 dprintk("found fsidtype %d\n", fsidtype);
129 if (fsidtype > 2) 129 if (key_len(fsidtype)==0) /* invalid type */
130 goto out; 130 goto out;
131 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 131 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
132 goto out; 132 goto out;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index dbaf3f93f328..54b37b1d2e3a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -33,7 +33,6 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 34 */
35 35
36#include <linux/config.h>
37#include <linux/module.h> 36#include <linux/module.h>
38#include <linux/list.h> 37#include <linux/list.h>
39#include <linux/inet.h> 38#include <linux/inet.h>
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4b6aa60dfceb..bea6b9478114 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -34,7 +34,6 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#include <linux/config.h>
38#include <linux/module.h> 37#include <linux/module.h>
39#include <linux/init.h> 38#include <linux/init.h>
40 39
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 96c7578cbe1e..9daa0b9feb8d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -123,7 +123,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
123 */ 123 */
124 124
125/* recall_lock protects the del_recall_lru */ 125/* recall_lock protects the del_recall_lru */
126static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED; 126static DEFINE_SPINLOCK(recall_lock);
127static struct list_head del_recall_lru; 127static struct list_head del_recall_lru;
128 128
129static void 129static void
@@ -529,8 +529,7 @@ move_to_confirmed(struct nfs4_client *clp)
529 529
530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
531 list_del_init(&clp->cl_strhash); 531 list_del_init(&clp->cl_strhash);
532 list_del_init(&clp->cl_idhash); 532 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
533 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
534 strhashval = clientstr_hashval(clp->cl_recdir); 533 strhashval = clientstr_hashval(clp->cl_recdir);
535 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 534 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
536 renew_client(clp); 535 renew_client(clp);
@@ -1238,8 +1237,15 @@ find_file(struct inode *ino)
1238 return NULL; 1237 return NULL;
1239} 1238}
1240 1239
1241#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) 1240static int access_valid(u32 x)
1242#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) 1241{
1242 return (x > 0 && x < 4);
1243}
1244
1245static int deny_valid(u32 x)
1246{
1247 return (x >= 0 && x < 5);
1248}
1243 1249
1244static void 1250static void
1245set_access(unsigned int *access, unsigned long bmap) { 1251set_access(unsigned int *access, unsigned long bmap) {
@@ -1746,7 +1752,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1746 int status; 1752 int status;
1747 1753
1748 status = nfserr_inval; 1754 status = nfserr_inval;
1749 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) 1755 if (!access_valid(open->op_share_access)
1756 || !deny_valid(open->op_share_deny))
1750 goto out; 1757 goto out;
1751 /* 1758 /*
1752 * Lookup file; if found, lookup stateid and check open request, 1759 * Lookup file; if found, lookup stateid and check open request,
@@ -1783,10 +1790,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1783 } else { 1790 } else {
1784 /* Stateid was not found, this is a new OPEN */ 1791 /* Stateid was not found, this is a new OPEN */
1785 int flags = 0; 1792 int flags = 0;
1793 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
1794 flags |= MAY_READ;
1786 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 1795 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
1787 flags = MAY_WRITE; 1796 flags |= MAY_WRITE;
1788 else
1789 flags = MAY_READ;
1790 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); 1797 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
1791 if (status) 1798 if (status)
1792 goto out; 1799 goto out;
@@ -2071,16 +2078,12 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
2071 if (!stateid->si_fileid) { /* delegation stateid */ 2078 if (!stateid->si_fileid) { /* delegation stateid */
2072 if(!(dp = find_delegation_stateid(ino, stateid))) { 2079 if(!(dp = find_delegation_stateid(ino, stateid))) {
2073 dprintk("NFSD: delegation stateid not found\n"); 2080 dprintk("NFSD: delegation stateid not found\n");
2074 if (nfs4_in_grace())
2075 status = nfserr_grace;
2076 goto out; 2081 goto out;
2077 } 2082 }
2078 stidp = &dp->dl_stateid; 2083 stidp = &dp->dl_stateid;
2079 } else { /* open or lock stateid */ 2084 } else { /* open or lock stateid */
2080 if (!(stp = find_stateid(stateid, flags))) { 2085 if (!(stp = find_stateid(stateid, flags))) {
2081 dprintk("NFSD: open or lock stateid not found\n"); 2086 dprintk("NFSD: open or lock stateid not found\n");
2082 if (nfs4_in_grace())
2083 status = nfserr_grace;
2084 goto out; 2087 goto out;
2085 } 2088 }
2086 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) 2089 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
@@ -2253,8 +2256,9 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
2253 (int)current_fh->fh_dentry->d_name.len, 2256 (int)current_fh->fh_dentry->d_name.len,
2254 current_fh->fh_dentry->d_name.name); 2257 current_fh->fh_dentry->d_name.name);
2255 2258
2256 if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) 2259 status = fh_verify(rqstp, current_fh, S_IFREG, 0);
2257 goto out; 2260 if (status)
2261 return status;
2258 2262
2259 nfs4_lock_state(); 2263 nfs4_lock_state();
2260 2264
@@ -2321,7 +2325,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
2321 (int)current_fh->fh_dentry->d_name.len, 2325 (int)current_fh->fh_dentry->d_name.len,
2322 current_fh->fh_dentry->d_name.name); 2326 current_fh->fh_dentry->d_name.name);
2323 2327
2324 if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) 2328 if (!access_valid(od->od_share_access)
2329 || !deny_valid(od->od_share_deny))
2325 return nfserr_inval; 2330 return nfserr_inval;
2326 2331
2327 nfs4_lock_state(); 2332 nfs4_lock_state();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index de3998f15f10..5446a0861d1d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1310,7 +1310,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1310 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) || 1310 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
1311 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | 1311 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
1312 FATTR4_WORD1_SPACE_TOTAL))) { 1312 FATTR4_WORD1_SPACE_TOTAL))) {
1313 status = vfs_statfs(dentry->d_inode->i_sb, &statfs); 1313 status = vfs_statfs(dentry, &statfs);
1314 if (status) 1314 if (status)
1315 goto out_nfserr; 1315 goto out_nfserr;
1316 } 1316 }
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d852ebb538e3..fdf7cf3dfadc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -103,8 +103,7 @@ nfsd_cache_shutdown(void)
103static void 103static void
104lru_put_end(struct svc_cacherep *rp) 104lru_put_end(struct svc_cacherep *rp)
105{ 105{
106 list_del(&rp->c_lru); 106 list_move_tail(&rp->c_lru, &lru_head);
107 list_add_tail(&rp->c_lru, &lru_head);
108} 107}
109 108
110/* 109/*
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3ef017b3b5bd..7046ac9cf97f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -6,7 +6,6 @@
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/module.h> 9#include <linux/module.h>
11 10
12#include <linux/linkage.h> 11#include <linux/linkage.h>
@@ -494,10 +493,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
494 return simple_fill_super(sb, 0x6e667364, nfsd_files); 493 return simple_fill_super(sb, 0x6e667364, nfsd_files);
495} 494}
496 495
497static struct super_block *nfsd_get_sb(struct file_system_type *fs_type, 496static int nfsd_get_sb(struct file_system_type *fs_type,
498 int flags, const char *dev_name, void *data) 497 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
499{ 498{
500 return get_sb_single(fs_type, flags, data, nfsd_fill_super); 499 return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
501} 500}
502 501
503static struct file_system_type nfsd_fs_type = { 502static struct file_system_type nfsd_fs_type = {
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3f2ec2e6d06c..ecc439d2565f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -187,13 +187,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
187 goto out; 187 goto out;
188 } 188 }
189 189
190 /* Set user creds for this exportpoint */
191 error = nfsd_setuser(rqstp, exp);
192 if (error) {
193 error = nfserrno(error);
194 goto out;
195 }
196
197 /* 190 /*
198 * Look up the dentry using the NFS file handle. 191 * Look up the dentry using the NFS file handle.
199 */ 192 */
@@ -251,6 +244,14 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
251 } 244 }
252 cache_get(&exp->h); 245 cache_get(&exp->h);
253 246
247 /* Set user creds for this exportpoint; necessary even in the "just
248 * checking" case because this may be a filehandle that was created by
249 * fh_compose, and that is about to be used in another nfsv4 compound
250 * operation */
251 error = nfserrno(nfsd_setuser(rqstp, exp));
252 if (error)
253 goto out;
254
254 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 255 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
255 if (error) 256 if (error)
256 goto out; 257 goto out;
@@ -312,8 +313,8 @@ int
312fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh) 313fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh)
313{ 314{
314 /* ref_fh is a reference file handle. 315 /* ref_fh is a reference file handle.
315 * if it is non-null, then we should compose a filehandle which is 316 * if it is non-null and for the same filesystem, then we should compose
316 * of the same version, where possible. 317 * a filehandle which is of the same version, where possible.
317 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca 318 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
318 * Then create a 32byte filehandle using nfs_fhbase_old 319 * Then create a 32byte filehandle using nfs_fhbase_old
319 * 320 *
@@ -332,7 +333,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
332 parent->d_name.name, dentry->d_name.name, 333 parent->d_name.name, dentry->d_name.name,
333 (inode ? inode->i_ino : 0)); 334 (inode ? inode->i_ino : 0));
334 335
335 if (ref_fh) { 336 if (ref_fh && ref_fh->fh_export == exp) {
336 ref_fh_version = ref_fh->fh_handle.fh_version; 337 ref_fh_version = ref_fh->fh_handle.fh_version;
337 if (ref_fh_version == 0xca) 338 if (ref_fh_version == 0xca)
338 ref_fh_fsid_type = 0; 339 ref_fh_fsid_type = 0;
@@ -461,7 +462,7 @@ fh_update(struct svc_fh *fhp)
461 } else { 462 } else {
462 int size; 463 int size;
463 if (fhp->fh_handle.fh_fileid_type != 0) 464 if (fhp->fh_handle.fh_fileid_type != 0)
464 goto out_uptodate; 465 goto out;
465 datap = fhp->fh_handle.fh_auth+ 466 datap = fhp->fh_handle.fh_auth+
466 fhp->fh_handle.fh_size/4 -1; 467 fhp->fh_handle.fh_size/4 -1;
467 size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; 468 size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
@@ -481,10 +482,6 @@ out_negative:
481 printk(KERN_ERR "fh_update: %s/%s still negative!\n", 482 printk(KERN_ERR "fh_update: %s/%s still negative!\n",
482 dentry->d_parent->d_name.name, dentry->d_name.name); 483 dentry->d_parent->d_name.name, dentry->d_name.name);
483 goto out; 484 goto out;
484out_uptodate:
485 printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
486 dentry->d_parent->d_name.name, dentry->d_name.name);
487 goto out;
488} 485}
489 486
490/* 487/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3790727e5dfd..ec1decf29bab 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -8,7 +8,6 @@
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */ 9 */
10 10
11#include <linux/config.h>
12#include <linux/module.h> 11#include <linux/module.h>
13 12
14#include <linux/time.h> 13#include <linux/time.h>
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1d65f13f458c..c9e3b5a8fe07 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -16,7 +16,6 @@
16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17 */ 17 */
18 18
19#include <linux/config.h>
20#include <linux/string.h> 19#include <linux/string.h>
21#include <linux/time.h> 20#include <linux/time.h>
22#include <linux/errno.h> 21#include <linux/errno.h>
@@ -673,7 +672,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
673 goto out_nfserr; 672 goto out_nfserr;
674 673
675 if (access & MAY_WRITE) { 674 if (access & MAY_WRITE) {
676 flags = O_WRONLY|O_LARGEFILE; 675 if (access & MAY_READ)
676 flags = O_RDWR|O_LARGEFILE;
677 else
678 flags = O_WRONLY|O_LARGEFILE;
677 679
678 DQUOT_INIT(inode); 680 DQUOT_INIT(inode);
679 } 681 }
@@ -834,7 +836,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
834 if (ra && ra->p_set) 836 if (ra && ra->p_set)
835 file->f_ra = ra->p_ra; 837 file->f_ra = ra->p_ra;
836 838
837 if (file->f_op->sendfile) { 839 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
838 svc_pushback_unused_pages(rqstp); 840 svc_pushback_unused_pages(rqstp);
839 err = file->f_op->sendfile(file, &offset, *count, 841 err = file->f_op->sendfile(file, &offset, *count,
840 nfsd_read_actor, rqstp); 842 nfsd_read_actor, rqstp);
@@ -1517,14 +1519,15 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1517 err = nfserrno(err); 1519 err = nfserrno(err);
1518 } 1520 }
1519 1521
1520 fh_unlock(ffhp);
1521 dput(dnew); 1522 dput(dnew);
1523out_unlock:
1524 fh_unlock(ffhp);
1522out: 1525out:
1523 return err; 1526 return err;
1524 1527
1525out_nfserr: 1528out_nfserr:
1526 err = nfserrno(err); 1529 err = nfserrno(err);
1527 goto out; 1530 goto out_unlock;
1528} 1531}
1529 1532
1530/* 1533/*
@@ -1553,7 +1556,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1553 tdir = tdentry->d_inode; 1556 tdir = tdentry->d_inode;
1554 1557
1555 err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; 1558 err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
1556 if (fdir->i_sb != tdir->i_sb) 1559 if (ffhp->fh_export != tfhp->fh_export)
1557 goto out; 1560 goto out;
1558 1561
1559 err = nfserr_perm; 1562 err = nfserr_perm;
@@ -1737,7 +1740,7 @@ int
1737nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) 1740nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
1738{ 1741{
1739 int err = fh_verify(rqstp, fhp, 0, MAY_NOP); 1742 int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
1740 if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat)) 1743 if (!err && vfs_statfs(fhp->fh_dentry,stat))
1741 err = nfserr_io; 1744 err = nfserr_io;
1742 return err; 1745 return err;
1743} 1746}
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index a912debcd20b..9de6b495f112 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -10,7 +10,6 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/config.h>
14#include <linux/nls.h> 13#include <linux/nls.h>
15#include <linux/kernel.h> 14#include <linux/kernel.h>
16#include <linux/errno.h> 15#include <linux/errno.h>
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 580412d330cb..bc579bfdfbd8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1544,7 +1544,7 @@ err_out:
1544/** 1544/**
1545 * ntfs_aops - general address space operations for inodes and attributes 1545 * ntfs_aops - general address space operations for inodes and attributes
1546 */ 1546 */
1547struct address_space_operations ntfs_aops = { 1547const struct address_space_operations ntfs_aops = {
1548 .readpage = ntfs_readpage, /* Fill page with data. */ 1548 .readpage = ntfs_readpage, /* Fill page with data. */
1549 .sync_page = block_sync_page, /* Currently, just unplugs the 1549 .sync_page = block_sync_page, /* Currently, just unplugs the
1550 disk request queue. */ 1550 disk request queue. */
@@ -1560,7 +1560,7 @@ struct address_space_operations ntfs_aops = {
1560 * ntfs_mst_aops - general address space operations for mst protecteed inodes 1560 * ntfs_mst_aops - general address space operations for mst protecteed inodes
1561 * and attributes 1561 * and attributes
1562 */ 1562 */
1563struct address_space_operations ntfs_mst_aops = { 1563const struct address_space_operations ntfs_mst_aops = {
1564 .readpage = ntfs_readpage, /* Fill page with data. */ 1564 .readpage = ntfs_readpage, /* Fill page with data. */
1565 .sync_page = block_sync_page, /* Currently, just unplugs the 1565 .sync_page = block_sync_page, /* Currently, just unplugs the
1566 disk request queue. */ 1566 disk request queue. */
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index 3b74e66ca2ff..325ce261a107 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h
@@ -86,8 +86,7 @@ static inline void ntfs_unmap_page(struct page *page)
86static inline struct page *ntfs_map_page(struct address_space *mapping, 86static inline struct page *ntfs_map_page(struct address_space *mapping,
87 unsigned long index) 87 unsigned long index)
88{ 88{
89 struct page *page = read_cache_page(mapping, index, 89 struct page *page = read_mapping_page(mapping, index, NULL);
90 (filler_t*)mapping->a_ops->readpage, NULL);
91 90
92 if (!IS_ERR(page)) { 91 if (!IS_ERR(page)) {
93 wait_on_page_locked(page); 92 wait_on_page_locked(page);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1663f5c3c6aa..6708e1d68a9e 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -2529,8 +2529,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2529 end >>= PAGE_CACHE_SHIFT; 2529 end >>= PAGE_CACHE_SHIFT;
2530 /* If there is a first partial page, need to do it the slow way. */ 2530 /* If there is a first partial page, need to do it the slow way. */
2531 if (start_ofs) { 2531 if (start_ofs) {
2532 page = read_cache_page(mapping, idx, 2532 page = read_mapping_page(mapping, idx, NULL);
2533 (filler_t*)mapping->a_ops->readpage, NULL);
2534 if (IS_ERR(page)) { 2533 if (IS_ERR(page)) {
2535 ntfs_error(vol->sb, "Failed to read first partial " 2534 ntfs_error(vol->sb, "Failed to read first partial "
2536 "page (sync error, index 0x%lx).", idx); 2535 "page (sync error, index 0x%lx).", idx);
@@ -2600,8 +2599,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2600 } 2599 }
2601 /* If there is a last partial page, need to do it the slow way. */ 2600 /* If there is a last partial page, need to do it the slow way. */
2602 if (end_ofs) { 2601 if (end_ofs) {
2603 page = read_cache_page(mapping, idx, 2602 page = read_mapping_page(mapping, idx, NULL);
2604 (filler_t*)mapping->a_ops->readpage, NULL);
2605 if (IS_ERR(page)) { 2603 if (IS_ERR(page)) {
2606 ntfs_error(vol->sb, "Failed to read last partial page " 2604 ntfs_error(vol->sb, "Failed to read last partial page "
2607 "(sync error, index 0x%lx).", idx); 2605 "(sync error, index 0x%lx).", idx);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c63a83e8da98..2e42c2dcae12 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -231,8 +231,7 @@ do_non_resident_extend:
231 * Read the page. If the page is not present, this will zero 231 * Read the page. If the page is not present, this will zero
232 * the uninitialized regions for us. 232 * the uninitialized regions for us.
233 */ 233 */
234 page = read_cache_page(mapping, index, 234 page = read_mapping_page(mapping, index, NULL);
235 (filler_t*)mapping->a_ops->readpage, NULL);
236 if (IS_ERR(page)) { 235 if (IS_ERR(page)) {
237 err = PTR_ERR(page); 236 err = PTR_ERR(page);
238 goto init_err_out; 237 goto init_err_out;
@@ -1359,7 +1358,7 @@ err_out:
1359 goto out; 1358 goto out;
1360} 1359}
1361 1360
1362static size_t __ntfs_copy_from_user_iovec(char *vaddr, 1361static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
1363 const struct iovec *iov, size_t iov_ofs, size_t bytes) 1362 const struct iovec *iov, size_t iov_ofs, size_t bytes)
1364{ 1363{
1365 size_t total = 0; 1364 size_t total = 0;
@@ -1377,10 +1376,6 @@ static size_t __ntfs_copy_from_user_iovec(char *vaddr,
1377 bytes -= len; 1376 bytes -= len;
1378 vaddr += len; 1377 vaddr += len;
1379 if (unlikely(left)) { 1378 if (unlikely(left)) {
1380 /*
1381 * Zero the rest of the target like __copy_from_user().
1382 */
1383 memset(vaddr, 0, bytes);
1384 total -= left; 1379 total -= left;
1385 break; 1380 break;
1386 } 1381 }
@@ -1421,11 +1416,13 @@ static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1421 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s 1416 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1422 * single-segment behaviour. 1417 * single-segment behaviour.
1423 * 1418 *
1424 * We call the same helper (__ntfs_copy_from_user_iovec()) both when atomic and 1419 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both
1425 * when not atomic. This is ok because __ntfs_copy_from_user_iovec() calls 1420 * when atomic and when not atomic. This is ok because
1426 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In 1421 * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic()
1427 * fact, the only difference between __copy_from_user_inatomic() and 1422 * and it is ok to call this when non-atomic.
1428 * __copy_from_user() is that the latter calls might_sleep(). And on many 1423 * Infact, the only difference between __copy_from_user_inatomic() and
1424 * __copy_from_user() is that the latter calls might_sleep() and the former
1425 * should not zero the tail of the buffer on error. And on many
1429 * architectures __copy_from_user_inatomic() is just defined to 1426 * architectures __copy_from_user_inatomic() is just defined to
1430 * __copy_from_user() so it makes no difference at all on those architectures. 1427 * __copy_from_user() so it makes no difference at all on those architectures.
1431 */ 1428 */
@@ -1442,14 +1439,18 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1442 if (len > bytes) 1439 if (len > bytes)
1443 len = bytes; 1440 len = bytes;
1444 kaddr = kmap_atomic(*pages, KM_USER0); 1441 kaddr = kmap_atomic(*pages, KM_USER0);
1445 copied = __ntfs_copy_from_user_iovec(kaddr + ofs, 1442 copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
1446 *iov, *iov_ofs, len); 1443 *iov, *iov_ofs, len);
1447 kunmap_atomic(kaddr, KM_USER0); 1444 kunmap_atomic(kaddr, KM_USER0);
1448 if (unlikely(copied != len)) { 1445 if (unlikely(copied != len)) {
1449 /* Do it the slow way. */ 1446 /* Do it the slow way. */
1450 kaddr = kmap(*pages); 1447 kaddr = kmap(*pages);
1451 copied = __ntfs_copy_from_user_iovec(kaddr + ofs, 1448 copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
1452 *iov, *iov_ofs, len); 1449 *iov, *iov_ofs, len);
1450 /*
1451 * Zero the rest of the target like __copy_from_user().
1452 */
1453 memset(kaddr + ofs + copied, 0, len - copied);
1453 kunmap(*pages); 1454 kunmap(*pages);
1454 if (unlikely(copied != len)) 1455 if (unlikely(copied != len))
1455 goto err_out; 1456 goto err_out;
@@ -1484,14 +1485,15 @@ static inline void ntfs_flush_dcache_pages(struct page **pages,
1484 unsigned nr_pages) 1485 unsigned nr_pages)
1485{ 1486{
1486 BUG_ON(!nr_pages); 1487 BUG_ON(!nr_pages);
1488 /*
1489 * Warning: Do not do the decrement at the same time as the call to
1490 * flush_dcache_page() because it is a NULL macro on i386 and hence the
1491 * decrement never happens so the loop never terminates.
1492 */
1487 do { 1493 do {
1488 /* 1494 --nr_pages;
1489 * Warning: Do not do the decrement at the same time as the
1490 * call because flush_dcache_page() is a NULL macro on i386
1491 * and hence the decrement never happens.
1492 */
1493 flush_dcache_page(pages[nr_pages]); 1495 flush_dcache_page(pages[nr_pages]);
1494 } while (--nr_pages > 0); 1496 } while (nr_pages > 0);
1495} 1497}
1496 1498
1497/** 1499/**
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index bf7b3d7c0930..ddd3d503097c 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -57,8 +57,8 @@ extern struct kmem_cache *ntfs_attr_ctx_cache;
57extern struct kmem_cache *ntfs_index_ctx_cache; 57extern struct kmem_cache *ntfs_index_ctx_cache;
58 58
59/* The various operations structs defined throughout the driver files. */ 59/* The various operations structs defined throughout the driver files. */
60extern struct address_space_operations ntfs_aops; 60extern const struct address_space_operations ntfs_aops;
61extern struct address_space_operations ntfs_mst_aops; 61extern const struct address_space_operations ntfs_mst_aops;
62 62
63extern const struct file_operations ntfs_file_ops; 63extern const struct file_operations ntfs_file_ops;
64extern struct inode_operations ntfs_file_inode_ops; 64extern struct inode_operations ntfs_file_inode_ops;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 27833f6df49f..0e14acea3f8b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2601,10 +2601,10 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2601 2601
2602/** 2602/**
2603 * ntfs_statfs - return information about mounted NTFS volume 2603 * ntfs_statfs - return information about mounted NTFS volume
2604 * @sb: super block of mounted volume 2604 * @dentry: dentry from mounted volume
2605 * @sfs: statfs structure in which to return the information 2605 * @sfs: statfs structure in which to return the information
2606 * 2606 *
2607 * Return information about the mounted NTFS volume @sb in the statfs structure 2607 * Return information about the mounted NTFS volume @dentry in the statfs structure
2608 * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is 2608 * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
2609 * called). We interpret the values to be correct of the moment in time at 2609 * called). We interpret the values to be correct of the moment in time at
2610 * which we are called. Most values are variable otherwise and this isn't just 2610 * which we are called. Most values are variable otherwise and this isn't just
@@ -2617,8 +2617,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2617 * 2617 *
2618 * Return 0 on success or -errno on error. 2618 * Return 0 on success or -errno on error.
2619 */ 2619 */
2620static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) 2620static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
2621{ 2621{
2622 struct super_block *sb = dentry->d_sb;
2622 s64 size; 2623 s64 size;
2623 ntfs_volume *vol = NTFS_SB(sb); 2624 ntfs_volume *vol = NTFS_SB(sb);
2624 ntfs_inode *mft_ni = NTFS_I(vol->mft_ino); 2625 ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
@@ -3093,10 +3094,11 @@ struct kmem_cache *ntfs_index_ctx_cache;
3093/* Driver wide mutex. */ 3094/* Driver wide mutex. */
3094DEFINE_MUTEX(ntfs_lock); 3095DEFINE_MUTEX(ntfs_lock);
3095 3096
3096static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, 3097static int ntfs_get_sb(struct file_system_type *fs_type,
3097 int flags, const char *dev_name, void *data) 3098 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3098{ 3099{
3099 return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); 3100 return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
3101 mnt);
3100} 3102}
3101 3103
3102static struct file_system_type ntfs_fs_type = { 3104static struct file_system_type ntfs_fs_type = {
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h
index c8064cae8f17..beda5bf96405 100644
--- a/fs/ntfs/sysctl.h
+++ b/fs/ntfs/sysctl.h
@@ -24,7 +24,6 @@
24#ifndef _LINUX_NTFS_SYSCTL_H 24#ifndef _LINUX_NTFS_SYSCTL_H
25#define _LINUX_NTFS_SYSCTL_H 25#define _LINUX_NTFS_SYSCTL_H
26 26
27#include <linux/config.h>
28 27
29#if defined(DEBUG) && defined(CONFIG_SYSCTL) 28#if defined(DEBUG) && defined(CONFIG_SYSCTL)
30 29
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 47152bf9a7f2..f1d1c342ce01 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -558,16 +558,9 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
558 u64 vbo_max; /* file offset, max_blocks from iblock */ 558 u64 vbo_max; /* file offset, max_blocks from iblock */
559 u64 p_blkno; 559 u64 p_blkno;
560 int contig_blocks; 560 int contig_blocks;
561 unsigned char blocksize_bits; 561 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
562 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; 562 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
563 563
564 if (!inode || !bh_result) {
565 mlog(ML_ERROR, "inode or bh_result is null\n");
566 return -EIO;
567 }
568
569 blocksize_bits = inode->i_sb->s_blocksize_bits;
570
571 /* This function won't even be called if the request isn't all 564 /* This function won't even be called if the request isn't all
572 * nicely aligned and of the right size, so there's no need 565 * nicely aligned and of the right size, so there's no need
573 * for us to check any of that. */ 566 * for us to check any of that. */
@@ -666,7 +659,7 @@ out:
666 return ret; 659 return ret;
667} 660}
668 661
669struct address_space_operations ocfs2_aops = { 662const struct address_space_operations ocfs2_aops = {
670 .readpage = ocfs2_readpage, 663 .readpage = ocfs2_readpage,
671 .writepage = ocfs2_writepage, 664 .writepage = ocfs2_writepage,
672 .prepare_write = ocfs2_prepare_write, 665 .prepare_write = ocfs2_prepare_write,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 21f38accd039..504595d6cf65 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -54,7 +54,7 @@ static DECLARE_RWSEM(o2hb_callback_sem);
54 * multiple hb threads are watching multiple regions. A node is live 54 * multiple hb threads are watching multiple regions. A node is live
55 * whenever any of the threads sees activity from the node in its region. 55 * whenever any of the threads sees activity from the node in its region.
56 */ 56 */
57static spinlock_t o2hb_live_lock = SPIN_LOCK_UNLOCKED; 57static DEFINE_SPINLOCK(o2hb_live_lock);
58static struct list_head o2hb_live_slots[O2NM_MAX_NODES]; 58static struct list_head o2hb_live_slots[O2NM_MAX_NODES];
59static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 59static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
60static LIST_HEAD(o2hb_node_events); 60static LIST_HEAD(o2hb_node_events);
@@ -517,6 +517,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
517 hb_block->hb_seq = cpu_to_le64(cputime); 517 hb_block->hb_seq = cpu_to_le64(cputime);
518 hb_block->hb_node = node_num; 518 hb_block->hb_node = node_num;
519 hb_block->hb_generation = cpu_to_le64(generation); 519 hb_block->hb_generation = cpu_to_le64(generation);
520 hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS);
520 521
521 /* This step must always happen last! */ 522 /* This step must always happen last! */
522 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, 523 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
@@ -645,6 +646,8 @@ static int o2hb_check_slot(struct o2hb_region *reg,
645 struct o2nm_node *node; 646 struct o2nm_node *node;
646 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block; 647 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block;
647 u64 cputime; 648 u64 cputime;
649 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
650 unsigned int slot_dead_ms;
648 651
649 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); 652 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
650 653
@@ -733,6 +736,23 @@ fire_callbacks:
733 &o2hb_live_slots[slot->ds_node_num]); 736 &o2hb_live_slots[slot->ds_node_num]);
734 737
735 slot->ds_equal_samples = 0; 738 slot->ds_equal_samples = 0;
739
740 /* We want to be sure that all nodes agree on the
741 * number of milliseconds before a node will be
742 * considered dead. The self-fencing timeout is
743 * computed from this value, and a discrepancy might
744 * result in heartbeat calling a node dead when it
745 * hasn't self-fenced yet. */
746 slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms);
747 if (slot_dead_ms && slot_dead_ms != dead_ms) {
748 /* TODO: Perhaps we can fail the region here. */
749 mlog(ML_ERROR, "Node %d on device %s has a dead count "
750 "of %u ms, but our count is %u ms.\n"
751 "Please double check your configuration values "
752 "for 'O2CB_HEARTBEAT_THRESHOLD'\n",
753 slot->ds_node_num, reg->hr_dev_name, slot_dead_ms,
754 dead_ms);
755 }
736 goto out; 756 goto out;
737 } 757 }
738 758
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 73edad782537..a42628ba9ddf 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -123,6 +123,17 @@
123#define MLOG_MASK_PREFIX 0 123#define MLOG_MASK_PREFIX 0
124#endif 124#endif
125 125
126/*
127 * When logging is disabled, force the bit test to 0 for anything other
128 * than errors and notices, allowing gcc to remove the code completely.
129 * When enabled, allow all masks.
130 */
131#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
132#define ML_ALLOWED_BITS ~0
133#else
134#define ML_ALLOWED_BITS (ML_ERROR|ML_NOTICE)
135#endif
136
126#define MLOG_MAX_BITS 64 137#define MLOG_MAX_BITS 64
127 138
128struct mlog_bits { 139struct mlog_bits {
@@ -187,7 +198,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
187 198
188#define mlog(mask, fmt, args...) do { \ 199#define mlog(mask, fmt, args...) do { \
189 u64 __m = MLOG_MASK_PREFIX | (mask); \ 200 u64 __m = MLOG_MASK_PREFIX | (mask); \
190 if (__mlog_test_u64(__m, mlog_and_bits) && \ 201 if ((__m & ML_ALLOWED_BITS) && \
202 __mlog_test_u64(__m, mlog_and_bits) && \
191 !__mlog_test_u64(__m, mlog_not_bits)) { \ 203 !__mlog_test_u64(__m, mlog_not_bits)) { \
192 if (__m & ML_ERROR) \ 204 if (__m & ML_ERROR) \
193 __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ 205 __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \
@@ -204,6 +216,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 216 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
205} while (0) 217} while (0)
206 218
219#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
207#define mlog_entry(fmt, args...) do { \ 220#define mlog_entry(fmt, args...) do { \
208 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \ 221 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \
209} while (0) 222} while (0)
@@ -247,6 +260,13 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
247#define mlog_exit_void() do { \ 260#define mlog_exit_void() do { \
248 mlog(ML_EXIT, "EXIT\n"); \ 261 mlog(ML_EXIT, "EXIT\n"); \
249} while (0) 262} while (0)
263#else
264#define mlog_entry(...) do { } while (0)
265#define mlog_entry_void(...) do { } while (0)
266#define mlog_exit(...) do { } while (0)
267#define mlog_exit_ptr(...) do { } while (0)
268#define mlog_exit_void(...) do { } while (0)
269#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */
250 270
251#define mlog_bug_on_msg(cond, fmt, args...) do { \ 271#define mlog_bug_on_msg(cond, fmt, args...) do { \
252 if (cond) { \ 272 if (cond) { \
diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h
index 94096069cb43..3f4151da9709 100644
--- a/fs/ocfs2/cluster/ocfs2_heartbeat.h
+++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h
@@ -32,6 +32,7 @@ struct o2hb_disk_heartbeat_block {
32 __u8 hb_pad1[3]; 32 __u8 hb_pad1[3];
33 __le32 hb_cksum; 33 __le32 hb_cksum;
34 __le64 hb_generation; 34 __le64 hb_generation;
35 __le32 hb_dead_ms;
35}; 36};
36 37
37#endif /* _OCFS2_HEARTBEAT_H */ 38#endif /* _OCFS2_HEARTBEAT_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 0f60cc0d3985..b650efa8c8be 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@
108 ##args); \ 108 ##args); \
109} while (0) 109} while (0)
110 110
111static rwlock_t o2net_handler_lock = RW_LOCK_UNLOCKED; 111static DEFINE_RWLOCK(o2net_handler_lock);
112static struct rb_root o2net_handler_tree = RB_ROOT; 112static struct rb_root o2net_handler_tree = RB_ROOT;
113 113
114static struct o2net_node o2net_nodes[O2NM_MAX_NODES]; 114static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
@@ -396,8 +396,8 @@ static void o2net_set_nn_state(struct o2net_node *nn,
396 } 396 }
397 397
398 if (was_valid && !valid) { 398 if (was_valid && !valid) {
399 mlog(ML_NOTICE, "no longer connected to " SC_NODEF_FMT "\n", 399 printk(KERN_INFO "o2net: no longer connected to "
400 SC_NODEF_ARGS(old_sc)); 400 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
401 o2net_complete_nodes_nsw(nn); 401 o2net_complete_nodes_nsw(nn);
402 } 402 }
403 403
@@ -409,10 +409,10 @@ static void o2net_set_nn_state(struct o2net_node *nn,
409 * the only way to start connecting again is to down 409 * the only way to start connecting again is to down
410 * heartbeat and bring it back up. */ 410 * heartbeat and bring it back up. */
411 cancel_delayed_work(&nn->nn_connect_expired); 411 cancel_delayed_work(&nn->nn_connect_expired);
412 mlog(ML_NOTICE, "%s " SC_NODEF_FMT "\n", 412 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
413 o2nm_this_node() > sc->sc_node->nd_num ? 413 o2nm_this_node() > sc->sc_node->nd_num ?
414 "connected to" : "accepted connection from", 414 "connected to" : "accepted connection from",
415 SC_NODEF_ARGS(sc)); 415 SC_NODEF_ARGS(sc));
416 } 416 }
417 417
418 /* trigger the connecting worker func as long as we're not valid, 418 /* trigger the connecting worker func as long as we're not valid,
@@ -1280,7 +1280,7 @@ static void o2net_idle_timer(unsigned long data)
1280 1280
1281 do_gettimeofday(&now); 1281 do_gettimeofday(&now);
1282 1282
1283 mlog(ML_NOTICE, "connection to " SC_NODEF_FMT " has been idle for 10 " 1283 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 "
1284 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1284 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
1285 mlog(ML_NOTICE, "here are some times that might help debug the " 1285 mlog(ML_NOTICE, "here are some times that might help debug the "
1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index ae47f450792f..3d494d1a5f36 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -213,11 +213,9 @@ int ocfs2_find_files_on_disk(const char *name,
213 struct ocfs2_dir_entry **dirent) 213 struct ocfs2_dir_entry **dirent)
214{ 214{
215 int status = -ENOENT; 215 int status = -ENOENT;
216 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
217 216
218 mlog_entry("(osb=%p, parent=%llu, name='%.*s', blkno=%p, inode=%p)\n", 217 mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n",
219 osb, (unsigned long long)OCFS2_I(inode)->ip_blkno, 218 namelen, name, blkno, inode, dirent_bh, dirent);
220 namelen, name, blkno, inode);
221 219
222 *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); 220 *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
223 if (!*dirent_bh || !*dirent) { 221 if (!*dirent_bh || !*dirent) {
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 355593dd8ef8..42775e2bbe2c 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -197,12 +197,14 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
197 lock->ml.node == dlm->node_num ? "master" : 197 lock->ml.node == dlm->node_num ? "master" :
198 "remote"); 198 "remote");
199 memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN); 199 memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
200 } else if (lksb->flags & DLM_LKSB_PUT_LVB) {
201 mlog(0, "setting lvb from lockres for %s node\n",
202 lock->ml.node == dlm->node_num ? "master" :
203 "remote");
204 memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
205 } 200 }
201 /* Do nothing for lvb put requests - they should be done in
202 * place when the lock is downconverted - otherwise we risk
203 * racing gets and puts which could result in old lvb data
204 * being propagated. We leave the put flag set and clear it
205 * here. In the future we might want to clear it at the time
206 * the put is actually done.
207 */
206 spin_unlock(&res->spinlock); 208 spin_unlock(&res->spinlock);
207 } 209 }
208 210
@@ -381,8 +383,7 @@ do_ast:
381 ret = DLM_NORMAL; 383 ret = DLM_NORMAL;
382 if (past->type == DLM_AST) { 384 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 385 /* do not alter lock refcount. switching lists. */
384 list_del_init(&lock->list); 386 list_move_tail(&lock->list, &res->granted);
385 list_add_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 387 mlog(0, "ast: adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 388 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 389 if (lock->ml.convert_type != LKM_IVMODE) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 88cc43df18f1..14530ee7e11d 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,17 @@
37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes 37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
38#define DLM_THREAD_MS 200 // flush at least every 200 ms 38#define DLM_THREAD_MS 200 // flush at least every 200 ms
39 39
40#define DLM_HASH_BUCKETS (PAGE_SIZE / sizeof(struct hlist_head)) 40#define DLM_HASH_SIZE_DEFAULT (1 << 14)
41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
42# define DLM_HASH_PAGES 1
43#else
44# define DLM_HASH_PAGES (DLM_HASH_SIZE_DEFAULT / PAGE_SIZE)
45#endif
46#define DLM_BUCKETS_PER_PAGE (PAGE_SIZE / sizeof(struct hlist_head))
47#define DLM_HASH_BUCKETS (DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)
48
49/* Intended to make it easier for us to switch out hash functions */
50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
41 51
42enum dlm_ast_type { 52enum dlm_ast_type {
43 DLM_AST = 0, 53 DLM_AST = 0,
@@ -61,7 +71,8 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
61 return 0; 71 return 0;
62} 72}
63 73
64#define DLM_RECO_STATE_ACTIVE 0x0001 74#define DLM_RECO_STATE_ACTIVE 0x0001
75#define DLM_RECO_STATE_FINALIZE 0x0002
65 76
66struct dlm_recovery_ctxt 77struct dlm_recovery_ctxt
67{ 78{
@@ -85,7 +96,7 @@ enum dlm_ctxt_state {
85struct dlm_ctxt 96struct dlm_ctxt
86{ 97{
87 struct list_head list; 98 struct list_head list;
88 struct hlist_head *lockres_hash; 99 struct hlist_head **lockres_hash;
89 struct list_head dirty_list; 100 struct list_head dirty_list;
90 struct list_head purge_list; 101 struct list_head purge_list;
91 struct list_head pending_asts; 102 struct list_head pending_asts;
@@ -120,6 +131,7 @@ struct dlm_ctxt
120 struct o2hb_callback_func dlm_hb_down; 131 struct o2hb_callback_func dlm_hb_down;
121 struct task_struct *dlm_thread_task; 132 struct task_struct *dlm_thread_task;
122 struct task_struct *dlm_reco_thread_task; 133 struct task_struct *dlm_reco_thread_task;
134 struct workqueue_struct *dlm_worker;
123 wait_queue_head_t dlm_thread_wq; 135 wait_queue_head_t dlm_thread_wq;
124 wait_queue_head_t dlm_reco_thread_wq; 136 wait_queue_head_t dlm_reco_thread_wq;
125 wait_queue_head_t ast_wq; 137 wait_queue_head_t ast_wq;
@@ -132,6 +144,11 @@ struct dlm_ctxt
132 struct list_head dlm_eviction_callbacks; 144 struct list_head dlm_eviction_callbacks;
133}; 145};
134 146
147static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i)
148{
149 return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
150}
151
135/* these keventd work queue items are for less-frequently 152/* these keventd work queue items are for less-frequently
136 * called functions that cannot be directly called from the 153 * called functions that cannot be directly called from the
137 * net message handlers for some reason, usually because 154 * net message handlers for some reason, usually because
@@ -216,20 +233,29 @@ struct dlm_lock_resource
216 /* WARNING: Please see the comment in dlm_init_lockres before 233 /* WARNING: Please see the comment in dlm_init_lockres before
217 * adding fields here. */ 234 * adding fields here. */
218 struct hlist_node hash_node; 235 struct hlist_node hash_node;
236 struct qstr lockname;
219 struct kref refs; 237 struct kref refs;
220 238
221 /* please keep these next 3 in this order 239 /*
222 * some funcs want to iterate over all lists */ 240 * Please keep granted, converting, and blocked in this order,
241 * as some funcs want to iterate over all lists.
242 *
243 * All four lists are protected by the hash's reference.
244 */
223 struct list_head granted; 245 struct list_head granted;
224 struct list_head converting; 246 struct list_head converting;
225 struct list_head blocked; 247 struct list_head blocked;
248 struct list_head purge;
226 249
250 /*
251 * These two lists require you to hold an additional reference
252 * while they are on the list.
253 */
227 struct list_head dirty; 254 struct list_head dirty;
228 struct list_head recovering; // dlm_recovery_ctxt.resources list 255 struct list_head recovering; // dlm_recovery_ctxt.resources list
229 256
230 /* unused lock resources have their last_used stamped and are 257 /* unused lock resources have their last_used stamped and are
231 * put on a list for the dlm thread to run. */ 258 * put on a list for the dlm thread to run. */
232 struct list_head purge;
233 unsigned long last_used; 259 unsigned long last_used;
234 260
235 unsigned migration_pending:1; 261 unsigned migration_pending:1;
@@ -238,7 +264,6 @@ struct dlm_lock_resource
238 wait_queue_head_t wq; 264 wait_queue_head_t wq;
239 u8 owner; //node which owns the lock resource, or unknown 265 u8 owner; //node which owns the lock resource, or unknown
240 u16 state; 266 u16 state;
241 struct qstr lockname;
242 char lvb[DLM_LVB_LEN]; 267 char lvb[DLM_LVB_LEN];
243}; 268};
244 269
@@ -300,6 +325,15 @@ enum dlm_lockres_list {
300 DLM_BLOCKED_LIST 325 DLM_BLOCKED_LIST
301}; 326};
302 327
328static inline int dlm_lvb_is_empty(char *lvb)
329{
330 int i;
331 for (i=0; i<DLM_LVB_LEN; i++)
332 if (lvb[i])
333 return 0;
334 return 1;
335}
336
303static inline struct list_head * 337static inline struct list_head *
304dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) 338dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
305{ 339{
@@ -609,7 +643,8 @@ struct dlm_finalize_reco
609{ 643{
610 u8 node_idx; 644 u8 node_idx;
611 u8 dead_node; 645 u8 dead_node;
612 __be16 pad1; 646 u8 flags;
647 u8 pad1;
613 __be32 pad2; 648 __be32 pad2;
614}; 649};
615 650
@@ -676,6 +711,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
676void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); 711void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
677int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 712int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
678int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); 713int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
714int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);
679 715
680void dlm_put(struct dlm_ctxt *dlm); 716void dlm_put(struct dlm_ctxt *dlm);
681struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 717struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
@@ -687,14 +723,20 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
687 struct dlm_lock_resource *res); 723 struct dlm_lock_resource *res);
688void dlm_purge_lockres(struct dlm_ctxt *dlm, 724void dlm_purge_lockres(struct dlm_ctxt *dlm,
689 struct dlm_lock_resource *lockres); 725 struct dlm_lock_resource *lockres);
690void dlm_lockres_get(struct dlm_lock_resource *res); 726static inline void dlm_lockres_get(struct dlm_lock_resource *res)
727{
728 /* This is called on every lookup, so it might be worth
729 * inlining. */
730 kref_get(&res->refs);
731}
691void dlm_lockres_put(struct dlm_lock_resource *res); 732void dlm_lockres_put(struct dlm_lock_resource *res);
692void __dlm_unhash_lockres(struct dlm_lock_resource *res); 733void __dlm_unhash_lockres(struct dlm_lock_resource *res);
693void __dlm_insert_lockres(struct dlm_ctxt *dlm, 734void __dlm_insert_lockres(struct dlm_ctxt *dlm,
694 struct dlm_lock_resource *res); 735 struct dlm_lock_resource *res);
695struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 736struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
696 const char *name, 737 const char *name,
697 unsigned int len); 738 unsigned int len,
739 unsigned int hash);
698struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 740struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
699 const char *name, 741 const char *name,
700 unsigned int len); 742 unsigned int len);
@@ -780,8 +822,6 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
780int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); 822int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
781int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, 823int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
782 u8 nodenum, u8 *real_master); 824 u8 nodenum, u8 *real_master);
783int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
784 struct dlm_lock_resource *res, u8 *real_master);
785 825
786 826
787int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, 827int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
@@ -819,6 +859,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm,
819 u8 dead_node); 859 u8 dead_node);
820int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); 860int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
821 861
862int __dlm_lockres_unused(struct dlm_lock_resource *res);
822 863
823static inline const char * dlm_lock_mode_name(int mode) 864static inline const char * dlm_lock_mode_name(int mode)
824{ 865{
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 8285228d9e37..c764dc8e40a2 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -214,6 +214,9 @@ grant:
214 if (lock->ml.node == dlm->node_num) 214 if (lock->ml.node == dlm->node_num)
215 mlog(0, "doing in-place convert for nonlocal lock\n"); 215 mlog(0, "doing in-place convert for nonlocal lock\n");
216 lock->ml.type = type; 216 lock->ml.type = type;
217 if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
218 memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
219
217 status = DLM_NORMAL; 220 status = DLM_NORMAL;
218 *call_ast = 1; 221 *call_ast = 1;
219 goto unlock_exit; 222 goto unlock_exit;
@@ -231,8 +234,7 @@ switch_queues:
231 234
232 lock->ml.convert_type = type; 235 lock->ml.convert_type = type;
233 /* do not alter lock refcount. switching lists. */ 236 /* do not alter lock refcount. switching lists. */
234 list_del_init(&lock->list); 237 list_move_tail(&lock->list, &res->converting);
235 list_add_tail(&lock->list, &res->converting);
236 238
237unlock_exit: 239unlock_exit:
238 spin_unlock(&lock->spinlock); 240 spin_unlock(&lock->spinlock);
@@ -248,8 +250,7 @@ void dlm_revert_pending_convert(struct dlm_lock_resource *res,
248 struct dlm_lock *lock) 250 struct dlm_lock *lock)
249{ 251{
250 /* do not alter lock refcount. switching lists. */ 252 /* do not alter lock refcount. switching lists. */
251 list_del_init(&lock->list); 253 list_move_tail(&lock->list, &res->granted);
252 list_add_tail(&lock->list, &res->granted);
253 lock->ml.convert_type = LKM_IVMODE; 254 lock->ml.convert_type = LKM_IVMODE;
254 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 255 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
255} 256}
@@ -294,8 +295,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
294 res->state |= DLM_LOCK_RES_IN_PROGRESS; 295 res->state |= DLM_LOCK_RES_IN_PROGRESS;
295 /* move lock to local convert queue */ 296 /* move lock to local convert queue */
296 /* do not alter lock refcount. switching lists. */ 297 /* do not alter lock refcount. switching lists. */
297 list_del_init(&lock->list); 298 list_move_tail(&lock->list, &res->converting);
298 list_add_tail(&lock->list, &res->converting);
299 lock->convert_pending = 1; 299 lock->convert_pending = 1;
300 lock->ml.convert_type = type; 300 lock->ml.convert_type = type;
301 301
@@ -464,6 +464,12 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
464 } 464 }
465 465
466 spin_lock(&res->spinlock); 466 spin_lock(&res->spinlock);
467 status = __dlm_lockres_state_to_status(res);
468 if (status != DLM_NORMAL) {
469 spin_unlock(&res->spinlock);
470 dlm_error(status);
471 goto leave;
472 }
467 list_for_each(iter, &res->granted) { 473 list_for_each(iter, &res->granted) {
468 lock = list_entry(iter, struct dlm_lock, list); 474 lock = list_entry(iter, struct dlm_lock, list);
469 if (lock->ml.cookie == cnv->cookie && 475 if (lock->ml.cookie == cnv->cookie &&
@@ -473,6 +479,21 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
473 } 479 }
474 lock = NULL; 480 lock = NULL;
475 } 481 }
482 if (!lock) {
483 __dlm_print_one_lock_resource(res);
484 list_for_each(iter, &res->granted) {
485 lock = list_entry(iter, struct dlm_lock, list);
486 if (lock->ml.node == cnv->node_idx) {
487 mlog(ML_ERROR, "There is something here "
488 "for node %u, lock->ml.cookie=%llu, "
489 "cnv->cookie=%llu\n", cnv->node_idx,
490 (unsigned long long)lock->ml.cookie,
491 (unsigned long long)cnv->cookie);
492 break;
493 }
494 }
495 lock = NULL;
496 }
476 spin_unlock(&res->spinlock); 497 spin_unlock(&res->spinlock);
477 if (!lock) { 498 if (!lock) {
478 status = DLM_IVLOCKID; 499 status = DLM_IVLOCKID;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index c7eae5d3324e..3f6c8d88f7af 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -37,10 +37,8 @@
37 37
38#include "dlmapi.h" 38#include "dlmapi.h"
39#include "dlmcommon.h" 39#include "dlmcommon.h"
40#include "dlmdebug.h"
41 40
42#include "dlmdomain.h" 41#include "dlmdomain.h"
43#include "dlmdebug.h"
44 42
45#define MLOG_MASK_PREFIX ML_DLM 43#define MLOG_MASK_PREFIX ML_DLM
46#include "cluster/masklog.h" 44#include "cluster/masklog.h"
@@ -120,6 +118,7 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
120} 118}
121EXPORT_SYMBOL_GPL(dlm_print_one_lock); 119EXPORT_SYMBOL_GPL(dlm_print_one_lock);
122 120
121#if 0
123void dlm_dump_lock_resources(struct dlm_ctxt *dlm) 122void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
124{ 123{
125 struct dlm_lock_resource *res; 124 struct dlm_lock_resource *res;
@@ -136,12 +135,13 @@ void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
136 135
137 spin_lock(&dlm->spinlock); 136 spin_lock(&dlm->spinlock);
138 for (i=0; i<DLM_HASH_BUCKETS; i++) { 137 for (i=0; i<DLM_HASH_BUCKETS; i++) {
139 bucket = &(dlm->lockres_hash[i]); 138 bucket = dlm_lockres_hash(dlm, i);
140 hlist_for_each_entry(res, iter, bucket, hash_node) 139 hlist_for_each_entry(res, iter, bucket, hash_node)
141 dlm_print_one_lock_resource(res); 140 dlm_print_one_lock_resource(res);
142 } 141 }
143 spin_unlock(&dlm->spinlock); 142 spin_unlock(&dlm->spinlock);
144} 143}
144#endif /* 0 */
145 145
146static const char *dlm_errnames[] = { 146static const char *dlm_errnames[] = {
147 [DLM_NORMAL] = "DLM_NORMAL", 147 [DLM_NORMAL] = "DLM_NORMAL",
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
deleted file mode 100644
index 6858510c3ccd..000000000000
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmdebug.h
5 *
6 * Copyright (C) 2004 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public
19 * License along with this program; if not, write to the
20 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 * Boston, MA 021110-1307, USA.
22 *
23 */
24
25#ifndef DLMDEBUG_H
26#define DLMDEBUG_H
27
28void dlm_dump_lock_resources(struct dlm_ctxt *dlm);
29
30#endif
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8f3a9e3106fd..8d1065f8b3bd 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -41,7 +41,6 @@
41#include "dlmapi.h" 41#include "dlmapi.h"
42#include "dlmcommon.h" 42#include "dlmcommon.h"
43 43
44#include "dlmdebug.h"
45#include "dlmdomain.h" 44#include "dlmdomain.h"
46 45
47#include "dlmver.h" 46#include "dlmver.h"
@@ -49,6 +48,33 @@
49#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
50#include "cluster/masklog.h" 49#include "cluster/masklog.h"
51 50
51static void dlm_free_pagevec(void **vec, int pages)
52{
53 while (pages--)
54 free_page((unsigned long)vec[pages]);
55 kfree(vec);
56}
57
58static void **dlm_alloc_pagevec(int pages)
59{
60 void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL);
61 int i;
62
63 if (!vec)
64 return NULL;
65
66 for (i = 0; i < pages; i++)
67 if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL)))
68 goto out_free;
69
70 mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
71 pages, DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE);
72 return vec;
73out_free:
74 dlm_free_pagevec(vec, i);
75 return NULL;
76}
77
52/* 78/*
53 * 79 *
54 * spinlock lock ordering: if multiple locks are needed, obey this ordering: 80 * spinlock lock ordering: if multiple locks are needed, obey this ordering:
@@ -62,7 +88,7 @@
62 * 88 *
63 */ 89 */
64 90
65spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED; 91DEFINE_SPINLOCK(dlm_domain_lock);
66LIST_HEAD(dlm_domains); 92LIST_HEAD(dlm_domains);
67static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); 93static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
68 94
@@ -90,8 +116,7 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
90 assert_spin_locked(&dlm->spinlock); 116 assert_spin_locked(&dlm->spinlock);
91 117
92 q = &res->lockname; 118 q = &res->lockname;
93 q->hash = full_name_hash(q->name, q->len); 119 bucket = dlm_lockres_hash(dlm, q->hash);
94 bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
95 120
96 /* get a reference for our hashtable */ 121 /* get a reference for our hashtable */
97 dlm_lockres_get(res); 122 dlm_lockres_get(res);
@@ -100,34 +125,32 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
100} 125}
101 126
102struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 127struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
103 const char *name, 128 const char *name,
104 unsigned int len) 129 unsigned int len,
130 unsigned int hash)
105{ 131{
106 unsigned int hash;
107 struct hlist_node *iter;
108 struct dlm_lock_resource *tmpres=NULL;
109 struct hlist_head *bucket; 132 struct hlist_head *bucket;
133 struct hlist_node *list;
110 134
111 mlog_entry("%.*s\n", len, name); 135 mlog_entry("%.*s\n", len, name);
112 136
113 assert_spin_locked(&dlm->spinlock); 137 assert_spin_locked(&dlm->spinlock);
114 138
115 hash = full_name_hash(name, len); 139 bucket = dlm_lockres_hash(dlm, hash);
116
117 bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
118
119 /* check for pre-existing lock */
120 hlist_for_each(iter, bucket) {
121 tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
122 if (tmpres->lockname.len == len &&
123 memcmp(tmpres->lockname.name, name, len) == 0) {
124 dlm_lockres_get(tmpres);
125 break;
126 }
127 140
128 tmpres = NULL; 141 hlist_for_each(list, bucket) {
142 struct dlm_lock_resource *res = hlist_entry(list,
143 struct dlm_lock_resource, hash_node);
144 if (res->lockname.name[0] != name[0])
145 continue;
146 if (unlikely(res->lockname.len != len))
147 continue;
148 if (memcmp(res->lockname.name + 1, name + 1, len - 1))
149 continue;
150 dlm_lockres_get(res);
151 return res;
129 } 152 }
130 return tmpres; 153 return NULL;
131} 154}
132 155
133struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 156struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
@@ -135,9 +158,10 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
135 unsigned int len) 158 unsigned int len)
136{ 159{
137 struct dlm_lock_resource *res; 160 struct dlm_lock_resource *res;
161 unsigned int hash = dlm_lockid_hash(name, len);
138 162
139 spin_lock(&dlm->spinlock); 163 spin_lock(&dlm->spinlock);
140 res = __dlm_lookup_lockres(dlm, name, len); 164 res = __dlm_lookup_lockres(dlm, name, len, hash);
141 spin_unlock(&dlm->spinlock); 165 spin_unlock(&dlm->spinlock);
142 return res; 166 return res;
143} 167}
@@ -194,7 +218,7 @@ static int dlm_wait_on_domain_helper(const char *domain)
194static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) 218static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
195{ 219{
196 if (dlm->lockres_hash) 220 if (dlm->lockres_hash)
197 free_page((unsigned long) dlm->lockres_hash); 221 dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
198 222
199 if (dlm->name) 223 if (dlm->name)
200 kfree(dlm->name); 224 kfree(dlm->name);
@@ -278,11 +302,21 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
278 return ret; 302 return ret;
279} 303}
280 304
305static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
306{
307 if (dlm->dlm_worker) {
308 flush_workqueue(dlm->dlm_worker);
309 destroy_workqueue(dlm->dlm_worker);
310 dlm->dlm_worker = NULL;
311 }
312}
313
281static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) 314static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
282{ 315{
283 dlm_unregister_domain_handlers(dlm); 316 dlm_unregister_domain_handlers(dlm);
284 dlm_complete_thread(dlm); 317 dlm_complete_thread(dlm);
285 dlm_complete_recovery_thread(dlm); 318 dlm_complete_recovery_thread(dlm);
319 dlm_destroy_dlm_worker(dlm);
286 320
287 /* We've left the domain. Now we can take ourselves out of the 321 /* We've left the domain. Now we can take ourselves out of the
288 * list and allow the kref stuff to help us free the 322 * list and allow the kref stuff to help us free the
@@ -304,8 +338,8 @@ static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
304restart: 338restart:
305 spin_lock(&dlm->spinlock); 339 spin_lock(&dlm->spinlock);
306 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 340 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
307 while (!hlist_empty(&dlm->lockres_hash[i])) { 341 while (!hlist_empty(dlm_lockres_hash(dlm, i))) {
308 res = hlist_entry(dlm->lockres_hash[i].first, 342 res = hlist_entry(dlm_lockres_hash(dlm, i)->first,
309 struct dlm_lock_resource, hash_node); 343 struct dlm_lock_resource, hash_node);
310 /* need reference when manually grabbing lockres */ 344 /* need reference when manually grabbing lockres */
311 dlm_lockres_get(res); 345 dlm_lockres_get(res);
@@ -374,12 +408,13 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
374 408
375 assert_spin_locked(&dlm->spinlock); 409 assert_spin_locked(&dlm->spinlock);
376 410
377 mlog(ML_NOTICE, "Nodes in my domain (\"%s\"):\n", dlm->name); 411 printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name);
378 412
379 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 413 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
380 node + 1)) < O2NM_MAX_NODES) { 414 node + 1)) < O2NM_MAX_NODES) {
381 mlog(ML_NOTICE, " node %d\n", node); 415 printk("%d ", node);
382 } 416 }
417 printk("\n");
383} 418}
384 419
385static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) 420static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data)
@@ -395,7 +430,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data)
395 430
396 node = exit_msg->node_idx; 431 node = exit_msg->node_idx;
397 432
398 mlog(0, "Node %u leaves domain %s\n", node, dlm->name); 433 printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name);
399 434
400 spin_lock(&dlm->spinlock); 435 spin_lock(&dlm->spinlock);
401 clear_bit(node, dlm->domain_map); 436 clear_bit(node, dlm->domain_map);
@@ -644,6 +679,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
644 set_bit(assert->node_idx, dlm->domain_map); 679 set_bit(assert->node_idx, dlm->domain_map);
645 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 680 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
646 681
682 printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n",
683 assert->node_idx, dlm->name);
647 __dlm_print_nodes(dlm); 684 __dlm_print_nodes(dlm);
648 685
649 /* notify anything attached to the heartbeat events */ 686 /* notify anything attached to the heartbeat events */
@@ -1126,6 +1163,13 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1126 goto bail; 1163 goto bail;
1127 } 1164 }
1128 1165
1166 dlm->dlm_worker = create_singlethread_workqueue("dlm_wq");
1167 if (!dlm->dlm_worker) {
1168 status = -ENOMEM;
1169 mlog_errno(status);
1170 goto bail;
1171 }
1172
1129 do { 1173 do {
1130 unsigned int backoff; 1174 unsigned int backoff;
1131 status = dlm_try_to_join_domain(dlm); 1175 status = dlm_try_to_join_domain(dlm);
@@ -1166,6 +1210,7 @@ bail:
1166 dlm_unregister_domain_handlers(dlm); 1210 dlm_unregister_domain_handlers(dlm);
1167 dlm_complete_thread(dlm); 1211 dlm_complete_thread(dlm);
1168 dlm_complete_recovery_thread(dlm); 1212 dlm_complete_recovery_thread(dlm);
1213 dlm_destroy_dlm_worker(dlm);
1169 } 1214 }
1170 1215
1171 return status; 1216 return status;
@@ -1191,7 +1236,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1191 goto leave; 1236 goto leave;
1192 } 1237 }
1193 1238
1194 dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL); 1239 dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES);
1195 if (!dlm->lockres_hash) { 1240 if (!dlm->lockres_hash) {
1196 mlog_errno(-ENOMEM); 1241 mlog_errno(-ENOMEM);
1197 kfree(dlm->name); 1242 kfree(dlm->name);
@@ -1200,8 +1245,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1200 goto leave; 1245 goto leave;
1201 } 1246 }
1202 1247
1203 for (i=0; i<DLM_HASH_BUCKETS; i++) 1248 for (i = 0; i < DLM_HASH_BUCKETS; i++)
1204 INIT_HLIST_HEAD(&dlm->lockres_hash[i]); 1249 INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
1205 1250
1206 strcpy(dlm->name, domain); 1251 strcpy(dlm->name, domain);
1207 dlm->key = key; 1252 dlm->key = key;
@@ -1231,6 +1276,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1231 1276
1232 dlm->dlm_thread_task = NULL; 1277 dlm->dlm_thread_task = NULL;
1233 dlm->dlm_reco_thread_task = NULL; 1278 dlm->dlm_reco_thread_task = NULL;
1279 dlm->dlm_worker = NULL;
1234 init_waitqueue_head(&dlm->dlm_thread_wq); 1280 init_waitqueue_head(&dlm->dlm_thread_wq);
1235 init_waitqueue_head(&dlm->dlm_reco_thread_wq); 1281 init_waitqueue_head(&dlm->dlm_reco_thread_wq);
1236 init_waitqueue_head(&dlm->reco.event); 1282 init_waitqueue_head(&dlm->reco.event);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 7e88e24b3471..033ad1701232 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -116,7 +116,7 @@ static int dlmfs_file_open(struct inode *inode,
116 * doesn't make sense for LVB writes. */ 116 * doesn't make sense for LVB writes. */
117 file->f_flags &= ~O_APPEND; 117 file->f_flags &= ~O_APPEND;
118 118
119 fp = kmalloc(sizeof(*fp), GFP_KERNEL); 119 fp = kmalloc(sizeof(*fp), GFP_NOFS);
120 if (!fp) { 120 if (!fp) {
121 status = -ENOMEM; 121 status = -ENOMEM;
122 goto bail; 122 goto bail;
@@ -196,7 +196,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
196 else 196 else
197 readlen = count - *ppos; 197 readlen = count - *ppos;
198 198
199 lvb_buf = kmalloc(readlen, GFP_KERNEL); 199 lvb_buf = kmalloc(readlen, GFP_NOFS);
200 if (!lvb_buf) 200 if (!lvb_buf)
201 return -ENOMEM; 201 return -ENOMEM;
202 202
@@ -240,7 +240,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
240 else 240 else
241 writelen = count - *ppos; 241 writelen = count - *ppos;
242 242
243 lvb_buf = kmalloc(writelen, GFP_KERNEL); 243 lvb_buf = kmalloc(writelen, GFP_NOFS);
244 if (!lvb_buf) 244 if (!lvb_buf)
245 return -ENOMEM; 245 return -ENOMEM;
246 246
@@ -574,10 +574,10 @@ static struct inode_operations dlmfs_file_inode_operations = {
574 .getattr = simple_getattr, 574 .getattr = simple_getattr,
575}; 575};
576 576
577static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type, 577static int dlmfs_get_sb(struct file_system_type *fs_type,
578 int flags, const char *dev_name, void *data) 578 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
579{ 579{
580 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super); 580 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
581} 581}
582 582
583static struct file_system_type dlmfs_fs_type = { 583static struct file_system_type dlmfs_fs_type = {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 6fea28318d6d..5ca57ec650c7 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -53,7 +53,7 @@
53#define MLOG_MASK_PREFIX ML_DLM 53#define MLOG_MASK_PREFIX ML_DLM
54#include "cluster/masklog.h" 54#include "cluster/masklog.h"
55 55
56static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED; 56static DEFINE_SPINLOCK(dlm_cookie_lock);
57static u64 dlm_next_cookie = 1; 57static u64 dlm_next_cookie = 1;
58 58
59static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, 59static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
@@ -201,6 +201,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
201 struct dlm_lock *lock, int flags) 201 struct dlm_lock *lock, int flags)
202{ 202{
203 enum dlm_status status = DLM_DENIED; 203 enum dlm_status status = DLM_DENIED;
204 int lockres_changed = 1;
204 205
205 mlog_entry("type=%d\n", lock->ml.type); 206 mlog_entry("type=%d\n", lock->ml.type);
206 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len, 207 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
@@ -226,8 +227,25 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
226 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 227 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
227 lock->lock_pending = 0; 228 lock->lock_pending = 0;
228 if (status != DLM_NORMAL) { 229 if (status != DLM_NORMAL) {
229 if (status != DLM_NOTQUEUED) 230 if (status == DLM_RECOVERING &&
231 dlm_is_recovery_lock(res->lockname.name,
232 res->lockname.len)) {
233 /* recovery lock was mastered by dead node.
234 * we need to have calc_usage shoot down this
235 * lockres and completely remaster it. */
236 mlog(0, "%s: recovery lock was owned by "
237 "dead node %u, remaster it now.\n",
238 dlm->name, res->owner);
239 } else if (status != DLM_NOTQUEUED) {
240 /*
241 * DO NOT call calc_usage, as this would unhash
242 * the remote lockres before we ever get to use
243 * it. treat as if we never made any change to
244 * the lockres.
245 */
246 lockres_changed = 0;
230 dlm_error(status); 247 dlm_error(status);
248 }
231 dlm_revert_pending_lock(res, lock); 249 dlm_revert_pending_lock(res, lock);
232 dlm_lock_put(lock); 250 dlm_lock_put(lock);
233 } else if (dlm_is_recovery_lock(res->lockname.name, 251 } else if (dlm_is_recovery_lock(res->lockname.name,
@@ -239,12 +257,12 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
239 mlog(0, "%s: $RECOVERY lock for this node (%u) is " 257 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
240 "mastered by %u; got lock, manually granting (no ast)\n", 258 "mastered by %u; got lock, manually granting (no ast)\n",
241 dlm->name, dlm->node_num, res->owner); 259 dlm->name, dlm->node_num, res->owner);
242 list_del_init(&lock->list); 260 list_move_tail(&lock->list, &res->granted);
243 list_add_tail(&lock->list, &res->granted);
244 } 261 }
245 spin_unlock(&res->spinlock); 262 spin_unlock(&res->spinlock);
246 263
247 dlm_lockres_calc_usage(dlm, res); 264 if (lockres_changed)
265 dlm_lockres_calc_usage(dlm, res);
248 266
249 wake_up(&res->wq); 267 wake_up(&res->wq);
250 return status; 268 return status;
@@ -281,6 +299,14 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
281 if (tmpret >= 0) { 299 if (tmpret >= 0) {
282 // successfully sent and received 300 // successfully sent and received
283 ret = status; // this is already a dlm_status 301 ret = status; // this is already a dlm_status
302 if (ret == DLM_REJECTED) {
303 mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres "
304 "no longer owned by %u. that node is coming back "
305 "up currently.\n", dlm->name, create.namelen,
306 create.name, res->owner);
307 dlm_print_one_lock_resource(res);
308 BUG();
309 }
284 } else { 310 } else {
285 mlog_errno(tmpret); 311 mlog_errno(tmpret);
286 if (dlm_is_host_down(tmpret)) { 312 if (dlm_is_host_down(tmpret)) {
@@ -382,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
382 struct dlm_lock *lock; 408 struct dlm_lock *lock;
383 int kernel_allocated = 0; 409 int kernel_allocated = 0;
384 410
385 lock = kcalloc(1, sizeof(*lock), GFP_KERNEL); 411 lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
386 if (!lock) 412 if (!lock)
387 return NULL; 413 return NULL;
388 414
389 if (!lksb) { 415 if (!lksb) {
390 /* zero memory only if kernel-allocated */ 416 /* zero memory only if kernel-allocated */
391 lksb = kcalloc(1, sizeof(*lksb), GFP_KERNEL); 417 lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
392 if (!lksb) { 418 if (!lksb) {
393 kfree(lock); 419 kfree(lock);
394 return NULL; 420 return NULL;
@@ -429,11 +455,16 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
429 if (!dlm_grab(dlm)) 455 if (!dlm_grab(dlm))
430 return DLM_REJECTED; 456 return DLM_REJECTED;
431 457
432 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
433 "Domain %s not fully joined!\n", dlm->name);
434
435 name = create->name; 458 name = create->name;
436 namelen = create->namelen; 459 namelen = create->namelen;
460 status = DLM_REJECTED;
461 if (!dlm_domain_fully_joined(dlm)) {
462 mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
463 "sending a create_lock message for lock %.*s!\n",
464 dlm->name, create->node_idx, namelen, name);
465 dlm_error(status);
466 goto leave;
467 }
437 468
438 status = DLM_IVBUFLEN; 469 status = DLM_IVBUFLEN;
439 if (namelen > DLM_LOCKID_NAME_MAX) { 470 if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -669,18 +700,22 @@ retry_lock:
669 msleep(100); 700 msleep(100);
670 /* no waiting for dlm_reco_thread */ 701 /* no waiting for dlm_reco_thread */
671 if (recovery) { 702 if (recovery) {
672 if (status == DLM_RECOVERING) { 703 if (status != DLM_RECOVERING)
673 mlog(0, "%s: got RECOVERING " 704 goto retry_lock;
674 "for $REOCVERY lock, master " 705
675 "was %u\n", dlm->name, 706 mlog(0, "%s: got RECOVERING "
676 res->owner); 707 "for $RECOVERY lock, master "
677 dlm_wait_for_node_death(dlm, res->owner, 708 "was %u\n", dlm->name,
678 DLM_NODE_DEATH_WAIT_MAX); 709 res->owner);
679 } 710 /* wait to see the node go down, then
711 * drop down and allow the lockres to
712 * get cleaned up. need to remaster. */
713 dlm_wait_for_node_death(dlm, res->owner,
714 DLM_NODE_DEATH_WAIT_MAX);
680 } else { 715 } else {
681 dlm_wait_for_recovery(dlm); 716 dlm_wait_for_recovery(dlm);
717 goto retry_lock;
682 } 718 }
683 goto retry_lock;
684 } 719 }
685 720
686 if (status != DLM_NORMAL) { 721 if (status != DLM_NORMAL) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 940be4c13b1f..1b8346dd0572 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -47,7 +47,6 @@
47 47
48#include "dlmapi.h" 48#include "dlmapi.h"
49#include "dlmcommon.h" 49#include "dlmcommon.h"
50#include "dlmdebug.h"
51#include "dlmdomain.h" 50#include "dlmdomain.h"
52 51
53#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) 52#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
@@ -74,6 +73,7 @@ struct dlm_master_list_entry
74 wait_queue_head_t wq; 73 wait_queue_head_t wq;
75 atomic_t woken; 74 atomic_t woken;
76 struct kref mle_refs; 75 struct kref mle_refs;
76 int inuse;
77 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 77 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
78 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 78 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
79 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 79 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -127,18 +127,30 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
127 return 1; 127 return 1;
128} 128}
129 129
130#if 0 130#define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m)
131/* Code here is included but defined out as it aids debugging */ 131static void _dlm_print_nodemap(unsigned long *map, const char *mapname)
132{
133 int i;
134 printk("%s=[ ", mapname);
135 for (i=0; i<O2NM_MAX_NODES; i++)
136 if (test_bit(i, map))
137 printk("%d ", i);
138 printk("]");
139}
132 140
133void dlm_print_one_mle(struct dlm_master_list_entry *mle) 141static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
134{ 142{
135 int i = 0, refs; 143 int refs;
136 char *type; 144 char *type;
137 char attached; 145 char attached;
138 u8 master; 146 u8 master;
139 unsigned int namelen; 147 unsigned int namelen;
140 const char *name; 148 const char *name;
141 struct kref *k; 149 struct kref *k;
150 unsigned long *maybe = mle->maybe_map,
151 *vote = mle->vote_map,
152 *resp = mle->response_map,
153 *node = mle->node_map;
142 154
143 k = &mle->mle_refs; 155 k = &mle->mle_refs;
144 if (mle->type == DLM_MLE_BLOCK) 156 if (mle->type == DLM_MLE_BLOCK)
@@ -159,18 +171,29 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
159 name = mle->u.res->lockname.name; 171 name = mle->u.res->lockname.name;
160 } 172 }
161 173
162 mlog(ML_NOTICE, " #%3d: %3s %3d %3u %3u %c (%d)%.*s\n", 174 mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ",
163 i, type, refs, master, mle->new_master, attached, 175 namelen, name, type, refs, master, mle->new_master, attached,
164 namelen, namelen, name); 176 mle->inuse);
177 dlm_print_nodemap(maybe);
178 printk(", ");
179 dlm_print_nodemap(vote);
180 printk(", ");
181 dlm_print_nodemap(resp);
182 printk(", ");
183 dlm_print_nodemap(node);
184 printk(", ");
185 printk("\n");
165} 186}
166 187
188#if 0
189/* Code here is included but defined out as it aids debugging */
190
167static void dlm_dump_mles(struct dlm_ctxt *dlm) 191static void dlm_dump_mles(struct dlm_ctxt *dlm)
168{ 192{
169 struct dlm_master_list_entry *mle; 193 struct dlm_master_list_entry *mle;
170 struct list_head *iter; 194 struct list_head *iter;
171 195
172 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); 196 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
173 mlog(ML_NOTICE, " ####: type refs owner new events? lockname nodemap votemap respmap maybemap\n");
174 spin_lock(&dlm->master_lock); 197 spin_lock(&dlm->master_lock);
175 list_for_each(iter, &dlm->master_list) { 198 list_for_each(iter, &dlm->master_list) {
176 mle = list_entry(iter, struct dlm_master_list_entry, list); 199 mle = list_entry(iter, struct dlm_master_list_entry, list);
@@ -314,6 +337,31 @@ static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
314 spin_unlock(&dlm->spinlock); 337 spin_unlock(&dlm->spinlock);
315} 338}
316 339
340static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
341{
342 struct dlm_ctxt *dlm;
343 dlm = mle->dlm;
344
345 assert_spin_locked(&dlm->spinlock);
346 assert_spin_locked(&dlm->master_lock);
347 mle->inuse++;
348 kref_get(&mle->mle_refs);
349}
350
351static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
352{
353 struct dlm_ctxt *dlm;
354 dlm = mle->dlm;
355
356 spin_lock(&dlm->spinlock);
357 spin_lock(&dlm->master_lock);
358 mle->inuse--;
359 __dlm_put_mle(mle);
360 spin_unlock(&dlm->master_lock);
361 spin_unlock(&dlm->spinlock);
362
363}
364
317/* remove from list and free */ 365/* remove from list and free */
318static void __dlm_put_mle(struct dlm_master_list_entry *mle) 366static void __dlm_put_mle(struct dlm_master_list_entry *mle)
319{ 367{
@@ -322,9 +370,14 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle)
322 370
323 assert_spin_locked(&dlm->spinlock); 371 assert_spin_locked(&dlm->spinlock);
324 assert_spin_locked(&dlm->master_lock); 372 assert_spin_locked(&dlm->master_lock);
325 BUG_ON(!atomic_read(&mle->mle_refs.refcount)); 373 if (!atomic_read(&mle->mle_refs.refcount)) {
326 374 /* this may or may not crash, but who cares.
327 kref_put(&mle->mle_refs, dlm_mle_release); 375 * it's a BUG. */
376 mlog(ML_ERROR, "bad mle: %p\n", mle);
377 dlm_print_one_mle(mle);
378 BUG();
379 } else
380 kref_put(&mle->mle_refs, dlm_mle_release);
328} 381}
329 382
330 383
@@ -367,6 +420,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
367 memset(mle->response_map, 0, sizeof(mle->response_map)); 420 memset(mle->response_map, 0, sizeof(mle->response_map));
368 mle->master = O2NM_MAX_NODES; 421 mle->master = O2NM_MAX_NODES;
369 mle->new_master = O2NM_MAX_NODES; 422 mle->new_master = O2NM_MAX_NODES;
423 mle->inuse = 0;
370 424
371 if (mle->type == DLM_MLE_MASTER) { 425 if (mle->type == DLM_MLE_MASTER) {
372 BUG_ON(!res); 426 BUG_ON(!res);
@@ -564,6 +618,28 @@ static void dlm_lockres_release(struct kref *kref)
564 mlog(0, "destroying lockres %.*s\n", res->lockname.len, 618 mlog(0, "destroying lockres %.*s\n", res->lockname.len,
565 res->lockname.name); 619 res->lockname.name);
566 620
621 if (!hlist_unhashed(&res->hash_node) ||
622 !list_empty(&res->granted) ||
623 !list_empty(&res->converting) ||
624 !list_empty(&res->blocked) ||
625 !list_empty(&res->dirty) ||
626 !list_empty(&res->recovering) ||
627 !list_empty(&res->purge)) {
628 mlog(ML_ERROR,
629 "Going to BUG for resource %.*s."
630 " We're on a list! [%c%c%c%c%c%c%c]\n",
631 res->lockname.len, res->lockname.name,
632 !hlist_unhashed(&res->hash_node) ? 'H' : ' ',
633 !list_empty(&res->granted) ? 'G' : ' ',
634 !list_empty(&res->converting) ? 'C' : ' ',
635 !list_empty(&res->blocked) ? 'B' : ' ',
636 !list_empty(&res->dirty) ? 'D' : ' ',
637 !list_empty(&res->recovering) ? 'R' : ' ',
638 !list_empty(&res->purge) ? 'P' : ' ');
639
640 dlm_print_one_lock_resource(res);
641 }
642
567 /* By the time we're ready to blow this guy away, we shouldn't 643 /* By the time we're ready to blow this guy away, we shouldn't
568 * be on any lists. */ 644 * be on any lists. */
569 BUG_ON(!hlist_unhashed(&res->hash_node)); 645 BUG_ON(!hlist_unhashed(&res->hash_node));
@@ -579,11 +655,6 @@ static void dlm_lockres_release(struct kref *kref)
579 kfree(res); 655 kfree(res);
580} 656}
581 657
582void dlm_lockres_get(struct dlm_lock_resource *res)
583{
584 kref_get(&res->refs);
585}
586
587void dlm_lockres_put(struct dlm_lock_resource *res) 658void dlm_lockres_put(struct dlm_lock_resource *res)
588{ 659{
589 kref_put(&res->refs, dlm_lockres_release); 660 kref_put(&res->refs, dlm_lockres_release);
@@ -603,7 +674,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
603 memcpy(qname, name, namelen); 674 memcpy(qname, name, namelen);
604 675
605 res->lockname.len = namelen; 676 res->lockname.len = namelen;
606 res->lockname.hash = full_name_hash(name, namelen); 677 res->lockname.hash = dlm_lockid_hash(name, namelen);
607 678
608 init_waitqueue_head(&res->wq); 679 init_waitqueue_head(&res->wq);
609 spin_lock_init(&res->spinlock); 680 spin_lock_init(&res->spinlock);
@@ -637,11 +708,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
637{ 708{
638 struct dlm_lock_resource *res; 709 struct dlm_lock_resource *res;
639 710
640 res = kmalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); 711 res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS);
641 if (!res) 712 if (!res)
642 return NULL; 713 return NULL;
643 714
644 res->lockname.name = kmalloc(namelen, GFP_KERNEL); 715 res->lockname.name = kmalloc(namelen, GFP_NOFS);
645 if (!res->lockname.name) { 716 if (!res->lockname.name) {
646 kfree(res); 717 kfree(res);
647 return NULL; 718 return NULL;
@@ -677,19 +748,20 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
677 int blocked = 0; 748 int blocked = 0;
678 int ret, nodenum; 749 int ret, nodenum;
679 struct dlm_node_iter iter; 750 struct dlm_node_iter iter;
680 unsigned int namelen; 751 unsigned int namelen, hash;
681 int tries = 0; 752 int tries = 0;
682 int bit, wait_on_recovery = 0; 753 int bit, wait_on_recovery = 0;
683 754
684 BUG_ON(!lockid); 755 BUG_ON(!lockid);
685 756
686 namelen = strlen(lockid); 757 namelen = strlen(lockid);
758 hash = dlm_lockid_hash(lockid, namelen);
687 759
688 mlog(0, "get lockres %s (len %d)\n", lockid, namelen); 760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
689 761
690lookup: 762lookup:
691 spin_lock(&dlm->spinlock); 763 spin_lock(&dlm->spinlock);
692 tmpres = __dlm_lookup_lockres(dlm, lockid, namelen); 764 tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash);
693 if (tmpres) { 765 if (tmpres) {
694 spin_unlock(&dlm->spinlock); 766 spin_unlock(&dlm->spinlock);
695 mlog(0, "found in hash!\n"); 767 mlog(0, "found in hash!\n");
@@ -704,7 +776,7 @@ lookup:
704 mlog(0, "allocating a new resource\n"); 776 mlog(0, "allocating a new resource\n");
705 /* nothing found and we need to allocate one. */ 777 /* nothing found and we need to allocate one. */
706 alloc_mle = (struct dlm_master_list_entry *) 778 alloc_mle = (struct dlm_master_list_entry *)
707 kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); 779 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
708 if (!alloc_mle) 780 if (!alloc_mle)
709 goto leave; 781 goto leave;
710 res = dlm_new_lockres(dlm, lockid, namelen); 782 res = dlm_new_lockres(dlm, lockid, namelen);
@@ -790,10 +862,11 @@ lookup:
790 * if so, the creator of the BLOCK may try to put the last 862 * if so, the creator of the BLOCK may try to put the last
791 * ref at this time in the assert master handler, so we 863 * ref at this time in the assert master handler, so we
792 * need an extra one to keep from a bad ptr deref. */ 864 * need an extra one to keep from a bad ptr deref. */
793 dlm_get_mle(mle); 865 dlm_get_mle_inuse(mle);
794 spin_unlock(&dlm->master_lock); 866 spin_unlock(&dlm->master_lock);
795 spin_unlock(&dlm->spinlock); 867 spin_unlock(&dlm->spinlock);
796 868
869redo_request:
797 while (wait_on_recovery) { 870 while (wait_on_recovery) {
798 /* any cluster changes that occurred after dropping the 871 /* any cluster changes that occurred after dropping the
799 * dlm spinlock would be detectable be a change on the mle, 872 * dlm spinlock would be detectable be a change on the mle,
@@ -812,7 +885,7 @@ lookup:
812 } 885 }
813 886
814 dlm_kick_recovery_thread(dlm); 887 dlm_kick_recovery_thread(dlm);
815 msleep(100); 888 msleep(1000);
816 dlm_wait_for_recovery(dlm); 889 dlm_wait_for_recovery(dlm);
817 890
818 spin_lock(&dlm->spinlock); 891 spin_lock(&dlm->spinlock);
@@ -825,13 +898,15 @@ lookup:
825 } else 898 } else
826 wait_on_recovery = 0; 899 wait_on_recovery = 0;
827 spin_unlock(&dlm->spinlock); 900 spin_unlock(&dlm->spinlock);
901
902 if (wait_on_recovery)
903 dlm_wait_for_node_recovery(dlm, bit, 10000);
828 } 904 }
829 905
830 /* must wait for lock to be mastered elsewhere */ 906 /* must wait for lock to be mastered elsewhere */
831 if (blocked) 907 if (blocked)
832 goto wait; 908 goto wait;
833 909
834redo_request:
835 ret = -EINVAL; 910 ret = -EINVAL;
836 dlm_node_iter_init(mle->vote_map, &iter); 911 dlm_node_iter_init(mle->vote_map, &iter);
837 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 912 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
@@ -856,6 +931,7 @@ wait:
856 /* keep going until the response map includes all nodes */ 931 /* keep going until the response map includes all nodes */
857 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); 932 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
858 if (ret < 0) { 933 if (ret < 0) {
934 wait_on_recovery = 1;
859 mlog(0, "%s:%.*s: node map changed, redo the " 935 mlog(0, "%s:%.*s: node map changed, redo the "
860 "master request now, blocked=%d\n", 936 "master request now, blocked=%d\n",
861 dlm->name, res->lockname.len, 937 dlm->name, res->lockname.len,
@@ -866,7 +942,7 @@ wait:
866 dlm->name, res->lockname.len, 942 dlm->name, res->lockname.len,
867 res->lockname.name, blocked); 943 res->lockname.name, blocked);
868 dlm_print_one_lock_resource(res); 944 dlm_print_one_lock_resource(res);
869 /* dlm_print_one_mle(mle); */ 945 dlm_print_one_mle(mle);
870 tries = 0; 946 tries = 0;
871 } 947 }
872 goto redo_request; 948 goto redo_request;
@@ -880,7 +956,7 @@ wait:
880 dlm_mle_detach_hb_events(dlm, mle); 956 dlm_mle_detach_hb_events(dlm, mle);
881 dlm_put_mle(mle); 957 dlm_put_mle(mle);
882 /* put the extra ref */ 958 /* put the extra ref */
883 dlm_put_mle(mle); 959 dlm_put_mle_inuse(mle);
884 960
885wake_waiters: 961wake_waiters:
886 spin_lock(&res->spinlock); 962 spin_lock(&res->spinlock);
@@ -921,12 +997,14 @@ recheck:
921 spin_unlock(&res->spinlock); 997 spin_unlock(&res->spinlock);
922 /* this will cause the master to re-assert across 998 /* this will cause the master to re-assert across
923 * the whole cluster, freeing up mles */ 999 * the whole cluster, freeing up mles */
924 ret = dlm_do_master_request(mle, res->owner); 1000 if (res->owner != dlm->node_num) {
925 if (ret < 0) { 1001 ret = dlm_do_master_request(mle, res->owner);
926 /* give recovery a chance to run */ 1002 if (ret < 0) {
927 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); 1003 /* give recovery a chance to run */
928 msleep(500); 1004 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
929 goto recheck; 1005 msleep(500);
1006 goto recheck;
1007 }
930 } 1008 }
931 ret = 0; 1009 ret = 0;
932 goto leave; 1010 goto leave;
@@ -962,6 +1040,12 @@ recheck:
962 "rechecking now\n", dlm->name, res->lockname.len, 1040 "rechecking now\n", dlm->name, res->lockname.len,
963 res->lockname.name); 1041 res->lockname.name);
964 goto recheck; 1042 goto recheck;
1043 } else {
1044 if (!voting_done) {
1045 mlog(0, "map not changed and voting not done "
1046 "for %s:%.*s\n", dlm->name, res->lockname.len,
1047 res->lockname.name);
1048 }
965 } 1049 }
966 1050
967 if (m != O2NM_MAX_NODES) { 1051 if (m != O2NM_MAX_NODES) {
@@ -1129,18 +1213,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1129 set_bit(node, mle->vote_map); 1213 set_bit(node, mle->vote_map);
1130 } else { 1214 } else {
1131 mlog(ML_ERROR, "node down! %d\n", node); 1215 mlog(ML_ERROR, "node down! %d\n", node);
1132
1133 /* if the node wasn't involved in mastery skip it,
1134 * but clear it out from the maps so that it will
1135 * not affect mastery of this lockres */
1136 clear_bit(node, mle->response_map);
1137 clear_bit(node, mle->vote_map);
1138 if (!test_bit(node, mle->maybe_map))
1139 goto next;
1140
1141 /* if we're already blocked on lock mastery, and the
1142 * dead node wasn't the expected master, or there is
1143 * another node in the maybe_map, keep waiting */
1144 if (blocked) { 1216 if (blocked) {
1145 int lowest = find_next_bit(mle->maybe_map, 1217 int lowest = find_next_bit(mle->maybe_map,
1146 O2NM_MAX_NODES, 0); 1218 O2NM_MAX_NODES, 0);
@@ -1148,54 +1220,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1148 /* act like it was never there */ 1220 /* act like it was never there */
1149 clear_bit(node, mle->maybe_map); 1221 clear_bit(node, mle->maybe_map);
1150 1222
1151 if (node != lowest) 1223 if (node == lowest) {
1152 goto next; 1224 mlog(0, "expected master %u died"
1153 1225 " while this node was blocked "
1154 mlog(ML_ERROR, "expected master %u died while " 1226 "waiting on it!\n", node);
1155 "this node was blocked waiting on it!\n", 1227 lowest = find_next_bit(mle->maybe_map,
1156 node); 1228 O2NM_MAX_NODES,
1157 lowest = find_next_bit(mle->maybe_map, 1229 lowest+1);
1158 O2NM_MAX_NODES, 1230 if (lowest < O2NM_MAX_NODES) {
1159 lowest+1); 1231 mlog(0, "%s:%.*s:still "
1160 if (lowest < O2NM_MAX_NODES) { 1232 "blocked. waiting on %u "
1161 mlog(0, "still blocked. waiting " 1233 "now\n", dlm->name,
1162 "on %u now\n", lowest); 1234 res->lockname.len,
1163 goto next; 1235 res->lockname.name,
1236 lowest);
1237 } else {
1238 /* mle is an MLE_BLOCK, but
1239 * there is now nothing left to
1240 * block on. we need to return
1241 * all the way back out and try
1242 * again with an MLE_MASTER.
1243 * dlm_do_local_recovery_cleanup
1244 * has already run, so the mle
1245 * refcount is ok */
1246 mlog(0, "%s:%.*s: no "
1247 "longer blocking. try to "
1248 "master this here\n",
1249 dlm->name,
1250 res->lockname.len,
1251 res->lockname.name);
1252 mle->type = DLM_MLE_MASTER;
1253 mle->u.res = res;
1254 }
1164 } 1255 }
1165
1166 /* mle is an MLE_BLOCK, but there is now
1167 * nothing left to block on. we need to return
1168 * all the way back out and try again with
1169 * an MLE_MASTER. dlm_do_local_recovery_cleanup
1170 * has already run, so the mle refcount is ok */
1171 mlog(0, "no longer blocking. we can "
1172 "try to master this here\n");
1173 mle->type = DLM_MLE_MASTER;
1174 memset(mle->maybe_map, 0,
1175 sizeof(mle->maybe_map));
1176 memset(mle->response_map, 0,
1177 sizeof(mle->maybe_map));
1178 memcpy(mle->vote_map, mle->node_map,
1179 sizeof(mle->node_map));
1180 mle->u.res = res;
1181 set_bit(dlm->node_num, mle->maybe_map);
1182
1183 ret = -EAGAIN;
1184 goto next;
1185 } 1256 }
1186 1257
1187 clear_bit(node, mle->maybe_map); 1258 /* now blank out everything, as if we had never
1188 if (node > dlm->node_num) 1259 * contacted anyone */
1189 goto next; 1260 memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
1190 1261 memset(mle->response_map, 0, sizeof(mle->response_map));
1191 mlog(0, "dead node in map!\n"); 1262 /* reset the vote_map to the current node_map */
1192 /* yuck. go back and re-contact all nodes 1263 memcpy(mle->vote_map, mle->node_map,
1193 * in the vote_map, removing this node. */ 1264 sizeof(mle->node_map));
1194 memset(mle->response_map, 0, 1265 /* put myself into the maybe map */
1195 sizeof(mle->response_map)); 1266 if (mle->type != DLM_MLE_BLOCK)
1267 set_bit(dlm->node_num, mle->maybe_map);
1196 } 1268 }
1197 ret = -EAGAIN; 1269 ret = -EAGAIN;
1198next:
1199 node = dlm_bitmap_diff_iter_next(&bdi, &sc); 1270 node = dlm_bitmap_diff_iter_next(&bdi, &sc);
1200 } 1271 }
1201 return ret; 1272 return ret;
@@ -1316,7 +1387,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1316 struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; 1387 struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
1317 struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; 1388 struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
1318 char *name; 1389 char *name;
1319 unsigned int namelen; 1390 unsigned int namelen, hash;
1320 int found, ret; 1391 int found, ret;
1321 int set_maybe; 1392 int set_maybe;
1322 int dispatch_assert = 0; 1393 int dispatch_assert = 0;
@@ -1331,6 +1402,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1331 1402
1332 name = request->name; 1403 name = request->name;
1333 namelen = request->namelen; 1404 namelen = request->namelen;
1405 hash = dlm_lockid_hash(name, namelen);
1334 1406
1335 if (namelen > DLM_LOCKID_NAME_MAX) { 1407 if (namelen > DLM_LOCKID_NAME_MAX) {
1336 response = DLM_IVBUFLEN; 1408 response = DLM_IVBUFLEN;
@@ -1339,7 +1411,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1339 1411
1340way_up_top: 1412way_up_top:
1341 spin_lock(&dlm->spinlock); 1413 spin_lock(&dlm->spinlock);
1342 res = __dlm_lookup_lockres(dlm, name, namelen); 1414 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
1343 if (res) { 1415 if (res) {
1344 spin_unlock(&dlm->spinlock); 1416 spin_unlock(&dlm->spinlock);
1345 1417
@@ -1459,21 +1531,18 @@ way_up_top:
1459 spin_unlock(&dlm->spinlock); 1531 spin_unlock(&dlm->spinlock);
1460 1532
1461 mle = (struct dlm_master_list_entry *) 1533 mle = (struct dlm_master_list_entry *)
1462 kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); 1534 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1463 if (!mle) { 1535 if (!mle) {
1464 response = DLM_MASTER_RESP_ERROR; 1536 response = DLM_MASTER_RESP_ERROR;
1465 mlog_errno(-ENOMEM); 1537 mlog_errno(-ENOMEM);
1466 goto send_response; 1538 goto send_response;
1467 } 1539 }
1468 spin_lock(&dlm->spinlock);
1469 dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL,
1470 name, namelen);
1471 spin_unlock(&dlm->spinlock);
1472 goto way_up_top; 1540 goto way_up_top;
1473 } 1541 }
1474 1542
1475 // mlog(0, "this is second time thru, already allocated, " 1543 // mlog(0, "this is second time thru, already allocated, "
1476 // "add the block.\n"); 1544 // "add the block.\n");
1545 dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
1477 set_bit(request->node_idx, mle->maybe_map); 1546 set_bit(request->node_idx, mle->maybe_map);
1478 list_add(&mle->list, &dlm->master_list); 1547 list_add(&mle->list, &dlm->master_list);
1479 response = DLM_MASTER_RESP_NO; 1548 response = DLM_MASTER_RESP_NO;
@@ -1556,6 +1625,8 @@ again:
1556 dlm_node_iter_init(nodemap, &iter); 1625 dlm_node_iter_init(nodemap, &iter);
1557 while ((to = dlm_node_iter_next(&iter)) >= 0) { 1626 while ((to = dlm_node_iter_next(&iter)) >= 0) {
1558 int r = 0; 1627 int r = 0;
1628 struct dlm_master_list_entry *mle = NULL;
1629
1559 mlog(0, "sending assert master to %d (%.*s)\n", to, 1630 mlog(0, "sending assert master to %d (%.*s)\n", to,
1560 namelen, lockname); 1631 namelen, lockname);
1561 memset(&assert, 0, sizeof(assert)); 1632 memset(&assert, 0, sizeof(assert));
@@ -1567,20 +1638,28 @@ again:
1567 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 1638 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
1568 &assert, sizeof(assert), to, &r); 1639 &assert, sizeof(assert), to, &r);
1569 if (tmpret < 0) { 1640 if (tmpret < 0) {
1570 mlog(ML_ERROR, "assert_master returned %d!\n", tmpret); 1641 mlog(0, "assert_master returned %d!\n", tmpret);
1571 if (!dlm_is_host_down(tmpret)) { 1642 if (!dlm_is_host_down(tmpret)) {
1572 mlog(ML_ERROR, "unhandled error!\n"); 1643 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
1573 BUG(); 1644 BUG();
1574 } 1645 }
1575 /* a node died. finish out the rest of the nodes. */ 1646 /* a node died. finish out the rest of the nodes. */
1576 mlog(ML_ERROR, "link to %d went down!\n", to); 1647 mlog(0, "link to %d went down!\n", to);
1577 /* any nonzero status return will do */ 1648 /* any nonzero status return will do */
1578 ret = tmpret; 1649 ret = tmpret;
1579 } else if (r < 0) { 1650 } else if (r < 0) {
1580 /* ok, something horribly messed. kill thyself. */ 1651 /* ok, something horribly messed. kill thyself. */
1581 mlog(ML_ERROR,"during assert master of %.*s to %u, " 1652 mlog(ML_ERROR,"during assert master of %.*s to %u, "
1582 "got %d.\n", namelen, lockname, to, r); 1653 "got %d.\n", namelen, lockname, to, r);
1583 dlm_dump_lock_resources(dlm); 1654 spin_lock(&dlm->spinlock);
1655 spin_lock(&dlm->master_lock);
1656 if (dlm_find_mle(dlm, &mle, (char *)lockname,
1657 namelen)) {
1658 dlm_print_one_mle(mle);
1659 __dlm_put_mle(mle);
1660 }
1661 spin_unlock(&dlm->master_lock);
1662 spin_unlock(&dlm->spinlock);
1584 BUG(); 1663 BUG();
1585 } else if (r == EAGAIN) { 1664 } else if (r == EAGAIN) {
1586 mlog(0, "%.*s: node %u create mles on other " 1665 mlog(0, "%.*s: node %u create mles on other "
@@ -1612,7 +1691,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1612 struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf; 1691 struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf;
1613 struct dlm_lock_resource *res = NULL; 1692 struct dlm_lock_resource *res = NULL;
1614 char *name; 1693 char *name;
1615 unsigned int namelen; 1694 unsigned int namelen, hash;
1616 u32 flags; 1695 u32 flags;
1617 int master_request = 0; 1696 int master_request = 0;
1618 int ret = 0; 1697 int ret = 0;
@@ -1622,6 +1701,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1622 1701
1623 name = assert->name; 1702 name = assert->name;
1624 namelen = assert->namelen; 1703 namelen = assert->namelen;
1704 hash = dlm_lockid_hash(name, namelen);
1625 flags = be32_to_cpu(assert->flags); 1705 flags = be32_to_cpu(assert->flags);
1626 1706
1627 if (namelen > DLM_LOCKID_NAME_MAX) { 1707 if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -1646,7 +1726,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1646 if (bit >= O2NM_MAX_NODES) { 1726 if (bit >= O2NM_MAX_NODES) {
1647 /* not necessarily an error, though less likely. 1727 /* not necessarily an error, though less likely.
1648 * could be master just re-asserting. */ 1728 * could be master just re-asserting. */
1649 mlog(ML_ERROR, "no bits set in the maybe_map, but %u " 1729 mlog(0, "no bits set in the maybe_map, but %u "
1650 "is asserting! (%.*s)\n", assert->node_idx, 1730 "is asserting! (%.*s)\n", assert->node_idx,
1651 namelen, name); 1731 namelen, name);
1652 } else if (bit != assert->node_idx) { 1732 } else if (bit != assert->node_idx) {
@@ -1658,19 +1738,36 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1658 * number winning the mastery will respond 1738 * number winning the mastery will respond
1659 * YES to mastery requests, but this node 1739 * YES to mastery requests, but this node
1660 * had no way of knowing. let it pass. */ 1740 * had no way of knowing. let it pass. */
1661 mlog(ML_ERROR, "%u is the lowest node, " 1741 mlog(0, "%u is the lowest node, "
1662 "%u is asserting. (%.*s) %u must " 1742 "%u is asserting. (%.*s) %u must "
1663 "have begun after %u won.\n", bit, 1743 "have begun after %u won.\n", bit,
1664 assert->node_idx, namelen, name, bit, 1744 assert->node_idx, namelen, name, bit,
1665 assert->node_idx); 1745 assert->node_idx);
1666 } 1746 }
1667 } 1747 }
1748 if (mle->type == DLM_MLE_MIGRATION) {
1749 if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
1750 mlog(0, "%s:%.*s: got cleanup assert"
1751 " from %u for migration\n",
1752 dlm->name, namelen, name,
1753 assert->node_idx);
1754 } else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) {
1755 mlog(0, "%s:%.*s: got unrelated assert"
1756 " from %u for migration, ignoring\n",
1757 dlm->name, namelen, name,
1758 assert->node_idx);
1759 __dlm_put_mle(mle);
1760 spin_unlock(&dlm->master_lock);
1761 spin_unlock(&dlm->spinlock);
1762 goto done;
1763 }
1764 }
1668 } 1765 }
1669 spin_unlock(&dlm->master_lock); 1766 spin_unlock(&dlm->master_lock);
1670 1767
1671 /* ok everything checks out with the MLE 1768 /* ok everything checks out with the MLE
1672 * now check to see if there is a lockres */ 1769 * now check to see if there is a lockres */
1673 res = __dlm_lookup_lockres(dlm, name, namelen); 1770 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
1674 if (res) { 1771 if (res) {
1675 spin_lock(&res->spinlock); 1772 spin_lock(&res->spinlock);
1676 if (res->state & DLM_LOCK_RES_RECOVERING) { 1773 if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -1679,7 +1776,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1679 goto kill; 1776 goto kill;
1680 } 1777 }
1681 if (!mle) { 1778 if (!mle) {
1682 if (res->owner != assert->node_idx) { 1779 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
1780 res->owner != assert->node_idx) {
1683 mlog(ML_ERROR, "assert_master from " 1781 mlog(ML_ERROR, "assert_master from "
1684 "%u, but current owner is " 1782 "%u, but current owner is "
1685 "%u! (%.*s)\n", 1783 "%u! (%.*s)\n",
@@ -1732,6 +1830,7 @@ ok:
1732 if (mle) { 1830 if (mle) {
1733 int extra_ref = 0; 1831 int extra_ref = 0;
1734 int nn = -1; 1832 int nn = -1;
1833 int rr, err = 0;
1735 1834
1736 spin_lock(&mle->spinlock); 1835 spin_lock(&mle->spinlock);
1737 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) 1836 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
@@ -1751,27 +1850,64 @@ ok:
1751 wake_up(&mle->wq); 1850 wake_up(&mle->wq);
1752 spin_unlock(&mle->spinlock); 1851 spin_unlock(&mle->spinlock);
1753 1852
1754 if (mle->type == DLM_MLE_MIGRATION && res) { 1853 if (res) {
1755 mlog(0, "finishing off migration of lockres %.*s, "
1756 "from %u to %u\n",
1757 res->lockname.len, res->lockname.name,
1758 dlm->node_num, mle->new_master);
1759 spin_lock(&res->spinlock); 1854 spin_lock(&res->spinlock);
1760 res->state &= ~DLM_LOCK_RES_MIGRATING; 1855 if (mle->type == DLM_MLE_MIGRATION) {
1761 dlm_change_lockres_owner(dlm, res, mle->new_master); 1856 mlog(0, "finishing off migration of lockres %.*s, "
1762 BUG_ON(res->state & DLM_LOCK_RES_DIRTY); 1857 "from %u to %u\n",
1858 res->lockname.len, res->lockname.name,
1859 dlm->node_num, mle->new_master);
1860 res->state &= ~DLM_LOCK_RES_MIGRATING;
1861 dlm_change_lockres_owner(dlm, res, mle->new_master);
1862 BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
1863 } else {
1864 dlm_change_lockres_owner(dlm, res, mle->master);
1865 }
1763 spin_unlock(&res->spinlock); 1866 spin_unlock(&res->spinlock);
1764 } 1867 }
1765 /* master is known, detach if not already detached */ 1868
1766 dlm_mle_detach_hb_events(dlm, mle); 1869 /* master is known, detach if not already detached.
1767 dlm_put_mle(mle); 1870 * ensures that only one assert_master call will happen
1768 1871 * on this mle. */
1872 spin_lock(&dlm->spinlock);
1873 spin_lock(&dlm->master_lock);
1874
1875 rr = atomic_read(&mle->mle_refs.refcount);
1876 if (mle->inuse > 0) {
1877 if (extra_ref && rr < 3)
1878 err = 1;
1879 else if (!extra_ref && rr < 2)
1880 err = 1;
1881 } else {
1882 if (extra_ref && rr < 2)
1883 err = 1;
1884 else if (!extra_ref && rr < 1)
1885 err = 1;
1886 }
1887 if (err) {
1888 mlog(ML_ERROR, "%s:%.*s: got assert master from %u "
1889 "that will mess up this node, refs=%d, extra=%d, "
1890 "inuse=%d\n", dlm->name, namelen, name,
1891 assert->node_idx, rr, extra_ref, mle->inuse);
1892 dlm_print_one_mle(mle);
1893 }
1894 list_del_init(&mle->list);
1895 __dlm_mle_detach_hb_events(dlm, mle);
1896 __dlm_put_mle(mle);
1769 if (extra_ref) { 1897 if (extra_ref) {
1770 /* the assert master message now balances the extra 1898 /* the assert master message now balances the extra
1771 * ref given by the master / migration request message. 1899 * ref given by the master / migration request message.
1772 * if this is the last put, it will be removed 1900 * if this is the last put, it will be removed
1773 * from the list. */ 1901 * from the list. */
1774 dlm_put_mle(mle); 1902 __dlm_put_mle(mle);
1903 }
1904 spin_unlock(&dlm->master_lock);
1905 spin_unlock(&dlm->spinlock);
1906 } else if (res) {
1907 if (res->owner != assert->node_idx) {
1908 mlog(0, "assert_master from %u, but current "
1909 "owner is %u (%.*s), no mle\n", assert->node_idx,
1910 res->owner, namelen, name);
1775 } 1911 }
1776 } 1912 }
1777 1913
@@ -1788,12 +1924,12 @@ done:
1788 1924
1789kill: 1925kill:
1790 /* kill the caller! */ 1926 /* kill the caller! */
1927 mlog(ML_ERROR, "Bad message received from another node. Dumping state "
1928 "and killing the other node now! This node is OK and can continue.\n");
1929 __dlm_print_one_lock_resource(res);
1791 spin_unlock(&res->spinlock); 1930 spin_unlock(&res->spinlock);
1792 spin_unlock(&dlm->spinlock); 1931 spin_unlock(&dlm->spinlock);
1793 dlm_lockres_put(res); 1932 dlm_lockres_put(res);
1794 mlog(ML_ERROR, "Bad message received from another node. Dumping state "
1795 "and killing the other node now! This node is OK and can continue.\n");
1796 dlm_dump_lock_resources(dlm);
1797 dlm_put(dlm); 1933 dlm_put(dlm);
1798 return -EINVAL; 1934 return -EINVAL;
1799} 1935}
@@ -1803,7 +1939,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
1803 int ignore_higher, u8 request_from, u32 flags) 1939 int ignore_higher, u8 request_from, u32 flags)
1804{ 1940{
1805 struct dlm_work_item *item; 1941 struct dlm_work_item *item;
1806 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 1942 item = kcalloc(1, sizeof(*item), GFP_NOFS);
1807 if (!item) 1943 if (!item)
1808 return -ENOMEM; 1944 return -ENOMEM;
1809 1945
@@ -1825,7 +1961,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
1825 list_add_tail(&item->list, &dlm->work_list); 1961 list_add_tail(&item->list, &dlm->work_list);
1826 spin_unlock(&dlm->work_lock); 1962 spin_unlock(&dlm->work_lock);
1827 1963
1828 schedule_work(&dlm->dispatched_work); 1964 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
1829 return 0; 1965 return 0;
1830} 1966}
1831 1967
@@ -1866,6 +2002,23 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1866 } 2002 }
1867 } 2003 }
1868 2004
2005 /*
2006 * If we're migrating this lock to someone else, we are no
2007 * longer allowed to assert out own mastery. OTOH, we need to
2008 * prevent migration from starting while we're still asserting
2009 * our dominance. The reserved ast delays migration.
2010 */
2011 spin_lock(&res->spinlock);
2012 if (res->state & DLM_LOCK_RES_MIGRATING) {
2013 mlog(0, "Someone asked us to assert mastery, but we're "
2014 "in the middle of migration. Skipping assert, "
2015 "the new master will handle that.\n");
2016 spin_unlock(&res->spinlock);
2017 goto put;
2018 } else
2019 __dlm_lockres_reserve_ast(res);
2020 spin_unlock(&res->spinlock);
2021
1869 /* this call now finishes out the nodemap 2022 /* this call now finishes out the nodemap
1870 * even if one or more nodes die */ 2023 * even if one or more nodes die */
1871 mlog(0, "worker about to master %.*s here, this=%u\n", 2024 mlog(0, "worker about to master %.*s here, this=%u\n",
@@ -1875,9 +2028,14 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1875 nodemap, flags); 2028 nodemap, flags);
1876 if (ret < 0) { 2029 if (ret < 0) {
1877 /* no need to restart, we are done */ 2030 /* no need to restart, we are done */
1878 mlog_errno(ret); 2031 if (!dlm_is_host_down(ret))
2032 mlog_errno(ret);
1879 } 2033 }
1880 2034
2035 /* Ok, we've asserted ourselves. Let's let migration start. */
2036 dlm_lockres_release_ast(dlm, res);
2037
2038put:
1881 dlm_lockres_put(res); 2039 dlm_lockres_put(res);
1882 2040
1883 mlog(0, "finished with dlm_assert_master_worker\n"); 2041 mlog(0, "finished with dlm_assert_master_worker\n");
@@ -1916,6 +2074,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
1916 BUG(); 2074 BUG();
1917 /* host is down, so answer for that node would be 2075 /* host is down, so answer for that node would be
1918 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ 2076 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
2077 ret = 0;
1919 } 2078 }
1920 2079
1921 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) { 2080 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
@@ -2016,14 +2175,14 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2016 */ 2175 */
2017 2176
2018 ret = -ENOMEM; 2177 ret = -ENOMEM;
2019 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_KERNEL); 2178 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
2020 if (!mres) { 2179 if (!mres) {
2021 mlog_errno(ret); 2180 mlog_errno(ret);
2022 goto leave; 2181 goto leave;
2023 } 2182 }
2024 2183
2025 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2184 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
2026 GFP_KERNEL); 2185 GFP_NOFS);
2027 if (!mle) { 2186 if (!mle) {
2028 mlog_errno(ret); 2187 mlog_errno(ret);
2029 goto leave; 2188 goto leave;
@@ -2117,7 +2276,7 @@ fail:
2117 * take both dlm->spinlock and dlm->master_lock */ 2276 * take both dlm->spinlock and dlm->master_lock */
2118 spin_lock(&dlm->spinlock); 2277 spin_lock(&dlm->spinlock);
2119 spin_lock(&dlm->master_lock); 2278 spin_lock(&dlm->master_lock);
2120 dlm_get_mle(mle); 2279 dlm_get_mle_inuse(mle);
2121 spin_unlock(&dlm->master_lock); 2280 spin_unlock(&dlm->master_lock);
2122 spin_unlock(&dlm->spinlock); 2281 spin_unlock(&dlm->spinlock);
2123 2282
@@ -2134,7 +2293,10 @@ fail:
2134 /* migration failed, detach and clean up mle */ 2293 /* migration failed, detach and clean up mle */
2135 dlm_mle_detach_hb_events(dlm, mle); 2294 dlm_mle_detach_hb_events(dlm, mle);
2136 dlm_put_mle(mle); 2295 dlm_put_mle(mle);
2137 dlm_put_mle(mle); 2296 dlm_put_mle_inuse(mle);
2297 spin_lock(&res->spinlock);
2298 res->state &= ~DLM_LOCK_RES_MIGRATING;
2299 spin_unlock(&res->spinlock);
2138 goto leave; 2300 goto leave;
2139 } 2301 }
2140 2302
@@ -2164,8 +2326,8 @@ fail:
2164 /* avoid hang during shutdown when migrating lockres 2326 /* avoid hang during shutdown when migrating lockres
2165 * to a node which also goes down */ 2327 * to a node which also goes down */
2166 if (dlm_is_node_dead(dlm, target)) { 2328 if (dlm_is_node_dead(dlm, target)) {
2167 mlog(0, "%s:%.*s: expected migration target %u " 2329 mlog(0, "%s:%.*s: expected migration "
2168 "is no longer up. restarting.\n", 2330 "target %u is no longer up, restarting\n",
2169 dlm->name, res->lockname.len, 2331 dlm->name, res->lockname.len,
2170 res->lockname.name, target); 2332 res->lockname.name, target);
2171 ret = -ERESTARTSYS; 2333 ret = -ERESTARTSYS;
@@ -2175,7 +2337,10 @@ fail:
2175 /* migration failed, detach and clean up mle */ 2337 /* migration failed, detach and clean up mle */
2176 dlm_mle_detach_hb_events(dlm, mle); 2338 dlm_mle_detach_hb_events(dlm, mle);
2177 dlm_put_mle(mle); 2339 dlm_put_mle(mle);
2178 dlm_put_mle(mle); 2340 dlm_put_mle_inuse(mle);
2341 spin_lock(&res->spinlock);
2342 res->state &= ~DLM_LOCK_RES_MIGRATING;
2343 spin_unlock(&res->spinlock);
2179 goto leave; 2344 goto leave;
2180 } 2345 }
2181 /* TODO: if node died: stop, clean up, return error */ 2346 /* TODO: if node died: stop, clean up, return error */
@@ -2191,7 +2356,7 @@ fail:
2191 2356
2192 /* master is known, detach if not already detached */ 2357 /* master is known, detach if not already detached */
2193 dlm_mle_detach_hb_events(dlm, mle); 2358 dlm_mle_detach_hb_events(dlm, mle);
2194 dlm_put_mle(mle); 2359 dlm_put_mle_inuse(mle);
2195 ret = 0; 2360 ret = 0;
2196 2361
2197 dlm_lockres_calc_usage(dlm, res); 2362 dlm_lockres_calc_usage(dlm, res);
@@ -2462,7 +2627,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2462 struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf; 2627 struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf;
2463 struct dlm_master_list_entry *mle = NULL, *oldmle = NULL; 2628 struct dlm_master_list_entry *mle = NULL, *oldmle = NULL;
2464 const char *name; 2629 const char *name;
2465 unsigned int namelen; 2630 unsigned int namelen, hash;
2466 int ret = 0; 2631 int ret = 0;
2467 2632
2468 if (!dlm_grab(dlm)) 2633 if (!dlm_grab(dlm))
@@ -2470,10 +2635,11 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2470 2635
2471 name = migrate->name; 2636 name = migrate->name;
2472 namelen = migrate->namelen; 2637 namelen = migrate->namelen;
2638 hash = dlm_lockid_hash(name, namelen);
2473 2639
2474 /* preallocate.. if this fails, abort */ 2640 /* preallocate.. if this fails, abort */
2475 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2641 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
2476 GFP_KERNEL); 2642 GFP_NOFS);
2477 2643
2478 if (!mle) { 2644 if (!mle) {
2479 ret = -ENOMEM; 2645 ret = -ENOMEM;
@@ -2482,7 +2648,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2482 2648
2483 /* check for pre-existing lock */ 2649 /* check for pre-existing lock */
2484 spin_lock(&dlm->spinlock); 2650 spin_lock(&dlm->spinlock);
2485 res = __dlm_lookup_lockres(dlm, name, namelen); 2651 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
2486 spin_lock(&dlm->master_lock); 2652 spin_lock(&dlm->master_lock);
2487 2653
2488 if (res) { 2654 if (res) {
@@ -2580,6 +2746,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
2580 /* remove it from the list so that only one 2746 /* remove it from the list so that only one
2581 * mle will be found */ 2747 * mle will be found */
2582 list_del_init(&tmp->list); 2748 list_del_init(&tmp->list);
2749 __dlm_mle_detach_hb_events(dlm, mle);
2583 } 2750 }
2584 spin_unlock(&tmp->spinlock); 2751 spin_unlock(&tmp->spinlock);
2585 } 2752 }
@@ -2601,6 +2768,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
2601 struct list_head *iter, *iter2; 2768 struct list_head *iter, *iter2;
2602 struct dlm_master_list_entry *mle; 2769 struct dlm_master_list_entry *mle;
2603 struct dlm_lock_resource *res; 2770 struct dlm_lock_resource *res;
2771 unsigned int hash;
2604 2772
2605 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); 2773 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
2606top: 2774top:
@@ -2640,7 +2808,7 @@ top:
2640 * may result in the mle being unlinked and 2808 * may result in the mle being unlinked and
2641 * freed, but there may still be a process 2809 * freed, but there may still be a process
2642 * waiting in the dlmlock path which is fine. */ 2810 * waiting in the dlmlock path which is fine. */
2643 mlog(ML_ERROR, "node %u was expected master\n", 2811 mlog(0, "node %u was expected master\n",
2644 dead_node); 2812 dead_node);
2645 atomic_set(&mle->woken, 1); 2813 atomic_set(&mle->woken, 1);
2646 spin_unlock(&mle->spinlock); 2814 spin_unlock(&mle->spinlock);
@@ -2673,19 +2841,21 @@ top:
2673 2841
2674 /* remove from the list early. NOTE: unlinking 2842 /* remove from the list early. NOTE: unlinking
2675 * list_head while in list_for_each_safe */ 2843 * list_head while in list_for_each_safe */
2844 __dlm_mle_detach_hb_events(dlm, mle);
2676 spin_lock(&mle->spinlock); 2845 spin_lock(&mle->spinlock);
2677 list_del_init(&mle->list); 2846 list_del_init(&mle->list);
2678 atomic_set(&mle->woken, 1); 2847 atomic_set(&mle->woken, 1);
2679 spin_unlock(&mle->spinlock); 2848 spin_unlock(&mle->spinlock);
2680 wake_up(&mle->wq); 2849 wake_up(&mle->wq);
2681 2850
2682 mlog(0, "node %u died during migration from " 2851 mlog(0, "%s: node %u died during migration from "
2683 "%u to %u!\n", dead_node, 2852 "%u to %u!\n", dlm->name, dead_node,
2684 mle->master, mle->new_master); 2853 mle->master, mle->new_master);
2685 /* if there is a lockres associated with this 2854 /* if there is a lockres associated with this
2686 * mle, find it and set its owner to UNKNOWN */ 2855 * mle, find it and set its owner to UNKNOWN */
2856 hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len);
2687 res = __dlm_lookup_lockres(dlm, mle->u.name.name, 2857 res = __dlm_lookup_lockres(dlm, mle->u.name.name,
2688 mle->u.name.len); 2858 mle->u.name.len, hash);
2689 if (res) { 2859 if (res) {
2690 /* unfortunately if we hit this rare case, our 2860 /* unfortunately if we hit this rare case, our
2691 * lock ordering is messed. we need to drop 2861 * lock ordering is messed. we need to drop
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 805cbabac051..594745fab0b5 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -95,11 +95,14 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st);
95static void dlm_request_all_locks_worker(struct dlm_work_item *item, 95static void dlm_request_all_locks_worker(struct dlm_work_item *item,
96 void *data); 96 void *data);
97static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data); 97static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data);
98static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
99 struct dlm_lock_resource *res,
100 u8 *real_master);
98 101
99static u64 dlm_get_next_mig_cookie(void); 102static u64 dlm_get_next_mig_cookie(void);
100 103
101static spinlock_t dlm_reco_state_lock = SPIN_LOCK_UNLOCKED; 104static DEFINE_SPINLOCK(dlm_reco_state_lock);
102static spinlock_t dlm_mig_cookie_lock = SPIN_LOCK_UNLOCKED; 105static DEFINE_SPINLOCK(dlm_mig_cookie_lock);
103static u64 dlm_mig_cookie = 1; 106static u64 dlm_mig_cookie = 1;
104 107
105static u64 dlm_get_next_mig_cookie(void) 108static u64 dlm_get_next_mig_cookie(void)
@@ -115,12 +118,37 @@ static u64 dlm_get_next_mig_cookie(void)
115 return c; 118 return c;
116} 119}
117 120
121static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm,
122 u8 dead_node)
123{
124 assert_spin_locked(&dlm->spinlock);
125 if (dlm->reco.dead_node != dead_node)
126 mlog(0, "%s: changing dead_node from %u to %u\n",
127 dlm->name, dlm->reco.dead_node, dead_node);
128 dlm->reco.dead_node = dead_node;
129}
130
131static inline void dlm_set_reco_master(struct dlm_ctxt *dlm,
132 u8 master)
133{
134 assert_spin_locked(&dlm->spinlock);
135 mlog(0, "%s: changing new_master from %u to %u\n",
136 dlm->name, dlm->reco.new_master, master);
137 dlm->reco.new_master = master;
138}
139
140static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
141{
142 assert_spin_locked(&dlm->spinlock);
143 clear_bit(dlm->reco.dead_node, dlm->recovery_map);
144 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
145 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
146}
147
118static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) 148static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
119{ 149{
120 spin_lock(&dlm->spinlock); 150 spin_lock(&dlm->spinlock);
121 clear_bit(dlm->reco.dead_node, dlm->recovery_map); 151 __dlm_reset_recovery(dlm);
122 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
123 dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
124 spin_unlock(&dlm->spinlock); 152 spin_unlock(&dlm->spinlock);
125} 153}
126 154
@@ -132,12 +160,21 @@ void dlm_dispatch_work(void *data)
132 struct list_head *iter, *iter2; 160 struct list_head *iter, *iter2;
133 struct dlm_work_item *item; 161 struct dlm_work_item *item;
134 dlm_workfunc_t *workfunc; 162 dlm_workfunc_t *workfunc;
163 int tot=0;
164
165 if (!dlm_joined(dlm))
166 return;
135 167
136 spin_lock(&dlm->work_lock); 168 spin_lock(&dlm->work_lock);
137 list_splice_init(&dlm->work_list, &tmp_list); 169 list_splice_init(&dlm->work_list, &tmp_list);
138 spin_unlock(&dlm->work_lock); 170 spin_unlock(&dlm->work_lock);
139 171
140 list_for_each_safe(iter, iter2, &tmp_list) { 172 list_for_each_safe(iter, iter2, &tmp_list) {
173 tot++;
174 }
175 mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
176
177 list_for_each_safe(iter, iter2, &tmp_list) {
141 item = list_entry(iter, struct dlm_work_item, list); 178 item = list_entry(iter, struct dlm_work_item, list);
142 workfunc = item->func; 179 workfunc = item->func;
143 list_del_init(&item->list); 180 list_del_init(&item->list);
@@ -220,6 +257,52 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
220 * 257 *
221 */ 258 */
222 259
260static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
261{
262 struct dlm_reco_node_data *ndata;
263 struct dlm_lock_resource *res;
264
265 mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
266 dlm->name, dlm->dlm_reco_thread_task->pid,
267 dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
268 dlm->reco.dead_node, dlm->reco.new_master);
269
270 list_for_each_entry(ndata, &dlm->reco.node_data, list) {
271 char *st = "unknown";
272 switch (ndata->state) {
273 case DLM_RECO_NODE_DATA_INIT:
274 st = "init";
275 break;
276 case DLM_RECO_NODE_DATA_REQUESTING:
277 st = "requesting";
278 break;
279 case DLM_RECO_NODE_DATA_DEAD:
280 st = "dead";
281 break;
282 case DLM_RECO_NODE_DATA_RECEIVING:
283 st = "receiving";
284 break;
285 case DLM_RECO_NODE_DATA_REQUESTED:
286 st = "requested";
287 break;
288 case DLM_RECO_NODE_DATA_DONE:
289 st = "done";
290 break;
291 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
292 st = "finalize-sent";
293 break;
294 default:
295 st = "bad";
296 break;
297 }
298 mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n",
299 dlm->name, ndata->node_num, st);
300 }
301 list_for_each_entry(res, &dlm->reco.resources, recovering) {
302 mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n",
303 dlm->name, res->lockname.len, res->lockname.name);
304 }
305}
223 306
224#define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000) 307#define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
225 308
@@ -267,11 +350,23 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
267{ 350{
268 int dead; 351 int dead;
269 spin_lock(&dlm->spinlock); 352 spin_lock(&dlm->spinlock);
270 dead = test_bit(node, dlm->domain_map); 353 dead = !test_bit(node, dlm->domain_map);
271 spin_unlock(&dlm->spinlock); 354 spin_unlock(&dlm->spinlock);
272 return dead; 355 return dead;
273} 356}
274 357
358/* returns true if node is no longer in the domain
359 * could be dead or just not joined */
360static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
361{
362 int recovered;
363 spin_lock(&dlm->spinlock);
364 recovered = !test_bit(node, dlm->recovery_map);
365 spin_unlock(&dlm->spinlock);
366 return recovered;
367}
368
369
275int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) 370int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
276{ 371{
277 if (timeout) { 372 if (timeout) {
@@ -290,6 +385,24 @@ int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
290 return 0; 385 return 0;
291} 386}
292 387
388int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
389{
390 if (timeout) {
391 mlog(0, "%s: waiting %dms for notification of "
392 "recovery of node %u\n", dlm->name, timeout, node);
393 wait_event_timeout(dlm->dlm_reco_thread_wq,
394 dlm_is_node_recovered(dlm, node),
395 msecs_to_jiffies(timeout));
396 } else {
397 mlog(0, "%s: waiting indefinitely for notification "
398 "of recovery of node %u\n", dlm->name, node);
399 wait_event(dlm->dlm_reco_thread_wq,
400 dlm_is_node_recovered(dlm, node));
401 }
402 /* for now, return 0 */
403 return 0;
404}
405
293/* callers of the top-level api calls (dlmlock/dlmunlock) should 406/* callers of the top-level api calls (dlmlock/dlmunlock) should
294 * block on the dlm->reco.event when recovery is in progress. 407 * block on the dlm->reco.event when recovery is in progress.
295 * the dlm recovery thread will set this state when it begins 408 * the dlm recovery thread will set this state when it begins
@@ -308,6 +421,13 @@ static int dlm_in_recovery(struct dlm_ctxt *dlm)
308 421
309void dlm_wait_for_recovery(struct dlm_ctxt *dlm) 422void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
310{ 423{
424 if (dlm_in_recovery(dlm)) {
425 mlog(0, "%s: reco thread %d in recovery: "
426 "state=%d, master=%u, dead=%u\n",
427 dlm->name, dlm->dlm_reco_thread_task->pid,
428 dlm->reco.state, dlm->reco.new_master,
429 dlm->reco.dead_node);
430 }
311 wait_event(dlm->reco.event, !dlm_in_recovery(dlm)); 431 wait_event(dlm->reco.event, !dlm_in_recovery(dlm));
312} 432}
313 433
@@ -341,7 +461,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
341 mlog(0, "new master %u died while recovering %u!\n", 461 mlog(0, "new master %u died while recovering %u!\n",
342 dlm->reco.new_master, dlm->reco.dead_node); 462 dlm->reco.new_master, dlm->reco.dead_node);
343 /* unset the new_master, leave dead_node */ 463 /* unset the new_master, leave dead_node */
344 dlm->reco.new_master = O2NM_INVALID_NODE_NUM; 464 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
345 } 465 }
346 466
347 /* select a target to recover */ 467 /* select a target to recover */
@@ -350,14 +470,14 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
350 470
351 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 471 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
352 if (bit >= O2NM_MAX_NODES || bit < 0) 472 if (bit >= O2NM_MAX_NODES || bit < 0)
353 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 473 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
354 else 474 else
355 dlm->reco.dead_node = bit; 475 dlm_set_reco_dead_node(dlm, bit);
356 } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) { 476 } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
357 /* BUG? */ 477 /* BUG? */
358 mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n", 478 mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n",
359 dlm->reco.dead_node); 479 dlm->reco.dead_node);
360 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 480 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
361 } 481 }
362 482
363 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 483 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
@@ -366,7 +486,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
366 /* return to main thread loop and sleep. */ 486 /* return to main thread loop and sleep. */
367 return 0; 487 return 0;
368 } 488 }
369 mlog(0, "recovery thread found node %u in the recovery map!\n", 489 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
490 dlm->name, dlm->dlm_reco_thread_task->pid,
370 dlm->reco.dead_node); 491 dlm->reco.dead_node);
371 spin_unlock(&dlm->spinlock); 492 spin_unlock(&dlm->spinlock);
372 493
@@ -389,8 +510,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
389 } 510 }
390 mlog(0, "another node will master this recovery session.\n"); 511 mlog(0, "another node will master this recovery session.\n");
391 } 512 }
392 mlog(0, "dlm=%s, new_master=%u, this node=%u, dead_node=%u\n", 513 mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
393 dlm->name, dlm->reco.new_master, 514 dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
394 dlm->node_num, dlm->reco.dead_node); 515 dlm->node_num, dlm->reco.dead_node);
395 516
396 /* it is safe to start everything back up here 517 /* it is safe to start everything back up here
@@ -402,11 +523,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
402 return 0; 523 return 0;
403 524
404master_here: 525master_here:
405 mlog(0, "mastering recovery of %s:%u here(this=%u)!\n", 526 mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
527 dlm->dlm_reco_thread_task->pid,
406 dlm->name, dlm->reco.dead_node, dlm->node_num); 528 dlm->name, dlm->reco.dead_node, dlm->node_num);
407 529
408 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 530 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
409 if (status < 0) { 531 if (status < 0) {
532 /* we should never hit this anymore */
410 mlog(ML_ERROR, "error %d remastering locks for node %u, " 533 mlog(ML_ERROR, "error %d remastering locks for node %u, "
411 "retrying.\n", status, dlm->reco.dead_node); 534 "retrying.\n", status, dlm->reco.dead_node);
412 /* yield a bit to allow any final network messages 535 /* yield a bit to allow any final network messages
@@ -433,9 +556,16 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
433 int destroy = 0; 556 int destroy = 0;
434 int pass = 0; 557 int pass = 0;
435 558
436 status = dlm_init_recovery_area(dlm, dead_node); 559 do {
437 if (status < 0) 560 /* we have become recovery master. there is no escaping
438 goto leave; 561 * this, so just keep trying until we get it. */
562 status = dlm_init_recovery_area(dlm, dead_node);
563 if (status < 0) {
564 mlog(ML_ERROR, "%s: failed to alloc recovery area, "
565 "retrying\n", dlm->name);
566 msleep(1000);
567 }
568 } while (status != 0);
439 569
440 /* safe to access the node data list without a lock, since this 570 /* safe to access the node data list without a lock, since this
441 * process is the only one to change the list */ 571 * process is the only one to change the list */
@@ -452,16 +582,36 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
452 continue; 582 continue;
453 } 583 }
454 584
455 status = dlm_request_all_locks(dlm, ndata->node_num, dead_node); 585 do {
456 if (status < 0) { 586 status = dlm_request_all_locks(dlm, ndata->node_num,
457 mlog_errno(status); 587 dead_node);
458 if (dlm_is_host_down(status)) 588 if (status < 0) {
459 ndata->state = DLM_RECO_NODE_DATA_DEAD; 589 mlog_errno(status);
460 else { 590 if (dlm_is_host_down(status)) {
461 destroy = 1; 591 /* node died, ignore it for recovery */
462 goto leave; 592 status = 0;
593 ndata->state = DLM_RECO_NODE_DATA_DEAD;
594 /* wait for the domain map to catch up
595 * with the network state. */
596 wait_event_timeout(dlm->dlm_reco_thread_wq,
597 dlm_is_node_dead(dlm,
598 ndata->node_num),
599 msecs_to_jiffies(1000));
600 mlog(0, "waited 1 sec for %u, "
601 "dead? %s\n", ndata->node_num,
602 dlm_is_node_dead(dlm, ndata->node_num) ?
603 "yes" : "no");
604 } else {
605 /* -ENOMEM on the other node */
606 mlog(0, "%s: node %u returned "
607 "%d during recovery, retrying "
608 "after a short wait\n",
609 dlm->name, ndata->node_num,
610 status);
611 msleep(100);
612 }
463 } 613 }
464 } 614 } while (status != 0);
465 615
466 switch (ndata->state) { 616 switch (ndata->state) {
467 case DLM_RECO_NODE_DATA_INIT: 617 case DLM_RECO_NODE_DATA_INIT:
@@ -473,10 +623,9 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
473 mlog(0, "node %u died after requesting " 623 mlog(0, "node %u died after requesting "
474 "recovery info for node %u\n", 624 "recovery info for node %u\n",
475 ndata->node_num, dead_node); 625 ndata->node_num, dead_node);
476 // start all over 626 /* fine. don't need this node's info.
477 destroy = 1; 627 * continue without it. */
478 status = -EAGAIN; 628 break;
479 goto leave;
480 case DLM_RECO_NODE_DATA_REQUESTING: 629 case DLM_RECO_NODE_DATA_REQUESTING:
481 ndata->state = DLM_RECO_NODE_DATA_REQUESTED; 630 ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
482 mlog(0, "now receiving recovery data from " 631 mlog(0, "now receiving recovery data from "
@@ -520,35 +669,26 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
520 BUG(); 669 BUG();
521 break; 670 break;
522 case DLM_RECO_NODE_DATA_DEAD: 671 case DLM_RECO_NODE_DATA_DEAD:
523 mlog(ML_NOTICE, "node %u died after " 672 mlog(0, "node %u died after "
524 "requesting recovery info for " 673 "requesting recovery info for "
525 "node %u\n", ndata->node_num, 674 "node %u\n", ndata->node_num,
526 dead_node); 675 dead_node);
527 spin_unlock(&dlm_reco_state_lock); 676 break;
528 // start all over
529 destroy = 1;
530 status = -EAGAIN;
531 /* instead of spinning like crazy here,
532 * wait for the domain map to catch up
533 * with the network state. otherwise this
534 * can be hit hundreds of times before
535 * the node is really seen as dead. */
536 wait_event_timeout(dlm->dlm_reco_thread_wq,
537 dlm_is_node_dead(dlm,
538 ndata->node_num),
539 msecs_to_jiffies(1000));
540 mlog(0, "waited 1 sec for %u, "
541 "dead? %s\n", ndata->node_num,
542 dlm_is_node_dead(dlm, ndata->node_num) ?
543 "yes" : "no");
544 goto leave;
545 case DLM_RECO_NODE_DATA_RECEIVING: 677 case DLM_RECO_NODE_DATA_RECEIVING:
546 case DLM_RECO_NODE_DATA_REQUESTED: 678 case DLM_RECO_NODE_DATA_REQUESTED:
679 mlog(0, "%s: node %u still in state %s\n",
680 dlm->name, ndata->node_num,
681 ndata->state==DLM_RECO_NODE_DATA_RECEIVING ?
682 "receiving" : "requested");
547 all_nodes_done = 0; 683 all_nodes_done = 0;
548 break; 684 break;
549 case DLM_RECO_NODE_DATA_DONE: 685 case DLM_RECO_NODE_DATA_DONE:
686 mlog(0, "%s: node %u state is done\n",
687 dlm->name, ndata->node_num);
550 break; 688 break;
551 case DLM_RECO_NODE_DATA_FINALIZE_SENT: 689 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
690 mlog(0, "%s: node %u state is finalize\n",
691 dlm->name, ndata->node_num);
552 break; 692 break;
553 } 693 }
554 } 694 }
@@ -578,7 +718,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
578 jiffies, dlm->reco.dead_node, 718 jiffies, dlm->reco.dead_node,
579 dlm->node_num, dlm->reco.new_master); 719 dlm->node_num, dlm->reco.new_master);
580 destroy = 1; 720 destroy = 1;
581 status = ret; 721 status = 0;
582 /* rescan everything marked dirty along the way */ 722 /* rescan everything marked dirty along the way */
583 dlm_kick_thread(dlm, NULL); 723 dlm_kick_thread(dlm, NULL);
584 break; 724 break;
@@ -591,7 +731,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
591 731
592 } 732 }
593 733
594leave:
595 if (destroy) 734 if (destroy)
596 dlm_destroy_recovery_area(dlm, dead_node); 735 dlm_destroy_recovery_area(dlm, dead_node);
597 736
@@ -617,7 +756,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
617 } 756 }
618 BUG_ON(num == dead_node); 757 BUG_ON(num == dead_node);
619 758
620 ndata = kcalloc(1, sizeof(*ndata), GFP_KERNEL); 759 ndata = kcalloc(1, sizeof(*ndata), GFP_NOFS);
621 if (!ndata) { 760 if (!ndata) {
622 dlm_destroy_recovery_area(dlm, dead_node); 761 dlm_destroy_recovery_area(dlm, dead_node);
623 return -ENOMEM; 762 return -ENOMEM;
@@ -691,16 +830,25 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
691 if (!dlm_grab(dlm)) 830 if (!dlm_grab(dlm))
692 return -EINVAL; 831 return -EINVAL;
693 832
833 if (lr->dead_node != dlm->reco.dead_node) {
834 mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local "
835 "dead_node is %u\n", dlm->name, lr->node_idx,
836 lr->dead_node, dlm->reco.dead_node);
837 dlm_print_reco_node_status(dlm);
838 /* this is a hack */
839 dlm_put(dlm);
840 return -ENOMEM;
841 }
694 BUG_ON(lr->dead_node != dlm->reco.dead_node); 842 BUG_ON(lr->dead_node != dlm->reco.dead_node);
695 843
696 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 844 item = kcalloc(1, sizeof(*item), GFP_NOFS);
697 if (!item) { 845 if (!item) {
698 dlm_put(dlm); 846 dlm_put(dlm);
699 return -ENOMEM; 847 return -ENOMEM;
700 } 848 }
701 849
702 /* this will get freed by dlm_request_all_locks_worker */ 850 /* this will get freed by dlm_request_all_locks_worker */
703 buf = (char *) __get_free_page(GFP_KERNEL); 851 buf = (char *) __get_free_page(GFP_NOFS);
704 if (!buf) { 852 if (!buf) {
705 kfree(item); 853 kfree(item);
706 dlm_put(dlm); 854 dlm_put(dlm);
@@ -715,7 +863,7 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
715 spin_lock(&dlm->work_lock); 863 spin_lock(&dlm->work_lock);
716 list_add_tail(&item->list, &dlm->work_list); 864 list_add_tail(&item->list, &dlm->work_list);
717 spin_unlock(&dlm->work_lock); 865 spin_unlock(&dlm->work_lock);
718 schedule_work(&dlm->dispatched_work); 866 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
719 867
720 dlm_put(dlm); 868 dlm_put(dlm);
721 return 0; 869 return 0;
@@ -730,32 +878,34 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
730 struct list_head *iter; 878 struct list_head *iter;
731 int ret; 879 int ret;
732 u8 dead_node, reco_master; 880 u8 dead_node, reco_master;
881 int skip_all_done = 0;
733 882
734 dlm = item->dlm; 883 dlm = item->dlm;
735 dead_node = item->u.ral.dead_node; 884 dead_node = item->u.ral.dead_node;
736 reco_master = item->u.ral.reco_master; 885 reco_master = item->u.ral.reco_master;
737 mres = (struct dlm_migratable_lockres *)data; 886 mres = (struct dlm_migratable_lockres *)data;
738 887
888 mlog(0, "%s: recovery worker started, dead=%u, master=%u\n",
889 dlm->name, dead_node, reco_master);
890
739 if (dead_node != dlm->reco.dead_node || 891 if (dead_node != dlm->reco.dead_node ||
740 reco_master != dlm->reco.new_master) { 892 reco_master != dlm->reco.new_master) {
741 /* show extra debug info if the recovery state is messed */ 893 /* worker could have been created before the recovery master
742 mlog(ML_ERROR, "%s: bad reco state: reco(dead=%u, master=%u), " 894 * died. if so, do not continue, but do not error. */
743 "request(dead=%u, master=%u)\n", 895 if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
744 dlm->name, dlm->reco.dead_node, dlm->reco.new_master, 896 mlog(ML_NOTICE, "%s: will not send recovery state, "
745 dead_node, reco_master); 897 "recovery master %u died, thread=(dead=%u,mas=%u)"
746 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " 898 " current=(dead=%u,mas=%u)\n", dlm->name,
747 "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", 899 reco_master, dead_node, reco_master,
748 dlm->name, mres->lockname_len, mres->lockname, mres->master, 900 dlm->reco.dead_node, dlm->reco.new_master);
749 mres->num_locks, mres->total_locks, mres->flags, 901 } else {
750 dlm_get_lock_cookie_node(mres->ml[0].cookie), 902 mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
751 dlm_get_lock_cookie_seq(mres->ml[0].cookie), 903 "master=%u), request(dead=%u, master=%u)\n",
752 mres->ml[0].list, mres->ml[0].flags, 904 dlm->name, dlm->reco.dead_node,
753 mres->ml[0].type, mres->ml[0].convert_type, 905 dlm->reco.new_master, dead_node, reco_master);
754 mres->ml[0].highest_blocked, mres->ml[0].node); 906 }
755 BUG(); 907 goto leave;
756 } 908 }
757 BUG_ON(dead_node != dlm->reco.dead_node);
758 BUG_ON(reco_master != dlm->reco.new_master);
759 909
760 /* lock resources should have already been moved to the 910 /* lock resources should have already been moved to the
761 * dlm->reco.resources list. now move items from that list 911 * dlm->reco.resources list. now move items from that list
@@ -766,12 +916,20 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
766 dlm_move_reco_locks_to_list(dlm, &resources, dead_node); 916 dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
767 917
768 /* now we can begin blasting lockreses without the dlm lock */ 918 /* now we can begin blasting lockreses without the dlm lock */
919
920 /* any errors returned will be due to the new_master dying,
921 * the dlm_reco_thread should detect this */
769 list_for_each(iter, &resources) { 922 list_for_each(iter, &resources) {
770 res = list_entry (iter, struct dlm_lock_resource, recovering); 923 res = list_entry (iter, struct dlm_lock_resource, recovering);
771 ret = dlm_send_one_lockres(dlm, res, mres, reco_master, 924 ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
772 DLM_MRES_RECOVERY); 925 DLM_MRES_RECOVERY);
773 if (ret < 0) 926 if (ret < 0) {
774 mlog_errno(ret); 927 mlog(ML_ERROR, "%s: node %u went down while sending "
928 "recovery state for dead node %u, ret=%d\n", dlm->name,
929 reco_master, dead_node, ret);
930 skip_all_done = 1;
931 break;
932 }
775 } 933 }
776 934
777 /* move the resources back to the list */ 935 /* move the resources back to the list */
@@ -779,10 +937,15 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
779 list_splice_init(&resources, &dlm->reco.resources); 937 list_splice_init(&resources, &dlm->reco.resources);
780 spin_unlock(&dlm->spinlock); 938 spin_unlock(&dlm->spinlock);
781 939
782 ret = dlm_send_all_done_msg(dlm, dead_node, reco_master); 940 if (!skip_all_done) {
783 if (ret < 0) 941 ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
784 mlog_errno(ret); 942 if (ret < 0) {
785 943 mlog(ML_ERROR, "%s: node %u went down while sending "
944 "recovery all-done for dead node %u, ret=%d\n",
945 dlm->name, reco_master, dead_node, ret);
946 }
947 }
948leave:
786 free_page((unsigned long)data); 949 free_page((unsigned long)data);
787} 950}
788 951
@@ -801,8 +964,14 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
801 964
802 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 965 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
803 sizeof(done_msg), send_to, &tmpret); 966 sizeof(done_msg), send_to, &tmpret);
804 /* negative status is ignored by the caller */ 967 if (ret < 0) {
805 if (ret >= 0) 968 if (!dlm_is_host_down(ret)) {
969 mlog_errno(ret);
970 mlog(ML_ERROR, "%s: unknown error sending data-done "
971 "to %u\n", dlm->name, send_to);
972 BUG();
973 }
974 } else
806 ret = tmpret; 975 ret = tmpret;
807 return ret; 976 return ret;
808} 977}
@@ -822,7 +991,11 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
822 mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, " 991 mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, "
823 "node_idx=%u, this node=%u\n", done->dead_node, 992 "node_idx=%u, this node=%u\n", done->dead_node,
824 dlm->reco.dead_node, done->node_idx, dlm->node_num); 993 dlm->reco.dead_node, done->node_idx, dlm->node_num);
825 BUG_ON(done->dead_node != dlm->reco.dead_node); 994
995 mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node),
996 "Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
997 "node_idx=%u, this node=%u\n", done->dead_node,
998 dlm->reco.dead_node, done->node_idx, dlm->node_num);
826 999
827 spin_lock(&dlm_reco_state_lock); 1000 spin_lock(&dlm_reco_state_lock);
828 list_for_each(iter, &dlm->reco.node_data) { 1001 list_for_each(iter, &dlm->reco.node_data) {
@@ -905,13 +1078,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
905 mlog(0, "found lockres owned by dead node while " 1078 mlog(0, "found lockres owned by dead node while "
906 "doing recovery for node %u. sending it.\n", 1079 "doing recovery for node %u. sending it.\n",
907 dead_node); 1080 dead_node);
908 list_del_init(&res->recovering); 1081 list_move_tail(&res->recovering, list);
909 list_add_tail(&res->recovering, list);
910 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 1082 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
911 mlog(0, "found UNKNOWN owner while doing recovery " 1083 mlog(0, "found UNKNOWN owner while doing recovery "
912 "for node %u. sending it.\n", dead_node); 1084 "for node %u. sending it.\n", dead_node);
913 list_del_init(&res->recovering); 1085 list_move_tail(&res->recovering, list);
914 list_add_tail(&res->recovering, list);
915 } 1086 }
916 } 1087 }
917 spin_unlock(&dlm->spinlock); 1088 spin_unlock(&dlm->spinlock);
@@ -1023,8 +1194,9 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1023 ml->type == LKM_PRMODE) { 1194 ml->type == LKM_PRMODE) {
1024 /* if it is already set, this had better be a PR 1195 /* if it is already set, this had better be a PR
1025 * and it has to match */ 1196 * and it has to match */
1026 if (mres->lvb[0] && (ml->type == LKM_EXMODE || 1197 if (!dlm_lvb_is_empty(mres->lvb) &&
1027 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { 1198 (ml->type == LKM_EXMODE ||
1199 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
1028 mlog(ML_ERROR, "mismatched lvbs!\n"); 1200 mlog(ML_ERROR, "mismatched lvbs!\n");
1029 __dlm_print_one_lock_resource(lock->lockres); 1201 __dlm_print_one_lock_resource(lock->lockres);
1030 BUG(); 1202 BUG();
@@ -1083,22 +1255,25 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1083 * we must send it immediately. */ 1255 * we must send it immediately. */
1084 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, 1256 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
1085 res, total_locks); 1257 res, total_locks);
1086 if (ret < 0) { 1258 if (ret < 0)
1087 // TODO 1259 goto error;
1088 mlog(ML_ERROR, "dlm_send_mig_lockres_msg "
1089 "returned %d, TODO\n", ret);
1090 BUG();
1091 }
1092 } 1260 }
1093 } 1261 }
1094 /* flush any remaining locks */ 1262 /* flush any remaining locks */
1095 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); 1263 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
1096 if (ret < 0) { 1264 if (ret < 0)
1097 // TODO 1265 goto error;
1098 mlog(ML_ERROR, "dlm_send_mig_lockres_msg returned %d, " 1266 return ret;
1099 "TODO\n", ret); 1267
1268error:
1269 mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
1270 dlm->name, ret);
1271 if (!dlm_is_host_down(ret))
1100 BUG(); 1272 BUG();
1101 } 1273 mlog(0, "%s: node %u went down while sending %s "
1274 "lockres %.*s\n", dlm->name, send_to,
1275 flags & DLM_MRES_RECOVERY ? "recovery" : "migration",
1276 res->lockname.len, res->lockname.name);
1102 return ret; 1277 return ret;
1103} 1278}
1104 1279
@@ -1146,8 +1321,8 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1146 mlog(0, "all done flag. all lockres data received!\n"); 1321 mlog(0, "all done flag. all lockres data received!\n");
1147 1322
1148 ret = -ENOMEM; 1323 ret = -ENOMEM;
1149 buf = kmalloc(be16_to_cpu(msg->data_len), GFP_KERNEL); 1324 buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
1150 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 1325 item = kcalloc(1, sizeof(*item), GFP_NOFS);
1151 if (!buf || !item) 1326 if (!buf || !item)
1152 goto leave; 1327 goto leave;
1153 1328
@@ -1238,7 +1413,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1238 spin_lock(&dlm->work_lock); 1413 spin_lock(&dlm->work_lock);
1239 list_add_tail(&item->list, &dlm->work_list); 1414 list_add_tail(&item->list, &dlm->work_list);
1240 spin_unlock(&dlm->work_lock); 1415 spin_unlock(&dlm->work_lock);
1241 schedule_work(&dlm->dispatched_work); 1416 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
1242 1417
1243leave: 1418leave:
1244 dlm_put(dlm); 1419 dlm_put(dlm);
@@ -1312,8 +1487,9 @@ leave:
1312 1487
1313 1488
1314 1489
1315int dlm_lockres_master_requery(struct dlm_ctxt *dlm, 1490static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1316 struct dlm_lock_resource *res, u8 *real_master) 1491 struct dlm_lock_resource *res,
1492 u8 *real_master)
1317{ 1493{
1318 struct dlm_node_iter iter; 1494 struct dlm_node_iter iter;
1319 int nodenum; 1495 int nodenum;
@@ -1406,6 +1582,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
1406 struct dlm_ctxt *dlm = data; 1582 struct dlm_ctxt *dlm = data;
1407 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; 1583 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
1408 struct dlm_lock_resource *res = NULL; 1584 struct dlm_lock_resource *res = NULL;
1585 unsigned int hash;
1409 int master = DLM_LOCK_RES_OWNER_UNKNOWN; 1586 int master = DLM_LOCK_RES_OWNER_UNKNOWN;
1410 u32 flags = DLM_ASSERT_MASTER_REQUERY; 1587 u32 flags = DLM_ASSERT_MASTER_REQUERY;
1411 1588
@@ -1415,8 +1592,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
1415 return master; 1592 return master;
1416 } 1593 }
1417 1594
1595 hash = dlm_lockid_hash(req->name, req->namelen);
1596
1418 spin_lock(&dlm->spinlock); 1597 spin_lock(&dlm->spinlock);
1419 res = __dlm_lookup_lockres(dlm, req->name, req->namelen); 1598 res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash);
1420 if (res) { 1599 if (res) {
1421 spin_lock(&res->spinlock); 1600 spin_lock(&res->spinlock);
1422 master = res->owner; 1601 master = res->owner;
@@ -1483,7 +1662,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1483 struct dlm_lock *newlock = NULL; 1662 struct dlm_lock *newlock = NULL;
1484 struct dlm_lockstatus *lksb = NULL; 1663 struct dlm_lockstatus *lksb = NULL;
1485 int ret = 0; 1664 int ret = 0;
1486 int i; 1665 int i, bad;
1487 struct list_head *iter; 1666 struct list_head *iter;
1488 struct dlm_lock *lock = NULL; 1667 struct dlm_lock *lock = NULL;
1489 1668
@@ -1529,8 +1708,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1529 1708
1530 /* move the lock to its proper place */ 1709 /* move the lock to its proper place */
1531 /* do not alter lock refcount. switching lists. */ 1710 /* do not alter lock refcount. switching lists. */
1532 list_del_init(&lock->list); 1711 list_move_tail(&lock->list, queue);
1533 list_add_tail(&lock->list, queue);
1534 spin_unlock(&res->spinlock); 1712 spin_unlock(&res->spinlock);
1535 1713
1536 mlog(0, "just reordered a local lock!\n"); 1714 mlog(0, "just reordered a local lock!\n");
@@ -1553,28 +1731,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1553 } 1731 }
1554 lksb->flags |= (ml->flags & 1732 lksb->flags |= (ml->flags &
1555 (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); 1733 (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
1556 1734
1557 if (mres->lvb[0]) { 1735 if (ml->type == LKM_NLMODE)
1736 goto skip_lvb;
1737
1738 if (!dlm_lvb_is_empty(mres->lvb)) {
1558 if (lksb->flags & DLM_LKSB_PUT_LVB) { 1739 if (lksb->flags & DLM_LKSB_PUT_LVB) {
1559 /* other node was trying to update 1740 /* other node was trying to update
1560 * lvb when node died. recreate the 1741 * lvb when node died. recreate the
1561 * lksb with the updated lvb. */ 1742 * lksb with the updated lvb. */
1562 memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN); 1743 memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN);
1744 /* the lock resource lvb update must happen
1745 * NOW, before the spinlock is dropped.
1746 * we no longer wait for the AST to update
1747 * the lvb. */
1748 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1563 } else { 1749 } else {
1564 /* otherwise, the node is sending its 1750 /* otherwise, the node is sending its
1565 * most recent valid lvb info */ 1751 * most recent valid lvb info */
1566 BUG_ON(ml->type != LKM_EXMODE && 1752 BUG_ON(ml->type != LKM_EXMODE &&
1567 ml->type != LKM_PRMODE); 1753 ml->type != LKM_PRMODE);
1568 if (res->lvb[0] && (ml->type == LKM_EXMODE || 1754 if (!dlm_lvb_is_empty(res->lvb) &&
1569 memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) { 1755 (ml->type == LKM_EXMODE ||
1570 mlog(ML_ERROR, "received bad lvb!\n"); 1756 memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
1571 __dlm_print_one_lock_resource(res); 1757 int i;
1572 BUG(); 1758 mlog(ML_ERROR, "%s:%.*s: received bad "
1759 "lvb! type=%d\n", dlm->name,
1760 res->lockname.len,
1761 res->lockname.name, ml->type);
1762 printk("lockres lvb=[");
1763 for (i=0; i<DLM_LVB_LEN; i++)
1764 printk("%02x", res->lvb[i]);
1765 printk("]\nmigrated lvb=[");
1766 for (i=0; i<DLM_LVB_LEN; i++)
1767 printk("%02x", mres->lvb[i]);
1768 printk("]\n");
1769 dlm_print_one_lock_resource(res);
1770 BUG();
1573 } 1771 }
1574 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); 1772 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1575 } 1773 }
1576 } 1774 }
1577 1775skip_lvb:
1578 1776
1579 /* NOTE: 1777 /* NOTE:
1580 * wrt lock queue ordering and recovery: 1778 * wrt lock queue ordering and recovery:
@@ -1592,9 +1790,33 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1592 * relative to each other, but clearly *not* 1790 * relative to each other, but clearly *not*
1593 * preserved relative to locks from other nodes. 1791 * preserved relative to locks from other nodes.
1594 */ 1792 */
1793 bad = 0;
1595 spin_lock(&res->spinlock); 1794 spin_lock(&res->spinlock);
1596 dlm_lock_get(newlock); 1795 list_for_each_entry(lock, queue, list) {
1597 list_add_tail(&newlock->list, queue); 1796 if (lock->ml.cookie == ml->cookie) {
1797 u64 c = lock->ml.cookie;
1798 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1799 "exists on this lockres!\n", dlm->name,
1800 res->lockname.len, res->lockname.name,
1801 dlm_get_lock_cookie_node(c),
1802 dlm_get_lock_cookie_seq(c));
1803
1804 mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
1805 "node=%u, cookie=%u:%llu, queue=%d\n",
1806 ml->type, ml->convert_type, ml->node,
1807 dlm_get_lock_cookie_node(ml->cookie),
1808 dlm_get_lock_cookie_seq(ml->cookie),
1809 ml->list);
1810
1811 __dlm_print_one_lock_resource(res);
1812 bad = 1;
1813 break;
1814 }
1815 }
1816 if (!bad) {
1817 dlm_lock_get(newlock);
1818 list_add_tail(&newlock->list, queue);
1819 }
1598 spin_unlock(&res->spinlock); 1820 spin_unlock(&res->spinlock);
1599 } 1821 }
1600 mlog(0, "done running all the locks\n"); 1822 mlog(0, "done running all the locks\n");
@@ -1618,8 +1840,14 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1618 struct dlm_lock *lock; 1840 struct dlm_lock *lock;
1619 1841
1620 res->state |= DLM_LOCK_RES_RECOVERING; 1842 res->state |= DLM_LOCK_RES_RECOVERING;
1621 if (!list_empty(&res->recovering)) 1843 if (!list_empty(&res->recovering)) {
1844 mlog(0,
1845 "Recovering res %s:%.*s, is already on recovery list!\n",
1846 dlm->name, res->lockname.len, res->lockname.name);
1622 list_del_init(&res->recovering); 1847 list_del_init(&res->recovering);
1848 }
1849 /* We need to hold a reference while on the recovery list */
1850 dlm_lockres_get(res);
1623 list_add_tail(&res->recovering, &dlm->reco.resources); 1851 list_add_tail(&res->recovering, &dlm->reco.resources);
1624 1852
1625 /* find any pending locks and put them back on proper list */ 1853 /* find any pending locks and put them back on proper list */
@@ -1708,9 +1936,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1708 spin_lock(&res->spinlock); 1936 spin_lock(&res->spinlock);
1709 dlm_change_lockres_owner(dlm, res, new_master); 1937 dlm_change_lockres_owner(dlm, res, new_master);
1710 res->state &= ~DLM_LOCK_RES_RECOVERING; 1938 res->state &= ~DLM_LOCK_RES_RECOVERING;
1711 __dlm_dirty_lockres(dlm, res); 1939 if (!__dlm_lockres_unused(res))
1940 __dlm_dirty_lockres(dlm, res);
1712 spin_unlock(&res->spinlock); 1941 spin_unlock(&res->spinlock);
1713 wake_up(&res->wq); 1942 wake_up(&res->wq);
1943 dlm_lockres_put(res);
1714 } 1944 }
1715 } 1945 }
1716 1946
@@ -1719,7 +1949,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1719 * the RECOVERING state and set the owner 1949 * the RECOVERING state and set the owner
1720 * if necessary */ 1950 * if necessary */
1721 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 1951 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
1722 bucket = &(dlm->lockres_hash[i]); 1952 bucket = dlm_lockres_hash(dlm, i);
1723 hlist_for_each_entry(res, hash_iter, bucket, hash_node) { 1953 hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
1724 if (res->state & DLM_LOCK_RES_RECOVERING) { 1954 if (res->state & DLM_LOCK_RES_RECOVERING) {
1725 if (res->owner == dead_node) { 1955 if (res->owner == dead_node) {
@@ -1743,11 +1973,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1743 dlm->name, res->lockname.len, 1973 dlm->name, res->lockname.len,
1744 res->lockname.name, res->owner); 1974 res->lockname.name, res->owner);
1745 list_del_init(&res->recovering); 1975 list_del_init(&res->recovering);
1976 dlm_lockres_put(res);
1746 } 1977 }
1747 spin_lock(&res->spinlock); 1978 spin_lock(&res->spinlock);
1748 dlm_change_lockres_owner(dlm, res, new_master); 1979 dlm_change_lockres_owner(dlm, res, new_master);
1749 res->state &= ~DLM_LOCK_RES_RECOVERING; 1980 res->state &= ~DLM_LOCK_RES_RECOVERING;
1750 __dlm_dirty_lockres(dlm, res); 1981 if (!__dlm_lockres_unused(res))
1982 __dlm_dirty_lockres(dlm, res);
1751 spin_unlock(&res->spinlock); 1983 spin_unlock(&res->spinlock);
1752 wake_up(&res->wq); 1984 wake_up(&res->wq);
1753 } 1985 }
@@ -1884,7 +2116,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
1884 * need to be fired as a result. 2116 * need to be fired as a result.
1885 */ 2117 */
1886 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2118 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
1887 bucket = &(dlm->lockres_hash[i]); 2119 bucket = dlm_lockres_hash(dlm, i);
1888 hlist_for_each_entry(res, iter, bucket, hash_node) { 2120 hlist_for_each_entry(res, iter, bucket, hash_node) {
1889 /* always prune any $RECOVERY entries for dead nodes, 2121 /* always prune any $RECOVERY entries for dead nodes,
1890 * otherwise hangs can occur during later recovery */ 2122 * otherwise hangs can occur during later recovery */
@@ -1924,6 +2156,20 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
1924{ 2156{
1925 assert_spin_locked(&dlm->spinlock); 2157 assert_spin_locked(&dlm->spinlock);
1926 2158
2159 if (dlm->reco.new_master == idx) {
2160 mlog(0, "%s: recovery master %d just died\n",
2161 dlm->name, idx);
2162 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2163 /* finalize1 was reached, so it is safe to clear
2164 * the new_master and dead_node. that recovery
2165 * is complete. */
2166 mlog(0, "%s: dead master %d had reached "
2167 "finalize1 state, clearing\n", dlm->name, idx);
2168 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2169 __dlm_reset_recovery(dlm);
2170 }
2171 }
2172
1927 /* check to see if the node is already considered dead */ 2173 /* check to see if the node is already considered dead */
1928 if (!test_bit(idx, dlm->live_nodes_map)) { 2174 if (!test_bit(idx, dlm->live_nodes_map)) {
1929 mlog(0, "for domain %s, node %d is already dead. " 2175 mlog(0, "for domain %s, node %d is already dead. "
@@ -2087,7 +2333,7 @@ again:
2087 2333
2088 /* set the new_master to this node */ 2334 /* set the new_master to this node */
2089 spin_lock(&dlm->spinlock); 2335 spin_lock(&dlm->spinlock);
2090 dlm->reco.new_master = dlm->node_num; 2336 dlm_set_reco_master(dlm, dlm->node_num);
2091 spin_unlock(&dlm->spinlock); 2337 spin_unlock(&dlm->spinlock);
2092 } 2338 }
2093 2339
@@ -2125,6 +2371,10 @@ again:
2125 mlog(0, "%s: reco master %u is ready to recover %u\n", 2371 mlog(0, "%s: reco master %u is ready to recover %u\n",
2126 dlm->name, dlm->reco.new_master, dlm->reco.dead_node); 2372 dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
2127 status = -EEXIST; 2373 status = -EEXIST;
2374 } else if (ret == DLM_RECOVERING) {
2375 mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
2376 dlm->name, dlm->node_num);
2377 goto again;
2128 } else { 2378 } else {
2129 struct dlm_lock_resource *res; 2379 struct dlm_lock_resource *res;
2130 2380
@@ -2156,7 +2406,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
2156 2406
2157 mlog_entry("%u\n", dead_node); 2407 mlog_entry("%u\n", dead_node);
2158 2408
2159 mlog(0, "dead node is %u\n", dead_node); 2409 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
2160 2410
2161 spin_lock(&dlm->spinlock); 2411 spin_lock(&dlm->spinlock);
2162 dlm_node_iter_init(dlm->domain_map, &iter); 2412 dlm_node_iter_init(dlm->domain_map, &iter);
@@ -2214,6 +2464,14 @@ retry:
2214 * another ENOMEM */ 2464 * another ENOMEM */
2215 msleep(100); 2465 msleep(100);
2216 goto retry; 2466 goto retry;
2467 } else if (ret == EAGAIN) {
2468 mlog(0, "%s: trying to start recovery of node "
2469 "%u, but node %u is waiting for last recovery "
2470 "to complete, backoff for a bit\n", dlm->name,
2471 dead_node, nodenum);
2472 /* TODO Look into replacing msleep with cond_resched() */
2473 msleep(100);
2474 goto retry;
2217 } 2475 }
2218 } 2476 }
2219 2477
@@ -2229,8 +2487,20 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2229 if (!dlm_grab(dlm)) 2487 if (!dlm_grab(dlm))
2230 return 0; 2488 return 0;
2231 2489
2232 mlog(0, "node %u wants to recover node %u\n", 2490 spin_lock(&dlm->spinlock);
2233 br->node_idx, br->dead_node); 2491 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2492 mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
2493 "but this node is in finalize state, waiting on finalize2\n",
2494 dlm->name, br->node_idx, br->dead_node,
2495 dlm->reco.dead_node, dlm->reco.new_master);
2496 spin_unlock(&dlm->spinlock);
2497 return EAGAIN;
2498 }
2499 spin_unlock(&dlm->spinlock);
2500
2501 mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
2502 dlm->name, br->node_idx, br->dead_node,
2503 dlm->reco.dead_node, dlm->reco.new_master);
2234 2504
2235 dlm_fire_domain_eviction_callbacks(dlm, br->dead_node); 2505 dlm_fire_domain_eviction_callbacks(dlm, br->dead_node);
2236 2506
@@ -2252,8 +2522,8 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2252 "node %u changing it to %u\n", dlm->name, 2522 "node %u changing it to %u\n", dlm->name,
2253 dlm->reco.dead_node, br->node_idx, br->dead_node); 2523 dlm->reco.dead_node, br->node_idx, br->dead_node);
2254 } 2524 }
2255 dlm->reco.new_master = br->node_idx; 2525 dlm_set_reco_master(dlm, br->node_idx);
2256 dlm->reco.dead_node = br->dead_node; 2526 dlm_set_reco_dead_node(dlm, br->dead_node);
2257 if (!test_bit(br->dead_node, dlm->recovery_map)) { 2527 if (!test_bit(br->dead_node, dlm->recovery_map)) {
2258 mlog(0, "recovery master %u sees %u as dead, but this " 2528 mlog(0, "recovery master %u sees %u as dead, but this "
2259 "node has not yet. marking %u as dead\n", 2529 "node has not yet. marking %u as dead\n",
@@ -2272,10 +2542,16 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2272 spin_unlock(&dlm->spinlock); 2542 spin_unlock(&dlm->spinlock);
2273 2543
2274 dlm_kick_recovery_thread(dlm); 2544 dlm_kick_recovery_thread(dlm);
2545
2546 mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n",
2547 dlm->name, br->node_idx, br->dead_node,
2548 dlm->reco.dead_node, dlm->reco.new_master);
2549
2275 dlm_put(dlm); 2550 dlm_put(dlm);
2276 return 0; 2551 return 0;
2277} 2552}
2278 2553
2554#define DLM_FINALIZE_STAGE2 0x01
2279static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) 2555static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2280{ 2556{
2281 int ret = 0; 2557 int ret = 0;
@@ -2283,25 +2559,31 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2283 struct dlm_node_iter iter; 2559 struct dlm_node_iter iter;
2284 int nodenum; 2560 int nodenum;
2285 int status; 2561 int status;
2562 int stage = 1;
2286 2563
2287 mlog(0, "finishing recovery for node %s:%u\n", 2564 mlog(0, "finishing recovery for node %s:%u, "
2288 dlm->name, dlm->reco.dead_node); 2565 "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
2289 2566
2290 spin_lock(&dlm->spinlock); 2567 spin_lock(&dlm->spinlock);
2291 dlm_node_iter_init(dlm->domain_map, &iter); 2568 dlm_node_iter_init(dlm->domain_map, &iter);
2292 spin_unlock(&dlm->spinlock); 2569 spin_unlock(&dlm->spinlock);
2293 2570
2571stage2:
2294 memset(&fr, 0, sizeof(fr)); 2572 memset(&fr, 0, sizeof(fr));
2295 fr.node_idx = dlm->node_num; 2573 fr.node_idx = dlm->node_num;
2296 fr.dead_node = dlm->reco.dead_node; 2574 fr.dead_node = dlm->reco.dead_node;
2575 if (stage == 2)
2576 fr.flags |= DLM_FINALIZE_STAGE2;
2297 2577
2298 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 2578 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
2299 if (nodenum == dlm->node_num) 2579 if (nodenum == dlm->node_num)
2300 continue; 2580 continue;
2301 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, 2581 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
2302 &fr, sizeof(fr), nodenum, &status); 2582 &fr, sizeof(fr), nodenum, &status);
2303 if (ret >= 0) { 2583 if (ret >= 0)
2304 ret = status; 2584 ret = status;
2585 if (ret < 0) {
2586 mlog_errno(ret);
2305 if (dlm_is_host_down(ret)) { 2587 if (dlm_is_host_down(ret)) {
2306 /* this has no effect on this recovery 2588 /* this has no effect on this recovery
2307 * session, so set the status to zero to 2589 * session, so set the status to zero to
@@ -2309,13 +2591,17 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2309 mlog(ML_ERROR, "node %u went down after this " 2591 mlog(ML_ERROR, "node %u went down after this "
2310 "node finished recovery.\n", nodenum); 2592 "node finished recovery.\n", nodenum);
2311 ret = 0; 2593 ret = 0;
2594 continue;
2312 } 2595 }
2313 }
2314 if (ret < 0) {
2315 mlog_errno(ret);
2316 break; 2596 break;
2317 } 2597 }
2318 } 2598 }
2599 if (stage == 1) {
2600 /* reset the node_iter back to the top and send finalize2 */
2601 iter.curnode = -1;
2602 stage = 2;
2603 goto stage2;
2604 }
2319 2605
2320 return ret; 2606 return ret;
2321} 2607}
@@ -2324,14 +2610,19 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2324{ 2610{
2325 struct dlm_ctxt *dlm = data; 2611 struct dlm_ctxt *dlm = data;
2326 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; 2612 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
2613 int stage = 1;
2327 2614
2328 /* ok to return 0, domain has gone away */ 2615 /* ok to return 0, domain has gone away */
2329 if (!dlm_grab(dlm)) 2616 if (!dlm_grab(dlm))
2330 return 0; 2617 return 0;
2331 2618
2332 mlog(0, "node %u finalizing recovery of node %u\n", 2619 if (fr->flags & DLM_FINALIZE_STAGE2)
2333 fr->node_idx, fr->dead_node); 2620 stage = 2;
2334 2621
2622 mlog(0, "%s: node %u finalizing recovery stage%d of "
2623 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
2624 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
2625
2335 spin_lock(&dlm->spinlock); 2626 spin_lock(&dlm->spinlock);
2336 2627
2337 if (dlm->reco.new_master != fr->node_idx) { 2628 if (dlm->reco.new_master != fr->node_idx) {
@@ -2347,13 +2638,41 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2347 BUG(); 2638 BUG();
2348 } 2639 }
2349 2640
2350 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); 2641 switch (stage) {
2351 2642 case 1:
2352 spin_unlock(&dlm->spinlock); 2643 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
2644 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2645 mlog(ML_ERROR, "%s: received finalize1 from "
2646 "new master %u for dead node %u, but "
2647 "this node has already received it!\n",
2648 dlm->name, fr->node_idx, fr->dead_node);
2649 dlm_print_reco_node_status(dlm);
2650 BUG();
2651 }
2652 dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
2653 spin_unlock(&dlm->spinlock);
2654 break;
2655 case 2:
2656 if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
2657 mlog(ML_ERROR, "%s: received finalize2 from "
2658 "new master %u for dead node %u, but "
2659 "this node did not have finalize1!\n",
2660 dlm->name, fr->node_idx, fr->dead_node);
2661 dlm_print_reco_node_status(dlm);
2662 BUG();
2663 }
2664 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2665 spin_unlock(&dlm->spinlock);
2666 dlm_reset_recovery(dlm);
2667 dlm_kick_recovery_thread(dlm);
2668 break;
2669 default:
2670 BUG();
2671 }
2353 2672
2354 dlm_reset_recovery(dlm); 2673 mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
2674 dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
2355 2675
2356 dlm_kick_recovery_thread(dlm);
2357 dlm_put(dlm); 2676 dlm_put(dlm);
2358 return 0; 2677 return 0;
2359} 2678}
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 5be9d14f12cb..0c822f3ffb05 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -39,6 +39,7 @@
39#include <linux/inet.h> 39#include <linux/inet.h>
40#include <linux/timer.h> 40#include <linux/timer.h>
41#include <linux/kthread.h> 41#include <linux/kthread.h>
42#include <linux/delay.h>
42 43
43 44
44#include "cluster/heartbeat.h" 45#include "cluster/heartbeat.h"
@@ -53,6 +54,8 @@
53#include "cluster/masklog.h" 54#include "cluster/masklog.h"
54 55
55static int dlm_thread(void *data); 56static int dlm_thread(void *data);
57static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
58 struct dlm_lock_resource *lockres);
56 59
57static void dlm_flush_asts(struct dlm_ctxt *dlm); 60static void dlm_flush_asts(struct dlm_ctxt *dlm);
58 61
@@ -80,7 +83,7 @@ repeat:
80} 83}
81 84
82 85
83static int __dlm_lockres_unused(struct dlm_lock_resource *res) 86int __dlm_lockres_unused(struct dlm_lock_resource *res)
84{ 87{
85 if (list_empty(&res->granted) && 88 if (list_empty(&res->granted) &&
86 list_empty(&res->converting) && 89 list_empty(&res->converting) &&
@@ -103,6 +106,20 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
103 assert_spin_locked(&res->spinlock); 106 assert_spin_locked(&res->spinlock);
104 107
105 if (__dlm_lockres_unused(res)){ 108 if (__dlm_lockres_unused(res)){
109 /* For now, just keep any resource we master */
110 if (res->owner == dlm->node_num)
111 {
112 if (!list_empty(&res->purge)) {
113 mlog(0, "we master %s:%.*s, but it is on "
114 "the purge list. Removing\n",
115 dlm->name, res->lockname.len,
116 res->lockname.name);
117 list_del_init(&res->purge);
118 dlm->purge_count--;
119 }
120 return;
121 }
122
106 if (list_empty(&res->purge)) { 123 if (list_empty(&res->purge)) {
107 mlog(0, "putting lockres %.*s from purge list\n", 124 mlog(0, "putting lockres %.*s from purge list\n",
108 res->lockname.len, res->lockname.name); 125 res->lockname.len, res->lockname.name);
@@ -110,10 +127,23 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
110 res->last_used = jiffies; 127 res->last_used = jiffies;
111 list_add_tail(&res->purge, &dlm->purge_list); 128 list_add_tail(&res->purge, &dlm->purge_list);
112 dlm->purge_count++; 129 dlm->purge_count++;
130
131 /* if this node is not the owner, there is
132 * no way to keep track of who the owner could be.
133 * unhash it to avoid serious problems. */
134 if (res->owner != dlm->node_num) {
135 mlog(0, "%s:%.*s: doing immediate "
136 "purge of lockres owned by %u\n",
137 dlm->name, res->lockname.len,
138 res->lockname.name, res->owner);
139
140 dlm_purge_lockres_now(dlm, res);
141 }
113 } 142 }
114 } else if (!list_empty(&res->purge)) { 143 } else if (!list_empty(&res->purge)) {
115 mlog(0, "removing lockres %.*s from purge list\n", 144 mlog(0, "removing lockres %.*s from purge list, "
116 res->lockname.len, res->lockname.name); 145 "owner=%u\n", res->lockname.len, res->lockname.name,
146 res->owner);
117 147
118 list_del_init(&res->purge); 148 list_del_init(&res->purge);
119 dlm->purge_count--; 149 dlm->purge_count--;
@@ -165,6 +195,7 @@ again:
165 } else if (ret < 0) { 195 } else if (ret < 0) {
166 mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", 196 mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
167 lockres->lockname.len, lockres->lockname.name); 197 lockres->lockname.len, lockres->lockname.name);
198 msleep(100);
168 goto again; 199 goto again;
169 } 200 }
170 201
@@ -178,6 +209,24 @@ finish:
178 __dlm_unhash_lockres(lockres); 209 __dlm_unhash_lockres(lockres);
179} 210}
180 211
212/* make an unused lockres go away immediately.
213 * as soon as the dlm spinlock is dropped, this lockres
214 * will not be found. kfree still happens on last put. */
215static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
216 struct dlm_lock_resource *lockres)
217{
218 assert_spin_locked(&dlm->spinlock);
219 assert_spin_locked(&lockres->spinlock);
220
221 BUG_ON(!__dlm_lockres_unused(lockres));
222
223 if (!list_empty(&lockres->purge)) {
224 list_del_init(&lockres->purge);
225 dlm->purge_count--;
226 }
227 __dlm_unhash_lockres(lockres);
228}
229
181static void dlm_run_purge_list(struct dlm_ctxt *dlm, 230static void dlm_run_purge_list(struct dlm_ctxt *dlm,
182 int purge_now) 231 int purge_now)
183{ 232{
@@ -318,8 +367,7 @@ converting:
318 367
319 target->ml.type = target->ml.convert_type; 368 target->ml.type = target->ml.convert_type;
320 target->ml.convert_type = LKM_IVMODE; 369 target->ml.convert_type = LKM_IVMODE;
321 list_del_init(&target->list); 370 list_move_tail(&target->list, &res->granted);
322 list_add_tail(&target->list, &res->granted);
323 371
324 BUG_ON(!target->lksb); 372 BUG_ON(!target->lksb);
325 target->lksb->status = DLM_NORMAL; 373 target->lksb->status = DLM_NORMAL;
@@ -380,8 +428,7 @@ blocked:
380 target->ml.type, target->ml.node); 428 target->ml.type, target->ml.node);
381 429
382 // target->ml.type is already correct 430 // target->ml.type is already correct
383 list_del_init(&target->list); 431 list_move_tail(&target->list, &res->granted);
384 list_add_tail(&target->list, &res->granted);
385 432
386 BUG_ON(!target->lksb); 433 BUG_ON(!target->lksb);
387 target->lksb->status = DLM_NORMAL; 434 target->lksb->status = DLM_NORMAL;
@@ -422,6 +469,8 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
422 /* don't shuffle secondary queues */ 469 /* don't shuffle secondary queues */
423 if ((res->owner == dlm->node_num) && 470 if ((res->owner == dlm->node_num) &&
424 !(res->state & DLM_LOCK_RES_DIRTY)) { 471 !(res->state & DLM_LOCK_RES_DIRTY)) {
472 /* ref for dirty_list */
473 dlm_lockres_get(res);
425 list_add_tail(&res->dirty, &dlm->dirty_list); 474 list_add_tail(&res->dirty, &dlm->dirty_list);
426 res->state |= DLM_LOCK_RES_DIRTY; 475 res->state |= DLM_LOCK_RES_DIRTY;
427 } 476 }
@@ -606,6 +655,8 @@ static int dlm_thread(void *data)
606 list_del_init(&res->dirty); 655 list_del_init(&res->dirty);
607 spin_unlock(&res->spinlock); 656 spin_unlock(&res->spinlock);
608 spin_unlock(&dlm->spinlock); 657 spin_unlock(&dlm->spinlock);
658 /* Drop dirty_list ref */
659 dlm_lockres_put(res);
609 660
610 /* lockres can be re-dirtied/re-added to the 661 /* lockres can be re-dirtied/re-added to the
611 * dirty_list in this gap, but that is ok */ 662 * dirty_list in this gap, but that is ok */
@@ -642,8 +693,9 @@ static int dlm_thread(void *data)
642 * spinlock and do NOT have the dlm lock. 693 * spinlock and do NOT have the dlm lock.
643 * safe to reserve/queue asts and run the lists. */ 694 * safe to reserve/queue asts and run the lists. */
644 695
645 mlog(0, "calling dlm_shuffle_lists with dlm=%p, " 696 mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
646 "res=%p\n", dlm, res); 697 "res=%.*s\n", dlm->name,
698 res->lockname.len, res->lockname.name);
647 699
648 /* called while holding lockres lock */ 700 /* called while holding lockres lock */
649 dlm_shuffle_lists(dlm, res); 701 dlm_shuffle_lists(dlm, res);
@@ -657,6 +709,8 @@ in_progress:
657 /* if the lock was in-progress, stick 709 /* if the lock was in-progress, stick
658 * it on the back of the list */ 710 * it on the back of the list */
659 if (delay) { 711 if (delay) {
712 /* ref for dirty_list */
713 dlm_lockres_get(res);
660 spin_lock(&res->spinlock); 714 spin_lock(&res->spinlock);
661 list_add_tail(&res->dirty, &dlm->dirty_list); 715 list_add_tail(&res->dirty, &dlm->dirty_list);
662 res->state |= DLM_LOCK_RES_DIRTY; 716 res->state |= DLM_LOCK_RES_DIRTY;
@@ -677,7 +731,7 @@ in_progress:
677 731
678 /* yield and continue right away if there is more work to do */ 732 /* yield and continue right away if there is more work to do */
679 if (!n) { 733 if (!n) {
680 yield(); 734 cond_resched();
681 continue; 735 continue;
682 } 736 }
683 737
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 7b1a27542674..b0c3134f4f70 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -271,8 +271,7 @@ void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
271void dlm_commit_pending_cancel(struct dlm_lock_resource *res, 271void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
272 struct dlm_lock *lock) 272 struct dlm_lock *lock)
273{ 273{
274 list_del_init(&lock->list); 274 list_move_tail(&lock->list, &res->granted);
275 list_add_tail(&lock->list, &res->granted);
276 lock->ml.convert_type = LKM_IVMODE; 275 lock->ml.convert_type = LKM_IVMODE;
277} 276}
278 277
@@ -319,6 +318,16 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
319 318
320 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 319 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
321 320
321 if (owner == dlm->node_num) {
322 /* ended up trying to contact ourself. this means
323 * that the lockres had been remote but became local
324 * via a migration. just retry it, now as local */
325 mlog(0, "%s:%.*s: this node became the master due to a "
326 "migration, re-evaluate now\n", dlm->name,
327 res->lockname.len, res->lockname.name);
328 return DLM_FORWARD;
329 }
330
322 memset(&unlock, 0, sizeof(unlock)); 331 memset(&unlock, 0, sizeof(unlock));
323 unlock.node_idx = dlm->node_num; 332 unlock.node_idx = dlm->node_num;
324 unlock.flags = cpu_to_be32(flags); 333 unlock.flags = cpu_to_be32(flags);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index 74ca4e5f9765..e641b084b343 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -672,7 +672,7 @@ struct dlm_ctxt *user_dlm_register_context(struct qstr *name)
672 u32 dlm_key; 672 u32 dlm_key;
673 char *domain; 673 char *domain;
674 674
675 domain = kmalloc(name->len + 1, GFP_KERNEL); 675 domain = kmalloc(name->len + 1, GFP_NOFS);
676 if (!domain) { 676 if (!domain) {
677 mlog_errno(-ENOMEM); 677 mlog_errno(-ENOMEM);
678 return ERR_PTR(-ENOMEM); 678 return ERR_PTR(-ENOMEM);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 64cd52860c87..762eb1fbb34d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -242,7 +242,7 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
242 mlog_exit_void(); 242 mlog_exit_void();
243} 243}
244 244
245static spinlock_t ocfs2_dlm_tracking_lock = SPIN_LOCK_UNLOCKED; 245static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
246 246
247static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 247static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
248 struct ocfs2_dlm_debug *dlm_debug) 248 struct ocfs2_dlm_debug *dlm_debug)
@@ -2071,8 +2071,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
2071 } 2071 }
2072 2072
2073 /* launch vote thread */ 2073 /* launch vote thread */
2074 osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote-%d", 2074 osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote");
2075 osb->osb_id);
2076 if (IS_ERR(osb->vote_task)) { 2075 if (IS_ERR(osb->vote_task)) {
2077 status = PTR_ERR(osb->vote_task); 2076 status = PTR_ERR(osb->vote_task);
2078 osb->vote_task = NULL; 2077 osb->vote_task = NULL;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 1a5c69071df6..fcd4475d1f89 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -298,7 +298,7 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
298 298
299 ret = ocfs2_extent_map_insert(inode, rec, 299 ret = ocfs2_extent_map_insert(inode, rec,
300 le16_to_cpu(el->l_tree_depth)); 300 le16_to_cpu(el->l_tree_depth));
301 if (ret) { 301 if (ret && (ret != -EEXIST)) {
302 mlog_errno(ret); 302 mlog_errno(ret);
303 goto out_free; 303 goto out_free;
304 } 304 }
@@ -427,6 +427,11 @@ static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
427/* 427/*
428 * Simple rule: on any return code other than -EAGAIN, anything left 428 * Simple rule: on any return code other than -EAGAIN, anything left
429 * in the insert_context will be freed. 429 * in the insert_context will be freed.
430 *
431 * Simple rule #2: A return code of -EEXIST from this function or
432 * its calls to ocfs2_extent_map_insert_entry() signifies that another
433 * thread beat us to the insert. It is not an actual error, but it
434 * tells the caller we have no more work to do.
430 */ 435 */
431static int ocfs2_extent_map_try_insert(struct inode *inode, 436static int ocfs2_extent_map_try_insert(struct inode *inode,
432 struct ocfs2_extent_rec *rec, 437 struct ocfs2_extent_rec *rec,
@@ -448,22 +453,32 @@ static int ocfs2_extent_map_try_insert(struct inode *inode,
448 goto out_unlock; 453 goto out_unlock;
449 } 454 }
450 455
456 /* Since insert_entry failed, the map MUST have old_ent */
451 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 457 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos),
452 le32_to_cpu(rec->e_clusters), NULL, 458 le32_to_cpu(rec->e_clusters),
453 NULL); 459 NULL, NULL);
454 460
455 BUG_ON(!old_ent); 461 BUG_ON(!old_ent);
456 462
457 ret = -EEXIST; 463 if (old_ent->e_tree_depth < tree_depth) {
458 if (old_ent->e_tree_depth < tree_depth) 464 /* Another thread beat us to the lower tree_depth */
465 ret = -EEXIST;
459 goto out_unlock; 466 goto out_unlock;
467 }
460 468
461 if (old_ent->e_tree_depth == tree_depth) { 469 if (old_ent->e_tree_depth == tree_depth) {
470 /*
471 * Another thread beat us to this tree_depth.
472 * Let's make sure we agree with that thread (the
473 * extent_rec should be identical).
474 */
462 if (!memcmp(rec, &old_ent->e_rec, 475 if (!memcmp(rec, &old_ent->e_rec,
463 sizeof(struct ocfs2_extent_rec))) 476 sizeof(struct ocfs2_extent_rec)))
464 ret = 0; 477 ret = 0;
478 else
479 /* FIXME: Should this be ESRCH/EBADR??? */
480 ret = -EEXIST;
465 481
466 /* FIXME: Should this be ESRCH/EBADR??? */
467 goto out_unlock; 482 goto out_unlock;
468 } 483 }
469 484
@@ -599,7 +614,7 @@ static int ocfs2_extent_map_insert(struct inode *inode,
599 tree_depth, &ctxt); 614 tree_depth, &ctxt);
600 } while (ret == -EAGAIN); 615 } while (ret == -EAGAIN);
601 616
602 if (ret < 0) 617 if ((ret < 0) && (ret != -EEXIST))
603 mlog_errno(ret); 618 mlog_errno(ret);
604 619
605 if (ctxt.left_ent) 620 if (ctxt.left_ent)
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 84c507961287..35140f6cf840 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -114,7 +114,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
114 114
115extern kmem_cache_t *ocfs2_inode_cache; 115extern kmem_cache_t *ocfs2_inode_cache;
116 116
117extern struct address_space_operations ocfs2_aops; 117extern const struct address_space_operations ocfs2_aops;
118 118
119struct buffer_head *ocfs2_bread(struct inode *inode, int block, 119struct buffer_head *ocfs2_bread(struct inode *inode, int block,
120 int *err, int reada); 120 int *err, int reada);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index eebc3cfa6be8..f92bf1dd379a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -49,7 +49,7 @@
49 49
50#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51 51
52spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED; 52DEFINE_SPINLOCK(trans_inc_lock);
53 53
54static int ocfs2_force_read_journal(struct inode *inode); 54static int ocfs2_force_read_journal(struct inode *inode);
55static int ocfs2_recover_node(struct ocfs2_super *osb, 55static int ocfs2_recover_node(struct ocfs2_super *osb,
@@ -222,8 +222,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list)); 222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list));
223 223
224 OCFS2_I(inode)->ip_handle = handle; 224 OCFS2_I(inode)->ip_handle = handle;
225 list_del(&(OCFS2_I(inode)->ip_handle_list)); 225 list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
226 list_add_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
227} 226}
228 227
229static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) 228static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
@@ -785,8 +784,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal)
785 } 784 }
786 785
787 /* Launch the commit thread */ 786 /* Launch the commit thread */
788 osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt-%d", 787 osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt");
789 osb->osb_id);
790 if (IS_ERR(osb->commit_task)) { 788 if (IS_ERR(osb->commit_task)) {
791 status = PTR_ERR(osb->commit_task); 789 status = PTR_ERR(osb->commit_task);
792 osb->commit_task = NULL; 790 osb->commit_task = NULL;
@@ -1119,7 +1117,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1119 goto out; 1117 goto out;
1120 1118
1121 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, 1119 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1122 "ocfs2rec-%d", osb->osb_id); 1120 "ocfs2rec");
1123 if (IS_ERR(osb->recovery_thread_task)) { 1121 if (IS_ERR(osb->recovery_thread_task)) {
1124 mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); 1122 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1125 osb->recovery_thread_task = NULL; 1123 osb->recovery_thread_task = NULL;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 843cf9ddefe8..83934e33e5b0 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -46,12 +46,12 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
46 unsigned long address, 46 unsigned long address,
47 int *type) 47 int *type)
48{ 48{
49 struct inode *inode = area->vm_file->f_dentry->d_inode;
50 struct page *page = NOPAGE_SIGBUS; 49 struct page *page = NOPAGE_SIGBUS;
51 sigset_t blocked, oldset; 50 sigset_t blocked, oldset;
52 int ret; 51 int ret;
53 52
54 mlog_entry("(inode %lu, address %lu)\n", inode->i_ino, address); 53 mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
54 type);
55 55
56 /* The best way to deal with signals in this path is 56 /* The best way to deal with signals in this path is
57 * to block them upfront, rather than allowing the 57 * to block them upfront, rather than allowing the
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index da1093039c01..cd4a6f253d13 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -184,7 +184,6 @@ struct ocfs2_journal;
184struct ocfs2_journal_handle; 184struct ocfs2_journal_handle;
185struct ocfs2_super 185struct ocfs2_super
186{ 186{
187 u32 osb_id; /* id used by the proc interface */
188 struct task_struct *commit_task; 187 struct task_struct *commit_task;
189 struct super_block *sb; 188 struct super_block *sb;
190 struct inode *root_inode; 189 struct inode *root_inode;
@@ -222,13 +221,11 @@ struct ocfs2_super
222 unsigned long s_mount_opt; 221 unsigned long s_mount_opt;
223 222
224 u16 max_slots; 223 u16 max_slots;
225 u16 num_nodes;
226 s16 node_num; 224 s16 node_num;
227 s16 slot_num; 225 s16 slot_num;
228 int s_sectsize_bits; 226 int s_sectsize_bits;
229 int s_clustersize; 227 int s_clustersize;
230 int s_clustersize_bits; 228 int s_clustersize_bits;
231 struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */
232 229
233 atomic_t vol_state; 230 atomic_t vol_state;
234 struct mutex recovery_lock; 231 struct mutex recovery_lock;
@@ -294,7 +291,6 @@ struct ocfs2_super
294}; 291};
295 292
296#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 293#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
297#define OCFS2_MAX_OSB_ID 65536
298 294
299static inline int ocfs2_should_order_data(struct inode *inode) 295static inline int ocfs2_should_order_data(struct inode *inode)
300{ 296{
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 871627961d6d..aa6f5aadedc4 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -264,7 +264,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
264 osb->slot_num = slot; 264 osb->slot_num = slot;
265 spin_unlock(&si->si_lock); 265 spin_unlock(&si->si_lock);
266 266
267 mlog(ML_NOTICE, "taking node slot %d\n", osb->slot_num); 267 mlog(0, "taking node slot %d\n", osb->slot_num);
268 268
269 status = ocfs2_update_disk_slots(osb, si); 269 status = ocfs2_update_disk_slots(osb, si);
270 if (status < 0) 270 if (status < 0)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 949b3dac30f1..382706a67ffd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,13 +68,6 @@
68 68
69#include "buffer_head_io.h" 69#include "buffer_head_io.h"
70 70
71/*
72 * Globals
73 */
74static spinlock_t ocfs2_globals_lock = SPIN_LOCK_UNLOCKED;
75
76static u32 osb_id; /* Keeps track of next available OSB Id */
77
78static kmem_cache_t *ocfs2_inode_cachep = NULL; 71static kmem_cache_t *ocfs2_inode_cachep = NULL;
79 72
80kmem_cache_t *ocfs2_lock_cache = NULL; 73kmem_cache_t *ocfs2_lock_cache = NULL;
@@ -100,7 +93,7 @@ static int ocfs2_initialize_mem_caches(void);
100static void ocfs2_free_mem_caches(void); 93static void ocfs2_free_mem_caches(void);
101static void ocfs2_delete_osb(struct ocfs2_super *osb); 94static void ocfs2_delete_osb(struct ocfs2_super *osb);
102 95
103static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf); 96static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf);
104 97
105static int ocfs2_sync_fs(struct super_block *sb, int wait); 98static int ocfs2_sync_fs(struct super_block *sb, int wait);
106 99
@@ -642,10 +635,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
642 635
643 ocfs2_complete_mount_recovery(osb); 636 ocfs2_complete_mount_recovery(osb);
644 637
645 printk("ocfs2: Mounting device (%u,%u) on (node %d, slot %d) with %s " 638 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) "
646 "data mode.\n", 639 "with %s data mode.\n",
647 MAJOR(sb->s_dev), MINOR(sb->s_dev), osb->node_num, 640 osb->dev_str, osb->node_num, osb->slot_num,
648 osb->slot_num,
649 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 641 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
650 "ordered"); 642 "ordered");
651 643
@@ -672,12 +664,14 @@ read_super_error:
672 return status; 664 return status;
673} 665}
674 666
675static struct super_block *ocfs2_get_sb(struct file_system_type *fs_type, 667static int ocfs2_get_sb(struct file_system_type *fs_type,
676 int flags, 668 int flags,
677 const char *dev_name, 669 const char *dev_name,
678 void *data) 670 void *data,
671 struct vfsmount *mnt)
679{ 672{
680 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); 673 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
674 mnt);
681} 675}
682 676
683static struct file_system_type ocfs2_fs_type = { 677static struct file_system_type ocfs2_fs_type = {
@@ -798,10 +792,6 @@ static int __init ocfs2_init(void)
798 goto leave; 792 goto leave;
799 } 793 }
800 794
801 spin_lock(&ocfs2_globals_lock);
802 osb_id = 0;
803 spin_unlock(&ocfs2_globals_lock);
804
805 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 795 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
806 if (!ocfs2_debugfs_root) { 796 if (!ocfs2_debugfs_root) {
807 status = -EFAULT; 797 status = -EFAULT;
@@ -855,7 +845,7 @@ static void ocfs2_put_super(struct super_block *sb)
855 mlog_exit_void(); 845 mlog_exit_void();
856} 846}
857 847
858static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf) 848static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
859{ 849{
860 struct ocfs2_super *osb; 850 struct ocfs2_super *osb;
861 u32 numbits, freebits; 851 u32 numbits, freebits;
@@ -864,9 +854,9 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
864 struct buffer_head *bh = NULL; 854 struct buffer_head *bh = NULL;
865 struct inode *inode = NULL; 855 struct inode *inode = NULL;
866 856
867 mlog_entry("(%p, %p)\n", sb, buf); 857 mlog_entry("(%p, %p)\n", dentry->d_sb, buf);
868 858
869 osb = OCFS2_SB(sb); 859 osb = OCFS2_SB(dentry->d_sb);
870 860
871 inode = ocfs2_get_system_file_inode(osb, 861 inode = ocfs2_get_system_file_inode(osb,
872 GLOBAL_BITMAP_SYSTEM_INODE, 862 GLOBAL_BITMAP_SYSTEM_INODE,
@@ -889,7 +879,7 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
889 freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); 879 freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
890 880
891 buf->f_type = OCFS2_SUPER_MAGIC; 881 buf->f_type = OCFS2_SUPER_MAGIC;
892 buf->f_bsize = sb->s_blocksize; 882 buf->f_bsize = dentry->d_sb->s_blocksize;
893 buf->f_namelen = OCFS2_MAX_FILENAME_LEN; 883 buf->f_namelen = OCFS2_MAX_FILENAME_LEN;
894 buf->f_blocks = ((sector_t) numbits) * 884 buf->f_blocks = ((sector_t) numbits) *
895 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 885 (osb->s_clustersize >> osb->sb->s_blocksize_bits);
@@ -1018,7 +1008,7 @@ static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
1018 goto bail; 1008 goto bail;
1019 } 1009 }
1020 1010
1021 mlog(ML_NOTICE, "I am node %d\n", osb->node_num); 1011 mlog(0, "I am node %d\n", osb->node_num);
1022 1012
1023 status = 0; 1013 status = 0;
1024bail: 1014bail:
@@ -1189,8 +1179,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1189 1179
1190 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1180 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
1191 1181
1192 printk("ocfs2: Unmounting device (%u,%u) on (node %d)\n", 1182 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n",
1193 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev), osb->node_num); 1183 osb->dev_str, osb->node_num);
1194 1184
1195 ocfs2_delete_osb(osb); 1185 ocfs2_delete_osb(osb);
1196 kfree(osb); 1186 kfree(osb);
@@ -1210,8 +1200,6 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
1210 if (osb->uuid_str == NULL) 1200 if (osb->uuid_str == NULL)
1211 return -ENOMEM; 1201 return -ENOMEM;
1212 1202
1213 memcpy(osb->uuid, uuid, OCFS2_VOL_UUID_LEN);
1214
1215 for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { 1203 for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) {
1216 /* print with null */ 1204 /* print with null */
1217 ret = snprintf(ptr, 3, "%02X", uuid[i]); 1205 ret = snprintf(ptr, 3, "%02X", uuid[i]);
@@ -1309,13 +1297,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1309 goto bail; 1297 goto bail;
1310 } 1298 }
1311 1299
1312 osb->uuid = kmalloc(OCFS2_VOL_UUID_LEN, GFP_KERNEL);
1313 if (!osb->uuid) {
1314 mlog(ML_ERROR, "unable to alloc uuid\n");
1315 status = -ENOMEM;
1316 goto bail;
1317 }
1318
1319 di = (struct ocfs2_dinode *)bh->b_data; 1300 di = (struct ocfs2_dinode *)bh->b_data;
1320 1301
1321 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); 1302 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
@@ -1325,7 +1306,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1325 status = -EINVAL; 1306 status = -EINVAL;
1326 goto bail; 1307 goto bail;
1327 } 1308 }
1328 mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); 1309 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1329 1310
1330 init_waitqueue_head(&osb->osb_wipe_event); 1311 init_waitqueue_head(&osb->osb_wipe_event);
1331 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1312 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
@@ -1416,7 +1397,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1416 goto bail; 1397 goto bail;
1417 } 1398 }
1418 1399
1419 memcpy(&uuid_net_key, &osb->uuid[i], sizeof(osb->net_key)); 1400 memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
1420 osb->net_key = le32_to_cpu(uuid_net_key); 1401 osb->net_key = le32_to_cpu(uuid_net_key);
1421 1402
1422 strncpy(osb->vol_label, di->id2.i_super.s_label, 63); 1403 strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
@@ -1482,18 +1463,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1482 goto bail; 1463 goto bail;
1483 } 1464 }
1484 1465
1485 /* Link this osb onto the global linked list of all osb structures. */
1486 /* The Global Link List is mainted for the whole driver . */
1487 spin_lock(&ocfs2_globals_lock);
1488 osb->osb_id = osb_id;
1489 if (osb_id < OCFS2_MAX_OSB_ID)
1490 osb_id++;
1491 else {
1492 mlog(ML_ERROR, "Too many volumes mounted\n");
1493 status = -ENOMEM;
1494 }
1495 spin_unlock(&ocfs2_globals_lock);
1496
1497bail: 1466bail:
1498 mlog_exit(status); 1467 mlog_exit(status);
1499 return status; 1468 return status;
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index f6986bd79e75..c0f68aa6c175 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -64,8 +64,7 @@ static char *ocfs2_page_getlink(struct dentry * dentry,
64{ 64{
65 struct page * page; 65 struct page * page;
66 struct address_space *mapping = dentry->d_inode->i_mapping; 66 struct address_space *mapping = dentry->d_inode->i_mapping;
67 page = read_cache_page(mapping, 0, 67 page = read_mapping_page(mapping, 0, NULL);
68 (filler_t *)mapping->a_ops->readpage, NULL);
69 if (IS_ERR(page)) 68 if (IS_ERR(page))
70 goto sync_fail; 69 goto sync_fail;
71 wait_on_page_locked(page); 70 wait_on_page_locked(page);
@@ -155,7 +154,7 @@ static void *ocfs2_follow_link(struct dentry *dentry,
155 } 154 }
156 155
157 status = vfs_follow_link(nd, link); 156 status = vfs_follow_link(nd, link);
158 if (status) 157 if (status && status != -ENOENT)
159 mlog_errno(status); 158 mlog_errno(status);
160bail: 159bail:
161 if (page) { 160 if (page) {
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index ee42765a8553..cf70fe2075b8 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -988,9 +988,7 @@ int ocfs2_request_mount_vote(struct ocfs2_super *osb)
988 } 988 }
989 989
990bail: 990bail:
991 if (request) 991 kfree(request);
992 kfree(request);
993
994 return status; 992 return status;
995} 993}
996 994
@@ -1021,9 +1019,7 @@ int ocfs2_request_umount_vote(struct ocfs2_super *osb)
1021 } 1019 }
1022 1020
1023bail: 1021bail:
1024 if (request) 1022 kfree(request);
1025 kfree(request);
1026
1027 return status; 1023 return status;
1028} 1024}
1029 1025
diff --git a/fs/open.c b/fs/open.c
index 317b7c7f38a7..303f06d2a7b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,18 +31,18 @@
31 31
32#include <asm/unistd.h> 32#include <asm/unistd.h>
33 33
34int vfs_statfs(struct super_block *sb, struct kstatfs *buf) 34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{ 35{
36 int retval = -ENODEV; 36 int retval = -ENODEV;
37 37
38 if (sb) { 38 if (dentry) {
39 retval = -ENOSYS; 39 retval = -ENOSYS;
40 if (sb->s_op->statfs) { 40 if (dentry->d_sb->s_op->statfs) {
41 memset(buf, 0, sizeof(*buf)); 41 memset(buf, 0, sizeof(*buf));
42 retval = security_sb_statfs(sb); 42 retval = security_sb_statfs(dentry);
43 if (retval) 43 if (retval)
44 return retval; 44 return retval;
45 retval = sb->s_op->statfs(sb, buf); 45 retval = dentry->d_sb->s_op->statfs(dentry, buf);
46 if (retval == 0 && buf->f_frsize == 0) 46 if (retval == 0 && buf->f_frsize == 0)
47 buf->f_frsize = buf->f_bsize; 47 buf->f_frsize = buf->f_bsize;
48 } 48 }
@@ -52,12 +52,12 @@ int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
52 52
53EXPORT_SYMBOL(vfs_statfs); 53EXPORT_SYMBOL(vfs_statfs);
54 54
55static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) 55static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
56{ 56{
57 struct kstatfs st; 57 struct kstatfs st;
58 int retval; 58 int retval;
59 59
60 retval = vfs_statfs(sb, &st); 60 retval = vfs_statfs(dentry, &st);
61 if (retval) 61 if (retval)
62 return retval; 62 return retval;
63 63
@@ -95,12 +95,12 @@ static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
95 return 0; 95 return 0;
96} 96}
97 97
98static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) 98static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
99{ 99{
100 struct kstatfs st; 100 struct kstatfs st;
101 int retval; 101 int retval;
102 102
103 retval = vfs_statfs(sb, &st); 103 retval = vfs_statfs(dentry, &st);
104 if (retval) 104 if (retval)
105 return retval; 105 return retval;
106 106
@@ -130,7 +130,7 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
130 error = user_path_walk(path, &nd); 130 error = user_path_walk(path, &nd);
131 if (!error) { 131 if (!error) {
132 struct statfs tmp; 132 struct statfs tmp;
133 error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); 133 error = vfs_statfs_native(nd.dentry, &tmp);
134 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
135 error = -EFAULT; 135 error = -EFAULT;
136 path_release(&nd); 136 path_release(&nd);
@@ -149,7 +149,7 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64
149 error = user_path_walk(path, &nd); 149 error = user_path_walk(path, &nd);
150 if (!error) { 150 if (!error) {
151 struct statfs64 tmp; 151 struct statfs64 tmp;
152 error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); 152 error = vfs_statfs64(nd.dentry, &tmp);
153 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
154 error = -EFAULT; 154 error = -EFAULT;
155 path_release(&nd); 155 path_release(&nd);
@@ -168,7 +168,7 @@ asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
168 file = fget(fd); 168 file = fget(fd);
169 if (!file) 169 if (!file)
170 goto out; 170 goto out;
171 error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); 171 error = vfs_statfs_native(file->f_dentry, &tmp);
172 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 172 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
173 error = -EFAULT; 173 error = -EFAULT;
174 fput(file); 174 fput(file);
@@ -189,7 +189,7 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user
189 file = fget(fd); 189 file = fget(fd);
190 if (!file) 190 if (!file)
191 goto out; 191 goto out;
192 error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); 192 error = vfs_statfs64(file->f_dentry, &tmp);
193 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 193 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
194 error = -EFAULT; 194 error = -EFAULT;
195 fput(file); 195 fput(file);
@@ -322,7 +322,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
322 322
323 error = locks_verify_truncate(inode, file, length); 323 error = locks_verify_truncate(inode, file, length);
324 if (!error) 324 if (!error)
325 error = do_truncate(dentry, length, 0, file); 325 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
326out_putf: 326out_putf:
327 fput(file); 327 fput(file);
328out: 328out:
@@ -633,7 +633,7 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
633 dentry = file->f_dentry; 633 dentry = file->f_dentry;
634 inode = dentry->d_inode; 634 inode = dentry->d_inode;
635 635
636 audit_inode(NULL, inode, 0); 636 audit_inode(NULL, inode);
637 637
638 err = -EROFS; 638 err = -EROFS;
639 if (IS_RDONLY(inode)) 639 if (IS_RDONLY(inode))
@@ -786,7 +786,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
786 if (file) { 786 if (file) {
787 struct dentry * dentry; 787 struct dentry * dentry;
788 dentry = file->f_dentry; 788 dentry = file->f_dentry;
789 audit_inode(NULL, dentry->d_inode, 0); 789 audit_inode(NULL, dentry->d_inode);
790 error = chown_common(dentry, user, group); 790 error = chown_common(dentry, user, group);
791 fput(file); 791 fput(file);
792 } 792 }
@@ -1152,7 +1152,7 @@ int filp_close(struct file *filp, fl_owner_t id)
1152 } 1152 }
1153 1153
1154 if (filp->f_op && filp->f_op->flush) 1154 if (filp->f_op && filp->f_op->flush)
1155 retval = filp->f_op->flush(filp); 1155 retval = filp->f_op->flush(filp, id);
1156 1156
1157 dnotify_flush(filp, id); 1157 dnotify_flush(filp, id);
1158 locks_remove_posix(filp, id); 1158 locks_remove_posix(filp, id);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 0f14276a2e51..93a56bd4a2b7 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1,5 +1,4 @@
1/* $Id: inode.c,v 1.15 2001/11/12 09:43:39 davem Exp $ 1/* inode.c: /proc/openprom handling routines
2 * openpromfs.c: /proc/openprom handling routines
3 * 2 *
4 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) 3 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
5 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 4 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
@@ -12,756 +11,245 @@
12#include <linux/openprom_fs.h> 11#include <linux/openprom_fs.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/smp_lock.h> 14#include <linux/seq_file.h>
16 15
17#include <asm/openprom.h> 16#include <asm/openprom.h>
18#include <asm/oplib.h> 17#include <asm/oplib.h>
18#include <asm/prom.h>
19#include <asm/uaccess.h> 19#include <asm/uaccess.h>
20 20
21#define ALIASES_NNODES 64 21static DEFINE_MUTEX(op_mutex);
22 22
23typedef struct { 23#define OPENPROM_ROOT_INO 0
24 u16 parent; 24
25 u16 next; 25enum op_inode_type {
26 u16 child; 26 op_inode_node,
27 u16 first_prop; 27 op_inode_prop,
28 u32 node; 28};
29} openpromfs_node; 29
30 30union op_inode_data {
31typedef struct { 31 struct device_node *node;
32#define OPP_STRING 0x10 32 struct property *prop;
33#define OPP_STRINGLIST 0x20 33};
34#define OPP_BINARY 0x40
35#define OPP_HEXSTRING 0x80
36#define OPP_DIRTY 0x01
37#define OPP_QUOTED 0x02
38#define OPP_NOTQUOTED 0x04
39#define OPP_ASCIIZ 0x08
40 u32 flag;
41 u32 alloclen;
42 u32 len;
43 char *value;
44 char name[8];
45} openprom_property;
46
47static openpromfs_node *nodes;
48static int alloced;
49static u16 last_node;
50static u16 first_prop;
51static u16 options = 0xffff;
52static u16 aliases = 0xffff;
53static int aliases_nodes;
54static char *alias_names [ALIASES_NNODES];
55
56#define OPENPROM_ROOT_INO 16
57#define OPENPROM_FIRST_INO OPENPROM_ROOT_INO
58#define NODE(ino) nodes[ino - OPENPROM_FIRST_INO]
59#define NODE2INO(node) (node + OPENPROM_FIRST_INO)
60#define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node)
61
62static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *);
63static int openpromfs_readdir(struct file *, void *, filldir_t);
64static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd);
65static int openpromfs_unlink (struct inode *, struct dentry *dentry);
66 34
67static ssize_t nodenum_read(struct file *file, char __user *buf, 35struct op_inode_info {
68 size_t count, loff_t *ppos) 36 struct inode vfs_inode;
37 enum op_inode_type type;
38 union op_inode_data u;
39};
40
41static inline struct op_inode_info *OP_I(struct inode *inode)
69{ 42{
70 struct inode *inode = file->f_dentry->d_inode; 43 return container_of(inode, struct op_inode_info, vfs_inode);
71 char buffer[10];
72
73 if (count < 0 || !inode->u.generic_ip)
74 return -EINVAL;
75 sprintf (buffer, "%8.8x\n", (u32)(long)(inode->u.generic_ip));
76 if (file->f_pos >= 9)
77 return 0;
78 if (count > 9 - file->f_pos)
79 count = 9 - file->f_pos;
80 if (copy_to_user(buf, buffer + file->f_pos, count))
81 return -EFAULT;
82 *ppos += count;
83 return count;
84} 44}
85 45
86static ssize_t property_read(struct file *filp, char __user *buf, 46static int is_string(unsigned char *p, int len)
87 size_t count, loff_t *ppos)
88{ 47{
89 struct inode *inode = filp->f_dentry->d_inode; 48 int i;
90 int i, j, k;
91 u32 node;
92 char *p, *s;
93 u32 *q;
94 openprom_property *op;
95 char buffer[64];
96
97 if (!filp->private_data) {
98 node = nodes[(u16)((long)inode->u.generic_ip)].node;
99 i = ((u32)(long)inode->u.generic_ip) >> 16;
100 if ((u16)((long)inode->u.generic_ip) == aliases) {
101 if (i >= aliases_nodes)
102 p = NULL;
103 else
104 p = alias_names [i];
105 } else
106 for (p = prom_firstprop (node, buffer);
107 i && p && *p;
108 p = prom_nextprop (node, p, buffer), i--)
109 /* nothing */ ;
110 if (!p || !*p)
111 return -EIO;
112 i = prom_getproplen (node, p);
113 if (i < 0) {
114 if ((u16)((long)inode->u.generic_ip) == aliases)
115 i = 0;
116 else
117 return -EIO;
118 }
119 k = i;
120 if (i < 64) i = 64;
121 filp->private_data = kmalloc (sizeof (openprom_property)
122 + (j = strlen (p)) + 2 * i,
123 GFP_KERNEL);
124 if (!filp->private_data)
125 return -ENOMEM;
126 op = (openprom_property *)filp->private_data;
127 op->flag = 0;
128 op->alloclen = 2 * i;
129 strcpy (op->name, p);
130 op->value = (char *)(((unsigned long)(op->name + j + 4)) & ~3);
131 op->len = k;
132 if (k && prom_getproperty (node, p, op->value, i) < 0)
133 return -EIO;
134 op->value [k] = 0;
135 if (k) {
136 for (s = NULL, p = op->value; p < op->value + k; p++) {
137 if ((*p >= ' ' && *p <= '~') || *p == '\n') {
138 op->flag |= OPP_STRING;
139 s = p;
140 continue;
141 }
142 if (p > op->value && !*p && s == p - 1) {
143 if (p < op->value + k - 1)
144 op->flag |= OPP_STRINGLIST;
145 else
146 op->flag |= OPP_ASCIIZ;
147 continue;
148 }
149 if (k == 1 && !*p) {
150 op->flag |= (OPP_STRING|OPP_ASCIIZ);
151 break;
152 }
153 op->flag &= ~(OPP_STRING|OPP_STRINGLIST);
154 if (k & 3)
155 op->flag |= OPP_HEXSTRING;
156 else
157 op->flag |= OPP_BINARY;
158 break;
159 }
160 if (op->flag & OPP_STRINGLIST)
161 op->flag &= ~(OPP_STRING);
162 if (op->flag & OPP_ASCIIZ)
163 op->len--;
164 }
165 } else
166 op = (openprom_property *)filp->private_data;
167 if (!count || !(op->len || (op->flag & OPP_ASCIIZ)))
168 return 0;
169 if (*ppos >= 0xffffff || count >= 0xffffff)
170 return -EINVAL;
171 if (op->flag & OPP_STRINGLIST) {
172 for (k = 0, p = op->value; p < op->value + op->len; p++)
173 if (!*p)
174 k++;
175 i = op->len + 4 * k + 3;
176 } else if (op->flag & OPP_STRING) {
177 i = op->len + 3;
178 } else if (op->flag & OPP_BINARY) {
179 i = (op->len * 9) >> 2;
180 } else {
181 i = (op->len << 1) + 1;
182 }
183 k = *ppos;
184 if (k >= i) return 0;
185 if (count > i - k) count = i - k;
186 if (op->flag & OPP_STRING) {
187 if (!k) {
188 if (put_user('\'', buf))
189 return -EFAULT;
190 k++;
191 count--;
192 }
193 49
194 if (k + count >= i - 2) 50 for (i = 0; i < len; i++) {
195 j = i - 2 - k; 51 unsigned char val = p[i];
196 else
197 j = count;
198
199 if (j >= 0) {
200 if (copy_to_user(buf + k - *ppos,
201 op->value + k - 1, j))
202 return -EFAULT;
203 count -= j;
204 k += j;
205 }
206 52
207 if (count) { 53 if ((i && !val) ||
208 if (put_user('\'', &buf [k++ - *ppos])) 54 (val >= ' ' && val <= '~'))
209 return -EFAULT; 55 continue;
210 }
211 if (count > 1) {
212 if (put_user('\n', &buf [k++ - *ppos]))
213 return -EFAULT;
214 }
215 } else if (op->flag & OPP_STRINGLIST) {
216 char *tmp;
217
218 tmp = kmalloc (i, GFP_KERNEL);
219 if (!tmp)
220 return -ENOMEM;
221
222 s = tmp;
223 *s++ = '\'';
224 for (p = op->value; p < op->value + op->len; p++) {
225 if (!*p) {
226 strcpy(s, "' + '");
227 s += 5;
228 continue;
229 }
230 *s++ = *p;
231 }
232 strcpy(s, "'\n");
233
234 if (copy_to_user(buf, tmp + k, count))
235 return -EFAULT;
236
237 kfree(tmp);
238 k += count;
239
240 } else if (op->flag & OPP_BINARY) {
241 char buffer[10];
242 u32 *first, *last;
243 int first_off, last_cnt;
244
245 first = ((u32 *)op->value) + k / 9;
246 first_off = k % 9;
247 last = ((u32 *)op->value) + (k + count - 1) / 9;
248 last_cnt = (k + count) % 9;
249 if (!last_cnt) last_cnt = 9;
250
251 if (first == last) {
252 sprintf (buffer, "%08x.", *first);
253 if (copy_to_user(buf, buffer + first_off,
254 last_cnt - first_off))
255 return -EFAULT;
256 buf += last_cnt - first_off;
257 } else {
258 for (q = first; q <= last; q++) {
259 sprintf (buffer, "%08x.", *q);
260 if (q == first) {
261 if (copy_to_user(buf, buffer + first_off,
262 9 - first_off))
263 return -EFAULT;
264 buf += 9 - first_off;
265 } else if (q == last) {
266 if (copy_to_user(buf, buffer, last_cnt))
267 return -EFAULT;
268 buf += last_cnt;
269 } else {
270 if (copy_to_user(buf, buffer, 9))
271 return -EFAULT;
272 buf += 9;
273 }
274 }
275 }
276 56
277 if (last == (u32 *)(op->value + op->len - 4) && last_cnt == 9) { 57 return 0;
278 if (put_user('\n', (buf - 1))) 58 }
279 return -EFAULT;
280 }
281 59
282 k += count; 60 return 1;
61}
283 62
284 } else if (op->flag & OPP_HEXSTRING) { 63static int property_show(struct seq_file *f, void *v)
285 char buffer[3]; 64{
65 struct property *prop = f->private;
66 void *pval;
67 int len;
286 68
287 if ((k < i - 1) && (k & 1)) { 69 len = prop->length;
288 sprintf (buffer, "%02x", 70 pval = prop->value;
289 (unsigned char) *(op->value + (k >> 1)) & 0xff);
290 if (put_user(buffer[1], &buf[k++ - *ppos]))
291 return -EFAULT;
292 count--;
293 }
294 71
295 for (; (count > 1) && (k < i - 1); k += 2) { 72 if (is_string(pval, len)) {
296 sprintf (buffer, "%02x", 73 while (len > 0) {
297 (unsigned char) *(op->value + (k >> 1)) & 0xff); 74 int n = strlen(pval);
298 if (copy_to_user(buf + k - *ppos, buffer, 2))
299 return -EFAULT;
300 count -= 2;
301 }
302 75
303 if (count && (k < i - 1)) { 76 seq_printf(f, "%s", (char *) pval);
304 sprintf (buffer, "%02x",
305 (unsigned char) *(op->value + (k >> 1)) & 0xff);
306 if (put_user(buffer[0], &buf[k++ - *ppos]))
307 return -EFAULT;
308 count--;
309 }
310 77
311 if (count) { 78 /* Skip over the NULL byte too. */
312 if (put_user('\n', &buf [k++ - *ppos])) 79 pval += n + 1;
313 return -EFAULT; 80 len -= n + 1;
314 }
315 }
316 count = k - *ppos;
317 *ppos = k;
318 return count;
319}
320 81
321static ssize_t property_write(struct file *filp, const char __user *buf, 82 if (len > 0)
322 size_t count, loff_t *ppos) 83 seq_printf(f, " + ");
323{
324 int i, j, k;
325 char *p;
326 u32 *q;
327 void *b;
328 openprom_property *op;
329
330 if (*ppos >= 0xffffff || count >= 0xffffff)
331 return -EINVAL;
332 if (!filp->private_data) {
333 i = property_read (filp, NULL, 0, NULL);
334 if (i)
335 return i;
336 }
337 k = *ppos;
338 op = (openprom_property *)filp->private_data;
339 if (!(op->flag & OPP_STRING)) {
340 u32 *first, *last;
341 int first_off, last_cnt;
342 u32 mask, mask2;
343 char tmp [9];
344 int forcelen = 0;
345
346 j = k % 9;
347 for (i = 0; i < count; i++, j++) {
348 if (j == 9) j = 0;
349 if (!j) {
350 char ctmp;
351 if (get_user(ctmp, &buf[i]))
352 return -EFAULT;
353 if (ctmp != '.') {
354 if (ctmp != '\n') {
355 if (op->flag & OPP_BINARY)
356 return -EINVAL;
357 else
358 goto write_try_string;
359 } else {
360 count = i + 1;
361 forcelen = 1;
362 break;
363 }
364 }
365 } else {
366 char ctmp;
367 if (get_user(ctmp, &buf[i]))
368 return -EFAULT;
369 if (ctmp < '0' ||
370 (ctmp > '9' && ctmp < 'A') ||
371 (ctmp > 'F' && ctmp < 'a') ||
372 ctmp > 'f') {
373 if (op->flag & OPP_BINARY)
374 return -EINVAL;
375 else
376 goto write_try_string;
377 }
378 }
379 }
380 op->flag |= OPP_BINARY;
381 tmp [8] = 0;
382 i = ((count + k + 8) / 9) << 2;
383 if (op->alloclen <= i) {
384 b = kmalloc (sizeof (openprom_property) + 2 * i,
385 GFP_KERNEL);
386 if (!b)
387 return -ENOMEM;
388 memcpy (b, filp->private_data,
389 sizeof (openprom_property)
390 + strlen (op->name) + op->alloclen);
391 memset (((char *)b) + sizeof (openprom_property)
392 + strlen (op->name) + op->alloclen,
393 0, 2 * i - op->alloclen);
394 op = (openprom_property *)b;
395 op->alloclen = 2*i;
396 b = filp->private_data;
397 filp->private_data = (void *)op;
398 kfree (b);
399 } 84 }
400 first = ((u32 *)op->value) + (k / 9); 85 } else {
401 first_off = k % 9; 86 if (len & 3) {
402 last = (u32 *)(op->value + i); 87 while (len) {
403 last_cnt = (k + count) % 9; 88 len--;
404 if (first + 1 == last) { 89 if (len)
405 memset (tmp, '0', 8); 90 seq_printf(f, "%02x.",
406 if (copy_from_user(tmp + first_off, buf, 91 *(unsigned char *) pval);
407 (count + first_off > 8) ? 92 else
408 8 - first_off : count)) 93 seq_printf(f, "%02x",
409 return -EFAULT; 94 *(unsigned char *) pval);
410 mask = 0xffffffff; 95 pval++;
411 mask2 = 0xffffffff;
412 for (j = 0; j < first_off; j++)
413 mask >>= 1;
414 for (j = 8 - count - first_off; j > 0; j--)
415 mask2 <<= 1;
416 mask &= mask2;
417 if (mask) {
418 *first &= ~mask;
419 *first |= simple_strtoul (tmp, NULL, 16);
420 op->flag |= OPP_DIRTY;
421 } 96 }
422 } else { 97 } else {
423 op->flag |= OPP_DIRTY; 98 while (len >= 4) {
424 for (q = first; q < last; q++) { 99 len -= 4;
425 if (q == first) { 100
426 if (first_off < 8) { 101 if (len)
427 memset (tmp, '0', 8); 102 seq_printf(f, "%08x.",
428 if (copy_from_user(tmp + first_off, 103 *(unsigned int *) pval);
429 buf, 104 else
430 8 - first_off)) 105 seq_printf(f, "%08x",
431 return -EFAULT; 106 *(unsigned int *) pval);
432 mask = 0xffffffff; 107 pval += 4;
433 for (j = 0; j < first_off; j++)
434 mask >>= 1;
435 *q &= ~mask;
436 *q |= simple_strtoul (tmp,NULL,16);
437 }
438 buf += 9;
439 } else if ((q == last - 1) && last_cnt
440 && (last_cnt < 8)) {
441 memset (tmp, '0', 8);
442 if (copy_from_user(tmp, buf, last_cnt))
443 return -EFAULT;
444 mask = 0xffffffff;
445 for (j = 0; j < 8 - last_cnt; j++)
446 mask <<= 1;
447 *q &= ~mask;
448 *q |= simple_strtoul (tmp, NULL, 16);
449 buf += last_cnt;
450 } else {
451 char tchars[17]; /* XXX yuck... */
452
453 if (copy_from_user(tchars, buf, 16))
454 return -EFAULT;
455 *q = simple_strtoul (tchars, NULL, 16);
456 buf += 9;
457 }
458 }
459 }
460 if (!forcelen) {
461 if (op->len < i)
462 op->len = i;
463 } else
464 op->len = i;
465 *ppos += count;
466 }
467write_try_string:
468 if (!(op->flag & OPP_BINARY)) {
469 if (!(op->flag & (OPP_QUOTED | OPP_NOTQUOTED))) {
470 char ctmp;
471
472 /* No way, if somebody starts writing from the middle,
473 * we don't know whether he uses quotes around or not
474 */
475 if (k > 0)
476 return -EINVAL;
477 if (get_user(ctmp, buf))
478 return -EFAULT;
479 if (ctmp == '\'') {
480 op->flag |= OPP_QUOTED;
481 buf++;
482 count--;
483 (*ppos)++;
484 if (!count) {
485 op->flag |= OPP_STRING;
486 return 1;
487 }
488 } else
489 op->flag |= OPP_NOTQUOTED;
490 }
491 op->flag |= OPP_STRING;
492 if (op->alloclen <= count + *ppos) {
493 b = kmalloc (sizeof (openprom_property)
494 + 2 * (count + *ppos), GFP_KERNEL);
495 if (!b)
496 return -ENOMEM;
497 memcpy (b, filp->private_data,
498 sizeof (openprom_property)
499 + strlen (op->name) + op->alloclen);
500 memset (((char *)b) + sizeof (openprom_property)
501 + strlen (op->name) + op->alloclen,
502 0, 2*(count - *ppos) - op->alloclen);
503 op = (openprom_property *)b;
504 op->alloclen = 2*(count + *ppos);
505 b = filp->private_data;
506 filp->private_data = (void *)op;
507 kfree (b);
508 }
509 p = op->value + *ppos - ((op->flag & OPP_QUOTED) ? 1 : 0);
510 if (copy_from_user(p, buf, count))
511 return -EFAULT;
512 op->flag |= OPP_DIRTY;
513 for (i = 0; i < count; i++, p++)
514 if (*p == '\n') {
515 *p = 0;
516 break;
517 } 108 }
518 if (i < count) {
519 op->len = p - op->value;
520 *ppos += i + 1;
521 if ((p > op->value) && (op->flag & OPP_QUOTED)
522 && (*(p - 1) == '\''))
523 op->len--;
524 } else {
525 if (p - op->value > op->len)
526 op->len = p - op->value;
527 *ppos += count;
528 } 109 }
529 } 110 }
530 return *ppos - k; 111 seq_printf(f, "\n");
112
113 return 0;
531} 114}
532 115
533int property_release (struct inode *inode, struct file *filp) 116static void *property_start(struct seq_file *f, loff_t *pos)
534{ 117{
535 openprom_property *op = (openprom_property *)filp->private_data; 118 if (*pos == 0)
536 int error; 119 return pos;
537 u32 node; 120 return NULL;
538 121}
539 if (!op) 122
540 return 0; 123static void *property_next(struct seq_file *f, void *v, loff_t *pos)
541 lock_kernel(); 124{
542 node = nodes[(u16)((long)inode->u.generic_ip)].node; 125 (*pos)++;
543 if ((u16)((long)inode->u.generic_ip) == aliases) { 126 return NULL;
544 if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { 127}
545 char *p = op->name; 128
546 int i = (op->value - op->name) - strlen (op->name) - 1; 129static void property_stop(struct seq_file *f, void *v)
547 op->value [op->len] = 0; 130{
548 *(op->value - 1) = ' '; 131 /* Nothing to do */
549 if (i) { 132}
550 for (p = op->value - i - 2; p >= op->name; p--) 133
551 p[i] = *p; 134static struct seq_operations property_op = {
552 p = op->name + i; 135 .start = property_start,
553 } 136 .next = property_next,
554 memcpy (p - 8, "nvalias ", 8); 137 .stop = property_stop,
555 prom_feval (p - 8); 138 .show = property_show
556 } 139};
557 } else if (op->flag & OPP_DIRTY) { 140
558 if (op->flag & OPP_STRING) { 141static int property_open(struct inode *inode, struct file *file)
559 op->value [op->len] = 0; 142{
560 error = prom_setprop (node, op->name, 143 struct op_inode_info *oi = OP_I(inode);
561 op->value, op->len + 1); 144 int ret;
562 if (error <= 0) 145
563 printk (KERN_WARNING "openpromfs: " 146 BUG_ON(oi->type != op_inode_prop);
564 "Couldn't write property %s\n", 147
565 op->name); 148 ret = seq_open(file, &property_op);
566 } else if ((op->flag & OPP_BINARY) || !op->len) { 149 if (!ret) {
567 error = prom_setprop (node, op->name, 150 struct seq_file *m = file->private_data;
568 op->value, op->len); 151 m->private = oi->u.prop;
569 if (error <= 0)
570 printk (KERN_WARNING "openpromfs: "
571 "Couldn't write property %s\n",
572 op->name);
573 } else {
574 printk (KERN_WARNING "openpromfs: "
575 "Unknown property type of %s\n",
576 op->name);
577 }
578 } 152 }
579 unlock_kernel(); 153 return ret;
580 kfree (filp->private_data);
581 return 0;
582} 154}
583 155
584static const struct file_operations openpromfs_prop_ops = { 156static const struct file_operations openpromfs_prop_ops = {
585 .read = property_read, 157 .open = property_open,
586 .write = property_write, 158 .read = seq_read,
587 .release = property_release, 159 .llseek = seq_lseek,
160 .release = seq_release,
588}; 161};
589 162
590static const struct file_operations openpromfs_nodenum_ops = { 163static int openpromfs_readdir(struct file *, void *, filldir_t);
591 .read = nodenum_read,
592};
593 164
594static const struct file_operations openprom_operations = { 165static const struct file_operations openprom_operations = {
595 .read = generic_read_dir, 166 .read = generic_read_dir,
596 .readdir = openpromfs_readdir, 167 .readdir = openpromfs_readdir,
597}; 168};
598 169
599static struct inode_operations openprom_alias_inode_operations = { 170static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
600 .create = openpromfs_create,
601 .lookup = openpromfs_lookup,
602 .unlink = openpromfs_unlink,
603};
604 171
605static struct inode_operations openprom_inode_operations = { 172static struct inode_operations openprom_inode_operations = {
606 .lookup = openpromfs_lookup, 173 .lookup = openpromfs_lookup,
607}; 174};
608 175
609static int lookup_children(u16 n, const char * name, int len) 176static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
610{
611 int ret;
612 u16 node;
613 for (; n != 0xffff; n = nodes[n].next) {
614 node = nodes[n].child;
615 if (node != 0xffff) {
616 char buffer[128];
617 int i;
618 char *p;
619
620 while (node != 0xffff) {
621 if (prom_getname (nodes[node].node,
622 buffer, 128) >= 0) {
623 i = strlen (buffer);
624 if ((len == i)
625 && !strncmp (buffer, name, len))
626 return NODE2INO(node);
627 p = strchr (buffer, '@');
628 if (p && (len == p - buffer)
629 && !strncmp (buffer, name, len))
630 return NODE2INO(node);
631 }
632 node = nodes[node].next;
633 }
634 } else
635 continue;
636 ret = lookup_children (nodes[n].child, name, len);
637 if (ret) return ret;
638 }
639 return 0;
640}
641
642static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
643{ 177{
644 int ino = 0; 178 struct op_inode_info *ent_oi, *oi = OP_I(dir);
645#define OPFSL_DIR 0 179 struct device_node *dp, *child;
646#define OPFSL_PROPERTY 1 180 struct property *prop;
647#define OPFSL_NODENUM 2 181 enum op_inode_type ent_type;
648 int type = 0; 182 union op_inode_data ent_data;
649 char buffer[128];
650 char *p;
651 const char *name; 183 const char *name;
652 u32 n;
653 u16 dirnode;
654 unsigned int len;
655 int i;
656 struct inode *inode; 184 struct inode *inode;
657 char buffer2[64]; 185 unsigned int ino;
186 int len;
658 187
659 inode = NULL; 188 BUG_ON(oi->type != op_inode_node);
189
190 dp = oi->u.node;
191
660 name = dentry->d_name.name; 192 name = dentry->d_name.name;
661 len = dentry->d_name.len; 193 len = dentry->d_name.len;
662 lock_kernel(); 194
663 if (name [0] == '.' && len == 5 && !strncmp (name + 1, "node", 4)) { 195 mutex_lock(&op_mutex);
664 ino = NODEP2INO(NODE(dir->i_ino).first_prop); 196
665 type = OPFSL_NODENUM; 197 child = dp->child;
666 } 198 while (child) {
667 if (!ino) { 199 int n = strlen(child->path_component_name);
668 u16 node = NODE(dir->i_ino).child; 200
669 while (node != 0xffff) { 201 if (len == n &&
670 if (prom_getname (nodes[node].node, buffer, 128) >= 0) { 202 !strncmp(child->path_component_name, name, len)) {
671 i = strlen (buffer); 203 ent_type = op_inode_node;
672 if (len == i && !strncmp (buffer, name, len)) { 204 ent_data.node = child;
673 ino = NODE2INO(node); 205 ino = child->unique_id;
674 type = OPFSL_DIR; 206 goto found;
675 break;
676 }
677 p = strchr (buffer, '@');
678 if (p && (len == p - buffer)
679 && !strncmp (buffer, name, len)) {
680 ino = NODE2INO(node);
681 type = OPFSL_DIR;
682 break;
683 }
684 }
685 node = nodes[node].next;
686 }
687 }
688 n = NODE(dir->i_ino).node;
689 dirnode = dir->i_ino - OPENPROM_FIRST_INO;
690 if (!ino) {
691 int j = NODEP2INO(NODE(dir->i_ino).first_prop);
692 if (dirnode != aliases) {
693 for (p = prom_firstprop (n, buffer2);
694 p && *p;
695 p = prom_nextprop (n, p, buffer2)) {
696 j++;
697 if ((len == strlen (p))
698 && !strncmp (p, name, len)) {
699 ino = j;
700 type = OPFSL_PROPERTY;
701 break;
702 }
703 }
704 } else {
705 int k;
706 for (k = 0; k < aliases_nodes; k++) {
707 j++;
708 if (alias_names [k]
709 && (len == strlen (alias_names [k]))
710 && !strncmp (alias_names [k], name, len)) {
711 ino = j;
712 type = OPFSL_PROPERTY;
713 break;
714 }
715 }
716 } 207 }
208 child = child->sibling;
717 } 209 }
718 if (!ino) { 210
719 ino = lookup_children (NODE(dir->i_ino).child, name, len); 211 prop = dp->properties;
720 if (ino) 212 while (prop) {
721 type = OPFSL_DIR; 213 int n = strlen(prop->name);
722 else { 214
723 unlock_kernel(); 215 if (len == n && !strncmp(prop->name, name, len)) {
724 return ERR_PTR(-ENOENT); 216 ent_type = op_inode_prop;
217 ent_data.prop = prop;
218 ino = prop->unique_id;
219 goto found;
725 } 220 }
221
222 prop = prop->next;
726 } 223 }
727 inode = iget (dir->i_sb, ino); 224
728 unlock_kernel(); 225 mutex_unlock(&op_mutex);
226 return ERR_PTR(-ENOENT);
227
228found:
229 inode = iget(dir->i_sb, ino);
230 mutex_unlock(&op_mutex);
729 if (!inode) 231 if (!inode)
730 return ERR_PTR(-EINVAL); 232 return ERR_PTR(-EINVAL);
731 switch (type) { 233 ent_oi = OP_I(inode);
732 case OPFSL_DIR: 234 ent_oi->type = ent_type;
235 ent_oi->u = ent_data;
236
237 switch (ent_type) {
238 case op_inode_node:
733 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 239 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
734 if (ino == OPENPROM_FIRST_INO + aliases) { 240 inode->i_op = &openprom_inode_operations;
735 inode->i_mode |= S_IWUSR;
736 inode->i_op = &openprom_alias_inode_operations;
737 } else
738 inode->i_op = &openprom_inode_operations;
739 inode->i_fop = &openprom_operations; 241 inode->i_fop = &openprom_operations;
740 inode->i_nlink = 2; 242 inode->i_nlink = 2;
741 break; 243 break;
742 case OPFSL_NODENUM: 244 case op_inode_prop:
743 inode->i_mode = S_IFREG | S_IRUGO; 245 if (!strcmp(dp->name, "options") && (len == 17) &&
744 inode->i_fop = &openpromfs_nodenum_ops; 246 !strncmp (name, "security-password", 17))
745 inode->i_nlink = 1;
746 inode->u.generic_ip = (void *)(long)(n);
747 break;
748 case OPFSL_PROPERTY:
749 if ((dirnode == options) && (len == 17)
750 && !strncmp (name, "security-password", 17))
751 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; 247 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
752 else { 248 else
753 inode->i_mode = S_IFREG | S_IRUGO; 249 inode->i_mode = S_IFREG | S_IRUGO;
754 if (dirnode == options || dirnode == aliases) {
755 if (len != 4 || strncmp (name, "name", 4))
756 inode->i_mode |= S_IWUSR;
757 }
758 }
759 inode->i_fop = &openpromfs_prop_ops; 250 inode->i_fop = &openpromfs_prop_ops;
760 inode->i_nlink = 1; 251 inode->i_nlink = 1;
761 if (inode->i_size < 0) 252 inode->i_size = ent_oi->u.prop->length;
762 inode->i_size = 0;
763 inode->u.generic_ip = (void *)(long)(((u16)dirnode) |
764 (((u16)(ino - NODEP2INO(NODE(dir->i_ino).first_prop) - 1)) << 16));
765 break; 253 break;
766 } 254 }
767 255
@@ -775,237 +263,89 @@ static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentr
775static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
776{ 264{
777 struct inode *inode = filp->f_dentry->d_inode; 265 struct inode *inode = filp->f_dentry->d_inode;
266 struct op_inode_info *oi = OP_I(inode);
267 struct device_node *dp = oi->u.node;
268 struct device_node *child;
269 struct property *prop;
778 unsigned int ino; 270 unsigned int ino;
779 u32 n; 271 int i;
780 int i, j; 272
781 char buffer[128]; 273 mutex_lock(&op_mutex);
782 u16 node;
783 char *p;
784 char buffer2[64];
785
786 lock_kernel();
787 274
788 ino = inode->i_ino; 275 ino = inode->i_ino;
789 i = filp->f_pos; 276 i = filp->f_pos;
790 switch (i) { 277 switch (i) {
791 case 0: 278 case 0:
792 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) goto out; 279 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
280 goto out;
793 i++; 281 i++;
794 filp->f_pos++; 282 filp->f_pos++;
795 /* fall thru */ 283 /* fall thru */
796 case 1: 284 case 1:
797 if (filldir(dirent, "..", 2, i, 285 if (filldir(dirent, "..", 2, i,
798 (NODE(ino).parent == 0xffff) ? 286 (dp->parent == NULL ?
799 OPENPROM_ROOT_INO : NODE2INO(NODE(ino).parent), DT_DIR) < 0) 287 OPENPROM_ROOT_INO :
288 dp->parent->unique_id), DT_DIR) < 0)
800 goto out; 289 goto out;
801 i++; 290 i++;
802 filp->f_pos++; 291 filp->f_pos++;
803 /* fall thru */ 292 /* fall thru */
804 default: 293 default:
805 i -= 2; 294 i -= 2;
806 node = NODE(ino).child; 295
807 while (i && node != 0xffff) { 296 /* First, the children nodes as directories. */
808 node = nodes[node].next; 297 child = dp->child;
298 while (i && child) {
299 child = child->sibling;
809 i--; 300 i--;
810 } 301 }
811 while (node != 0xffff) { 302 while (child) {
812 if (prom_getname (nodes[node].node, buffer, 128) < 0) 303 if (filldir(dirent,
813 goto out; 304 child->path_component_name,
814 if (filldir(dirent, buffer, strlen(buffer), 305 strlen(child->path_component_name),
815 filp->f_pos, NODE2INO(node), DT_DIR) < 0) 306 filp->f_pos, child->unique_id, DT_DIR) < 0)
816 goto out; 307 goto out;
308
817 filp->f_pos++; 309 filp->f_pos++;
818 node = nodes[node].next; 310 child = child->sibling;
819 } 311 }
820 j = NODEP2INO(NODE(ino).first_prop); 312
821 if (!i) { 313 /* Next, the properties as files. */
822 if (filldir(dirent, ".node", 5, filp->f_pos, j, DT_REG) < 0) 314 prop = dp->properties;
315 while (i && prop) {
316 prop = prop->next;
317 i--;
318 }
319 while (prop) {
320 if (filldir(dirent, prop->name, strlen(prop->name),
321 filp->f_pos, prop->unique_id, DT_REG) < 0)
823 goto out; 322 goto out;
323
824 filp->f_pos++; 324 filp->f_pos++;
825 } else 325 prop = prop->next;
826 i--;
827 n = NODE(ino).node;
828 if (ino == OPENPROM_FIRST_INO + aliases) {
829 for (j++; i < aliases_nodes; i++, j++) {
830 if (alias_names [i]) {
831 if (filldir (dirent, alias_names [i],
832 strlen (alias_names [i]),
833 filp->f_pos, j, DT_REG) < 0) goto out;
834 filp->f_pos++;
835 }
836 }
837 } else {
838 for (p = prom_firstprop (n, buffer2);
839 p && *p;
840 p = prom_nextprop (n, p, buffer2)) {
841 j++;
842 if (i) i--;
843 else {
844 if (filldir(dirent, p, strlen(p),
845 filp->f_pos, j, DT_REG) < 0)
846 goto out;
847 filp->f_pos++;
848 }
849 }
850 } 326 }
851 } 327 }
852out: 328out:
853 unlock_kernel(); 329 mutex_unlock(&op_mutex);
854 return 0;
855}
856
857static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode,
858 struct nameidata *nd)
859{
860 char *p;
861 struct inode *inode;
862
863 if (!dir)
864 return -ENOENT;
865 if (dentry->d_name.len > 256)
866 return -EINVAL;
867 p = kmalloc (dentry->d_name.len + 1, GFP_KERNEL);
868 if (!p)
869 return -ENOMEM;
870 strncpy (p, dentry->d_name.name, dentry->d_name.len);
871 p [dentry->d_name.len] = 0;
872 lock_kernel();
873 if (aliases_nodes == ALIASES_NNODES) {
874 kfree(p);
875 unlock_kernel();
876 return -EIO;
877 }
878 alias_names [aliases_nodes++] = p;
879 inode = iget (dir->i_sb,
880 NODEP2INO(NODE(dir->i_ino).first_prop) + aliases_nodes);
881 if (!inode) {
882 unlock_kernel();
883 return -EINVAL;
884 }
885 inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR;
886 inode->i_fop = &openpromfs_prop_ops;
887 inode->i_nlink = 1;
888 if (inode->i_size < 0) inode->i_size = 0;
889 inode->u.generic_ip = (void *)(long)(((u16)aliases) |
890 (((u16)(aliases_nodes - 1)) << 16));
891 unlock_kernel();
892 d_instantiate(dentry, inode);
893 return 0; 330 return 0;
894} 331}
895 332
896static int openpromfs_unlink (struct inode *dir, struct dentry *dentry) 333static kmem_cache_t *op_inode_cachep;
897{
898 unsigned int len;
899 char *p;
900 const char *name;
901 int i;
902
903 name = dentry->d_name.name;
904 len = dentry->d_name.len;
905 lock_kernel();
906 for (i = 0; i < aliases_nodes; i++)
907 if ((strlen (alias_names [i]) == len)
908 && !strncmp (name, alias_names[i], len)) {
909 char buffer[512];
910
911 p = alias_names [i];
912 alias_names [i] = NULL;
913 kfree (p);
914 strcpy (buffer, "nvunalias ");
915 memcpy (buffer + 10, name, len);
916 buffer [10 + len] = 0;
917 prom_feval (buffer);
918 }
919 unlock_kernel();
920 return 0;
921}
922 334
923/* {{{ init section */ 335static struct inode *openprom_alloc_inode(struct super_block *sb)
924static int __init check_space (u16 n)
925{ 336{
926 unsigned long pages; 337 struct op_inode_info *oi;
927 338
928 if ((1 << alloced) * PAGE_SIZE < (n + 2) * sizeof(openpromfs_node)) { 339 oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
929 pages = __get_free_pages (GFP_KERNEL, alloced + 1); 340 if (!oi)
930 if (!pages) 341 return NULL;
931 return -1;
932 342
933 if (nodes) { 343 return &oi->vfs_inode;
934 memcpy ((char *)pages, (char *)nodes,
935 (1 << alloced) * PAGE_SIZE);
936 free_pages ((unsigned long)nodes, alloced);
937 }
938 alloced++;
939 nodes = (openpromfs_node *)pages;
940 }
941 return 0;
942} 344}
943 345
944static u16 __init get_nodes (u16 parent, u32 node) 346static void openprom_destroy_inode(struct inode *inode)
945{ 347{
946 char *p; 348 kmem_cache_free(op_inode_cachep, OP_I(inode));
947 u16 n = last_node++, i;
948 char buffer[64];
949
950 if (check_space (n) < 0)
951 return 0xffff;
952 nodes[n].parent = parent;
953 nodes[n].node = node;
954 nodes[n].next = 0xffff;
955 nodes[n].child = 0xffff;
956 nodes[n].first_prop = first_prop++;
957 if (!parent) {
958 char buffer[8];
959 int j;
960
961 if ((j = prom_getproperty (node, "name", buffer, 8)) >= 0) {
962 buffer[j] = 0;
963 if (!strcmp (buffer, "options"))
964 options = n;
965 else if (!strcmp (buffer, "aliases"))
966 aliases = n;
967 }
968 }
969 if (n != aliases)
970 for (p = prom_firstprop (node, buffer);
971 p && p != (char *)-1 && *p;
972 p = prom_nextprop (node, p, buffer))
973 first_prop++;
974 else {
975 char *q;
976 for (p = prom_firstprop (node, buffer);
977 p && p != (char *)-1 && *p;
978 p = prom_nextprop (node, p, buffer)) {
979 if (aliases_nodes == ALIASES_NNODES)
980 break;
981 for (i = 0; i < aliases_nodes; i++)
982 if (!strcmp (p, alias_names [i]))
983 break;
984 if (i < aliases_nodes)
985 continue;
986 q = kmalloc (strlen (p) + 1, GFP_KERNEL);
987 if (!q)
988 return 0xffff;
989 strcpy (q, p);
990 alias_names [aliases_nodes++] = q;
991 }
992 first_prop += ALIASES_NNODES;
993 }
994 node = prom_getchild (node);
995 if (node) {
996 parent = get_nodes (n, node);
997 if (parent == 0xffff)
998 return 0xffff;
999 nodes[n].child = parent;
1000 while ((node = prom_getsibling (node)) != 0) {
1001 i = get_nodes (n, node);
1002 if (i == 0xffff)
1003 return 0xffff;
1004 nodes[parent].next = i;
1005 parent = i;
1006 }
1007 }
1008 return n;
1009} 349}
1010 350
1011static void openprom_read_inode(struct inode * inode) 351static void openprom_read_inode(struct inode * inode)
@@ -1025,6 +365,8 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
1025} 365}
1026 366
1027static struct super_operations openprom_sops = { 367static struct super_operations openprom_sops = {
368 .alloc_inode = openprom_alloc_inode,
369 .destroy_inode = openprom_destroy_inode,
1028 .read_inode = openprom_read_inode, 370 .read_inode = openprom_read_inode,
1029 .statfs = simple_statfs, 371 .statfs = simple_statfs,
1030 .remount_fs = openprom_remount, 372 .remount_fs = openprom_remount,
@@ -1032,7 +374,8 @@ static struct super_operations openprom_sops = {
1032 374
1033static int openprom_fill_super(struct super_block *s, void *data, int silent) 375static int openprom_fill_super(struct super_block *s, void *data, int silent)
1034{ 376{
1035 struct inode * root_inode; 377 struct inode *root_inode;
378 struct op_inode_info *oi;
1036 379
1037 s->s_flags |= MS_NOATIME; 380 s->s_flags |= MS_NOATIME;
1038 s->s_blocksize = 1024; 381 s->s_blocksize = 1024;
@@ -1043,6 +386,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
1043 root_inode = iget(s, OPENPROM_ROOT_INO); 386 root_inode = iget(s, OPENPROM_ROOT_INO);
1044 if (!root_inode) 387 if (!root_inode)
1045 goto out_no_root; 388 goto out_no_root;
389
390 oi = OP_I(root_inode);
391 oi->type = op_inode_node;
392 oi->u.node = of_find_node_by_path("/");
393
1046 s->s_root = d_alloc_root(root_inode); 394 s->s_root = d_alloc_root(root_inode);
1047 if (!s->s_root) 395 if (!s->s_root)
1048 goto out_no_root; 396 goto out_no_root;
@@ -1054,10 +402,10 @@ out_no_root:
1054 return -ENOMEM; 402 return -ENOMEM;
1055} 403}
1056 404
1057static struct super_block *openprom_get_sb(struct file_system_type *fs_type, 405static int openprom_get_sb(struct file_system_type *fs_type,
1058 int flags, const char *dev_name, void *data) 406 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1059{ 407{
1060 return get_sb_single(fs_type, flags, data, openprom_fill_super); 408 return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
1061} 409}
1062 410
1063static struct file_system_type openprom_fs_type = { 411static struct file_system_type openprom_fs_type = {
@@ -1067,29 +415,39 @@ static struct file_system_type openprom_fs_type = {
1067 .kill_sb = kill_anon_super, 415 .kill_sb = kill_anon_super,
1068}; 416};
1069 417
418static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
419{
420 struct op_inode_info *oi = (struct op_inode_info *) data;
421
422 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
423 SLAB_CTOR_CONSTRUCTOR)
424 inode_init_once(&oi->vfs_inode);
425}
426
1070static int __init init_openprom_fs(void) 427static int __init init_openprom_fs(void)
1071{ 428{
1072 nodes = (openpromfs_node *)__get_free_pages(GFP_KERNEL, 0); 429 int err;
1073 if (!nodes) { 430
1074 printk (KERN_WARNING "openpromfs: can't get free page\n"); 431 op_inode_cachep = kmem_cache_create("op_inode_cache",
1075 return -EIO; 432 sizeof(struct op_inode_info),
1076 } 433 0,
1077 if (get_nodes (0xffff, prom_root_node) == 0xffff) { 434 (SLAB_RECLAIM_ACCOUNT |
1078 printk (KERN_WARNING "openpromfs: couldn't setup tree\n"); 435 SLAB_MEM_SPREAD),
1079 return -EIO; 436 op_inode_init_once, NULL);
1080 } 437 if (!op_inode_cachep)
1081 nodes[last_node].first_prop = first_prop; 438 return -ENOMEM;
1082 return register_filesystem(&openprom_fs_type); 439
440 err = register_filesystem(&openprom_fs_type);
441 if (err)
442 kmem_cache_destroy(op_inode_cachep);
443
444 return err;
1083} 445}
1084 446
1085static void __exit exit_openprom_fs(void) 447static void __exit exit_openprom_fs(void)
1086{ 448{
1087 int i;
1088 unregister_filesystem(&openprom_fs_type); 449 unregister_filesystem(&openprom_fs_type);
1089 free_pages ((unsigned long)nodes, alloced); 450 kmem_cache_destroy(op_inode_cachep);
1090 for (i = 0; i < aliases_nodes; i++)
1091 kfree (alias_names [i]);
1092 nodes = NULL;
1093} 451}
1094 452
1095module_init(init_openprom_fs) 453module_init(init_openprom_fs)
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 42c7d3878ed0..d713ce6b3e12 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -4,7 +4,6 @@
4 4
5obj-y := check.o 5obj-y := check.o
6 6
7obj-$(CONFIG_DEVFS_FS) += devfs.o
8obj-$(CONFIG_ACORN_PARTITION) += acorn.o 7obj-$(CONFIG_ACORN_PARTITION) += acorn.o
9obj-$(CONFIG_AMIGA_PARTITION) += amiga.o 8obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
10obj-$(CONFIG_ATARI_PARTITION) += atari.o 9obj-$(CONFIG_ATARI_PARTITION) += atari.o
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index c05085710fce..1bc9f372c7d4 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -12,7 +12,6 @@
12 * every single manufacturer of SCSI and IDE cards created their own 12 * every single manufacturer of SCSI and IDE cards created their own
13 * method. 13 * method.
14 */ 14 */
15#include <linux/config.h>
16#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
17#include <linux/adfs_fs.h> 16#include <linux/adfs_fs.h>
18 17
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7ef1f094de91..839634026eb5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,10 +18,8 @@
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/kmod.h> 19#include <linux/kmod.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/devfs_fs_kernel.h>
22 21
23#include "check.h" 22#include "check.h"
24#include "devfs.h"
25 23
26#include "acorn.h" 24#include "acorn.h"
27#include "amiga.h" 25#include "amiga.h"
@@ -161,18 +159,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
161 if (!state) 159 if (!state)
162 return NULL; 160 return NULL;
163 161
164#ifdef CONFIG_DEVFS_FS 162 disk_name(hd, 0, state->name);
165 if (hd->devfs_name[0] != '\0') { 163 printk(KERN_INFO " %s:", state->name);
166 printk(KERN_INFO " /dev/%s:", hd->devfs_name); 164 if (isdigit(state->name[strlen(state->name)-1]))
167 sprintf(state->name, "p"); 165 sprintf(state->name, "p");
168 } 166
169#endif
170 else {
171 disk_name(hd, 0, state->name);
172 printk(KERN_INFO " %s:", state->name);
173 if (isdigit(state->name[strlen(state->name)-1]))
174 sprintf(state->name, "p");
175 }
176 state->limit = hd->minors; 167 state->limit = hd->minors;
177 i = res = 0; 168 i = res = 0;
178 while (!res && check_part[i]) { 169 while (!res && check_part[i]) {
@@ -328,7 +319,7 @@ void delete_partition(struct gendisk *disk, int part)
328 p->nr_sects = 0; 319 p->nr_sects = 0;
329 p->ios[0] = p->ios[1] = 0; 320 p->ios[0] = p->ios[1] = 0;
330 p->sectors[0] = p->sectors[1] = 0; 321 p->sectors[0] = p->sectors[1] = 0;
331 devfs_remove("%s/part%d", disk->devfs_name, part); 322 sysfs_remove_link(&p->kobj, "subsystem");
332 if (p->holder_dir) 323 if (p->holder_dir)
333 kobject_unregister(p->holder_dir); 324 kobject_unregister(p->holder_dir);
334 kobject_uevent(&p->kobj, KOBJ_REMOVE); 325 kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -349,10 +340,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
349 p->nr_sects = len; 340 p->nr_sects = len;
350 p->partno = part; 341 p->partno = part;
351 342
352 devfs_mk_bdev(MKDEV(disk->major, disk->first_minor + part),
353 S_IFBLK|S_IRUSR|S_IWUSR,
354 "%s/part%d", disk->devfs_name, part);
355
356 if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) 343 if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1]))
357 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); 344 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part);
358 else 345 else
@@ -363,6 +350,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
363 kobject_add(&p->kobj); 350 kobject_add(&p->kobj);
364 if (!disk->part_uevent_suppress) 351 if (!disk->part_uevent_suppress)
365 kobject_uevent(&p->kobj, KOBJ_ADD); 352 kobject_uevent(&p->kobj, KOBJ_ADD);
353 sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem");
366 partition_sysfs_add_subdir(p); 354 partition_sysfs_add_subdir(p);
367 disk->part[part-1] = p; 355 disk->part[part-1] = p;
368} 356}
@@ -398,6 +386,7 @@ static void disk_sysfs_symlinks(struct gendisk *disk)
398 kfree(disk_name); 386 kfree(disk_name);
399 } 387 }
400 } 388 }
389 sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, "subsystem");
401} 390}
402 391
403/* Not exported, helper to add_disk(). */ 392/* Not exported, helper to add_disk(). */
@@ -420,14 +409,8 @@ void register_disk(struct gendisk *disk)
420 disk_sysfs_add_subdirs(disk); 409 disk_sysfs_add_subdirs(disk);
421 410
422 /* No minors to use for partitions */ 411 /* No minors to use for partitions */
423 if (disk->minors == 1) { 412 if (disk->minors == 1)
424 if (disk->devfs_name[0] != '\0')
425 devfs_add_disk(disk);
426 goto exit; 413 goto exit;
427 }
428
429 /* always add handle for the whole disk */
430 devfs_add_partitioned(disk);
431 414
432 /* No such device (e.g., media were just removed) */ 415 /* No such device (e.g., media were just removed) */
433 if (!get_capacity(disk)) 416 if (!get_capacity(disk))
@@ -481,6 +464,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
481 sector_t from = state->parts[p].from; 464 sector_t from = state->parts[p].from;
482 if (!size) 465 if (!size)
483 continue; 466 continue;
467 if (from + size > get_capacity(disk)) {
468 printk(" %s: p%d exceeds device capacity\n",
469 disk->disk_name, p);
470 }
484 add_partition(disk, p, from, size); 471 add_partition(disk, p, from, size);
485#ifdef CONFIG_BLK_DEV_MD 472#ifdef CONFIG_BLK_DEV_MD
486 if (state->parts[p].flags) 473 if (state->parts[p].flags)
@@ -496,8 +483,8 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
496 struct address_space *mapping = bdev->bd_inode->i_mapping; 483 struct address_space *mapping = bdev->bd_inode->i_mapping;
497 struct page *page; 484 struct page *page;
498 485
499 page = read_cache_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 486 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
500 (filler_t *)mapping->a_ops->readpage, NULL); 487 NULL);
501 if (!IS_ERR(page)) { 488 if (!IS_ERR(page)) {
502 wait_on_page_locked(page); 489 wait_on_page_locked(page);
503 if (!PageUptodate(page)) 490 if (!PageUptodate(page))
@@ -531,8 +518,6 @@ void del_gendisk(struct gendisk *disk)
531 disk_stat_set_all(disk, 0); 518 disk_stat_set_all(disk, 0);
532 disk->stamp = 0; 519 disk->stamp = 0;
533 520
534 devfs_remove_disk(disk);
535
536 kobject_uevent(&disk->kobj, KOBJ_REMOVE); 521 kobject_uevent(&disk->kobj, KOBJ_REMOVE);
537 if (disk->holder_dir) 522 if (disk->holder_dir)
538 kobject_unregister(disk->holder_dir); 523 kobject_unregister(disk->holder_dir);
@@ -548,5 +533,6 @@ void del_gendisk(struct gendisk *disk)
548 put_device(disk->driverfs_dev); 533 put_device(disk->driverfs_dev);
549 disk->driverfs_dev = NULL; 534 disk->driverfs_dev = NULL;
550 } 535 }
536 sysfs_remove_link(&disk->kobj, "subsystem");
551 kobject_del(&disk->kobj); 537 kobject_del(&disk->kobj);
552} 538}
diff --git a/fs/partitions/devfs.c b/fs/partitions/devfs.c
deleted file mode 100644
index 3f0a780c9cec..000000000000
--- a/fs/partitions/devfs.c
+++ /dev/null
@@ -1,130 +0,0 @@
1/*
2 * This tries to keep block devices away from devfs as much as possible.
3 */
4#include <linux/fs.h>
5#include <linux/devfs_fs_kernel.h>
6#include <linux/vmalloc.h>
7#include <linux/genhd.h>
8#include <linux/bitops.h>
9#include <linux/mutex.h>
10
11
12struct unique_numspace {
13 u32 num_free; /* Num free in bits */
14 u32 length; /* Array length in bytes */
15 unsigned long *bits;
16 struct semaphore mutex;
17};
18
19static DEFINE_MUTEX(numspace_mutex);
20
21static int expand_numspace(struct unique_numspace *s)
22{
23 u32 length;
24 void *bits;
25
26 if (s->length < 16)
27 length = 16;
28 else
29 length = s->length << 1;
30
31 bits = vmalloc(length);
32 if (!bits)
33 return -ENOMEM;
34 if (s->bits) {
35 memcpy(bits, s->bits, s->length);
36 vfree(s->bits);
37 }
38
39 s->num_free = (length - s->length) << 3;
40 s->bits = bits;
41 memset(bits + s->length, 0, length - s->length);
42 s->length = length;
43
44 return 0;
45}
46
47static int alloc_unique_number(struct unique_numspace *s)
48{
49 int rval = 0;
50
51 mutex_lock(&numspace_mutex);
52 if (s->num_free < 1)
53 rval = expand_numspace(s);
54 if (!rval) {
55 rval = find_first_zero_bit(s->bits, s->length << 3);
56 --s->num_free;
57 __set_bit(rval, s->bits);
58 }
59 mutex_unlock(&numspace_mutex);
60
61 return rval;
62}
63
64static void dealloc_unique_number(struct unique_numspace *s, int number)
65{
66 int old_val;
67
68 if (number >= 0) {
69 mutex_lock(&numspace_mutex);
70 old_val = __test_and_clear_bit(number, s->bits);
71 if (old_val)
72 ++s->num_free;
73 mutex_unlock(&numspace_mutex);
74 }
75}
76
77static struct unique_numspace disc_numspace;
78static struct unique_numspace cdrom_numspace;
79
80void devfs_add_partitioned(struct gendisk *disk)
81{
82 char dirname[64], symlink[16];
83
84 devfs_mk_dir(disk->devfs_name);
85 devfs_mk_bdev(MKDEV(disk->major, disk->first_minor),
86 S_IFBLK|S_IRUSR|S_IWUSR,
87 "%s/disc", disk->devfs_name);
88
89 disk->number = alloc_unique_number(&disc_numspace);
90
91 sprintf(symlink, "discs/disc%d", disk->number);
92 sprintf(dirname, "../%s", disk->devfs_name);
93 devfs_mk_symlink(symlink, dirname);
94
95}
96
97void devfs_add_disk(struct gendisk *disk)
98{
99 devfs_mk_bdev(MKDEV(disk->major, disk->first_minor),
100 (disk->flags & GENHD_FL_CD) ?
101 S_IFBLK|S_IRUGO|S_IWUGO :
102 S_IFBLK|S_IRUSR|S_IWUSR,
103 "%s", disk->devfs_name);
104
105 if (disk->flags & GENHD_FL_CD) {
106 char dirname[64], symlink[16];
107
108 disk->number = alloc_unique_number(&cdrom_numspace);
109
110 sprintf(symlink, "cdroms/cdrom%d", disk->number);
111 sprintf(dirname, "../%s", disk->devfs_name);
112 devfs_mk_symlink(symlink, dirname);
113 }
114}
115
116void devfs_remove_disk(struct gendisk *disk)
117{
118 if (disk->minors != 1) {
119 devfs_remove("discs/disc%d", disk->number);
120 dealloc_unique_number(&disc_numspace, disk->number);
121 devfs_remove("%s/disc", disk->devfs_name);
122 }
123 if (disk->flags & GENHD_FL_CD) {
124 devfs_remove("cdroms/cdrom%d", disk->number);
125 dealloc_unique_number(&cdrom_numspace, disk->number);
126 }
127 devfs_remove(disk->devfs_name);
128}
129
130
diff --git a/fs/partitions/devfs.h b/fs/partitions/devfs.h
deleted file mode 100644
index 176118b4e492..000000000000
--- a/fs/partitions/devfs.h
+++ /dev/null
@@ -1,10 +0,0 @@
1
2#ifdef CONFIG_DEVFS_FS
3void devfs_add_disk(struct gendisk *dev);
4void devfs_add_partitioned(struct gendisk *dev);
5void devfs_remove_disk(struct gendisk *dev);
6#else
7# define devfs_add_disk(disk) do { } while (0)
8# define devfs_add_partitioned(disk) do { } while (0)
9# define devfs_remove_disk(disk) do { } while (0)
10#endif
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 0f5b017aebad..63730282ad81 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -91,7 +91,6 @@
91 * - Code works, detects all the partitions. 91 * - Code works, detects all the partitions.
92 * 92 *
93 ************************************************************/ 93 ************************************************************/
94#include <linux/config.h>
95#include <linux/crc32.h> 94#include <linux/crc32.h>
96#include "check.h" 95#include "check.h"
97#include "efi.h" 96#include "efi.h"
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index c44fb0561448..2cc89d0475bf 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -26,7 +26,6 @@
26#define FS_PART_EFI_H_INCLUDED 26#define FS_PART_EFI_H_INCLUDED
27 27
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/config.h>
30#include <linux/fs.h> 29#include <linux/fs.h>
31#include <linux/genhd.h> 30#include <linux/genhd.h>
32#include <linux/kernel.h> 31#include <linux/kernel.h>
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 830c55d86ab1..d352a7381fed 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -6,7 +6,6 @@
6 * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 6 * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
11#include <linux/hdreg.h> 10#include <linux/hdreg.h>
12#include <linux/slab.h> 11#include <linux/slab.h>
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 813292f21210..c0871002d00d 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -6,7 +6,6 @@
6 * Re-organised Feb 1998 Russell King 6 * Re-organised Feb 1998 Russell King
7 */ 7 */
8 8
9#include <linux/config.h>
10#include <linux/ctype.h> 9#include <linux/ctype.h>
11#include "check.h" 10#include "check.h"
12#include "mac.h" 11#include "mac.h"
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 9935d254186e..8f12587c3129 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -19,7 +19,6 @@
19 * Re-organised Feb 1998 Russell King 19 * Re-organised Feb 1998 Russell King
20 */ 20 */
21 21
22#include <linux/config.h>
23 22
24#include "check.h" 23#include "check.h"
25#include "msdos.h" 24#include "msdos.h"
diff --git a/fs/pipe.c b/fs/pipe.c
index 5acd8954aaa0..20352573e025 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -979,12 +979,11 @@ no_files:
979 * any operations on the root directory. However, we need a non-trivial 979 * any operations on the root directory. However, we need a non-trivial
980 * d_name - pipe: will go nicely and kill the special-casing in procfs. 980 * d_name - pipe: will go nicely and kill the special-casing in procfs.
981 */ 981 */
982 982static int pipefs_get_sb(struct file_system_type *fs_type,
983static struct super_block * 983 int flags, const char *dev_name, void *data,
984pipefs_get_sb(struct file_system_type *fs_type, int flags, 984 struct vfsmount *mnt)
985 const char *dev_name, void *data)
986{ 985{
987 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 986 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
988} 987}
989 988
990static struct file_system_type pipe_fs_type = { 989static struct file_system_type pipe_fs_type = {
diff --git a/fs/pnode.c b/fs/pnode.c
index 37b568ed0e05..da42ee61c1df 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -53,8 +53,7 @@ static int do_make_slave(struct vfsmount *mnt)
53 if (master) { 53 if (master) {
54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) 54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
55 slave_mnt->mnt_master = master; 55 slave_mnt->mnt_master = master;
56 list_del(&mnt->mnt_slave); 56 list_move(&mnt->mnt_slave, &master->mnt_slave_list);
57 list_add(&mnt->mnt_slave, &master->mnt_slave_list);
58 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); 57 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
59 INIT_LIST_HEAD(&mnt->mnt_slave_list); 58 INIT_LIST_HEAD(&mnt->mnt_slave_list);
60 } else { 59 } else {
@@ -283,10 +282,8 @@ static void __propagate_umount(struct vfsmount *mnt)
283 * umount the child only if the child has no 282 * umount the child only if the child has no
284 * other children 283 * other children
285 */ 284 */
286 if (child && list_empty(&child->mnt_mounts)) { 285 if (child && list_empty(&child->mnt_mounts))
287 list_del(&child->mnt_hash); 286 list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
288 list_add_tail(&child->mnt_hash, &mnt->mnt_hash);
289 }
290 } 287 }
291} 288}
292 289
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7a76ad570230..7495d3e20775 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -52,7 +52,6 @@
52 * : base.c too. 52 * : base.c too.
53 */ 53 */
54 54
55#include <linux/config.h>
56#include <linux/types.h> 55#include <linux/types.h>
57#include <linux/errno.h> 56#include <linux/errno.h>
58#include <linux/time.h> 57#include <linux/time.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6cc77dc3f3ff..243a94af0427 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,7 +49,6 @@
49 49
50#include <asm/uaccess.h> 50#include <asm/uaccess.h>
51 51
52#include <linux/config.h>
53#include <linux/errno.h> 52#include <linux/errno.h>
54#include <linux/time.h> 53#include <linux/time.h>
55#include <linux/proc_fs.h> 54#include <linux/proc_fs.h>
@@ -74,6 +73,16 @@
74#include <linux/poll.h> 73#include <linux/poll.h>
75#include "internal.h" 74#include "internal.h"
76 75
76/* NOTE:
77 * Implementing inode permission operations in /proc is almost
78 * certainly an error. Permission checks need to happen during
79 * each system call not at open time. The reason is that most of
80 * what we wish to check for permissions in /proc varies at runtime.
81 *
82 * The classic example of a problem is opening file descriptors
83 * in /proc for a task before it execs a suid executable.
84 */
85
77/* 86/*
78 * For hysterical raisins we keep the same inumbers as in the old procfs. 87 * For hysterical raisins we keep the same inumbers as in the old procfs.
79 * Feel free to change the macro below - just keep the range distinct from 88 * Feel free to change the macro below - just keep the range distinct from
@@ -121,6 +130,8 @@ enum pid_directory_inos {
121 PROC_TGID_ATTR_PREV, 130 PROC_TGID_ATTR_PREV,
122 PROC_TGID_ATTR_EXEC, 131 PROC_TGID_ATTR_EXEC,
123 PROC_TGID_ATTR_FSCREATE, 132 PROC_TGID_ATTR_FSCREATE,
133 PROC_TGID_ATTR_KEYCREATE,
134 PROC_TGID_ATTR_SOCKCREATE,
124#endif 135#endif
125#ifdef CONFIG_AUDITSYSCALL 136#ifdef CONFIG_AUDITSYSCALL
126 PROC_TGID_LOGINUID, 137 PROC_TGID_LOGINUID,
@@ -162,6 +173,8 @@ enum pid_directory_inos {
162 PROC_TID_ATTR_PREV, 173 PROC_TID_ATTR_PREV,
163 PROC_TID_ATTR_EXEC, 174 PROC_TID_ATTR_EXEC,
164 PROC_TID_ATTR_FSCREATE, 175 PROC_TID_ATTR_FSCREATE,
176 PROC_TID_ATTR_KEYCREATE,
177 PROC_TID_ATTR_SOCKCREATE,
165#endif 178#endif
166#ifdef CONFIG_AUDITSYSCALL 179#ifdef CONFIG_AUDITSYSCALL
167 PROC_TID_LOGINUID, 180 PROC_TID_LOGINUID,
@@ -173,6 +186,9 @@ enum pid_directory_inos {
173 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 186 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
174}; 187};
175 188
189/* Worst case buffer size needed for holding an integer. */
190#define PROC_NUMBUF 10
191
176struct pid_entry { 192struct pid_entry {
177 int type; 193 int type;
178 int len; 194 int len;
@@ -275,6 +291,8 @@ static struct pid_entry tgid_attr_stuff[] = {
275 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 291 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
276 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 292 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
277 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 293 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
294 E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
295 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
278 {0,0,NULL,0} 296 {0,0,NULL,0}
279}; 297};
280static struct pid_entry tid_attr_stuff[] = { 298static struct pid_entry tid_attr_stuff[] = {
@@ -282,6 +300,8 @@ static struct pid_entry tid_attr_stuff[] = {
282 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 300 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
283 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 301 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
284 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 302 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
303 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
304 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
285 {0,0,NULL,0} 305 {0,0,NULL,0}
286}; 306};
287#endif 307#endif
@@ -290,12 +310,15 @@ static struct pid_entry tid_attr_stuff[] = {
290 310
291static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 311static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
292{ 312{
293 struct task_struct *task = proc_task(inode); 313 struct task_struct *task = get_proc_task(inode);
294 struct files_struct *files; 314 struct files_struct *files = NULL;
295 struct file *file; 315 struct file *file;
296 int fd = proc_type(inode) - PROC_TID_FD_DIR; 316 int fd = proc_fd(inode);
297 317
298 files = get_files_struct(task); 318 if (task) {
319 files = get_files_struct(task);
320 put_task_struct(task);
321 }
299 if (files) { 322 if (files) {
300 /* 323 /*
301 * We are not taking a ref to the file structure, so we must 324 * We are not taking a ref to the file structure, so we must
@@ -327,29 +350,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
327 return fs; 350 return fs;
328} 351}
329 352
330static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 353static int get_nr_threads(struct task_struct *tsk)
331{ 354{
332 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 355 /* Must be called with the rcu_read_lock held */
333 int result = -ENOENT; 356 unsigned long flags;
334 if (fs) { 357 int count = 0;
335 read_lock(&fs->lock); 358
336 *mnt = mntget(fs->pwdmnt); 359 if (lock_task_sighand(tsk, &flags)) {
337 *dentry = dget(fs->pwd); 360 count = atomic_read(&tsk->signal->count);
338 read_unlock(&fs->lock); 361 unlock_task_sighand(tsk, &flags);
339 result = 0;
340 put_fs_struct(fs);
341 } 362 }
342 return result; 363 return count;
343} 364}
344 365
345static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 366static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
346{ 367{
347 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 368 struct task_struct *task = get_proc_task(inode);
369 struct fs_struct *fs = NULL;
348 int result = -ENOENT; 370 int result = -ENOENT;
371
372 if (task) {
373 fs = get_fs_struct(task);
374 put_task_struct(task);
375 }
349 if (fs) { 376 if (fs) {
350 read_lock(&fs->lock); 377 read_lock(&fs->lock);
351 *mnt = mntget(fs->rootmnt); 378 *mnt = mntget(fs->pwdmnt);
352 *dentry = dget(fs->root); 379 *dentry = dget(fs->pwd);
353 read_unlock(&fs->lock); 380 read_unlock(&fs->lock);
354 result = 0; 381 result = 0;
355 put_fs_struct(fs); 382 put_fs_struct(fs);
@@ -357,42 +384,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
357 return result; 384 return result;
358} 385}
359 386
360 387static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
361/* Same as proc_root_link, but this addionally tries to get fs from other
362 * threads in the group */
363static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
364 struct vfsmount **mnt)
365{ 388{
366 struct fs_struct *fs; 389 struct task_struct *task = get_proc_task(inode);
390 struct fs_struct *fs = NULL;
367 int result = -ENOENT; 391 int result = -ENOENT;
368 struct task_struct *leader = proc_task(inode);
369 392
370 task_lock(leader); 393 if (task) {
371 fs = leader->fs; 394 fs = get_fs_struct(task);
372 if (fs) { 395 put_task_struct(task);
373 atomic_inc(&fs->count);
374 task_unlock(leader);
375 } else {
376 /* Try to get fs from other threads */
377 task_unlock(leader);
378 read_lock(&tasklist_lock);
379 if (pid_alive(leader)) {
380 struct task_struct *task = leader;
381
382 while ((task = next_thread(task)) != leader) {
383 task_lock(task);
384 fs = task->fs;
385 if (fs) {
386 atomic_inc(&fs->count);
387 task_unlock(task);
388 break;
389 }
390 task_unlock(task);
391 }
392 }
393 read_unlock(&tasklist_lock);
394 } 396 }
395
396 if (fs) { 397 if (fs) {
397 read_lock(&fs->lock); 398 read_lock(&fs->lock);
398 *mnt = mntget(fs->rootmnt); 399 *mnt = mntget(fs->rootmnt);
@@ -404,7 +405,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
404 return result; 405 return result;
405} 406}
406 407
407
408#define MAY_PTRACE(task) \ 408#define MAY_PTRACE(task) \
409 (task == current || \ 409 (task == current || \
410 (task->parent == current && \ 410 (task->parent == current && \
@@ -535,142 +535,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
535/************************************************************************/ 535/************************************************************************/
536 536
537/* permission checks */ 537/* permission checks */
538 538static int proc_fd_access_allowed(struct inode *inode)
539/* If the process being read is separated by chroot from the reading process,
540 * don't let the reader access the threads.
541 *
542 * note: this does dput(root) and mntput(vfsmnt) on exit.
543 */
544static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
545{ 539{
546 struct dentry *de, *base; 540 struct task_struct *task;
547 struct vfsmount *our_vfsmnt, *mnt; 541 int allowed = 0;
548 int res = 0; 542 /* Allow access to a task's file descriptors if it is us or we
549 543 * may use ptrace attach to the process and find out that
550 read_lock(&current->fs->lock); 544 * information.
551 our_vfsmnt = mntget(current->fs->rootmnt); 545 */
552 base = dget(current->fs->root); 546 task = get_proc_task(inode);
553 read_unlock(&current->fs->lock); 547 if (task) {
554 548 allowed = ptrace_may_attach(task);
555 spin_lock(&vfsmount_lock); 549 put_task_struct(task);
556 de = root;
557 mnt = vfsmnt;
558
559 while (mnt != our_vfsmnt) {
560 if (mnt == mnt->mnt_parent)
561 goto out;
562 de = mnt->mnt_mountpoint;
563 mnt = mnt->mnt_parent;
564 }
565
566 if (!is_subdir(de, base))
567 goto out;
568 spin_unlock(&vfsmount_lock);
569
570exit:
571 dput(base);
572 mntput(our_vfsmnt);
573 dput(root);
574 mntput(vfsmnt);
575 return res;
576out:
577 spin_unlock(&vfsmount_lock);
578 res = -EACCES;
579 goto exit;
580}
581
582static int proc_check_root(struct inode *inode)
583{
584 struct dentry *root;
585 struct vfsmount *vfsmnt;
586
587 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
588 return -ENOENT;
589 return proc_check_chroot(root, vfsmnt);
590}
591
592static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
593{
594 if (generic_permission(inode, mask, NULL) != 0)
595 return -EACCES;
596 return proc_check_root(inode);
597}
598
599static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
600{
601 struct dentry *root;
602 struct vfsmount *vfsmnt;
603
604 if (generic_permission(inode, mask, NULL) != 0)
605 return -EACCES;
606
607 if (proc_task_root_link(inode, &root, &vfsmnt))
608 return -ENOENT;
609
610 return proc_check_chroot(root, vfsmnt);
611}
612
613extern struct seq_operations proc_pid_maps_op;
614static int maps_open(struct inode *inode, struct file *file)
615{
616 struct task_struct *task = proc_task(inode);
617 int ret = seq_open(file, &proc_pid_maps_op);
618 if (!ret) {
619 struct seq_file *m = file->private_data;
620 m->private = task;
621 }
622 return ret;
623}
624
625static struct file_operations proc_maps_operations = {
626 .open = maps_open,
627 .read = seq_read,
628 .llseek = seq_lseek,
629 .release = seq_release,
630};
631
632#ifdef CONFIG_NUMA
633extern struct seq_operations proc_pid_numa_maps_op;
634static int numa_maps_open(struct inode *inode, struct file *file)
635{
636 struct task_struct *task = proc_task(inode);
637 int ret = seq_open(file, &proc_pid_numa_maps_op);
638 if (!ret) {
639 struct seq_file *m = file->private_data;
640 m->private = task;
641 }
642 return ret;
643}
644
645static struct file_operations proc_numa_maps_operations = {
646 .open = numa_maps_open,
647 .read = seq_read,
648 .llseek = seq_lseek,
649 .release = seq_release,
650};
651#endif
652
653#ifdef CONFIG_MMU
654extern struct seq_operations proc_pid_smaps_op;
655static int smaps_open(struct inode *inode, struct file *file)
656{
657 struct task_struct *task = proc_task(inode);
658 int ret = seq_open(file, &proc_pid_smaps_op);
659 if (!ret) {
660 struct seq_file *m = file->private_data;
661 m->private = task;
662 } 550 }
663 return ret; 551 return allowed;
664} 552}
665 553
666static struct file_operations proc_smaps_operations = {
667 .open = smaps_open,
668 .read = seq_read,
669 .llseek = seq_lseek,
670 .release = seq_release,
671};
672#endif
673
674extern struct seq_operations mounts_op; 554extern struct seq_operations mounts_op;
675struct proc_mounts { 555struct proc_mounts {
676 struct seq_file m; 556 struct seq_file m;
@@ -679,16 +559,19 @@ struct proc_mounts {
679 559
680static int mounts_open(struct inode *inode, struct file *file) 560static int mounts_open(struct inode *inode, struct file *file)
681{ 561{
682 struct task_struct *task = proc_task(inode); 562 struct task_struct *task = get_proc_task(inode);
683 struct namespace *namespace; 563 struct namespace *namespace = NULL;
684 struct proc_mounts *p; 564 struct proc_mounts *p;
685 int ret = -EINVAL; 565 int ret = -EINVAL;
686 566
687 task_lock(task); 567 if (task) {
688 namespace = task->namespace; 568 task_lock(task);
689 if (namespace) 569 namespace = task->namespace;
690 get_namespace(namespace); 570 if (namespace)
691 task_unlock(task); 571 get_namespace(namespace);
572 task_unlock(task);
573 put_task_struct(task);
574 }
692 575
693 if (namespace) { 576 if (namespace) {
694 ret = -ENOMEM; 577 ret = -ENOMEM;
@@ -745,17 +628,21 @@ static struct file_operations proc_mounts_operations = {
745extern struct seq_operations mountstats_op; 628extern struct seq_operations mountstats_op;
746static int mountstats_open(struct inode *inode, struct file *file) 629static int mountstats_open(struct inode *inode, struct file *file)
747{ 630{
748 struct task_struct *task = proc_task(inode);
749 int ret = seq_open(file, &mountstats_op); 631 int ret = seq_open(file, &mountstats_op);
750 632
751 if (!ret) { 633 if (!ret) {
752 struct seq_file *m = file->private_data; 634 struct seq_file *m = file->private_data;
753 struct namespace *namespace; 635 struct namespace *namespace = NULL;
754 task_lock(task); 636 struct task_struct *task = get_proc_task(inode);
755 namespace = task->namespace; 637
756 if (namespace) 638 if (task) {
757 get_namespace(namespace); 639 task_lock(task);
758 task_unlock(task); 640 namespace = task->namespace;
641 if (namespace)
642 get_namespace(namespace);
643 task_unlock(task);
644 put_task_struct(task);
645 }
759 646
760 if (namespace) 647 if (namespace)
761 m->private = namespace; 648 m->private = namespace;
@@ -782,18 +669,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
782 struct inode * inode = file->f_dentry->d_inode; 669 struct inode * inode = file->f_dentry->d_inode;
783 unsigned long page; 670 unsigned long page;
784 ssize_t length; 671 ssize_t length;
785 struct task_struct *task = proc_task(inode); 672 struct task_struct *task = get_proc_task(inode);
673
674 length = -ESRCH;
675 if (!task)
676 goto out_no_task;
786 677
787 if (count > PROC_BLOCK_SIZE) 678 if (count > PROC_BLOCK_SIZE)
788 count = PROC_BLOCK_SIZE; 679 count = PROC_BLOCK_SIZE;
680
681 length = -ENOMEM;
789 if (!(page = __get_free_page(GFP_KERNEL))) 682 if (!(page = __get_free_page(GFP_KERNEL)))
790 return -ENOMEM; 683 goto out;
791 684
792 length = PROC_I(inode)->op.proc_read(task, (char*)page); 685 length = PROC_I(inode)->op.proc_read(task, (char*)page);
793 686
794 if (length >= 0) 687 if (length >= 0)
795 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 688 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
796 free_page(page); 689 free_page(page);
690out:
691 put_task_struct(task);
692out_no_task:
797 return length; 693 return length;
798} 694}
799 695
@@ -810,12 +706,15 @@ static int mem_open(struct inode* inode, struct file* file)
810static ssize_t mem_read(struct file * file, char __user * buf, 706static ssize_t mem_read(struct file * file, char __user * buf,
811 size_t count, loff_t *ppos) 707 size_t count, loff_t *ppos)
812{ 708{
813 struct task_struct *task = proc_task(file->f_dentry->d_inode); 709 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
814 char *page; 710 char *page;
815 unsigned long src = *ppos; 711 unsigned long src = *ppos;
816 int ret = -ESRCH; 712 int ret = -ESRCH;
817 struct mm_struct *mm; 713 struct mm_struct *mm;
818 714
715 if (!task)
716 goto out_no_task;
717
819 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 718 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
820 goto out; 719 goto out;
821 720
@@ -865,6 +764,8 @@ out_put:
865out_free: 764out_free:
866 free_page((unsigned long) page); 765 free_page((unsigned long) page);
867out: 766out:
767 put_task_struct(task);
768out_no_task:
868 return ret; 769 return ret;
869} 770}
870 771
@@ -877,15 +778,20 @@ static ssize_t mem_write(struct file * file, const char * buf,
877{ 778{
878 int copied = 0; 779 int copied = 0;
879 char *page; 780 char *page;
880 struct task_struct *task = proc_task(file->f_dentry->d_inode); 781 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
881 unsigned long dst = *ppos; 782 unsigned long dst = *ppos;
882 783
784 copied = -ESRCH;
785 if (!task)
786 goto out_no_task;
787
883 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 788 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
884 return -ESRCH; 789 goto out;
885 790
791 copied = -ENOMEM;
886 page = (char *)__get_free_page(GFP_USER); 792 page = (char *)__get_free_page(GFP_USER);
887 if (!page) 793 if (!page)
888 return -ENOMEM; 794 goto out;
889 795
890 while (count > 0) { 796 while (count > 0) {
891 int this_len, retval; 797 int this_len, retval;
@@ -908,6 +814,9 @@ static ssize_t mem_write(struct file * file, const char * buf,
908 } 814 }
909 *ppos = dst; 815 *ppos = dst;
910 free_page((unsigned long) page); 816 free_page((unsigned long) page);
817out:
818 put_task_struct(task);
819out_no_task:
911 return copied; 820 return copied;
912} 821}
913#endif 822#endif
@@ -938,13 +847,18 @@ static struct file_operations proc_mem_operations = {
938static ssize_t oom_adjust_read(struct file *file, char __user *buf, 847static ssize_t oom_adjust_read(struct file *file, char __user *buf,
939 size_t count, loff_t *ppos) 848 size_t count, loff_t *ppos)
940{ 849{
941 struct task_struct *task = proc_task(file->f_dentry->d_inode); 850 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
942 char buffer[8]; 851 char buffer[PROC_NUMBUF];
943 size_t len; 852 size_t len;
944 int oom_adjust = task->oomkilladj; 853 int oom_adjust;
945 loff_t __ppos = *ppos; 854 loff_t __ppos = *ppos;
946 855
947 len = sprintf(buffer, "%i\n", oom_adjust); 856 if (!task)
857 return -ESRCH;
858 oom_adjust = task->oomkilladj;
859 put_task_struct(task);
860
861 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
948 if (__ppos >= len) 862 if (__ppos >= len)
949 return 0; 863 return 0;
950 if (count > len-__ppos) 864 if (count > len-__ppos)
@@ -958,15 +872,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
958static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 872static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
959 size_t count, loff_t *ppos) 873 size_t count, loff_t *ppos)
960{ 874{
961 struct task_struct *task = proc_task(file->f_dentry->d_inode); 875 struct task_struct *task;
962 char buffer[8], *end; 876 char buffer[PROC_NUMBUF], *end;
963 int oom_adjust; 877 int oom_adjust;
964 878
965 if (!capable(CAP_SYS_RESOURCE)) 879 if (!capable(CAP_SYS_RESOURCE))
966 return -EPERM; 880 return -EPERM;
967 memset(buffer, 0, 8); 881 memset(buffer, 0, sizeof(buffer));
968 if (count > 6) 882 if (count > sizeof(buffer) - 1)
969 count = 6; 883 count = sizeof(buffer) - 1;
970 if (copy_from_user(buffer, buf, count)) 884 if (copy_from_user(buffer, buf, count))
971 return -EFAULT; 885 return -EFAULT;
972 oom_adjust = simple_strtol(buffer, &end, 0); 886 oom_adjust = simple_strtol(buffer, &end, 0);
@@ -974,7 +888,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
974 return -EINVAL; 888 return -EINVAL;
975 if (*end == '\n') 889 if (*end == '\n')
976 end++; 890 end++;
891 task = get_proc_task(file->f_dentry->d_inode);
892 if (!task)
893 return -ESRCH;
977 task->oomkilladj = oom_adjust; 894 task->oomkilladj = oom_adjust;
895 put_task_struct(task);
978 if (end - buffer == 0) 896 if (end - buffer == 0)
979 return -EIO; 897 return -EIO;
980 return end - buffer; 898 return end - buffer;
@@ -985,22 +903,21 @@ static struct file_operations proc_oom_adjust_operations = {
985 .write = oom_adjust_write, 903 .write = oom_adjust_write,
986}; 904};
987 905
988static struct inode_operations proc_mem_inode_operations = {
989 .permission = proc_permission,
990};
991
992#ifdef CONFIG_AUDITSYSCALL 906#ifdef CONFIG_AUDITSYSCALL
993#define TMPBUFLEN 21 907#define TMPBUFLEN 21
994static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 908static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
995 size_t count, loff_t *ppos) 909 size_t count, loff_t *ppos)
996{ 910{
997 struct inode * inode = file->f_dentry->d_inode; 911 struct inode * inode = file->f_dentry->d_inode;
998 struct task_struct *task = proc_task(inode); 912 struct task_struct *task = get_proc_task(inode);
999 ssize_t length; 913 ssize_t length;
1000 char tmpbuf[TMPBUFLEN]; 914 char tmpbuf[TMPBUFLEN];
1001 915
916 if (!task)
917 return -ESRCH;
1002 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 918 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1003 audit_get_loginuid(task->audit_context)); 919 audit_get_loginuid(task->audit_context));
920 put_task_struct(task);
1004 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 921 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1005} 922}
1006 923
@@ -1010,17 +927,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1010 struct inode * inode = file->f_dentry->d_inode; 927 struct inode * inode = file->f_dentry->d_inode;
1011 char *page, *tmp; 928 char *page, *tmp;
1012 ssize_t length; 929 ssize_t length;
1013 struct task_struct *task = proc_task(inode);
1014 uid_t loginuid; 930 uid_t loginuid;
1015 931
1016 if (!capable(CAP_AUDIT_CONTROL)) 932 if (!capable(CAP_AUDIT_CONTROL))
1017 return -EPERM; 933 return -EPERM;
1018 934
1019 if (current != task) 935 if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
1020 return -EPERM; 936 return -EPERM;
1021 937
1022 if (count > PAGE_SIZE) 938 if (count >= PAGE_SIZE)
1023 count = PAGE_SIZE; 939 count = PAGE_SIZE - 1;
1024 940
1025 if (*ppos != 0) { 941 if (*ppos != 0) {
1026 /* No partial writes. */ 942 /* No partial writes. */
@@ -1033,13 +949,14 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1033 if (copy_from_user(page, buf, count)) 949 if (copy_from_user(page, buf, count))
1034 goto out_free_page; 950 goto out_free_page;
1035 951
952 page[count] = '\0';
1036 loginuid = simple_strtoul(page, &tmp, 10); 953 loginuid = simple_strtoul(page, &tmp, 10);
1037 if (tmp == page) { 954 if (tmp == page) {
1038 length = -EINVAL; 955 length = -EINVAL;
1039 goto out_free_page; 956 goto out_free_page;
1040 957
1041 } 958 }
1042 length = audit_set_loginuid(task, loginuid); 959 length = audit_set_loginuid(current, loginuid);
1043 if (likely(length == 0)) 960 if (likely(length == 0))
1044 length = count; 961 length = count;
1045 962
@@ -1058,13 +975,16 @@ static struct file_operations proc_loginuid_operations = {
1058static ssize_t seccomp_read(struct file *file, char __user *buf, 975static ssize_t seccomp_read(struct file *file, char __user *buf,
1059 size_t count, loff_t *ppos) 976 size_t count, loff_t *ppos)
1060{ 977{
1061 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 978 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1062 char __buf[20]; 979 char __buf[20];
1063 loff_t __ppos = *ppos; 980 loff_t __ppos = *ppos;
1064 size_t len; 981 size_t len;
1065 982
983 if (!tsk)
984 return -ESRCH;
1066 /* no need to print the trailing zero, so use only len */ 985 /* no need to print the trailing zero, so use only len */
1067 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 986 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
987 put_task_struct(tsk);
1068 if (__ppos >= len) 988 if (__ppos >= len)
1069 return 0; 989 return 0;
1070 if (count > len - __ppos) 990 if (count > len - __ppos)
@@ -1078,29 +998,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
1078static ssize_t seccomp_write(struct file *file, const char __user *buf, 998static ssize_t seccomp_write(struct file *file, const char __user *buf,
1079 size_t count, loff_t *ppos) 999 size_t count, loff_t *ppos)
1080{ 1000{
1081 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1001 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1082 char __buf[20], *end; 1002 char __buf[20], *end;
1083 unsigned int seccomp_mode; 1003 unsigned int seccomp_mode;
1004 ssize_t result;
1005
1006 result = -ESRCH;
1007 if (!tsk)
1008 goto out_no_task;
1084 1009
1085 /* can set it only once to be even more secure */ 1010 /* can set it only once to be even more secure */
1011 result = -EPERM;
1086 if (unlikely(tsk->seccomp.mode)) 1012 if (unlikely(tsk->seccomp.mode))
1087 return -EPERM; 1013 goto out;
1088 1014
1015 result = -EFAULT;
1089 memset(__buf, 0, sizeof(__buf)); 1016 memset(__buf, 0, sizeof(__buf));
1090 count = min(count, sizeof(__buf) - 1); 1017 count = min(count, sizeof(__buf) - 1);
1091 if (copy_from_user(__buf, buf, count)) 1018 if (copy_from_user(__buf, buf, count))
1092 return -EFAULT; 1019 goto out;
1020
1093 seccomp_mode = simple_strtoul(__buf, &end, 0); 1021 seccomp_mode = simple_strtoul(__buf, &end, 0);
1094 if (*end == '\n') 1022 if (*end == '\n')
1095 end++; 1023 end++;
1024 result = -EINVAL;
1096 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1025 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
1097 tsk->seccomp.mode = seccomp_mode; 1026 tsk->seccomp.mode = seccomp_mode;
1098 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1027 set_tsk_thread_flag(tsk, TIF_SECCOMP);
1099 } else 1028 } else
1100 return -EINVAL; 1029 goto out;
1030 result = -EIO;
1101 if (unlikely(!(end - __buf))) 1031 if (unlikely(!(end - __buf)))
1102 return -EIO; 1032 goto out;
1103 return end - __buf; 1033 result = end - __buf;
1034out:
1035 put_task_struct(tsk);
1036out_no_task:
1037 return result;
1104} 1038}
1105 1039
1106static struct file_operations proc_seccomp_operations = { 1040static struct file_operations proc_seccomp_operations = {
@@ -1117,10 +1051,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1117 /* We don't need a base pointer in the /proc filesystem */ 1051 /* We don't need a base pointer in the /proc filesystem */
1118 path_release(nd); 1052 path_release(nd);
1119 1053
1120 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1054 /* Are we allowed to snoop on the tasks file descriptors? */
1121 goto out; 1055 if (!proc_fd_access_allowed(inode))
1122 error = proc_check_root(inode);
1123 if (error)
1124 goto out; 1056 goto out;
1125 1057
1126 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1058 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
@@ -1162,12 +1094,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1162 struct dentry *de; 1094 struct dentry *de;
1163 struct vfsmount *mnt = NULL; 1095 struct vfsmount *mnt = NULL;
1164 1096
1165 lock_kernel(); 1097 /* Are we allowed to snoop on the tasks file descriptors? */
1166 1098 if (!proc_fd_access_allowed(inode))
1167 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1168 goto out;
1169 error = proc_check_root(inode);
1170 if (error)
1171 goto out; 1099 goto out;
1172 1100
1173 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1101 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
@@ -1178,7 +1106,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1178 dput(de); 1106 dput(de);
1179 mntput(mnt); 1107 mntput(mnt);
1180out: 1108out:
1181 unlock_kernel();
1182 return error; 1109 return error;
1183} 1110}
1184 1111
@@ -1187,21 +1114,20 @@ static struct inode_operations proc_pid_link_inode_operations = {
1187 .follow_link = proc_pid_follow_link 1114 .follow_link = proc_pid_follow_link
1188}; 1115};
1189 1116
1190#define NUMBUF 10
1191
1192static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1117static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1193{ 1118{
1194 struct inode *inode = filp->f_dentry->d_inode; 1119 struct dentry *dentry = filp->f_dentry;
1195 struct task_struct *p = proc_task(inode); 1120 struct inode *inode = dentry->d_inode;
1121 struct task_struct *p = get_proc_task(inode);
1196 unsigned int fd, tid, ino; 1122 unsigned int fd, tid, ino;
1197 int retval; 1123 int retval;
1198 char buf[NUMBUF]; 1124 char buf[PROC_NUMBUF];
1199 struct files_struct * files; 1125 struct files_struct * files;
1200 struct fdtable *fdt; 1126 struct fdtable *fdt;
1201 1127
1202 retval = -ENOENT; 1128 retval = -ENOENT;
1203 if (!pid_alive(p)) 1129 if (!p)
1204 goto out; 1130 goto out_no_task;
1205 retval = 0; 1131 retval = 0;
1206 tid = p->pid; 1132 tid = p->pid;
1207 1133
@@ -1212,7 +1138,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1212 goto out; 1138 goto out;
1213 filp->f_pos++; 1139 filp->f_pos++;
1214 case 1: 1140 case 1:
1215 ino = fake_ino(tid, PROC_TID_INO); 1141 ino = parent_ino(dentry);
1216 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1142 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1217 goto out; 1143 goto out;
1218 filp->f_pos++; 1144 filp->f_pos++;
@@ -1231,7 +1157,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1231 continue; 1157 continue;
1232 rcu_read_unlock(); 1158 rcu_read_unlock();
1233 1159
1234 j = NUMBUF; 1160 j = PROC_NUMBUF;
1235 i = fd; 1161 i = fd;
1236 do { 1162 do {
1237 j--; 1163 j--;
@@ -1240,7 +1166,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1240 } while (i); 1166 } while (i);
1241 1167
1242 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1168 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1243 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1169 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1244 rcu_read_lock(); 1170 rcu_read_lock();
1245 break; 1171 break;
1246 } 1172 }
@@ -1250,6 +1176,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1250 put_files_struct(files); 1176 put_files_struct(files);
1251 } 1177 }
1252out: 1178out:
1179 put_task_struct(p);
1180out_no_task:
1253 return retval; 1181 return retval;
1254} 1182}
1255 1183
@@ -1261,16 +1189,18 @@ static int proc_pident_readdir(struct file *filp,
1261 int pid; 1189 int pid;
1262 struct dentry *dentry = filp->f_dentry; 1190 struct dentry *dentry = filp->f_dentry;
1263 struct inode *inode = dentry->d_inode; 1191 struct inode *inode = dentry->d_inode;
1192 struct task_struct *task = get_proc_task(inode);
1264 struct pid_entry *p; 1193 struct pid_entry *p;
1265 ino_t ino; 1194 ino_t ino;
1266 int ret; 1195 int ret;
1267 1196
1268 ret = -ENOENT; 1197 ret = -ENOENT;
1269 if (!pid_alive(proc_task(inode))) 1198 if (!task)
1270 goto out; 1199 goto out;
1271 1200
1272 ret = 0; 1201 ret = 0;
1273 pid = proc_task(inode)->pid; 1202 pid = task->pid;
1203 put_task_struct(task);
1274 i = filp->f_pos; 1204 i = filp->f_pos;
1275 switch (i) { 1205 switch (i) {
1276 case 0: 1206 case 0:
@@ -1353,22 +1283,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1353 1283
1354 /* Common stuff */ 1284 /* Common stuff */
1355 ei = PROC_I(inode); 1285 ei = PROC_I(inode);
1356 ei->task = NULL;
1357 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1286 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1358 inode->i_ino = fake_ino(task->pid, ino); 1287 inode->i_ino = fake_ino(task->pid, ino);
1359 1288
1360 if (!pid_alive(task))
1361 goto out_unlock;
1362
1363 /* 1289 /*
1364 * grab the reference to task. 1290 * grab the reference to task.
1365 */ 1291 */
1366 get_task_struct(task); 1292 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
1367 ei->task = task; 1293 if (!ei->pid)
1368 ei->type = ino; 1294 goto out_unlock;
1295
1369 inode->i_uid = 0; 1296 inode->i_uid = 0;
1370 inode->i_gid = 0; 1297 inode->i_gid = 0;
1371 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1298 if (task_dumpable(task)) {
1372 inode->i_uid = task->euid; 1299 inode->i_uid = task->euid;
1373 inode->i_gid = task->egid; 1300 inode->i_gid = task->egid;
1374 } 1301 }
@@ -1378,7 +1305,6 @@ out:
1378 return inode; 1305 return inode;
1379 1306
1380out_unlock: 1307out_unlock:
1381 ei->pde = NULL;
1382 iput(inode); 1308 iput(inode);
1383 return NULL; 1309 return NULL;
1384} 1310}
@@ -1392,13 +1318,21 @@ out_unlock:
1392 * 1318 *
1393 * Rewrite the inode's ownerships here because the owning task may have 1319 * Rewrite the inode's ownerships here because the owning task may have
1394 * performed a setuid(), etc. 1320 * performed a setuid(), etc.
1321 *
1322 * Before the /proc/pid/status file was created the only way to read
1323 * the effective uid of a /process was to stat /proc/pid. Reading
1324 * /proc/pid/status is slow enough that procps and other packages
1325 * kept stating /proc/pid. To keep the rules in /proc simple I have
1326 * made this apply to all per process world readable and executable
1327 * directories.
1395 */ 1328 */
1396static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1329static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1397{ 1330{
1398 struct inode *inode = dentry->d_inode; 1331 struct inode *inode = dentry->d_inode;
1399 struct task_struct *task = proc_task(inode); 1332 struct task_struct *task = get_proc_task(inode);
1400 if (pid_alive(task)) { 1333 if (task) {
1401 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1334 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1335 task_dumpable(task)) {
1402 inode->i_uid = task->euid; 1336 inode->i_uid = task->euid;
1403 inode->i_gid = task->egid; 1337 inode->i_gid = task->egid;
1404 } else { 1338 } else {
@@ -1406,59 +1340,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1406 inode->i_gid = 0; 1340 inode->i_gid = 0;
1407 } 1341 }
1408 security_task_to_inode(task, inode); 1342 security_task_to_inode(task, inode);
1343 put_task_struct(task);
1409 return 1; 1344 return 1;
1410 } 1345 }
1411 d_drop(dentry); 1346 d_drop(dentry);
1412 return 0; 1347 return 0;
1413} 1348}
1414 1349
1350static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1351{
1352 struct inode *inode = dentry->d_inode;
1353 struct task_struct *task;
1354 generic_fillattr(inode, stat);
1355
1356 rcu_read_lock();
1357 stat->uid = 0;
1358 stat->gid = 0;
1359 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1360 if (task) {
1361 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1362 task_dumpable(task)) {
1363 stat->uid = task->euid;
1364 stat->gid = task->egid;
1365 }
1366 }
1367 rcu_read_unlock();
1368 return 0;
1369}
1370
1415static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1371static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1416{ 1372{
1417 struct inode *inode = dentry->d_inode; 1373 struct inode *inode = dentry->d_inode;
1418 struct task_struct *task = proc_task(inode); 1374 struct task_struct *task = get_proc_task(inode);
1419 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1375 int fd = proc_fd(inode);
1420 struct files_struct *files; 1376 struct files_struct *files;
1421 1377
1422 files = get_files_struct(task); 1378 if (task) {
1423 if (files) { 1379 files = get_files_struct(task);
1424 rcu_read_lock(); 1380 if (files) {
1425 if (fcheck_files(files, fd)) { 1381 rcu_read_lock();
1382 if (fcheck_files(files, fd)) {
1383 rcu_read_unlock();
1384 put_files_struct(files);
1385 if (task_dumpable(task)) {
1386 inode->i_uid = task->euid;
1387 inode->i_gid = task->egid;
1388 } else {
1389 inode->i_uid = 0;
1390 inode->i_gid = 0;
1391 }
1392 security_task_to_inode(task, inode);
1393 put_task_struct(task);
1394 return 1;
1395 }
1426 rcu_read_unlock(); 1396 rcu_read_unlock();
1427 put_files_struct(files); 1397 put_files_struct(files);
1428 if (task_dumpable(task)) {
1429 inode->i_uid = task->euid;
1430 inode->i_gid = task->egid;
1431 } else {
1432 inode->i_uid = 0;
1433 inode->i_gid = 0;
1434 }
1435 security_task_to_inode(task, inode);
1436 return 1;
1437 } 1398 }
1438 rcu_read_unlock(); 1399 put_task_struct(task);
1439 put_files_struct(files);
1440 } 1400 }
1441 d_drop(dentry); 1401 d_drop(dentry);
1442 return 0; 1402 return 0;
1443} 1403}
1444 1404
1445static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1446{
1447 struct task_struct *task = proc_task(inode);
1448 spin_lock(&task->proc_lock);
1449 if (task->proc_dentry == dentry)
1450 task->proc_dentry = NULL;
1451 spin_unlock(&task->proc_lock);
1452 iput(inode);
1453}
1454
1455static int pid_delete_dentry(struct dentry * dentry) 1405static int pid_delete_dentry(struct dentry * dentry)
1456{ 1406{
1457 /* Is the task we represent dead? 1407 /* Is the task we represent dead?
1458 * If so, then don't put the dentry on the lru list, 1408 * If so, then don't put the dentry on the lru list,
1459 * kill it immediately. 1409 * kill it immediately.
1460 */ 1410 */
1461 return !pid_alive(proc_task(dentry->d_inode)); 1411 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1462} 1412}
1463 1413
1464static struct dentry_operations tid_fd_dentry_operations = 1414static struct dentry_operations tid_fd_dentry_operations =
@@ -1473,13 +1423,6 @@ static struct dentry_operations pid_dentry_operations =
1473 .d_delete = pid_delete_dentry, 1423 .d_delete = pid_delete_dentry,
1474}; 1424};
1475 1425
1476static struct dentry_operations pid_base_dentry_operations =
1477{
1478 .d_revalidate = pid_revalidate,
1479 .d_iput = pid_base_iput,
1480 .d_delete = pid_delete_dentry,
1481};
1482
1483/* Lookups */ 1426/* Lookups */
1484 1427
1485static unsigned name_to_int(struct dentry *dentry) 1428static unsigned name_to_int(struct dentry *dentry)
@@ -1507,22 +1450,24 @@ out:
1507/* SMP-safe */ 1450/* SMP-safe */
1508static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1451static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1509{ 1452{
1510 struct task_struct *task = proc_task(dir); 1453 struct task_struct *task = get_proc_task(dir);
1511 unsigned fd = name_to_int(dentry); 1454 unsigned fd = name_to_int(dentry);
1455 struct dentry *result = ERR_PTR(-ENOENT);
1512 struct file * file; 1456 struct file * file;
1513 struct files_struct * files; 1457 struct files_struct * files;
1514 struct inode *inode; 1458 struct inode *inode;
1515 struct proc_inode *ei; 1459 struct proc_inode *ei;
1516 1460
1461 if (!task)
1462 goto out_no_task;
1517 if (fd == ~0U) 1463 if (fd == ~0U)
1518 goto out; 1464 goto out;
1519 if (!pid_alive(task))
1520 goto out;
1521 1465
1522 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1466 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1523 if (!inode) 1467 if (!inode)
1524 goto out; 1468 goto out;
1525 ei = PROC_I(inode); 1469 ei = PROC_I(inode);
1470 ei->fd = fd;
1526 files = get_files_struct(task); 1471 files = get_files_struct(task);
1527 if (!files) 1472 if (!files)
1528 goto out_unlock; 1473 goto out_unlock;
@@ -1547,19 +1492,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1547 ei->op.proc_get_link = proc_fd_link; 1492 ei->op.proc_get_link = proc_fd_link;
1548 dentry->d_op = &tid_fd_dentry_operations; 1493 dentry->d_op = &tid_fd_dentry_operations;
1549 d_add(dentry, inode); 1494 d_add(dentry, inode);
1550 return NULL; 1495 /* Close the race of the process dying before we return the dentry */
1496 if (tid_fd_revalidate(dentry, NULL))
1497 result = NULL;
1498out:
1499 put_task_struct(task);
1500out_no_task:
1501 return result;
1551 1502
1552out_unlock2: 1503out_unlock2:
1553 spin_unlock(&files->file_lock); 1504 spin_unlock(&files->file_lock);
1554 put_files_struct(files); 1505 put_files_struct(files);
1555out_unlock: 1506out_unlock:
1556 iput(inode); 1507 iput(inode);
1557out: 1508 goto out;
1558 return ERR_PTR(-ENOENT);
1559} 1509}
1560 1510
1561static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1511static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1562static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1512static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1513static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
1563 1514
1564static struct file_operations proc_fd_operations = { 1515static struct file_operations proc_fd_operations = {
1565 .read = generic_read_dir, 1516 .read = generic_read_dir,
@@ -1576,12 +1527,11 @@ static struct file_operations proc_task_operations = {
1576 */ 1527 */
1577static struct inode_operations proc_fd_inode_operations = { 1528static struct inode_operations proc_fd_inode_operations = {
1578 .lookup = proc_lookupfd, 1529 .lookup = proc_lookupfd,
1579 .permission = proc_permission,
1580}; 1530};
1581 1531
1582static struct inode_operations proc_task_inode_operations = { 1532static struct inode_operations proc_task_inode_operations = {
1583 .lookup = proc_task_lookup, 1533 .lookup = proc_task_lookup,
1584 .permission = proc_task_permission, 1534 .getattr = proc_task_getattr,
1585}; 1535};
1586 1536
1587#ifdef CONFIG_SECURITY 1537#ifdef CONFIG_SECURITY
@@ -1591,12 +1541,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1591 struct inode * inode = file->f_dentry->d_inode; 1541 struct inode * inode = file->f_dentry->d_inode;
1592 unsigned long page; 1542 unsigned long page;
1593 ssize_t length; 1543 ssize_t length;
1594 struct task_struct *task = proc_task(inode); 1544 struct task_struct *task = get_proc_task(inode);
1545
1546 length = -ESRCH;
1547 if (!task)
1548 goto out_no_task;
1595 1549
1596 if (count > PAGE_SIZE) 1550 if (count > PAGE_SIZE)
1597 count = PAGE_SIZE; 1551 count = PAGE_SIZE;
1552 length = -ENOMEM;
1598 if (!(page = __get_free_page(GFP_KERNEL))) 1553 if (!(page = __get_free_page(GFP_KERNEL)))
1599 return -ENOMEM; 1554 goto out;
1600 1555
1601 length = security_getprocattr(task, 1556 length = security_getprocattr(task,
1602 (char*)file->f_dentry->d_name.name, 1557 (char*)file->f_dentry->d_name.name,
@@ -1604,6 +1559,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1604 if (length >= 0) 1559 if (length >= 0)
1605 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1560 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1606 free_page(page); 1561 free_page(page);
1562out:
1563 put_task_struct(task);
1564out_no_task:
1607 return length; 1565 return length;
1608} 1566}
1609 1567
@@ -1613,26 +1571,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1613 struct inode * inode = file->f_dentry->d_inode; 1571 struct inode * inode = file->f_dentry->d_inode;
1614 char *page; 1572 char *page;
1615 ssize_t length; 1573 ssize_t length;
1616 struct task_struct *task = proc_task(inode); 1574 struct task_struct *task = get_proc_task(inode);
1617 1575
1576 length = -ESRCH;
1577 if (!task)
1578 goto out_no_task;
1618 if (count > PAGE_SIZE) 1579 if (count > PAGE_SIZE)
1619 count = PAGE_SIZE; 1580 count = PAGE_SIZE;
1620 if (*ppos != 0) { 1581
1621 /* No partial writes. */ 1582 /* No partial writes. */
1622 return -EINVAL; 1583 length = -EINVAL;
1623 } 1584 if (*ppos != 0)
1585 goto out;
1586
1587 length = -ENOMEM;
1624 page = (char*)__get_free_page(GFP_USER); 1588 page = (char*)__get_free_page(GFP_USER);
1625 if (!page) 1589 if (!page)
1626 return -ENOMEM; 1590 goto out;
1591
1627 length = -EFAULT; 1592 length = -EFAULT;
1628 if (copy_from_user(page, buf, count)) 1593 if (copy_from_user(page, buf, count))
1629 goto out; 1594 goto out_free;
1630 1595
1631 length = security_setprocattr(task, 1596 length = security_setprocattr(task,
1632 (char*)file->f_dentry->d_name.name, 1597 (char*)file->f_dentry->d_name.name,
1633 (void*)page, count); 1598 (void*)page, count);
1634out: 1599out_free:
1635 free_page((unsigned long) page); 1600 free_page((unsigned long) page);
1601out:
1602 put_task_struct(task);
1603out_no_task:
1636 return length; 1604 return length;
1637} 1605}
1638 1606
@@ -1647,24 +1615,22 @@ static struct file_operations proc_tgid_attr_operations;
1647static struct inode_operations proc_tgid_attr_inode_operations; 1615static struct inode_operations proc_tgid_attr_inode_operations;
1648#endif 1616#endif
1649 1617
1650static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1651
1652/* SMP-safe */ 1618/* SMP-safe */
1653static struct dentry *proc_pident_lookup(struct inode *dir, 1619static struct dentry *proc_pident_lookup(struct inode *dir,
1654 struct dentry *dentry, 1620 struct dentry *dentry,
1655 struct pid_entry *ents) 1621 struct pid_entry *ents)
1656{ 1622{
1657 struct inode *inode; 1623 struct inode *inode;
1658 int error; 1624 struct dentry *error;
1659 struct task_struct *task = proc_task(dir); 1625 struct task_struct *task = get_proc_task(dir);
1660 struct pid_entry *p; 1626 struct pid_entry *p;
1661 struct proc_inode *ei; 1627 struct proc_inode *ei;
1662 1628
1663 error = -ENOENT; 1629 error = ERR_PTR(-ENOENT);
1664 inode = NULL; 1630 inode = NULL;
1665 1631
1666 if (!pid_alive(task)) 1632 if (!task)
1667 goto out; 1633 goto out_no_task;
1668 1634
1669 for (p = ents; p->name; p++) { 1635 for (p = ents; p->name; p++) {
1670 if (p->len != dentry->d_name.len) 1636 if (p->len != dentry->d_name.len)
@@ -1675,7 +1641,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1675 if (!p->name) 1641 if (!p->name)
1676 goto out; 1642 goto out;
1677 1643
1678 error = -EINVAL; 1644 error = ERR_PTR(-EINVAL);
1679 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1645 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1680 if (!inode) 1646 if (!inode)
1681 goto out; 1647 goto out;
@@ -1688,7 +1654,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1688 */ 1654 */
1689 switch(p->type) { 1655 switch(p->type) {
1690 case PROC_TGID_TASK: 1656 case PROC_TGID_TASK:
1691 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1657 inode->i_nlink = 2;
1692 inode->i_op = &proc_task_inode_operations; 1658 inode->i_op = &proc_task_inode_operations;
1693 inode->i_fop = &proc_task_operations; 1659 inode->i_fop = &proc_task_operations;
1694 break; 1660 break;
@@ -1758,7 +1724,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1758#endif 1724#endif
1759 case PROC_TID_MEM: 1725 case PROC_TID_MEM:
1760 case PROC_TGID_MEM: 1726 case PROC_TGID_MEM:
1761 inode->i_op = &proc_mem_inode_operations;
1762 inode->i_fop = &proc_mem_operations; 1727 inode->i_fop = &proc_mem_operations;
1763 break; 1728 break;
1764#ifdef CONFIG_SECCOMP 1729#ifdef CONFIG_SECCOMP
@@ -1800,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1800 case PROC_TGID_ATTR_EXEC: 1765 case PROC_TGID_ATTR_EXEC:
1801 case PROC_TID_ATTR_FSCREATE: 1766 case PROC_TID_ATTR_FSCREATE:
1802 case PROC_TGID_ATTR_FSCREATE: 1767 case PROC_TGID_ATTR_FSCREATE:
1768 case PROC_TID_ATTR_KEYCREATE:
1769 case PROC_TGID_ATTR_KEYCREATE:
1770 case PROC_TID_ATTR_SOCKCREATE:
1771 case PROC_TGID_ATTR_SOCKCREATE:
1803 inode->i_fop = &proc_pid_attr_operations; 1772 inode->i_fop = &proc_pid_attr_operations;
1804 break; 1773 break;
1805#endif 1774#endif
@@ -1841,14 +1810,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1841 default: 1810 default:
1842 printk("procfs: impossible type (%d)",p->type); 1811 printk("procfs: impossible type (%d)",p->type);
1843 iput(inode); 1812 iput(inode);
1844 return ERR_PTR(-EINVAL); 1813 error = ERR_PTR(-EINVAL);
1814 goto out;
1845 } 1815 }
1846 dentry->d_op = &pid_dentry_operations; 1816 dentry->d_op = &pid_dentry_operations;
1847 d_add(dentry, inode); 1817 d_add(dentry, inode);
1848 return NULL; 1818 /* Close the race of the process dying before we return the dentry */
1849 1819 if (pid_revalidate(dentry, NULL))
1820 error = NULL;
1850out: 1821out:
1851 return ERR_PTR(error); 1822 put_task_struct(task);
1823out_no_task:
1824 return error;
1852} 1825}
1853 1826
1854static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1827static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
@@ -1871,10 +1844,12 @@ static struct file_operations proc_tid_base_operations = {
1871 1844
1872static struct inode_operations proc_tgid_base_inode_operations = { 1845static struct inode_operations proc_tgid_base_inode_operations = {
1873 .lookup = proc_tgid_base_lookup, 1846 .lookup = proc_tgid_base_lookup,
1847 .getattr = pid_getattr,
1874}; 1848};
1875 1849
1876static struct inode_operations proc_tid_base_inode_operations = { 1850static struct inode_operations proc_tid_base_inode_operations = {
1877 .lookup = proc_tid_base_lookup, 1851 .lookup = proc_tid_base_lookup,
1852 .getattr = pid_getattr,
1878}; 1853};
1879 1854
1880#ifdef CONFIG_SECURITY 1855#ifdef CONFIG_SECURITY
@@ -1916,10 +1891,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1916 1891
1917static struct inode_operations proc_tgid_attr_inode_operations = { 1892static struct inode_operations proc_tgid_attr_inode_operations = {
1918 .lookup = proc_tgid_attr_lookup, 1893 .lookup = proc_tgid_attr_lookup,
1894 .getattr = pid_getattr,
1919}; 1895};
1920 1896
1921static struct inode_operations proc_tid_attr_inode_operations = { 1897static struct inode_operations proc_tid_attr_inode_operations = {
1922 .lookup = proc_tid_attr_lookup, 1898 .lookup = proc_tid_attr_lookup,
1899 .getattr = pid_getattr,
1923}; 1900};
1924#endif 1901#endif
1925 1902
@@ -1929,14 +1906,14 @@ static struct inode_operations proc_tid_attr_inode_operations = {
1929static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1906static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1930 int buflen) 1907 int buflen)
1931{ 1908{
1932 char tmp[30]; 1909 char tmp[PROC_NUMBUF];
1933 sprintf(tmp, "%d", current->tgid); 1910 sprintf(tmp, "%d", current->tgid);
1934 return vfs_readlink(dentry,buffer,buflen,tmp); 1911 return vfs_readlink(dentry,buffer,buflen,tmp);
1935} 1912}
1936 1913
1937static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1914static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1938{ 1915{
1939 char tmp[30]; 1916 char tmp[PROC_NUMBUF];
1940 sprintf(tmp, "%d", current->tgid); 1917 sprintf(tmp, "%d", current->tgid);
1941 return ERR_PTR(vfs_follow_link(nd,tmp)); 1918 return ERR_PTR(vfs_follow_link(nd,tmp));
1942} 1919}
@@ -1947,67 +1924,80 @@ static struct inode_operations proc_self_inode_operations = {
1947}; 1924};
1948 1925
1949/** 1926/**
1950 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1927 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
1951 * @p: task that should be flushed. 1928 *
1929 * @task: task that should be flushed.
1930 *
1931 * Looks in the dcache for
1932 * /proc/@pid
1933 * /proc/@tgid/task/@pid
1934 * if either directory is present flushes it and all of it'ts children
1935 * from the dcache.
1952 * 1936 *
1953 * Drops the /proc/@pid dcache entry from the hash chains. 1937 * It is safe and reasonable to cache /proc entries for a task until
1938 * that task exits. After that they just clog up the dcache with
1939 * useless entries, possibly causing useful dcache entries to be
1940 * flushed instead. This routine is proved to flush those useless
1941 * dcache entries at process exit time.
1954 * 1942 *
1955 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1943 * NOTE: This routine is just an optimization so it does not guarantee
1956 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1944 * that no dcache entries will exist at process exit time it
1957 * if the pid value is immediately reused. This is enforced by 1945 * just makes it very unlikely that any will persist.
1958 * - caller must acquire spin_lock(p->proc_lock)
1959 * - must be called before detach_pid()
1960 * - proc_pid_lookup acquires proc_lock, and checks that
1961 * the target is not dead by looking at the attach count
1962 * of PIDTYPE_PID.
1963 */ 1946 */
1964 1947void proc_flush_task(struct task_struct *task)
1965struct dentry *proc_pid_unhash(struct task_struct *p)
1966{ 1948{
1967 struct dentry *proc_dentry; 1949 struct dentry *dentry, *leader, *dir;
1950 char buf[PROC_NUMBUF];
1951 struct qstr name;
1952
1953 name.name = buf;
1954 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1955 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1956 if (dentry) {
1957 shrink_dcache_parent(dentry);
1958 d_drop(dentry);
1959 dput(dentry);
1960 }
1968 1961
1969 proc_dentry = p->proc_dentry; 1962 if (thread_group_leader(task))
1970 if (proc_dentry != NULL) { 1963 goto out;
1971 1964
1972 spin_lock(&dcache_lock); 1965 name.name = buf;
1973 spin_lock(&proc_dentry->d_lock); 1966 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1974 if (!d_unhashed(proc_dentry)) { 1967 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1975 dget_locked(proc_dentry); 1968 if (!leader)
1976 __d_drop(proc_dentry); 1969 goto out;
1977 spin_unlock(&proc_dentry->d_lock);
1978 } else {
1979 spin_unlock(&proc_dentry->d_lock);
1980 proc_dentry = NULL;
1981 }
1982 spin_unlock(&dcache_lock);
1983 }
1984 return proc_dentry;
1985}
1986 1970
1987/** 1971 name.name = "task";
1988 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1972 name.len = strlen(name.name);
1989 * @proc_dentry: directoy to prune. 1973 dir = d_hash_and_lookup(leader, &name);
1990 * 1974 if (!dir)
1991 * Shrink the /proc directory that was used by the just killed thread. 1975 goto out_put_leader;
1992 */ 1976
1993 1977 name.name = buf;
1994void proc_pid_flush(struct dentry *proc_dentry) 1978 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1995{ 1979 dentry = d_hash_and_lookup(dir, &name);
1996 might_sleep(); 1980 if (dentry) {
1997 if(proc_dentry != NULL) { 1981 shrink_dcache_parent(dentry);
1998 shrink_dcache_parent(proc_dentry); 1982 d_drop(dentry);
1999 dput(proc_dentry); 1983 dput(dentry);
2000 } 1984 }
1985
1986 dput(dir);
1987out_put_leader:
1988 dput(leader);
1989out:
1990 return;
2001} 1991}
2002 1992
2003/* SMP-safe */ 1993/* SMP-safe */
2004struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1994struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2005{ 1995{
1996 struct dentry *result = ERR_PTR(-ENOENT);
2006 struct task_struct *task; 1997 struct task_struct *task;
2007 struct inode *inode; 1998 struct inode *inode;
2008 struct proc_inode *ei; 1999 struct proc_inode *ei;
2009 unsigned tgid; 2000 unsigned tgid;
2010 int died;
2011 2001
2012 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2002 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
2013 inode = new_inode(dir->i_sb); 2003 inode = new_inode(dir->i_sb);
@@ -2028,21 +2018,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2028 if (tgid == ~0U) 2018 if (tgid == ~0U)
2029 goto out; 2019 goto out;
2030 2020
2031 read_lock(&tasklist_lock); 2021 rcu_read_lock();
2032 task = find_task_by_pid(tgid); 2022 task = find_task_by_pid(tgid);
2033 if (task) 2023 if (task)
2034 get_task_struct(task); 2024 get_task_struct(task);
2035 read_unlock(&tasklist_lock); 2025 rcu_read_unlock();
2036 if (!task) 2026 if (!task)
2037 goto out; 2027 goto out;
2038 2028
2039 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2029 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
2030 if (!inode)
2031 goto out_put_task;
2040 2032
2041
2042 if (!inode) {
2043 put_task_struct(task);
2044 goto out;
2045 }
2046 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2033 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2047 inode->i_op = &proc_tgid_base_inode_operations; 2034 inode->i_op = &proc_tgid_base_inode_operations;
2048 inode->i_fop = &proc_tgid_base_operations; 2035 inode->i_fop = &proc_tgid_base_operations;
@@ -2053,45 +2040,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2053 inode->i_nlink = 4; 2040 inode->i_nlink = 4;
2054#endif 2041#endif
2055 2042
2056 dentry->d_op = &pid_base_dentry_operations; 2043 dentry->d_op = &pid_dentry_operations;
2057 2044
2058 died = 0;
2059 d_add(dentry, inode); 2045 d_add(dentry, inode);
2060 spin_lock(&task->proc_lock); 2046 /* Close the race of the process dying before we return the dentry */
2061 task->proc_dentry = dentry; 2047 if (pid_revalidate(dentry, NULL))
2062 if (!pid_alive(task)) { 2048 result = NULL;
2063 dentry = proc_pid_unhash(task);
2064 died = 1;
2065 }
2066 spin_unlock(&task->proc_lock);
2067 2049
2050out_put_task:
2068 put_task_struct(task); 2051 put_task_struct(task);
2069 if (died) {
2070 proc_pid_flush(dentry);
2071 goto out;
2072 }
2073 return NULL;
2074out: 2052out:
2075 return ERR_PTR(-ENOENT); 2053 return result;
2076} 2054}
2077 2055
2078/* SMP-safe */ 2056/* SMP-safe */
2079static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2057static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2080{ 2058{
2059 struct dentry *result = ERR_PTR(-ENOENT);
2081 struct task_struct *task; 2060 struct task_struct *task;
2082 struct task_struct *leader = proc_task(dir); 2061 struct task_struct *leader = get_proc_task(dir);
2083 struct inode *inode; 2062 struct inode *inode;
2084 unsigned tid; 2063 unsigned tid;
2085 2064
2065 if (!leader)
2066 goto out_no_task;
2067
2086 tid = name_to_int(dentry); 2068 tid = name_to_int(dentry);
2087 if (tid == ~0U) 2069 if (tid == ~0U)
2088 goto out; 2070 goto out;
2089 2071
2090 read_lock(&tasklist_lock); 2072 rcu_read_lock();
2091 task = find_task_by_pid(tid); 2073 task = find_task_by_pid(tid);
2092 if (task) 2074 if (task)
2093 get_task_struct(task); 2075 get_task_struct(task);
2094 read_unlock(&tasklist_lock); 2076 rcu_read_unlock();
2095 if (!task) 2077 if (!task)
2096 goto out; 2078 goto out;
2097 if (leader->tgid != task->tgid) 2079 if (leader->tgid != task->tgid)
@@ -2112,101 +2094,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2112 inode->i_nlink = 3; 2094 inode->i_nlink = 3;
2113#endif 2095#endif
2114 2096
2115 dentry->d_op = &pid_base_dentry_operations; 2097 dentry->d_op = &pid_dentry_operations;
2116 2098
2117 d_add(dentry, inode); 2099 d_add(dentry, inode);
2100 /* Close the race of the process dying before we return the dentry */
2101 if (pid_revalidate(dentry, NULL))
2102 result = NULL;
2118 2103
2119 put_task_struct(task);
2120 return NULL;
2121out_drop_task: 2104out_drop_task:
2122 put_task_struct(task); 2105 put_task_struct(task);
2123out: 2106out:
2124 return ERR_PTR(-ENOENT); 2107 put_task_struct(leader);
2108out_no_task:
2109 return result;
2125} 2110}
2126 2111
2127#define PROC_NUMBUF 10
2128#define PROC_MAXPIDS 20
2129
2130/* 2112/*
2131 * Get a few tgid's to return for filldir - we need to hold the 2113 * Find the first tgid to return to user space.
2132 * tasklist lock while doing this, and we must release it before 2114 *
2133 * we actually do the filldir itself, so we use a temp buffer.. 2115 * Usually this is just whatever follows &init_task, but if the users
2116 * buffer was too small to hold the full list or there was a seek into
2117 * the middle of the directory we have more work to do.
2118 *
2119 * In the case of a short read we start with find_task_by_pid.
2120 *
2121 * In the case of a seek we start with &init_task and walk nr
2122 * threads past it.
2134 */ 2123 */
2135static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2124static struct task_struct *first_tgid(int tgid, unsigned int nr)
2136{ 2125{
2137 struct task_struct *p; 2126 struct task_struct *pos;
2138 int nr_tgids = 0; 2127 rcu_read_lock();
2139 2128 if (tgid && nr) {
2140 index--; 2129 pos = find_task_by_pid(tgid);
2141 read_lock(&tasklist_lock); 2130 if (pos && thread_group_leader(pos))
2142 p = NULL; 2131 goto found;
2143 if (version) {
2144 p = find_task_by_pid(version);
2145 if (p && !thread_group_leader(p))
2146 p = NULL;
2147 } 2132 }
2133 /* If nr exceeds the number of processes get out quickly */
2134 pos = NULL;
2135 if (nr && nr >= nr_processes())
2136 goto done;
2148 2137
2149 if (p) 2138 /* If we haven't found our starting place yet start with
2150 index = 0; 2139 * the init_task and walk nr tasks forward.
2151 else 2140 */
2152 p = next_task(&init_task); 2141 for (pos = next_task(&init_task); nr > 0; --nr) {
2153 2142 pos = next_task(pos);
2154 for ( ; p != &init_task; p = next_task(p)) { 2143 if (pos == &init_task) {
2155 int tgid = p->pid; 2144 pos = NULL;
2156 if (!pid_alive(p)) 2145 goto done;
2157 continue; 2146 }
2158 if (--index >= 0)
2159 continue;
2160 tgids[nr_tgids] = tgid;
2161 nr_tgids++;
2162 if (nr_tgids >= PROC_MAXPIDS)
2163 break;
2164 } 2147 }
2165 read_unlock(&tasklist_lock); 2148found:
2166 return nr_tgids; 2149 get_task_struct(pos);
2150done:
2151 rcu_read_unlock();
2152 return pos;
2167} 2153}
2168 2154
2169/* 2155/*
2170 * Get a few tid's to return for filldir - we need to hold the 2156 * Find the next task in the task list.
2171 * tasklist lock while doing this, and we must release it before 2157 * Return NULL if we loop or there is any error.
2172 * we actually do the filldir itself, so we use a temp buffer.. 2158 *
2159 * The reference to the input task_struct is released.
2173 */ 2160 */
2174static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2161static struct task_struct *next_tgid(struct task_struct *start)
2175{ 2162{
2176 struct task_struct *leader_task = proc_task(dir); 2163 struct task_struct *pos;
2177 struct task_struct *task = leader_task; 2164 rcu_read_lock();
2178 int nr_tids = 0; 2165 pos = start;
2179 2166 if (pid_alive(start))
2180 index -= 2; 2167 pos = next_task(start);
2181 read_lock(&tasklist_lock); 2168 if (pid_alive(pos) && (pos != &init_task)) {
2182 /* 2169 get_task_struct(pos);
2183 * The starting point task (leader_task) might be an already 2170 goto done;
2184 * unlinked task, which cannot be used to access the task-list 2171 }
2185 * via next_thread(). 2172 pos = NULL;
2186 */ 2173done:
2187 if (pid_alive(task)) do { 2174 rcu_read_unlock();
2188 int tid = task->pid; 2175 put_task_struct(start);
2189 2176 return pos;
2190 if (--index >= 0)
2191 continue;
2192 if (tids != NULL)
2193 tids[nr_tids] = tid;
2194 nr_tids++;
2195 if (nr_tids >= PROC_MAXPIDS)
2196 break;
2197 } while ((task = next_thread(task)) != leader_task);
2198 read_unlock(&tasklist_lock);
2199 return nr_tids;
2200} 2177}
2201 2178
2202/* for the /proc/ directory itself, after non-process stuff has been done */ 2179/* for the /proc/ directory itself, after non-process stuff has been done */
2203int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2180int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2204{ 2181{
2205 unsigned int tgid_array[PROC_MAXPIDS];
2206 char buf[PROC_NUMBUF]; 2182 char buf[PROC_NUMBUF];
2207 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2183 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2208 unsigned int nr_tgids, i; 2184 struct task_struct *task;
2209 int next_tgid; 2185 int tgid;
2210 2186
2211 if (!nr) { 2187 if (!nr) {
2212 ino_t ino = fake_ino(0,PROC_TGID_INO); 2188 ino_t ino = fake_ino(0,PROC_TGID_INO);
@@ -2215,63 +2191,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2215 filp->f_pos++; 2191 filp->f_pos++;
2216 nr++; 2192 nr++;
2217 } 2193 }
2194 nr -= 1;
2218 2195
2219 /* f_version caches the tgid value that the last readdir call couldn't 2196 /* f_version caches the tgid value that the last readdir call couldn't
2220 * return. lseek aka telldir automagically resets f_version to 0. 2197 * return. lseek aka telldir automagically resets f_version to 0.
2221 */ 2198 */
2222 next_tgid = filp->f_version; 2199 tgid = filp->f_version;
2223 filp->f_version = 0; 2200 filp->f_version = 0;
2224 for (;;) { 2201 for (task = first_tgid(tgid, nr);
2225 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2202 task;
2226 if (!nr_tgids) { 2203 task = next_tgid(task), filp->f_pos++) {
2227 /* no more entries ! */ 2204 int len;
2205 ino_t ino;
2206 tgid = task->pid;
2207 len = snprintf(buf, sizeof(buf), "%d", tgid);
2208 ino = fake_ino(tgid, PROC_TGID_INO);
2209 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2210 /* returning this tgid failed, save it as the first
2211 * pid for the next readir call */
2212 filp->f_version = tgid;
2213 put_task_struct(task);
2228 break; 2214 break;
2229 } 2215 }
2230 next_tgid = 0; 2216 }
2217 return 0;
2218}
2231 2219
2232 /* do not use the last found pid, reserve it for next_tgid */ 2220/*
2233 if (nr_tgids == PROC_MAXPIDS) { 2221 * Find the first tid of a thread group to return to user space.
2234 nr_tgids--; 2222 *
2235 next_tgid = tgid_array[nr_tgids]; 2223 * Usually this is just the thread group leader, but if the users
2236 } 2224 * buffer was too small or there was a seek into the middle of the
2225 * directory we have more work todo.
2226 *
2227 * In the case of a short read we start with find_task_by_pid.
2228 *
2229 * In the case of a seek we start with the leader and walk nr
2230 * threads past it.
2231 */
2232static struct task_struct *first_tid(struct task_struct *leader,
2233 int tid, int nr)
2234{
2235 struct task_struct *pos;
2237 2236
2238 for (i=0;i<nr_tgids;i++) { 2237 rcu_read_lock();
2239 int tgid = tgid_array[i]; 2238 /* Attempt to start with the pid of a thread */
2240 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2239 if (tid && (nr > 0)) {
2241 unsigned long j = PROC_NUMBUF; 2240 pos = find_task_by_pid(tid);
2241 if (pos && (pos->group_leader == leader))
2242 goto found;
2243 }
2242 2244
2243 do 2245 /* If nr exceeds the number of threads there is nothing todo */
2244 buf[--j] = '0' + (tgid % 10); 2246 pos = NULL;
2245 while ((tgid /= 10) != 0); 2247 if (nr && nr >= get_nr_threads(leader))
2248 goto out;
2246 2249
2247 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2250 /* If we haven't found our starting place yet start
2248 /* returning this tgid failed, save it as the first 2251 * with the leader and walk nr threads forward.
2249 * pid for the next readir call */ 2252 */
2250 filp->f_version = tgid_array[i]; 2253 for (pos = leader; nr > 0; --nr) {
2251 goto out; 2254 pos = next_thread(pos);
2252 } 2255 if (pos == leader) {
2253 filp->f_pos++; 2256 pos = NULL;
2254 nr++; 2257 goto out;
2255 } 2258 }
2256 } 2259 }
2260found:
2261 get_task_struct(pos);
2257out: 2262out:
2258 return 0; 2263 rcu_read_unlock();
2264 return pos;
2265}
2266
2267/*
2268 * Find the next thread in the thread list.
2269 * Return NULL if there is an error or no next thread.
2270 *
2271 * The reference to the input task_struct is released.
2272 */
2273static struct task_struct *next_tid(struct task_struct *start)
2274{
2275 struct task_struct *pos = NULL;
2276 rcu_read_lock();
2277 if (pid_alive(start)) {
2278 pos = next_thread(start);
2279 if (thread_group_leader(pos))
2280 pos = NULL;
2281 else
2282 get_task_struct(pos);
2283 }
2284 rcu_read_unlock();
2285 put_task_struct(start);
2286 return pos;
2259} 2287}
2260 2288
2261/* for the /proc/TGID/task/ directories */ 2289/* for the /proc/TGID/task/ directories */
2262static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2290static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2263{ 2291{
2264 unsigned int tid_array[PROC_MAXPIDS];
2265 char buf[PROC_NUMBUF]; 2292 char buf[PROC_NUMBUF];
2266 unsigned int nr_tids, i;
2267 struct dentry *dentry = filp->f_dentry; 2293 struct dentry *dentry = filp->f_dentry;
2268 struct inode *inode = dentry->d_inode; 2294 struct inode *inode = dentry->d_inode;
2295 struct task_struct *leader = get_proc_task(inode);
2296 struct task_struct *task;
2269 int retval = -ENOENT; 2297 int retval = -ENOENT;
2270 ino_t ino; 2298 ino_t ino;
2299 int tid;
2271 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2300 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2272 2301
2273 if (!pid_alive(proc_task(inode))) 2302 if (!leader)
2274 goto out; 2303 goto out_no_task;
2275 retval = 0; 2304 retval = 0;
2276 2305
2277 switch (pos) { 2306 switch (pos) {
@@ -2289,24 +2318,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2289 /* fall through */ 2318 /* fall through */
2290 } 2319 }
2291 2320
2292 nr_tids = get_tid_list(pos, tid_array, inode); 2321 /* f_version caches the tgid value that the last readdir call couldn't
2293 inode->i_nlink = pos + nr_tids; 2322 * return. lseek aka telldir automagically resets f_version to 0.
2294 2323 */
2295 for (i = 0; i < nr_tids; i++) { 2324 tid = filp->f_version;
2296 unsigned long j = PROC_NUMBUF; 2325 filp->f_version = 0;
2297 int tid = tid_array[i]; 2326 for (task = first_tid(leader, tid, pos - 2);
2298 2327 task;
2299 ino = fake_ino(tid,PROC_TID_INO); 2328 task = next_tid(task), pos++) {
2300 2329 int len;
2301 do 2330 tid = task->pid;
2302 buf[--j] = '0' + (tid % 10); 2331 len = snprintf(buf, sizeof(buf), "%d", tid);
2303 while ((tid /= 10) != 0); 2332 ino = fake_ino(tid, PROC_TID_INO);
2304 2333 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2305 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2334 /* returning this tgid failed, save it as the first
2335 * pid for the next readir call */
2336 filp->f_version = tid;
2337 put_task_struct(task);
2306 break; 2338 break;
2307 pos++; 2339 }
2308 } 2340 }
2309out: 2341out:
2310 filp->f_pos = pos; 2342 filp->f_pos = pos;
2343 put_task_struct(leader);
2344out_no_task:
2311 return retval; 2345 return retval;
2312} 2346}
2347
2348static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2349{
2350 struct inode *inode = dentry->d_inode;
2351 struct task_struct *p = get_proc_task(inode);
2352 generic_fillattr(inode, stat);
2353
2354 if (p) {
2355 rcu_read_lock();
2356 stat->nlink += get_nr_threads(p);
2357 rcu_read_unlock();
2358 put_task_struct(p);
2359 }
2360
2361 return 0;
2362}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 722b9c463111..6dcef089e18e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de)
58static void proc_delete_inode(struct inode *inode) 58static void proc_delete_inode(struct inode *inode)
59{ 59{
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk;
62 61
63 truncate_inode_pages(&inode->i_data, 0); 62 truncate_inode_pages(&inode->i_data, 0);
64 63
65 /* Let go of any associated process */ 64 /* Stop tracking associated processes */
66 tsk = PROC_I(inode)->task; 65 put_pid(PROC_I(inode)->pid);
67 if (tsk)
68 put_task_struct(tsk);
69 66
70 /* Let go of any associated proc directory entry */ 67 /* Let go of any associated proc directory entry */
71 de = PROC_I(inode)->pde; 68 de = PROC_I(inode)->pde;
@@ -94,8 +91,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
94 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); 91 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
95 if (!ei) 92 if (!ei)
96 return NULL; 93 return NULL;
97 ei->task = NULL; 94 ei->pid = NULL;
98 ei->type = 0; 95 ei->fd = 0;
99 ei->op.proc_get_link = NULL; 96 ei->op.proc_get_link = NULL;
100 ei->pde = NULL; 97 ei->pde = NULL;
101 inode = &ei->vfs_inode; 98 inode = &ei->vfs_inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0502f17b860d..146a434ba944 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,16 +37,30 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40extern struct file_operations proc_maps_operations;
41extern struct file_operations proc_numa_maps_operations;
42extern struct file_operations proc_smaps_operations;
43
44extern struct file_operations proc_maps_operations;
45extern struct file_operations proc_numa_maps_operations;
46extern struct file_operations proc_smaps_operations;
47
48
40void free_proc_entry(struct proc_dir_entry *de); 49void free_proc_entry(struct proc_dir_entry *de);
41 50
42int proc_init_inodecache(void); 51int proc_init_inodecache(void);
43 52
44static inline struct task_struct *proc_task(struct inode *inode) 53static inline struct pid *proc_pid(struct inode *inode)
54{
55 return PROC_I(inode)->pid;
56}
57
58static inline struct task_struct *get_proc_task(struct inode *inode)
45{ 59{
46 return PROC_I(inode)->task; 60 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
47} 61}
48 62
49static inline int proc_type(struct inode *inode) 63static inline int proc_fd(struct inode *inode)
50{ 64{
51 return PROC_I(inode)->type; 65 return PROC_I(inode)->fd;
52} 66}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 17f6e8fa1397..036d14d83627 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -9,7 +9,6 @@
9 * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> 9 * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
10 */ 10 */
11 11
12#include <linux/config.h>
13#include <linux/mm.h> 12#include <linux/mm.h>
14#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
15#include <linux/user.h> 14#include <linux/user.h>
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5c10ea157425..9f2cfc30f9cf 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -26,7 +26,6 @@
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/config.h>
30#include <linux/mm.h> 29#include <linux/mm.h>
31#include <linux/mmzone.h> 30#include <linux/mmzone.h>
32#include <linux/pagemap.h> 31#include <linux/pagemap.h>
@@ -120,7 +119,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
120{ 119{
121 struct sysinfo i; 120 struct sysinfo i;
122 int len; 121 int len;
123 struct page_state ps;
124 unsigned long inactive; 122 unsigned long inactive;
125 unsigned long active; 123 unsigned long active;
126 unsigned long free; 124 unsigned long free;
@@ -129,7 +127,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
129 struct vmalloc_info vmi; 127 struct vmalloc_info vmi;
130 long cached; 128 long cached;
131 129
132 get_page_state(&ps);
133 get_zone_counts(&active, &inactive, &free); 130 get_zone_counts(&active, &inactive, &free);
134 131
135/* 132/*
@@ -142,7 +139,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
142 allowed = ((totalram_pages - hugetlb_total_pages()) 139 allowed = ((totalram_pages - hugetlb_total_pages())
143 * sysctl_overcommit_ratio / 100) + total_swap_pages; 140 * sysctl_overcommit_ratio / 100) + total_swap_pages;
144 141
145 cached = get_page_cache_size() - total_swapcache_pages - i.bufferram; 142 cached = global_page_state(NR_FILE_PAGES) -
143 total_swapcache_pages - i.bufferram;
146 if (cached < 0) 144 if (cached < 0)
147 cached = 0; 145 cached = 0;
148 146
@@ -167,11 +165,14 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
167 "SwapFree: %8lu kB\n" 165 "SwapFree: %8lu kB\n"
168 "Dirty: %8lu kB\n" 166 "Dirty: %8lu kB\n"
169 "Writeback: %8lu kB\n" 167 "Writeback: %8lu kB\n"
168 "AnonPages: %8lu kB\n"
170 "Mapped: %8lu kB\n" 169 "Mapped: %8lu kB\n"
171 "Slab: %8lu kB\n" 170 "Slab: %8lu kB\n"
171 "PageTables: %8lu kB\n"
172 "NFS Unstable: %8lu kB\n"
173 "Bounce: %8lu kB\n"
172 "CommitLimit: %8lu kB\n" 174 "CommitLimit: %8lu kB\n"
173 "Committed_AS: %8lu kB\n" 175 "Committed_AS: %8lu kB\n"
174 "PageTables: %8lu kB\n"
175 "VmallocTotal: %8lu kB\n" 176 "VmallocTotal: %8lu kB\n"
176 "VmallocUsed: %8lu kB\n" 177 "VmallocUsed: %8lu kB\n"
177 "VmallocChunk: %8lu kB\n", 178 "VmallocChunk: %8lu kB\n",
@@ -188,13 +189,16 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
188 K(i.freeram-i.freehigh), 189 K(i.freeram-i.freehigh),
189 K(i.totalswap), 190 K(i.totalswap),
190 K(i.freeswap), 191 K(i.freeswap),
191 K(ps.nr_dirty), 192 K(global_page_state(NR_FILE_DIRTY)),
192 K(ps.nr_writeback), 193 K(global_page_state(NR_WRITEBACK)),
193 K(ps.nr_mapped), 194 K(global_page_state(NR_ANON_PAGES)),
194 K(ps.nr_slab), 195 K(global_page_state(NR_FILE_MAPPED)),
196 K(global_page_state(NR_SLAB)),
197 K(global_page_state(NR_PAGETABLE)),
198 K(global_page_state(NR_UNSTABLE_NFS)),
199 K(global_page_state(NR_BOUNCE)),
195 K(allowed), 200 K(allowed),
196 K(committed), 201 K(committed),
197 K(ps.nr_page_table_pages),
198 (unsigned long)VMALLOC_TOTAL >> 10, 202 (unsigned long)VMALLOC_TOTAL >> 10,
199 vmi.used >> 10, 203 vmi.used >> 10,
200 vmi.largest_chunk >> 10 204 vmi.largest_chunk >> 10
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c3fd3611112f..8901c65caca8 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -12,7 +12,6 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/config.h>
16#include <linux/init.h> 15#include <linux/init.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/bitops.h> 17#include <linux/bitops.h>
@@ -26,10 +25,10 @@ struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc
26struct proc_dir_entry *proc_sys_root; 25struct proc_dir_entry *proc_sys_root;
27#endif 26#endif
28 27
29static struct super_block *proc_get_sb(struct file_system_type *fs_type, 28static int proc_get_sb(struct file_system_type *fs_type,
30 int flags, const char *dev_name, void *data) 29 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
31{ 30{
32 return get_sb_single(fs_type, flags, data, proc_fill_super); 31 return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
33} 32}
34 33
35static struct file_system_type proc_fs_type = { 34static struct file_system_type proc_fs_type = {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 91b7c15ab373..0a163a4f7764 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
75{ 75{
76 struct vm_area_struct * vma; 76 struct vm_area_struct * vma;
77 int result = -ENOENT; 77 int result = -ENOENT;
78 struct task_struct *task = proc_task(inode); 78 struct task_struct *task = get_proc_task(inode);
79 struct mm_struct * mm = get_task_mm(task); 79 struct mm_struct * mm = NULL;
80 80
81 if (task) {
82 mm = get_task_mm(task);
83 put_task_struct(task);
84 }
81 if (!mm) 85 if (!mm)
82 goto out; 86 goto out;
83 down_read(&mm->mmap_sem); 87 down_read(&mm->mmap_sem);
@@ -118,9 +122,15 @@ struct mem_size_stats
118 unsigned long private_dirty; 122 unsigned long private_dirty;
119}; 123};
120 124
125__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
126{
127 return NULL;
128}
129
121static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 130static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
122{ 131{
123 struct task_struct *task = m->private; 132 struct proc_maps_private *priv = m->private;
133 struct task_struct *task = priv->task;
124 struct vm_area_struct *vma = v; 134 struct vm_area_struct *vma = v;
125 struct mm_struct *mm = vma->vm_mm; 135 struct mm_struct *mm = vma->vm_mm;
126 struct file *file = vma->vm_file; 136 struct file *file = vma->vm_file;
@@ -153,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
153 pad_len_spaces(m, len); 163 pad_len_spaces(m, len);
154 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); 164 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
155 } else { 165 } else {
156 if (mm) { 166 const char *name = arch_vma_name(vma);
157 if (vma->vm_start <= mm->start_brk && 167 if (!name) {
168 if (mm) {
169 if (vma->vm_start <= mm->start_brk &&
158 vma->vm_end >= mm->brk) { 170 vma->vm_end >= mm->brk) {
159 pad_len_spaces(m, len); 171 name = "[heap]";
160 seq_puts(m, "[heap]"); 172 } else if (vma->vm_start <= mm->start_stack &&
161 } else { 173 vma->vm_end >= mm->start_stack) {
162 if (vma->vm_start <= mm->start_stack && 174 name = "[stack]";
163 vma->vm_end >= mm->start_stack) {
164
165 pad_len_spaces(m, len);
166 seq_puts(m, "[stack]");
167 } 175 }
176 } else {
177 name = "[vdso]";
168 } 178 }
169 } else { 179 }
180 if (name) {
170 pad_len_spaces(m, len); 181 pad_len_spaces(m, len);
171 seq_puts(m, "[vdso]"); 182 seq_puts(m, name);
172 } 183 }
173 } 184 }
174 seq_putc(m, '\n'); 185 seq_putc(m, '\n');
@@ -295,12 +306,16 @@ static int show_smap(struct seq_file *m, void *v)
295 306
296static void *m_start(struct seq_file *m, loff_t *pos) 307static void *m_start(struct seq_file *m, loff_t *pos)
297{ 308{
298 struct task_struct *task = m->private; 309 struct proc_maps_private *priv = m->private;
299 unsigned long last_addr = m->version; 310 unsigned long last_addr = m->version;
300 struct mm_struct *mm; 311 struct mm_struct *mm;
301 struct vm_area_struct *vma, *tail_vma; 312 struct vm_area_struct *vma, *tail_vma = NULL;
302 loff_t l = *pos; 313 loff_t l = *pos;
303 314
315 /* Clear the per syscall fields in priv */
316 priv->task = NULL;
317 priv->tail_vma = NULL;
318
304 /* 319 /*
305 * We remember last_addr rather than next_addr to hit with 320 * We remember last_addr rather than next_addr to hit with
306 * mmap_cache most of the time. We have zero last_addr at 321 * mmap_cache most of the time. We have zero last_addr at
@@ -311,11 +326,15 @@ static void *m_start(struct seq_file *m, loff_t *pos)
311 if (last_addr == -1UL) 326 if (last_addr == -1UL)
312 return NULL; 327 return NULL;
313 328
314 mm = get_task_mm(task); 329 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
330 if (!priv->task)
331 return NULL;
332
333 mm = get_task_mm(priv->task);
315 if (!mm) 334 if (!mm)
316 return NULL; 335 return NULL;
317 336
318 tail_vma = get_gate_vma(task); 337 priv->tail_vma = tail_vma = get_gate_vma(priv->task);
319 down_read(&mm->mmap_sem); 338 down_read(&mm->mmap_sem);
320 339
321 /* Start with last addr hint */ 340 /* Start with last addr hint */
@@ -350,11 +369,9 @@ out:
350 return tail_vma; 369 return tail_vma;
351} 370}
352 371
353static void m_stop(struct seq_file *m, void *v) 372static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
354{ 373{
355 struct task_struct *task = m->private; 374 if (vma && vma != priv->tail_vma) {
356 struct vm_area_struct *vma = v;
357 if (vma && vma != get_gate_vma(task)) {
358 struct mm_struct *mm = vma->vm_mm; 375 struct mm_struct *mm = vma->vm_mm;
359 up_read(&mm->mmap_sem); 376 up_read(&mm->mmap_sem);
360 mmput(mm); 377 mmput(mm);
@@ -363,38 +380,103 @@ static void m_stop(struct seq_file *m, void *v)
363 380
364static void *m_next(struct seq_file *m, void *v, loff_t *pos) 381static void *m_next(struct seq_file *m, void *v, loff_t *pos)
365{ 382{
366 struct task_struct *task = m->private; 383 struct proc_maps_private *priv = m->private;
367 struct vm_area_struct *vma = v; 384 struct vm_area_struct *vma = v;
368 struct vm_area_struct *tail_vma = get_gate_vma(task); 385 struct vm_area_struct *tail_vma = priv->tail_vma;
369 386
370 (*pos)++; 387 (*pos)++;
371 if (vma && (vma != tail_vma) && vma->vm_next) 388 if (vma && (vma != tail_vma) && vma->vm_next)
372 return vma->vm_next; 389 return vma->vm_next;
373 m_stop(m, v); 390 vma_stop(priv, vma);
374 return (vma != tail_vma)? tail_vma: NULL; 391 return (vma != tail_vma)? tail_vma: NULL;
375} 392}
376 393
377struct seq_operations proc_pid_maps_op = { 394static void m_stop(struct seq_file *m, void *v)
395{
396 struct proc_maps_private *priv = m->private;
397 struct vm_area_struct *vma = v;
398
399 vma_stop(priv, vma);
400 if (priv->task)
401 put_task_struct(priv->task);
402}
403
404static struct seq_operations proc_pid_maps_op = {
378 .start = m_start, 405 .start = m_start,
379 .next = m_next, 406 .next = m_next,
380 .stop = m_stop, 407 .stop = m_stop,
381 .show = show_map 408 .show = show_map
382}; 409};
383 410
384struct seq_operations proc_pid_smaps_op = { 411static struct seq_operations proc_pid_smaps_op = {
385 .start = m_start, 412 .start = m_start,
386 .next = m_next, 413 .next = m_next,
387 .stop = m_stop, 414 .stop = m_stop,
388 .show = show_smap 415 .show = show_smap
389}; 416};
390 417
418static int do_maps_open(struct inode *inode, struct file *file,
419 struct seq_operations *ops)
420{
421 struct proc_maps_private *priv;
422 int ret = -ENOMEM;
423 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
424 if (priv) {
425 priv->pid = proc_pid(inode);
426 ret = seq_open(file, ops);
427 if (!ret) {
428 struct seq_file *m = file->private_data;
429 m->private = priv;
430 } else {
431 kfree(priv);
432 }
433 }
434 return ret;
435}
436
437static int maps_open(struct inode *inode, struct file *file)
438{
439 return do_maps_open(inode, file, &proc_pid_maps_op);
440}
441
442struct file_operations proc_maps_operations = {
443 .open = maps_open,
444 .read = seq_read,
445 .llseek = seq_lseek,
446 .release = seq_release_private,
447};
448
391#ifdef CONFIG_NUMA 449#ifdef CONFIG_NUMA
392extern int show_numa_map(struct seq_file *m, void *v); 450extern int show_numa_map(struct seq_file *m, void *v);
393 451
394struct seq_operations proc_pid_numa_maps_op = { 452static struct seq_operations proc_pid_numa_maps_op = {
395 .start = m_start, 453 .start = m_start,
396 .next = m_next, 454 .next = m_next,
397 .stop = m_stop, 455 .stop = m_stop,
398 .show = show_numa_map 456 .show = show_numa_map
399}; 457};
458
459static int numa_maps_open(struct inode *inode, struct file *file)
460{
461 return do_maps_open(inode, file, &proc_pid_numa_maps_op);
462}
463
464struct file_operations proc_numa_maps_operations = {
465 .open = numa_maps_open,
466 .read = seq_read,
467 .llseek = seq_lseek,
468 .release = seq_release_private,
469};
400#endif 470#endif
471
472static int smaps_open(struct inode *inode, struct file *file)
473{
474 return do_maps_open(inode, file, &proc_pid_smaps_op);
475}
476
477struct file_operations proc_smaps_operations = {
478 .open = smaps_open,
479 .read = seq_read,
480 .llseek = seq_lseek,
481 .release = seq_release_private,
482};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f68827ed10e..af69f28277b6 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -156,9 +156,28 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
156{ 156{
157 return NULL; 157 return NULL;
158} 158}
159struct seq_operations proc_pid_maps_op = { 159static struct seq_operations proc_pid_maps_op = {
160 .start = m_start, 160 .start = m_start,
161 .next = m_next, 161 .next = m_next,
162 .stop = m_stop, 162 .stop = m_stop,
163 .show = show_map 163 .show = show_map
164}; 164};
165
166static int maps_open(struct inode *inode, struct file *file)
167{
168 int ret;
169 ret = seq_open(file, &proc_pid_maps_op);
170 if (!ret) {
171 struct seq_file *m = file->private_data;
172 m->private = NULL;
173 }
174 return ret;
175}
176
177struct file_operations proc_maps_operations = {
178 .open = maps_open,
179 .read = seq_read,
180 .llseek = seq_lseek,
181 .release = seq_release,
182};
183
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 20d4b2237fce..d96050728c43 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -7,7 +7,6 @@
7 * 7 *
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/proc_fs.h> 11#include <linux/proc_fs.h>
13#include <linux/user.h> 12#include <linux/user.h>
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 46efbf52cbec..8425cf6e9624 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -13,7 +13,6 @@
13 * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . 13 * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
14 */ 14 */
15 15
16#include <linux/config.h>
17#include <linux/time.h> 16#include <linux/time.h>
18#include <linux/fs.h> 17#include <linux/fs.h>
19#include <linux/qnx4_fs.h> 18#include <linux/qnx4_fs.h>
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 9031948fefd0..0d7103fa0df5 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -11,7 +11,6 @@
11 * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. 11 * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/string.h> 14#include <linux/string.h>
16#include <linux/errno.h> 15#include <linux/errno.h>
17#include <linux/fs.h> 16#include <linux/fs.h>
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c
index df5bc75d5414..aa3b19544bee 100644
--- a/fs/qnx4/fsync.c
+++ b/fs/qnx4/fsync.c
@@ -10,7 +10,6 @@
10 * 24-03-1998 by Richard Frowijn : first release. 10 * 24-03-1998 by Richard Frowijn : first release.
11 */ 11 */
12 12
13#include <linux/config.h>
14#include <linux/errno.h> 13#include <linux/errno.h>
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/stat.h> 15#include <linux/stat.h>
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2ecd46f85e9f..5a903491e697 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -12,7 +12,6 @@
12 * 30-06-1998 by Frank Denis : first step to write inodes. 12 * 30-06-1998 by Frank Denis : first step to write inodes.
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/types.h> 16#include <linux/types.h>
18#include <linux/string.h> 17#include <linux/string.h>
@@ -128,7 +127,7 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb);
128static void qnx4_destroy_inode(struct inode *inode); 127static void qnx4_destroy_inode(struct inode *inode);
129static void qnx4_read_inode(struct inode *); 128static void qnx4_read_inode(struct inode *);
130static int qnx4_remount(struct super_block *sb, int *flags, char *data); 129static int qnx4_remount(struct super_block *sb, int *flags, char *data);
131static int qnx4_statfs(struct super_block *, struct kstatfs *); 130static int qnx4_statfs(struct dentry *, struct kstatfs *);
132 131
133static struct super_operations qnx4_sops = 132static struct super_operations qnx4_sops =
134{ 133{
@@ -282,8 +281,10 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
282 return block; 281 return block;
283} 282}
284 283
285static int qnx4_statfs(struct super_block *sb, struct kstatfs *buf) 284static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf)
286{ 285{
286 struct super_block *sb = dentry->d_sb;
287
287 lock_kernel(); 288 lock_kernel();
288 289
289 buf->f_type = sb->s_magic; 290 buf->f_type = sb->s_magic;
@@ -448,7 +449,7 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
448{ 449{
449 return generic_block_bmap(mapping,block,qnx4_get_block); 450 return generic_block_bmap(mapping,block,qnx4_get_block);
450} 451}
451static struct address_space_operations qnx4_aops = { 452static const struct address_space_operations qnx4_aops = {
452 .readpage = qnx4_readpage, 453 .readpage = qnx4_readpage,
453 .writepage = qnx4_writepage, 454 .writepage = qnx4_writepage,
454 .sync_page = block_sync_page, 455 .sync_page = block_sync_page,
@@ -561,10 +562,11 @@ static void destroy_inodecache(void)
561 "qnx4_inode_cache: not all structures were freed\n"); 562 "qnx4_inode_cache: not all structures were freed\n");
562} 563}
563 564
564static struct super_block *qnx4_get_sb(struct file_system_type *fs_type, 565static int qnx4_get_sb(struct file_system_type *fs_type,
565 int flags, const char *dev_name, void *data) 566 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
566{ 567{
567 return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); 568 return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
569 mnt);
568} 570}
569 571
570static struct file_system_type qnx4_fs_type = { 572static struct file_system_type qnx4_fs_type = {
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 4af4951d7f54..c3d83f67154a 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -12,7 +12,6 @@
12 * 04-07-1998 by Frank Denis : first step for rmdir/unlink. 12 * 04-07-1998 by Frank Denis : first step for rmdir/unlink.
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/time.h> 15#include <linux/time.h>
17#include <linux/fs.h> 16#include <linux/fs.h>
18#include <linux/qnx4_fs.h> 17#include <linux/qnx4_fs.h>
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
index 86563ec01b39..6437c1c3d1dd 100644
--- a/fs/qnx4/truncate.c
+++ b/fs/qnx4/truncate.c
@@ -10,7 +10,6 @@
10 * 30-06-1998 by Frank DENIS : ugly filler. 10 * 30-06-1998 by Frank DENIS : ugly filler.
11 */ 11 */
12 12
13#include <linux/config.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/errno.h> 14#include <linux/errno.h>
16#include <linux/fs.h> 15#include <linux/fs.h>
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 00a933eb820c..86f14cacf641 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,7 +26,7 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28 28
29struct address_space_operations ramfs_aops = { 29const struct address_space_operations ramfs_aops = {
30 .readpage = simple_readpage, 30 .readpage = simple_readpage,
31 .prepare_write = simple_prepare_write, 31 .prepare_write = simple_prepare_write,
32 .commit_write = simple_commit_write 32 .commit_write = simple_commit_write
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index f443a84b98a5..99fffc9e1bfd 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -27,7 +27,7 @@
27 27
28static int ramfs_nommu_setattr(struct dentry *, struct iattr *); 28static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
29 29
30struct address_space_operations ramfs_aops = { 30const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 31 .readpage = simple_readpage,
32 .prepare_write = simple_prepare_write, 32 .prepare_write = simple_prepare_write,
33 .commit_write = simple_commit_write 33 .commit_write = simple_commit_write
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 14bd2246fb6d..b9677335cc8d 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -185,16 +185,17 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
185 return 0; 185 return 0;
186} 186}
187 187
188struct super_block *ramfs_get_sb(struct file_system_type *fs_type, 188int ramfs_get_sb(struct file_system_type *fs_type,
189 int flags, const char *dev_name, void *data) 189 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
190{ 190{
191 return get_sb_nodev(fs_type, flags, data, ramfs_fill_super); 191 return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
192} 192}
193 193
194static struct super_block *rootfs_get_sb(struct file_system_type *fs_type, 194static int rootfs_get_sb(struct file_system_type *fs_type,
195 int flags, const char *dev_name, void *data) 195 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
196{ 196{
197 return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); 197 return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
198 mnt);
198} 199}
199 200
200static struct file_system_type ramfs_fs_type = { 201static struct file_system_type ramfs_fs_type = {
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index 313237631b49..c2bb58e74653 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -10,6 +10,6 @@
10 */ 10 */
11 11
12 12
13extern struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations; 14extern const struct file_operations ramfs_file_operations;
15extern struct inode_operations ramfs_file_inode_operations; 15extern struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 909f71e9a30f..4a7dbdee1b6d 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -3,7 +3,6 @@
3 */ 3 */
4/* Reiserfs block (de)allocator, bitmap-based. */ 4/* Reiserfs block (de)allocator, bitmap-based. */
5 5
6#include <linux/config.h>
7#include <linux/time.h> 6#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 7#include <linux/reiserfs_fs.h>
9#include <linux/errno.h> 8#include <linux/errno.h>
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 973c819f8033..9aabcc0ccd2d 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <linux/string.h> 5#include <linux/string.h>
7#include <linux/errno.h> 6#include <linux/errno.h>
8#include <linux/fs.h> 7#include <linux/fs.h>
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index b2264ba3cc56..fba304e64de8 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -15,7 +15,6 @@
15 ** 15 **
16 **/ 16 **/
17 17
18#include <linux/config.h>
19#include <asm/uaccess.h> 18#include <asm/uaccess.h>
20#include <linux/time.h> 19#include <linux/time.h>
21#include <linux/reiserfs_fs.h> 20#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index cf6e1cf40351..752cea12e30f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1560 return res; 1560 return res;
1561} 1561}
1562 1562
1563static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1564 size_t count, loff_t pos)
1565{
1566 return generic_file_aio_write(iocb, buf, count, pos);
1567}
1568
1569const struct file_operations reiserfs_file_operations = { 1563const struct file_operations reiserfs_file_operations = {
1570 .read = generic_file_read, 1564 .read = generic_file_read,
1571 .write = reiserfs_file_write, 1565 .write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_file_operations = {
1575 .fsync = reiserfs_sync_file, 1569 .fsync = reiserfs_sync_file,
1576 .sendfile = generic_file_sendfile, 1570 .sendfile = generic_file_sendfile,
1577 .aio_read = generic_file_aio_read, 1571 .aio_read = generic_file_aio_read,
1578 .aio_write = reiserfs_aio_write, 1572 .aio_write = generic_file_aio_write,
1579 .splice_read = generic_file_splice_read, 1573 .splice_read = generic_file_splice_read,
1580 .splice_write = generic_file_splice_write, 1574 .splice_write = generic_file_splice_write,
1581}; 1575};
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5600d3d60cf7..6d0e554daa9d 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -34,7 +34,6 @@
34 ** 34 **
35 **/ 35 **/
36 36
37#include <linux/config.h>
38#include <linux/time.h> 37#include <linux/time.h>
39#include <linux/string.h> 38#include <linux/string.h>
40#include <linux/reiserfs_fs.h> 39#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 6c5a726fd34b..de391a82b999 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <asm/uaccess.h> 5#include <asm/uaccess.h>
7#include <linux/string.h> 6#include <linux/string.h>
8#include <linux/time.h> 7#include <linux/time.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9857e50f85e7..12dfdcfbee3d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <linux/time.h> 5#include <linux/time.h>
7#include <linux/fs.h> 6#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 7#include <linux/reiserfs_fs.h>
@@ -2933,6 +2932,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2933 } 2932 }
2934 if (error) 2933 if (error)
2935 goto out; 2934 goto out;
2935 /*
2936 * file size is changed, ctime and mtime are
2937 * to be updated
2938 */
2939 attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
2936 } 2940 }
2937 } 2941 }
2938 2942
@@ -2996,7 +3000,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2996 return error; 3000 return error;
2997} 3001}
2998 3002
2999struct address_space_operations reiserfs_address_space_operations = { 3003const struct address_space_operations reiserfs_address_space_operations = {
3000 .writepage = reiserfs_writepage, 3004 .writepage = reiserfs_writepage,
3001 .readpage = reiserfs_readpage, 3005 .readpage = reiserfs_readpage,
3002 .readpages = reiserfs_readpages, 3006 .readpages = reiserfs_readpages,
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1b73529b8099..9b3672d69367 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,7 +34,6 @@
34** from within kupdate, it will ignore the immediate flag 34** from within kupdate, it will ignore the immediate flag
35*/ 35*/
36 36
37#include <linux/config.h>
38#include <asm/uaccess.h> 37#include <asm/uaccess.h>
39#include <asm/system.h> 38#include <asm/system.h>
40 39
@@ -834,8 +833,7 @@ static int write_ordered_buffers(spinlock_t * lock,
834 get_bh(bh); 833 get_bh(bh);
835 if (test_set_buffer_locked(bh)) { 834 if (test_set_buffer_locked(bh)) {
836 if (!buffer_dirty(bh)) { 835 if (!buffer_dirty(bh)) {
837 list_del_init(&jh->list); 836 list_move(&jh->list, &tmp);
838 list_add(&jh->list, &tmp);
839 goto loop_next; 837 goto loop_next;
840 } 838 }
841 spin_unlock(lock); 839 spin_unlock(lock);
@@ -855,8 +853,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 ret = -EIO; 853 ret = -EIO;
856 } 854 }
857 if (buffer_dirty(bh)) { 855 if (buffer_dirty(bh)) {
858 list_del_init(&jh->list); 856 list_move(&jh->list, &tmp);
859 list_add(&jh->list, &tmp);
860 add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 857 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
861 } else { 858 } else {
862 reiserfs_free_jh(bh); 859 reiserfs_free_jh(bh);
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 2533c1f64aba..281f8061ac58 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <asm/uaccess.h> 5#include <asm/uaccess.h>
7#include <linux/string.h> 6#include <linux/string.h>
8#include <linux/time.h> 7#include <linux/time.h>
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 284f7852de8b..c61710e49c62 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -11,7 +11,6 @@
11 * NO WARRANTY 11 * NO WARRANTY
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/bitops.h> 15#include <linux/bitops.h>
17#include <linux/reiserfs_fs.h> 16#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index f62590aa9c95..65feba4deb69 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <linux/string.h> 5#include <linux/string.h>
7#include <linux/random.h> 6#include <linux/random.h>
8#include <linux/time.h> 7#include <linux/time.h>
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 27bd3a1df2ad..bc808a91eeaa 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <linux/time.h> 5#include <linux/time.h>
7#include <linux/fs.h> 6#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 7#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 731688e1cfe3..5d8a8cfebc70 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -10,7 +10,6 @@
10 10
11/* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */ 11/* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */
12 12
13#include <linux/config.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/time.h> 14#include <linux/time.h>
16#include <linux/seq_file.h> 15#include <linux/seq_file.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d2b25e1ba6e9..8b9b13127136 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -49,7 +49,6 @@
49 * reiserfs_insert_item 49 * reiserfs_insert_item
50 */ 50 */
51 51
52#include <linux/config.h>
53#include <linux/time.h> 52#include <linux/time.h>
54#include <linux/string.h> 53#include <linux/string.h>
55#include <linux/pagemap.h> 54#include <linux/pagemap.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index cae2abbc0c71..28eb3c886034 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -11,7 +11,6 @@
11 * NO WARRANTY 11 * NO WARRANTY
12 */ 12 */
13 13
14#include <linux/config.h>
15#include <linux/module.h> 14#include <linux/module.h>
16#include <linux/vmalloc.h> 15#include <linux/vmalloc.h>
17#include <linux/time.h> 16#include <linux/time.h>
@@ -60,7 +59,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
60} 59}
61 60
62static int reiserfs_remount(struct super_block *s, int *flags, char *data); 61static int reiserfs_remount(struct super_block *s, int *flags, char *data);
63static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf); 62static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
64 63
65static int reiserfs_sync_fs(struct super_block *s, int wait) 64static int reiserfs_sync_fs(struct super_block *s, int wait)
66{ 65{
@@ -1938,15 +1937,15 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1938 return errval; 1937 return errval;
1939} 1938}
1940 1939
1941static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf) 1940static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1942{ 1941{
1943 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); 1942 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb);
1944 1943
1945 buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); 1944 buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize));
1946 buf->f_bfree = sb_free_blocks(rs); 1945 buf->f_bfree = sb_free_blocks(rs);
1947 buf->f_bavail = buf->f_bfree; 1946 buf->f_bavail = buf->f_bfree;
1948 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; 1947 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
1949 buf->f_bsize = s->s_blocksize; 1948 buf->f_bsize = dentry->d_sb->s_blocksize;
1950 /* changed to accommodate gcc folks. */ 1949 /* changed to accommodate gcc folks. */
1951 buf->f_type = REISERFS_SUPER_MAGIC; 1950 buf->f_type = REISERFS_SUPER_MAGIC;
1952 return 0; 1951 return 0;
@@ -2249,11 +2248,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2249 2248
2250#endif 2249#endif
2251 2250
2252static struct super_block *get_super_block(struct file_system_type *fs_type, 2251static int get_super_block(struct file_system_type *fs_type,
2253 int flags, const char *dev_name, 2252 int flags, const char *dev_name,
2254 void *data) 2253 void *data, struct vfsmount *mnt)
2255{ 2254{
2256 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); 2255 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
2256 mnt);
2257} 2257}
2258 2258
2259static int __init init_reiserfs_fs(void) 2259static int __init init_reiserfs_fs(void)
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 196e971c03c9..36f108fc1cf5 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -2,7 +2,6 @@
2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details 2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details
3 */ 3 */
4 4
5#include <linux/config.h>
6#include <linux/time.h> 5#include <linux/time.h>
7#include <linux/pagemap.h> 6#include <linux/pagemap.h>
8#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ffb79c48c5bf..39fedaa88a0c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -452,8 +452,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
452 /* We can deadlock if we try to free dentries, 452 /* We can deadlock if we try to free dentries,
453 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 453 and an unlink/rmdir has just occured - GFP_NOFS avoids this */
454 mapping_set_gfp_mask(mapping, GFP_NOFS); 454 mapping_set_gfp_mask(mapping, GFP_NOFS);
455 page = read_cache_page(mapping, n, 455 page = read_mapping_page(mapping, n, NULL);
456 (filler_t *) mapping->a_ops->readpage, NULL);
457 if (!IS_ERR(page)) { 456 if (!IS_ERR(page)) {
458 wait_on_page_locked(page); 457 wait_on_page_locked(page);
459 kmap(page); 458 kmap(page);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 9b9eda7b335c..22eed61ebf69 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -179,12 +179,12 @@ outnobh:
179/* That's simple too. */ 179/* That's simple too. */
180 180
181static int 181static int
182romfs_statfs(struct super_block *sb, struct kstatfs *buf) 182romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
183{ 183{
184 buf->f_type = ROMFS_MAGIC; 184 buf->f_type = ROMFS_MAGIC;
185 buf->f_bsize = ROMBSIZE; 185 buf->f_bsize = ROMBSIZE;
186 buf->f_bfree = buf->f_bavail = buf->f_ffree; 186 buf->f_bfree = buf->f_bavail = buf->f_ffree;
187 buf->f_blocks = (romfs_maxsize(sb)+ROMBSIZE-1)>>ROMBSBITS; 187 buf->f_blocks = (romfs_maxsize(dentry->d_sb)+ROMBSIZE-1)>>ROMBSBITS;
188 buf->f_namelen = ROMFS_MAXFN; 188 buf->f_namelen = ROMFS_MAXFN;
189 return 0; 189 return 0;
190} 190}
@@ -459,7 +459,7 @@ err_out:
459 459
460/* Mapping from our types to the kernel */ 460/* Mapping from our types to the kernel */
461 461
462static struct address_space_operations romfs_aops = { 462static const struct address_space_operations romfs_aops = {
463 .readpage = romfs_readpage 463 .readpage = romfs_readpage
464}; 464};
465 465
@@ -607,10 +607,11 @@ static struct super_operations romfs_ops = {
607 .remount_fs = romfs_remount, 607 .remount_fs = romfs_remount,
608}; 608};
609 609
610static struct super_block *romfs_get_sb(struct file_system_type *fs_type, 610static int romfs_get_sb(struct file_system_type *fs_type,
611 int flags, const char *dev_name, void *data) 611 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
612{ 612{
613 return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super); 613 return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super,
614 mnt);
614} 615}
615 616
616static struct file_system_type romfs_fs_type = { 617static struct file_system_type romfs_fs_type = {
diff --git a/fs/select.c b/fs/select.c
index a8109baa5e46..33b72ba0f86f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -546,37 +546,38 @@ struct poll_list {
546 546
547#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) 547#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
548 548
549static void do_pollfd(unsigned int num, struct pollfd * fdpage, 549/*
550 poll_table ** pwait, int *count) 550 * Fish for pollable events on the pollfd->fd file descriptor. We're only
551 * interested in events matching the pollfd->events mask, and the result
552 * matching that mask is both recorded in pollfd->revents and returned. The
553 * pwait poll_table will be used by the fd-provided poll handler for waiting,
554 * if non-NULL.
555 */
556static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
551{ 557{
552 int i; 558 unsigned int mask;
553 559 int fd;
554 for (i = 0; i < num; i++) { 560
555 int fd; 561 mask = 0;
556 unsigned int mask; 562 fd = pollfd->fd;
557 struct pollfd *fdp; 563 if (fd >= 0) {
558 564 int fput_needed;
559 mask = 0; 565 struct file * file;
560 fdp = fdpage+i; 566
561 fd = fdp->fd; 567 file = fget_light(fd, &fput_needed);
562 if (fd >= 0) { 568 mask = POLLNVAL;
563 int fput_needed; 569 if (file != NULL) {
564 struct file * file = fget_light(fd, &fput_needed); 570 mask = DEFAULT_POLLMASK;
565 mask = POLLNVAL; 571 if (file->f_op && file->f_op->poll)
566 if (file != NULL) { 572 mask = file->f_op->poll(file, pwait);
567 mask = DEFAULT_POLLMASK; 573 /* Mask out unneeded events. */
568 if (file->f_op && file->f_op->poll) 574 mask &= pollfd->events | POLLERR | POLLHUP;
569 mask = file->f_op->poll(file, *pwait); 575 fput_light(file, fput_needed);
570 mask &= fdp->events | POLLERR | POLLHUP;
571 fput_light(file, fput_needed);
572 }
573 if (mask) {
574 *pwait = NULL;
575 (*count)++;
576 }
577 } 576 }
578 fdp->revents = mask;
579 } 577 }
578 pollfd->revents = mask;
579
580 return mask;
580} 581}
581 582
582static int do_poll(unsigned int nfds, struct poll_list *list, 583static int do_poll(unsigned int nfds, struct poll_list *list,
@@ -594,11 +595,29 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
594 long __timeout; 595 long __timeout;
595 596
596 set_current_state(TASK_INTERRUPTIBLE); 597 set_current_state(TASK_INTERRUPTIBLE);
597 walk = list; 598 for (walk = list; walk != NULL; walk = walk->next) {
598 while(walk != NULL) { 599 struct pollfd * pfd, * pfd_end;
599 do_pollfd( walk->len, walk->entries, &pt, &count); 600
600 walk = walk->next; 601 pfd = walk->entries;
602 pfd_end = pfd + walk->len;
603 for (; pfd != pfd_end; pfd++) {
604 /*
605 * Fish for events. If we found one, record it
606 * and kill the poll_table, so we don't
607 * needlessly register any other waiters after
608 * this. They'll get immediately deregistered
609 * when we break out and return.
610 */
611 if (do_pollfd(pfd, pt)) {
612 count++;
613 pt = NULL;
614 }
615 }
601 } 616 }
617 /*
618 * All waiters have already been registered, so don't provide
619 * a poll_table to them on the next loop iteration.
620 */
602 pt = NULL; 621 pt = NULL;
603 if (count || !*timeout || signal_pending(current)) 622 if (count || !*timeout || signal_pending(current))
604 break; 623 break;
@@ -727,9 +746,9 @@ out_fds:
727asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 746asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
728 long timeout_msecs) 747 long timeout_msecs)
729{ 748{
730 s64 timeout_jiffies = 0; 749 s64 timeout_jiffies;
731 750
732 if (timeout_msecs) { 751 if (timeout_msecs > 0) {
733#if HZ > 1000 752#if HZ > 1000
734 /* We can only overflow if HZ > 1000 */ 753 /* We can only overflow if HZ > 1000 */
735 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 754 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
@@ -737,6 +756,9 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
737 else 756 else
738#endif 757#endif
739 timeout_jiffies = msecs_to_jiffies(timeout_msecs); 758 timeout_jiffies = msecs_to_jiffies(timeout_msecs);
759 } else {
760 /* Infinite (< 0) or no (0) timeout */
761 timeout_jiffies = timeout_msecs;
740 } 762 }
741 763
742 return do_sys_poll(ufds, nfds, &timeout_jiffies); 764 return do_sys_poll(ufds, nfds, &timeout_jiffies);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index ed9a24d19d7d..dae67048baba 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -306,7 +306,7 @@ static int smb_commit_write(struct file *file, struct page *page,
306 return status; 306 return status;
307} 307}
308 308
309struct address_space_operations smb_file_aops = { 309const struct address_space_operations smb_file_aops = {
310 .readpage = smb_readpage, 310 .readpage = smb_readpage,
311 .writepage = smb_writepage, 311 .writepage = smb_writepage,
312 .prepare_write = smb_prepare_write, 312 .prepare_write = smb_prepare_write,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fdeabc0a34f7..a1ed657c3c84 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -7,7 +7,6 @@
7 * Please add a note about your changes to smbfs in the ChangeLog file. 7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */ 8 */
9 9
10#include <linux/config.h>
11#include <linux/module.h> 10#include <linux/module.h>
12#include <linux/time.h> 11#include <linux/time.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
@@ -48,7 +47,7 @@
48 47
49static void smb_delete_inode(struct inode *); 48static void smb_delete_inode(struct inode *);
50static void smb_put_super(struct super_block *); 49static void smb_put_super(struct super_block *);
51static int smb_statfs(struct super_block *, struct kstatfs *); 50static int smb_statfs(struct dentry *, struct kstatfs *);
52static int smb_show_options(struct seq_file *, struct vfsmount *); 51static int smb_show_options(struct seq_file *, struct vfsmount *);
53 52
54static kmem_cache_t *smb_inode_cachep; 53static kmem_cache_t *smb_inode_cachep;
@@ -641,13 +640,13 @@ out_no_server:
641} 640}
642 641
643static int 642static int
644smb_statfs(struct super_block *sb, struct kstatfs *buf) 643smb_statfs(struct dentry *dentry, struct kstatfs *buf)
645{ 644{
646 int result; 645 int result;
647 646
648 lock_kernel(); 647 lock_kernel();
649 648
650 result = smb_proc_dskattr(sb, buf); 649 result = smb_proc_dskattr(dentry, buf);
651 650
652 unlock_kernel(); 651 unlock_kernel();
653 652
@@ -782,10 +781,10 @@ out:
782 return error; 781 return error;
783} 782}
784 783
785static struct super_block *smb_get_sb(struct file_system_type *fs_type, 784static int smb_get_sb(struct file_system_type *fs_type,
786 int flags, const char *dev_name, void *data) 785 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
787{ 786{
788 return get_sb_nodev(fs_type, flags, data, smb_fill_super); 787 return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
789} 788}
790 789
791static struct file_system_type smb_fs_type = { 790static struct file_system_type smb_fs_type = {
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index b1b878b81730..c3495059889d 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -3226,9 +3226,9 @@ smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
3226} 3226}
3227 3227
3228int 3228int
3229smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr) 3229smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
3230{ 3230{
3231 struct smb_sb_info *server = SMB_SB(sb); 3231 struct smb_sb_info *server = SMB_SB(dentry->d_sb);
3232 int result; 3232 int result;
3233 char *p; 3233 char *p;
3234 long unit; 3234 long unit;
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 47664597e6b1..34fb462b2379 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -29,7 +29,7 @@ extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
29extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr); 29extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
30extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor); 30extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
31extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr); 31extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
32extern int smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr); 32extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
33extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len); 33extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
34extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath); 34extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
35extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry); 35extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
@@ -63,7 +63,7 @@ extern int smb_revalidate_inode(struct dentry *dentry);
63extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 63extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
64extern int smb_notify_change(struct dentry *dentry, struct iattr *attr); 64extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
65/* file.c */ 65/* file.c */
66extern struct address_space_operations smb_file_aops; 66extern const struct address_space_operations smb_file_aops;
67extern const struct file_operations smb_file_operations; 67extern const struct file_operations smb_file_operations;
68extern struct inode_operations smb_file_inode_operations; 68extern struct inode_operations smb_file_inode_operations;
69/* ioctl.c */ 69/* ioctl.c */
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71dd2760d32..c8e96195b96e 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -400,8 +400,7 @@ static int smb_request_send_req(struct smb_request *req)
400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED)) 400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
401 goto out; 401 goto out;
402 402
403 list_del_init(&req->rq_queue); 403 list_move_tail(&req->rq_queue, &server->recvq);
404 list_add_tail(&req->rq_queue, &server->recvq);
405 result = 1; 404 result = 1;
406out: 405out:
407 return result; 406 return result;
@@ -435,8 +434,7 @@ int smb_request_send_server(struct smb_sb_info *server)
435 result = smb_request_send_req(req); 434 result = smb_request_send_req(req);
436 if (result < 0) { 435 if (result < 0) {
437 server->conn_error = result; 436 server->conn_error = result;
438 list_del_init(&req->rq_queue); 437 list_move(&req->rq_queue, &server->xmitq);
439 list_add(&req->rq_queue, &server->xmitq);
440 result = -EIO; 438 result = -EIO;
441 goto out; 439 goto out;
442 } 440 }
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 481a97a423fa..e67540441288 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2001, Urban Widmark 5 * Copyright (C) 2001, Urban Widmark
6 */ 6 */
7 7
8#include <linux/config.h>
9 8
10#include <linux/sched.h> 9#include <linux/sched.h>
11#include <linux/kernel.h> 10#include <linux/kernel.h>
@@ -20,6 +19,7 @@
20#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
21#include <linux/module.h> 20#include <linux/module.h>
22#include <linux/net.h> 21#include <linux/net.h>
22#include <linux/kthread.h>
23#include <net/ip.h> 23#include <net/ip.h>
24 24
25#include <linux/smb_fs.h> 25#include <linux/smb_fs.h>
@@ -40,7 +40,7 @@ enum smbiod_state {
40}; 40};
41 41
42static enum smbiod_state smbiod_state = SMBIOD_DEAD; 42static enum smbiod_state smbiod_state = SMBIOD_DEAD;
43static pid_t smbiod_pid; 43static struct task_struct *smbiod_thread;
44static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait); 44static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
45static LIST_HEAD(smb_servers); 45static LIST_HEAD(smb_servers);
46static DEFINE_SPINLOCK(servers_lock); 46static DEFINE_SPINLOCK(servers_lock);
@@ -67,20 +67,29 @@ void smbiod_wake_up(void)
67 */ 67 */
68static int smbiod_start(void) 68static int smbiod_start(void)
69{ 69{
70 pid_t pid; 70 struct task_struct *tsk;
71 int err = 0;
72
71 if (smbiod_state != SMBIOD_DEAD) 73 if (smbiod_state != SMBIOD_DEAD)
72 return 0; 74 return 0;
73 smbiod_state = SMBIOD_STARTING; 75 smbiod_state = SMBIOD_STARTING;
74 __module_get(THIS_MODULE); 76 __module_get(THIS_MODULE);
75 spin_unlock(&servers_lock); 77 spin_unlock(&servers_lock);
76 pid = kernel_thread(smbiod, NULL, 0); 78 tsk = kthread_run(smbiod, NULL, "smbiod");
77 if (pid < 0) 79 if (IS_ERR(tsk)) {
80 err = PTR_ERR(tsk);
78 module_put(THIS_MODULE); 81 module_put(THIS_MODULE);
82 }
79 83
80 spin_lock(&servers_lock); 84 spin_lock(&servers_lock);
81 smbiod_state = pid < 0 ? SMBIOD_DEAD : SMBIOD_RUNNING; 85 if (err < 0) {
82 smbiod_pid = pid; 86 smbiod_state = SMBIOD_DEAD;
83 return pid; 87 smbiod_thread = NULL;
88 } else {
89 smbiod_state = SMBIOD_RUNNING;
90 smbiod_thread = tsk;
91 }
92 return err;
84} 93}
85 94
86/* 95/*
@@ -183,8 +192,7 @@ int smbiod_retry(struct smb_sb_info *server)
183 if (req->rq_flags & SMB_REQ_RETRY) { 192 if (req->rq_flags & SMB_REQ_RETRY) {
184 /* must move the request to the xmitq */ 193 /* must move the request to the xmitq */
185 VERBOSE("retrying request %p on recvq\n", req); 194 VERBOSE("retrying request %p on recvq\n", req);
186 list_del(&req->rq_queue); 195 list_move(&req->rq_queue, &server->xmitq);
187 list_add(&req->rq_queue, &server->xmitq);
188 continue; 196 continue;
189 } 197 }
190#endif 198#endif
@@ -290,8 +298,6 @@ out:
290 */ 298 */
291static int smbiod(void *unused) 299static int smbiod(void *unused)
292{ 300{
293 daemonize("smbiod");
294
295 allow_signal(SIGKILL); 301 allow_signal(SIGKILL);
296 302
297 VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid); 303 VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
diff --git a/fs/splice.c b/fs/splice.c
index a285fd746dc0..05fd2787be98 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -55,31 +55,43 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
55 struct pipe_buffer *buf) 55 struct pipe_buffer *buf)
56{ 56{
57 struct page *page = buf->page; 57 struct page *page = buf->page;
58 struct address_space *mapping = page_mapping(page); 58 struct address_space *mapping;
59 59
60 lock_page(page); 60 lock_page(page);
61 61
62 WARN_ON(!PageUptodate(page)); 62 mapping = page_mapping(page);
63 if (mapping) {
64 WARN_ON(!PageUptodate(page));
63 65
64 /* 66 /*
65 * At least for ext2 with nobh option, we need to wait on writeback 67 * At least for ext2 with nobh option, we need to wait on
66 * completing on this page, since we'll remove it from the pagecache. 68 * writeback completing on this page, since we'll remove it
67 * Otherwise truncate wont wait on the page, allowing the disk 69 * from the pagecache. Otherwise truncate wont wait on the
68 * blocks to be reused by someone else before we actually wrote our 70 * page, allowing the disk blocks to be reused by someone else
69 * data to them. fs corruption ensues. 71 * before we actually wrote our data to them. fs corruption
70 */ 72 * ensues.
71 wait_on_page_writeback(page); 73 */
74 wait_on_page_writeback(page);
72 75
73 if (PagePrivate(page)) 76 if (PagePrivate(page))
74 try_to_release_page(page, mapping_gfp_mask(mapping)); 77 try_to_release_page(page, mapping_gfp_mask(mapping));
75 78
76 if (!remove_mapping(mapping, page)) { 79 /*
77 unlock_page(page); 80 * If we succeeded in removing the mapping, set LRU flag
78 return 1; 81 * and return good.
82 */
83 if (remove_mapping(mapping, page)) {
84 buf->flags |= PIPE_BUF_FLAG_LRU;
85 return 0;
86 }
79 } 87 }
80 88
81 buf->flags |= PIPE_BUF_FLAG_LRU; 89 /*
82 return 0; 90 * Raced with truncate or failed to remove page from current
91 * address space, unlock and return failure.
92 */
93 unlock_page(page);
94 return 1;
83} 95}
84 96
85static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, 97static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
diff --git a/fs/stat.c b/fs/stat.c
index 0f282face322..3a44dcf97da2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/config.h>
8#include <linux/module.h> 7#include <linux/module.h>
9#include <linux/mm.h> 8#include <linux/mm.h>
10#include <linux/errno.h> 9#include <linux/errno.h>
diff --git a/fs/super.c b/fs/super.c
index a66f66bb8049..9b780c42d845 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -20,7 +20,6 @@
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
21 */ 21 */
22 22
23#include <linux/config.h>
24#include <linux/module.h> 23#include <linux/module.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26#include <linux/init.h> 25#include <linux/init.h>
@@ -231,7 +230,7 @@ void generic_shutdown_super(struct super_block *sb)
231 if (root) { 230 if (root) {
232 sb->s_root = NULL; 231 sb->s_root = NULL;
233 shrink_dcache_parent(root); 232 shrink_dcache_parent(root);
234 shrink_dcache_anon(&sb->s_anon); 233 shrink_dcache_sb(sb);
235 dput(root); 234 dput(root);
236 fsync_super(sb); 235 fsync_super(sb);
237 lock_super(sb); 236 lock_super(sb);
@@ -486,7 +485,7 @@ asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf)
486 s = user_get_super(new_decode_dev(dev)); 485 s = user_get_super(new_decode_dev(dev));
487 if (s == NULL) 486 if (s == NULL)
488 goto out; 487 goto out;
489 err = vfs_statfs(s, &sbuf); 488 err = vfs_statfs(s->s_root, &sbuf);
490 drop_super(s); 489 drop_super(s);
491 if (err) 490 if (err)
492 goto out; 491 goto out;
@@ -676,9 +675,10 @@ static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
676 } 675 }
677} 676}
678 677
679struct super_block *get_sb_bdev(struct file_system_type *fs_type, 678int get_sb_bdev(struct file_system_type *fs_type,
680 int flags, const char *dev_name, void *data, 679 int flags, const char *dev_name, void *data,
681 int (*fill_super)(struct super_block *, void *, int)) 680 int (*fill_super)(struct super_block *, void *, int),
681 struct vfsmount *mnt)
682{ 682{
683 struct block_device *bdev; 683 struct block_device *bdev;
684 struct super_block *s; 684 struct super_block *s;
@@ -686,7 +686,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
686 686
687 bdev = open_bdev_excl(dev_name, flags, fs_type); 687 bdev = open_bdev_excl(dev_name, flags, fs_type);
688 if (IS_ERR(bdev)) 688 if (IS_ERR(bdev))
689 return (struct super_block *)bdev; 689 return PTR_ERR(bdev);
690 690
691 /* 691 /*
692 * once the super is inserted into the list by sget, s_umount 692 * once the super is inserted into the list by sget, s_umount
@@ -697,15 +697,17 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
697 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); 697 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
698 mutex_unlock(&bdev->bd_mount_mutex); 698 mutex_unlock(&bdev->bd_mount_mutex);
699 if (IS_ERR(s)) 699 if (IS_ERR(s))
700 goto out; 700 goto error_s;
701 701
702 if (s->s_root) { 702 if (s->s_root) {
703 if ((flags ^ s->s_flags) & MS_RDONLY) { 703 if ((flags ^ s->s_flags) & MS_RDONLY) {
704 up_write(&s->s_umount); 704 up_write(&s->s_umount);
705 deactivate_super(s); 705 deactivate_super(s);
706 s = ERR_PTR(-EBUSY); 706 error = -EBUSY;
707 goto error_bdev;
707 } 708 }
708 goto out; 709
710 close_bdev_excl(bdev);
709 } else { 711 } else {
710 char b[BDEVNAME_SIZE]; 712 char b[BDEVNAME_SIZE];
711 713
@@ -716,18 +718,21 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
716 if (error) { 718 if (error) {
717 up_write(&s->s_umount); 719 up_write(&s->s_umount);
718 deactivate_super(s); 720 deactivate_super(s);
719 s = ERR_PTR(error); 721 goto error;
720 } else {
721 s->s_flags |= MS_ACTIVE;
722 bdev_uevent(bdev, KOBJ_MOUNT);
723 } 722 }
723
724 s->s_flags |= MS_ACTIVE;
725 bdev_uevent(bdev, KOBJ_MOUNT);
724 } 726 }
725 727
726 return s; 728 return simple_set_mnt(mnt, s);
727 729
728out: 730error_s:
731 error = PTR_ERR(s);
732error_bdev:
729 close_bdev_excl(bdev); 733 close_bdev_excl(bdev);
730 return s; 734error:
735 return error;
731} 736}
732 737
733EXPORT_SYMBOL(get_sb_bdev); 738EXPORT_SYMBOL(get_sb_bdev);
@@ -744,15 +749,16 @@ void kill_block_super(struct super_block *sb)
744 749
745EXPORT_SYMBOL(kill_block_super); 750EXPORT_SYMBOL(kill_block_super);
746 751
747struct super_block *get_sb_nodev(struct file_system_type *fs_type, 752int get_sb_nodev(struct file_system_type *fs_type,
748 int flags, void *data, 753 int flags, void *data,
749 int (*fill_super)(struct super_block *, void *, int)) 754 int (*fill_super)(struct super_block *, void *, int),
755 struct vfsmount *mnt)
750{ 756{
751 int error; 757 int error;
752 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 758 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
753 759
754 if (IS_ERR(s)) 760 if (IS_ERR(s))
755 return s; 761 return PTR_ERR(s);
756 762
757 s->s_flags = flags; 763 s->s_flags = flags;
758 764
@@ -760,10 +766,10 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type,
760 if (error) { 766 if (error) {
761 up_write(&s->s_umount); 767 up_write(&s->s_umount);
762 deactivate_super(s); 768 deactivate_super(s);
763 return ERR_PTR(error); 769 return error;
764 } 770 }
765 s->s_flags |= MS_ACTIVE; 771 s->s_flags |= MS_ACTIVE;
766 return s; 772 return simple_set_mnt(mnt, s);
767} 773}
768 774
769EXPORT_SYMBOL(get_sb_nodev); 775EXPORT_SYMBOL(get_sb_nodev);
@@ -773,94 +779,100 @@ static int compare_single(struct super_block *s, void *p)
773 return 1; 779 return 1;
774} 780}
775 781
776struct super_block *get_sb_single(struct file_system_type *fs_type, 782int get_sb_single(struct file_system_type *fs_type,
777 int flags, void *data, 783 int flags, void *data,
778 int (*fill_super)(struct super_block *, void *, int)) 784 int (*fill_super)(struct super_block *, void *, int),
785 struct vfsmount *mnt)
779{ 786{
780 struct super_block *s; 787 struct super_block *s;
781 int error; 788 int error;
782 789
783 s = sget(fs_type, compare_single, set_anon_super, NULL); 790 s = sget(fs_type, compare_single, set_anon_super, NULL);
784 if (IS_ERR(s)) 791 if (IS_ERR(s))
785 return s; 792 return PTR_ERR(s);
786 if (!s->s_root) { 793 if (!s->s_root) {
787 s->s_flags = flags; 794 s->s_flags = flags;
788 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 795 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
789 if (error) { 796 if (error) {
790 up_write(&s->s_umount); 797 up_write(&s->s_umount);
791 deactivate_super(s); 798 deactivate_super(s);
792 return ERR_PTR(error); 799 return error;
793 } 800 }
794 s->s_flags |= MS_ACTIVE; 801 s->s_flags |= MS_ACTIVE;
795 } 802 }
796 do_remount_sb(s, flags, data, 0); 803 do_remount_sb(s, flags, data, 0);
797 return s; 804 return simple_set_mnt(mnt, s);
798} 805}
799 806
800EXPORT_SYMBOL(get_sb_single); 807EXPORT_SYMBOL(get_sb_single);
801 808
802struct vfsmount * 809struct vfsmount *
803do_kern_mount(const char *fstype, int flags, const char *name, void *data) 810vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
804{ 811{
805 struct file_system_type *type = get_fs_type(fstype);
806 struct super_block *sb = ERR_PTR(-ENOMEM);
807 struct vfsmount *mnt; 812 struct vfsmount *mnt;
808 int error;
809 char *secdata = NULL; 813 char *secdata = NULL;
814 int error;
810 815
811 if (!type) 816 if (!type)
812 return ERR_PTR(-ENODEV); 817 return ERR_PTR(-ENODEV);
813 818
819 error = -ENOMEM;
814 mnt = alloc_vfsmnt(name); 820 mnt = alloc_vfsmnt(name);
815 if (!mnt) 821 if (!mnt)
816 goto out; 822 goto out;
817 823
818 if (data) { 824 if (data) {
819 secdata = alloc_secdata(); 825 secdata = alloc_secdata();
820 if (!secdata) { 826 if (!secdata)
821 sb = ERR_PTR(-ENOMEM);
822 goto out_mnt; 827 goto out_mnt;
823 }
824 828
825 error = security_sb_copy_data(type, data, secdata); 829 error = security_sb_copy_data(type, data, secdata);
826 if (error) { 830 if (error)
827 sb = ERR_PTR(error);
828 goto out_free_secdata; 831 goto out_free_secdata;
829 }
830 } 832 }
831 833
832 sb = type->get_sb(type, flags, name, data); 834 error = type->get_sb(type, flags, name, data, mnt);
833 if (IS_ERR(sb)) 835 if (error < 0)
834 goto out_free_secdata; 836 goto out_free_secdata;
835 error = security_sb_kern_mount(sb, secdata); 837
838 error = security_sb_kern_mount(mnt->mnt_sb, secdata);
836 if (error) 839 if (error)
837 goto out_sb; 840 goto out_sb;
838 mnt->mnt_sb = sb; 841
839 mnt->mnt_root = dget(sb->s_root); 842 mnt->mnt_mountpoint = mnt->mnt_root;
840 mnt->mnt_mountpoint = sb->s_root;
841 mnt->mnt_parent = mnt; 843 mnt->mnt_parent = mnt;
842 up_write(&sb->s_umount); 844 up_write(&mnt->mnt_sb->s_umount);
843 free_secdata(secdata); 845 free_secdata(secdata);
844 put_filesystem(type);
845 return mnt; 846 return mnt;
846out_sb: 847out_sb:
847 up_write(&sb->s_umount); 848 dput(mnt->mnt_root);
848 deactivate_super(sb); 849 up_write(&mnt->mnt_sb->s_umount);
849 sb = ERR_PTR(error); 850 deactivate_super(mnt->mnt_sb);
850out_free_secdata: 851out_free_secdata:
851 free_secdata(secdata); 852 free_secdata(secdata);
852out_mnt: 853out_mnt:
853 free_vfsmnt(mnt); 854 free_vfsmnt(mnt);
854out: 855out:
855 put_filesystem(type); 856 return ERR_PTR(error);
856 return (struct vfsmount *)sb;
857} 857}
858 858
859EXPORT_SYMBOL_GPL(do_kern_mount); 859EXPORT_SYMBOL_GPL(vfs_kern_mount);
860
861struct vfsmount *
862do_kern_mount(const char *fstype, int flags, const char *name, void *data)
863{
864 struct file_system_type *type = get_fs_type(fstype);
865 struct vfsmount *mnt;
866 if (!type)
867 return ERR_PTR(-ENODEV);
868 mnt = vfs_kern_mount(type, flags, name, data);
869 put_filesystem(type);
870 return mnt;
871}
860 872
861struct vfsmount *kern_mount(struct file_system_type *type) 873struct vfsmount *kern_mount(struct file_system_type *type)
862{ 874{
863 return do_kern_mount(type->name, 0, type->name, NULL); 875 return vfs_kern_mount(type, 0, type->name, NULL);
864} 876}
865 877
866EXPORT_SYMBOL(kern_mount); 878EXPORT_SYMBOL(kern_mount);
diff --git a/fs/sync.c b/fs/sync.c
index aab5ffe77e9f..955aef04da28 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -100,7 +100,7 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
100 } 100 }
101 101
102 if (nbytes == 0) 102 if (nbytes == 0)
103 endbyte = -1; 103 endbyte = LLONG_MAX;
104 else 104 else
105 endbyte--; /* inclusive */ 105 endbyte--; /* inclusive */
106 106
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 610b5bdbe75b..61c42430cba3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -430,10 +430,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
430 i++; 430 i++;
431 /* fallthrough */ 431 /* fallthrough */
432 default: 432 default:
433 if (filp->f_pos == 2) { 433 if (filp->f_pos == 2)
434 list_del(q); 434 list_move(q, &parent_sd->s_children);
435 list_add(q, &parent_sd->s_children); 435
436 }
437 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 436 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
438 struct sysfs_dirent *next; 437 struct sysfs_dirent *next;
439 const char * name; 438 const char * name;
@@ -455,8 +454,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
455 dt_type(next)) < 0) 454 dt_type(next)) < 0)
456 return 0; 455 return 0;
457 456
458 list_del(q); 457 list_move(q, p);
459 list_add(q, p);
460 p = q; 458 p = q;
461 filp->f_pos++; 459 filp->f_pos++;
462 } 460 }
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index f0b347bd12ca..5e0e31cc46f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -16,7 +16,7 @@
16 16
17extern struct super_block * sysfs_sb; 17extern struct super_block * sysfs_sb;
18 18
19static struct address_space_operations sysfs_aops = { 19static const struct address_space_operations sysfs_aops = {
20 .readpage = simple_readpage, 20 .readpage = simple_readpage,
21 .prepare_write = simple_prepare_write, 21 .prepare_write = simple_prepare_write,
22 .commit_write = simple_commit_write 22 .commit_write = simple_commit_write
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f1117e885bd6..40190c489271 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -66,10 +66,10 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
66 return 0; 66 return 0;
67} 67}
68 68
69static struct super_block *sysfs_get_sb(struct file_system_type *fs_type, 69static int sysfs_get_sb(struct file_system_type *fs_type,
70 int flags, const char *dev_name, void *data) 70 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
71{ 71{
72 return get_sb_single(fs_type, flags, data, sysfs_fill_super); 72 return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
73} 73}
74 74
75static struct file_system_type sysfs_fs_type = { 75static struct file_system_type sysfs_fs_type = {
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index d7074341ee87..f2bef962d309 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -53,8 +53,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
53static struct page * dir_get_page(struct inode *dir, unsigned long n) 53static struct page * dir_get_page(struct inode *dir, unsigned long n)
54{ 54{
55 struct address_space *mapping = dir->i_mapping; 55 struct address_space *mapping = dir->i_mapping;
56 struct page *page = read_cache_page(mapping, n, 56 struct page *page = read_mapping_page(mapping, n, NULL);
57 (filler_t*)mapping->a_ops->readpage, NULL);
58 if (!IS_ERR(page)) { 57 if (!IS_ERR(page)) {
59 wait_on_page_locked(page); 58 wait_on_page_locked(page);
60 kmap(page); 59 kmap(page);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3ff89cc5833a..58b2d22142ba 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -85,8 +85,9 @@ static void sysv_put_super(struct super_block *sb)
85 kfree(sbi); 85 kfree(sbi);
86} 86}
87 87
88static int sysv_statfs(struct super_block *sb, struct kstatfs *buf) 88static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
89{ 89{
90 struct super_block *sb = dentry->d_sb;
90 struct sysv_sb_info *sbi = SYSV_SB(sb); 91 struct sysv_sb_info *sbi = SYSV_SB(sb);
91 92
92 buf->f_type = sb->s_magic; 93 buf->f_type = sb->s_magic;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 86f5f8d43d0f..f2bcccd1d6fc 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -465,7 +465,7 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
465{ 465{
466 return generic_block_bmap(mapping,block,get_block); 466 return generic_block_bmap(mapping,block,get_block);
467} 467}
468struct address_space_operations sysv_aops = { 468const struct address_space_operations sysv_aops = {
469 .readpage = sysv_readpage, 469 .readpage = sysv_readpage,
470 .writepage = sysv_writepage, 470 .writepage = sysv_writepage,
471 .sync_page = block_sync_page, 471 .sync_page = block_sync_page,
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index e92b991e6dda..876639b93321 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -506,16 +506,17 @@ failed:
506 506
507/* Every kernel module contains stuff like this. */ 507/* Every kernel module contains stuff like this. */
508 508
509static struct super_block *sysv_get_sb(struct file_system_type *fs_type, 509static int sysv_get_sb(struct file_system_type *fs_type,
510 int flags, const char *dev_name, void *data) 510 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
511{ 511{
512 return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super); 512 return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
513 mnt);
513} 514}
514 515
515static struct super_block *v7_get_sb(struct file_system_type *fs_type, 516static int v7_get_sb(struct file_system_type *fs_type,
516 int flags, const char *dev_name, void *data) 517 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
517{ 518{
518 return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super); 519 return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
519} 520}
520 521
521static struct file_system_type sysv_fs_type = { 522static struct file_system_type sysv_fs_type = {
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 393a480e4deb..9dcc82120935 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -161,7 +161,7 @@ extern struct inode_operations sysv_dir_inode_operations;
161extern struct inode_operations sysv_fast_symlink_inode_operations; 161extern struct inode_operations sysv_fast_symlink_inode_operations;
162extern const struct file_operations sysv_file_operations; 162extern const struct file_operations sysv_file_operations;
163extern const struct file_operations sysv_dir_operations; 163extern const struct file_operations sysv_dir_operations;
164extern struct address_space_operations sysv_aops; 164extern const struct address_space_operations sysv_aops;
165extern struct super_operations sysv_sops; 165extern struct super_operations sysv_sops;
166extern struct dentry_operations sysv_dentry_operations; 166extern struct dentry_operations sysv_dentry_operations;
167 167
diff --git a/fs/udf/file.c b/fs/udf/file.c
index e34b00e303f1..a59e5f33daf6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -95,7 +95,7 @@ static int udf_adinicb_commit_write(struct file *file, struct page *page, unsign
95 return 0; 95 return 0;
96} 96}
97 97
98struct address_space_operations udf_adinicb_aops = { 98const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 99 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 100 .writepage = udf_adinicb_writepage,
101 .sync_page = block_sync_page, 101 .sync_page = block_sync_page,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 2983afd5e7fd..605f5111b6d8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -132,7 +132,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
132 return generic_block_bmap(mapping,block,udf_get_block); 132 return generic_block_bmap(mapping,block,udf_get_block);
133} 133}
134 134
135struct address_space_operations udf_aops = { 135const struct address_space_operations udf_aops = {
136 .readpage = udf_readpage, 136 .readpage = udf_readpage,
137 .writepage = udf_writepage, 137 .writepage = udf_writepage,
138 .sync_page = block_sync_page, 138 .sync_page = block_sync_page,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e45789fe38e8..4df822c881b6 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -40,7 +40,6 @@
40 40
41#include "udfdecl.h" 41#include "udfdecl.h"
42 42
43#include <linux/config.h>
44#include <linux/blkdev.h> 43#include <linux/blkdev.h>
45#include <linux/slab.h> 44#include <linux/slab.h>
46#include <linux/kernel.h> 45#include <linux/kernel.h>
@@ -91,13 +90,13 @@ static void udf_load_partdesc(struct super_block *, struct buffer_head *);
91static void udf_open_lvid(struct super_block *); 90static void udf_open_lvid(struct super_block *);
92static void udf_close_lvid(struct super_block *); 91static void udf_close_lvid(struct super_block *);
93static unsigned int udf_count_free(struct super_block *); 92static unsigned int udf_count_free(struct super_block *);
94static int udf_statfs(struct super_block *, struct kstatfs *); 93static int udf_statfs(struct dentry *, struct kstatfs *);
95 94
96/* UDF filesystem type */ 95/* UDF filesystem type */
97static struct super_block *udf_get_sb(struct file_system_type *fs_type, 96static int udf_get_sb(struct file_system_type *fs_type,
98 int flags, const char *dev_name, void *data) 97 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
99{ 98{
100 return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super); 99 return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
101} 100}
102 101
103static struct file_system_type udf_fstype = { 102static struct file_system_type udf_fstype = {
@@ -1779,8 +1778,10 @@ udf_put_super(struct super_block *sb)
1779 * Written, tested, and released. 1778 * Written, tested, and released.
1780 */ 1779 */
1781static int 1780static int
1782udf_statfs(struct super_block *sb, struct kstatfs *buf) 1781udf_statfs(struct dentry *dentry, struct kstatfs *buf)
1783{ 1782{
1783 struct super_block *sb = dentry->d_sb;
1784
1784 buf->f_type = UDF_SUPER_MAGIC; 1785 buf->f_type = UDF_SUPER_MAGIC;
1785 buf->f_bsize = sb->s_blocksize; 1786 buf->f_bsize = sb->s_blocksize;
1786 buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb)); 1787 buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb));
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 674bb40edc83..ba068a786563 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -113,6 +113,6 @@ out:
113/* 113/*
114 * symlinks can't do much... 114 * symlinks can't do much...
115 */ 115 */
116struct address_space_operations udf_symlink_aops = { 116const struct address_space_operations udf_symlink_aops = {
117 .readpage = udf_symlink_filler, 117 .readpage = udf_symlink_filler,
118}; 118};
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 023e19ba5a2e..1033b7cf2939 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -6,7 +6,6 @@
6#include "osta_udf.h" 6#include "osta_udf.h"
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/config.h>
10#include <linux/types.h> 9#include <linux/types.h>
11#include <linux/udf_fs_i.h> 10#include <linux/udf_fs_i.h>
12#include <linux/udf_fs_sb.h> 11#include <linux/udf_fs_sb.h>
@@ -47,9 +46,9 @@ extern struct inode_operations udf_dir_inode_operations;
47extern const struct file_operations udf_dir_operations; 46extern const struct file_operations udf_dir_operations;
48extern struct inode_operations udf_file_inode_operations; 47extern struct inode_operations udf_file_inode_operations;
49extern const struct file_operations udf_file_operations; 48extern const struct file_operations udf_file_operations;
50extern struct address_space_operations udf_aops; 49extern const struct address_space_operations udf_aops;
51extern struct address_space_operations udf_adinicb_aops; 50extern const struct address_space_operations udf_adinicb_aops;
52extern struct address_space_operations udf_symlink_aops; 51extern const struct address_space_operations udf_symlink_aops;
53 52
54struct udf_fileident_bh 53struct udf_fileident_bh
55{ 54{
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 3ada9dcf55b8..b01804baa120 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -21,14 +21,6 @@
21#include "swab.h" 21#include "swab.h"
22#include "util.h" 22#include "util.h"
23 23
24#undef UFS_BALLOC_DEBUG
25
26#ifdef UFS_BALLOC_DEBUG
27#define UFSD(x) printk("(%s, %d), %s:", __FILE__, __LINE__, __FUNCTION__); printk x;
28#else
29#define UFSD(x)
30#endif
31
32static unsigned ufs_add_fragments (struct inode *, unsigned, unsigned, unsigned, int *); 24static unsigned ufs_add_fragments (struct inode *, unsigned, unsigned, unsigned, int *);
33static unsigned ufs_alloc_fragments (struct inode *, unsigned, unsigned, unsigned, int *); 25static unsigned ufs_alloc_fragments (struct inode *, unsigned, unsigned, unsigned, int *);
34static unsigned ufs_alloccg_block (struct inode *, struct ufs_cg_private_info *, unsigned, int *); 26static unsigned ufs_alloccg_block (struct inode *, struct ufs_cg_private_info *, unsigned, int *);
@@ -39,7 +31,8 @@ static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *,
39/* 31/*
40 * Free 'count' fragments from fragment number 'fragment' 32 * Free 'count' fragments from fragment number 'fragment'
41 */ 33 */
42void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count) { 34void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count)
35{
43 struct super_block * sb; 36 struct super_block * sb;
44 struct ufs_sb_private_info * uspi; 37 struct ufs_sb_private_info * uspi;
45 struct ufs_super_block_first * usb1; 38 struct ufs_super_block_first * usb1;
@@ -51,7 +44,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
51 uspi = UFS_SB(sb)->s_uspi; 44 uspi = UFS_SB(sb)->s_uspi;
52 usb1 = ubh_get_usb_first(uspi); 45 usb1 = ubh_get_usb_first(uspi);
53 46
54 UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) 47 UFSD("ENTER, fragment %u, count %u\n", fragment, count);
55 48
56 if (ufs_fragnum(fragment) + count > uspi->s_fpg) 49 if (ufs_fragnum(fragment) + count > uspi->s_fpg)
57 ufs_error (sb, "ufs_free_fragments", "internal error"); 50 ufs_error (sb, "ufs_free_fragments", "internal error");
@@ -68,7 +61,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
68 ucpi = ufs_load_cylinder (sb, cgno); 61 ucpi = ufs_load_cylinder (sb, cgno);
69 if (!ucpi) 62 if (!ucpi)
70 goto failed; 63 goto failed;
71 ucg = ubh_get_ucg (UCPI_UBH); 64 ucg = ubh_get_ucg (UCPI_UBH(ucpi));
72 if (!ufs_cg_chkmagic(sb, ucg)) { 65 if (!ufs_cg_chkmagic(sb, ucg)) {
73 ufs_panic (sb, "ufs_free_fragments", "internal error, bad magic number on cg %u", cgno); 66 ufs_panic (sb, "ufs_free_fragments", "internal error, bad magic number on cg %u", cgno);
74 goto failed; 67 goto failed;
@@ -76,11 +69,11 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
76 69
77 end_bit = bit + count; 70 end_bit = bit + count;
78 bbase = ufs_blknum (bit); 71 bbase = ufs_blknum (bit);
79 blkmap = ubh_blkmap (UCPI_UBH, ucpi->c_freeoff, bbase); 72 blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase);
80 ufs_fragacct (sb, blkmap, ucg->cg_frsum, -1); 73 ufs_fragacct (sb, blkmap, ucg->cg_frsum, -1);
81 for (i = bit; i < end_bit; i++) { 74 for (i = bit; i < end_bit; i++) {
82 if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, i)) 75 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, i))
83 ubh_setbit (UCPI_UBH, ucpi->c_freeoff, i); 76 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, i);
84 else 77 else
85 ufs_error (sb, "ufs_free_fragments", 78 ufs_error (sb, "ufs_free_fragments",
86 "bit already cleared for fragment %u", i); 79 "bit already cleared for fragment %u", i);
@@ -90,51 +83,52 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
90 83
91 84
92 fs32_add(sb, &ucg->cg_cs.cs_nffree, count); 85 fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
93 fs32_add(sb, &usb1->fs_cstotal.cs_nffree, count); 86 uspi->cs_total.cs_nffree += count;
94 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); 87 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
95 blkmap = ubh_blkmap (UCPI_UBH, ucpi->c_freeoff, bbase); 88 blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase);
96 ufs_fragacct(sb, blkmap, ucg->cg_frsum, 1); 89 ufs_fragacct(sb, blkmap, ucg->cg_frsum, 1);
97 90
98 /* 91 /*
99 * Trying to reassemble free fragments into block 92 * Trying to reassemble free fragments into block
100 */ 93 */
101 blkno = ufs_fragstoblks (bbase); 94 blkno = ufs_fragstoblks (bbase);
102 if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, blkno)) { 95 if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) {
103 fs32_sub(sb, &ucg->cg_cs.cs_nffree, uspi->s_fpb); 96 fs32_sub(sb, &ucg->cg_cs.cs_nffree, uspi->s_fpb);
104 fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, uspi->s_fpb); 97 uspi->cs_total.cs_nffree -= uspi->s_fpb;
105 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, uspi->s_fpb); 98 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, uspi->s_fpb);
106 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 99 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
107 ufs_clusteracct (sb, ucpi, blkno, 1); 100 ufs_clusteracct (sb, ucpi, blkno, 1);
108 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); 101 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
109 fs32_add(sb, &usb1->fs_cstotal.cs_nbfree, 1); 102 uspi->cs_total.cs_nbfree++;
110 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); 103 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
111 cylno = ufs_cbtocylno (bbase); 104 cylno = ufs_cbtocylno (bbase);
112 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(bbase)), 1); 105 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(bbase)), 1);
113 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); 106 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
114 } 107 }
115 108
116 ubh_mark_buffer_dirty (USPI_UBH); 109 ubh_mark_buffer_dirty (USPI_UBH(uspi));
117 ubh_mark_buffer_dirty (UCPI_UBH); 110 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
118 if (sb->s_flags & MS_SYNCHRONOUS) { 111 if (sb->s_flags & MS_SYNCHRONOUS) {
119 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); 112 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
120 ubh_wait_on_buffer (UCPI_UBH); 113 ubh_wait_on_buffer (UCPI_UBH(ucpi));
121 } 114 }
122 sb->s_dirt = 1; 115 sb->s_dirt = 1;
123 116
124 unlock_super (sb); 117 unlock_super (sb);
125 UFSD(("EXIT\n")) 118 UFSD("EXIT\n");
126 return; 119 return;
127 120
128failed: 121failed:
129 unlock_super (sb); 122 unlock_super (sb);
130 UFSD(("EXIT (FAILED)\n")) 123 UFSD("EXIT (FAILED)\n");
131 return; 124 return;
132} 125}
133 126
134/* 127/*
135 * Free 'count' fragments from fragment number 'fragment' (free whole blocks) 128 * Free 'count' fragments from fragment number 'fragment' (free whole blocks)
136 */ 129 */
137void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) { 130void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count)
131{
138 struct super_block * sb; 132 struct super_block * sb;
139 struct ufs_sb_private_info * uspi; 133 struct ufs_sb_private_info * uspi;
140 struct ufs_super_block_first * usb1; 134 struct ufs_super_block_first * usb1;
@@ -146,7 +140,7 @@ void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) {
146 uspi = UFS_SB(sb)->s_uspi; 140 uspi = UFS_SB(sb)->s_uspi;
147 usb1 = ubh_get_usb_first(uspi); 141 usb1 = ubh_get_usb_first(uspi);
148 142
149 UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) 143 UFSD("ENTER, fragment %u, count %u\n", fragment, count);
150 144
151 if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) { 145 if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) {
152 ufs_error (sb, "ufs_free_blocks", "internal error, " 146 ufs_error (sb, "ufs_free_blocks", "internal error, "
@@ -162,7 +156,7 @@ do_more:
162 bit = ufs_dtogd (fragment); 156 bit = ufs_dtogd (fragment);
163 if (cgno >= uspi->s_ncg) { 157 if (cgno >= uspi->s_ncg) {
164 ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device"); 158 ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device");
165 goto failed; 159 goto failed_unlock;
166 } 160 }
167 end_bit = bit + count; 161 end_bit = bit + count;
168 if (end_bit > uspi->s_fpg) { 162 if (end_bit > uspi->s_fpg) {
@@ -173,36 +167,36 @@ do_more:
173 167
174 ucpi = ufs_load_cylinder (sb, cgno); 168 ucpi = ufs_load_cylinder (sb, cgno);
175 if (!ucpi) 169 if (!ucpi)
176 goto failed; 170 goto failed_unlock;
177 ucg = ubh_get_ucg (UCPI_UBH); 171 ucg = ubh_get_ucg (UCPI_UBH(ucpi));
178 if (!ufs_cg_chkmagic(sb, ucg)) { 172 if (!ufs_cg_chkmagic(sb, ucg)) {
179 ufs_panic (sb, "ufs_free_blocks", "internal error, bad magic number on cg %u", cgno); 173 ufs_panic (sb, "ufs_free_blocks", "internal error, bad magic number on cg %u", cgno);
180 goto failed; 174 goto failed_unlock;
181 } 175 }
182 176
183 for (i = bit; i < end_bit; i += uspi->s_fpb) { 177 for (i = bit; i < end_bit; i += uspi->s_fpb) {
184 blkno = ufs_fragstoblks(i); 178 blkno = ufs_fragstoblks(i);
185 if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, blkno)) { 179 if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) {
186 ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); 180 ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
187 } 181 }
188 ubh_setblock(UCPI_UBH, ucpi->c_freeoff, blkno); 182 ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
189 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 183 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
190 ufs_clusteracct (sb, ucpi, blkno, 1); 184 ufs_clusteracct (sb, ucpi, blkno, 1);
191 DQUOT_FREE_BLOCK(inode, uspi->s_fpb); 185 DQUOT_FREE_BLOCK(inode, uspi->s_fpb);
192 186
193 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); 187 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
194 fs32_add(sb, &usb1->fs_cstotal.cs_nbfree, 1); 188 uspi->cs_total.cs_nbfree++;
195 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); 189 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
196 cylno = ufs_cbtocylno(i); 190 cylno = ufs_cbtocylno(i);
197 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(i)), 1); 191 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(i)), 1);
198 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); 192 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
199 } 193 }
200 194
201 ubh_mark_buffer_dirty (USPI_UBH); 195 ubh_mark_buffer_dirty (USPI_UBH(uspi));
202 ubh_mark_buffer_dirty (UCPI_UBH); 196 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
203 if (sb->s_flags & MS_SYNCHRONOUS) { 197 if (sb->s_flags & MS_SYNCHRONOUS) {
204 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); 198 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
205 ubh_wait_on_buffer (UCPI_UBH); 199 ubh_wait_on_buffer (UCPI_UBH(ucpi));
206 } 200 }
207 201
208 if (overflow) { 202 if (overflow) {
@@ -213,38 +207,83 @@ do_more:
213 207
214 sb->s_dirt = 1; 208 sb->s_dirt = 1;
215 unlock_super (sb); 209 unlock_super (sb);
216 UFSD(("EXIT\n")) 210 UFSD("EXIT\n");
217 return; 211 return;
218 212
219failed: 213failed_unlock:
220 unlock_super (sb); 214 unlock_super (sb);
221 UFSD(("EXIT (FAILED)\n")) 215failed:
216 UFSD("EXIT (FAILED)\n");
222 return; 217 return;
223} 218}
224 219
220/*
221 * Modify inode page cache in such way:
222 * have - blocks with b_blocknr equal to oldb...oldb+count-1
223 * get - blocks with b_blocknr equal to newb...newb+count-1
224 * also we suppose that oldb...oldb+count-1 blocks
225 * situated at the end of file.
226 *
227 * We can come here from ufs_writepage or ufs_prepare_write,
228 * locked_page is argument of these functions, so we already lock it.
229 */
230static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
231 unsigned int count, unsigned int oldb,
232 unsigned int newb, struct page *locked_page)
233{
234 unsigned int blk_per_page = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
235 struct address_space *mapping = inode->i_mapping;
236 pgoff_t index, cur_index = locked_page->index;
237 unsigned int i, j;
238 struct page *page;
239 struct buffer_head *head, *bh;
240
241 UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n",
242 inode->i_ino, count, oldb, newb);
243
244 BUG_ON(!PageLocked(locked_page));
245
246 for (i = 0; i < count; i += blk_per_page) {
247 index = (baseblk+i) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
248
249 if (likely(cur_index != index)) {
250 page = ufs_get_locked_page(mapping, index);
251 if (IS_ERR(page))
252 continue;
253 } else
254 page = locked_page;
255
256 j = i;
257 head = page_buffers(page);
258 bh = head;
259 do {
260 if (likely(bh->b_blocknr == j + oldb && j < count)) {
261 unmap_underlying_metadata(bh->b_bdev,
262 bh->b_blocknr);
263 bh->b_blocknr = newb + j++;
264 mark_buffer_dirty(bh);
265 }
225 266
267 bh = bh->b_this_page;
268 } while (bh != head);
226 269
227#define NULLIFY_FRAGMENTS \ 270 set_page_dirty(page);
228 for (i = oldcount; i < newcount; i++) { \ 271
229 bh = sb_getblk(sb, result + i); \ 272 if (likely(cur_index != index))
230 memset (bh->b_data, 0, sb->s_blocksize); \ 273 ufs_put_locked_page(page);
231 set_buffer_uptodate(bh); \ 274 }
232 mark_buffer_dirty (bh); \ 275 UFSD("EXIT\n");
233 if (IS_SYNC(inode)) \ 276}
234 sync_dirty_buffer(bh); \
235 brelse (bh); \
236 }
237 277
238unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment, 278unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
239 unsigned goal, unsigned count, int * err ) 279 unsigned goal, unsigned count, int * err, struct page *locked_page)
240{ 280{
241 struct super_block * sb; 281 struct super_block * sb;
242 struct ufs_sb_private_info * uspi; 282 struct ufs_sb_private_info * uspi;
243 struct ufs_super_block_first * usb1; 283 struct ufs_super_block_first * usb1;
244 struct buffer_head * bh; 284 unsigned cgno, oldcount, newcount, tmp, request, result;
245 unsigned cgno, oldcount, newcount, tmp, request, i, result;
246 285
247 UFSD(("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count)) 286 UFSD("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count);
248 287
249 sb = inode->i_sb; 288 sb = inode->i_sb;
250 uspi = UFS_SB(sb)->s_uspi; 289 uspi = UFS_SB(sb)->s_uspi;
@@ -273,14 +312,14 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
273 return (unsigned)-1; 312 return (unsigned)-1;
274 } 313 }
275 if (fragment < UFS_I(inode)->i_lastfrag) { 314 if (fragment < UFS_I(inode)->i_lastfrag) {
276 UFSD(("EXIT (ALREADY ALLOCATED)\n")) 315 UFSD("EXIT (ALREADY ALLOCATED)\n");
277 unlock_super (sb); 316 unlock_super (sb);
278 return 0; 317 return 0;
279 } 318 }
280 } 319 }
281 else { 320 else {
282 if (tmp) { 321 if (tmp) {
283 UFSD(("EXIT (ALREADY ALLOCATED)\n")) 322 UFSD("EXIT (ALREADY ALLOCATED)\n");
284 unlock_super(sb); 323 unlock_super(sb);
285 return 0; 324 return 0;
286 } 325 }
@@ -289,9 +328,9 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
289 /* 328 /*
290 * There is not enough space for user on the device 329 * There is not enough space for user on the device
291 */ 330 */
292 if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(usb1, UFS_MINFREE) <= 0) { 331 if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
293 unlock_super (sb); 332 unlock_super (sb);
294 UFSD(("EXIT (FAILED)\n")) 333 UFSD("EXIT (FAILED)\n");
295 return 0; 334 return 0;
296 } 335 }
297 336
@@ -310,12 +349,10 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
310 if (result) { 349 if (result) {
311 *p = cpu_to_fs32(sb, result); 350 *p = cpu_to_fs32(sb, result);
312 *err = 0; 351 *err = 0;
313 inode->i_blocks += count << uspi->s_nspfshift;
314 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 352 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
315 NULLIFY_FRAGMENTS
316 } 353 }
317 unlock_super(sb); 354 unlock_super(sb);
318 UFSD(("EXIT, result %u\n", result)) 355 UFSD("EXIT, result %u\n", result);
319 return result; 356 return result;
320 } 357 }
321 358
@@ -325,11 +362,9 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
325 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err); 362 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err);
326 if (result) { 363 if (result) {
327 *err = 0; 364 *err = 0;
328 inode->i_blocks += count << uspi->s_nspfshift;
329 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 365 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
330 NULLIFY_FRAGMENTS
331 unlock_super(sb); 366 unlock_super(sb);
332 UFSD(("EXIT, result %u\n", result)) 367 UFSD("EXIT, result %u\n", result);
333 return result; 368 return result;
334 } 369 }
335 370
@@ -339,8 +374,8 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
339 switch (fs32_to_cpu(sb, usb1->fs_optim)) { 374 switch (fs32_to_cpu(sb, usb1->fs_optim)) {
340 case UFS_OPTSPACE: 375 case UFS_OPTSPACE:
341 request = newcount; 376 request = newcount;
342 if (uspi->s_minfree < 5 || fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree) 377 if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
343 > uspi->s_dsize * uspi->s_minfree / (2 * 100) ) 378 > uspi->s_dsize * uspi->s_minfree / (2 * 100))
344 break; 379 break;
345 usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); 380 usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
346 break; 381 break;
@@ -349,7 +384,7 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
349 384
350 case UFS_OPTTIME: 385 case UFS_OPTTIME:
351 request = uspi->s_fpb; 386 request = uspi->s_fpb;
352 if (fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree) < uspi->s_dsize * 387 if (uspi->cs_total.cs_nffree < uspi->s_dsize *
353 (uspi->s_minfree - 2) / 100) 388 (uspi->s_minfree - 2) / 100)
354 break; 389 break;
355 usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); 390 usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
@@ -357,39 +392,22 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
357 } 392 }
358 result = ufs_alloc_fragments (inode, cgno, goal, request, err); 393 result = ufs_alloc_fragments (inode, cgno, goal, request, err);
359 if (result) { 394 if (result) {
360 for (i = 0; i < oldcount; i++) { 395 ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp,
361 bh = sb_bread(sb, tmp + i); 396 result, locked_page);
362 if(bh) 397
363 {
364 clear_buffer_dirty(bh);
365 bh->b_blocknr = result + i;
366 mark_buffer_dirty (bh);
367 if (IS_SYNC(inode))
368 sync_dirty_buffer(bh);
369 brelse (bh);
370 }
371 else
372 {
373 printk(KERN_ERR "ufs_new_fragments: bread fail\n");
374 unlock_super(sb);
375 return 0;
376 }
377 }
378 *p = cpu_to_fs32(sb, result); 398 *p = cpu_to_fs32(sb, result);
379 *err = 0; 399 *err = 0;
380 inode->i_blocks += count << uspi->s_nspfshift;
381 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 400 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
382 NULLIFY_FRAGMENTS
383 unlock_super(sb); 401 unlock_super(sb);
384 if (newcount < request) 402 if (newcount < request)
385 ufs_free_fragments (inode, result + newcount, request - newcount); 403 ufs_free_fragments (inode, result + newcount, request - newcount);
386 ufs_free_fragments (inode, tmp, oldcount); 404 ufs_free_fragments (inode, tmp, oldcount);
387 UFSD(("EXIT, result %u\n", result)) 405 UFSD("EXIT, result %u\n", result);
388 return result; 406 return result;
389 } 407 }
390 408
391 unlock_super(sb); 409 unlock_super(sb);
392 UFSD(("EXIT (FAILED)\n")) 410 UFSD("EXIT (FAILED)\n");
393 return 0; 411 return 0;
394} 412}
395 413
@@ -404,7 +422,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
404 struct ufs_cylinder_group * ucg; 422 struct ufs_cylinder_group * ucg;
405 unsigned cgno, fragno, fragoff, count, fragsize, i; 423 unsigned cgno, fragno, fragoff, count, fragsize, i;
406 424
407 UFSD(("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount)) 425 UFSD("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount);
408 426
409 sb = inode->i_sb; 427 sb = inode->i_sb;
410 uspi = UFS_SB(sb)->s_uspi; 428 uspi = UFS_SB(sb)->s_uspi;
@@ -419,7 +437,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
419 ucpi = ufs_load_cylinder (sb, cgno); 437 ucpi = ufs_load_cylinder (sb, cgno);
420 if (!ucpi) 438 if (!ucpi)
421 return 0; 439 return 0;
422 ucg = ubh_get_ucg (UCPI_UBH); 440 ucg = ubh_get_ucg (UCPI_UBH(ucpi));
423 if (!ufs_cg_chkmagic(sb, ucg)) { 441 if (!ufs_cg_chkmagic(sb, ucg)) {
424 ufs_panic (sb, "ufs_add_fragments", 442 ufs_panic (sb, "ufs_add_fragments",
425 "internal error, bad magic number on cg %u", cgno); 443 "internal error, bad magic number on cg %u", cgno);
@@ -429,14 +447,14 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
429 fragno = ufs_dtogd (fragment); 447 fragno = ufs_dtogd (fragment);
430 fragoff = ufs_fragnum (fragno); 448 fragoff = ufs_fragnum (fragno);
431 for (i = oldcount; i < newcount; i++) 449 for (i = oldcount; i < newcount; i++)
432 if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, fragno + i)) 450 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
433 return 0; 451 return 0;
434 /* 452 /*
435 * Block can be extended 453 * Block can be extended
436 */ 454 */
437 ucg->cg_time = cpu_to_fs32(sb, get_seconds()); 455 ucg->cg_time = cpu_to_fs32(sb, get_seconds());
438 for (i = newcount; i < (uspi->s_fpb - fragoff); i++) 456 for (i = newcount; i < (uspi->s_fpb - fragoff); i++)
439 if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, fragno + i)) 457 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
440 break; 458 break;
441 fragsize = i - oldcount; 459 fragsize = i - oldcount;
442 if (!fs32_to_cpu(sb, ucg->cg_frsum[fragsize])) 460 if (!fs32_to_cpu(sb, ucg->cg_frsum[fragsize]))
@@ -446,7 +464,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
446 if (fragsize != count) 464 if (fragsize != count)
447 fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); 465 fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
448 for (i = oldcount; i < newcount; i++) 466 for (i = oldcount; i < newcount; i++)
449 ubh_clrbit (UCPI_UBH, ucpi->c_freeoff, fragno + i); 467 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
450 if(DQUOT_ALLOC_BLOCK(inode, count)) { 468 if(DQUOT_ALLOC_BLOCK(inode, count)) {
451 *err = -EDQUOT; 469 *err = -EDQUOT;
452 return 0; 470 return 0;
@@ -454,17 +472,17 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
454 472
455 fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); 473 fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
456 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); 474 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
457 fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, count); 475 uspi->cs_total.cs_nffree -= count;
458 476
459 ubh_mark_buffer_dirty (USPI_UBH); 477 ubh_mark_buffer_dirty (USPI_UBH(uspi));
460 ubh_mark_buffer_dirty (UCPI_UBH); 478 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
461 if (sb->s_flags & MS_SYNCHRONOUS) { 479 if (sb->s_flags & MS_SYNCHRONOUS) {
462 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); 480 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
463 ubh_wait_on_buffer (UCPI_UBH); 481 ubh_wait_on_buffer (UCPI_UBH(ucpi));
464 } 482 }
465 sb->s_dirt = 1; 483 sb->s_dirt = 1;
466 484
467 UFSD(("EXIT, fragment %u\n", fragment)) 485 UFSD("EXIT, fragment %u\n", fragment);
468 486
469 return fragment; 487 return fragment;
470} 488}
@@ -487,7 +505,7 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno,
487 struct ufs_cylinder_group * ucg; 505 struct ufs_cylinder_group * ucg;
488 unsigned oldcg, i, j, k, result, allocsize; 506 unsigned oldcg, i, j, k, result, allocsize;
489 507
490 UFSD(("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count)) 508 UFSD("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count);
491 509
492 sb = inode->i_sb; 510 sb = inode->i_sb;
493 uspi = UFS_SB(sb)->s_uspi; 511 uspi = UFS_SB(sb)->s_uspi;
@@ -521,14 +539,14 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno,
521 UFS_TEST_FREE_SPACE_CG 539 UFS_TEST_FREE_SPACE_CG
522 } 540 }
523 541
524 UFSD(("EXIT (FAILED)\n")) 542 UFSD("EXIT (FAILED)\n");
525 return 0; 543 return 0;
526 544
527cg_found: 545cg_found:
528 ucpi = ufs_load_cylinder (sb, cgno); 546 ucpi = ufs_load_cylinder (sb, cgno);
529 if (!ucpi) 547 if (!ucpi)
530 return 0; 548 return 0;
531 ucg = ubh_get_ucg (UCPI_UBH); 549 ucg = ubh_get_ucg (UCPI_UBH(ucpi));
532 if (!ufs_cg_chkmagic(sb, ucg)) 550 if (!ufs_cg_chkmagic(sb, ucg))
533 ufs_panic (sb, "ufs_alloc_fragments", 551 ufs_panic (sb, "ufs_alloc_fragments",
534 "internal error, bad magic number on cg %u", cgno); 552 "internal error, bad magic number on cg %u", cgno);
@@ -551,12 +569,12 @@ cg_found:
551 return 0; 569 return 0;
552 goal = ufs_dtogd (result); 570 goal = ufs_dtogd (result);
553 for (i = count; i < uspi->s_fpb; i++) 571 for (i = count; i < uspi->s_fpb; i++)
554 ubh_setbit (UCPI_UBH, ucpi->c_freeoff, goal + i); 572 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
555 i = uspi->s_fpb - count; 573 i = uspi->s_fpb - count;
556 DQUOT_FREE_BLOCK(inode, i); 574 DQUOT_FREE_BLOCK(inode, i);
557 575
558 fs32_add(sb, &ucg->cg_cs.cs_nffree, i); 576 fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
559 fs32_add(sb, &usb1->fs_cstotal.cs_nffree, i); 577 uspi->cs_total.cs_nffree += i;
560 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); 578 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
561 fs32_add(sb, &ucg->cg_frsum[i], 1); 579 fs32_add(sb, &ucg->cg_frsum[i], 1);
562 goto succed; 580 goto succed;
@@ -570,10 +588,10 @@ cg_found:
570 return 0; 588 return 0;
571 } 589 }
572 for (i = 0; i < count; i++) 590 for (i = 0; i < count; i++)
573 ubh_clrbit (UCPI_UBH, ucpi->c_freeoff, result + i); 591 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
574 592
575 fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); 593 fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
576 fs32_sub(sb, &usb1->fs_cstotal.cs_nffree, count); 594 uspi->cs_total.cs_nffree -= count;
577 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); 595 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
578 fs32_sub(sb, &ucg->cg_frsum[allocsize], 1); 596 fs32_sub(sb, &ucg->cg_frsum[allocsize], 1);
579 597
@@ -581,16 +599,16 @@ cg_found:
581 fs32_add(sb, &ucg->cg_frsum[allocsize - count], 1); 599 fs32_add(sb, &ucg->cg_frsum[allocsize - count], 1);
582 600
583succed: 601succed:
584 ubh_mark_buffer_dirty (USPI_UBH); 602 ubh_mark_buffer_dirty (USPI_UBH(uspi));
585 ubh_mark_buffer_dirty (UCPI_UBH); 603 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
586 if (sb->s_flags & MS_SYNCHRONOUS) { 604 if (sb->s_flags & MS_SYNCHRONOUS) {
587 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); 605 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
588 ubh_wait_on_buffer (UCPI_UBH); 606 ubh_wait_on_buffer (UCPI_UBH(ucpi));
589 } 607 }
590 sb->s_dirt = 1; 608 sb->s_dirt = 1;
591 609
592 result += cgno * uspi->s_fpg; 610 result += cgno * uspi->s_fpg;
593 UFSD(("EXIT3, result %u\n", result)) 611 UFSD("EXIT3, result %u\n", result);
594 return result; 612 return result;
595} 613}
596 614
@@ -603,12 +621,12 @@ static unsigned ufs_alloccg_block (struct inode * inode,
603 struct ufs_cylinder_group * ucg; 621 struct ufs_cylinder_group * ucg;
604 unsigned result, cylno, blkno; 622 unsigned result, cylno, blkno;
605 623
606 UFSD(("ENTER, goal %u\n", goal)) 624 UFSD("ENTER, goal %u\n", goal);
607 625
608 sb = inode->i_sb; 626 sb = inode->i_sb;
609 uspi = UFS_SB(sb)->s_uspi; 627 uspi = UFS_SB(sb)->s_uspi;
610 usb1 = ubh_get_usb_first(uspi); 628 usb1 = ubh_get_usb_first(uspi);
611 ucg = ubh_get_ucg(UCPI_UBH); 629 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
612 630
613 if (goal == 0) { 631 if (goal == 0) {
614 goal = ucpi->c_rotor; 632 goal = ucpi->c_rotor;
@@ -620,7 +638,7 @@ static unsigned ufs_alloccg_block (struct inode * inode,
620 /* 638 /*
621 * If the requested block is available, use it. 639 * If the requested block is available, use it.
622 */ 640 */
623 if (ubh_isblockset(UCPI_UBH, ucpi->c_freeoff, ufs_fragstoblks(goal))) { 641 if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, ufs_fragstoblks(goal))) {
624 result = goal; 642 result = goal;
625 goto gotit; 643 goto gotit;
626 } 644 }
@@ -632,7 +650,7 @@ norot:
632 ucpi->c_rotor = result; 650 ucpi->c_rotor = result;
633gotit: 651gotit:
634 blkno = ufs_fragstoblks(result); 652 blkno = ufs_fragstoblks(result);
635 ubh_clrblock (UCPI_UBH, ucpi->c_freeoff, blkno); 653 ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
636 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 654 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
637 ufs_clusteracct (sb, ucpi, blkno, -1); 655 ufs_clusteracct (sb, ucpi, blkno, -1);
638 if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) { 656 if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) {
@@ -641,31 +659,76 @@ gotit:
641 } 659 }
642 660
643 fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); 661 fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
644 fs32_sub(sb, &usb1->fs_cstotal.cs_nbfree, 1); 662 uspi->cs_total.cs_nbfree--;
645 fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1); 663 fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1);
646 cylno = ufs_cbtocylno(result); 664 cylno = ufs_cbtocylno(result);
647 fs16_sub(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(result)), 1); 665 fs16_sub(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(result)), 1);
648 fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1); 666 fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1);
649 667
650 UFSD(("EXIT, result %u\n", result)) 668 UFSD("EXIT, result %u\n", result);
651 669
652 return result; 670 return result;
653} 671}
654 672
655static unsigned ufs_bitmap_search (struct super_block * sb, 673static unsigned ubh_scanc(struct ufs_sb_private_info *uspi,
656 struct ufs_cg_private_info * ucpi, unsigned goal, unsigned count) 674 struct ufs_buffer_head *ubh,
675 unsigned begin, unsigned size,
676 unsigned char *table, unsigned char mask)
657{ 677{
658 struct ufs_sb_private_info * uspi; 678 unsigned rest, offset;
659 struct ufs_super_block_first * usb1; 679 unsigned char *cp;
660 struct ufs_cylinder_group * ucg; 680
661 unsigned start, length, location, result; 681
662 unsigned possition, fragsize, blockmap, mask; 682 offset = begin & ~uspi->s_fmask;
663 683 begin >>= uspi->s_fshift;
664 UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count)) 684 for (;;) {
685 if ((offset + size) < uspi->s_fsize)
686 rest = size;
687 else
688 rest = uspi->s_fsize - offset;
689 size -= rest;
690 cp = ubh->bh[begin]->b_data + offset;
691 while ((table[*cp++] & mask) == 0 && --rest)
692 ;
693 if (rest || !size)
694 break;
695 begin++;
696 offset = 0;
697 }
698 return (size + rest);
699}
700
701/*
702 * Find a block of the specified size in the specified cylinder group.
703 * @sp: pointer to super block
704 * @ucpi: pointer to cylinder group info
705 * @goal: near which block we want find new one
706 * @count: specified size
707 */
708static unsigned ufs_bitmap_search(struct super_block *sb,
709 struct ufs_cg_private_info *ucpi,
710 unsigned goal, unsigned count)
711{
712 /*
713 * Bit patterns for identifying fragments in the block map
714 * used as ((map & mask_arr) == want_arr)
715 */
716 static const int mask_arr[9] = {
717 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
718 };
719 static const int want_arr[9] = {
720 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
721 };
722 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
723 struct ufs_super_block_first *usb1;
724 struct ufs_cylinder_group *ucg;
725 unsigned start, length, loc, result;
726 unsigned pos, want, blockmap, mask, end;
727
728 UFSD("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count);
665 729
666 uspi = UFS_SB(sb)->s_uspi;
667 usb1 = ubh_get_usb_first (uspi); 730 usb1 = ubh_get_usb_first (uspi);
668 ucg = ubh_get_ucg(UCPI_UBH); 731 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
669 732
670 if (goal) 733 if (goal)
671 start = ufs_dtogd(goal) >> 3; 734 start = ufs_dtogd(goal) >> 3;
@@ -673,53 +736,50 @@ static unsigned ufs_bitmap_search (struct super_block * sb,
673 start = ucpi->c_frotor >> 3; 736 start = ucpi->c_frotor >> 3;
674 737
675 length = ((uspi->s_fpg + 7) >> 3) - start; 738 length = ((uspi->s_fpg + 7) >> 3) - start;
676 location = ubh_scanc(UCPI_UBH, ucpi->c_freeoff + start, length, 739 loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff + start, length,
677 (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other, 740 (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other,
678 1 << (count - 1 + (uspi->s_fpb & 7))); 741 1 << (count - 1 + (uspi->s_fpb & 7)));
679 if (location == 0) { 742 if (loc == 0) {
680 length = start + 1; 743 length = start + 1;
681 location = ubh_scanc(UCPI_UBH, ucpi->c_freeoff, length, 744 loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff, length,
682 (uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other, 745 (uspi->s_fpb == 8) ? ufs_fragtable_8fpb :
683 1 << (count - 1 + (uspi->s_fpb & 7))); 746 ufs_fragtable_other,
684 if (location == 0) { 747 1 << (count - 1 + (uspi->s_fpb & 7)));
685 ufs_error (sb, "ufs_bitmap_search", 748 if (loc == 0) {
686 "bitmap corrupted on cg %u, start %u, length %u, count %u, freeoff %u\n", 749 ufs_error(sb, "ufs_bitmap_search",
687 ucpi->c_cgx, start, length, count, ucpi->c_freeoff); 750 "bitmap corrupted on cg %u, start %u,"
751 " length %u, count %u, freeoff %u\n",
752 ucpi->c_cgx, start, length, count,
753 ucpi->c_freeoff);
688 return (unsigned)-1; 754 return (unsigned)-1;
689 } 755 }
690 start = 0; 756 start = 0;
691 } 757 }
692 result = (start + length - location) << 3; 758 result = (start + length - loc) << 3;
693 ucpi->c_frotor = result; 759 ucpi->c_frotor = result;
694 760
695 /* 761 /*
696 * found the byte in the map 762 * found the byte in the map
697 */ 763 */
698 blockmap = ubh_blkmap(UCPI_UBH, ucpi->c_freeoff, result); 764
699 fragsize = 0; 765 for (end = result + 8; result < end; result += uspi->s_fpb) {
700 for (possition = 0, mask = 1; possition < 8; possition++, mask <<= 1) { 766 blockmap = ubh_blkmap(UCPI_UBH(ucpi), ucpi->c_freeoff, result);
701 if (blockmap & mask) { 767 blockmap <<= 1;
702 if (!(possition & uspi->s_fpbmask)) 768 mask = mask_arr[count];
703 fragsize = 1; 769 want = want_arr[count];
704 else 770 for (pos = 0; pos <= uspi->s_fpb - count; pos++) {
705 fragsize++; 771 if ((blockmap & mask) == want) {
706 } 772 UFSD("EXIT, result %u\n", result);
707 else { 773 return result + pos;
708 if (fragsize == count) { 774 }
709 result += possition - count; 775 mask <<= 1;
710 UFSD(("EXIT, result %u\n", result)) 776 want <<= 1;
711 return result; 777 }
712 } 778 }
713 fragsize = 0; 779
714 } 780 ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n",
715 } 781 ucpi->c_cgx);
716 if (fragsize == count) { 782 UFSD("EXIT (FAILED)\n");
717 result += possition - count;
718 UFSD(("EXIT, result %u\n", result))
719 return result;
720 }
721 ufs_error (sb, "ufs_bitmap_search", "block not in map on cg %u\n", ucpi->c_cgx);
722 UFSD(("EXIT (FAILED)\n"))
723 return (unsigned)-1; 783 return (unsigned)-1;
724} 784}
725 785
@@ -734,9 +794,9 @@ static void ufs_clusteracct(struct super_block * sb,
734 return; 794 return;
735 795
736 if (cnt > 0) 796 if (cnt > 0)
737 ubh_setbit(UCPI_UBH, ucpi->c_clusteroff, blkno); 797 ubh_setbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno);
738 else 798 else
739 ubh_clrbit(UCPI_UBH, ucpi->c_clusteroff, blkno); 799 ubh_clrbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno);
740 800
741 /* 801 /*
742 * Find the size of the cluster going forward. 802 * Find the size of the cluster going forward.
@@ -745,7 +805,7 @@ static void ufs_clusteracct(struct super_block * sb,
745 end = start + uspi->s_contigsumsize; 805 end = start + uspi->s_contigsumsize;
746 if ( end >= ucpi->c_nclusterblks) 806 if ( end >= ucpi->c_nclusterblks)
747 end = ucpi->c_nclusterblks; 807 end = ucpi->c_nclusterblks;
748 i = ubh_find_next_zero_bit (UCPI_UBH, ucpi->c_clusteroff, end, start); 808 i = ubh_find_next_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, end, start);
749 if (i > end) 809 if (i > end)
750 i = end; 810 i = end;
751 forw = i - start; 811 forw = i - start;
@@ -757,7 +817,7 @@ static void ufs_clusteracct(struct super_block * sb,
757 end = start - uspi->s_contigsumsize; 817 end = start - uspi->s_contigsumsize;
758 if (end < 0 ) 818 if (end < 0 )
759 end = -1; 819 end = -1;
760 i = ubh_find_last_zero_bit (UCPI_UBH, ucpi->c_clusteroff, start, end); 820 i = ubh_find_last_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, start, end);
761 if ( i < end) 821 if ( i < end)
762 i = end; 822 i = end;
763 back = start - i; 823 back = start - i;
@@ -769,11 +829,11 @@ static void ufs_clusteracct(struct super_block * sb,
769 i = back + forw + 1; 829 i = back + forw + 1;
770 if (i > uspi->s_contigsumsize) 830 if (i > uspi->s_contigsumsize)
771 i = uspi->s_contigsumsize; 831 i = uspi->s_contigsumsize;
772 fs32_add(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (i << 2)), cnt); 832 fs32_add(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (i << 2)), cnt);
773 if (back > 0) 833 if (back > 0)
774 fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (back << 2)), cnt); 834 fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (back << 2)), cnt);
775 if (forw > 0) 835 if (forw > 0)
776 fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH, ucpi->c_clustersumoff + (forw << 2)), cnt); 836 fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (forw << 2)), cnt);
777} 837}
778 838
779 839
diff --git a/fs/ufs/cylinder.c b/fs/ufs/cylinder.c
index 14abb8b835f7..09c39e5e6386 100644
--- a/fs/ufs/cylinder.c
+++ b/fs/ufs/cylinder.c
@@ -20,15 +20,6 @@
20#include "swab.h" 20#include "swab.h"
21#include "util.h" 21#include "util.h"
22 22
23#undef UFS_CYLINDER_DEBUG
24
25#ifdef UFS_CYLINDER_DEBUG
26#define UFSD(x) printk("(%s, %d), %s:", __FILE__, __LINE__, __FUNCTION__); printk x;
27#else
28#define UFSD(x)
29#endif
30
31
32/* 23/*
33 * Read cylinder group into cache. The memory space for ufs_cg_private_info 24 * Read cylinder group into cache. The memory space for ufs_cg_private_info
34 * structure is already allocated during ufs_read_super. 25 * structure is already allocated during ufs_read_super.
@@ -42,19 +33,19 @@ static void ufs_read_cylinder (struct super_block * sb,
42 struct ufs_cylinder_group * ucg; 33 struct ufs_cylinder_group * ucg;
43 unsigned i, j; 34 unsigned i, j;
44 35
45 UFSD(("ENTER, cgno %u, bitmap_nr %u\n", cgno, bitmap_nr)) 36 UFSD("ENTER, cgno %u, bitmap_nr %u\n", cgno, bitmap_nr);
46 uspi = sbi->s_uspi; 37 uspi = sbi->s_uspi;
47 ucpi = sbi->s_ucpi[bitmap_nr]; 38 ucpi = sbi->s_ucpi[bitmap_nr];
48 ucg = (struct ufs_cylinder_group *)sbi->s_ucg[cgno]->b_data; 39 ucg = (struct ufs_cylinder_group *)sbi->s_ucg[cgno]->b_data;
49 40
50 UCPI_UBH->fragment = ufs_cgcmin(cgno); 41 UCPI_UBH(ucpi)->fragment = ufs_cgcmin(cgno);
51 UCPI_UBH->count = uspi->s_cgsize >> sb->s_blocksize_bits; 42 UCPI_UBH(ucpi)->count = uspi->s_cgsize >> sb->s_blocksize_bits;
52 /* 43 /*
53 * We have already the first fragment of cylinder group block in buffer 44 * We have already the first fragment of cylinder group block in buffer
54 */ 45 */
55 UCPI_UBH->bh[0] = sbi->s_ucg[cgno]; 46 UCPI_UBH(ucpi)->bh[0] = sbi->s_ucg[cgno];
56 for (i = 1; i < UCPI_UBH->count; i++) 47 for (i = 1; i < UCPI_UBH(ucpi)->count; i++)
57 if (!(UCPI_UBH->bh[i] = sb_bread(sb, UCPI_UBH->fragment + i))) 48 if (!(UCPI_UBH(ucpi)->bh[i] = sb_bread(sb, UCPI_UBH(ucpi)->fragment + i)))
58 goto failed; 49 goto failed;
59 sbi->s_cgno[bitmap_nr] = cgno; 50 sbi->s_cgno[bitmap_nr] = cgno;
60 51
@@ -73,7 +64,7 @@ static void ufs_read_cylinder (struct super_block * sb,
73 ucpi->c_clustersumoff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clustersumoff); 64 ucpi->c_clustersumoff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clustersumoff);
74 ucpi->c_clusteroff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clusteroff); 65 ucpi->c_clusteroff = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_clusteroff);
75 ucpi->c_nclusterblks = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_nclusterblks); 66 ucpi->c_nclusterblks = fs32_to_cpu(sb, ucg->cg_u.cg_44.cg_nclusterblks);
76 UFSD(("EXIT\n")) 67 UFSD("EXIT\n");
77 return; 68 return;
78 69
79failed: 70failed:
@@ -95,15 +86,15 @@ void ufs_put_cylinder (struct super_block * sb, unsigned bitmap_nr)
95 struct ufs_cylinder_group * ucg; 86 struct ufs_cylinder_group * ucg;
96 unsigned i; 87 unsigned i;
97 88
98 UFSD(("ENTER, bitmap_nr %u\n", bitmap_nr)) 89 UFSD("ENTER, bitmap_nr %u\n", bitmap_nr);
99 90
100 uspi = sbi->s_uspi; 91 uspi = sbi->s_uspi;
101 if (sbi->s_cgno[bitmap_nr] == UFS_CGNO_EMPTY) { 92 if (sbi->s_cgno[bitmap_nr] == UFS_CGNO_EMPTY) {
102 UFSD(("EXIT\n")) 93 UFSD("EXIT\n");
103 return; 94 return;
104 } 95 }
105 ucpi = sbi->s_ucpi[bitmap_nr]; 96 ucpi = sbi->s_ucpi[bitmap_nr];
106 ucg = ubh_get_ucg(UCPI_UBH); 97 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
107 98
108 if (uspi->s_ncg > UFS_MAX_GROUP_LOADED && bitmap_nr >= sbi->s_cg_loaded) { 99 if (uspi->s_ncg > UFS_MAX_GROUP_LOADED && bitmap_nr >= sbi->s_cg_loaded) {
109 ufs_panic (sb, "ufs_put_cylinder", "internal error"); 100 ufs_panic (sb, "ufs_put_cylinder", "internal error");
@@ -116,13 +107,13 @@ void ufs_put_cylinder (struct super_block * sb, unsigned bitmap_nr)
116 ucg->cg_rotor = cpu_to_fs32(sb, ucpi->c_rotor); 107 ucg->cg_rotor = cpu_to_fs32(sb, ucpi->c_rotor);
117 ucg->cg_frotor = cpu_to_fs32(sb, ucpi->c_frotor); 108 ucg->cg_frotor = cpu_to_fs32(sb, ucpi->c_frotor);
118 ucg->cg_irotor = cpu_to_fs32(sb, ucpi->c_irotor); 109 ucg->cg_irotor = cpu_to_fs32(sb, ucpi->c_irotor);
119 ubh_mark_buffer_dirty (UCPI_UBH); 110 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
120 for (i = 1; i < UCPI_UBH->count; i++) { 111 for (i = 1; i < UCPI_UBH(ucpi)->count; i++) {
121 brelse (UCPI_UBH->bh[i]); 112 brelse (UCPI_UBH(ucpi)->bh[i]);
122 } 113 }
123 114
124 sbi->s_cgno[bitmap_nr] = UFS_CGNO_EMPTY; 115 sbi->s_cgno[bitmap_nr] = UFS_CGNO_EMPTY;
125 UFSD(("EXIT\n")) 116 UFSD("EXIT\n");
126} 117}
127 118
128/* 119/*
@@ -139,7 +130,7 @@ struct ufs_cg_private_info * ufs_load_cylinder (
139 struct ufs_cg_private_info * ucpi; 130 struct ufs_cg_private_info * ucpi;
140 unsigned cg, i, j; 131 unsigned cg, i, j;
141 132
142 UFSD(("ENTER, cgno %u\n", cgno)) 133 UFSD("ENTER, cgno %u\n", cgno);
143 134
144 uspi = sbi->s_uspi; 135 uspi = sbi->s_uspi;
145 if (cgno >= uspi->s_ncg) { 136 if (cgno >= uspi->s_ncg) {
@@ -150,7 +141,7 @@ struct ufs_cg_private_info * ufs_load_cylinder (
150 * Cylinder group number cg it in cache and it was last used 141 * Cylinder group number cg it in cache and it was last used
151 */ 142 */
152 if (sbi->s_cgno[0] == cgno) { 143 if (sbi->s_cgno[0] == cgno) {
153 UFSD(("EXIT\n")) 144 UFSD("EXIT\n");
154 return sbi->s_ucpi[0]; 145 return sbi->s_ucpi[0];
155 } 146 }
156 /* 147 /*
@@ -160,16 +151,16 @@ struct ufs_cg_private_info * ufs_load_cylinder (
160 if (sbi->s_cgno[cgno] != UFS_CGNO_EMPTY) { 151 if (sbi->s_cgno[cgno] != UFS_CGNO_EMPTY) {
161 if (sbi->s_cgno[cgno] != cgno) { 152 if (sbi->s_cgno[cgno] != cgno) {
162 ufs_panic (sb, "ufs_load_cylinder", "internal error, wrong number of cg in cache"); 153 ufs_panic (sb, "ufs_load_cylinder", "internal error, wrong number of cg in cache");
163 UFSD(("EXIT (FAILED)\n")) 154 UFSD("EXIT (FAILED)\n");
164 return NULL; 155 return NULL;
165 } 156 }
166 else { 157 else {
167 UFSD(("EXIT\n")) 158 UFSD("EXIT\n");
168 return sbi->s_ucpi[cgno]; 159 return sbi->s_ucpi[cgno];
169 } 160 }
170 } else { 161 } else {
171 ufs_read_cylinder (sb, cgno, cgno); 162 ufs_read_cylinder (sb, cgno, cgno);
172 UFSD(("EXIT\n")) 163 UFSD("EXIT\n");
173 return sbi->s_ucpi[cgno]; 164 return sbi->s_ucpi[cgno];
174 } 165 }
175 } 166 }
@@ -204,6 +195,6 @@ struct ufs_cg_private_info * ufs_load_cylinder (
204 sbi->s_ucpi[0] = ucpi; 195 sbi->s_ucpi[0] = ucpi;
205 ufs_read_cylinder (sb, cgno, 0); 196 ufs_read_cylinder (sb, cgno, 0);
206 } 197 }
207 UFSD(("EXIT\n")) 198 UFSD("EXIT\n");
208 return sbi->s_ucpi[0]; 199 return sbi->s_ucpi[0];
209} 200}
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 1a561202d3f4..7f0a0aa63584 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -11,31 +11,20 @@
11 * 4.4BSD (FreeBSD) support added on February 1st 1998 by 11 * 4.4BSD (FreeBSD) support added on February 1st 1998 by
12 * Niels Kristian Bech Jensen <nkbj@image.dk> partially based 12 * Niels Kristian Bech Jensen <nkbj@image.dk> partially based
13 * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>. 13 * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>.
14 *
15 * Migration to usage of "page cache" on May 2006 by
16 * Evgeniy Dushistov <dushistov@mail.ru> based on ext2 code base.
14 */ 17 */
15 18
16#include <linux/time.h> 19#include <linux/time.h>
17#include <linux/fs.h> 20#include <linux/fs.h>
18#include <linux/ufs_fs.h> 21#include <linux/ufs_fs.h>
19#include <linux/smp_lock.h> 22#include <linux/smp_lock.h>
20#include <linux/buffer_head.h>
21#include <linux/sched.h> 23#include <linux/sched.h>
22 24
23#include "swab.h" 25#include "swab.h"
24#include "util.h" 26#include "util.h"
25 27
26#undef UFS_DIR_DEBUG
27
28#ifdef UFS_DIR_DEBUG
29#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
30#else
31#define UFSD(x)
32#endif
33
34static int
35ufs_check_dir_entry (const char *, struct inode *, struct ufs_dir_entry *,
36 struct buffer_head *, unsigned long);
37
38
39/* 28/*
40 * NOTE! unlike strncmp, ufs_match returns 1 for success, 0 for failure. 29 * NOTE! unlike strncmp, ufs_match returns 1 for success, 0 for failure.
41 * 30 *
@@ -51,495 +40,541 @@ static inline int ufs_match(struct super_block *sb, int len,
51 return !memcmp(name, de->d_name, len); 40 return !memcmp(name, de->d_name, len);
52} 41}
53 42
54/* 43static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to)
55 * This is blatantly stolen from ext2fs
56 */
57static int
58ufs_readdir (struct file * filp, void * dirent, filldir_t filldir)
59{ 44{
60 struct inode *inode = filp->f_dentry->d_inode; 45 struct inode *dir = page->mapping->host;
61 int error = 0; 46 int err = 0;
62 unsigned long offset, lblk; 47 dir->i_version++;
63 int i, stored; 48 page->mapping->a_ops->commit_write(NULL, page, from, to);
64 struct buffer_head * bh; 49 if (IS_DIRSYNC(dir))
65 struct ufs_dir_entry * de; 50 err = write_one_page(page, 1);
66 struct super_block * sb; 51 else
67 int de_reclen; 52 unlock_page(page);
68 unsigned flags; 53 return err;
69 u64 blk= 0L; 54}
70
71 lock_kernel();
72
73 sb = inode->i_sb;
74 flags = UFS_SB(sb)->s_flags;
75
76 UFSD(("ENTER, ino %lu f_pos %lu\n", inode->i_ino, (unsigned long) filp->f_pos))
77
78 stored = 0;
79 bh = NULL;
80 offset = filp->f_pos & (sb->s_blocksize - 1);
81
82 while (!error && !stored && filp->f_pos < inode->i_size) {
83 lblk = (filp->f_pos) >> sb->s_blocksize_bits;
84 blk = ufs_frag_map(inode, lblk);
85 if (!blk || !(bh = sb_bread(sb, blk))) {
86 /* XXX - error - skip to the next block */
87 printk("ufs_readdir: "
88 "dir inode %lu has a hole at offset %lu\n",
89 inode->i_ino, (unsigned long int)filp->f_pos);
90 filp->f_pos += sb->s_blocksize - offset;
91 continue;
92 }
93
94revalidate:
95 /* If the dir block has changed since the last call to
96 * readdir(2), then we might be pointing to an invalid
97 * dirent right now. Scan from the start of the block
98 * to make sure. */
99 if (filp->f_version != inode->i_version) {
100 for (i = 0; i < sb->s_blocksize && i < offset; ) {
101 de = (struct ufs_dir_entry *)(bh->b_data + i);
102 /* It's too expensive to do a full
103 * dirent test each time round this
104 * loop, but we do have to test at
105 * least that it is non-zero. A
106 * failure will be detected in the
107 * dirent test below. */
108 de_reclen = fs16_to_cpu(sb, de->d_reclen);
109 if (de_reclen < 1)
110 break;
111 i += de_reclen;
112 }
113 offset = i;
114 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
115 | offset;
116 filp->f_version = inode->i_version;
117 }
118 55
119 while (!error && filp->f_pos < inode->i_size 56static inline void ufs_put_page(struct page *page)
120 && offset < sb->s_blocksize) { 57{
121 de = (struct ufs_dir_entry *) (bh->b_data + offset); 58 kunmap(page);
122 /* XXX - put in a real ufs_check_dir_entry() */ 59 page_cache_release(page);
123 if ((de->d_reclen == 0) || (ufs_get_de_namlen(sb, de) == 0)) { 60}
124 filp->f_pos = (filp->f_pos &
125 (sb->s_blocksize - 1)) +
126 sb->s_blocksize;
127 brelse(bh);
128 unlock_kernel();
129 return stored;
130 }
131 if (!ufs_check_dir_entry ("ufs_readdir", inode, de,
132 bh, offset)) {
133 /* On error, skip the f_pos to the
134 next block. */
135 filp->f_pos = (filp->f_pos |
136 (sb->s_blocksize - 1)) +
137 1;
138 brelse (bh);
139 unlock_kernel();
140 return stored;
141 }
142 offset += fs16_to_cpu(sb, de->d_reclen);
143 if (de->d_ino) {
144 /* We might block in the next section
145 * if the data destination is
146 * currently swapped out. So, use a
147 * version stamp to detect whether or
148 * not the directory has been modified
149 * during the copy operation. */
150 unsigned long version = filp->f_version;
151 unsigned char d_type = DT_UNKNOWN;
152 61
153 UFSD(("filldir(%s,%u)\n", de->d_name, 62static inline unsigned long ufs_dir_pages(struct inode *inode)
154 fs32_to_cpu(sb, de->d_ino))) 63{
155 UFSD(("namlen %u\n", ufs_get_de_namlen(sb, de))) 64 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
65}
156 66
157 if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) 67ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry)
158 d_type = de->d_u.d_44.d_type; 68{
159 error = filldir(dirent, de->d_name, 69 ino_t res = 0;
160 ufs_get_de_namlen(sb, de), filp->f_pos, 70 struct ufs_dir_entry *de;
161 fs32_to_cpu(sb, de->d_ino), d_type); 71 struct page *page;
162 if (error) 72
163 break; 73 de = ufs_find_entry(dir, dentry, &page);
164 if (version != filp->f_version) 74 if (de) {
165 goto revalidate; 75 res = fs32_to_cpu(dir->i_sb, de->d_ino);
166 stored ++; 76 ufs_put_page(page);
167 }
168 filp->f_pos += fs16_to_cpu(sb, de->d_reclen);
169 }
170 offset = 0;
171 brelse (bh);
172 } 77 }
173 unlock_kernel(); 78 return res;
174 return 0;
175} 79}
176 80
177/*
178 * define how far ahead to read directories while searching them.
179 */
180#define NAMEI_RA_CHUNKS 2
181#define NAMEI_RA_BLOCKS 4
182#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
183#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
184 81
185/* 82/* Releases the page */
186 * ufs_find_entry() 83void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
187 * 84 struct page *page, struct inode *inode)
188 * finds an entry in the specified directory with the wanted name. It
189 * returns the cache buffer in which the entry was found, and the entry
190 * itself (as a parameter - res_bh). It does NOT read the inode of the
191 * entry - you'll have to do that yourself if you want to.
192 */
193struct ufs_dir_entry * ufs_find_entry (struct dentry *dentry,
194 struct buffer_head ** res_bh)
195{ 85{
196 struct super_block * sb; 86 unsigned from = (char *) de - (char *) page_address(page);
197 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 87 unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen);
198 struct buffer_head * bh_read[NAMEI_RA_SIZE]; 88 int err;
199 unsigned long offset;
200 int block, toread, i, err;
201 struct inode *dir = dentry->d_parent->d_inode;
202 const char *name = dentry->d_name.name;
203 int namelen = dentry->d_name.len;
204 89
205 UFSD(("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen)) 90 lock_page(page);
206 91 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
207 *res_bh = NULL; 92 BUG_ON(err);
208 93 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
209 sb = dir->i_sb; 94 ufs_set_de_type(dir->i_sb, de, inode->i_mode);
210 95 err = ufs_commit_chunk(page, from, to);
211 if (namelen > UFS_MAXNAMLEN) 96 ufs_put_page(page);
212 return NULL; 97 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
98 mark_inode_dirty(dir);
99}
213 100
214 memset (bh_use, 0, sizeof (bh_use));
215 toread = 0;
216 for (block = 0; block < NAMEI_RA_SIZE; ++block) {
217 struct buffer_head * bh;
218 101
219 if ((block << sb->s_blocksize_bits) >= dir->i_size) 102static void ufs_check_page(struct page *page)
220 break; 103{
221 bh = ufs_getfrag (dir, block, 0, &err); 104 struct inode *dir = page->mapping->host;
222 bh_use[block] = bh; 105 struct super_block *sb = dir->i_sb;
223 if (bh && !buffer_uptodate(bh)) 106 char *kaddr = page_address(page);
224 bh_read[toread++] = bh; 107 unsigned offs, rec_len;
108 unsigned limit = PAGE_CACHE_SIZE;
109 struct ufs_dir_entry *p;
110 char *error;
111
112 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
113 limit = dir->i_size & ~PAGE_CACHE_MASK;
114 if (limit & (UFS_SECTOR_SIZE - 1))
115 goto Ebadsize;
116 if (!limit)
117 goto out;
225 } 118 }
119 for (offs = 0; offs <= limit - UFS_DIR_REC_LEN(1); offs += rec_len) {
120 p = (struct ufs_dir_entry *)(kaddr + offs);
121 rec_len = fs16_to_cpu(sb, p->d_reclen);
122
123 if (rec_len < UFS_DIR_REC_LEN(1))
124 goto Eshort;
125 if (rec_len & 3)
126 goto Ealign;
127 if (rec_len < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, p)))
128 goto Enamelen;
129 if (((offs + rec_len - 1) ^ offs) & ~(UFS_SECTOR_SIZE-1))
130 goto Espan;
131 if (fs32_to_cpu(sb, p->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg *
132 UFS_SB(sb)->s_uspi->s_ncg))
133 goto Einumber;
134 }
135 if (offs != limit)
136 goto Eend;
137out:
138 SetPageChecked(page);
139 return;
140
141 /* Too bad, we had an error */
142
143Ebadsize:
144 ufs_error(sb, "ufs_check_page",
145 "size of directory #%lu is not a multiple of chunk size",
146 dir->i_ino
147 );
148 goto fail;
149Eshort:
150 error = "rec_len is smaller than minimal";
151 goto bad_entry;
152Ealign:
153 error = "unaligned directory entry";
154 goto bad_entry;
155Enamelen:
156 error = "rec_len is too small for name_len";
157 goto bad_entry;
158Espan:
159 error = "directory entry across blocks";
160 goto bad_entry;
161Einumber:
162 error = "inode out of bounds";
163bad_entry:
164 ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - "
165 "offset=%lu, rec_len=%d, name_len=%d",
166 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
167 rec_len, ufs_get_de_namlen(sb, p));
168 goto fail;
169Eend:
170 p = (struct ufs_dir_entry *)(kaddr + offs);
171 ufs_error (sb, "ext2_check_page",
172 "entry in directory #%lu spans the page boundary"
173 "offset=%lu",
174 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs);
175fail:
176 SetPageChecked(page);
177 SetPageError(page);
178}
226 179
227 for (block = 0, offset = 0; offset < dir->i_size; block++) { 180static struct page *ufs_get_page(struct inode *dir, unsigned long n)
228 struct buffer_head * bh; 181{
229 struct ufs_dir_entry * de; 182 struct address_space *mapping = dir->i_mapping;
230 char * dlimit; 183 struct page *page = read_cache_page(mapping, n,
231 184 (filler_t*)mapping->a_ops->readpage, NULL);
232 if ((block % NAMEI_RA_BLOCKS) == 0 && toread) { 185 if (!IS_ERR(page)) {
233 ll_rw_block (READ, toread, bh_read); 186 wait_on_page_locked(page);
234 toread = 0; 187 kmap(page);
235 } 188 if (!PageUptodate(page))
236 bh = bh_use[block % NAMEI_RA_SIZE]; 189 goto fail;
237 if (!bh) { 190 if (!PageChecked(page))
238 ufs_error (sb, "ufs_find_entry", 191 ufs_check_page(page);
239 "directory #%lu contains a hole at offset %lu", 192 if (PageError(page))
240 dir->i_ino, offset); 193 goto fail;
241 offset += sb->s_blocksize;
242 continue;
243 }
244 wait_on_buffer (bh);
245 if (!buffer_uptodate(bh)) {
246 /*
247 * read error: all bets are off
248 */
249 break;
250 }
251
252 de = (struct ufs_dir_entry *) bh->b_data;
253 dlimit = bh->b_data + sb->s_blocksize;
254 while ((char *) de < dlimit && offset < dir->i_size) {
255 /* this code is executed quadratically often */
256 /* do minimal checking by hand */
257 int de_len;
258
259 if ((char *) de + namelen <= dlimit &&
260 ufs_match(sb, namelen, name, de)) {
261 /* found a match -
262 just to be sure, do a full check */
263 if (!ufs_check_dir_entry("ufs_find_entry",
264 dir, de, bh, offset))
265 goto failed;
266 for (i = 0; i < NAMEI_RA_SIZE; ++i) {
267 if (bh_use[i] != bh)
268 brelse (bh_use[i]);
269 }
270 *res_bh = bh;
271 return de;
272 }
273 /* prevent looping on a bad block */
274 de_len = fs16_to_cpu(sb, de->d_reclen);
275 if (de_len <= 0)
276 goto failed;
277 offset += de_len;
278 de = (struct ufs_dir_entry *) ((char *) de + de_len);
279 }
280
281 brelse (bh);
282 if (((block + NAMEI_RA_SIZE) << sb->s_blocksize_bits ) >=
283 dir->i_size)
284 bh = NULL;
285 else
286 bh = ufs_getfrag (dir, block + NAMEI_RA_SIZE, 0, &err);
287 bh_use[block % NAMEI_RA_SIZE] = bh;
288 if (bh && !buffer_uptodate(bh))
289 bh_read[toread++] = bh;
290 } 194 }
195 return page;
291 196
292failed: 197fail:
293 for (i = 0; i < NAMEI_RA_SIZE; ++i) brelse (bh_use[i]); 198 ufs_put_page(page);
294 UFSD(("EXIT\n")) 199 return ERR_PTR(-EIO);
295 return NULL;
296} 200}
297 201
298static int 202/*
299ufs_check_dir_entry (const char *function, struct inode *dir, 203 * Return the offset into page `page_nr' of the last valid
300 struct ufs_dir_entry *de, struct buffer_head *bh, 204 * byte in that page, plus one.
301 unsigned long offset) 205 */
206static unsigned
207ufs_last_byte(struct inode *inode, unsigned long page_nr)
302{ 208{
303 struct super_block *sb = dir->i_sb; 209 unsigned last_byte = inode->i_size;
304 const char *error_msg = NULL; 210
305 int rlen = fs16_to_cpu(sb, de->d_reclen); 211 last_byte -= page_nr << PAGE_CACHE_SHIFT;
306 212 if (last_byte > PAGE_CACHE_SIZE)
307 if (rlen < UFS_DIR_REC_LEN(1)) 213 last_byte = PAGE_CACHE_SIZE;
308 error_msg = "reclen is smaller than minimal"; 214 return last_byte;
309 else if (rlen % 4 != 0)
310 error_msg = "reclen % 4 != 0";
311 else if (rlen < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de)))
312 error_msg = "reclen is too small for namlen";
313 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
314 error_msg = "directory entry across blocks";
315 else if (fs32_to_cpu(sb, de->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg *
316 UFS_SB(sb)->s_uspi->s_ncg))
317 error_msg = "inode out of bounds";
318
319 if (error_msg != NULL)
320 ufs_error (sb, function, "bad entry in directory #%lu, size %Lu: %s - "
321 "offset=%lu, inode=%lu, reclen=%d, namlen=%d",
322 dir->i_ino, dir->i_size, error_msg, offset,
323 (unsigned long)fs32_to_cpu(sb, de->d_ino),
324 rlen, ufs_get_de_namlen(sb, de));
325
326 return (error_msg == NULL ? 1 : 0);
327} 215}
328 216
329struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct buffer_head **p) 217static inline struct ufs_dir_entry *
218ufs_next_entry(struct super_block *sb, struct ufs_dir_entry *p)
330{ 219{
331 int err; 220 return (struct ufs_dir_entry *)((char *)p +
332 struct buffer_head *bh = ufs_bread (dir, 0, 0, &err); 221 fs16_to_cpu(sb, p->d_reclen));
333 struct ufs_dir_entry *res = NULL;
334
335 if (bh) {
336 res = (struct ufs_dir_entry *) bh->b_data;
337 res = (struct ufs_dir_entry *)((char *)res +
338 fs16_to_cpu(dir->i_sb, res->d_reclen));
339 }
340 *p = bh;
341 return res;
342} 222}
343ino_t ufs_inode_by_name(struct inode * dir, struct dentry *dentry) 223
224struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
344{ 225{
345 ino_t res = 0; 226 struct page *page = ufs_get_page(dir, 0);
346 struct ufs_dir_entry * de; 227 struct ufs_dir_entry *de = NULL;
347 struct buffer_head *bh;
348 228
349 de = ufs_find_entry (dentry, &bh); 229 if (!IS_ERR(page)) {
350 if (de) { 230 de = ufs_next_entry(dir->i_sb,
351 res = fs32_to_cpu(dir->i_sb, de->d_ino); 231 (struct ufs_dir_entry *)page_address(page));
352 brelse(bh); 232 *p = page;
353 } 233 }
354 return res; 234 return de;
355} 235}
356 236
357void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, 237/*
358 struct buffer_head *bh, struct inode *inode) 238 * ufs_find_entry()
239 *
240 * finds an entry in the specified directory with the wanted name. It
241 * returns the page in which the entry was found, and the entry itself
242 * (as a parameter - res_dir). Page is returned mapped and unlocked.
243 * Entry is guaranteed to be valid.
244 */
245struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry,
246 struct page **res_page)
359{ 247{
360 dir->i_version++; 248 struct super_block *sb = dir->i_sb;
361 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); 249 const char *name = dentry->d_name.name;
362 mark_buffer_dirty(bh); 250 int namelen = dentry->d_name.len;
363 if (IS_DIRSYNC(dir)) 251 unsigned reclen = UFS_DIR_REC_LEN(namelen);
364 sync_dirty_buffer(bh); 252 unsigned long start, n;
365 brelse (bh); 253 unsigned long npages = ufs_dir_pages(dir);
254 struct page *page = NULL;
255 struct ufs_inode_info *ui = UFS_I(dir);
256 struct ufs_dir_entry *de;
257
258 UFSD("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen);
259
260 if (npages == 0 || namelen > UFS_MAXNAMLEN)
261 goto out;
262
263 /* OFFSET_CACHE */
264 *res_page = NULL;
265
266 start = ui->i_dir_start_lookup;
267
268 if (start >= npages)
269 start = 0;
270 n = start;
271 do {
272 char *kaddr;
273 page = ufs_get_page(dir, n);
274 if (!IS_ERR(page)) {
275 kaddr = page_address(page);
276 de = (struct ufs_dir_entry *) kaddr;
277 kaddr += ufs_last_byte(dir, n) - reclen;
278 while ((char *) de <= kaddr) {
279 if (de->d_reclen == 0) {
280 ufs_error(dir->i_sb, __FUNCTION__,
281 "zero-length directory entry");
282 ufs_put_page(page);
283 goto out;
284 }
285 if (ufs_match(sb, namelen, name, de))
286 goto found;
287 de = ufs_next_entry(sb, de);
288 }
289 ufs_put_page(page);
290 }
291 if (++n >= npages)
292 n = 0;
293 } while (n != start);
294out:
295 return NULL;
296
297found:
298 *res_page = page;
299 ui->i_dir_start_lookup = n;
300 return de;
366} 301}
367 302
368/* 303/*
369 * ufs_add_entry() 304 * Parent is locked.
370 *
371 * adds a file entry to the specified directory, using the same
372 * semantics as ufs_find_entry(). It returns NULL if it failed.
373 */ 305 */
374int ufs_add_link(struct dentry *dentry, struct inode *inode) 306int ufs_add_link(struct dentry *dentry, struct inode *inode)
375{ 307{
376 struct super_block * sb;
377 struct ufs_sb_private_info * uspi;
378 unsigned long offset;
379 unsigned fragoff;
380 unsigned short rec_len;
381 struct buffer_head * bh;
382 struct ufs_dir_entry * de, * de1;
383 struct inode *dir = dentry->d_parent->d_inode; 308 struct inode *dir = dentry->d_parent->d_inode;
384 const char *name = dentry->d_name.name; 309 const char *name = dentry->d_name.name;
385 int namelen = dentry->d_name.len; 310 int namelen = dentry->d_name.len;
311 struct super_block *sb = dir->i_sb;
312 unsigned reclen = UFS_DIR_REC_LEN(namelen);
313 unsigned short rec_len, name_len;
314 struct page *page = NULL;
315 struct ufs_dir_entry *de;
316 unsigned long npages = ufs_dir_pages(dir);
317 unsigned long n;
318 char *kaddr;
319 unsigned from, to;
386 int err; 320 int err;
387 321
388 UFSD(("ENTER, name %s, namelen %u\n", name, namelen)) 322 UFSD("ENTER, name %s, namelen %u\n", name, namelen);
389 323
390 sb = dir->i_sb; 324 /*
391 uspi = UFS_SB(sb)->s_uspi; 325 * We take care of directory expansion in the same loop.
392 326 * This code plays outside i_size, so it locks the page
393 if (!namelen) 327 * to protect that region.
394 return -EINVAL; 328 */
395 bh = ufs_bread (dir, 0, 0, &err); 329 for (n = 0; n <= npages; n++) {
396 if (!bh) 330 char *dir_end;
397 return err; 331
398 rec_len = UFS_DIR_REC_LEN(namelen); 332 page = ufs_get_page(dir, n);
399 offset = 0; 333 err = PTR_ERR(page);
400 de = (struct ufs_dir_entry *) bh->b_data; 334 if (IS_ERR(page))
401 while (1) { 335 goto out;
402 if ((char *)de >= UFS_SECTOR_SIZE + bh->b_data) { 336 lock_page(page);
403 fragoff = offset & ~uspi->s_fmask; 337 kaddr = page_address(page);
404 if (fragoff != 0 && fragoff != UFS_SECTOR_SIZE) 338 dir_end = kaddr + ufs_last_byte(dir, n);
405 ufs_error (sb, "ufs_add_entry", "internal error" 339 de = (struct ufs_dir_entry *)kaddr;
406 " fragoff %u", fragoff); 340 kaddr += PAGE_CACHE_SIZE - reclen;
407 if (!fragoff) { 341 while ((char *)de <= kaddr) {
408 brelse (bh); 342 if ((char *)de == dir_end) {
409 bh = ufs_bread (dir, offset >> sb->s_blocksize_bits, 1, &err); 343 /* We hit i_size */
410 if (!bh) 344 name_len = 0;
411 return err; 345 rec_len = UFS_SECTOR_SIZE;
412 }
413 if (dir->i_size <= offset) {
414 if (dir->i_size == 0) {
415 brelse(bh);
416 return -ENOENT;
417 }
418 de = (struct ufs_dir_entry *) (bh->b_data + fragoff);
419 de->d_ino = 0;
420 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE); 346 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE);
421 ufs_set_de_namlen(sb, de, 0); 347 de->d_ino = 0;
422 dir->i_size = offset + UFS_SECTOR_SIZE; 348 goto got_it;
423 mark_inode_dirty(dir);
424 } else {
425 de = (struct ufs_dir_entry *) bh->b_data;
426 } 349 }
350 if (de->d_reclen == 0) {
351 ufs_error(dir->i_sb, __FUNCTION__,
352 "zero-length directory entry");
353 err = -EIO;
354 goto out_unlock;
355 }
356 err = -EEXIST;
357 if (ufs_match(sb, namelen, name, de))
358 goto out_unlock;
359 name_len = UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de));
360 rec_len = fs16_to_cpu(sb, de->d_reclen);
361 if (!de->d_ino && rec_len >= reclen)
362 goto got_it;
363 if (rec_len >= name_len + reclen)
364 goto got_it;
365 de = (struct ufs_dir_entry *) ((char *) de + rec_len);
427 } 366 }
428 if (!ufs_check_dir_entry ("ufs_add_entry", dir, de, bh, offset)) { 367 unlock_page(page);
429 brelse (bh); 368 ufs_put_page(page);
430 return -ENOENT;
431 }
432 if (ufs_match(sb, namelen, name, de)) {
433 brelse (bh);
434 return -EEXIST;
435 }
436 if (de->d_ino == 0 && fs16_to_cpu(sb, de->d_reclen) >= rec_len)
437 break;
438
439 if (fs16_to_cpu(sb, de->d_reclen) >=
440 UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de)) + rec_len)
441 break;
442 offset += fs16_to_cpu(sb, de->d_reclen);
443 de = (struct ufs_dir_entry *) ((char *) de + fs16_to_cpu(sb, de->d_reclen));
444 } 369 }
445 370 BUG();
371 return -EINVAL;
372
373got_it:
374 from = (char*)de - (char*)page_address(page);
375 to = from + rec_len;
376 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
377 if (err)
378 goto out_unlock;
446 if (de->d_ino) { 379 if (de->d_ino) {
447 de1 = (struct ufs_dir_entry *) ((char *) de + 380 struct ufs_dir_entry *de1 =
448 UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))); 381 (struct ufs_dir_entry *) ((char *) de + name_len);
449 de1->d_reclen = 382 de1->d_reclen = cpu_to_fs16(sb, rec_len - name_len);
450 cpu_to_fs16(sb, fs16_to_cpu(sb, de->d_reclen) - 383 de->d_reclen = cpu_to_fs16(sb, name_len);
451 UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de))); 384
452 de->d_reclen =
453 cpu_to_fs16(sb, UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, de)));
454 de = de1; 385 de = de1;
455 } 386 }
456 de->d_ino = 0; 387
457 ufs_set_de_namlen(sb, de, namelen); 388 ufs_set_de_namlen(sb, de, namelen);
458 memcpy (de->d_name, name, namelen + 1); 389 memcpy(de->d_name, name, namelen + 1);
459 de->d_ino = cpu_to_fs32(sb, inode->i_ino); 390 de->d_ino = cpu_to_fs32(sb, inode->i_ino);
460 ufs_set_de_type(sb, de, inode->i_mode); 391 ufs_set_de_type(sb, de, inode->i_mode);
461 mark_buffer_dirty(bh); 392
462 if (IS_DIRSYNC(dir)) 393 err = ufs_commit_chunk(page, from, to);
463 sync_dirty_buffer(bh);
464 brelse (bh);
465 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 394 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
466 dir->i_version++; 395
467 mark_inode_dirty(dir); 396 mark_inode_dirty(dir);
397 /* OFFSET_CACHE */
398out_put:
399 ufs_put_page(page);
400out:
401 return err;
402out_unlock:
403 unlock_page(page);
404 goto out_put;
405}
468 406
469 UFSD(("EXIT\n")) 407static inline unsigned
408ufs_validate_entry(struct super_block *sb, char *base,
409 unsigned offset, unsigned mask)
410{
411 struct ufs_dir_entry *de = (struct ufs_dir_entry*)(base + offset);
412 struct ufs_dir_entry *p = (struct ufs_dir_entry*)(base + (offset&mask));
413 while ((char*)p < (char*)de) {
414 if (p->d_reclen == 0)
415 break;
416 p = ufs_next_entry(sb, p);
417 }
418 return (char *)p - base;
419}
420
421
422/*
423 * This is blatantly stolen from ext2fs
424 */
425static int
426ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
427{
428 loff_t pos = filp->f_pos;
429 struct inode *inode = filp->f_dentry->d_inode;
430 struct super_block *sb = inode->i_sb;
431 unsigned int offset = pos & ~PAGE_CACHE_MASK;
432 unsigned long n = pos >> PAGE_CACHE_SHIFT;
433 unsigned long npages = ufs_dir_pages(inode);
434 unsigned chunk_mask = ~(UFS_SECTOR_SIZE - 1);
435 int need_revalidate = filp->f_version != inode->i_version;
436 unsigned flags = UFS_SB(sb)->s_flags;
437
438 UFSD("BEGIN\n");
439
440 if (pos > inode->i_size - UFS_DIR_REC_LEN(1))
441 return 0;
442
443 for ( ; n < npages; n++, offset = 0) {
444 char *kaddr, *limit;
445 struct ufs_dir_entry *de;
446
447 struct page *page = ufs_get_page(inode, n);
448
449 if (IS_ERR(page)) {
450 ufs_error(sb, __FUNCTION__,
451 "bad page in #%lu",
452 inode->i_ino);
453 filp->f_pos += PAGE_CACHE_SIZE - offset;
454 return -EIO;
455 }
456 kaddr = page_address(page);
457 if (unlikely(need_revalidate)) {
458 if (offset) {
459 offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
460 filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset;
461 }
462 filp->f_version = inode->i_version;
463 need_revalidate = 0;
464 }
465 de = (struct ufs_dir_entry *)(kaddr+offset);
466 limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1);
467 for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) {
468 if (de->d_reclen == 0) {
469 ufs_error(sb, __FUNCTION__,
470 "zero-length directory entry");
471 ufs_put_page(page);
472 return -EIO;
473 }
474 if (de->d_ino) {
475 int over;
476 unsigned char d_type = DT_UNKNOWN;
477
478 offset = (char *)de - kaddr;
479
480 UFSD("filldir(%s,%u)\n", de->d_name,
481 fs32_to_cpu(sb, de->d_ino));
482 UFSD("namlen %u\n", ufs_get_de_namlen(sb, de));
483
484 if ((flags & UFS_DE_MASK) == UFS_DE_44BSD)
485 d_type = de->d_u.d_44.d_type;
486
487 over = filldir(dirent, de->d_name,
488 ufs_get_de_namlen(sb, de),
489 (n<<PAGE_CACHE_SHIFT) | offset,
490 fs32_to_cpu(sb, de->d_ino), d_type);
491 if (over) {
492 ufs_put_page(page);
493 return 0;
494 }
495 }
496 filp->f_pos += fs16_to_cpu(sb, de->d_reclen);
497 }
498 ufs_put_page(page);
499 }
470 return 0; 500 return 0;
471} 501}
472 502
503
473/* 504/*
474 * ufs_delete_entry deletes a directory entry by merging it with the 505 * ufs_delete_entry deletes a directory entry by merging it with the
475 * previous entry. 506 * previous entry.
476 */ 507 */
477int ufs_delete_entry (struct inode * inode, struct ufs_dir_entry * dir, 508int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
478 struct buffer_head * bh ) 509 struct page * page)
479
480{ 510{
481 struct super_block * sb; 511 struct super_block *sb = inode->i_sb;
482 struct ufs_dir_entry * de, * pde; 512 struct address_space *mapping = page->mapping;
483 unsigned i; 513 char *kaddr = page_address(page);
484 514 unsigned from = ((char*)dir - kaddr) & ~(UFS_SECTOR_SIZE - 1);
485 UFSD(("ENTER\n")) 515 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
516 struct ufs_dir_entry *pde = NULL;
517 struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
518 int err;
486 519
487 sb = inode->i_sb; 520 UFSD("ENTER\n");
488 i = 0; 521
489 pde = NULL; 522 UFSD("ino %u, reclen %u, namlen %u, name %s\n",
490 de = (struct ufs_dir_entry *) bh->b_data; 523 fs32_to_cpu(sb, de->d_ino),
491 524 fs16_to_cpu(sb, de->d_reclen),
492 UFSD(("ino %u, reclen %u, namlen %u, name %s\n", 525 ufs_get_de_namlen(sb, de), de->d_name);
493 fs32_to_cpu(sb, de->d_ino), 526
494 fs16_to_cpu(sb, de->d_reclen), 527 while ((char*)de < (char*)dir) {
495 ufs_get_de_namlen(sb, de), de->d_name)) 528 if (de->d_reclen == 0) {
496 529 ufs_error(inode->i_sb, __FUNCTION__,
497 while (i < bh->b_size) { 530 "zero-length directory entry");
498 if (!ufs_check_dir_entry ("ufs_delete_entry", inode, de, bh, i)) { 531 err = -EIO;
499 brelse(bh); 532 goto out;
500 return -EIO;
501 }
502 if (de == dir) {
503 if (pde)
504 fs16_add(sb, &pde->d_reclen,
505 fs16_to_cpu(sb, dir->d_reclen));
506 dir->d_ino = 0;
507 inode->i_version++;
508 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
509 mark_inode_dirty(inode);
510 mark_buffer_dirty(bh);
511 if (IS_DIRSYNC(inode))
512 sync_dirty_buffer(bh);
513 brelse(bh);
514 UFSD(("EXIT\n"))
515 return 0;
516 } 533 }
517 i += fs16_to_cpu(sb, de->d_reclen); 534 pde = de;
518 if (i == UFS_SECTOR_SIZE) pde = NULL; 535 de = ufs_next_entry(sb, de);
519 else pde = de;
520 de = (struct ufs_dir_entry *)
521 ((char *) de + fs16_to_cpu(sb, de->d_reclen));
522 if (i == UFS_SECTOR_SIZE && de->d_reclen == 0)
523 break;
524 } 536 }
525 UFSD(("EXIT\n")) 537 if (pde)
526 brelse(bh); 538 from = (char*)pde - (char*)page_address(page);
527 return -ENOENT; 539 lock_page(page);
540 err = mapping->a_ops->prepare_write(NULL, page, from, to);
541 BUG_ON(err);
542 if (pde)
543 pde->d_reclen = cpu_to_fs16(sb, to-from);
544 dir->d_ino = 0;
545 err = ufs_commit_chunk(page, from, to);
546 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
547 mark_inode_dirty(inode);
548out:
549 ufs_put_page(page);
550 UFSD("EXIT\n");
551 return err;
528} 552}
529 553
530int ufs_make_empty(struct inode * inode, struct inode *dir) 554int ufs_make_empty(struct inode * inode, struct inode *dir)
531{ 555{
532 struct super_block * sb = dir->i_sb; 556 struct super_block * sb = dir->i_sb;
533 struct buffer_head * dir_block; 557 struct address_space *mapping = inode->i_mapping;
558 struct page *page = grab_cache_page(mapping, 0);
534 struct ufs_dir_entry * de; 559 struct ufs_dir_entry * de;
560 char *base;
535 int err; 561 int err;
536 562
537 dir_block = ufs_bread (inode, 0, 1, &err); 563 if (!page)
538 if (!dir_block) 564 return -ENOMEM;
539 return err; 565 kmap(page);
566 err = mapping->a_ops->prepare_write(NULL, page, 0, UFS_SECTOR_SIZE);
567 if (err) {
568 unlock_page(page);
569 goto fail;
570 }
571
572
573 base = (char*)page_address(page);
574 memset(base, 0, PAGE_CACHE_SIZE);
575
576 de = (struct ufs_dir_entry *) base;
540 577
541 inode->i_blocks = sb->s_blocksize / UFS_SECTOR_SIZE;
542 de = (struct ufs_dir_entry *) dir_block->b_data;
543 de->d_ino = cpu_to_fs32(sb, inode->i_ino); 578 de->d_ino = cpu_to_fs32(sb, inode->i_ino);
544 ufs_set_de_type(sb, de, inode->i_mode); 579 ufs_set_de_type(sb, de, inode->i_mode);
545 ufs_set_de_namlen(sb, de, 1); 580 ufs_set_de_namlen(sb, de, 1);
@@ -552,72 +587,65 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
552 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE - UFS_DIR_REC_LEN(1)); 587 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE - UFS_DIR_REC_LEN(1));
553 ufs_set_de_namlen(sb, de, 2); 588 ufs_set_de_namlen(sb, de, 2);
554 strcpy (de->d_name, ".."); 589 strcpy (de->d_name, "..");
555 mark_buffer_dirty(dir_block); 590
556 brelse (dir_block); 591 err = ufs_commit_chunk(page, 0, UFS_SECTOR_SIZE);
557 mark_inode_dirty(inode); 592fail:
558 return 0; 593 kunmap(page);
594 page_cache_release(page);
595 return err;
559} 596}
560 597
561/* 598/*
562 * routine to check that the specified directory is empty (for rmdir) 599 * routine to check that the specified directory is empty (for rmdir)
563 */ 600 */
564int ufs_empty_dir (struct inode * inode) 601int ufs_empty_dir(struct inode * inode)
565{ 602{
566 struct super_block * sb; 603 struct super_block *sb = inode->i_sb;
567 unsigned long offset; 604 struct page *page = NULL;
568 struct buffer_head * bh; 605 unsigned long i, npages = ufs_dir_pages(inode);
569 struct ufs_dir_entry * de, * de1; 606
570 int err; 607 for (i = 0; i < npages; i++) {
571 608 char *kaddr;
572 sb = inode->i_sb; 609 struct ufs_dir_entry *de;
573 610 page = ufs_get_page(inode, i);
574 if (inode->i_size < UFS_DIR_REC_LEN(1) + UFS_DIR_REC_LEN(2) || 611
575 !(bh = ufs_bread (inode, 0, 0, &err))) { 612 if (IS_ERR(page))
576 ufs_warning (inode->i_sb, "empty_dir", 613 continue;
577 "bad directory (dir #%lu) - no data block", 614
578 inode->i_ino); 615 kaddr = page_address(page);
579 return 1; 616 de = (struct ufs_dir_entry *)kaddr;
580 } 617 kaddr += ufs_last_byte(inode, i) - UFS_DIR_REC_LEN(1);
581 de = (struct ufs_dir_entry *) bh->b_data; 618
582 de1 = (struct ufs_dir_entry *) 619 while ((char *)de <= kaddr) {
583 ((char *)de + fs16_to_cpu(sb, de->d_reclen)); 620 if (de->d_reclen == 0) {
584 if (fs32_to_cpu(sb, de->d_ino) != inode->i_ino || de1->d_ino == 0 || 621 ufs_error(inode->i_sb, __FUNCTION__,
585 strcmp (".", de->d_name) || strcmp ("..", de1->d_name)) { 622 "zero-length directory entry: "
586 ufs_warning (inode->i_sb, "empty_dir", 623 "kaddr=%p, de=%p\n", kaddr, de);
587 "bad directory (dir #%lu) - no `.' or `..'", 624 goto not_empty;
588 inode->i_ino);
589 return 1;
590 }
591 offset = fs16_to_cpu(sb, de->d_reclen) + fs16_to_cpu(sb, de1->d_reclen);
592 de = (struct ufs_dir_entry *)
593 ((char *)de1 + fs16_to_cpu(sb, de1->d_reclen));
594 while (offset < inode->i_size ) {
595 if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
596 brelse (bh);
597 bh = ufs_bread (inode, offset >> sb->s_blocksize_bits, 1, &err);
598 if (!bh) {
599 ufs_error (sb, "empty_dir",
600 "directory #%lu contains a hole at offset %lu",
601 inode->i_ino, offset);
602 offset += sb->s_blocksize;
603 continue;
604 } 625 }
605 de = (struct ufs_dir_entry *) bh->b_data; 626 if (de->d_ino) {
606 } 627 u16 namelen=ufs_get_de_namlen(sb, de);
607 if (!ufs_check_dir_entry ("empty_dir", inode, de, bh, offset)) { 628 /* check for . and .. */
608 brelse (bh); 629 if (de->d_name[0] != '.')
609 return 1; 630 goto not_empty;
610 } 631 if (namelen > 2)
611 if (de->d_ino) { 632 goto not_empty;
612 brelse (bh); 633 if (namelen < 2) {
613 return 0; 634 if (inode->i_ino !=
635 fs32_to_cpu(sb, de->d_ino))
636 goto not_empty;
637 } else if (de->d_name[1] != '.')
638 goto not_empty;
639 }
640 de = ufs_next_entry(sb, de);
614 } 641 }
615 offset += fs16_to_cpu(sb, de->d_reclen); 642 ufs_put_page(page);
616 de = (struct ufs_dir_entry *)
617 ((char *)de + fs16_to_cpu(sb, de->d_reclen));
618 } 643 }
619 brelse (bh);
620 return 1; 644 return 1;
645
646not_empty:
647 ufs_put_page(page);
648 return 0;
621} 649}
622 650
623const struct file_operations ufs_dir_operations = { 651const struct file_operations ufs_dir_operations = {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 312fd3f86313..a9c6e5f04fae 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -25,6 +25,26 @@
25 25
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/ufs_fs.h> 27#include <linux/ufs_fs.h>
28#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
29
30static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync)
31{
32 struct inode *inode = dentry->d_inode;
33 int err;
34 int ret;
35
36 ret = sync_mapping_buffers(inode->i_mapping);
37 if (!(inode->i_state & I_DIRTY))
38 return ret;
39 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
40 return ret;
41
42 err = ufs_sync_inode(inode);
43 if (ret == 0)
44 ret = err;
45 return ret;
46}
47
28 48
29/* 49/*
30 * We have mostly NULL's here: the current defaults are ok for 50 * We have mostly NULL's here: the current defaults are ok for
@@ -37,9 +57,6 @@ const struct file_operations ufs_file_operations = {
37 .write = generic_file_write, 57 .write = generic_file_write,
38 .mmap = generic_file_mmap, 58 .mmap = generic_file_mmap,
39 .open = generic_file_open, 59 .open = generic_file_open,
60 .fsync = ufs_sync_file,
40 .sendfile = generic_file_sendfile, 61 .sendfile = generic_file_sendfile,
41}; 62};
42
43struct inode_operations ufs_file_inode_operations = {
44 .truncate = ufs_truncate,
45};
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index c7a47ed4f430..9501dcd3b213 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -34,14 +34,6 @@
34#include "swab.h" 34#include "swab.h"
35#include "util.h" 35#include "util.h"
36 36
37#undef UFS_IALLOC_DEBUG
38
39#ifdef UFS_IALLOC_DEBUG
40#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
41#else
42#define UFSD(x)
43#endif
44
45/* 37/*
46 * NOTE! When we get the inode, we're the only people 38 * NOTE! When we get the inode, we're the only people
47 * that have access to it, and as such there are no 39 * that have access to it, and as such there are no
@@ -68,7 +60,7 @@ void ufs_free_inode (struct inode * inode)
68 int is_directory; 60 int is_directory;
69 unsigned ino, cg, bit; 61 unsigned ino, cg, bit;
70 62
71 UFSD(("ENTER, ino %lu\n", inode->i_ino)) 63 UFSD("ENTER, ino %lu\n", inode->i_ino);
72 64
73 sb = inode->i_sb; 65 sb = inode->i_sb;
74 uspi = UFS_SB(sb)->s_uspi; 66 uspi = UFS_SB(sb)->s_uspi;
@@ -91,7 +83,7 @@ void ufs_free_inode (struct inode * inode)
91 unlock_super (sb); 83 unlock_super (sb);
92 return; 84 return;
93 } 85 }
94 ucg = ubh_get_ucg(UCPI_UBH); 86 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
95 if (!ufs_cg_chkmagic(sb, ucg)) 87 if (!ufs_cg_chkmagic(sb, ucg))
96 ufs_panic (sb, "ufs_free_fragments", "internal error, bad cg magic number"); 88 ufs_panic (sb, "ufs_free_fragments", "internal error, bad cg magic number");
97 89
@@ -104,33 +96,33 @@ void ufs_free_inode (struct inode * inode)
104 96
105 clear_inode (inode); 97 clear_inode (inode);
106 98
107 if (ubh_isclr (UCPI_UBH, ucpi->c_iusedoff, bit)) 99 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
108 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); 100 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino);
109 else { 101 else {
110 ubh_clrbit (UCPI_UBH, ucpi->c_iusedoff, bit); 102 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit);
111 if (ino < ucpi->c_irotor) 103 if (ino < ucpi->c_irotor)
112 ucpi->c_irotor = ino; 104 ucpi->c_irotor = ino;
113 fs32_add(sb, &ucg->cg_cs.cs_nifree, 1); 105 fs32_add(sb, &ucg->cg_cs.cs_nifree, 1);
114 fs32_add(sb, &usb1->fs_cstotal.cs_nifree, 1); 106 uspi->cs_total.cs_nifree++;
115 fs32_add(sb, &UFS_SB(sb)->fs_cs(cg).cs_nifree, 1); 107 fs32_add(sb, &UFS_SB(sb)->fs_cs(cg).cs_nifree, 1);
116 108
117 if (is_directory) { 109 if (is_directory) {
118 fs32_sub(sb, &ucg->cg_cs.cs_ndir, 1); 110 fs32_sub(sb, &ucg->cg_cs.cs_ndir, 1);
119 fs32_sub(sb, &usb1->fs_cstotal.cs_ndir, 1); 111 uspi->cs_total.cs_ndir--;
120 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cg).cs_ndir, 1); 112 fs32_sub(sb, &UFS_SB(sb)->fs_cs(cg).cs_ndir, 1);
121 } 113 }
122 } 114 }
123 115
124 ubh_mark_buffer_dirty (USPI_UBH); 116 ubh_mark_buffer_dirty (USPI_UBH(uspi));
125 ubh_mark_buffer_dirty (UCPI_UBH); 117 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
126 if (sb->s_flags & MS_SYNCHRONOUS) { 118 if (sb->s_flags & MS_SYNCHRONOUS) {
127 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); 119 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
128 ubh_wait_on_buffer (UCPI_UBH); 120 ubh_wait_on_buffer (UCPI_UBH(ucpi));
129 } 121 }
130 122
131 sb->s_dirt = 1; 123 sb->s_dirt = 1;
132 unlock_super (sb); 124 unlock_super (sb);
133 UFSD(("EXIT\n")) 125 UFSD("EXIT\n");
134} 126}
135 127
136/* 128/*
@@ -155,7 +147,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
155 unsigned cg, bit, i, j, start; 147 unsigned cg, bit, i, j, start;
156 struct ufs_inode_info *ufsi; 148 struct ufs_inode_info *ufsi;
157 149
158 UFSD(("ENTER\n")) 150 UFSD("ENTER\n");
159 151
160 /* Cannot create files in a deleted directory */ 152 /* Cannot create files in a deleted directory */
161 if (!dir || !dir->i_nlink) 153 if (!dir || !dir->i_nlink)
@@ -213,43 +205,43 @@ cg_found:
213 ucpi = ufs_load_cylinder (sb, cg); 205 ucpi = ufs_load_cylinder (sb, cg);
214 if (!ucpi) 206 if (!ucpi)
215 goto failed; 207 goto failed;
216 ucg = ubh_get_ucg(UCPI_UBH); 208 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
217 if (!ufs_cg_chkmagic(sb, ucg)) 209 if (!ufs_cg_chkmagic(sb, ucg))
218 ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number"); 210 ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number");
219 211
220 start = ucpi->c_irotor; 212 start = ucpi->c_irotor;
221 bit = ubh_find_next_zero_bit (UCPI_UBH, ucpi->c_iusedoff, uspi->s_ipg, start); 213 bit = ubh_find_next_zero_bit (UCPI_UBH(ucpi), ucpi->c_iusedoff, uspi->s_ipg, start);
222 if (!(bit < uspi->s_ipg)) { 214 if (!(bit < uspi->s_ipg)) {
223 bit = ubh_find_first_zero_bit (UCPI_UBH, ucpi->c_iusedoff, start); 215 bit = ubh_find_first_zero_bit (UCPI_UBH(ucpi), ucpi->c_iusedoff, start);
224 if (!(bit < start)) { 216 if (!(bit < start)) {
225 ufs_error (sb, "ufs_new_inode", 217 ufs_error (sb, "ufs_new_inode",
226 "cylinder group %u corrupted - error in inode bitmap\n", cg); 218 "cylinder group %u corrupted - error in inode bitmap\n", cg);
227 goto failed; 219 goto failed;
228 } 220 }
229 } 221 }
230 UFSD(("start = %u, bit = %u, ipg = %u\n", start, bit, uspi->s_ipg)) 222 UFSD("start = %u, bit = %u, ipg = %u\n", start, bit, uspi->s_ipg);
231 if (ubh_isclr (UCPI_UBH, ucpi->c_iusedoff, bit)) 223 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
232 ubh_setbit (UCPI_UBH, ucpi->c_iusedoff, bit); 224 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit);
233 else { 225 else {
234 ufs_panic (sb, "ufs_new_inode", "internal error"); 226 ufs_panic (sb, "ufs_new_inode", "internal error");
235 goto failed; 227 goto failed;
236 } 228 }
237 229
238 fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1); 230 fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1);
239 fs32_sub(sb, &usb1->fs_cstotal.cs_nifree, 1); 231 uspi->cs_total.cs_nifree--;
240 fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1); 232 fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1);
241 233
242 if (S_ISDIR(mode)) { 234 if (S_ISDIR(mode)) {
243 fs32_add(sb, &ucg->cg_cs.cs_ndir, 1); 235 fs32_add(sb, &ucg->cg_cs.cs_ndir, 1);
244 fs32_add(sb, &usb1->fs_cstotal.cs_ndir, 1); 236 uspi->cs_total.cs_ndir++;
245 fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1); 237 fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1);
246 } 238 }
247 239
248 ubh_mark_buffer_dirty (USPI_UBH); 240 ubh_mark_buffer_dirty (USPI_UBH(uspi));
249 ubh_mark_buffer_dirty (UCPI_UBH); 241 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
250 if (sb->s_flags & MS_SYNCHRONOUS) { 242 if (sb->s_flags & MS_SYNCHRONOUS) {
251 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); 243 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
252 ubh_wait_on_buffer (UCPI_UBH); 244 ubh_wait_on_buffer (UCPI_UBH(ucpi));
253 } 245 }
254 sb->s_dirt = 1; 246 sb->s_dirt = 1;
255 247
@@ -272,6 +264,7 @@ cg_found:
272 ufsi->i_shadow = 0; 264 ufsi->i_shadow = 0;
273 ufsi->i_osync = 0; 265 ufsi->i_osync = 0;
274 ufsi->i_oeftflag = 0; 266 ufsi->i_oeftflag = 0;
267 ufsi->i_dir_start_lookup = 0;
275 memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1)); 268 memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1));
276 269
277 insert_inode_hash(inode); 270 insert_inode_hash(inode);
@@ -287,14 +280,14 @@ cg_found:
287 return ERR_PTR(-EDQUOT); 280 return ERR_PTR(-EDQUOT);
288 } 281 }
289 282
290 UFSD(("allocating inode %lu\n", inode->i_ino)) 283 UFSD("allocating inode %lu\n", inode->i_ino);
291 UFSD(("EXIT\n")) 284 UFSD("EXIT\n");
292 return inode; 285 return inode;
293 286
294failed: 287failed:
295 unlock_super (sb); 288 unlock_super (sb);
296 make_bad_inode(inode); 289 make_bad_inode(inode);
297 iput (inode); 290 iput (inode);
298 UFSD(("EXIT (FAILED)\n")) 291 UFSD("EXIT (FAILED)\n");
299 return ERR_PTR(-ENOSPC); 292 return ERR_PTR(-ENOSPC);
300} 293}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 3c3f62ce2ad9..e7c8615beb65 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -41,14 +41,7 @@
41#include "swab.h" 41#include "swab.h"
42#include "util.h" 42#include "util.h"
43 43
44#undef UFS_INODE_DEBUG 44static u64 ufs_frag_map(struct inode *inode, sector_t frag);
45#undef UFS_INODE_DEBUG_MORE
46
47#ifdef UFS_INODE_DEBUG
48#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
49#else
50#define UFSD(x)
51#endif
52 45
53static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) 46static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
54{ 47{
@@ -61,7 +54,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
61 int n = 0; 54 int n = 0;
62 55
63 56
64 UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks)); 57 UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks);
65 if (i_block < 0) { 58 if (i_block < 0) {
66 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); 59 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
67 } else if (i_block < direct_blocks) { 60 } else if (i_block < direct_blocks) {
@@ -89,7 +82,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
89 * the begining of the filesystem. 82 * the begining of the filesystem.
90 */ 83 */
91 84
92u64 ufs_frag_map(struct inode *inode, sector_t frag) 85static u64 ufs_frag_map(struct inode *inode, sector_t frag)
93{ 86{
94 struct ufs_inode_info *ufsi = UFS_I(inode); 87 struct ufs_inode_info *ufsi = UFS_I(inode);
95 struct super_block *sb = inode->i_sb; 88 struct super_block *sb = inode->i_sb;
@@ -104,8 +97,10 @@ u64 ufs_frag_map(struct inode *inode, sector_t frag)
104 unsigned flags = UFS_SB(sb)->s_flags; 97 unsigned flags = UFS_SB(sb)->s_flags;
105 u64 temp = 0L; 98 u64 temp = 0L;
106 99
107 UFSD((": frag = %llu depth = %d\n", (unsigned long long)frag, depth)); 100 UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth);
108 UFSD((": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask)); 101 UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
102 uspi->s_fpbshift, uspi->s_apbmask,
103 (unsigned long long)mask);
109 104
110 if (depth == 0) 105 if (depth == 0)
111 return 0; 106 return 0;
@@ -161,26 +156,64 @@ out:
161 return ret; 156 return ret;
162} 157}
163 158
164static struct buffer_head * ufs_inode_getfrag (struct inode *inode, 159static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
165 unsigned int fragment, unsigned int new_fragment, 160{
166 unsigned int required, int *err, int metadata, long *phys, int *new) 161 lock_buffer(bh);
162 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
163 set_buffer_uptodate(bh);
164 mark_buffer_dirty(bh);
165 unlock_buffer(bh);
166 if (IS_SYNC(inode))
167 sync_dirty_buffer(bh);
168}
169
170static struct buffer_head *
171ufs_clear_frags(struct inode *inode, sector_t beg,
172 unsigned int n)
173{
174 struct buffer_head *res, *bh;
175 sector_t end = beg + n;
176
177 res = sb_getblk(inode->i_sb, beg);
178 ufs_clear_frag(inode, res);
179 for (++beg; beg < end; ++beg) {
180 bh = sb_getblk(inode->i_sb, beg);
181 ufs_clear_frag(inode, bh);
182 brelse(bh);
183 }
184 return res;
185}
186
187/**
188 * ufs_inode_getfrag() - allocate new fragment(s)
189 * @inode - pointer to inode
190 * @fragment - number of `fragment' which hold pointer
191 * to new allocated fragment(s)
192 * @new_fragment - number of new allocated fragment(s)
193 * @required - how many fragment(s) we require
194 * @err - we set it if something wrong
195 * @phys - pointer to where we save physical number of new allocated fragments,
196 * NULL if we allocate not data(indirect blocks for example).
197 * @new - we set it if we allocate new block
198 * @locked_page - for ufs_new_fragments()
199 */
200static struct buffer_head *
201ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
202 sector_t new_fragment, unsigned int required, int *err,
203 long *phys, int *new, struct page *locked_page)
167{ 204{
168 struct ufs_inode_info *ufsi = UFS_I(inode); 205 struct ufs_inode_info *ufsi = UFS_I(inode);
169 struct super_block * sb; 206 struct super_block *sb = inode->i_sb;
170 struct ufs_sb_private_info * uspi; 207 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
171 struct buffer_head * result; 208 struct buffer_head * result;
172 unsigned block, blockoff, lastfrag, lastblock, lastblockoff; 209 unsigned block, blockoff, lastfrag, lastblock, lastblockoff;
173 unsigned tmp, goal; 210 unsigned tmp, goal;
174 __fs32 * p, * p2; 211 __fs32 * p, * p2;
175 unsigned flags = 0;
176 212
177 UFSD(("ENTER, ino %lu, fragment %u, new_fragment %u, required %u\n", 213 UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, required %u, "
178 inode->i_ino, fragment, new_fragment, required)) 214 "metadata %d\n", inode->i_ino, fragment,
215 (unsigned long long)new_fragment, required, !phys);
179 216
180 sb = inode->i_sb;
181 uspi = UFS_SB(sb)->s_uspi;
182
183 flags = UFS_SB(sb)->s_flags;
184 /* TODO : to be done for write support 217 /* TODO : to be done for write support
185 if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 218 if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
186 goto ufs2; 219 goto ufs2;
@@ -195,16 +228,16 @@ repeat:
195 tmp = fs32_to_cpu(sb, *p); 228 tmp = fs32_to_cpu(sb, *p);
196 lastfrag = ufsi->i_lastfrag; 229 lastfrag = ufsi->i_lastfrag;
197 if (tmp && fragment < lastfrag) { 230 if (tmp && fragment < lastfrag) {
198 if (metadata) { 231 if (!phys) {
199 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); 232 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
200 if (tmp == fs32_to_cpu(sb, *p)) { 233 if (tmp == fs32_to_cpu(sb, *p)) {
201 UFSD(("EXIT, result %u\n", tmp + blockoff)) 234 UFSD("EXIT, result %u\n", tmp + blockoff);
202 return result; 235 return result;
203 } 236 }
204 brelse (result); 237 brelse (result);
205 goto repeat; 238 goto repeat;
206 } else { 239 } else {
207 *phys = tmp; 240 *phys = tmp + blockoff;
208 return NULL; 241 return NULL;
209 } 242 }
210 } 243 }
@@ -221,7 +254,8 @@ repeat:
221 if (lastblockoff) { 254 if (lastblockoff) {
222 p2 = ufsi->i_u1.i_data + lastblock; 255 p2 = ufsi->i_u1.i_data + lastblock;
223 tmp = ufs_new_fragments (inode, p2, lastfrag, 256 tmp = ufs_new_fragments (inode, p2, lastfrag,
224 fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff, err); 257 fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff,
258 err, locked_page);
225 if (!tmp) { 259 if (!tmp) {
226 if (lastfrag != ufsi->i_lastfrag) 260 if (lastfrag != ufsi->i_lastfrag)
227 goto repeat; 261 goto repeat;
@@ -233,14 +267,16 @@ repeat:
233 } 267 }
234 goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; 268 goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb;
235 tmp = ufs_new_fragments (inode, p, fragment - blockoff, 269 tmp = ufs_new_fragments (inode, p, fragment - blockoff,
236 goal, required + blockoff, err); 270 goal, required + blockoff,
271 err, locked_page);
237 } 272 }
238 /* 273 /*
239 * We will extend last allocated block 274 * We will extend last allocated block
240 */ 275 */
241 else if (lastblock == block) { 276 else if (lastblock == block) {
242 tmp = ufs_new_fragments (inode, p, fragment - (blockoff - lastblockoff), 277 tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
243 fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), err); 278 fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff),
279 err, locked_page);
244 } 280 }
245 /* 281 /*
246 * We will allocate new block before last allocated block 282 * We will allocate new block before last allocated block
@@ -248,8 +284,8 @@ repeat:
248 else /* (lastblock > block) */ { 284 else /* (lastblock > block) */ {
249 if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) 285 if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1])))
250 goal = tmp + uspi->s_fpb; 286 goal = tmp + uspi->s_fpb;
251 tmp = ufs_new_fragments (inode, p, fragment - blockoff, 287 tmp = ufs_new_fragments(inode, p, fragment - blockoff,
252 goal, uspi->s_fpb, err); 288 goal, uspi->s_fpb, err, locked_page);
253 } 289 }
254 if (!tmp) { 290 if (!tmp) {
255 if ((!blockoff && *p) || 291 if ((!blockoff && *p) ||
@@ -259,14 +295,10 @@ repeat:
259 return NULL; 295 return NULL;
260 } 296 }
261 297
262 /* The nullification of framgents done in ufs/balloc.c is 298 if (!phys) {
263 * something I don't have the stomache to move into here right 299 result = ufs_clear_frags(inode, tmp + blockoff, required);
264 * now. -DaveM
265 */
266 if (metadata) {
267 result = sb_getblk(inode->i_sb, tmp + blockoff);
268 } else { 300 } else {
269 *phys = tmp; 301 *phys = tmp + blockoff;
270 result = NULL; 302 result = NULL;
271 *err = 0; 303 *err = 0;
272 *new = 1; 304 *new = 1;
@@ -276,7 +308,7 @@ repeat:
276 if (IS_SYNC(inode)) 308 if (IS_SYNC(inode))
277 ufs_sync_inode (inode); 309 ufs_sync_inode (inode);
278 mark_inode_dirty(inode); 310 mark_inode_dirty(inode);
279 UFSD(("EXIT, result %u\n", tmp + blockoff)) 311 UFSD("EXIT, result %u\n", tmp + blockoff);
280 return result; 312 return result;
281 313
282 /* This part : To be implemented .... 314 /* This part : To be implemented ....
@@ -295,22 +327,35 @@ repeat2:
295 */ 327 */
296} 328}
297 329
298static struct buffer_head * ufs_block_getfrag (struct inode *inode, 330/**
299 struct buffer_head *bh, unsigned int fragment, unsigned int new_fragment, 331 * ufs_inode_getblock() - allocate new block
300 unsigned int blocksize, int * err, int metadata, long *phys, int *new) 332 * @inode - pointer to inode
333 * @bh - pointer to block which holds "pointer" to new allocated block
334 * @fragment - number of `fragment' which holds pointer
335 * to new allocated block
336 * @new_fragment - number of new allocated fragment
337 * (block will hold this fragment and also uspi->s_fpb-1)
338 * @err - see ufs_inode_getfrag()
339 * @phys - see ufs_inode_getfrag()
340 * @new - see ufs_inode_getfrag()
341 * @locked_page - see ufs_inode_getfrag()
342 */
343static struct buffer_head *
344ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
345 unsigned int fragment, sector_t new_fragment, int *err,
346 long *phys, int *new, struct page *locked_page)
301{ 347{
302 struct super_block * sb; 348 struct super_block *sb = inode->i_sb;
303 struct ufs_sb_private_info * uspi; 349 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
304 struct buffer_head * result; 350 struct buffer_head * result;
305 unsigned tmp, goal, block, blockoff; 351 unsigned tmp, goal, block, blockoff;
306 __fs32 * p; 352 __fs32 * p;
307 353
308 sb = inode->i_sb;
309 uspi = UFS_SB(sb)->s_uspi;
310 block = ufs_fragstoblks (fragment); 354 block = ufs_fragstoblks (fragment);
311 blockoff = ufs_fragnum (fragment); 355 blockoff = ufs_fragnum (fragment);
312 356
313 UFSD(("ENTER, ino %lu, fragment %u, new_fragment %u\n", inode->i_ino, fragment, new_fragment)) 357 UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, metadata %d\n",
358 inode->i_ino, fragment, (unsigned long long)new_fragment, !phys);
314 359
315 result = NULL; 360 result = NULL;
316 if (!bh) 361 if (!bh)
@@ -326,14 +371,14 @@ static struct buffer_head * ufs_block_getfrag (struct inode *inode,
326repeat: 371repeat:
327 tmp = fs32_to_cpu(sb, *p); 372 tmp = fs32_to_cpu(sb, *p);
328 if (tmp) { 373 if (tmp) {
329 if (metadata) { 374 if (!phys) {
330 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); 375 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
331 if (tmp == fs32_to_cpu(sb, *p)) 376 if (tmp == fs32_to_cpu(sb, *p))
332 goto out; 377 goto out;
333 brelse (result); 378 brelse (result);
334 goto repeat; 379 goto repeat;
335 } else { 380 } else {
336 *phys = tmp; 381 *phys = tmp + blockoff;
337 goto out; 382 goto out;
338 } 383 }
339 } 384 }
@@ -342,21 +387,19 @@ repeat:
342 goal = tmp + uspi->s_fpb; 387 goal = tmp + uspi->s_fpb;
343 else 388 else
344 goal = bh->b_blocknr + uspi->s_fpb; 389 goal = bh->b_blocknr + uspi->s_fpb;
345 tmp = ufs_new_fragments (inode, p, ufs_blknum(new_fragment), goal, uspi->s_fpb, err); 390 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal,
391 uspi->s_fpb, err, locked_page);
346 if (!tmp) { 392 if (!tmp) {
347 if (fs32_to_cpu(sb, *p)) 393 if (fs32_to_cpu(sb, *p))
348 goto repeat; 394 goto repeat;
349 goto out; 395 goto out;
350 } 396 }
351 397
352 /* The nullification of framgents done in ufs/balloc.c is 398
353 * something I don't have the stomache to move into here right 399 if (!phys) {
354 * now. -DaveM 400 result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb);
355 */
356 if (metadata) {
357 result = sb_getblk(sb, tmp + blockoff);
358 } else { 401 } else {
359 *phys = tmp; 402 *phys = tmp + blockoff;
360 *new = 1; 403 *new = 1;
361 } 404 }
362 405
@@ -365,18 +408,19 @@ repeat:
365 sync_dirty_buffer(bh); 408 sync_dirty_buffer(bh);
366 inode->i_ctime = CURRENT_TIME_SEC; 409 inode->i_ctime = CURRENT_TIME_SEC;
367 mark_inode_dirty(inode); 410 mark_inode_dirty(inode);
368 UFSD(("result %u\n", tmp + blockoff)); 411 UFSD("result %u\n", tmp + blockoff);
369out: 412out:
370 brelse (bh); 413 brelse (bh);
371 UFSD(("EXIT\n")); 414 UFSD("EXIT\n");
372 return result; 415 return result;
373} 416}
374 417
375/* 418/**
376 * This function gets the block which contains the fragment. 419 * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
420 * readpage, writepage and so on
377 */ 421 */
378 422
379int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) 423int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
380{ 424{
381 struct super_block * sb = inode->i_sb; 425 struct super_block * sb = inode->i_sb;
382 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; 426 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi;
@@ -387,7 +431,7 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea
387 431
388 if (!create) { 432 if (!create) {
389 phys64 = ufs_frag_map(inode, fragment); 433 phys64 = ufs_frag_map(inode, fragment);
390 UFSD(("phys64 = %llu \n",phys64)); 434 UFSD("phys64 = %llu\n", (unsigned long long)phys64);
391 if (phys64) 435 if (phys64)
392 map_bh(bh_result, sb, phys64); 436 map_bh(bh_result, sb, phys64);
393 return 0; 437 return 0;
@@ -402,7 +446,7 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea
402 446
403 lock_kernel(); 447 lock_kernel();
404 448
405 UFSD(("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment)) 449 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
406 if (fragment < 0) 450 if (fragment < 0)
407 goto abort_negative; 451 goto abort_negative;
408 if (fragment > 452 if (fragment >
@@ -418,15 +462,15 @@ int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_hea
418 * it much more readable: 462 * it much more readable:
419 */ 463 */
420#define GET_INODE_DATABLOCK(x) \ 464#define GET_INODE_DATABLOCK(x) \
421 ufs_inode_getfrag(inode, x, fragment, 1, &err, 0, &phys, &new) 465 ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page)
422#define GET_INODE_PTR(x) \ 466#define GET_INODE_PTR(x) \
423 ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, 1, NULL, NULL) 467 ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page)
424#define GET_INDIRECT_DATABLOCK(x) \ 468#define GET_INDIRECT_DATABLOCK(x) \
425 ufs_block_getfrag(inode, bh, x, fragment, sb->s_blocksize, \ 469 ufs_inode_getblock(inode, bh, x, fragment, \
426 &err, 0, &phys, &new); 470 &err, &phys, &new, bh_result->b_page);
427#define GET_INDIRECT_PTR(x) \ 471#define GET_INDIRECT_PTR(x) \
428 ufs_block_getfrag(inode, bh, x, fragment, sb->s_blocksize, \ 472 ufs_inode_getblock(inode, bh, x, fragment, \
429 &err, 1, NULL, NULL); 473 &err, NULL, NULL, bh_result->b_page);
430 474
431 if (ptr < UFS_NDIR_FRAGMENT) { 475 if (ptr < UFS_NDIR_FRAGMENT) {
432 bh = GET_INODE_DATABLOCK(ptr); 476 bh = GET_INODE_DATABLOCK(ptr);
@@ -474,8 +518,9 @@ abort_too_big:
474 goto abort; 518 goto abort;
475} 519}
476 520
477struct buffer_head *ufs_getfrag(struct inode *inode, unsigned int fragment, 521static struct buffer_head *ufs_getfrag(struct inode *inode,
478 int create, int *err) 522 unsigned int fragment,
523 int create, int *err)
479{ 524{
480 struct buffer_head dummy; 525 struct buffer_head dummy;
481 int error; 526 int error;
@@ -502,7 +547,7 @@ struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment,
502{ 547{
503 struct buffer_head * bh; 548 struct buffer_head * bh;
504 549
505 UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment)) 550 UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment);
506 bh = ufs_getfrag (inode, fragment, create, err); 551 bh = ufs_getfrag (inode, fragment, create, err);
507 if (!bh || buffer_uptodate(bh)) 552 if (!bh || buffer_uptodate(bh))
508 return bh; 553 return bh;
@@ -531,7 +576,7 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
531{ 576{
532 return generic_block_bmap(mapping,block,ufs_getfrag_block); 577 return generic_block_bmap(mapping,block,ufs_getfrag_block);
533} 578}
534struct address_space_operations ufs_aops = { 579const struct address_space_operations ufs_aops = {
535 .readpage = ufs_readpage, 580 .readpage = ufs_readpage,
536 .writepage = ufs_writepage, 581 .writepage = ufs_writepage,
537 .sync_page = block_sync_page, 582 .sync_page = block_sync_page,
@@ -540,39 +585,34 @@ struct address_space_operations ufs_aops = {
540 .bmap = ufs_bmap 585 .bmap = ufs_bmap
541}; 586};
542 587
543void ufs_read_inode (struct inode * inode) 588static void ufs_set_inode_ops(struct inode *inode)
589{
590 if (S_ISREG(inode->i_mode)) {
591 inode->i_op = &ufs_file_inode_operations;
592 inode->i_fop = &ufs_file_operations;
593 inode->i_mapping->a_ops = &ufs_aops;
594 } else if (S_ISDIR(inode->i_mode)) {
595 inode->i_op = &ufs_dir_inode_operations;
596 inode->i_fop = &ufs_dir_operations;
597 inode->i_mapping->a_ops = &ufs_aops;
598 } else if (S_ISLNK(inode->i_mode)) {
599 if (!inode->i_blocks)
600 inode->i_op = &ufs_fast_symlink_inode_operations;
601 else {
602 inode->i_op = &page_symlink_inode_operations;
603 inode->i_mapping->a_ops = &ufs_aops;
604 }
605 } else
606 init_special_inode(inode, inode->i_mode,
607 ufs_get_inode_dev(inode->i_sb, UFS_I(inode)));
608}
609
610static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
544{ 611{
545 struct ufs_inode_info *ufsi = UFS_I(inode); 612 struct ufs_inode_info *ufsi = UFS_I(inode);
546 struct super_block * sb; 613 struct super_block *sb = inode->i_sb;
547 struct ufs_sb_private_info * uspi;
548 struct ufs_inode * ufs_inode;
549 struct ufs2_inode *ufs2_inode;
550 struct buffer_head * bh;
551 mode_t mode; 614 mode_t mode;
552 unsigned i; 615 unsigned i;
553 unsigned flags;
554
555 UFSD(("ENTER, ino %lu\n", inode->i_ino))
556
557 sb = inode->i_sb;
558 uspi = UFS_SB(sb)->s_uspi;
559 flags = UFS_SB(sb)->s_flags;
560
561 if (inode->i_ino < UFS_ROOTINO ||
562 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
563 ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
564 goto bad_inode;
565 }
566
567 bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
568 if (!bh) {
569 ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
570 goto bad_inode;
571 }
572 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
573 goto ufs2_inode;
574
575 ufs_inode = (struct ufs_inode *) (bh->b_data + sizeof(struct ufs_inode) * ufs_inotofsbo(inode->i_ino));
576 616
577 /* 617 /*
578 * Copy data to the in-core inode. 618 * Copy data to the in-core inode.
@@ -596,56 +636,29 @@ void ufs_read_inode (struct inode * inode)
596 inode->i_atime.tv_nsec = 0; 636 inode->i_atime.tv_nsec = 0;
597 inode->i_ctime.tv_nsec = 0; 637 inode->i_ctime.tv_nsec = 0;
598 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks); 638 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
599 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat) */
600 inode->i_version++;
601 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags); 639 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
602 ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen); 640 ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen);
603 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 641 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
604 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 642 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
605 ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; 643
606 644
607 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { 645 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
608 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++) 646 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
609 ufsi->i_u1.i_data[i] = ufs_inode->ui_u2.ui_addr.ui_db[i]; 647 ufsi->i_u1.i_data[i] = ufs_inode->ui_u2.ui_addr.ui_db[i];
610 } 648 } else {
611 else {
612 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) 649 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
613 ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i]; 650 ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i];
614 } 651 }
615 ufsi->i_osync = 0; 652}
616
617 if (S_ISREG(inode->i_mode)) {
618 inode->i_op = &ufs_file_inode_operations;
619 inode->i_fop = &ufs_file_operations;
620 inode->i_mapping->a_ops = &ufs_aops;
621 } else if (S_ISDIR(inode->i_mode)) {
622 inode->i_op = &ufs_dir_inode_operations;
623 inode->i_fop = &ufs_dir_operations;
624 } else if (S_ISLNK(inode->i_mode)) {
625 if (!inode->i_blocks)
626 inode->i_op = &ufs_fast_symlink_inode_operations;
627 else {
628 inode->i_op = &page_symlink_inode_operations;
629 inode->i_mapping->a_ops = &ufs_aops;
630 }
631 } else
632 init_special_inode(inode, inode->i_mode,
633 ufs_get_inode_dev(sb, ufsi));
634
635 brelse (bh);
636
637 UFSD(("EXIT\n"))
638 return;
639
640bad_inode:
641 make_bad_inode(inode);
642 return;
643
644ufs2_inode :
645 UFSD(("Reading ufs2 inode, ino %lu\n", inode->i_ino))
646 653
647 ufs2_inode = (struct ufs2_inode *)(bh->b_data + sizeof(struct ufs2_inode) * ufs_inotofsbo(inode->i_ino)); 654static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
655{
656 struct ufs_inode_info *ufsi = UFS_I(inode);
657 struct super_block *sb = inode->i_sb;
658 mode_t mode;
659 unsigned i;
648 660
661 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
649 /* 662 /*
650 * Copy data to the in-core inode. 663 * Copy data to the in-core inode.
651 */ 664 */
@@ -668,50 +681,75 @@ ufs2_inode :
668 inode->i_atime.tv_nsec = 0; 681 inode->i_atime.tv_nsec = 0;
669 inode->i_ctime.tv_nsec = 0; 682 inode->i_ctime.tv_nsec = 0;
670 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); 683 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
671 inode->i_blksize = PAGE_SIZE; /*This is the optimal IO size(for stat)*/
672
673 inode->i_version++;
674 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags); 684 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
675 ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen); 685 ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen);
676 /* 686 /*
677 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 687 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
678 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 688 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
679 */ 689 */
680 ufsi->i_lastfrag= (inode->i_size + uspi->s_fsize- 1) >> uspi->s_fshift;
681 690
682 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { 691 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
683 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++) 692 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
684 ufsi->i_u1.u2_i_data[i] = 693 ufsi->i_u1.u2_i_data[i] =
685 ufs2_inode->ui_u2.ui_addr.ui_db[i]; 694 ufs2_inode->ui_u2.ui_addr.ui_db[i];
686 } 695 } else {
687 else {
688 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) 696 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
689 ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i]; 697 ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i];
690 } 698 }
699}
700
701void ufs_read_inode(struct inode * inode)
702{
703 struct ufs_inode_info *ufsi = UFS_I(inode);
704 struct super_block * sb;
705 struct ufs_sb_private_info * uspi;
706 struct buffer_head * bh;
707
708 UFSD("ENTER, ino %lu\n", inode->i_ino);
709
710 sb = inode->i_sb;
711 uspi = UFS_SB(sb)->s_uspi;
712
713 if (inode->i_ino < UFS_ROOTINO ||
714 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
715 ufs_warning(sb, "ufs_read_inode", "bad inode number (%lu)\n",
716 inode->i_ino);
717 goto bad_inode;
718 }
719
720 bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
721 if (!bh) {
722 ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n",
723 inode->i_ino);
724 goto bad_inode;
725 }
726 if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
727 struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
728
729 ufs2_read_inode(inode,
730 ufs2_inode + ufs_inotofsbo(inode->i_ino));
731 } else {
732 struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data;
733
734 ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
735 }
736
737 inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
738 inode->i_version++;
739 ufsi->i_lastfrag =
740 (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
741 ufsi->i_dir_start_lookup = 0;
691 ufsi->i_osync = 0; 742 ufsi->i_osync = 0;
692 743
693 if (S_ISREG(inode->i_mode)) { 744 ufs_set_inode_ops(inode);
694 inode->i_op = &ufs_file_inode_operations;
695 inode->i_fop = &ufs_file_operations;
696 inode->i_mapping->a_ops = &ufs_aops;
697 } else if (S_ISDIR(inode->i_mode)) {
698 inode->i_op = &ufs_dir_inode_operations;
699 inode->i_fop = &ufs_dir_operations;
700 } else if (S_ISLNK(inode->i_mode)) {
701 if (!inode->i_blocks)
702 inode->i_op = &ufs_fast_symlink_inode_operations;
703 else {
704 inode->i_op = &page_symlink_inode_operations;
705 inode->i_mapping->a_ops = &ufs_aops;
706 }
707 } else /* TODO : here ...*/
708 init_special_inode(inode, inode->i_mode,
709 ufs_get_inode_dev(sb, ufsi));
710 745
711 brelse(bh); 746 brelse(bh);
712 747
713 UFSD(("EXIT\n")) 748 UFSD("EXIT\n");
714 return; 749 return;
750
751bad_inode:
752 make_bad_inode(inode);
715} 753}
716 754
717static int ufs_update_inode(struct inode * inode, int do_sync) 755static int ufs_update_inode(struct inode * inode, int do_sync)
@@ -724,7 +762,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
724 unsigned i; 762 unsigned i;
725 unsigned flags; 763 unsigned flags;
726 764
727 UFSD(("ENTER, ino %lu\n", inode->i_ino)) 765 UFSD("ENTER, ino %lu\n", inode->i_ino);
728 766
729 sb = inode->i_sb; 767 sb = inode->i_sb;
730 uspi = UFS_SB(sb)->s_uspi; 768 uspi = UFS_SB(sb)->s_uspi;
@@ -785,7 +823,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
785 sync_dirty_buffer(bh); 823 sync_dirty_buffer(bh);
786 brelse (bh); 824 brelse (bh);
787 825
788 UFSD(("EXIT\n")) 826 UFSD("EXIT\n");
789 return 0; 827 return 0;
790} 828}
791 829
@@ -805,14 +843,17 @@ int ufs_sync_inode (struct inode *inode)
805 843
806void ufs_delete_inode (struct inode * inode) 844void ufs_delete_inode (struct inode * inode)
807{ 845{
846 loff_t old_i_size;
847
808 truncate_inode_pages(&inode->i_data, 0); 848 truncate_inode_pages(&inode->i_data, 0);
809 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 849 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
810 lock_kernel(); 850 lock_kernel();
811 mark_inode_dirty(inode); 851 mark_inode_dirty(inode);
812 ufs_update_inode(inode, IS_SYNC(inode)); 852 ufs_update_inode(inode, IS_SYNC(inode));
853 old_i_size = inode->i_size;
813 inode->i_size = 0; 854 inode->i_size = 0;
814 if (inode->i_blocks) 855 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
815 ufs_truncate (inode); 856 ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n");
816 ufs_free_inode (inode); 857 ufs_free_inode (inode);
817 unlock_kernel(); 858 unlock_kernel();
818} 859}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 8d5f98a01c74..abd5f23a426d 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * linux/fs/ufs/namei.c 2 * linux/fs/ufs/namei.c
3 * 3 *
4 * Migration to usage of "page cache" on May 2006 by
5 * Evgeniy Dushistov <dushistov@mail.ru> based on ext2 code base.
6 *
4 * Copyright (C) 1998 7 * Copyright (C) 1998
5 * Daniel Pirkl <daniel.pirkl@email.cz> 8 * Daniel Pirkl <daniel.pirkl@email.cz>
6 * Charles University, Faculty of Mathematics and Physics 9 * Charles University, Faculty of Mathematics and Physics
@@ -28,21 +31,9 @@
28#include <linux/fs.h> 31#include <linux/fs.h>
29#include <linux/ufs_fs.h> 32#include <linux/ufs_fs.h>
30#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
31#include <linux/buffer_head.h>
32#include "swab.h" /* will go away - see comment in mknod() */ 34#include "swab.h" /* will go away - see comment in mknod() */
33#include "util.h" 35#include "util.h"
34 36
35/*
36#undef UFS_NAMEI_DEBUG
37*/
38#define UFS_NAMEI_DEBUG
39
40#ifdef UFS_NAMEI_DEBUG
41#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
42#else
43#define UFSD(x)
44#endif
45
46static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) 37static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
47{ 38{
48 int err = ufs_add_link(dentry, inode); 39 int err = ufs_add_link(dentry, inode);
@@ -88,8 +79,13 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
88static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, 79static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
89 struct nameidata *nd) 80 struct nameidata *nd)
90{ 81{
91 struct inode * inode = ufs_new_inode(dir, mode); 82 struct inode *inode;
92 int err = PTR_ERR(inode); 83 int err;
84
85 UFSD("BEGIN\n");
86 inode = ufs_new_inode(dir, mode);
87 err = PTR_ERR(inode);
88
93 if (!IS_ERR(inode)) { 89 if (!IS_ERR(inode)) {
94 inode->i_op = &ufs_file_inode_operations; 90 inode->i_op = &ufs_file_inode_operations;
95 inode->i_fop = &ufs_file_operations; 91 inode->i_fop = &ufs_file_operations;
@@ -99,6 +95,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
99 err = ufs_add_nondir(dentry, inode); 95 err = ufs_add_nondir(dentry, inode);
100 unlock_kernel(); 96 unlock_kernel();
101 } 97 }
98 UFSD("END: err=%d\n", err);
102 return err; 99 return err;
103} 100}
104 101
@@ -205,6 +202,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
205 202
206 inode->i_op = &ufs_dir_inode_operations; 203 inode->i_op = &ufs_dir_inode_operations;
207 inode->i_fop = &ufs_dir_operations; 204 inode->i_fop = &ufs_dir_operations;
205 inode->i_mapping->a_ops = &ufs_aops;
208 206
209 inode_inc_link_count(inode); 207 inode_inc_link_count(inode);
210 208
@@ -231,19 +229,18 @@ out_dir:
231 goto out; 229 goto out;
232} 230}
233 231
234static int ufs_unlink(struct inode * dir, struct dentry *dentry) 232static int ufs_unlink(struct inode *dir, struct dentry *dentry)
235{ 233{
236 struct inode * inode = dentry->d_inode; 234 struct inode * inode = dentry->d_inode;
237 struct buffer_head * bh; 235 struct ufs_dir_entry *de;
238 struct ufs_dir_entry * de; 236 struct page *page;
239 int err = -ENOENT; 237 int err = -ENOENT;
240 238
241 lock_kernel(); 239 de = ufs_find_entry(dir, dentry, &page);
242 de = ufs_find_entry (dentry, &bh);
243 if (!de) 240 if (!de)
244 goto out; 241 goto out;
245 242
246 err = ufs_delete_entry (dir, de, bh); 243 err = ufs_delete_entry(dir, de, page);
247 if (err) 244 if (err)
248 goto out; 245 goto out;
249 246
@@ -251,7 +248,6 @@ static int ufs_unlink(struct inode * dir, struct dentry *dentry)
251 inode_dec_link_count(inode); 248 inode_dec_link_count(inode);
252 err = 0; 249 err = 0;
253out: 250out:
254 unlock_kernel();
255 return err; 251 return err;
256} 252}
257 253
@@ -273,42 +269,42 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
273 return err; 269 return err;
274} 270}
275 271
276static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry, 272static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
277 struct inode * new_dir, struct dentry * new_dentry ) 273 struct inode *new_dir, struct dentry *new_dentry)
278{ 274{
279 struct inode *old_inode = old_dentry->d_inode; 275 struct inode *old_inode = old_dentry->d_inode;
280 struct inode *new_inode = new_dentry->d_inode; 276 struct inode *new_inode = new_dentry->d_inode;
281 struct buffer_head *dir_bh = NULL; 277 struct page *dir_page = NULL;
282 struct ufs_dir_entry *dir_de = NULL; 278 struct ufs_dir_entry * dir_de = NULL;
283 struct buffer_head *old_bh; 279 struct page *old_page;
284 struct ufs_dir_entry *old_de; 280 struct ufs_dir_entry *old_de;
285 int err = -ENOENT; 281 int err = -ENOENT;
286 282
287 lock_kernel(); 283 old_de = ufs_find_entry(old_dir, old_dentry, &old_page);
288 old_de = ufs_find_entry (old_dentry, &old_bh);
289 if (!old_de) 284 if (!old_de)
290 goto out; 285 goto out;
291 286
292 if (S_ISDIR(old_inode->i_mode)) { 287 if (S_ISDIR(old_inode->i_mode)) {
293 err = -EIO; 288 err = -EIO;
294 dir_de = ufs_dotdot(old_inode, &dir_bh); 289 dir_de = ufs_dotdot(old_inode, &dir_page);
295 if (!dir_de) 290 if (!dir_de)
296 goto out_old; 291 goto out_old;
297 } 292 }
298 293
299 if (new_inode) { 294 if (new_inode) {
300 struct buffer_head *new_bh; 295 struct page *new_page;
301 struct ufs_dir_entry *new_de; 296 struct ufs_dir_entry *new_de;
302 297
303 err = -ENOTEMPTY; 298 err = -ENOTEMPTY;
304 if (dir_de && !ufs_empty_dir (new_inode)) 299 if (dir_de && !ufs_empty_dir(new_inode))
305 goto out_dir; 300 goto out_dir;
301
306 err = -ENOENT; 302 err = -ENOENT;
307 new_de = ufs_find_entry (new_dentry, &new_bh); 303 new_de = ufs_find_entry(new_dir, new_dentry, &new_page);
308 if (!new_de) 304 if (!new_de)
309 goto out_dir; 305 goto out_dir;
310 inode_inc_link_count(old_inode); 306 inode_inc_link_count(old_inode);
311 ufs_set_link(new_dir, new_de, new_bh, old_inode); 307 ufs_set_link(new_dir, new_de, new_page, old_inode);
312 new_inode->i_ctime = CURRENT_TIME_SEC; 308 new_inode->i_ctime = CURRENT_TIME_SEC;
313 if (dir_de) 309 if (dir_de)
314 new_inode->i_nlink--; 310 new_inode->i_nlink--;
@@ -329,24 +325,32 @@ static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry,
329 inode_inc_link_count(new_dir); 325 inode_inc_link_count(new_dir);
330 } 326 }
331 327
332 ufs_delete_entry (old_dir, old_de, old_bh); 328 /*
329 * Like most other Unix systems, set the ctime for inodes on a
330 * rename.
331 * inode_dec_link_count() will mark the inode dirty.
332 */
333 old_inode->i_ctime = CURRENT_TIME_SEC;
333 334
335 ufs_delete_entry(old_dir, old_de, old_page);
334 inode_dec_link_count(old_inode); 336 inode_dec_link_count(old_inode);
335 337
336 if (dir_de) { 338 if (dir_de) {
337 ufs_set_link(old_inode, dir_de, dir_bh, new_dir); 339 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
338 inode_dec_link_count(old_dir); 340 inode_dec_link_count(old_dir);
339 } 341 }
340 unlock_kernel();
341 return 0; 342 return 0;
342 343
344
343out_dir: 345out_dir:
344 if (dir_de) 346 if (dir_de) {
345 brelse(dir_bh); 347 kunmap(dir_page);
348 page_cache_release(dir_page);
349 }
346out_old: 350out_old:
347 brelse (old_bh); 351 kunmap(old_page);
352 page_cache_release(old_page);
348out: 353out:
349 unlock_kernel();
350 return err; 354 return err;
351} 355}
352 356
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index db98a4c71e63..19a99726e58d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -64,7 +64,6 @@
64 */ 64 */
65 65
66 66
67#include <linux/config.h>
68#include <linux/module.h> 67#include <linux/module.h>
69#include <linux/bitops.h> 68#include <linux/bitops.h>
70 69
@@ -90,95 +89,84 @@
90#include "swab.h" 89#include "swab.h"
91#include "util.h" 90#include "util.h"
92 91
93#undef UFS_SUPER_DEBUG 92#ifdef CONFIG_UFS_DEBUG
94#undef UFS_SUPER_DEBUG_MORE
95
96
97#undef UFS_SUPER_DEBUG_MORE
98#ifdef UFS_SUPER_DEBUG
99#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
100#else
101#define UFSD(x)
102#endif
103
104#ifdef UFS_SUPER_DEBUG_MORE
105/* 93/*
106 * Print contents of ufs_super_block, useful for debugging 94 * Print contents of ufs_super_block, useful for debugging
107 */ 95 */
108void ufs_print_super_stuff(struct super_block *sb, 96static void ufs_print_super_stuff(struct super_block *sb, unsigned flags,
109 struct ufs_super_block_first * usb1, 97 struct ufs_super_block_first *usb1,
110 struct ufs_super_block_second * usb2, 98 struct ufs_super_block_second *usb2,
111 struct ufs_super_block_third * usb3) 99 struct ufs_super_block_third *usb3)
112{ 100{
113 printk("ufs_print_super_stuff\n"); 101 printk("ufs_print_super_stuff\n");
114 printk("size of usb: %u\n", sizeof(struct ufs_super_block)); 102 printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic));
115 printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic)); 103 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
116 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); 104 printk(" fs_size: %llu\n", (unsigned long long)
117 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); 105 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
118 printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno)); 106 printk(" fs_dsize: %llu\n", (unsigned long long)
119 printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno)); 107 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
120 printk(" cgoffset: %u\n", fs32_to_cpu(sb, usb1->fs_cgoffset)); 108 printk(" bsize: %u\n",
121 printk(" ~cgmask: 0x%x\n", ~fs32_to_cpu(sb, usb1->fs_cgmask)); 109 fs32_to_cpu(sb, usb1->fs_bsize));
122 printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size)); 110 printk(" fsize: %u\n",
123 printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize)); 111 fs32_to_cpu(sb, usb1->fs_fsize));
124 printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg)); 112 printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
125 printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize)); 113 printk(" fs_sblockloc: %llu\n", (unsigned long long)
126 printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize)); 114 fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
127 printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag)); 115 printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
128 printk(" fragshift: %u\n", fs32_to_cpu(sb, usb1->fs_fragshift)); 116 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
129 printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask)); 117 printk(" cs_nbfree(No of free blocks): %llu\n",
130 printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift)); 118 (unsigned long long)
131 printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize)); 119 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
132 printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc)); 120 } else {
133 printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg)); 121 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
134 printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg)); 122 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
135 printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg)); 123 printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
136 printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr)); 124 printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
137 printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize)); 125 printk(" cgoffset: %u\n",
138 printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize)); 126 fs32_to_cpu(sb, usb1->fs_cgoffset));
139 printk(" fstodb: %u\n", fs32_to_cpu(sb, usb1->fs_fsbtodb)); 127 printk(" ~cgmask: 0x%x\n",
140 printk(" contigsumsize: %d\n", fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_contigsumsize)); 128 ~fs32_to_cpu(sb, usb1->fs_cgmask));
141 printk(" postblformat: %u\n", fs32_to_cpu(sb, usb3->fs_postblformat)); 129 printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
142 printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos)); 130 printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
143 printk(" ndir %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir)); 131 printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
144 printk(" nifree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree)); 132 printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
145 printk(" nbfree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)); 133 printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
146 printk(" nffree %u\n", fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree)); 134 printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
147 printk("\n"); 135 printk(" fragshift: %u\n",
148} 136 fs32_to_cpu(sb, usb1->fs_fragshift));
149 137 printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
150/* 138 printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
151 * Print contents of ufs2 ufs_super_block, useful for debugging 139 printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
152 */ 140 printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
153void ufs2_print_super_stuff( 141 printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
154 struct super_block *sb, 142 printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
155 struct ufs_super_block *usb) 143 printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
156{ 144 printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
157 printk("ufs_print_super_stuff\n"); 145 printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
158 printk("size of usb: %u\n", sizeof(struct ufs_super_block)); 146 printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
159 printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb->fs_magic)); 147 printk(" fstodb: %u\n",
160 printk(" fs_size: %u\n",fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_size)); 148 fs32_to_cpu(sb, usb1->fs_fsbtodb));
161 printk(" fs_dsize: %u\n",fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize)); 149 printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
162 printk(" bsize: %u\n", fs32_to_cpu(usb, usb->fs_bsize)); 150 printk(" ndir %u\n",
163 printk(" fsize: %u\n", fs32_to_cpu(usb, usb->fs_fsize)); 151 fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
164 printk(" fs_volname: %s\n", usb->fs_u11.fs_u2.fs_volname); 152 printk(" nifree %u\n",
165 printk(" fs_fsmnt: %s\n", usb->fs_u11.fs_u2.fs_fsmnt); 153 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
166 printk(" fs_sblockloc: %u\n",fs64_to_cpu(sb, 154 printk(" nbfree %u\n",
167 usb->fs_u11.fs_u2.fs_sblockloc)); 155 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
168 printk(" cs_ndir(No of dirs): %u\n",fs64_to_cpu(sb, 156 printk(" nffree %u\n",
169 usb->fs_u11.fs_u2.fs_cstotal.cs_ndir)); 157 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
170 printk(" cs_nbfree(No of free blocks): %u\n",fs64_to_cpu(sb, 158 }
171 usb->fs_u11.fs_u2.fs_cstotal.cs_nbfree));
172 printk("\n"); 159 printk("\n");
173} 160}
174 161
175/* 162/*
176 * Print contents of ufs_cylinder_group, useful for debugging 163 * Print contents of ufs_cylinder_group, useful for debugging
177 */ 164 */
178void ufs_print_cylinder_stuff(struct super_block *sb, struct ufs_cylinder_group *cg) 165static void ufs_print_cylinder_stuff(struct super_block *sb,
166 struct ufs_cylinder_group *cg)
179{ 167{
180 printk("\nufs_print_cylinder_stuff\n"); 168 printk("\nufs_print_cylinder_stuff\n");
181 printk("size of ucg: %u\n", sizeof(struct ufs_cylinder_group)); 169 printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
182 printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic)); 170 printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
183 printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time)); 171 printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
184 printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx)); 172 printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
@@ -202,12 +190,18 @@ void ufs_print_cylinder_stuff(struct super_block *sb, struct ufs_cylinder_group
202 printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff)); 190 printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
203 printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff)); 191 printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
204 printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff)); 192 printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
205 printk(" clustersumoff %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff)); 193 printk(" clustersumoff %u\n",
206 printk(" clusteroff %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff)); 194 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
207 printk(" nclusterblks %u\n", fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks)); 195 printk(" clusteroff %u\n",
196 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
197 printk(" nclusterblks %u\n",
198 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
208 printk("\n"); 199 printk("\n");
209} 200}
210#endif /* UFS_SUPER_DEBUG_MORE */ 201#else
202# define ufs_print_super_stuff(sb, flags, usb1, usb2, usb3) /**/
203# define ufs_print_cylinder_stuff(sb, cg) /**/
204#endif /* CONFIG_UFS_DEBUG */
211 205
212static struct super_operations ufs_super_ops; 206static struct super_operations ufs_super_ops;
213 207
@@ -225,7 +219,7 @@ void ufs_error (struct super_block * sb, const char * function,
225 219
226 if (!(sb->s_flags & MS_RDONLY)) { 220 if (!(sb->s_flags & MS_RDONLY)) {
227 usb1->fs_clean = UFS_FSBAD; 221 usb1->fs_clean = UFS_FSBAD;
228 ubh_mark_buffer_dirty(USPI_UBH); 222 ubh_mark_buffer_dirty(USPI_UBH(uspi));
229 sb->s_dirt = 1; 223 sb->s_dirt = 1;
230 sb->s_flags |= MS_RDONLY; 224 sb->s_flags |= MS_RDONLY;
231 } 225 }
@@ -257,7 +251,7 @@ void ufs_panic (struct super_block * sb, const char * function,
257 251
258 if (!(sb->s_flags & MS_RDONLY)) { 252 if (!(sb->s_flags & MS_RDONLY)) {
259 usb1->fs_clean = UFS_FSBAD; 253 usb1->fs_clean = UFS_FSBAD;
260 ubh_mark_buffer_dirty(USPI_UBH); 254 ubh_mark_buffer_dirty(USPI_UBH(uspi));
261 sb->s_dirt = 1; 255 sb->s_dirt = 1;
262 } 256 }
263 va_start (args, fmt); 257 va_start (args, fmt);
@@ -309,7 +303,7 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
309{ 303{
310 char * p; 304 char * p;
311 305
312 UFSD(("ENTER\n")) 306 UFSD("ENTER\n");
313 307
314 if (!options) 308 if (!options)
315 return 1; 309 return 1;
@@ -386,27 +380,57 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
386} 380}
387 381
388/* 382/*
383 * Diffrent types of UFS hold fs_cstotal in different
384 * places, and use diffrent data structure for it.
385 * To make things simplier we just copy fs_cstotal to ufs_sb_private_info
386 */
387static void ufs_setup_cstotal(struct super_block *sb)
388{
389 struct ufs_sb_info *sbi = UFS_SB(sb);
390 struct ufs_sb_private_info *uspi = sbi->s_uspi;
391 struct ufs_super_block_first *usb1;
392 struct ufs_super_block_second *usb2;
393 struct ufs_super_block_third *usb3;
394 unsigned mtype = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
395
396 UFSD("ENTER, mtype=%u\n", mtype);
397 usb1 = ubh_get_usb_first(uspi);
398 usb2 = ubh_get_usb_second(uspi);
399 usb3 = ubh_get_usb_third(uspi);
400
401 if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
402 (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
403 mtype == UFS_MOUNT_UFSTYPE_UFS2) {
404 /*we have statistic in different place, then usual*/
405 uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
406 uspi->cs_total.cs_nbfree = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree);
407 uspi->cs_total.cs_nifree = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree);
408 uspi->cs_total.cs_nffree = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree);
409 } else {
410 uspi->cs_total.cs_ndir = fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir);
411 uspi->cs_total.cs_nbfree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree);
412 uspi->cs_total.cs_nifree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree);
413 uspi->cs_total.cs_nffree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree);
414 }
415 UFSD("EXIT\n");
416}
417
418/*
389 * Read on-disk structures associated with cylinder groups 419 * Read on-disk structures associated with cylinder groups
390 */ 420 */
391static int ufs_read_cylinder_structures (struct super_block *sb) 421static int ufs_read_cylinder_structures(struct super_block *sb)
392{ 422{
393 struct ufs_sb_info * sbi = UFS_SB(sb); 423 struct ufs_sb_info *sbi = UFS_SB(sb);
394 struct ufs_sb_private_info * uspi; 424 struct ufs_sb_private_info *uspi = sbi->s_uspi;
395 struct ufs_super_block *usb; 425 unsigned flags = sbi->s_flags;
396 struct ufs_buffer_head * ubh; 426 struct ufs_buffer_head * ubh;
397 unsigned char * base, * space; 427 unsigned char * base, * space;
398 unsigned size, blks, i; 428 unsigned size, blks, i;
399 unsigned flags = 0; 429 struct ufs_super_block_third *usb3;
400
401 UFSD(("ENTER\n"))
402
403 uspi = sbi->s_uspi;
404 430
405 usb = (struct ufs_super_block *) 431 UFSD("ENTER\n");
406 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data;
407 432
408 flags = UFS_SB(sb)->s_flags; 433 usb3 = ubh_get_usb_third(uspi);
409
410 /* 434 /*
411 * Read cs structures from (usually) first data block 435 * Read cs structures from (usually) first data block
412 * on the device. 436 * on the device.
@@ -424,7 +448,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb)
424 448
425 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 449 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
426 ubh = ubh_bread(sb, 450 ubh = ubh_bread(sb,
427 fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_csaddr) + i, size); 451 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr) + i, size);
428 else 452 else
429 ubh = ubh_bread(sb, uspi->s_csaddr + i, size); 453 ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
430 454
@@ -451,14 +475,13 @@ static int ufs_read_cylinder_structures (struct super_block *sb)
451 sbi->s_cgno[i] = UFS_CGNO_EMPTY; 475 sbi->s_cgno[i] = UFS_CGNO_EMPTY;
452 } 476 }
453 for (i = 0; i < uspi->s_ncg; i++) { 477 for (i = 0; i < uspi->s_ncg; i++) {
454 UFSD(("read cg %u\n", i)) 478 UFSD("read cg %u\n", i);
455 if (!(sbi->s_ucg[i] = sb_bread(sb, ufs_cgcmin(i)))) 479 if (!(sbi->s_ucg[i] = sb_bread(sb, ufs_cgcmin(i))))
456 goto failed; 480 goto failed;
457 if (!ufs_cg_chkmagic (sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data)) 481 if (!ufs_cg_chkmagic (sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data))
458 goto failed; 482 goto failed;
459#ifdef UFS_SUPER_DEBUG_MORE 483
460 ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data); 484 ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data);
461#endif
462 } 485 }
463 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) { 486 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) {
464 if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL))) 487 if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL)))
@@ -466,7 +489,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb)
466 sbi->s_cgno[i] = UFS_CGNO_EMPTY; 489 sbi->s_cgno[i] = UFS_CGNO_EMPTY;
467 } 490 }
468 sbi->s_cg_loaded = 0; 491 sbi->s_cg_loaded = 0;
469 UFSD(("EXIT\n")) 492 UFSD("EXIT\n");
470 return 1; 493 return 1;
471 494
472failed: 495failed:
@@ -479,26 +502,69 @@ failed:
479 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) 502 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++)
480 kfree (sbi->s_ucpi[i]); 503 kfree (sbi->s_ucpi[i]);
481 } 504 }
482 UFSD(("EXIT (FAILED)\n")) 505 UFSD("EXIT (FAILED)\n");
483 return 0; 506 return 0;
484} 507}
485 508
486/* 509/*
487 * Put on-disk structures associated with cylinder groups and 510 * Sync our internal copy of fs_cstotal with disk
488 * write them back to disk
489 */ 511 */
490static void ufs_put_cylinder_structures (struct super_block *sb) 512static void ufs_put_cstotal(struct super_block *sb)
491{ 513{
492 struct ufs_sb_info * sbi = UFS_SB(sb); 514 unsigned mtype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
493 struct ufs_sb_private_info * uspi; 515 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
516 struct ufs_super_block_first *usb1;
517 struct ufs_super_block_second *usb2;
518 struct ufs_super_block_third *usb3;
519
520 UFSD("ENTER\n");
521 usb1 = ubh_get_usb_first(uspi);
522 usb2 = ubh_get_usb_second(uspi);
523 usb3 = ubh_get_usb_third(uspi);
524
525 if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
526 (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
527 mtype == UFS_MOUNT_UFSTYPE_UFS2) {
528 /*we have statistic in different place, then usual*/
529 usb2->fs_un.fs_u2.cs_ndir =
530 cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
531 usb2->fs_un.fs_u2.cs_nbfree =
532 cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
533 usb3->fs_un1.fs_u2.cs_nifree =
534 cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
535 usb3->fs_un1.fs_u2.cs_nffree =
536 cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
537 } else {
538 usb1->fs_cstotal.cs_ndir =
539 cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
540 usb1->fs_cstotal.cs_nbfree =
541 cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
542 usb1->fs_cstotal.cs_nifree =
543 cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
544 usb1->fs_cstotal.cs_nffree =
545 cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
546 }
547 ubh_mark_buffer_dirty(USPI_UBH(uspi));
548 UFSD("EXIT\n");
549}
550
551/**
552 * ufs_put_super_internal() - put on-disk intrenal structures
553 * @sb: pointer to super_block structure
554 * Put on-disk structures associated with cylinder groups
555 * and write them back to disk, also update cs_total on disk
556 */
557static void ufs_put_super_internal(struct super_block *sb)
558{
559 struct ufs_sb_info *sbi = UFS_SB(sb);
560 struct ufs_sb_private_info *uspi = sbi->s_uspi;
494 struct ufs_buffer_head * ubh; 561 struct ufs_buffer_head * ubh;
495 unsigned char * base, * space; 562 unsigned char * base, * space;
496 unsigned blks, size, i; 563 unsigned blks, size, i;
497
498 UFSD(("ENTER\n"))
499
500 uspi = sbi->s_uspi;
501 564
565
566 UFSD("ENTER\n");
567 ufs_put_cstotal(sb);
502 size = uspi->s_cssize; 568 size = uspi->s_cssize;
503 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; 569 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
504 base = space = (char*) sbi->s_csp; 570 base = space = (char*) sbi->s_csp;
@@ -523,7 +589,7 @@ static void ufs_put_cylinder_structures (struct super_block *sb)
523 brelse (sbi->s_ucg[i]); 589 brelse (sbi->s_ucg[i]);
524 kfree (sbi->s_ucg); 590 kfree (sbi->s_ucg);
525 kfree (base); 591 kfree (base);
526 UFSD(("EXIT\n")) 592 UFSD("EXIT\n");
527} 593}
528 594
529static int ufs_fill_super(struct super_block *sb, void *data, int silent) 595static int ufs_fill_super(struct super_block *sb, void *data, int silent)
@@ -533,7 +599,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
533 struct ufs_super_block_first * usb1; 599 struct ufs_super_block_first * usb1;
534 struct ufs_super_block_second * usb2; 600 struct ufs_super_block_second * usb2;
535 struct ufs_super_block_third * usb3; 601 struct ufs_super_block_third * usb3;
536 struct ufs_super_block *usb;
537 struct ufs_buffer_head * ubh; 602 struct ufs_buffer_head * ubh;
538 struct inode *inode; 603 struct inode *inode;
539 unsigned block_size, super_block_size; 604 unsigned block_size, super_block_size;
@@ -544,7 +609,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
544 ubh = NULL; 609 ubh = NULL;
545 flags = 0; 610 flags = 0;
546 611
547 UFSD(("ENTER\n")) 612 UFSD("ENTER\n");
548 613
549 sbi = kmalloc(sizeof(struct ufs_sb_info), GFP_KERNEL); 614 sbi = kmalloc(sizeof(struct ufs_sb_info), GFP_KERNEL);
550 if (!sbi) 615 if (!sbi)
@@ -552,7 +617,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
552 sb->s_fs_info = sbi; 617 sb->s_fs_info = sbi;
553 memset(sbi, 0, sizeof(struct ufs_sb_info)); 618 memset(sbi, 0, sizeof(struct ufs_sb_info));
554 619
555 UFSD(("flag %u\n", (int)(sb->s_flags & MS_RDONLY))) 620 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
556 621
557#ifndef CONFIG_UFS_FS_WRITE 622#ifndef CONFIG_UFS_FS_WRITE
558 if (!(sb->s_flags & MS_RDONLY)) { 623 if (!(sb->s_flags & MS_RDONLY)) {
@@ -593,7 +658,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
593 the rules */ 658 the rules */
594 switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { 659 switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
595 case UFS_MOUNT_UFSTYPE_44BSD: 660 case UFS_MOUNT_UFSTYPE_44BSD:
596 UFSD(("ufstype=44bsd\n")) 661 UFSD("ufstype=44bsd\n");
597 uspi->s_fsize = block_size = 512; 662 uspi->s_fsize = block_size = 512;
598 uspi->s_fmask = ~(512 - 1); 663 uspi->s_fmask = ~(512 - 1);
599 uspi->s_fshift = 9; 664 uspi->s_fshift = 9;
@@ -602,7 +667,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
602 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 667 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
603 break; 668 break;
604 case UFS_MOUNT_UFSTYPE_UFS2: 669 case UFS_MOUNT_UFSTYPE_UFS2:
605 UFSD(("ufstype=ufs2\n")); 670 UFSD("ufstype=ufs2\n");
606 super_block_offset=SBLOCK_UFS2; 671 super_block_offset=SBLOCK_UFS2;
607 uspi->s_fsize = block_size = 512; 672 uspi->s_fsize = block_size = 512;
608 uspi->s_fmask = ~(512 - 1); 673 uspi->s_fmask = ~(512 - 1);
@@ -617,7 +682,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
617 break; 682 break;
618 683
619 case UFS_MOUNT_UFSTYPE_SUN: 684 case UFS_MOUNT_UFSTYPE_SUN:
620 UFSD(("ufstype=sun\n")) 685 UFSD("ufstype=sun\n");
621 uspi->s_fsize = block_size = 1024; 686 uspi->s_fsize = block_size = 1024;
622 uspi->s_fmask = ~(1024 - 1); 687 uspi->s_fmask = ~(1024 - 1);
623 uspi->s_fshift = 10; 688 uspi->s_fshift = 10;
@@ -628,7 +693,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
628 break; 693 break;
629 694
630 case UFS_MOUNT_UFSTYPE_SUNx86: 695 case UFS_MOUNT_UFSTYPE_SUNx86:
631 UFSD(("ufstype=sunx86\n")) 696 UFSD("ufstype=sunx86\n");
632 uspi->s_fsize = block_size = 1024; 697 uspi->s_fsize = block_size = 1024;
633 uspi->s_fmask = ~(1024 - 1); 698 uspi->s_fmask = ~(1024 - 1);
634 uspi->s_fshift = 10; 699 uspi->s_fshift = 10;
@@ -639,7 +704,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
639 break; 704 break;
640 705
641 case UFS_MOUNT_UFSTYPE_OLD: 706 case UFS_MOUNT_UFSTYPE_OLD:
642 UFSD(("ufstype=old\n")) 707 UFSD("ufstype=old\n");
643 uspi->s_fsize = block_size = 1024; 708 uspi->s_fsize = block_size = 1024;
644 uspi->s_fmask = ~(1024 - 1); 709 uspi->s_fmask = ~(1024 - 1);
645 uspi->s_fshift = 10; 710 uspi->s_fshift = 10;
@@ -654,7 +719,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
654 break; 719 break;
655 720
656 case UFS_MOUNT_UFSTYPE_NEXTSTEP: 721 case UFS_MOUNT_UFSTYPE_NEXTSTEP:
657 UFSD(("ufstype=nextstep\n")) 722 UFSD("ufstype=nextstep\n");
658 uspi->s_fsize = block_size = 1024; 723 uspi->s_fsize = block_size = 1024;
659 uspi->s_fmask = ~(1024 - 1); 724 uspi->s_fmask = ~(1024 - 1);
660 uspi->s_fshift = 10; 725 uspi->s_fshift = 10;
@@ -669,7 +734,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
669 break; 734 break;
670 735
671 case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD: 736 case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD:
672 UFSD(("ufstype=nextstep-cd\n")) 737 UFSD("ufstype=nextstep-cd\n");
673 uspi->s_fsize = block_size = 2048; 738 uspi->s_fsize = block_size = 2048;
674 uspi->s_fmask = ~(2048 - 1); 739 uspi->s_fmask = ~(2048 - 1);
675 uspi->s_fshift = 11; 740 uspi->s_fshift = 11;
@@ -684,7 +749,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
684 break; 749 break;
685 750
686 case UFS_MOUNT_UFSTYPE_OPENSTEP: 751 case UFS_MOUNT_UFSTYPE_OPENSTEP:
687 UFSD(("ufstype=openstep\n")) 752 UFSD("ufstype=openstep\n");
688 uspi->s_fsize = block_size = 1024; 753 uspi->s_fsize = block_size = 1024;
689 uspi->s_fmask = ~(1024 - 1); 754 uspi->s_fmask = ~(1024 - 1);
690 uspi->s_fshift = 10; 755 uspi->s_fshift = 10;
@@ -699,7 +764,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
699 break; 764 break;
700 765
701 case UFS_MOUNT_UFSTYPE_HP: 766 case UFS_MOUNT_UFSTYPE_HP:
702 UFSD(("ufstype=hp\n")) 767 UFSD("ufstype=hp\n");
703 uspi->s_fsize = block_size = 1024; 768 uspi->s_fsize = block_size = 1024;
704 uspi->s_fmask = ~(1024 - 1); 769 uspi->s_fmask = ~(1024 - 1);
705 uspi->s_fshift = 10; 770 uspi->s_fshift = 10;
@@ -737,8 +802,6 @@ again:
737 usb1 = ubh_get_usb_first(uspi); 802 usb1 = ubh_get_usb_first(uspi);
738 usb2 = ubh_get_usb_second(uspi); 803 usb2 = ubh_get_usb_second(uspi);
739 usb3 = ubh_get_usb_third(uspi); 804 usb3 = ubh_get_usb_third(uspi);
740 usb = (struct ufs_super_block *)
741 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
742 805
743 /* 806 /*
744 * Check ufs magic number 807 * Check ufs magic number
@@ -820,16 +883,12 @@ magic_found:
820 ubh = NULL; 883 ubh = NULL;
821 block_size = uspi->s_fsize; 884 block_size = uspi->s_fsize;
822 super_block_size = uspi->s_sbsize; 885 super_block_size = uspi->s_sbsize;
823 UFSD(("another value of block_size or super_block_size %u, %u\n", block_size, super_block_size)) 886 UFSD("another value of block_size or super_block_size %u, %u\n", block_size, super_block_size);
824 goto again; 887 goto again;
825 } 888 }
826 889
827#ifdef UFS_SUPER_DEBUG_MORE 890
828 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 891 ufs_print_super_stuff(sb, flags, usb1, usb2, usb3);
829 ufs2_print_super_stuff(sb,usb);
830 else
831 ufs_print_super_stuff(sb, usb1, usb2, usb3);
832#endif
833 892
834 /* 893 /*
835 * Check, if file system was correctly unmounted. 894 * Check, if file system was correctly unmounted.
@@ -842,13 +901,13 @@ magic_found:
842 (ufs_get_fs_state(sb, usb1, usb3) == (UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time))))) { 901 (ufs_get_fs_state(sb, usb1, usb3) == (UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time))))) {
843 switch(usb1->fs_clean) { 902 switch(usb1->fs_clean) {
844 case UFS_FSCLEAN: 903 case UFS_FSCLEAN:
845 UFSD(("fs is clean\n")) 904 UFSD("fs is clean\n");
846 break; 905 break;
847 case UFS_FSSTABLE: 906 case UFS_FSSTABLE:
848 UFSD(("fs is stable\n")) 907 UFSD("fs is stable\n");
849 break; 908 break;
850 case UFS_FSOSF1: 909 case UFS_FSOSF1:
851 UFSD(("fs is DEC OSF/1\n")) 910 UFSD("fs is DEC OSF/1\n");
852 break; 911 break;
853 case UFS_FSACTIVE: 912 case UFS_FSACTIVE:
854 printk("ufs_read_super: fs is active\n"); 913 printk("ufs_read_super: fs is active\n");
@@ -863,8 +922,7 @@ magic_found:
863 sb->s_flags |= MS_RDONLY; 922 sb->s_flags |= MS_RDONLY;
864 break; 923 break;
865 } 924 }
866 } 925 } else {
867 else {
868 printk("ufs_read_super: fs needs fsck\n"); 926 printk("ufs_read_super: fs needs fsck\n");
869 sb->s_flags |= MS_RDONLY; 927 sb->s_flags |= MS_RDONLY;
870 } 928 }
@@ -884,10 +942,9 @@ magic_found:
884 uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask); 942 uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
885 943
886 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 944 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
887 uspi->s_u2_size = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_size); 945 uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
888 uspi->s_u2_dsize = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize); 946 uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
889 } 947 } else {
890 else {
891 uspi->s_size = fs32_to_cpu(sb, usb1->fs_size); 948 uspi->s_size = fs32_to_cpu(sb, usb1->fs_size);
892 uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize); 949 uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize);
893 } 950 }
@@ -901,8 +958,8 @@ magic_found:
901 uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); 958 uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask);
902 uspi->s_bshift = fs32_to_cpu(sb, usb1->fs_bshift); 959 uspi->s_bshift = fs32_to_cpu(sb, usb1->fs_bshift);
903 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); 960 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
904 UFSD(("uspi->s_bshift = %d,uspi->s_fshift = %d", uspi->s_bshift, 961 UFSD("uspi->s_bshift = %d,uspi->s_fshift = %d", uspi->s_bshift,
905 uspi->s_fshift)); 962 uspi->s_fshift);
906 uspi->s_fpbshift = fs32_to_cpu(sb, usb1->fs_fragshift); 963 uspi->s_fpbshift = fs32_to_cpu(sb, usb1->fs_fragshift);
907 uspi->s_fsbtodb = fs32_to_cpu(sb, usb1->fs_fsbtodb); 964 uspi->s_fsbtodb = fs32_to_cpu(sb, usb1->fs_fsbtodb);
908 /* s_sbsize already set */ 965 /* s_sbsize already set */
@@ -922,8 +979,8 @@ magic_found:
922 uspi->s_spc = fs32_to_cpu(sb, usb1->fs_spc); 979 uspi->s_spc = fs32_to_cpu(sb, usb1->fs_spc);
923 uspi->s_ipg = fs32_to_cpu(sb, usb1->fs_ipg); 980 uspi->s_ipg = fs32_to_cpu(sb, usb1->fs_ipg);
924 uspi->s_fpg = fs32_to_cpu(sb, usb1->fs_fpg); 981 uspi->s_fpg = fs32_to_cpu(sb, usb1->fs_fpg);
925 uspi->s_cpc = fs32_to_cpu(sb, usb2->fs_cpc); 982 uspi->s_cpc = fs32_to_cpu(sb, usb2->fs_un.fs_u1.fs_cpc);
926 uspi->s_contigsumsize = fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_contigsumsize); 983 uspi->s_contigsumsize = fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_contigsumsize);
927 uspi->s_qbmask = ufs_get_fs_qbmask(sb, usb3); 984 uspi->s_qbmask = ufs_get_fs_qbmask(sb, usb3);
928 uspi->s_qfmask = ufs_get_fs_qfmask(sb, usb3); 985 uspi->s_qfmask = ufs_get_fs_qfmask(sb, usb3);
929 uspi->s_postblformat = fs32_to_cpu(sb, usb3->fs_postblformat); 986 uspi->s_postblformat = fs32_to_cpu(sb, usb3->fs_postblformat);
@@ -935,12 +992,11 @@ magic_found:
935 * Compute another frequently used values 992 * Compute another frequently used values
936 */ 993 */
937 uspi->s_fpbmask = uspi->s_fpb - 1; 994 uspi->s_fpbmask = uspi->s_fpb - 1;
938 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 995 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
939 uspi->s_apbshift = uspi->s_bshift - 3; 996 uspi->s_apbshift = uspi->s_bshift - 3;
940 } 997 else
941 else {
942 uspi->s_apbshift = uspi->s_bshift - 2; 998 uspi->s_apbshift = uspi->s_bshift - 2;
943 } 999
944 uspi->s_2apbshift = uspi->s_apbshift * 2; 1000 uspi->s_2apbshift = uspi->s_apbshift * 2;
945 uspi->s_3apbshift = uspi->s_apbshift * 3; 1001 uspi->s_3apbshift = uspi->s_apbshift * 3;
946 uspi->s_apb = 1 << uspi->s_apbshift; 1002 uspi->s_apb = 1 << uspi->s_apbshift;
@@ -956,7 +1012,7 @@ magic_found:
956 if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == 1012 if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) ==
957 UFS_MOUNT_UFSTYPE_44BSD) 1013 UFS_MOUNT_UFSTYPE_44BSD)
958 uspi->s_maxsymlinklen = 1014 uspi->s_maxsymlinklen =
959 fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_maxsymlinklen); 1015 fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen);
960 1016
961 sbi->s_flags = flags; 1017 sbi->s_flags = flags;
962 1018
@@ -967,7 +1023,7 @@ magic_found:
967 if (!sb->s_root) 1023 if (!sb->s_root)
968 goto dalloc_failed; 1024 goto dalloc_failed;
969 1025
970 1026 ufs_setup_cstotal(sb);
971 /* 1027 /*
972 * Read cylinder group structures 1028 * Read cylinder group structures
973 */ 1029 */
@@ -975,7 +1031,7 @@ magic_found:
975 if (!ufs_read_cylinder_structures(sb)) 1031 if (!ufs_read_cylinder_structures(sb))
976 goto failed; 1032 goto failed;
977 1033
978 UFSD(("EXIT\n")) 1034 UFSD("EXIT\n");
979 return 0; 1035 return 0;
980 1036
981dalloc_failed: 1037dalloc_failed:
@@ -986,15 +1042,16 @@ failed:
986 kfree (uspi); 1042 kfree (uspi);
987 kfree(sbi); 1043 kfree(sbi);
988 sb->s_fs_info = NULL; 1044 sb->s_fs_info = NULL;
989 UFSD(("EXIT (FAILED)\n")) 1045 UFSD("EXIT (FAILED)\n");
990 return -EINVAL; 1046 return -EINVAL;
991 1047
992failed_nomem: 1048failed_nomem:
993 UFSD(("EXIT (NOMEM)\n")) 1049 UFSD("EXIT (NOMEM)\n");
994 return -ENOMEM; 1050 return -ENOMEM;
995} 1051}
996 1052
997static void ufs_write_super (struct super_block *sb) { 1053static void ufs_write_super(struct super_block *sb)
1054{
998 struct ufs_sb_private_info * uspi; 1055 struct ufs_sb_private_info * uspi;
999 struct ufs_super_block_first * usb1; 1056 struct ufs_super_block_first * usb1;
1000 struct ufs_super_block_third * usb3; 1057 struct ufs_super_block_third * usb3;
@@ -1002,7 +1059,7 @@ static void ufs_write_super (struct super_block *sb) {
1002 1059
1003 lock_kernel(); 1060 lock_kernel();
1004 1061
1005 UFSD(("ENTER\n")) 1062 UFSD("ENTER\n");
1006 flags = UFS_SB(sb)->s_flags; 1063 flags = UFS_SB(sb)->s_flags;
1007 uspi = UFS_SB(sb)->s_uspi; 1064 uspi = UFS_SB(sb)->s_uspi;
1008 usb1 = ubh_get_usb_first(uspi); 1065 usb1 = ubh_get_usb_first(uspi);
@@ -1014,26 +1071,27 @@ static void ufs_write_super (struct super_block *sb) {
1014 || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) 1071 || (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
1015 ufs_set_fs_state(sb, usb1, usb3, 1072 ufs_set_fs_state(sb, usb1, usb3,
1016 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); 1073 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
1017 ubh_mark_buffer_dirty (USPI_UBH); 1074 ufs_put_cstotal(sb);
1018 } 1075 }
1019 sb->s_dirt = 0; 1076 sb->s_dirt = 0;
1020 UFSD(("EXIT\n")) 1077 UFSD("EXIT\n");
1021 unlock_kernel(); 1078 unlock_kernel();
1022} 1079}
1023 1080
1024static void ufs_put_super (struct super_block *sb) 1081static void ufs_put_super(struct super_block *sb)
1025{ 1082{
1026 struct ufs_sb_info * sbi = UFS_SB(sb); 1083 struct ufs_sb_info * sbi = UFS_SB(sb);
1027 1084
1028 UFSD(("ENTER\n")) 1085 UFSD("ENTER\n");
1029 1086
1030 if (!(sb->s_flags & MS_RDONLY)) 1087 if (!(sb->s_flags & MS_RDONLY))
1031 ufs_put_cylinder_structures (sb); 1088 ufs_put_super_internal(sb);
1032 1089
1033 ubh_brelse_uspi (sbi->s_uspi); 1090 ubh_brelse_uspi (sbi->s_uspi);
1034 kfree (sbi->s_uspi); 1091 kfree (sbi->s_uspi);
1035 kfree (sbi); 1092 kfree (sbi);
1036 sb->s_fs_info = NULL; 1093 sb->s_fs_info = NULL;
1094 UFSD("EXIT\n");
1037 return; 1095 return;
1038} 1096}
1039 1097
@@ -1062,8 +1120,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1062 return -EINVAL; 1120 return -EINVAL;
1063 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { 1121 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
1064 new_mount_opt |= ufstype; 1122 new_mount_opt |= ufstype;
1065 } 1123 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1066 else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1067 printk("ufstype can't be changed during remount\n"); 1124 printk("ufstype can't be changed during remount\n");
1068 return -EINVAL; 1125 return -EINVAL;
1069 } 1126 }
@@ -1077,20 +1134,19 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1077 * fs was mouted as rw, remounting ro 1134 * fs was mouted as rw, remounting ro
1078 */ 1135 */
1079 if (*mount_flags & MS_RDONLY) { 1136 if (*mount_flags & MS_RDONLY) {
1080 ufs_put_cylinder_structures(sb); 1137 ufs_put_super_internal(sb);
1081 usb1->fs_time = cpu_to_fs32(sb, get_seconds()); 1138 usb1->fs_time = cpu_to_fs32(sb, get_seconds());
1082 if ((flags & UFS_ST_MASK) == UFS_ST_SUN 1139 if ((flags & UFS_ST_MASK) == UFS_ST_SUN
1083 || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) 1140 || (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
1084 ufs_set_fs_state(sb, usb1, usb3, 1141 ufs_set_fs_state(sb, usb1, usb3,
1085 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); 1142 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
1086 ubh_mark_buffer_dirty (USPI_UBH); 1143 ubh_mark_buffer_dirty (USPI_UBH(uspi));
1087 sb->s_dirt = 0; 1144 sb->s_dirt = 0;
1088 sb->s_flags |= MS_RDONLY; 1145 sb->s_flags |= MS_RDONLY;
1089 } 1146 } else {
1090 /* 1147 /*
1091 * fs was mounted as ro, remounting rw 1148 * fs was mounted as ro, remounting rw
1092 */ 1149 */
1093 else {
1094#ifndef CONFIG_UFS_FS_WRITE 1150#ifndef CONFIG_UFS_FS_WRITE
1095 printk("ufs was compiled with read-only support, " 1151 printk("ufs was compiled with read-only support, "
1096 "can't be mounted as read-write\n"); 1152 "can't be mounted as read-write\n");
@@ -1102,7 +1158,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1102 printk("this ufstype is read-only supported\n"); 1158 printk("this ufstype is read-only supported\n");
1103 return -EINVAL; 1159 return -EINVAL;
1104 } 1160 }
1105 if (!ufs_read_cylinder_structures (sb)) { 1161 if (!ufs_read_cylinder_structures(sb)) {
1106 printk("failed during remounting\n"); 1162 printk("failed during remounting\n");
1107 return -EPERM; 1163 return -EPERM;
1108 } 1164 }
@@ -1113,36 +1169,31 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1113 return 0; 1169 return 0;
1114} 1170}
1115 1171
1116static int ufs_statfs (struct super_block *sb, struct kstatfs *buf) 1172static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1117{ 1173{
1118 struct ufs_sb_private_info * uspi; 1174 struct super_block *sb = dentry->d_sb;
1119 struct ufs_super_block_first * usb1; 1175 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi;
1120 struct ufs_super_block * usb; 1176 unsigned flags = UFS_SB(sb)->s_flags;
1121 unsigned flags = 0; 1177 struct ufs_super_block_first *usb1;
1178 struct ufs_super_block_second *usb2;
1179 struct ufs_super_block_third *usb3;
1122 1180
1123 lock_kernel(); 1181 lock_kernel();
1124 1182
1125 uspi = UFS_SB(sb)->s_uspi; 1183 usb1 = ubh_get_usb_first(uspi);
1126 usb1 = ubh_get_usb_first (uspi); 1184 usb2 = ubh_get_usb_second(uspi);
1127 usb = (struct ufs_super_block *) 1185 usb3 = ubh_get_usb_third(uspi);
1128 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
1129 1186
1130 flags = UFS_SB(sb)->s_flags;
1131 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 1187 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
1132 buf->f_type = UFS2_MAGIC; 1188 buf->f_type = UFS2_MAGIC;
1133 buf->f_blocks = fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_dsize); 1189 buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
1134 buf->f_bfree = ufs_blkstofrags(fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_cstotal.cs_nbfree)) + 1190 } else {
1135 fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_cstotal.cs_nffree);
1136 buf->f_ffree = fs64_to_cpu(sb,
1137 usb->fs_u11.fs_u2.fs_cstotal.cs_nifree);
1138 }
1139 else {
1140 buf->f_type = UFS_MAGIC; 1191 buf->f_type = UFS_MAGIC;
1141 buf->f_blocks = uspi->s_dsize; 1192 buf->f_blocks = uspi->s_dsize;
1142 buf->f_bfree = ufs_blkstofrags(fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)) +
1143 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree);
1144 buf->f_ffree = fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree);
1145 } 1193 }
1194 buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
1195 uspi->cs_total.cs_nffree;
1196 buf->f_ffree = uspi->cs_total.cs_nifree;
1146 buf->f_bsize = sb->s_blocksize; 1197 buf->f_bsize = sb->s_blocksize;
1147 buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree)) 1198 buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
1148 ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; 1199 ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
@@ -1311,10 +1362,10 @@ out:
1311 1362
1312#endif 1363#endif
1313 1364
1314static struct super_block *ufs_get_sb(struct file_system_type *fs_type, 1365static int ufs_get_sb(struct file_system_type *fs_type,
1315 int flags, const char *dev_name, void *data) 1366 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1316{ 1367{
1317 return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super); 1368 return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
1318} 1369}
1319 1370
1320static struct file_system_type ufs_fs_type = { 1371static struct file_system_type ufs_fs_type = {
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 02e86291ef8a..c9b55872079b 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -49,14 +49,6 @@
49#include "swab.h" 49#include "swab.h"
50#include "util.h" 50#include "util.h"
51 51
52#undef UFS_TRUNCATE_DEBUG
53
54#ifdef UFS_TRUNCATE_DEBUG
55#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
56#else
57#define UFSD(x)
58#endif
59
60/* 52/*
61 * Secure deletion currently doesn't work. It interacts very badly 53 * Secure deletion currently doesn't work. It interacts very badly
62 * with buffers shared with memory mappings, and for that reason 54 * with buffers shared with memory mappings, and for that reason
@@ -82,7 +74,7 @@ static int ufs_trunc_direct (struct inode * inode)
82 unsigned i, tmp; 74 unsigned i, tmp;
83 int retry; 75 int retry;
84 76
85 UFSD(("ENTER\n")) 77 UFSD("ENTER\n");
86 78
87 sb = inode->i_sb; 79 sb = inode->i_sb;
88 uspi = UFS_SB(sb)->s_uspi; 80 uspi = UFS_SB(sb)->s_uspi;
@@ -105,7 +97,7 @@ static int ufs_trunc_direct (struct inode * inode)
105 block2 = ufs_fragstoblks (frag3); 97 block2 = ufs_fragstoblks (frag3);
106 } 98 }
107 99
108 UFSD(("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4)) 100 UFSD("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4);
109 101
110 if (frag1 >= frag2) 102 if (frag1 >= frag2)
111 goto next1; 103 goto next1;
@@ -120,9 +112,8 @@ static int ufs_trunc_direct (struct inode * inode)
120 frag1 = ufs_fragnum (frag1); 112 frag1 = ufs_fragnum (frag1);
121 frag2 = ufs_fragnum (frag2); 113 frag2 = ufs_fragnum (frag2);
122 114
123 inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift;
124 mark_inode_dirty(inode);
125 ufs_free_fragments (inode, tmp + frag1, frag2 - frag1); 115 ufs_free_fragments (inode, tmp + frag1, frag2 - frag1);
116 mark_inode_dirty(inode);
126 frag_to_free = tmp + frag1; 117 frag_to_free = tmp + frag1;
127 118
128next1: 119next1:
@@ -136,8 +127,7 @@ next1:
136 continue; 127 continue;
137 128
138 *p = 0; 129 *p = 0;
139 inode->i_blocks -= uspi->s_nspb; 130
140 mark_inode_dirty(inode);
141 if (free_count == 0) { 131 if (free_count == 0) {
142 frag_to_free = tmp; 132 frag_to_free = tmp;
143 free_count = uspi->s_fpb; 133 free_count = uspi->s_fpb;
@@ -148,6 +138,7 @@ next1:
148 frag_to_free = tmp; 138 frag_to_free = tmp;
149 free_count = uspi->s_fpb; 139 free_count = uspi->s_fpb;
150 } 140 }
141 mark_inode_dirty(inode);
151 } 142 }
152 143
153 if (free_count > 0) 144 if (free_count > 0)
@@ -166,12 +157,12 @@ next1:
166 frag4 = ufs_fragnum (frag4); 157 frag4 = ufs_fragnum (frag4);
167 158
168 *p = 0; 159 *p = 0;
169 inode->i_blocks -= frag4 << uspi->s_nspfshift; 160
170 mark_inode_dirty(inode);
171 ufs_free_fragments (inode, tmp, frag4); 161 ufs_free_fragments (inode, tmp, frag4);
162 mark_inode_dirty(inode);
172 next3: 163 next3:
173 164
174 UFSD(("EXIT\n")) 165 UFSD("EXIT\n");
175 return retry; 166 return retry;
176} 167}
177 168
@@ -186,7 +177,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
186 unsigned frag_to_free, free_count; 177 unsigned frag_to_free, free_count;
187 int retry; 178 int retry;
188 179
189 UFSD(("ENTER\n")) 180 UFSD("ENTER\n");
190 181
191 sb = inode->i_sb; 182 sb = inode->i_sb;
192 uspi = UFS_SB(sb)->s_uspi; 183 uspi = UFS_SB(sb)->s_uspi;
@@ -227,7 +218,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
227 frag_to_free = tmp; 218 frag_to_free = tmp;
228 free_count = uspi->s_fpb; 219 free_count = uspi->s_fpb;
229 } 220 }
230 inode->i_blocks -= uspi->s_nspb; 221
231 mark_inode_dirty(inode); 222 mark_inode_dirty(inode);
232 } 223 }
233 224
@@ -238,26 +229,21 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
238 if (*ubh_get_addr32(ind_ubh,i)) 229 if (*ubh_get_addr32(ind_ubh,i))
239 break; 230 break;
240 if (i >= uspi->s_apb) { 231 if (i >= uspi->s_apb) {
241 if (ubh_max_bcount(ind_ubh) != 1) { 232 tmp = fs32_to_cpu(sb, *p);
242 retry = 1; 233 *p = 0;
243 } 234
244 else { 235 ufs_free_blocks (inode, tmp, uspi->s_fpb);
245 tmp = fs32_to_cpu(sb, *p); 236 mark_inode_dirty(inode);
246 *p = 0; 237 ubh_bforget(ind_ubh);
247 inode->i_blocks -= uspi->s_nspb; 238 ind_ubh = NULL;
248 mark_inode_dirty(inode);
249 ufs_free_blocks (inode, tmp, uspi->s_fpb);
250 ubh_bforget(ind_ubh);
251 ind_ubh = NULL;
252 }
253 } 239 }
254 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { 240 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
255 ubh_ll_rw_block (SWRITE, 1, &ind_ubh); 241 ubh_ll_rw_block(SWRITE, ind_ubh);
256 ubh_wait_on_buffer (ind_ubh); 242 ubh_wait_on_buffer (ind_ubh);
257 } 243 }
258 ubh_brelse (ind_ubh); 244 ubh_brelse (ind_ubh);
259 245
260 UFSD(("EXIT\n")) 246 UFSD("EXIT\n");
261 247
262 return retry; 248 return retry;
263} 249}
@@ -271,7 +257,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
271 __fs32 * dind; 257 __fs32 * dind;
272 int retry = 0; 258 int retry = 0;
273 259
274 UFSD(("ENTER\n")) 260 UFSD("ENTER\n");
275 261
276 sb = inode->i_sb; 262 sb = inode->i_sb;
277 uspi = UFS_SB(sb)->s_uspi; 263 uspi = UFS_SB(sb)->s_uspi;
@@ -306,25 +292,21 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
306 if (*ubh_get_addr32 (dind_bh, i)) 292 if (*ubh_get_addr32 (dind_bh, i))
307 break; 293 break;
308 if (i >= uspi->s_apb) { 294 if (i >= uspi->s_apb) {
309 if (ubh_max_bcount(dind_bh) != 1) 295 tmp = fs32_to_cpu(sb, *p);
310 retry = 1; 296 *p = 0;
311 else { 297
312 tmp = fs32_to_cpu(sb, *p); 298 ufs_free_blocks(inode, tmp, uspi->s_fpb);
313 *p = 0; 299 mark_inode_dirty(inode);
314 inode->i_blocks -= uspi->s_nspb; 300 ubh_bforget(dind_bh);
315 mark_inode_dirty(inode); 301 dind_bh = NULL;
316 ufs_free_blocks (inode, tmp, uspi->s_fpb);
317 ubh_bforget(dind_bh);
318 dind_bh = NULL;
319 }
320 } 302 }
321 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { 303 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
322 ubh_ll_rw_block (SWRITE, 1, &dind_bh); 304 ubh_ll_rw_block(SWRITE, dind_bh);
323 ubh_wait_on_buffer (dind_bh); 305 ubh_wait_on_buffer (dind_bh);
324 } 306 }
325 ubh_brelse (dind_bh); 307 ubh_brelse (dind_bh);
326 308
327 UFSD(("EXIT\n")) 309 UFSD("EXIT\n");
328 310
329 return retry; 311 return retry;
330} 312}
@@ -339,7 +321,7 @@ static int ufs_trunc_tindirect (struct inode * inode)
339 __fs32 * tind, * p; 321 __fs32 * tind, * p;
340 int retry; 322 int retry;
341 323
342 UFSD(("ENTER\n")) 324 UFSD("ENTER\n");
343 325
344 sb = inode->i_sb; 326 sb = inode->i_sb;
345 uspi = UFS_SB(sb)->s_uspi; 327 uspi = UFS_SB(sb)->s_uspi;
@@ -370,45 +352,114 @@ static int ufs_trunc_tindirect (struct inode * inode)
370 if (*ubh_get_addr32 (tind_bh, i)) 352 if (*ubh_get_addr32 (tind_bh, i))
371 break; 353 break;
372 if (i >= uspi->s_apb) { 354 if (i >= uspi->s_apb) {
373 if (ubh_max_bcount(tind_bh) != 1) 355 tmp = fs32_to_cpu(sb, *p);
374 retry = 1; 356 *p = 0;
375 else { 357
376 tmp = fs32_to_cpu(sb, *p); 358 ufs_free_blocks(inode, tmp, uspi->s_fpb);
377 *p = 0; 359 mark_inode_dirty(inode);
378 inode->i_blocks -= uspi->s_nspb; 360 ubh_bforget(tind_bh);
379 mark_inode_dirty(inode); 361 tind_bh = NULL;
380 ufs_free_blocks (inode, tmp, uspi->s_fpb);
381 ubh_bforget(tind_bh);
382 tind_bh = NULL;
383 }
384 } 362 }
385 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { 363 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
386 ubh_ll_rw_block (SWRITE, 1, &tind_bh); 364 ubh_ll_rw_block(SWRITE, tind_bh);
387 ubh_wait_on_buffer (tind_bh); 365 ubh_wait_on_buffer (tind_bh);
388 } 366 }
389 ubh_brelse (tind_bh); 367 ubh_brelse (tind_bh);
390 368
391 UFSD(("EXIT\n")) 369 UFSD("EXIT\n");
392 return retry; 370 return retry;
393} 371}
394 372
395void ufs_truncate (struct inode * inode) 373static int ufs_alloc_lastblock(struct inode *inode)
396{ 374{
375 int err = 0;
376 struct address_space *mapping = inode->i_mapping;
377 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
397 struct ufs_inode_info *ufsi = UFS_I(inode); 378 struct ufs_inode_info *ufsi = UFS_I(inode);
398 struct super_block * sb; 379 unsigned lastfrag, i, end;
399 struct ufs_sb_private_info * uspi; 380 struct page *lastpage;
400 int retry; 381 struct buffer_head *bh;
382
383 lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
384
385 if (!lastfrag) {
386 ufsi->i_lastfrag = 0;
387 goto out;
388 }
389 lastfrag--;
390
391 lastpage = ufs_get_locked_page(mapping, lastfrag >>
392 (PAGE_CACHE_SHIFT - inode->i_blkbits));
393 if (IS_ERR(lastpage)) {
394 err = -EIO;
395 goto out;
396 }
397
398 end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
399 bh = page_buffers(lastpage);
400 for (i = 0; i < end; ++i)
401 bh = bh->b_this_page;
402
403 if (!buffer_mapped(bh)) {
404 err = ufs_getfrag_block(inode, lastfrag, bh, 1);
405
406 if (unlikely(err))
407 goto out_unlock;
408
409 if (buffer_new(bh)) {
410 clear_buffer_new(bh);
411 unmap_underlying_metadata(bh->b_bdev,
412 bh->b_blocknr);
413 /*
414 * we do not zeroize fragment, because of
415 * if it maped to hole, it already contains zeroes
416 */
417 set_buffer_uptodate(bh);
418 mark_buffer_dirty(bh);
419 set_page_dirty(lastpage);
420 }
421 }
422out_unlock:
423 ufs_put_locked_page(lastpage);
424out:
425 return err;
426}
427
428int ufs_truncate(struct inode *inode, loff_t old_i_size)
429{
430 struct ufs_inode_info *ufsi = UFS_I(inode);
431 struct super_block *sb = inode->i_sb;
432 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
433 int retry, err = 0;
401 434
402 UFSD(("ENTER\n")) 435 UFSD("ENTER\n");
403 sb = inode->i_sb;
404 uspi = UFS_SB(sb)->s_uspi;
405 436
406 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) 437 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
407 return; 438 S_ISLNK(inode->i_mode)))
439 return -EINVAL;
408 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 440 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
409 return; 441 return -EPERM;
442
443 if (inode->i_size > old_i_size) {
444 /*
445 * if we expand file we should care about
446 * allocation of block for last byte first of all
447 */
448 err = ufs_alloc_lastblock(inode);
449
450 if (err) {
451 i_size_write(inode, old_i_size);
452 goto out;
453 }
454 /*
455 * go away, because of we expand file, and we do not
456 * need free blocks, and zeroizes page
457 */
458 lock_kernel();
459 goto almost_end;
460 }
410 461
411 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); 462 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
412 463
413 lock_kernel(); 464 lock_kernel();
414 while (1) { 465 while (1) {
@@ -426,9 +477,58 @@ void ufs_truncate (struct inode * inode)
426 yield(); 477 yield();
427 } 478 }
428 479
480 if (inode->i_size < old_i_size) {
481 /*
482 * now we should have enough space
483 * to allocate block for last byte
484 */
485 err = ufs_alloc_lastblock(inode);
486 if (err)
487 /*
488 * looks like all the same - we have no space,
489 * but we truncate file already
490 */
491 inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
492 }
493almost_end:
429 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 494 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
430 ufsi->i_lastfrag = DIRECT_FRAGMENT;
431 unlock_kernel(); 495 unlock_kernel();
432 mark_inode_dirty(inode); 496 mark_inode_dirty(inode);
433 UFSD(("EXIT\n")) 497out:
498 UFSD("EXIT: err %d\n", err);
499 return err;
434} 500}
501
502
503/*
504 * We don't define our `inode->i_op->truncate', and call it here,
505 * because of:
506 * - there is no way to know old size
507 * - there is no way inform user about error, if it happens in `truncate'
508 */
509static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
510{
511 struct inode *inode = dentry->d_inode;
512 unsigned int ia_valid = attr->ia_valid;
513 int error;
514
515 error = inode_change_ok(inode, attr);
516 if (error)
517 return error;
518
519 if (ia_valid & ATTR_SIZE &&
520 attr->ia_size != i_size_read(inode)) {
521 loff_t old_i_size = inode->i_size;
522 error = vmtruncate(inode, attr->ia_size);
523 if (error)
524 return error;
525 error = ufs_truncate(inode, old_i_size);
526 if (error)
527 return error;
528 }
529 return inode_setattr(inode, attr);
530}
531
532struct inode_operations ufs_file_inode_operations = {
533 .setattr = ufs_setattr,
534};
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 59acc8f073ac..337cf2c46d10 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -14,15 +14,6 @@
14#include "swab.h" 14#include "swab.h"
15#include "util.h" 15#include "util.h"
16 16
17#undef UFS_UTILS_DEBUG
18
19#ifdef UFS_UTILS_DEBUG
20#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x;
21#else
22#define UFSD(x)
23#endif
24
25
26struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi, 17struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi,
27 struct super_block *sb, u64 fragment, u64 size) 18 struct super_block *sb, u64 fragment, u64 size)
28{ 19{
@@ -63,17 +54,17 @@ struct ufs_buffer_head * ubh_bread_uspi (struct ufs_sb_private_info * uspi,
63 count = size >> uspi->s_fshift; 54 count = size >> uspi->s_fshift;
64 if (count <= 0 || count > UFS_MAXFRAG) 55 if (count <= 0 || count > UFS_MAXFRAG)
65 return NULL; 56 return NULL;
66 USPI_UBH->fragment = fragment; 57 USPI_UBH(uspi)->fragment = fragment;
67 USPI_UBH->count = count; 58 USPI_UBH(uspi)->count = count;
68 for (i = 0; i < count; i++) 59 for (i = 0; i < count; i++)
69 if (!(USPI_UBH->bh[i] = sb_bread(sb, fragment + i))) 60 if (!(USPI_UBH(uspi)->bh[i] = sb_bread(sb, fragment + i)))
70 goto failed; 61 goto failed;
71 for (; i < UFS_MAXFRAG; i++) 62 for (; i < UFS_MAXFRAG; i++)
72 USPI_UBH->bh[i] = NULL; 63 USPI_UBH(uspi)->bh[i] = NULL;
73 return USPI_UBH; 64 return USPI_UBH(uspi);
74failed: 65failed:
75 for (j = 0; j < i; j++) 66 for (j = 0; j < i; j++)
76 brelse (USPI_UBH->bh[j]); 67 brelse (USPI_UBH(uspi)->bh[j]);
77 return NULL; 68 return NULL;
78} 69}
79 70
@@ -90,11 +81,11 @@ void ubh_brelse (struct ufs_buffer_head * ubh)
90void ubh_brelse_uspi (struct ufs_sb_private_info * uspi) 81void ubh_brelse_uspi (struct ufs_sb_private_info * uspi)
91{ 82{
92 unsigned i; 83 unsigned i;
93 if (!USPI_UBH) 84 if (!USPI_UBH(uspi))
94 return; 85 return;
95 for ( i = 0; i < USPI_UBH->count; i++ ) { 86 for ( i = 0; i < USPI_UBH(uspi)->count; i++ ) {
96 brelse (USPI_UBH->bh[i]); 87 brelse (USPI_UBH(uspi)->bh[i]);
97 USPI_UBH->bh[i] = NULL; 88 USPI_UBH(uspi)->bh[i] = NULL;
98 } 89 }
99} 90}
100 91
@@ -121,13 +112,12 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
121 } 112 }
122} 113}
123 114
124void ubh_ll_rw_block (int rw, unsigned nr, struct ufs_buffer_head * ubh[]) 115void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh)
125{ 116{
126 unsigned i;
127 if (!ubh) 117 if (!ubh)
128 return; 118 return;
129 for ( i = 0; i < nr; i++ ) 119
130 ll_rw_block (rw, ubh[i]->count, ubh[i]->bh); 120 ll_rw_block(rw, ubh->count, ubh->bh);
131} 121}
132 122
133void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) 123void ubh_wait_on_buffer (struct ufs_buffer_head * ubh)
@@ -139,18 +129,6 @@ void ubh_wait_on_buffer (struct ufs_buffer_head * ubh)
139 wait_on_buffer (ubh->bh[i]); 129 wait_on_buffer (ubh->bh[i]);
140} 130}
141 131
142unsigned ubh_max_bcount (struct ufs_buffer_head * ubh)
143{
144 unsigned i;
145 unsigned max = 0;
146 if (!ubh)
147 return 0;
148 for ( i = 0; i < ubh->count; i++ )
149 if ( atomic_read(&ubh->bh[i]->b_count) > max )
150 max = atomic_read(&ubh->bh[i]->b_count);
151 return max;
152}
153
154void ubh_bforget (struct ufs_buffer_head * ubh) 132void ubh_bforget (struct ufs_buffer_head * ubh)
155{ 133{
156 unsigned i; 134 unsigned i;
@@ -255,3 +233,57 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
255 else 233 else
256 ufsi->i_u1.i_data[0] = fs32; 234 ufsi->i_u1.i_data[0] = fs32;
257} 235}
236
237/**
238 * ufs_get_locked_page() - locate, pin and lock a pagecache page, if not exist
239 * read it from disk.
240 * @mapping: the address_space to search
241 * @index: the page index
242 *
243 * Locates the desired pagecache page, if not exist we'll read it,
244 * locks it, increments its reference
245 * count and returns its address.
246 *
247 */
248
249struct page *ufs_get_locked_page(struct address_space *mapping,
250 pgoff_t index)
251{
252 struct page *page;
253
254try_again:
255 page = find_lock_page(mapping, index);
256 if (!page) {
257 page = read_cache_page(mapping, index,
258 (filler_t*)mapping->a_ops->readpage,
259 NULL);
260 if (IS_ERR(page)) {
261 printk(KERN_ERR "ufs_change_blocknr: "
262 "read_cache_page error: ino %lu, index: %lu\n",
263 mapping->host->i_ino, index);
264 goto out;
265 }
266
267 lock_page(page);
268
269 if (!PageUptodate(page) || PageError(page)) {
270 unlock_page(page);
271 page_cache_release(page);
272
273 printk(KERN_ERR "ufs_change_blocknr: "
274 "can not read page: ino %lu, index: %lu\n",
275 mapping->host->i_ino, index);
276
277 page = ERR_PTR(-EIO);
278 goto out;
279 }
280 }
281
282 if (unlikely(!page->mapping || !page_has_buffers(page))) {
283 unlock_page(page);
284 page_cache_release(page);
285 goto try_again;/*we really need these buffers*/
286 }
287out:
288 return page;
289}
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 48d6d9bcc157..28fce6c239b5 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -17,10 +17,16 @@
17#define in_range(b,first,len) ((b)>=(first)&&(b)<(first)+(len)) 17#define in_range(b,first,len) ((b)>=(first)&&(b)<(first)+(len))
18 18
19/* 19/*
20 * macros used for retyping 20 * functions used for retyping
21 */ 21 */
22#define UCPI_UBH ((struct ufs_buffer_head *)ucpi) 22static inline struct ufs_buffer_head *UCPI_UBH(struct ufs_cg_private_info *cpi)
23#define USPI_UBH ((struct ufs_buffer_head *)uspi) 23{
24 return &cpi->c_ubh;
25}
26static inline struct ufs_buffer_head *USPI_UBH(struct ufs_sb_private_info *spi)
27{
28 return &spi->s_ubh;
29}
24 30
25 31
26 32
@@ -33,12 +39,12 @@ ufs_get_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1,
33{ 39{
34 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { 40 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) {
35 case UFS_ST_SUN: 41 case UFS_ST_SUN:
36 return fs32_to_cpu(sb, usb3->fs_u2.fs_sun.fs_state); 42 return fs32_to_cpu(sb, usb3->fs_un2.fs_sun.fs_state);
37 case UFS_ST_SUNx86: 43 case UFS_ST_SUNx86:
38 return fs32_to_cpu(sb, usb1->fs_u1.fs_sunx86.fs_state); 44 return fs32_to_cpu(sb, usb1->fs_u1.fs_sunx86.fs_state);
39 case UFS_ST_44BSD: 45 case UFS_ST_44BSD:
40 default: 46 default:
41 return fs32_to_cpu(sb, usb3->fs_u2.fs_44.fs_state); 47 return fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_state);
42 } 48 }
43} 49}
44 50
@@ -48,13 +54,13 @@ ufs_set_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1,
48{ 54{
49 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { 55 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) {
50 case UFS_ST_SUN: 56 case UFS_ST_SUN:
51 usb3->fs_u2.fs_sun.fs_state = cpu_to_fs32(sb, value); 57 usb3->fs_un2.fs_sun.fs_state = cpu_to_fs32(sb, value);
52 break; 58 break;
53 case UFS_ST_SUNx86: 59 case UFS_ST_SUNx86:
54 usb1->fs_u1.fs_sunx86.fs_state = cpu_to_fs32(sb, value); 60 usb1->fs_u1.fs_sunx86.fs_state = cpu_to_fs32(sb, value);
55 break; 61 break;
56 case UFS_ST_44BSD: 62 case UFS_ST_44BSD:
57 usb3->fs_u2.fs_44.fs_state = cpu_to_fs32(sb, value); 63 usb3->fs_un2.fs_44.fs_state = cpu_to_fs32(sb, value);
58 break; 64 break;
59 } 65 }
60} 66}
@@ -64,7 +70,7 @@ ufs_get_fs_npsect(struct super_block *sb, struct ufs_super_block_first *usb1,
64 struct ufs_super_block_third *usb3) 70 struct ufs_super_block_third *usb3)
65{ 71{
66 if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) 72 if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86)
67 return fs32_to_cpu(sb, usb3->fs_u2.fs_sunx86.fs_npsect); 73 return fs32_to_cpu(sb, usb3->fs_un2.fs_sunx86.fs_npsect);
68 else 74 else
69 return fs32_to_cpu(sb, usb1->fs_u1.fs_sun.fs_npsect); 75 return fs32_to_cpu(sb, usb1->fs_u1.fs_sun.fs_npsect);
70} 76}
@@ -76,16 +82,16 @@ ufs_get_fs_qbmask(struct super_block *sb, struct ufs_super_block_third *usb3)
76 82
77 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { 83 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) {
78 case UFS_ST_SUN: 84 case UFS_ST_SUN:
79 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sun.fs_qbmask[0]; 85 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sun.fs_qbmask[0];
80 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sun.fs_qbmask[1]; 86 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sun.fs_qbmask[1];
81 break; 87 break;
82 case UFS_ST_SUNx86: 88 case UFS_ST_SUNx86:
83 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sunx86.fs_qbmask[0]; 89 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sunx86.fs_qbmask[0];
84 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sunx86.fs_qbmask[1]; 90 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sunx86.fs_qbmask[1];
85 break; 91 break;
86 case UFS_ST_44BSD: 92 case UFS_ST_44BSD:
87 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_44.fs_qbmask[0]; 93 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_44.fs_qbmask[0];
88 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_44.fs_qbmask[1]; 94 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_44.fs_qbmask[1];
89 break; 95 break;
90 } 96 }
91 97
@@ -99,16 +105,16 @@ ufs_get_fs_qfmask(struct super_block *sb, struct ufs_super_block_third *usb3)
99 105
100 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { 106 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) {
101 case UFS_ST_SUN: 107 case UFS_ST_SUN:
102 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sun.fs_qfmask[0]; 108 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sun.fs_qfmask[0];
103 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sun.fs_qfmask[1]; 109 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sun.fs_qfmask[1];
104 break; 110 break;
105 case UFS_ST_SUNx86: 111 case UFS_ST_SUNx86:
106 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_sunx86.fs_qfmask[0]; 112 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_sunx86.fs_qfmask[0];
107 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_sunx86.fs_qfmask[1]; 113 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_sunx86.fs_qfmask[1];
108 break; 114 break;
109 case UFS_ST_44BSD: 115 case UFS_ST_44BSD:
110 ((__fs32 *)&tmp)[0] = usb3->fs_u2.fs_44.fs_qfmask[0]; 116 ((__fs32 *)&tmp)[0] = usb3->fs_un2.fs_44.fs_qfmask[0];
111 ((__fs32 *)&tmp)[1] = usb3->fs_u2.fs_44.fs_qfmask[1]; 117 ((__fs32 *)&tmp)[1] = usb3->fs_un2.fs_44.fs_qfmask[1];
112 break; 118 break;
113 } 119 }
114 120
@@ -236,9 +242,8 @@ extern void ubh_brelse (struct ufs_buffer_head *);
236extern void ubh_brelse_uspi (struct ufs_sb_private_info *); 242extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
237extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); 243extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
238extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); 244extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
239extern void ubh_ll_rw_block (int, unsigned, struct ufs_buffer_head **); 245extern void ubh_ll_rw_block(int, struct ufs_buffer_head *);
240extern void ubh_wait_on_buffer (struct ufs_buffer_head *); 246extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
241extern unsigned ubh_max_bcount (struct ufs_buffer_head *);
242extern void ubh_bforget (struct ufs_buffer_head *); 247extern void ubh_bforget (struct ufs_buffer_head *);
243extern int ubh_buffer_dirty (struct ufs_buffer_head *); 248extern int ubh_buffer_dirty (struct ufs_buffer_head *);
244#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) 249#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
@@ -246,6 +251,14 @@ extern void _ubh_ubhcpymem_(struct ufs_sb_private_info *, unsigned char *, struc
246#define ubh_memcpyubh(ubh,mem,size) _ubh_memcpyubh_(uspi,ubh,mem,size) 251#define ubh_memcpyubh(ubh,mem,size) _ubh_memcpyubh_(uspi,ubh,mem,size)
247extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head *, unsigned char *, unsigned); 252extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head *, unsigned char *, unsigned);
248 253
254/* This functions works with cache pages*/
255extern struct page *ufs_get_locked_page(struct address_space *mapping,
256 pgoff_t index);
257static inline void ufs_put_locked_page(struct page *page)
258{
259 unlock_page(page);
260 page_cache_release(page);
261}
249 262
250 263
251/* 264/*
@@ -297,40 +310,26 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
297#define ubh_blkmap(ubh,begin,bit) \ 310#define ubh_blkmap(ubh,begin,bit) \
298 ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb))) 311 ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
299 312
300
301/*
302 * Macros for access to superblock array structures
303 */
304#define ubh_postbl(ubh,cylno,i) \
305 ((uspi->s_postblformat != UFS_DYNAMICPOSTBLFMT) \
306 ? (*(__s16*)(ubh_get_addr(ubh, \
307 (unsigned)(&((struct ufs_super_block *)0)->fs_opostbl) \
308 + (((cylno) * 16 + (i)) << 1) ) )) \
309 : (*(__s16*)(ubh_get_addr(ubh, \
310 uspi->s_postbloff + (((cylno) * uspi->s_nrpos + (i)) << 1) ))))
311
312#define ubh_rotbl(ubh,i) \
313 ((uspi->s_postblformat != UFS_DYNAMICPOSTBLFMT) \
314 ? (*(__u8*)(ubh_get_addr(ubh, \
315 (unsigned)(&((struct ufs_super_block *)0)->fs_space) + (i)))) \
316 : (*(__u8*)(ubh_get_addr(ubh, uspi->s_rotbloff + (i)))))
317
318/* 313/*
319 * Determine the number of available frags given a 314 * Determine the number of available frags given a
320 * percentage to hold in reserve. 315 * percentage to hold in reserve.
321 */ 316 */
322#define ufs_freespace(usb, percentreserved) \ 317static inline u64
323 (ufs_blkstofrags(fs32_to_cpu(sb, (usb)->fs_cstotal.cs_nbfree)) + \ 318ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
324 fs32_to_cpu(sb, (usb)->fs_cstotal.cs_nffree) - (uspi->s_dsize * (percentreserved) / 100)) 319{
320 return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
321 uspi->cs_total.cs_nffree -
322 (uspi->s_dsize * (percentreserved) / 100);
323}
325 324
326/* 325/*
327 * Macros to access cylinder group array structures 326 * Macros to access cylinder group array structures
328 */ 327 */
329#define ubh_cg_blktot(ucpi,cylno) \ 328#define ubh_cg_blktot(ucpi,cylno) \
330 (*((__fs32*)ubh_get_addr(UCPI_UBH, (ucpi)->c_btotoff + ((cylno) << 2)))) 329 (*((__fs32*)ubh_get_addr(UCPI_UBH(ucpi), (ucpi)->c_btotoff + ((cylno) << 2))))
331 330
332#define ubh_cg_blks(ucpi,cylno,rpos) \ 331#define ubh_cg_blks(ucpi,cylno,rpos) \
333 (*((__fs16*)ubh_get_addr(UCPI_UBH, \ 332 (*((__fs16*)ubh_get_addr(UCPI_UBH(ucpi), \
334 (ucpi)->c_boff + (((cylno) * uspi->s_nrpos + (rpos)) << 1 )))) 333 (ucpi)->c_boff + (((cylno) * uspi->s_nrpos + (rpos)) << 1 ))))
335 334
336/* 335/*
@@ -508,29 +507,3 @@ static inline void ufs_fragacct (struct super_block * sb, unsigned blockmap,
508 if (fragsize > 0 && fragsize < uspi->s_fpb) 507 if (fragsize > 0 && fragsize < uspi->s_fpb)
509 fs32_add(sb, &fraglist[fragsize], cnt); 508 fs32_add(sb, &fraglist[fragsize], cnt);
510} 509}
511
512#define ubh_scanc(ubh,begin,size,table,mask) _ubh_scanc_(uspi,ubh,begin,size,table,mask)
513static inline unsigned _ubh_scanc_(struct ufs_sb_private_info * uspi, struct ufs_buffer_head * ubh,
514 unsigned begin, unsigned size, unsigned char * table, unsigned char mask)
515{
516 unsigned rest, offset;
517 unsigned char * cp;
518
519
520 offset = begin & ~uspi->s_fmask;
521 begin >>= uspi->s_fshift;
522 for (;;) {
523 if ((offset + size) < uspi->s_fsize)
524 rest = size;
525 else
526 rest = uspi->s_fsize - offset;
527 size -= rest;
528 cp = ubh->bh[begin]->b_data + offset;
529 while ((table[*cp++] & mask) == 0 && --rest);
530 if (rest || !size)
531 break;
532 begin++;
533 offset = 0;
534 }
535 return (size + rest);
536}
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index a56cec3be5f0..9a8f48bae956 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -1023,11 +1023,12 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1023 return 0; 1023 return 0;
1024} 1024}
1025 1025
1026static struct super_block *vfat_get_sb(struct file_system_type *fs_type, 1026static int vfat_get_sb(struct file_system_type *fs_type,
1027 int flags, const char *dev_name, 1027 int flags, const char *dev_name,
1028 void *data) 1028 void *data, struct vfsmount *mnt)
1029{ 1029{
1030 return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super); 1030 return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
1031 mnt);
1031} 1032}
1032 1033
1033static struct file_system_type vfat_fs_type = { 1034static struct file_system_type vfat_fs_type = {
diff --git a/fs/xattr.c b/fs/xattr.c
index e416190f5e9c..c32f15b5f60f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -242,7 +242,7 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
242 if (!f) 242 if (!f)
243 return error; 243 return error;
244 dentry = f->f_dentry; 244 dentry = f->f_dentry;
245 audit_inode(NULL, dentry->d_inode, 0); 245 audit_inode(NULL, dentry->d_inode);
246 error = setxattr(dentry, name, value, size, flags); 246 error = setxattr(dentry, name, value, size, flags);
247 fput(f); 247 fput(f);
248 return error; 248 return error;
@@ -469,7 +469,7 @@ sys_fremovexattr(int fd, char __user *name)
469 if (!f) 469 if (!f)
470 return error; 470 return error;
471 dentry = f->f_dentry; 471 dentry = f->f_dentry;
472 audit_inode(NULL, dentry->d_inode, 0); 472 audit_inode(NULL, dentry->d_inode);
473 error = removexattr(dentry, name); 473 error = removexattr(dentry, name);
474 fput(f); 474 fput(f);
475 return error; 475 return error;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index bac27d66151d..26b364c9d62c 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,5 @@
1config XFS_FS 1config XFS_FS
2 tristate "XFS filesystem support" 2 tristate "XFS filesystem support"
3 select EXPORTFS if NFSD!=n
4 help 3 help
5 XFS is a high performance journaling filesystem which originated 4 XFS is a high performance journaling filesystem which originated
6 on the SGI IRIX platform. It is completely multi-threaded, can 5 on the SGI IRIX platform. It is completely multi-threaded, can
@@ -18,11 +17,6 @@ config XFS_FS
18 system of your root partition is compiled as a module, you'll need 17 system of your root partition is compiled as a module, you'll need
19 to use an initial ramdisk (initrd) to boot. 18 to use an initial ramdisk (initrd) to boot.
20 19
21config XFS_EXPORT
22 bool
23 depends on XFS_FS && EXPORTFS
24 default y
25
26config XFS_QUOTA 20config XFS_QUOTA
27 bool "XFS Quota support" 21 bool "XFS Quota support"
28 depends on XFS_FS 22 depends on XFS_FS
@@ -65,18 +59,19 @@ config XFS_POSIX_ACL
65 If you don't know what Access Control Lists are, say N. 59 If you don't know what Access Control Lists are, say N.
66 60
67config XFS_RT 61config XFS_RT
68 bool "XFS Realtime support (EXPERIMENTAL)" 62 bool "XFS Realtime subvolume support"
69 depends on XFS_FS && EXPERIMENTAL 63 depends on XFS_FS
70 help 64 help
71 If you say Y here you will be able to mount and use XFS filesystems 65 If you say Y here you will be able to mount and use XFS filesystems
72 which contain a realtime subvolume. The realtime subvolume is a 66 which contain a realtime subvolume. The realtime subvolume is a
73 separate area of disk space where only file data is stored. The 67 separate area of disk space where only file data is stored. It was
74 realtime subvolume is designed to provide very deterministic 68 originally designed to provide deterministic data rates suitable
75 data rates suitable for media streaming applications. 69 for media streaming applications, but is also useful as a generic
76 70 mechanism for ensuring data and metadata/log I/Os are completely
77 See the xfs man page in section 5 for a bit more information. 71 separated. Regular file I/Os are isolated to a separate device
72 from all other requests, and this can be done quite transparently
73 to applications via the inherit-realtime directory inode flag.
78 74
79 This feature is unsupported at this time, is not yet fully 75 See the xfs man page in section 5 for additional information.
80 functional, and may cause serious problems.
81 76
82 If unsure, say N. 77 If unsure, say N.
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 5d73eaa1971f..9e7f85986d0d 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -59,7 +59,6 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
59xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o 59xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
60xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o 60xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
61xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o 61xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
62xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o
63 62
64 63
65xfs-y += xfs_alloc.o \ 64xfs-y += xfs_alloc.o \
@@ -73,14 +72,12 @@ xfs-y += xfs_alloc.o \
73 xfs_btree.o \ 72 xfs_btree.o \
74 xfs_buf_item.o \ 73 xfs_buf_item.o \
75 xfs_da_btree.o \ 74 xfs_da_btree.o \
76 xfs_dir.o \
77 xfs_dir2.o \ 75 xfs_dir2.o \
78 xfs_dir2_block.o \ 76 xfs_dir2_block.o \
79 xfs_dir2_data.o \ 77 xfs_dir2_data.o \
80 xfs_dir2_leaf.o \ 78 xfs_dir2_leaf.o \
81 xfs_dir2_node.o \ 79 xfs_dir2_node.o \
82 xfs_dir2_sf.o \ 80 xfs_dir2_sf.o \
83 xfs_dir_leaf.o \
84 xfs_error.o \ 81 xfs_error.o \
85 xfs_extfree_item.o \ 82 xfs_extfree_item.o \
86 xfs_fsops.o \ 83 xfs_fsops.o \
@@ -117,6 +114,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
117 kmem.o \ 114 kmem.o \
118 xfs_aops.o \ 115 xfs_aops.o \
119 xfs_buf.o \ 116 xfs_buf.o \
117 xfs_export.o \
120 xfs_file.o \ 118 xfs_file.o \
121 xfs_fs_subr.o \ 119 xfs_fs_subr.o \
122 xfs_globals.o \ 120 xfs_globals.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 2cfd33d4d8aa..939bd84bc7ee 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -23,42 +23,6 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24 24
25/* 25/*
26 * Process flags handling
27 */
28
29#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
30#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
31
32#define PFLAGS_SET_NOIO() do { \
33 current->flags |= PF_NOIO; \
34} while (0)
35
36#define PFLAGS_CLEAR_NOIO() do { \
37 current->flags &= ~PF_NOIO; \
38} while (0)
39
40/* these could be nested, so we save state */
41#define PFLAGS_SET_FSTRANS(STATEP) do { \
42 *(STATEP) = current->flags; \
43 current->flags |= PF_FSTRANS; \
44} while (0)
45
46#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
47 *(STATEP) = current->flags; \
48 current->flags &= ~PF_FSTRANS; \
49} while (0)
50
51/* Restore the PF_FSTRANS state to what was saved in STATEP */
52#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \
53 current->flags = ((current->flags & ~PF_FSTRANS) | \
54 (*(STATEP) & PF_FSTRANS)); \
55} while (0)
56
57#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
58 *(NSTATEP) = *(OSTATEP); \
59} while (0)
60
61/*
62 * General memory allocation interfaces 26 * General memory allocation interfaces
63 */ 27 */
64 28
@@ -83,7 +47,7 @@ kmem_flags_convert(unsigned int __nocast flags)
83 lflags = GFP_ATOMIC | __GFP_NOWARN; 47 lflags = GFP_ATOMIC | __GFP_NOWARN;
84 } else { 48 } else {
85 lflags = GFP_KERNEL | __GFP_NOWARN; 49 lflags = GFP_KERNEL | __GFP_NOWARN;
86 if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) 50 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
87 lflags &= ~__GFP_FS; 51 lflags &= ~__GFP_FS;
88 } 52 }
89 return lflags; 53 return lflags;
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 1b262b790d9c..32e1ce0f04c9 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -28,7 +28,7 @@ typedef struct {
28} mrlock_t; 28} mrlock_t;
29 29
30#define mrinit(mrp, name) \ 30#define mrinit(mrp, name) \
31 ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) ) 31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 33#define mrfree(mrp) do { } while (0)
34#define mraccess(mrp) mraccessf(mrp, 0) 34#define mraccess(mrp) mraccessf(mrp, 0)
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 194a84490bd1..b25090094cca 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -34,20 +34,21 @@ typedef struct semaphore sema_t;
34#define initnsema(sp, val, name) sema_init(sp, val) 34#define initnsema(sp, val, name) sema_init(sp, val)
35#define psema(sp, b) down(sp) 35#define psema(sp, b) down(sp)
36#define vsema(sp) up(sp) 36#define vsema(sp) up(sp)
37#define valusema(sp) (atomic_read(&(sp)->count)) 37#define freesema(sema) do { } while (0)
38#define freesema(sema) 38
39static inline int issemalocked(sema_t *sp)
40{
41 return down_trylock(sp) || (up(sp), 0);
42}
39 43
40/* 44/*
41 * Map cpsema (try to get the sema) to down_trylock. We need to switch 45 * Map cpsema (try to get the sema) to down_trylock. We need to switch
42 * the return values since cpsema returns 1 (acquired) 0 (failed) and 46 * the return values since cpsema returns 1 (acquired) 0 (failed) and
43 * down_trylock returns the reverse 0 (acquired) 1 (failed). 47 * down_trylock returns the reverse 0 (acquired) 1 (failed).
44 */ 48 */
45 49static inline int cpsema(sema_t *sp)
46#define cpsema(sp) (down_trylock(sp) ? 0 : 1) 50{
47 51 return down_trylock(sp) ? 0 : 1;
48/* 52}
49 * Didn't do cvsema(sp). Not sure how to map this to up/down/...
50 * It does a vsema if the values is < 0 other wise nothing.
51 */
52 53
53#endif /* __XFS_SUPPORT_SEMA_H__ */ 54#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 4d191ef39b67..c40f81ba9b13 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,7 +21,6 @@
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir.h"
25#include "xfs_dir2.h" 24#include "xfs_dir2.h"
26#include "xfs_trans.h" 25#include "xfs_trans.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
@@ -29,7 +28,6 @@
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
32#include "xfs_dir_sf.h"
33#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h" 32#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 33#include "xfs_dinode.h"
@@ -76,7 +74,7 @@ xfs_page_trace(
76 int mask) 74 int mask)
77{ 75{
78 xfs_inode_t *ip; 76 xfs_inode_t *ip;
79 vnode_t *vp = vn_from_inode(inode); 77 bhv_vnode_t *vp = vn_from_inode(inode);
80 loff_t isize = i_size_read(inode); 78 loff_t isize = i_size_read(inode);
81 loff_t offset = page_offset(page); 79 loff_t offset = page_offset(page);
82 int delalloc = -1, unmapped = -1, unwritten = -1; 80 int delalloc = -1, unmapped = -1, unwritten = -1;
@@ -136,9 +134,10 @@ xfs_destroy_ioend(
136 134
137 for (bh = ioend->io_buffer_head; bh; bh = next) { 135 for (bh = ioend->io_buffer_head; bh; bh = next) {
138 next = bh->b_private; 136 next = bh->b_private;
139 bh->b_end_io(bh, ioend->io_uptodate); 137 bh->b_end_io(bh, !ioend->io_error);
140 } 138 }
141 139 if (unlikely(ioend->io_error))
140 vn_ioerror(ioend->io_vnode, ioend->io_error, __FILE__,__LINE__);
142 vn_iowake(ioend->io_vnode); 141 vn_iowake(ioend->io_vnode);
143 mempool_free(ioend, xfs_ioend_pool); 142 mempool_free(ioend, xfs_ioend_pool);
144} 143}
@@ -180,13 +179,12 @@ xfs_end_bio_unwritten(
180 void *data) 179 void *data)
181{ 180{
182 xfs_ioend_t *ioend = data; 181 xfs_ioend_t *ioend = data;
183 vnode_t *vp = ioend->io_vnode; 182 bhv_vnode_t *vp = ioend->io_vnode;
184 xfs_off_t offset = ioend->io_offset; 183 xfs_off_t offset = ioend->io_offset;
185 size_t size = ioend->io_size; 184 size_t size = ioend->io_size;
186 int error;
187 185
188 if (ioend->io_uptodate) 186 if (likely(!ioend->io_error))
189 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); 187 bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
190 xfs_destroy_ioend(ioend); 188 xfs_destroy_ioend(ioend);
191} 189}
192 190
@@ -211,7 +209,7 @@ xfs_alloc_ioend(
211 * all the I/O from calling the completion routine too early. 209 * all the I/O from calling the completion routine too early.
212 */ 210 */
213 atomic_set(&ioend->io_remaining, 1); 211 atomic_set(&ioend->io_remaining, 1);
214 ioend->io_uptodate = 1; /* cleared if any I/O fails */ 212 ioend->io_error = 0;
215 ioend->io_list = NULL; 213 ioend->io_list = NULL;
216 ioend->io_type = type; 214 ioend->io_type = type;
217 ioend->io_vnode = vn_from_inode(inode); 215 ioend->io_vnode = vn_from_inode(inode);
@@ -239,10 +237,10 @@ xfs_map_blocks(
239 xfs_iomap_t *mapp, 237 xfs_iomap_t *mapp,
240 int flags) 238 int flags)
241{ 239{
242 vnode_t *vp = vn_from_inode(inode); 240 bhv_vnode_t *vp = vn_from_inode(inode);
243 int error, nmaps = 1; 241 int error, nmaps = 1;
244 242
245 VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); 243 error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps);
246 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) 244 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
247 VMODIFY(vp); 245 VMODIFY(vp);
248 return -error; 246 return -error;
@@ -271,16 +269,14 @@ xfs_end_bio(
271 if (bio->bi_size) 269 if (bio->bi_size)
272 return 1; 270 return 1;
273 271
274 ASSERT(ioend);
275 ASSERT(atomic_read(&bio->bi_cnt) >= 1); 272 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
273 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
276 274
277 /* Toss bio and pass work off to an xfsdatad thread */ 275 /* Toss bio and pass work off to an xfsdatad thread */
278 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
279 ioend->io_uptodate = 0;
280 bio->bi_private = NULL; 276 bio->bi_private = NULL;
281 bio->bi_end_io = NULL; 277 bio->bi_end_io = NULL;
282
283 bio_put(bio); 278 bio_put(bio);
279
284 xfs_finish_ioend(ioend); 280 xfs_finish_ioend(ioend);
285 return 0; 281 return 0;
286} 282}
@@ -1127,7 +1123,7 @@ xfs_vm_writepage(
1127 * then mark the page dirty again and leave the page 1123 * then mark the page dirty again and leave the page
1128 * as is. 1124 * as is.
1129 */ 1125 */
1130 if (PFLAGS_TEST_FSTRANS() && need_trans) 1126 if (current_test_flags(PF_FSTRANS) && need_trans)
1131 goto out_fail; 1127 goto out_fail;
1132 1128
1133 /* 1129 /*
@@ -1158,6 +1154,18 @@ out_unlock:
1158 return error; 1154 return error;
1159} 1155}
1160 1156
1157STATIC int
1158xfs_vm_writepages(
1159 struct address_space *mapping,
1160 struct writeback_control *wbc)
1161{
1162 struct bhv_vnode *vp = vn_from_inode(mapping->host);
1163
1164 if (VN_TRUNC(vp))
1165 VUNTRUNCATE(vp);
1166 return generic_writepages(mapping, wbc);
1167}
1168
1161/* 1169/*
1162 * Called to move a page into cleanable state - and from there 1170 * Called to move a page into cleanable state - and from there
1163 * to be released. Possibly the page is already clean. We always 1171 * to be released. Possibly the page is already clean. We always
@@ -1204,7 +1212,7 @@ xfs_vm_releasepage(
1204 /* If we are already inside a transaction or the thread cannot 1212 /* If we are already inside a transaction or the thread cannot
1205 * do I/O, we cannot release this page. 1213 * do I/O, we cannot release this page.
1206 */ 1214 */
1207 if (PFLAGS_TEST_FSTRANS()) 1215 if (current_test_flags(PF_FSTRANS))
1208 return 0; 1216 return 0;
1209 1217
1210 /* 1218 /*
@@ -1231,7 +1239,7 @@ __xfs_get_blocks(
1231 int direct, 1239 int direct,
1232 bmapi_flags_t flags) 1240 bmapi_flags_t flags)
1233{ 1241{
1234 vnode_t *vp = vn_from_inode(inode); 1242 bhv_vnode_t *vp = vn_from_inode(inode);
1235 xfs_iomap_t iomap; 1243 xfs_iomap_t iomap;
1236 xfs_off_t offset; 1244 xfs_off_t offset;
1237 ssize_t size; 1245 ssize_t size;
@@ -1241,8 +1249,8 @@ __xfs_get_blocks(
1241 offset = (xfs_off_t)iblock << inode->i_blkbits; 1249 offset = (xfs_off_t)iblock << inode->i_blkbits;
1242 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1250 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1243 size = bh_result->b_size; 1251 size = bh_result->b_size;
1244 VOP_BMAP(vp, offset, size, 1252 error = bhv_vop_bmap(vp, offset, size,
1245 create ? flags : BMAPI_READ, &iomap, &niomap, error); 1253 create ? flags : BMAPI_READ, &iomap, &niomap);
1246 if (error) 1254 if (error)
1247 return -error; 1255 return -error;
1248 if (niomap == 0) 1256 if (niomap == 0)
@@ -1370,13 +1378,13 @@ xfs_vm_direct_IO(
1370{ 1378{
1371 struct file *file = iocb->ki_filp; 1379 struct file *file = iocb->ki_filp;
1372 struct inode *inode = file->f_mapping->host; 1380 struct inode *inode = file->f_mapping->host;
1373 vnode_t *vp = vn_from_inode(inode); 1381 bhv_vnode_t *vp = vn_from_inode(inode);
1374 xfs_iomap_t iomap; 1382 xfs_iomap_t iomap;
1375 int maps = 1; 1383 int maps = 1;
1376 int error; 1384 int error;
1377 ssize_t ret; 1385 ssize_t ret;
1378 1386
1379 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); 1387 error = bhv_vop_bmap(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps);
1380 if (error) 1388 if (error)
1381 return -error; 1389 return -error;
1382 1390
@@ -1409,14 +1417,12 @@ xfs_vm_bmap(
1409 sector_t block) 1417 sector_t block)
1410{ 1418{
1411 struct inode *inode = (struct inode *)mapping->host; 1419 struct inode *inode = (struct inode *)mapping->host;
1412 vnode_t *vp = vn_from_inode(inode); 1420 bhv_vnode_t *vp = vn_from_inode(inode);
1413 int error;
1414 1421
1415 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 1422 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
1416 1423 bhv_vop_rwlock(vp, VRWLOCK_READ);
1417 VOP_RWLOCK(vp, VRWLOCK_READ); 1424 bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
1418 VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); 1425 bhv_vop_rwunlock(vp, VRWLOCK_READ);
1419 VOP_RWUNLOCK(vp, VRWLOCK_READ);
1420 return generic_block_bmap(mapping, block, xfs_get_blocks); 1426 return generic_block_bmap(mapping, block, xfs_get_blocks);
1421} 1427}
1422 1428
@@ -1448,10 +1454,11 @@ xfs_vm_invalidatepage(
1448 block_invalidatepage(page, offset); 1454 block_invalidatepage(page, offset);
1449} 1455}
1450 1456
1451struct address_space_operations xfs_address_space_operations = { 1457const struct address_space_operations xfs_address_space_operations = {
1452 .readpage = xfs_vm_readpage, 1458 .readpage = xfs_vm_readpage,
1453 .readpages = xfs_vm_readpages, 1459 .readpages = xfs_vm_readpages,
1454 .writepage = xfs_vm_writepage, 1460 .writepage = xfs_vm_writepage,
1461 .writepages = xfs_vm_writepages,
1455 .sync_page = block_sync_page, 1462 .sync_page = block_sync_page,
1456 .releasepage = xfs_vm_releasepage, 1463 .releasepage = xfs_vm_releasepage,
1457 .invalidatepage = xfs_vm_invalidatepage, 1464 .invalidatepage = xfs_vm_invalidatepage,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 60716543c68b..2244e516b66a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2005 Silicon Graphics, Inc. 2 * Copyright (c) 2005-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -30,9 +30,9 @@ typedef void (*xfs_ioend_func_t)(void *);
30typedef struct xfs_ioend { 30typedef struct xfs_ioend {
31 struct xfs_ioend *io_list; /* next ioend in chain */ 31 struct xfs_ioend *io_list; /* next ioend in chain */
32 unsigned int io_type; /* delalloc / unwritten */ 32 unsigned int io_type; /* delalloc / unwritten */
33 unsigned int io_uptodate; /* I/O status register */ 33 int io_error; /* I/O error code */
34 atomic_t io_remaining; /* hold count */ 34 atomic_t io_remaining; /* hold count */
35 struct vnode *io_vnode; /* file being written to */ 35 struct bhv_vnode *io_vnode; /* file being written to */
36 struct buffer_head *io_buffer_head;/* buffer linked list head */ 36 struct buffer_head *io_buffer_head;/* buffer linked list head */
37 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 37 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
38 size_t io_size; /* size of the extent */ 38 size_t io_size; /* size of the extent */
@@ -40,7 +40,7 @@ typedef struct xfs_ioend {
40 struct work_struct io_work; /* xfsdatad work queue */ 40 struct work_struct io_work; /* xfsdatad work queue */
41} xfs_ioend_t; 41} xfs_ioend_t;
42 42
43extern struct address_space_operations xfs_address_space_operations; 43extern const struct address_space_operations xfs_address_space_operations;
44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); 44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45 45
46#endif /* __XFS_IOPS_H__ */ 46#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 26fed0756f01..2af528dcfb04 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1520,7 +1520,7 @@ xfs_mapping_buftarg(
1520 struct backing_dev_info *bdi; 1520 struct backing_dev_info *bdi;
1521 struct inode *inode; 1521 struct inode *inode;
1522 struct address_space *mapping; 1522 struct address_space *mapping;
1523 static struct address_space_operations mapping_aops = { 1523 static const struct address_space_operations mapping_aops = {
1524 .sync_page = block_sync_page, 1524 .sync_page = block_sync_page,
1525 .migratepage = fail_migrate_page, 1525 .migratepage = fail_migrate_page,
1526 }; 1526 };
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 4dd6592d5a4c..ceda3a2859d2 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_BUF_H__ 18#ifndef __XFS_BUF_H__
19#define __XFS_BUF_H__ 19#define __XFS_BUF_H__
20 20
21#include <linux/config.h>
22#include <linux/list.h> 21#include <linux/list.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include <linux/spinlock.h> 23#include <linux/spinlock.h>
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index b768ea910bbe..5fb75d9151f2 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -21,7 +21,6 @@
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_trans.h" 22#include "xfs_trans.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_dir.h"
25#include "xfs_mount.h" 24#include "xfs_mount.h"
26#include "xfs_export.h" 25#include "xfs_export.h"
27 26
@@ -97,7 +96,7 @@ xfs_fs_encode_fh(
97 int len; 96 int len;
98 int is64 = 0; 97 int is64 = 0;
99#if XFS_BIG_INUMS 98#if XFS_BIG_INUMS
100 vfs_t *vfs = vfs_from_sb(inode->i_sb); 99 bhv_vfs_t *vfs = vfs_from_sb(inode->i_sb);
101 100
102 if (!(vfs->vfs_flag & VFS_32BITINODES)) { 101 if (!(vfs->vfs_flag & VFS_32BITINODES)) {
103 /* filesystem may contain 64bit inode numbers */ 102 /* filesystem may contain 64bit inode numbers */
@@ -136,13 +135,13 @@ xfs_fs_get_dentry(
136 struct super_block *sb, 135 struct super_block *sb,
137 void *data) 136 void *data)
138{ 137{
139 vnode_t *vp; 138 bhv_vnode_t *vp;
140 struct inode *inode; 139 struct inode *inode;
141 struct dentry *result; 140 struct dentry *result;
142 vfs_t *vfsp = vfs_from_sb(sb); 141 bhv_vfs_t *vfsp = vfs_from_sb(sb);
143 int error; 142 int error;
144 143
145 VFS_VGET(vfsp, &vp, (fid_t *)data, error); 144 error = bhv_vfs_vget(vfsp, &vp, (fid_t *)data);
146 if (error || vp == NULL) 145 if (error || vp == NULL)
147 return ERR_PTR(-ESTALE) ; 146 return ERR_PTR(-ESTALE) ;
148 147
@@ -160,12 +159,12 @@ xfs_fs_get_parent(
160 struct dentry *child) 159 struct dentry *child)
161{ 160{
162 int error; 161 int error;
163 vnode_t *vp, *cvp; 162 bhv_vnode_t *vp, *cvp;
164 struct dentry *parent; 163 struct dentry *parent;
165 164
166 cvp = NULL; 165 cvp = NULL;
167 vp = vn_from_inode(child->d_inode); 166 vp = vn_from_inode(child->d_inode);
168 VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); 167 error = bhv_vop_lookup(vp, &dotdot, &cvp, 0, NULL, NULL);
169 if (unlikely(error)) 168 if (unlikely(error))
170 return ERR_PTR(-error); 169 return ERR_PTR(-error);
171 170
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index c847416f6d10..3d4f6dff2113 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -21,7 +21,6 @@
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir.h"
25#include "xfs_dir2.h" 24#include "xfs_dir2.h"
26#include "xfs_trans.h" 25#include "xfs_trans.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_alloc.h" 31#include "xfs_alloc.h"
33#include "xfs_btree.h" 32#include "xfs_btree.h"
34#include "xfs_attr_sf.h" 33#include "xfs_attr_sf.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
38#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -58,15 +56,12 @@ __xfs_file_read(
58{ 56{
59 struct iovec iov = {buf, count}; 57 struct iovec iov = {buf, count};
60 struct file *file = iocb->ki_filp; 58 struct file *file = iocb->ki_filp;
61 vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); 59 bhv_vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
62 ssize_t rval;
63 60
64 BUG_ON(iocb->ki_pos != pos); 61 BUG_ON(iocb->ki_pos != pos);
65
66 if (unlikely(file->f_flags & O_DIRECT)) 62 if (unlikely(file->f_flags & O_DIRECT))
67 ioflags |= IO_ISDIRECT; 63 ioflags |= IO_ISDIRECT;
68 VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); 64 return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL);
69 return rval;
70} 65}
71 66
72STATIC ssize_t 67STATIC ssize_t
@@ -100,15 +95,12 @@ __xfs_file_write(
100 struct iovec iov = {(void __user *)buf, count}; 95 struct iovec iov = {(void __user *)buf, count};
101 struct file *file = iocb->ki_filp; 96 struct file *file = iocb->ki_filp;
102 struct inode *inode = file->f_mapping->host; 97 struct inode *inode = file->f_mapping->host;
103 vnode_t *vp = vn_from_inode(inode); 98 bhv_vnode_t *vp = vn_from_inode(inode);
104 ssize_t rval;
105 99
106 BUG_ON(iocb->ki_pos != pos); 100 BUG_ON(iocb->ki_pos != pos);
107 if (unlikely(file->f_flags & O_DIRECT)) 101 if (unlikely(file->f_flags & O_DIRECT))
108 ioflags |= IO_ISDIRECT; 102 ioflags |= IO_ISDIRECT;
109 103 return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL);
110 VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
111 return rval;
112} 104}
113 105
114STATIC ssize_t 106STATIC ssize_t
@@ -140,7 +132,7 @@ __xfs_file_readv(
140 loff_t *ppos) 132 loff_t *ppos)
141{ 133{
142 struct inode *inode = file->f_mapping->host; 134 struct inode *inode = file->f_mapping->host;
143 vnode_t *vp = vn_from_inode(inode); 135 bhv_vnode_t *vp = vn_from_inode(inode);
144 struct kiocb kiocb; 136 struct kiocb kiocb;
145 ssize_t rval; 137 ssize_t rval;
146 138
@@ -149,7 +141,8 @@ __xfs_file_readv(
149 141
150 if (unlikely(file->f_flags & O_DIRECT)) 142 if (unlikely(file->f_flags & O_DIRECT))
151 ioflags |= IO_ISDIRECT; 143 ioflags |= IO_ISDIRECT;
152 VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); 144 rval = bhv_vop_read(vp, &kiocb, iov, nr_segs,
145 &kiocb.ki_pos, ioflags, NULL);
153 146
154 *ppos = kiocb.ki_pos; 147 *ppos = kiocb.ki_pos;
155 return rval; 148 return rval;
@@ -184,7 +177,7 @@ __xfs_file_writev(
184 loff_t *ppos) 177 loff_t *ppos)
185{ 178{
186 struct inode *inode = file->f_mapping->host; 179 struct inode *inode = file->f_mapping->host;
187 vnode_t *vp = vn_from_inode(inode); 180 bhv_vnode_t *vp = vn_from_inode(inode);
188 struct kiocb kiocb; 181 struct kiocb kiocb;
189 ssize_t rval; 182 ssize_t rval;
190 183
@@ -193,7 +186,8 @@ __xfs_file_writev(
193 if (unlikely(file->f_flags & O_DIRECT)) 186 if (unlikely(file->f_flags & O_DIRECT))
194 ioflags |= IO_ISDIRECT; 187 ioflags |= IO_ISDIRECT;
195 188
196 VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); 189 rval = bhv_vop_write(vp, &kiocb, iov, nr_segs,
190 &kiocb.ki_pos, ioflags, NULL);
197 191
198 *ppos = kiocb.ki_pos; 192 *ppos = kiocb.ki_pos;
199 return rval; 193 return rval;
@@ -227,11 +221,8 @@ xfs_file_sendfile(
227 read_actor_t actor, 221 read_actor_t actor,
228 void *target) 222 void *target)
229{ 223{
230 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); 224 return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
231 ssize_t rval; 225 filp, pos, 0, count, actor, target, NULL);
232
233 VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval);
234 return rval;
235} 226}
236 227
237STATIC ssize_t 228STATIC ssize_t
@@ -242,11 +233,8 @@ xfs_file_sendfile_invis(
242 read_actor_t actor, 233 read_actor_t actor,
243 void *target) 234 void *target)
244{ 235{
245 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); 236 return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
246 ssize_t rval; 237 filp, pos, IO_INVIS, count, actor, target, NULL);
247
248 VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval);
249 return rval;
250} 238}
251 239
252STATIC ssize_t 240STATIC ssize_t
@@ -257,11 +245,8 @@ xfs_file_splice_read(
257 size_t len, 245 size_t len,
258 unsigned int flags) 246 unsigned int flags)
259{ 247{
260 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 248 return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
261 ssize_t rval; 249 infilp, ppos, pipe, len, flags, 0, NULL);
262
263 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, 0, NULL, rval);
264 return rval;
265} 250}
266 251
267STATIC ssize_t 252STATIC ssize_t
@@ -272,11 +257,9 @@ xfs_file_splice_read_invis(
272 size_t len, 257 size_t len,
273 unsigned int flags) 258 unsigned int flags)
274{ 259{
275 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 260 return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
276 ssize_t rval; 261 infilp, ppos, pipe, len, flags, IO_INVIS,
277 262 NULL);
278 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, IO_INVIS, NULL, rval);
279 return rval;
280} 263}
281 264
282STATIC ssize_t 265STATIC ssize_t
@@ -287,11 +270,8 @@ xfs_file_splice_write(
287 size_t len, 270 size_t len,
288 unsigned int flags) 271 unsigned int flags)
289{ 272{
290 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 273 return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
291 ssize_t rval; 274 pipe, outfilp, ppos, len, flags, 0, NULL);
292
293 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, 0, NULL, rval);
294 return rval;
295} 275}
296 276
297STATIC ssize_t 277STATIC ssize_t
@@ -302,11 +282,9 @@ xfs_file_splice_write_invis(
302 size_t len, 282 size_t len,
303 unsigned int flags) 283 unsigned int flags)
304{ 284{
305 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 285 return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
306 ssize_t rval; 286 pipe, outfilp, ppos, len, flags, IO_INVIS,
307 287 NULL);
308 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, IO_INVIS, NULL, rval);
309 return rval;
310} 288}
311 289
312STATIC int 290STATIC int
@@ -314,13 +292,18 @@ xfs_file_open(
314 struct inode *inode, 292 struct inode *inode,
315 struct file *filp) 293 struct file *filp)
316{ 294{
317 vnode_t *vp = vn_from_inode(inode);
318 int error;
319
320 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 295 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
321 return -EFBIG; 296 return -EFBIG;
322 VOP_OPEN(vp, NULL, error); 297 return -bhv_vop_open(vn_from_inode(inode), NULL);
323 return -error; 298}
299
300STATIC int
301xfs_file_close(
302 struct file *filp,
303 fl_owner_t id)
304{
305 return -bhv_vop_close(vn_from_inode(filp->f_dentry->d_inode), 0,
306 file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
324} 307}
325 308
326STATIC int 309STATIC int
@@ -328,12 +311,11 @@ xfs_file_release(
328 struct inode *inode, 311 struct inode *inode,
329 struct file *filp) 312 struct file *filp)
330{ 313{
331 vnode_t *vp = vn_from_inode(inode); 314 bhv_vnode_t *vp = vn_from_inode(inode);
332 int error = 0;
333 315
334 if (vp) 316 if (vp)
335 VOP_RELEASE(vp, error); 317 return -bhv_vop_release(vp);
336 return -error; 318 return 0;
337} 319}
338 320
339STATIC int 321STATIC int
@@ -342,15 +324,14 @@ xfs_file_fsync(
342 struct dentry *dentry, 324 struct dentry *dentry,
343 int datasync) 325 int datasync)
344{ 326{
345 struct inode *inode = dentry->d_inode; 327 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
346 vnode_t *vp = vn_from_inode(inode);
347 int error;
348 int flags = FSYNC_WAIT; 328 int flags = FSYNC_WAIT;
349 329
350 if (datasync) 330 if (datasync)
351 flags |= FSYNC_DATA; 331 flags |= FSYNC_DATA;
352 VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); 332 if (VN_TRUNC(vp))
353 return -error; 333 VUNTRUNCATE(vp);
334 return -bhv_vop_fsync(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1);
354} 335}
355 336
356#ifdef CONFIG_XFS_DMAPI 337#ifdef CONFIG_XFS_DMAPI
@@ -361,16 +342,11 @@ xfs_vm_nopage(
361 int *type) 342 int *type)
362{ 343{
363 struct inode *inode = area->vm_file->f_dentry->d_inode; 344 struct inode *inode = area->vm_file->f_dentry->d_inode;
364 vnode_t *vp = vn_from_inode(inode); 345 bhv_vnode_t *vp = vn_from_inode(inode);
365 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
366 int error;
367 346
368 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); 347 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
369 348 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0))
370 error = XFS_SEND_MMAP(mp, area, 0);
371 if (error)
372 return NULL; 349 return NULL;
373
374 return filemap_nopage(area, address, type); 350 return filemap_nopage(area, address, type);
375} 351}
376#endif /* CONFIG_XFS_DMAPI */ 352#endif /* CONFIG_XFS_DMAPI */
@@ -382,7 +358,7 @@ xfs_file_readdir(
382 filldir_t filldir) 358 filldir_t filldir)
383{ 359{
384 int error = 0; 360 int error = 0;
385 vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); 361 bhv_vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode);
386 uio_t uio; 362 uio_t uio;
387 iovec_t iov; 363 iovec_t iov;
388 int eof = 0; 364 int eof = 0;
@@ -417,7 +393,7 @@ xfs_file_readdir(
417 393
418 start_offset = uio.uio_offset; 394 start_offset = uio.uio_offset;
419 395
420 VOP_READDIR(vp, &uio, NULL, &eof, error); 396 error = bhv_vop_readdir(vp, &uio, NULL, &eof);
421 if ((uio.uio_offset == start_offset) || error) { 397 if ((uio.uio_offset == start_offset) || error) {
422 size = 0; 398 size = 0;
423 break; 399 break;
@@ -456,38 +432,28 @@ xfs_file_mmap(
456 struct file *filp, 432 struct file *filp,
457 struct vm_area_struct *vma) 433 struct vm_area_struct *vma)
458{ 434{
459 struct inode *ip = filp->f_dentry->d_inode;
460 vnode_t *vp = vn_from_inode(ip);
461 vattr_t vattr;
462 int error;
463
464 vma->vm_ops = &xfs_file_vm_ops; 435 vma->vm_ops = &xfs_file_vm_ops;
465 436
466#ifdef CONFIG_XFS_DMAPI 437#ifdef CONFIG_XFS_DMAPI
467 if (vp->v_vfsp->vfs_flag & VFS_DMI) { 438 if (vn_from_inode(filp->f_dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
468 vma->vm_ops = &xfs_dmapi_file_vm_ops; 439 vma->vm_ops = &xfs_dmapi_file_vm_ops;
469 }
470#endif /* CONFIG_XFS_DMAPI */ 440#endif /* CONFIG_XFS_DMAPI */
471 441
472 vattr.va_mask = XFS_AT_UPDATIME; 442 file_accessed(filp);
473 VOP_SETATTR(vp, &vattr, XFS_AT_UPDATIME, NULL, error);
474 if (likely(!error))
475 __vn_revalidate(vp, &vattr); /* update flags */
476 return 0; 443 return 0;
477} 444}
478 445
479
480STATIC long 446STATIC long
481xfs_file_ioctl( 447xfs_file_ioctl(
482 struct file *filp, 448 struct file *filp,
483 unsigned int cmd, 449 unsigned int cmd,
484 unsigned long arg) 450 unsigned long p)
485{ 451{
486 int error; 452 int error;
487 struct inode *inode = filp->f_dentry->d_inode; 453 struct inode *inode = filp->f_dentry->d_inode;
488 vnode_t *vp = vn_from_inode(inode); 454 bhv_vnode_t *vp = vn_from_inode(inode);
489 455
490 VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); 456 error = bhv_vop_ioctl(vp, inode, filp, 0, cmd, (void __user *)p);
491 VMODIFY(vp); 457 VMODIFY(vp);
492 458
493 /* NOTE: some of the ioctl's return positive #'s as a 459 /* NOTE: some of the ioctl's return positive #'s as a
@@ -503,13 +469,13 @@ STATIC long
503xfs_file_ioctl_invis( 469xfs_file_ioctl_invis(
504 struct file *filp, 470 struct file *filp,
505 unsigned int cmd, 471 unsigned int cmd,
506 unsigned long arg) 472 unsigned long p)
507{ 473{
508 struct inode *inode = filp->f_dentry->d_inode;
509 vnode_t *vp = vn_from_inode(inode);
510 int error; 474 int error;
475 struct inode *inode = filp->f_dentry->d_inode;
476 bhv_vnode_t *vp = vn_from_inode(inode);
511 477
512 VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); 478 error = bhv_vop_ioctl(vp, inode, filp, IO_INVIS, cmd, (void __user *)p);
513 VMODIFY(vp); 479 VMODIFY(vp);
514 480
515 /* NOTE: some of the ioctl's return positive #'s as a 481 /* NOTE: some of the ioctl's return positive #'s as a
@@ -528,7 +494,7 @@ xfs_vm_mprotect(
528 struct vm_area_struct *vma, 494 struct vm_area_struct *vma,
529 unsigned int newflags) 495 unsigned int newflags)
530{ 496{
531 vnode_t *vp = vn_from_inode(vma->vm_file->f_dentry->d_inode); 497 bhv_vnode_t *vp = vn_from_inode(vma->vm_file->f_dentry->d_inode);
532 int error = 0; 498 int error = 0;
533 499
534 if (vp->v_vfsp->vfs_flag & VFS_DMI) { 500 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
@@ -554,24 +520,19 @@ STATIC int
554xfs_file_open_exec( 520xfs_file_open_exec(
555 struct inode *inode) 521 struct inode *inode)
556{ 522{
557 vnode_t *vp = vn_from_inode(inode); 523 bhv_vnode_t *vp = vn_from_inode(inode);
558 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
559 int error = 0;
560 xfs_inode_t *ip;
561 524
562 if (vp->v_vfsp->vfs_flag & VFS_DMI) { 525 if (unlikely(vp->v_vfsp->vfs_flag & VFS_DMI)) {
563 ip = xfs_vtoi(vp); 526 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
564 if (!ip) { 527 xfs_inode_t *ip = xfs_vtoi(vp);
565 error = -EINVAL; 528
566 goto open_exec_out; 529 if (!ip)
567 } 530 return -EINVAL;
568 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { 531 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ))
569 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 532 return -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
570 0, 0, 0, NULL); 533 0, 0, 0, NULL);
571 }
572 } 534 }
573open_exec_out: 535 return 0;
574 return error;
575} 536}
576#endif /* HAVE_FOP_OPEN_EXEC */ 537#endif /* HAVE_FOP_OPEN_EXEC */
577 538
@@ -592,6 +553,7 @@ const struct file_operations xfs_file_operations = {
592#endif 553#endif
593 .mmap = xfs_file_mmap, 554 .mmap = xfs_file_mmap,
594 .open = xfs_file_open, 555 .open = xfs_file_open,
556 .flush = xfs_file_close,
595 .release = xfs_file_release, 557 .release = xfs_file_release,
596 .fsync = xfs_file_fsync, 558 .fsync = xfs_file_fsync,
597#ifdef HAVE_FOP_OPEN_EXEC 559#ifdef HAVE_FOP_OPEN_EXEC
@@ -616,6 +578,7 @@ const struct file_operations xfs_invis_file_operations = {
616#endif 578#endif
617 .mmap = xfs_file_mmap, 579 .mmap = xfs_file_mmap,
618 .open = xfs_file_open, 580 .open = xfs_file_open,
581 .flush = xfs_file_close,
619 .release = xfs_file_release, 582 .release = xfs_file_release,
620 .fsync = xfs_file_fsync, 583 .fsync = xfs_file_fsync,
621}; 584};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 575f2a790f31..dc0562828e76 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -15,40 +15,12 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include "xfs.h" 18#include "xfs.h"
20 19
21/* 20int fs_noerr(void) { return 0; }
22 * Stub for no-op vnode operations that return error status. 21int fs_nosys(void) { return ENOSYS; }
23 */ 22void fs_noval(void) { return; }
24int
25fs_noerr(void)
26{
27 return 0;
28}
29 23
30/*
31 * Operation unsupported under this file system.
32 */
33int
34fs_nosys(void)
35{
36 return ENOSYS;
37}
38
39/*
40 * Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
41 */
42/* ARGSUSED */
43void
44fs_noval(void)
45{
46}
47
48/*
49 * vnode pcache layer for vnode_tosspages.
50 * 'last' parameter unused but left in for IRIX compatibility
51 */
52void 24void
53fs_tosspages( 25fs_tosspages(
54 bhv_desc_t *bdp, 26 bhv_desc_t *bdp,
@@ -56,18 +28,13 @@ fs_tosspages(
56 xfs_off_t last, 28 xfs_off_t last,
57 int fiopt) 29 int fiopt)
58{ 30{
59 vnode_t *vp = BHV_TO_VNODE(bdp); 31 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
60 struct inode *ip = vn_to_inode(vp); 32 struct inode *ip = vn_to_inode(vp);
61 33
62 if (VN_CACHED(vp)) 34 if (VN_CACHED(vp))
63 truncate_inode_pages(ip->i_mapping, first); 35 truncate_inode_pages(ip->i_mapping, first);
64} 36}
65 37
66
67/*
68 * vnode pcache layer for vnode_flushinval_pages.
69 * 'last' parameter unused but left in for IRIX compatibility
70 */
71void 38void
72fs_flushinval_pages( 39fs_flushinval_pages(
73 bhv_desc_t *bdp, 40 bhv_desc_t *bdp,
@@ -75,20 +42,17 @@ fs_flushinval_pages(
75 xfs_off_t last, 42 xfs_off_t last,
76 int fiopt) 43 int fiopt)
77{ 44{
78 vnode_t *vp = BHV_TO_VNODE(bdp); 45 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
79 struct inode *ip = vn_to_inode(vp); 46 struct inode *ip = vn_to_inode(vp);
80 47
81 if (VN_CACHED(vp)) { 48 if (VN_CACHED(vp)) {
49 if (VN_TRUNC(vp))
50 VUNTRUNCATE(vp);
82 filemap_write_and_wait(ip->i_mapping); 51 filemap_write_and_wait(ip->i_mapping);
83
84 truncate_inode_pages(ip->i_mapping, first); 52 truncate_inode_pages(ip->i_mapping, first);
85 } 53 }
86} 54}
87 55
88/*
89 * vnode pcache layer for vnode_flush_pages.
90 * 'last' parameter unused but left in for IRIX compatibility
91 */
92int 56int
93fs_flush_pages( 57fs_flush_pages(
94 bhv_desc_t *bdp, 58 bhv_desc_t *bdp,
@@ -97,15 +61,16 @@ fs_flush_pages(
97 uint64_t flags, 61 uint64_t flags,
98 int fiopt) 62 int fiopt)
99{ 63{
100 vnode_t *vp = BHV_TO_VNODE(bdp); 64 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
101 struct inode *ip = vn_to_inode(vp); 65 struct inode *ip = vn_to_inode(vp);
102 66
103 if (VN_CACHED(vp)) { 67 if (VN_DIRTY(vp)) {
68 if (VN_TRUNC(vp))
69 VUNTRUNCATE(vp);
104 filemap_fdatawrite(ip->i_mapping); 70 filemap_fdatawrite(ip->i_mapping);
105 if (flags & XFS_B_ASYNC) 71 if (flags & XFS_B_ASYNC)
106 return 0; 72 return 0;
107 filemap_fdatawait(ip->i_mapping); 73 filemap_fdatawait(ip->i_mapping);
108 } 74 }
109
110 return 0; 75 return 0;
111} 76}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 6e8085f34635..6c162c3dde7e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -45,6 +45,7 @@ xfs_param_t xfs_params = {
45 .xfs_buf_age = { 1*100, 15*100, 7200*100}, 45 .xfs_buf_age = { 1*100, 15*100, 7200*100},
46 .inherit_nosym = { 0, 0, 1 }, 46 .inherit_nosym = { 0, 0, 1 },
47 .rotorstep = { 1, 1, 255 }, 47 .rotorstep = { 1, 1, 255 },
48 .inherit_nodfrg = { 0, 1, 1 },
48}; 49};
49 50
50/* 51/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 84478491609b..6e52a5dd38d8 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -31,7 +30,6 @@
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_attr_sf.h" 33#include "xfs_attr_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -78,7 +76,7 @@ xfs_find_handle(
78 xfs_handle_t handle; 76 xfs_handle_t handle;
79 xfs_fsop_handlereq_t hreq; 77 xfs_fsop_handlereq_t hreq;
80 struct inode *inode; 78 struct inode *inode;
81 struct vnode *vp; 79 bhv_vnode_t *vp;
82 80
83 if (copy_from_user(&hreq, arg, sizeof(hreq))) 81 if (copy_from_user(&hreq, arg, sizeof(hreq)))
84 return -XFS_ERROR(EFAULT); 82 return -XFS_ERROR(EFAULT);
@@ -192,7 +190,7 @@ xfs_vget_fsop_handlereq(
192 xfs_mount_t *mp, 190 xfs_mount_t *mp,
193 struct inode *parinode, /* parent inode pointer */ 191 struct inode *parinode, /* parent inode pointer */
194 xfs_fsop_handlereq_t *hreq, 192 xfs_fsop_handlereq_t *hreq,
195 vnode_t **vp, 193 bhv_vnode_t **vp,
196 struct inode **inode) 194 struct inode **inode)
197{ 195{
198 void __user *hanp; 196 void __user *hanp;
@@ -202,7 +200,7 @@ xfs_vget_fsop_handlereq(
202 xfs_handle_t handle; 200 xfs_handle_t handle;
203 xfs_inode_t *ip; 201 xfs_inode_t *ip;
204 struct inode *inodep; 202 struct inode *inodep;
205 vnode_t *vpp; 203 bhv_vnode_t *vpp;
206 xfs_ino_t ino; 204 xfs_ino_t ino;
207 __u32 igen; 205 __u32 igen;
208 int error; 206 int error;
@@ -277,7 +275,7 @@ xfs_open_by_handle(
277 struct file *filp; 275 struct file *filp;
278 struct inode *inode; 276 struct inode *inode;
279 struct dentry *dentry; 277 struct dentry *dentry;
280 vnode_t *vp; 278 bhv_vnode_t *vp;
281 xfs_fsop_handlereq_t hreq; 279 xfs_fsop_handlereq_t hreq;
282 280
283 if (!capable(CAP_SYS_ADMIN)) 281 if (!capable(CAP_SYS_ADMIN))
@@ -362,7 +360,7 @@ xfs_readlink_by_handle(
362 struct uio auio; 360 struct uio auio;
363 struct inode *inode; 361 struct inode *inode;
364 xfs_fsop_handlereq_t hreq; 362 xfs_fsop_handlereq_t hreq;
365 vnode_t *vp; 363 bhv_vnode_t *vp;
366 __u32 olen; 364 __u32 olen;
367 365
368 if (!capable(CAP_SYS_ADMIN)) 366 if (!capable(CAP_SYS_ADMIN))
@@ -393,9 +391,11 @@ xfs_readlink_by_handle(
393 auio.uio_segflg = UIO_USERSPACE; 391 auio.uio_segflg = UIO_USERSPACE;
394 auio.uio_resid = olen; 392 auio.uio_resid = olen;
395 393
396 VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); 394 error = bhv_vop_readlink(vp, &auio, IO_INVIS, NULL);
397
398 VN_RELE(vp); 395 VN_RELE(vp);
396 if (error)
397 return -error;
398
399 return (olen - auio.uio_resid); 399 return (olen - auio.uio_resid);
400} 400}
401 401
@@ -411,7 +411,7 @@ xfs_fssetdm_by_handle(
411 xfs_fsop_setdm_handlereq_t dmhreq; 411 xfs_fsop_setdm_handlereq_t dmhreq;
412 struct inode *inode; 412 struct inode *inode;
413 bhv_desc_t *bdp; 413 bhv_desc_t *bdp;
414 vnode_t *vp; 414 bhv_vnode_t *vp;
415 415
416 if (!capable(CAP_MKNOD)) 416 if (!capable(CAP_MKNOD))
417 return -XFS_ERROR(EPERM); 417 return -XFS_ERROR(EPERM);
@@ -452,7 +452,7 @@ xfs_attrlist_by_handle(
452 attrlist_cursor_kern_t *cursor; 452 attrlist_cursor_kern_t *cursor;
453 xfs_fsop_attrlist_handlereq_t al_hreq; 453 xfs_fsop_attrlist_handlereq_t al_hreq;
454 struct inode *inode; 454 struct inode *inode;
455 vnode_t *vp; 455 bhv_vnode_t *vp;
456 char *kbuf; 456 char *kbuf;
457 457
458 if (!capable(CAP_SYS_ADMIN)) 458 if (!capable(CAP_SYS_ADMIN))
@@ -472,8 +472,8 @@ xfs_attrlist_by_handle(
472 goto out_vn_rele; 472 goto out_vn_rele;
473 473
474 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; 474 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
475 VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags, 475 error = bhv_vop_attr_list(vp, kbuf, al_hreq.buflen, al_hreq.flags,
476 cursor, NULL, error); 476 cursor, NULL);
477 if (error) 477 if (error)
478 goto out_kfree; 478 goto out_kfree;
479 479
@@ -490,7 +490,7 @@ xfs_attrlist_by_handle(
490 490
491STATIC int 491STATIC int
492xfs_attrmulti_attr_get( 492xfs_attrmulti_attr_get(
493 struct vnode *vp, 493 bhv_vnode_t *vp,
494 char *name, 494 char *name,
495 char __user *ubuf, 495 char __user *ubuf,
496 __uint32_t *len, 496 __uint32_t *len,
@@ -505,7 +505,7 @@ xfs_attrmulti_attr_get(
505 if (!kbuf) 505 if (!kbuf)
506 return ENOMEM; 506 return ENOMEM;
507 507
508 VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error); 508 error = bhv_vop_attr_get(vp, name, kbuf, len, flags, NULL);
509 if (error) 509 if (error)
510 goto out_kfree; 510 goto out_kfree;
511 511
@@ -519,7 +519,7 @@ xfs_attrmulti_attr_get(
519 519
520STATIC int 520STATIC int
521xfs_attrmulti_attr_set( 521xfs_attrmulti_attr_set(
522 struct vnode *vp, 522 bhv_vnode_t *vp,
523 char *name, 523 char *name,
524 const char __user *ubuf, 524 const char __user *ubuf,
525 __uint32_t len, 525 __uint32_t len,
@@ -542,7 +542,7 @@ xfs_attrmulti_attr_set(
542 if (copy_from_user(kbuf, ubuf, len)) 542 if (copy_from_user(kbuf, ubuf, len))
543 goto out_kfree; 543 goto out_kfree;
544 544
545 VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error); 545 error = bhv_vop_attr_set(vp, name, kbuf, len, flags, NULL);
546 546
547 out_kfree: 547 out_kfree:
548 kfree(kbuf); 548 kfree(kbuf);
@@ -551,20 +551,15 @@ xfs_attrmulti_attr_set(
551 551
552STATIC int 552STATIC int
553xfs_attrmulti_attr_remove( 553xfs_attrmulti_attr_remove(
554 struct vnode *vp, 554 bhv_vnode_t *vp,
555 char *name, 555 char *name,
556 __uint32_t flags) 556 __uint32_t flags)
557{ 557{
558 int error;
559
560
561 if (IS_RDONLY(&vp->v_inode)) 558 if (IS_RDONLY(&vp->v_inode))
562 return -EROFS; 559 return -EROFS;
563 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) 560 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
564 return EPERM; 561 return EPERM;
565 562 return bhv_vop_attr_remove(vp, name, flags, NULL);
566 VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
567 return error;
568} 563}
569 564
570STATIC int 565STATIC int
@@ -578,7 +573,7 @@ xfs_attrmulti_by_handle(
578 xfs_attr_multiop_t *ops; 573 xfs_attr_multiop_t *ops;
579 xfs_fsop_attrmulti_handlereq_t am_hreq; 574 xfs_fsop_attrmulti_handlereq_t am_hreq;
580 struct inode *inode; 575 struct inode *inode;
581 vnode_t *vp; 576 bhv_vnode_t *vp;
582 unsigned int i, size; 577 unsigned int i, size;
583 char *attr_name; 578 char *attr_name;
584 579
@@ -658,7 +653,7 @@ xfs_attrmulti_by_handle(
658STATIC int 653STATIC int
659xfs_ioc_space( 654xfs_ioc_space(
660 bhv_desc_t *bdp, 655 bhv_desc_t *bdp,
661 vnode_t *vp, 656 bhv_vnode_t *vp,
662 struct file *filp, 657 struct file *filp,
663 int flags, 658 int flags,
664 unsigned int cmd, 659 unsigned int cmd,
@@ -682,7 +677,7 @@ xfs_ioc_fsgeometry(
682 677
683STATIC int 678STATIC int
684xfs_ioc_xattr( 679xfs_ioc_xattr(
685 vnode_t *vp, 680 bhv_vnode_t *vp,
686 xfs_inode_t *ip, 681 xfs_inode_t *ip,
687 struct file *filp, 682 struct file *filp,
688 unsigned int cmd, 683 unsigned int cmd,
@@ -711,7 +706,7 @@ xfs_ioctl(
711 void __user *arg) 706 void __user *arg)
712{ 707{
713 int error; 708 int error;
714 vnode_t *vp; 709 bhv_vnode_t *vp;
715 xfs_inode_t *ip; 710 xfs_inode_t *ip;
716 xfs_mount_t *mp; 711 xfs_mount_t *mp;
717 712
@@ -962,7 +957,7 @@ xfs_ioctl(
962STATIC int 957STATIC int
963xfs_ioc_space( 958xfs_ioc_space(
964 bhv_desc_t *bdp, 959 bhv_desc_t *bdp,
965 vnode_t *vp, 960 bhv_vnode_t *vp,
966 struct file *filp, 961 struct file *filp,
967 int ioflags, 962 int ioflags,
968 unsigned int cmd, 963 unsigned int cmd,
@@ -1153,14 +1148,14 @@ xfs_di2lxflags(
1153 1148
1154STATIC int 1149STATIC int
1155xfs_ioc_xattr( 1150xfs_ioc_xattr(
1156 vnode_t *vp, 1151 bhv_vnode_t *vp,
1157 xfs_inode_t *ip, 1152 xfs_inode_t *ip,
1158 struct file *filp, 1153 struct file *filp,
1159 unsigned int cmd, 1154 unsigned int cmd,
1160 void __user *arg) 1155 void __user *arg)
1161{ 1156{
1162 struct fsxattr fa; 1157 struct fsxattr fa;
1163 struct vattr *vattr; 1158 struct bhv_vattr *vattr;
1164 int error = 0; 1159 int error = 0;
1165 int attr_flags; 1160 int attr_flags;
1166 unsigned int flags; 1161 unsigned int flags;
@@ -1173,7 +1168,7 @@ xfs_ioc_xattr(
1173 case XFS_IOC_FSGETXATTR: { 1168 case XFS_IOC_FSGETXATTR: {
1174 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ 1169 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
1175 XFS_AT_NEXTENTS | XFS_AT_PROJID; 1170 XFS_AT_NEXTENTS | XFS_AT_PROJID;
1176 VOP_GETATTR(vp, vattr, 0, NULL, error); 1171 error = bhv_vop_getattr(vp, vattr, 0, NULL);
1177 if (unlikely(error)) { 1172 if (unlikely(error)) {
1178 error = -error; 1173 error = -error;
1179 break; 1174 break;
@@ -1206,7 +1201,7 @@ xfs_ioc_xattr(
1206 vattr->va_extsize = fa.fsx_extsize; 1201 vattr->va_extsize = fa.fsx_extsize;
1207 vattr->va_projid = fa.fsx_projid; 1202 vattr->va_projid = fa.fsx_projid;
1208 1203
1209 VOP_SETATTR(vp, vattr, attr_flags, NULL, error); 1204 error = bhv_vop_setattr(vp, vattr, attr_flags, NULL);
1210 if (likely(!error)) 1205 if (likely(!error))
1211 __vn_revalidate(vp, vattr); /* update flags */ 1206 __vn_revalidate(vp, vattr); /* update flags */
1212 error = -error; 1207 error = -error;
@@ -1216,7 +1211,7 @@ xfs_ioc_xattr(
1216 case XFS_IOC_FSGETXATTRA: { 1211 case XFS_IOC_FSGETXATTRA: {
1217 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ 1212 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
1218 XFS_AT_ANEXTENTS | XFS_AT_PROJID; 1213 XFS_AT_ANEXTENTS | XFS_AT_PROJID;
1219 VOP_GETATTR(vp, vattr, 0, NULL, error); 1214 error = bhv_vop_getattr(vp, vattr, 0, NULL);
1220 if (unlikely(error)) { 1215 if (unlikely(error)) {
1221 error = -error; 1216 error = -error;
1222 break; 1217 break;
@@ -1262,7 +1257,7 @@ xfs_ioc_xattr(
1262 vattr->va_xflags = xfs_merge_ioc_xflags(flags, 1257 vattr->va_xflags = xfs_merge_ioc_xflags(flags,
1263 xfs_ip2xflags(ip)); 1258 xfs_ip2xflags(ip));
1264 1259
1265 VOP_SETATTR(vp, vattr, attr_flags, NULL, error); 1260 error = bhv_vop_setattr(vp, vattr, attr_flags, NULL);
1266 if (likely(!error)) 1261 if (likely(!error))
1267 __vn_revalidate(vp, vattr); /* update flags */ 1262 __vn_revalidate(vp, vattr); /* update flags */
1268 error = -error; 1263 error = -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 251bfe451a3f..270db0f3861d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -15,7 +15,6 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include <linux/config.h>
19#include <linux/compat.h> 18#include <linux/compat.h>
20#include <linux/init.h> 19#include <linux/init.h>
21#include <linux/ioctl.h> 20#include <linux/ioctl.h>
@@ -114,7 +113,7 @@ xfs_compat_ioctl(
114 unsigned long arg) 113 unsigned long arg)
115{ 114{
116 struct inode *inode = file->f_dentry->d_inode; 115 struct inode *inode = file->f_dentry->d_inode;
117 vnode_t *vp = vn_from_inode(inode); 116 bhv_vnode_t *vp = vn_from_inode(inode);
118 int error; 117 int error;
119 118
120 switch (cmd) { 119 switch (cmd) {
@@ -193,7 +192,7 @@ xfs_compat_ioctl(
193 return -ENOIOCTLCMD; 192 return -ENOIOCTLCMD;
194 } 193 }
195 194
196 VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error); 195 error = bhv_vop_ioctl(vp, inode, file, mode, cmd, (void __user *)arg);
197 VMODIFY(vp); 196 VMODIFY(vp);
198 197
199 return error; 198 return error;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2e2e275c786f..d9180020de63 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -61,7 +59,7 @@
61 */ 59 */
62xfs_inode_t * 60xfs_inode_t *
63xfs_vtoi( 61xfs_vtoi(
64 struct vnode *vp) 62 bhv_vnode_t *vp)
65{ 63{
66 bhv_desc_t *bdp; 64 bhv_desc_t *bdp;
67 65
@@ -80,7 +78,7 @@ void
80xfs_synchronize_atime( 78xfs_synchronize_atime(
81 xfs_inode_t *ip) 79 xfs_inode_t *ip)
82{ 80{
83 vnode_t *vp; 81 bhv_vnode_t *vp;
84 82
85 vp = XFS_ITOV_NULL(ip); 83 vp = XFS_ITOV_NULL(ip);
86 if (vp) { 84 if (vp) {
@@ -200,14 +198,10 @@ xfs_ichgtime_fast(
200STATIC void 198STATIC void
201xfs_validate_fields( 199xfs_validate_fields(
202 struct inode *ip, 200 struct inode *ip,
203 struct vattr *vattr) 201 bhv_vattr_t *vattr)
204{ 202{
205 vnode_t *vp = vn_from_inode(ip);
206 int error;
207
208 vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; 203 vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
209 VOP_GETATTR(vp, vattr, ATTR_LAZY, NULL, error); 204 if (!bhv_vop_getattr(vn_from_inode(ip), vattr, ATTR_LAZY, NULL)) {
210 if (likely(!error)) {
211 ip->i_nlink = vattr->va_nlink; 205 ip->i_nlink = vattr->va_nlink;
212 ip->i_blocks = vattr->va_nblocks; 206 ip->i_blocks = vattr->va_nblocks;
213 207
@@ -225,7 +219,7 @@ xfs_validate_fields(
225 */ 219 */
226STATIC int 220STATIC int
227xfs_init_security( 221xfs_init_security(
228 struct vnode *vp, 222 bhv_vnode_t *vp,
229 struct inode *dir) 223 struct inode *dir)
230{ 224{
231 struct inode *ip = vn_to_inode(vp); 225 struct inode *ip = vn_to_inode(vp);
@@ -241,7 +235,7 @@ xfs_init_security(
241 return -error; 235 return -error;
242 } 236 }
243 237
244 VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error); 238 error = bhv_vop_attr_set(vp, name, value, length, ATTR_SECURE, NULL);
245 if (!error) 239 if (!error)
246 VMODIFY(vp); 240 VMODIFY(vp);
247 241
@@ -264,13 +258,12 @@ xfs_has_fs_struct(struct task_struct *task)
264 258
265STATIC inline void 259STATIC inline void
266xfs_cleanup_inode( 260xfs_cleanup_inode(
267 vnode_t *dvp, 261 bhv_vnode_t *dvp,
268 vnode_t *vp, 262 bhv_vnode_t *vp,
269 struct dentry *dentry, 263 struct dentry *dentry,
270 int mode) 264 int mode)
271{ 265{
272 struct dentry teardown = {}; 266 struct dentry teardown = {};
273 int error;
274 267
275 /* Oh, the horror. 268 /* Oh, the horror.
276 * If we can't add the ACL or we fail in 269 * If we can't add the ACL or we fail in
@@ -281,9 +274,9 @@ xfs_cleanup_inode(
281 teardown.d_name = dentry->d_name; 274 teardown.d_name = dentry->d_name;
282 275
283 if (S_ISDIR(mode)) 276 if (S_ISDIR(mode))
284 VOP_RMDIR(dvp, &teardown, NULL, error); 277 bhv_vop_rmdir(dvp, &teardown, NULL);
285 else 278 else
286 VOP_REMOVE(dvp, &teardown, NULL, error); 279 bhv_vop_remove(dvp, &teardown, NULL);
287 VN_RELE(vp); 280 VN_RELE(vp);
288} 281}
289 282
@@ -295,8 +288,8 @@ xfs_vn_mknod(
295 dev_t rdev) 288 dev_t rdev)
296{ 289{
297 struct inode *ip; 290 struct inode *ip;
298 vattr_t vattr = { 0 }; 291 bhv_vattr_t vattr = { 0 };
299 vnode_t *vp = NULL, *dvp = vn_from_inode(dir); 292 bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir);
300 xfs_acl_t *default_acl = NULL; 293 xfs_acl_t *default_acl = NULL;
301 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 294 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
302 int error; 295 int error;
@@ -330,10 +323,10 @@ xfs_vn_mknod(
330 vattr.va_mask |= XFS_AT_RDEV; 323 vattr.va_mask |= XFS_AT_RDEV;
331 /*FALLTHROUGH*/ 324 /*FALLTHROUGH*/
332 case S_IFREG: 325 case S_IFREG:
333 VOP_CREATE(dvp, dentry, &vattr, &vp, NULL, error); 326 error = bhv_vop_create(dvp, dentry, &vattr, &vp, NULL);
334 break; 327 break;
335 case S_IFDIR: 328 case S_IFDIR:
336 VOP_MKDIR(dvp, dentry, &vattr, &vp, NULL, error); 329 error = bhv_vop_mkdir(dvp, dentry, &vattr, &vp, NULL);
337 break; 330 break;
338 default: 331 default:
339 error = EINVAL; 332 error = EINVAL;
@@ -396,14 +389,14 @@ xfs_vn_lookup(
396 struct dentry *dentry, 389 struct dentry *dentry,
397 struct nameidata *nd) 390 struct nameidata *nd)
398{ 391{
399 struct vnode *vp = vn_from_inode(dir), *cvp; 392 bhv_vnode_t *vp = vn_from_inode(dir), *cvp;
400 int error; 393 int error;
401 394
402 if (dentry->d_name.len >= MAXNAMELEN) 395 if (dentry->d_name.len >= MAXNAMELEN)
403 return ERR_PTR(-ENAMETOOLONG); 396 return ERR_PTR(-ENAMETOOLONG);
404 397
405 VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error); 398 error = bhv_vop_lookup(vp, dentry, &cvp, 0, NULL, NULL);
406 if (error) { 399 if (unlikely(error)) {
407 if (unlikely(error != ENOENT)) 400 if (unlikely(error != ENOENT))
408 return ERR_PTR(-error); 401 return ERR_PTR(-error);
409 d_add(dentry, NULL); 402 d_add(dentry, NULL);
@@ -420,22 +413,21 @@ xfs_vn_link(
420 struct dentry *dentry) 413 struct dentry *dentry)
421{ 414{
422 struct inode *ip; /* inode of guy being linked to */ 415 struct inode *ip; /* inode of guy being linked to */
423 vnode_t *tdvp; /* target directory for new name/link */ 416 bhv_vnode_t *tdvp; /* target directory for new name/link */
424 vnode_t *vp; /* vp of name being linked */ 417 bhv_vnode_t *vp; /* vp of name being linked */
425 vattr_t vattr; 418 bhv_vattr_t vattr;
426 int error; 419 int error;
427 420
428 ip = old_dentry->d_inode; /* inode being linked to */ 421 ip = old_dentry->d_inode; /* inode being linked to */
429 if (S_ISDIR(ip->i_mode))
430 return -EPERM;
431
432 tdvp = vn_from_inode(dir); 422 tdvp = vn_from_inode(dir);
433 vp = vn_from_inode(ip); 423 vp = vn_from_inode(ip);
434 424
435 VOP_LINK(tdvp, vp, dentry, NULL, error); 425 VN_HOLD(vp);
436 if (likely(!error)) { 426 error = bhv_vop_link(tdvp, vp, dentry, NULL);
427 if (unlikely(error)) {
428 VN_RELE(vp);
429 } else {
437 VMODIFY(tdvp); 430 VMODIFY(tdvp);
438 VN_HOLD(vp);
439 xfs_validate_fields(ip, &vattr); 431 xfs_validate_fields(ip, &vattr);
440 d_instantiate(dentry, ip); 432 d_instantiate(dentry, ip);
441 } 433 }
@@ -448,14 +440,14 @@ xfs_vn_unlink(
448 struct dentry *dentry) 440 struct dentry *dentry)
449{ 441{
450 struct inode *inode; 442 struct inode *inode;
451 vnode_t *dvp; /* directory containing name to remove */ 443 bhv_vnode_t *dvp; /* directory containing name to remove */
452 vattr_t vattr; 444 bhv_vattr_t vattr;
453 int error; 445 int error;
454 446
455 inode = dentry->d_inode; 447 inode = dentry->d_inode;
456 dvp = vn_from_inode(dir); 448 dvp = vn_from_inode(dir);
457 449
458 VOP_REMOVE(dvp, dentry, NULL, error); 450 error = bhv_vop_remove(dvp, dentry, NULL);
459 if (likely(!error)) { 451 if (likely(!error)) {
460 xfs_validate_fields(dir, &vattr); /* size needs update */ 452 xfs_validate_fields(dir, &vattr); /* size needs update */
461 xfs_validate_fields(inode, &vattr); 453 xfs_validate_fields(inode, &vattr);
@@ -470,27 +462,26 @@ xfs_vn_symlink(
470 const char *symname) 462 const char *symname)
471{ 463{
472 struct inode *ip; 464 struct inode *ip;
473 vattr_t vattr = { 0 }; 465 bhv_vattr_t va = { 0 };
474 vnode_t *dvp; /* directory containing name of symlink */ 466 bhv_vnode_t *dvp; /* directory containing name of symlink */
475 vnode_t *cvp; /* used to lookup symlink to put in dentry */ 467 bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */
476 int error; 468 int error;
477 469
478 dvp = vn_from_inode(dir); 470 dvp = vn_from_inode(dir);
479 cvp = NULL; 471 cvp = NULL;
480 472
481 vattr.va_mode = S_IFLNK | 473 va.va_mode = S_IFLNK |
482 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); 474 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
483 vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE; 475 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
484 476
485 error = 0; 477 error = bhv_vop_symlink(dvp, dentry, &va, (char *)symname, &cvp, NULL);
486 VOP_SYMLINK(dvp, dentry, &vattr, (char *)symname, &cvp, NULL, error);
487 if (likely(!error && cvp)) { 478 if (likely(!error && cvp)) {
488 error = xfs_init_security(cvp, dir); 479 error = xfs_init_security(cvp, dir);
489 if (likely(!error)) { 480 if (likely(!error)) {
490 ip = vn_to_inode(cvp); 481 ip = vn_to_inode(cvp);
491 d_instantiate(dentry, ip); 482 d_instantiate(dentry, ip);
492 xfs_validate_fields(dir, &vattr); 483 xfs_validate_fields(dir, &va);
493 xfs_validate_fields(ip, &vattr); 484 xfs_validate_fields(ip, &va);
494 } else { 485 } else {
495 xfs_cleanup_inode(dvp, cvp, dentry, 0); 486 xfs_cleanup_inode(dvp, cvp, dentry, 0);
496 } 487 }
@@ -504,11 +495,11 @@ xfs_vn_rmdir(
504 struct dentry *dentry) 495 struct dentry *dentry)
505{ 496{
506 struct inode *inode = dentry->d_inode; 497 struct inode *inode = dentry->d_inode;
507 vnode_t *dvp = vn_from_inode(dir); 498 bhv_vnode_t *dvp = vn_from_inode(dir);
508 vattr_t vattr; 499 bhv_vattr_t vattr;
509 int error; 500 int error;
510 501
511 VOP_RMDIR(dvp, dentry, NULL, error); 502 error = bhv_vop_rmdir(dvp, dentry, NULL);
512 if (likely(!error)) { 503 if (likely(!error)) {
513 xfs_validate_fields(inode, &vattr); 504 xfs_validate_fields(inode, &vattr);
514 xfs_validate_fields(dir, &vattr); 505 xfs_validate_fields(dir, &vattr);
@@ -524,15 +515,15 @@ xfs_vn_rename(
524 struct dentry *ndentry) 515 struct dentry *ndentry)
525{ 516{
526 struct inode *new_inode = ndentry->d_inode; 517 struct inode *new_inode = ndentry->d_inode;
527 vnode_t *fvp; /* from directory */ 518 bhv_vnode_t *fvp; /* from directory */
528 vnode_t *tvp; /* target directory */ 519 bhv_vnode_t *tvp; /* target directory */
529 vattr_t vattr; 520 bhv_vattr_t vattr;
530 int error; 521 int error;
531 522
532 fvp = vn_from_inode(odir); 523 fvp = vn_from_inode(odir);
533 tvp = vn_from_inode(ndir); 524 tvp = vn_from_inode(ndir);
534 525
535 VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); 526 error = bhv_vop_rename(fvp, odentry, tvp, ndentry, NULL);
536 if (likely(!error)) { 527 if (likely(!error)) {
537 if (new_inode) 528 if (new_inode)
538 xfs_validate_fields(new_inode, &vattr); 529 xfs_validate_fields(new_inode, &vattr);
@@ -553,7 +544,7 @@ xfs_vn_follow_link(
553 struct dentry *dentry, 544 struct dentry *dentry,
554 struct nameidata *nd) 545 struct nameidata *nd)
555{ 546{
556 vnode_t *vp; 547 bhv_vnode_t *vp;
557 uio_t *uio; 548 uio_t *uio;
558 iovec_t iov; 549 iovec_t iov;
559 int error; 550 int error;
@@ -586,8 +577,8 @@ xfs_vn_follow_link(
586 uio->uio_resid = MAXPATHLEN; 577 uio->uio_resid = MAXPATHLEN;
587 uio->uio_iovcnt = 1; 578 uio->uio_iovcnt = 1;
588 579
589 VOP_READLINK(vp, uio, 0, NULL, error); 580 error = bhv_vop_readlink(vp, uio, 0, NULL);
590 if (error) { 581 if (unlikely(error)) {
591 kfree(link); 582 kfree(link);
592 link = ERR_PTR(-error); 583 link = ERR_PTR(-error);
593 } else { 584 } else {
@@ -618,12 +609,7 @@ xfs_vn_permission(
618 int mode, 609 int mode,
619 struct nameidata *nd) 610 struct nameidata *nd)
620{ 611{
621 vnode_t *vp = vn_from_inode(inode); 612 return -bhv_vop_access(vn_from_inode(inode), mode << 6, NULL);
622 int error;
623
624 mode <<= 6; /* convert from linux to vnode access bits */
625 VOP_ACCESS(vp, mode, NULL, error);
626 return -error;
627} 613}
628#else 614#else
629#define xfs_vn_permission NULL 615#define xfs_vn_permission NULL
@@ -636,14 +622,14 @@ xfs_vn_getattr(
636 struct kstat *stat) 622 struct kstat *stat)
637{ 623{
638 struct inode *inode = dentry->d_inode; 624 struct inode *inode = dentry->d_inode;
639 vnode_t *vp = vn_from_inode(inode); 625 bhv_vnode_t *vp = vn_from_inode(inode);
640 int error = 0; 626 int error = 0;
641 627
642 if (unlikely(vp->v_flag & VMODIFIED)) 628 if (unlikely(vp->v_flag & VMODIFIED))
643 error = vn_revalidate(vp); 629 error = vn_revalidate(vp);
644 if (!error) 630 if (!error)
645 generic_fillattr(inode, stat); 631 generic_fillattr(inode, stat);
646 return 0; 632 return -error;
647} 633}
648 634
649STATIC int 635STATIC int
@@ -653,8 +639,8 @@ xfs_vn_setattr(
653{ 639{
654 struct inode *inode = dentry->d_inode; 640 struct inode *inode = dentry->d_inode;
655 unsigned int ia_valid = attr->ia_valid; 641 unsigned int ia_valid = attr->ia_valid;
656 vnode_t *vp = vn_from_inode(inode); 642 bhv_vnode_t *vp = vn_from_inode(inode);
657 vattr_t vattr = { 0 }; 643 bhv_vattr_t vattr = { 0 };
658 int flags = 0; 644 int flags = 0;
659 int error; 645 int error;
660 646
@@ -697,7 +683,7 @@ xfs_vn_setattr(
697 flags |= ATTR_NONBLOCK; 683 flags |= ATTR_NONBLOCK;
698#endif 684#endif
699 685
700 VOP_SETATTR(vp, &vattr, flags, NULL, error); 686 error = bhv_vop_setattr(vp, &vattr, flags, NULL);
701 if (likely(!error)) 687 if (likely(!error))
702 __vn_revalidate(vp, &vattr); 688 __vn_revalidate(vp, &vattr);
703 return -error; 689 return -error;
@@ -718,7 +704,7 @@ xfs_vn_setxattr(
718 size_t size, 704 size_t size,
719 int flags) 705 int flags)
720{ 706{
721 vnode_t *vp = vn_from_inode(dentry->d_inode); 707 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
722 char *attr = (char *)name; 708 char *attr = (char *)name;
723 attrnames_t *namesp; 709 attrnames_t *namesp;
724 int xflags = 0; 710 int xflags = 0;
@@ -748,7 +734,7 @@ xfs_vn_getxattr(
748 void *data, 734 void *data,
749 size_t size) 735 size_t size)
750{ 736{
751 vnode_t *vp = vn_from_inode(dentry->d_inode); 737 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
752 char *attr = (char *)name; 738 char *attr = (char *)name;
753 attrnames_t *namesp; 739 attrnames_t *namesp;
754 int xflags = 0; 740 int xflags = 0;
@@ -777,7 +763,7 @@ xfs_vn_listxattr(
777 char *data, 763 char *data,
778 size_t size) 764 size_t size)
779{ 765{
780 vnode_t *vp = vn_from_inode(dentry->d_inode); 766 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
781 int error, xflags = ATTR_KERNAMELS; 767 int error, xflags = ATTR_KERNAMELS;
782 ssize_t result; 768 ssize_t result;
783 769
@@ -796,7 +782,7 @@ xfs_vn_removexattr(
796 struct dentry *dentry, 782 struct dentry *dentry,
797 const char *name) 783 const char *name)
798{ 784{
799 vnode_t *vp = vn_from_inode(dentry->d_inode); 785 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
800 char *attr = (char *)name; 786 char *attr = (char *)name;
801 attrnames_t *namesp; 787 attrnames_t *namesp;
802 int xflags = 0; 788 int xflags = 0;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e9fe43d74768..8c021dc57d1f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -19,7 +19,6 @@
19#define __XFS_LINUX__ 19#define __XFS_LINUX__
20 20
21#include <linux/types.h> 21#include <linux/types.h>
22#include <linux/config.h>
23 22
24/* 23/*
25 * Some types are conditional depending on the target system. 24 * Some types are conditional depending on the target system.
@@ -134,14 +133,19 @@ BUFFER_FNS(PrivateStart, unwritten);
134#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val 133#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val
135#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val 134#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
136#define xfs_rotorstep xfs_params.rotorstep.val 135#define xfs_rotorstep xfs_params.rotorstep.val
136#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
137 137
138#ifndef raw_smp_processor_id 138#define current_cpu() (raw_smp_processor_id())
139#define raw_smp_processor_id() smp_processor_id()
140#endif
141#define current_cpu() raw_smp_processor_id()
142#define current_pid() (current->pid) 139#define current_pid() (current->pid)
143#define current_fsuid(cred) (current->fsuid) 140#define current_fsuid(cred) (current->fsuid)
144#define current_fsgid(cred) (current->fsgid) 141#define current_fsgid(cred) (current->fsgid)
142#define current_test_flags(f) (current->flags & (f))
143#define current_set_flags_nested(sp, f) \
144 (*(sp) = current->flags, current->flags |= (f))
145#define current_clear_flags_nested(sp, f) \
146 (*(sp) = current->flags, current->flags &= ~(f))
147#define current_restore_flags_nested(sp, f) \
148 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
145 149
146#define NBPP PAGE_SIZE 150#define NBPP PAGE_SIZE
147#define DPPSHFT (PAGE_SHIFT - 9) 151#define DPPSHFT (PAGE_SHIFT - 9)
@@ -187,25 +191,9 @@ BUFFER_FNS(PrivateStart, unwritten);
187/* bytes to clicks */ 191/* bytes to clicks */
188#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) 192#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
189 193
190#ifndef ENOATTR
191#define ENOATTR ENODATA /* Attribute not found */ 194#define ENOATTR ENODATA /* Attribute not found */
192#endif 195#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
193 196#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
194/* Note: EWRONGFS never visible outside the kernel */
195#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
196
197/*
198 * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
199 * return codes out of its known range in errno.
200 * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
201 * conflict with any code we use already or any code a driver may use)
202 * XXX Some options (currently we do #2):
203 * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
204 * 2/ 990 ["Unknown error 990"]
205 * 3/ EUCLEAN ["Structure needs cleaning"]
206 * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
207 */
208#define EFSCORRUPTED 990 /* Filesystem is corrupted */
209 197
210#define SYNCHRONIZE() barrier() 198#define SYNCHRONIZE() barrier()
211#define __return_address __builtin_return_address(0) 199#define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 67efe3308980..5d9cfd91ad08 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -206,7 +204,7 @@ xfs_read(
206 xfs_fsize_t n; 204 xfs_fsize_t n;
207 xfs_inode_t *ip; 205 xfs_inode_t *ip;
208 xfs_mount_t *mp; 206 xfs_mount_t *mp;
209 vnode_t *vp; 207 bhv_vnode_t *vp;
210 unsigned long seg; 208 unsigned long seg;
211 209
212 ip = XFS_BHVTOI(bdp); 210 ip = XFS_BHVTOI(bdp);
@@ -258,7 +256,7 @@ xfs_read(
258 256
259 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && 257 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
260 !(ioflags & IO_INVIS)) { 258 !(ioflags & IO_INVIS)) {
261 vrwlock_t locktype = VRWLOCK_READ; 259 bhv_vrwlock_t locktype = VRWLOCK_READ;
262 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); 260 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
263 261
264 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, 262 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
@@ -271,7 +269,7 @@ xfs_read(
271 } 269 }
272 270
273 if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) 271 if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp)))
274 VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(*offset)), 272 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
275 -1, FI_REMAPF_LOCKED); 273 -1, FI_REMAPF_LOCKED);
276 274
277 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, 275 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
@@ -313,7 +311,7 @@ xfs_sendfile(
313 311
314 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && 312 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
315 (!(ioflags & IO_INVIS))) { 313 (!(ioflags & IO_INVIS))) {
316 vrwlock_t locktype = VRWLOCK_READ; 314 bhv_vrwlock_t locktype = VRWLOCK_READ;
317 int error; 315 int error;
318 316
319 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), 317 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
@@ -357,7 +355,7 @@ xfs_splice_read(
357 355
358 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && 356 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
359 (!(ioflags & IO_INVIS))) { 357 (!(ioflags & IO_INVIS))) {
360 vrwlock_t locktype = VRWLOCK_READ; 358 bhv_vrwlock_t locktype = VRWLOCK_READ;
361 int error; 359 int error;
362 360
363 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), 361 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
@@ -401,7 +399,7 @@ xfs_splice_write(
401 399
402 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) && 400 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
403 (!(ioflags & IO_INVIS))) { 401 (!(ioflags & IO_INVIS))) {
404 vrwlock_t locktype = VRWLOCK_WRITE; 402 bhv_vrwlock_t locktype = VRWLOCK_WRITE;
405 int error; 403 int error;
406 404
407 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), 405 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
@@ -458,7 +456,7 @@ xfs_zero_last_block(
458 last_fsb = XFS_B_TO_FSBT(mp, isize); 456 last_fsb = XFS_B_TO_FSBT(mp, isize);
459 nimaps = 1; 457 nimaps = 1;
460 error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, 458 error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
461 &nimaps, NULL); 459 &nimaps, NULL, NULL);
462 if (error) { 460 if (error) {
463 return error; 461 return error;
464 } 462 }
@@ -499,7 +497,7 @@ xfs_zero_last_block(
499 497
500int /* error (positive) */ 498int /* error (positive) */
501xfs_zero_eof( 499xfs_zero_eof(
502 vnode_t *vp, 500 bhv_vnode_t *vp,
503 xfs_iocore_t *io, 501 xfs_iocore_t *io,
504 xfs_off_t offset, /* starting I/O offset */ 502 xfs_off_t offset, /* starting I/O offset */
505 xfs_fsize_t isize, /* current inode size */ 503 xfs_fsize_t isize, /* current inode size */
@@ -510,7 +508,6 @@ xfs_zero_eof(
510 xfs_fileoff_t end_zero_fsb; 508 xfs_fileoff_t end_zero_fsb;
511 xfs_fileoff_t zero_count_fsb; 509 xfs_fileoff_t zero_count_fsb;
512 xfs_fileoff_t last_fsb; 510 xfs_fileoff_t last_fsb;
513 xfs_extlen_t buf_len_fsb;
514 xfs_mount_t *mp = io->io_mount; 511 xfs_mount_t *mp = io->io_mount;
515 int nimaps; 512 int nimaps;
516 int error = 0; 513 int error = 0;
@@ -556,7 +553,7 @@ xfs_zero_eof(
556 nimaps = 1; 553 nimaps = 1;
557 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 554 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
558 error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, 555 error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
559 0, NULL, 0, &imap, &nimaps, NULL); 556 0, NULL, 0, &imap, &nimaps, NULL, NULL);
560 if (error) { 557 if (error) {
561 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 558 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
562 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 559 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -579,16 +576,7 @@ xfs_zero_eof(
579 } 576 }
580 577
581 /* 578 /*
582 * There are blocks in the range requested. 579 * There are blocks we need to zero.
583 * Zero them a single write at a time. We actually
584 * don't zero the entire range returned if it is
585 * too big and simply loop around to get the rest.
586 * That is not the most efficient thing to do, but it
587 * is simple and this path should not be exercised often.
588 */
589 buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
590 mp->m_writeio_blocks << 8);
591 /*
592 * Drop the inode lock while we're doing the I/O. 580 * Drop the inode lock while we're doing the I/O.
593 * We'll still have the iolock to protect us. 581 * We'll still have the iolock to protect us.
594 */ 582 */
@@ -596,14 +584,13 @@ xfs_zero_eof(
596 584
597 error = xfs_iozero(ip, 585 error = xfs_iozero(ip,
598 XFS_FSB_TO_B(mp, start_zero_fsb), 586 XFS_FSB_TO_B(mp, start_zero_fsb),
599 XFS_FSB_TO_B(mp, buf_len_fsb), 587 XFS_FSB_TO_B(mp, imap.br_blockcount),
600 end_size); 588 end_size);
601
602 if (error) { 589 if (error) {
603 goto out_lock; 590 goto out_lock;
604 } 591 }
605 592
606 start_zero_fsb = imap.br_startoff + buf_len_fsb; 593 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
607 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 594 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
608 595
609 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 596 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
@@ -637,11 +624,11 @@ xfs_write(
637 ssize_t ret = 0, error = 0; 624 ssize_t ret = 0, error = 0;
638 xfs_fsize_t isize, new_size; 625 xfs_fsize_t isize, new_size;
639 xfs_iocore_t *io; 626 xfs_iocore_t *io;
640 vnode_t *vp; 627 bhv_vnode_t *vp;
641 unsigned long seg; 628 unsigned long seg;
642 int iolock; 629 int iolock;
643 int eventsent = 0; 630 int eventsent = 0;
644 vrwlock_t locktype; 631 bhv_vrwlock_t locktype;
645 size_t ocount = 0, count; 632 size_t ocount = 0, count;
646 loff_t pos; 633 loff_t pos;
647 int need_i_mutex = 1, need_flush = 0; 634 int need_i_mutex = 1, need_flush = 0;
@@ -679,11 +666,11 @@ xfs_write(
679 io = &xip->i_iocore; 666 io = &xip->i_iocore;
680 mp = io->io_mount; 667 mp = io->io_mount;
681 668
669 vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE);
670
682 if (XFS_FORCED_SHUTDOWN(mp)) 671 if (XFS_FORCED_SHUTDOWN(mp))
683 return -EIO; 672 return -EIO;
684 673
685 fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);
686
687 if (ioflags & IO_ISDIRECT) { 674 if (ioflags & IO_ISDIRECT) {
688 xfs_buftarg_t *target = 675 xfs_buftarg_t *target =
689 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 676 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
@@ -814,7 +801,7 @@ retry:
814 if (need_flush) { 801 if (need_flush) {
815 xfs_inval_cached_trace(io, pos, -1, 802 xfs_inval_cached_trace(io, pos, -1,
816 ctooff(offtoct(pos)), -1); 803 ctooff(offtoct(pos)), -1);
817 VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), 804 bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
818 -1, FI_REMAPF_LOCKED); 805 -1, FI_REMAPF_LOCKED);
819 } 806 }
820 807
@@ -903,79 +890,9 @@ retry:
903 890
904 /* Handle various SYNC-type writes */ 891 /* Handle various SYNC-type writes */
905 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 892 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
906 /* 893 error = xfs_write_sync_logforce(mp, xip);
907 * If we're treating this as O_DSYNC and we have not updated the 894 if (error)
908 * size, force the log. 895 goto out_unlock_internal;
909 */
910 if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
911 !(xip->i_update_size)) {
912 xfs_inode_log_item_t *iip = xip->i_itemp;
913
914 /*
915 * If an allocation transaction occurred
916 * without extending the size, then we have to force
917 * the log up the proper point to ensure that the
918 * allocation is permanent. We can't count on
919 * the fact that buffered writes lock out direct I/O
920 * writes - the direct I/O write could have extended
921 * the size nontransactionally, then finished before
922 * we started. xfs_write_file will think that the file
923 * didn't grow but the update isn't safe unless the
924 * size change is logged.
925 *
926 * Force the log if we've committed a transaction
927 * against the inode or if someone else has and
928 * the commit record hasn't gone to disk (e.g.
929 * the inode is pinned). This guarantees that
930 * all changes affecting the inode are permanent
931 * when we return.
932 */
933 if (iip && iip->ili_last_lsn) {
934 xfs_log_force(mp, iip->ili_last_lsn,
935 XFS_LOG_FORCE | XFS_LOG_SYNC);
936 } else if (xfs_ipincount(xip) > 0) {
937 xfs_log_force(mp, (xfs_lsn_t)0,
938 XFS_LOG_FORCE | XFS_LOG_SYNC);
939 }
940
941 } else {
942 xfs_trans_t *tp;
943
944 /*
945 * O_SYNC or O_DSYNC _with_ a size update are handled
946 * the same way.
947 *
948 * If the write was synchronous then we need to make
949 * sure that the inode modification time is permanent.
950 * We'll have updated the timestamp above, so here
951 * we use a synchronous transaction to log the inode.
952 * It's not fast, but it's necessary.
953 *
954 * If this a dsync write and the size got changed
955 * non-transactionally, then we need to ensure that
956 * the size change gets logged in a synchronous
957 * transaction.
958 */
959
960 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
961 if ((error = xfs_trans_reserve(tp, 0,
962 XFS_SWRITE_LOG_RES(mp),
963 0, 0, 0))) {
964 /* Transaction reserve failed */
965 xfs_trans_cancel(tp, 0);
966 } else {
967 /* Transaction reserve successful */
968 xfs_ilock(xip, XFS_ILOCK_EXCL);
969 xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
970 xfs_trans_ihold(tp, xip);
971 xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
972 xfs_trans_set_sync(tp);
973 error = xfs_trans_commit(tp, 0, NULL);
974 xfs_iunlock(xip, XFS_ILOCK_EXCL);
975 }
976 if (error)
977 goto out_unlock_internal;
978 }
979 896
980 xfs_rwunlock(bdp, locktype); 897 xfs_rwunlock(bdp, locktype);
981 if (need_i_mutex) 898 if (need_i_mutex)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 8f4539952350..c77e62efb742 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -18,8 +18,8 @@
18#ifndef __XFS_LRW_H__ 18#ifndef __XFS_LRW_H__
19#define __XFS_LRW_H__ 19#define __XFS_LRW_H__
20 20
21struct vnode;
22struct bhv_desc; 21struct bhv_desc;
22struct bhv_vnode;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_iocore; 24struct xfs_iocore;
25struct xfs_inode; 25struct xfs_inode;
@@ -49,7 +49,7 @@ struct xfs_iomap;
49#define XFS_CTRUNC4 14 49#define XFS_CTRUNC4 14
50#define XFS_CTRUNC5 15 50#define XFS_CTRUNC5 15
51#define XFS_CTRUNC6 16 51#define XFS_CTRUNC6 16
52#define XFS_BUNMAPI 17 52#define XFS_BUNMAP 17
53#define XFS_INVAL_CACHED 18 53#define XFS_INVAL_CACHED 18
54#define XFS_DIORD_ENTER 19 54#define XFS_DIORD_ENTER 19
55#define XFS_DIOWR_ENTER 20 55#define XFS_DIOWR_ENTER 20
@@ -82,7 +82,7 @@ extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
82extern int xfs_bdstrat_cb(struct xfs_buf *); 82extern int xfs_bdstrat_cb(struct xfs_buf *);
83extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 83extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
84 84
85extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, 85extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t,
86 xfs_fsize_t, xfs_fsize_t); 86 xfs_fsize_t, xfs_fsize_t);
87extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, 87extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
88 const struct iovec *, unsigned int, 88 const struct iovec *, unsigned int,
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 1f0589a05eca..e480b6102051 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -62,7 +62,7 @@ xfs_read_xfsstats(
62 while (j < xstats[i].endpoint) { 62 while (j < xstats[i].endpoint) {
63 val = 0; 63 val = 0;
64 /* sum over all cpus */ 64 /* sum over all cpus */
65 for_each_cpu(c) 65 for_each_possible_cpu(c)
66 val += *(((__u32*)&per_cpu(xfsstats, c) + j)); 66 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
67 len += sprintf(buffer + len, " %u", val); 67 len += sprintf(buffer + len, " %u", val);
68 j++; 68 j++;
@@ -70,7 +70,7 @@ xfs_read_xfsstats(
70 buffer[len++] = '\n'; 70 buffer[len++] = '\n';
71 } 71 }
72 /* extra precision counters */ 72 /* extra precision counters */
73 for_each_cpu(i) { 73 for_each_possible_cpu(i) {
74 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; 74 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
75 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; 75 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
76 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 76 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 68f4793e8a11..9bdef9d51900 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -151,7 +149,7 @@ xfs_set_inodeops(
151STATIC __inline__ void 149STATIC __inline__ void
152xfs_revalidate_inode( 150xfs_revalidate_inode(
153 xfs_mount_t *mp, 151 xfs_mount_t *mp,
154 vnode_t *vp, 152 bhv_vnode_t *vp,
155 xfs_inode_t *ip) 153 xfs_inode_t *ip)
156{ 154{
157 struct inode *inode = vn_to_inode(vp); 155 struct inode *inode = vn_to_inode(vp);
@@ -206,7 +204,7 @@ xfs_revalidate_inode(
206void 204void
207xfs_initialize_vnode( 205xfs_initialize_vnode(
208 bhv_desc_t *bdp, 206 bhv_desc_t *bdp,
209 vnode_t *vp, 207 bhv_vnode_t *vp,
210 bhv_desc_t *inode_bhv, 208 bhv_desc_t *inode_bhv,
211 int unlock) 209 int unlock)
212{ 210{
@@ -336,7 +334,7 @@ STATIC struct inode *
336xfs_fs_alloc_inode( 334xfs_fs_alloc_inode(
337 struct super_block *sb) 335 struct super_block *sb)
338{ 336{
339 vnode_t *vp; 337 bhv_vnode_t *vp;
340 338
341 vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); 339 vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
342 if (unlikely(!vp)) 340 if (unlikely(!vp))
@@ -359,13 +357,13 @@ xfs_fs_inode_init_once(
359{ 357{
360 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 358 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
361 SLAB_CTOR_CONSTRUCTOR) 359 SLAB_CTOR_CONSTRUCTOR)
362 inode_init_once(vn_to_inode((vnode_t *)vnode)); 360 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
363} 361}
364 362
365STATIC int 363STATIC int
366xfs_init_zones(void) 364xfs_init_zones(void)
367{ 365{
368 xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t", 366 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
369 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | 367 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
370 KM_ZONE_SPREAD, 368 KM_ZONE_SPREAD,
371 xfs_fs_inode_init_once); 369 xfs_fs_inode_init_once);
@@ -409,22 +407,17 @@ xfs_fs_write_inode(
409 struct inode *inode, 407 struct inode *inode,
410 int sync) 408 int sync)
411{ 409{
412 vnode_t *vp = vn_from_inode(inode); 410 bhv_vnode_t *vp = vn_from_inode(inode);
413 int error = 0, flags = FLUSH_INODE; 411 int error = 0, flags = FLUSH_INODE;
414 412
415 if (vp) { 413 if (vp) {
416 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 414 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
417 if (sync) 415 if (sync)
418 flags |= FLUSH_SYNC; 416 flags |= FLUSH_SYNC;
419 VOP_IFLUSH(vp, flags, error); 417 error = bhv_vop_iflush(vp, flags);
420 if (error == EAGAIN) { 418 if (error == EAGAIN)
421 if (sync) 419 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
422 VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
423 else
424 error = 0;
425 }
426 } 420 }
427
428 return -error; 421 return -error;
429} 422}
430 423
@@ -432,8 +425,7 @@ STATIC void
432xfs_fs_clear_inode( 425xfs_fs_clear_inode(
433 struct inode *inode) 426 struct inode *inode)
434{ 427{
435 vnode_t *vp = vn_from_inode(inode); 428 bhv_vnode_t *vp = vn_from_inode(inode);
436 int error, cache;
437 429
438 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 430 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
439 431
@@ -446,20 +438,18 @@ xfs_fs_clear_inode(
446 * This can happen because xfs_iget_core calls xfs_idestroy if we 438 * This can happen because xfs_iget_core calls xfs_idestroy if we
447 * find an inode with di_mode == 0 but without IGET_CREATE set. 439 * find an inode with di_mode == 0 but without IGET_CREATE set.
448 */ 440 */
449 if (vp->v_fbhv) 441 if (VNHEAD(vp))
450 VOP_INACTIVE(vp, NULL, cache); 442 bhv_vop_inactive(vp, NULL);
451 443
452 VN_LOCK(vp); 444 VN_LOCK(vp);
453 vp->v_flag &= ~VMODIFIED; 445 vp->v_flag &= ~VMODIFIED;
454 VN_UNLOCK(vp, 0); 446 VN_UNLOCK(vp, 0);
455 447
456 if (vp->v_fbhv) { 448 if (VNHEAD(vp))
457 VOP_RECLAIM(vp, error); 449 if (bhv_vop_reclaim(vp))
458 if (error) 450 panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, vp);
459 panic("vn_purge: cannot reclaim");
460 }
461 451
462 ASSERT(vp->v_fbhv == NULL); 452 ASSERT(VNHEAD(vp) == NULL);
463 453
464#ifdef XFS_VNODE_TRACE 454#ifdef XFS_VNODE_TRACE
465 ktrace_free(vp->v_trace); 455 ktrace_free(vp->v_trace);
@@ -475,13 +465,13 @@ xfs_fs_clear_inode(
475 */ 465 */
476STATIC void 466STATIC void
477xfs_syncd_queue_work( 467xfs_syncd_queue_work(
478 struct vfs *vfs, 468 struct bhv_vfs *vfs,
479 void *data, 469 void *data,
480 void (*syncer)(vfs_t *, void *)) 470 void (*syncer)(bhv_vfs_t *, void *))
481{ 471{
482 vfs_sync_work_t *work; 472 struct bhv_vfs_sync_work *work;
483 473
484 work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP); 474 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
485 INIT_LIST_HEAD(&work->w_list); 475 INIT_LIST_HEAD(&work->w_list);
486 work->w_syncer = syncer; 476 work->w_syncer = syncer;
487 work->w_data = data; 477 work->w_data = data;
@@ -500,7 +490,7 @@ xfs_syncd_queue_work(
500 */ 490 */
501STATIC void 491STATIC void
502xfs_flush_inode_work( 492xfs_flush_inode_work(
503 vfs_t *vfs, 493 bhv_vfs_t *vfs,
504 void *inode) 494 void *inode)
505{ 495{
506 filemap_flush(((struct inode *)inode)->i_mapping); 496 filemap_flush(((struct inode *)inode)->i_mapping);
@@ -512,7 +502,7 @@ xfs_flush_inode(
512 xfs_inode_t *ip) 502 xfs_inode_t *ip)
513{ 503{
514 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 504 struct inode *inode = vn_to_inode(XFS_ITOV(ip));
515 struct vfs *vfs = XFS_MTOVFS(ip->i_mount); 505 struct bhv_vfs *vfs = XFS_MTOVFS(ip->i_mount);
516 506
517 igrab(inode); 507 igrab(inode);
518 xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); 508 xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
@@ -525,7 +515,7 @@ xfs_flush_inode(
525 */ 515 */
526STATIC void 516STATIC void
527xfs_flush_device_work( 517xfs_flush_device_work(
528 vfs_t *vfs, 518 bhv_vfs_t *vfs,
529 void *inode) 519 void *inode)
530{ 520{
531 sync_blockdev(vfs->vfs_super->s_bdev); 521 sync_blockdev(vfs->vfs_super->s_bdev);
@@ -537,7 +527,7 @@ xfs_flush_device(
537 xfs_inode_t *ip) 527 xfs_inode_t *ip)
538{ 528{
539 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 529 struct inode *inode = vn_to_inode(XFS_ITOV(ip));
540 struct vfs *vfs = XFS_MTOVFS(ip->i_mount); 530 struct bhv_vfs *vfs = XFS_MTOVFS(ip->i_mount);
541 531
542 igrab(inode); 532 igrab(inode);
543 xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); 533 xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
@@ -545,16 +535,16 @@ xfs_flush_device(
545 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 535 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
546} 536}
547 537
548#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE)
549STATIC void 538STATIC void
550vfs_sync_worker( 539vfs_sync_worker(
551 vfs_t *vfsp, 540 bhv_vfs_t *vfsp,
552 void *unused) 541 void *unused)
553{ 542{
554 int error; 543 int error;
555 544
556 if (!(vfsp->vfs_flag & VFS_RDONLY)) 545 if (!(vfsp->vfs_flag & VFS_RDONLY))
557 VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); 546 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
547 SYNC_ATTR | SYNC_REFCACHE, NULL);
558 vfsp->vfs_sync_seq++; 548 vfsp->vfs_sync_seq++;
559 wmb(); 549 wmb();
560 wake_up(&vfsp->vfs_wait_single_sync_task); 550 wake_up(&vfsp->vfs_wait_single_sync_task);
@@ -565,8 +555,8 @@ xfssyncd(
565 void *arg) 555 void *arg)
566{ 556{
567 long timeleft; 557 long timeleft;
568 vfs_t *vfsp = (vfs_t *) arg; 558 bhv_vfs_t *vfsp = (bhv_vfs_t *) arg;
569 struct vfs_sync_work *work, *n; 559 bhv_vfs_sync_work_t *work, *n;
570 LIST_HEAD (tmp); 560 LIST_HEAD (tmp);
571 561
572 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 562 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
@@ -600,7 +590,7 @@ xfssyncd(
600 list_del(&work->w_list); 590 list_del(&work->w_list);
601 if (work == &vfsp->vfs_sync_work) 591 if (work == &vfsp->vfs_sync_work)
602 continue; 592 continue;
603 kmem_free(work, sizeof(struct vfs_sync_work)); 593 kmem_free(work, sizeof(struct bhv_vfs_sync_work));
604 } 594 }
605 } 595 }
606 596
@@ -609,7 +599,7 @@ xfssyncd(
609 599
610STATIC int 600STATIC int
611xfs_fs_start_syncd( 601xfs_fs_start_syncd(
612 vfs_t *vfsp) 602 bhv_vfs_t *vfsp)
613{ 603{
614 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; 604 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
615 vfsp->vfs_sync_work.w_vfs = vfsp; 605 vfsp->vfs_sync_work.w_vfs = vfsp;
@@ -621,7 +611,7 @@ xfs_fs_start_syncd(
621 611
622STATIC void 612STATIC void
623xfs_fs_stop_syncd( 613xfs_fs_stop_syncd(
624 vfs_t *vfsp) 614 bhv_vfs_t *vfsp)
625{ 615{
626 kthread_stop(vfsp->vfs_sync_task); 616 kthread_stop(vfsp->vfs_sync_task);
627} 617}
@@ -630,35 +620,26 @@ STATIC void
630xfs_fs_put_super( 620xfs_fs_put_super(
631 struct super_block *sb) 621 struct super_block *sb)
632{ 622{
633 vfs_t *vfsp = vfs_from_sb(sb); 623 bhv_vfs_t *vfsp = vfs_from_sb(sb);
634 int error; 624 int error;
635 625
636 xfs_fs_stop_syncd(vfsp); 626 xfs_fs_stop_syncd(vfsp);
637 VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); 627 bhv_vfs_sync(vfsp, SYNC_ATTR | SYNC_DELWRI, NULL);
638 if (!error) 628 error = bhv_vfs_unmount(vfsp, 0, NULL);
639 VFS_UNMOUNT(vfsp, 0, NULL, error);
640 if (error) { 629 if (error) {
641 printk("XFS unmount got error %d\n", error); 630 printk("XFS: unmount got error=%d\n", error);
642 printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); 631 printk("%s: vfs=0x%p left dangling!\n", __FUNCTION__, vfsp);
643 return; 632 } else {
633 vfs_deallocate(vfsp);
644 } 634 }
645
646 vfs_deallocate(vfsp);
647} 635}
648 636
649STATIC void 637STATIC void
650xfs_fs_write_super( 638xfs_fs_write_super(
651 struct super_block *sb) 639 struct super_block *sb)
652{ 640{
653 vfs_t *vfsp = vfs_from_sb(sb); 641 if (!(sb->s_flags & MS_RDONLY))
654 int error; 642 bhv_vfs_sync(vfs_from_sb(sb), SYNC_FSDATA, NULL);
655
656 if (sb->s_flags & MS_RDONLY) {
657 sb->s_dirt = 0; /* paranoia */
658 return;
659 }
660 /* Push the log and superblock a little */
661 VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
662 sb->s_dirt = 0; 643 sb->s_dirt = 0;
663} 644}
664 645
@@ -667,16 +648,16 @@ xfs_fs_sync_super(
667 struct super_block *sb, 648 struct super_block *sb,
668 int wait) 649 int wait)
669{ 650{
670 vfs_t *vfsp = vfs_from_sb(sb); 651 bhv_vfs_t *vfsp = vfs_from_sb(sb);
671 int error; 652 int error;
672 int flags = SYNC_FSDATA; 653 int flags;
673 654
674 if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) 655 if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
675 flags = SYNC_QUIESCE; 656 flags = SYNC_QUIESCE;
676 else 657 else
677 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); 658 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
678 659
679 VFS_SYNC(vfsp, flags, NULL, error); 660 error = bhv_vfs_sync(vfsp, flags, NULL);
680 sb->s_dirt = 0; 661 sb->s_dirt = 0;
681 662
682 if (unlikely(laptop_mode)) { 663 if (unlikely(laptop_mode)) {
@@ -703,14 +684,11 @@ xfs_fs_sync_super(
703 684
704STATIC int 685STATIC int
705xfs_fs_statfs( 686xfs_fs_statfs(
706 struct super_block *sb, 687 struct dentry *dentry,
707 struct kstatfs *statp) 688 struct kstatfs *statp)
708{ 689{
709 vfs_t *vfsp = vfs_from_sb(sb); 690 return -bhv_vfs_statvfs(vfs_from_sb(dentry->d_sb), statp,
710 int error; 691 vn_from_inode(dentry->d_inode));
711
712 VFS_STATVFS(vfsp, statp, NULL, error);
713 return -error;
714} 692}
715 693
716STATIC int 694STATIC int
@@ -719,13 +697,13 @@ xfs_fs_remount(
719 int *flags, 697 int *flags,
720 char *options) 698 char *options)
721{ 699{
722 vfs_t *vfsp = vfs_from_sb(sb); 700 bhv_vfs_t *vfsp = vfs_from_sb(sb);
723 struct xfs_mount_args *args = xfs_args_allocate(sb, 0); 701 struct xfs_mount_args *args = xfs_args_allocate(sb, 0);
724 int error; 702 int error;
725 703
726 VFS_PARSEARGS(vfsp, options, args, 1, error); 704 error = bhv_vfs_parseargs(vfsp, options, args, 1);
727 if (!error) 705 if (!error)
728 VFS_MNTUPDATE(vfsp, flags, args, error); 706 error = bhv_vfs_mntupdate(vfsp, flags, args);
729 kmem_free(args, sizeof(*args)); 707 kmem_free(args, sizeof(*args));
730 return -error; 708 return -error;
731} 709}
@@ -734,7 +712,7 @@ STATIC void
734xfs_fs_lockfs( 712xfs_fs_lockfs(
735 struct super_block *sb) 713 struct super_block *sb)
736{ 714{
737 VFS_FREEZE(vfs_from_sb(sb)); 715 bhv_vfs_freeze(vfs_from_sb(sb));
738} 716}
739 717
740STATIC int 718STATIC int
@@ -742,11 +720,7 @@ xfs_fs_show_options(
742 struct seq_file *m, 720 struct seq_file *m,
743 struct vfsmount *mnt) 721 struct vfsmount *mnt)
744{ 722{
745 struct vfs *vfsp = vfs_from_sb(mnt->mnt_sb); 723 return -bhv_vfs_showargs(vfs_from_sb(mnt->mnt_sb), m);
746 int error;
747
748 VFS_SHOWARGS(vfsp, m, error);
749 return error;
750} 724}
751 725
752STATIC int 726STATIC int
@@ -754,11 +728,7 @@ xfs_fs_quotasync(
754 struct super_block *sb, 728 struct super_block *sb,
755 int type) 729 int type)
756{ 730{
757 struct vfs *vfsp = vfs_from_sb(sb); 731 return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XQUOTASYNC, 0, NULL);
758 int error;
759
760 VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
761 return -error;
762} 732}
763 733
764STATIC int 734STATIC int
@@ -766,11 +736,7 @@ xfs_fs_getxstate(
766 struct super_block *sb, 736 struct super_block *sb,
767 struct fs_quota_stat *fqs) 737 struct fs_quota_stat *fqs)
768{ 738{
769 struct vfs *vfsp = vfs_from_sb(sb); 739 return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
770 int error;
771
772 VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
773 return -error;
774} 740}
775 741
776STATIC int 742STATIC int
@@ -779,11 +745,7 @@ xfs_fs_setxstate(
779 unsigned int flags, 745 unsigned int flags,
780 int op) 746 int op)
781{ 747{
782 struct vfs *vfsp = vfs_from_sb(sb); 748 return -bhv_vfs_quotactl(vfs_from_sb(sb), op, 0, (caddr_t)&flags);
783 int error;
784
785 VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
786 return -error;
787} 749}
788 750
789STATIC int 751STATIC int
@@ -793,13 +755,10 @@ xfs_fs_getxquota(
793 qid_t id, 755 qid_t id,
794 struct fs_disk_quota *fdq) 756 struct fs_disk_quota *fdq)
795{ 757{
796 struct vfs *vfsp = vfs_from_sb(sb); 758 return -bhv_vfs_quotactl(vfs_from_sb(sb),
797 int error, getmode; 759 (type == USRQUOTA) ? Q_XGETQUOTA :
798 760 ((type == GRPQUOTA) ? Q_XGETGQUOTA :
799 getmode = (type == USRQUOTA) ? Q_XGETQUOTA : 761 Q_XGETPQUOTA), id, (caddr_t)fdq);
800 ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
801 VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
802 return -error;
803} 762}
804 763
805STATIC int 764STATIC int
@@ -809,13 +768,10 @@ xfs_fs_setxquota(
809 qid_t id, 768 qid_t id,
810 struct fs_disk_quota *fdq) 769 struct fs_disk_quota *fdq)
811{ 770{
812 struct vfs *vfsp = vfs_from_sb(sb); 771 return -bhv_vfs_quotactl(vfs_from_sb(sb),
813 int error, setmode; 772 (type == USRQUOTA) ? Q_XSETQLIM :
814 773 ((type == GRPQUOTA) ? Q_XSETGQLIM :
815 setmode = (type == USRQUOTA) ? Q_XSETQLIM : 774 Q_XSETPQLIM), id, (caddr_t)fdq);
816 ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
817 VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
818 return -error;
819} 775}
820 776
821STATIC int 777STATIC int
@@ -824,34 +780,32 @@ xfs_fs_fill_super(
824 void *data, 780 void *data,
825 int silent) 781 int silent)
826{ 782{
827 vnode_t *rootvp; 783 struct bhv_vnode *rootvp;
828 struct vfs *vfsp = vfs_allocate(sb); 784 struct bhv_vfs *vfsp = vfs_allocate(sb);
829 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 785 struct xfs_mount_args *args = xfs_args_allocate(sb, silent);
830 struct kstatfs statvfs; 786 struct kstatfs statvfs;
831 int error, error2; 787 int error;
832 788
833 bhv_insert_all_vfsops(vfsp); 789 bhv_insert_all_vfsops(vfsp);
834 790
835 VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); 791 error = bhv_vfs_parseargs(vfsp, (char *)data, args, 0);
836 if (error) { 792 if (error) {
837 bhv_remove_all_vfsops(vfsp, 1); 793 bhv_remove_all_vfsops(vfsp, 1);
838 goto fail_vfsop; 794 goto fail_vfsop;
839 } 795 }
840 796
841 sb_min_blocksize(sb, BBSIZE); 797 sb_min_blocksize(sb, BBSIZE);
842#ifdef CONFIG_XFS_EXPORT
843 sb->s_export_op = &xfs_export_operations; 798 sb->s_export_op = &xfs_export_operations;
844#endif
845 sb->s_qcop = &xfs_quotactl_operations; 799 sb->s_qcop = &xfs_quotactl_operations;
846 sb->s_op = &xfs_super_operations; 800 sb->s_op = &xfs_super_operations;
847 801
848 VFS_MOUNT(vfsp, args, NULL, error); 802 error = bhv_vfs_mount(vfsp, args, NULL);
849 if (error) { 803 if (error) {
850 bhv_remove_all_vfsops(vfsp, 1); 804 bhv_remove_all_vfsops(vfsp, 1);
851 goto fail_vfsop; 805 goto fail_vfsop;
852 } 806 }
853 807
854 VFS_STATVFS(vfsp, &statvfs, NULL, error); 808 error = bhv_vfs_statvfs(vfsp, &statvfs, NULL);
855 if (error) 809 if (error)
856 goto fail_unmount; 810 goto fail_unmount;
857 811
@@ -863,7 +817,7 @@ xfs_fs_fill_super(
863 sb->s_time_gran = 1; 817 sb->s_time_gran = 1;
864 set_posix_acl_flag(sb); 818 set_posix_acl_flag(sb);
865 819
866 VFS_ROOT(vfsp, &rootvp, error); 820 error = bhv_vfs_root(vfsp, &rootvp);
867 if (error) 821 if (error)
868 goto fail_unmount; 822 goto fail_unmount;
869 823
@@ -892,7 +846,7 @@ fail_vnrele:
892 } 846 }
893 847
894fail_unmount: 848fail_unmount:
895 VFS_UNMOUNT(vfsp, 0, NULL, error2); 849 bhv_vfs_unmount(vfsp, 0, NULL);
896 850
897fail_vfsop: 851fail_vfsop:
898 vfs_deallocate(vfsp); 852 vfs_deallocate(vfsp);
@@ -900,14 +854,16 @@ fail_vfsop:
900 return -error; 854 return -error;
901} 855}
902 856
903STATIC struct super_block * 857STATIC int
904xfs_fs_get_sb( 858xfs_fs_get_sb(
905 struct file_system_type *fs_type, 859 struct file_system_type *fs_type,
906 int flags, 860 int flags,
907 const char *dev_name, 861 const char *dev_name,
908 void *data) 862 void *data,
863 struct vfsmount *mnt)
909{ 864{
910 return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 865 return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
866 mnt);
911} 867}
912 868
913STATIC struct super_operations xfs_super_operations = { 869STATIC struct super_operations xfs_super_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 376b96cb513a..33dd1ca13245 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -105,7 +105,7 @@ struct block_device;
105 105
106extern __uint64_t xfs_max_file_offset(unsigned int); 106extern __uint64_t xfs_max_file_offset(unsigned int);
107 107
108extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); 108extern void xfs_initialize_vnode(bhv_desc_t *, bhv_vnode_t *, bhv_desc_t *, int);
109 109
110extern void xfs_flush_inode(struct xfs_inode *); 110extern void xfs_flush_inode(struct xfs_inode *);
111extern void xfs_flush_device(struct xfs_inode *); 111extern void xfs_flush_device(struct xfs_inode *);
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7079cc837210..af246532fbfb 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -38,7 +38,7 @@ xfs_stats_clear_proc_handler(
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 40 printk("XFS Clearing xfsstats\n");
41 for_each_cpu(c) { 41 for_each_possible_cpu(c) {
42 preempt_disable(); 42 preempt_disable();
43 /* save vn_active, it's a universal truth! */ 43 /* save vn_active, it's a universal truth! */
44 vn_active = per_cpu(xfsstats, c).vn_active; 44 vn_active = per_cpu(xfsstats, c).vn_active;
@@ -120,6 +120,11 @@ STATIC ctl_table xfs_table[] = {
120 &sysctl_intvec, NULL, 120 &sysctl_intvec, NULL,
121 &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, 121 &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
122 122
123 {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val,
124 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
125 &sysctl_intvec, NULL,
126 &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max},
127
123 /* please keep this the last entry */ 128 /* please keep this the last entry */
124#ifdef CONFIG_PROC_FS 129#ifdef CONFIG_PROC_FS
125 {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, 130 {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index bc8c11f13722..a631fb8cc5ac 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -46,6 +46,7 @@ typedef struct xfs_param {
46 xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ 46 xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ 47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ 48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
49 xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
49} xfs_param_t; 50} xfs_param_t;
50 51
51/* 52/*
@@ -84,6 +85,7 @@ enum {
84 /* XFS_IO_BYPASS = 18 */ 85 /* XFS_IO_BYPASS = 18 */
85 XFS_INHERIT_NOSYM = 19, 86 XFS_INHERIT_NOSYM = 19,
86 XFS_ROTORSTEP = 20, 87 XFS_ROTORSTEP = 20,
88 XFS_INHERIT_NODFRG = 21,
87}; 89};
88 90
89extern xfs_param_t xfs_params; 91extern xfs_param_t xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 6f7c9f7a8624..6145e8bd0be2 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_imap.h" 27#include "xfs_imap.h"
29#include "xfs_alloc.h" 28#include "xfs_alloc.h"
@@ -104,7 +103,7 @@ vfs_mntupdate(
104int 103int
105vfs_root( 104vfs_root(
106 struct bhv_desc *bdp, 105 struct bhv_desc *bdp,
107 struct vnode **vpp) 106 struct bhv_vnode **vpp)
108{ 107{
109 struct bhv_desc *next = bdp; 108 struct bhv_desc *next = bdp;
110 109
@@ -117,15 +116,15 @@ vfs_root(
117int 116int
118vfs_statvfs( 117vfs_statvfs(
119 struct bhv_desc *bdp, 118 struct bhv_desc *bdp,
120 xfs_statfs_t *sp, 119 bhv_statvfs_t *statp,
121 struct vnode *vp) 120 struct bhv_vnode *vp)
122{ 121{
123 struct bhv_desc *next = bdp; 122 struct bhv_desc *next = bdp;
124 123
125 ASSERT(next); 124 ASSERT(next);
126 while (! (bhvtovfsops(next))->vfs_statvfs) 125 while (! (bhvtovfsops(next))->vfs_statvfs)
127 next = BHV_NEXT(next); 126 next = BHV_NEXT(next);
128 return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp)); 127 return ((*bhvtovfsops(next)->vfs_statvfs)(next, statp, vp));
129} 128}
130 129
131int 130int
@@ -145,7 +144,7 @@ vfs_sync(
145int 144int
146vfs_vget( 145vfs_vget(
147 struct bhv_desc *bdp, 146 struct bhv_desc *bdp,
148 struct vnode **vpp, 147 struct bhv_vnode **vpp,
149 struct fid *fidp) 148 struct fid *fidp)
150{ 149{
151 struct bhv_desc *next = bdp; 150 struct bhv_desc *next = bdp;
@@ -187,7 +186,7 @@ vfs_quotactl(
187void 186void
188vfs_init_vnode( 187vfs_init_vnode(
189 struct bhv_desc *bdp, 188 struct bhv_desc *bdp,
190 struct vnode *vp, 189 struct bhv_vnode *vp,
191 struct bhv_desc *bp, 190 struct bhv_desc *bp,
192 int unlock) 191 int unlock)
193{ 192{
@@ -226,13 +225,13 @@ vfs_freeze(
226 ((*bhvtovfsops(next)->vfs_freeze)(next)); 225 ((*bhvtovfsops(next)->vfs_freeze)(next));
227} 226}
228 227
229vfs_t * 228bhv_vfs_t *
230vfs_allocate( 229vfs_allocate(
231 struct super_block *sb) 230 struct super_block *sb)
232{ 231{
233 struct vfs *vfsp; 232 struct bhv_vfs *vfsp;
234 233
235 vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); 234 vfsp = kmem_zalloc(sizeof(bhv_vfs_t), KM_SLEEP);
236 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); 235 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
237 INIT_LIST_HEAD(&vfsp->vfs_sync_list); 236 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
238 spin_lock_init(&vfsp->vfs_sync_lock); 237 spin_lock_init(&vfsp->vfs_sync_lock);
@@ -247,25 +246,25 @@ vfs_allocate(
247 return vfsp; 246 return vfsp;
248} 247}
249 248
250vfs_t * 249bhv_vfs_t *
251vfs_from_sb( 250vfs_from_sb(
252 struct super_block *sb) 251 struct super_block *sb)
253{ 252{
254 return (vfs_t *)sb->s_fs_info; 253 return (bhv_vfs_t *)sb->s_fs_info;
255} 254}
256 255
257void 256void
258vfs_deallocate( 257vfs_deallocate(
259 struct vfs *vfsp) 258 struct bhv_vfs *vfsp)
260{ 259{
261 bhv_head_destroy(VFS_BHVHEAD(vfsp)); 260 bhv_head_destroy(VFS_BHVHEAD(vfsp));
262 kmem_free(vfsp, sizeof(vfs_t)); 261 kmem_free(vfsp, sizeof(bhv_vfs_t));
263} 262}
264 263
265void 264void
266vfs_insertops( 265vfs_insertops(
267 struct vfs *vfsp, 266 struct bhv_vfs *vfsp,
268 struct bhv_vfsops *vfsops) 267 struct bhv_module_vfsops *vfsops)
269{ 268{
270 struct bhv_desc *bdp; 269 struct bhv_desc *bdp;
271 270
@@ -276,9 +275,9 @@ vfs_insertops(
276 275
277void 276void
278vfs_insertbhv( 277vfs_insertbhv(
279 struct vfs *vfsp, 278 struct bhv_vfs *vfsp,
280 struct bhv_desc *bdp, 279 struct bhv_desc *bdp,
281 struct vfsops *vfsops, 280 struct bhv_vfsops *vfsops,
282 void *mount) 281 void *mount)
283{ 282{
284 bhv_desc_init(bdp, mount, vfsp, vfsops); 283 bhv_desc_init(bdp, mount, vfsp, vfsops);
@@ -287,7 +286,7 @@ vfs_insertbhv(
287 286
288void 287void
289bhv_remove_vfsops( 288bhv_remove_vfsops(
290 struct vfs *vfsp, 289 struct bhv_vfs *vfsp,
291 int pos) 290 int pos)
292{ 291{
293 struct bhv_desc *bhv; 292 struct bhv_desc *bhv;
@@ -301,7 +300,7 @@ bhv_remove_vfsops(
301 300
302void 301void
303bhv_remove_all_vfsops( 302bhv_remove_all_vfsops(
304 struct vfs *vfsp, 303 struct bhv_vfs *vfsp,
305 int freebase) 304 int freebase)
306{ 305{
307 struct xfs_mount *mp; 306 struct xfs_mount *mp;
@@ -317,7 +316,7 @@ bhv_remove_all_vfsops(
317 316
318void 317void
319bhv_insert_all_vfsops( 318bhv_insert_all_vfsops(
320 struct vfs *vfsp) 319 struct bhv_vfs *vfsp)
321{ 320{
322 struct xfs_mount *mp; 321 struct xfs_mount *mp;
323 322
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 841200c03092..91fc2c4b3353 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -21,42 +21,40 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include "xfs_fs.h" 22#include "xfs_fs.h"
23 23
24struct bhv_vfs;
25struct bhv_vnode;
26
24struct fid; 27struct fid;
25struct vfs;
26struct cred; 28struct cred;
27struct vnode;
28struct kstatfs;
29struct seq_file; 29struct seq_file;
30struct super_block; 30struct super_block;
31struct xfs_mount_args; 31struct xfs_mount_args;
32 32
33typedef struct kstatfs xfs_statfs_t; 33typedef struct kstatfs bhv_statvfs_t;
34 34
35typedef struct vfs_sync_work { 35typedef struct bhv_vfs_sync_work {
36 struct list_head w_list; 36 struct list_head w_list;
37 struct vfs *w_vfs; 37 struct bhv_vfs *w_vfs;
38 void *w_data; /* syncer routine argument */ 38 void *w_data; /* syncer routine argument */
39 void (*w_syncer)(struct vfs *, void *); 39 void (*w_syncer)(struct bhv_vfs *, void *);
40} vfs_sync_work_t; 40} bhv_vfs_sync_work_t;
41 41
42typedef struct vfs { 42typedef struct bhv_vfs {
43 u_int vfs_flag; /* flags */ 43 u_int vfs_flag; /* flags */
44 xfs_fsid_t vfs_fsid; /* file system ID */ 44 xfs_fsid_t vfs_fsid; /* file system ID */
45 xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ 45 xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */
46 bhv_head_t vfs_bh; /* head of vfs behavior chain */ 46 bhv_head_t vfs_bh; /* head of vfs behavior chain */
47 struct super_block *vfs_super; /* generic superblock pointer */ 47 struct super_block *vfs_super; /* generic superblock pointer */
48 struct task_struct *vfs_sync_task; /* generalised sync thread */ 48 struct task_struct *vfs_sync_task; /* generalised sync thread */
49 vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */ 49 bhv_vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */
50 struct list_head vfs_sync_list; /* sync thread work item list */ 50 struct list_head vfs_sync_list; /* sync thread work item list */
51 spinlock_t vfs_sync_lock; /* work item list lock */ 51 spinlock_t vfs_sync_lock; /* work item list lock */
52 int vfs_sync_seq; /* sync thread generation no. */ 52 int vfs_sync_seq; /* sync thread generation no. */
53 wait_queue_head_t vfs_wait_single_sync_task; 53 wait_queue_head_t vfs_wait_single_sync_task;
54} vfs_t; 54} bhv_vfs_t;
55
56#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
57 55
58#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) ) 56#define bhvtovfs(bdp) ( (struct bhv_vfs *)BHV_VOBJ(bdp) )
59#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) ) 57#define bhvtovfsops(bdp) ( (struct bhv_vfsops *)BHV_OPS(bdp) )
60#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) 58#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh )
61#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) 59#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
62 60
@@ -71,7 +69,7 @@ typedef enum {
71 VFS_BHV_QM, /* quota manager */ 69 VFS_BHV_QM, /* quota manager */
72 VFS_BHV_IO, /* IO path */ 70 VFS_BHV_IO, /* IO path */
73 VFS_BHV_END /* housekeeping end-of-range */ 71 VFS_BHV_END /* housekeeping end-of-range */
74} vfs_bhv_t; 72} bhv_vfs_type_t;
75 73
76#define VFS_POSITION_XFS (BHV_POSITION_BASE) 74#define VFS_POSITION_XFS (BHV_POSITION_BASE)
77#define VFS_POSITION_DM (VFS_POSITION_BASE+10) 75#define VFS_POSITION_DM (VFS_POSITION_BASE+10)
@@ -81,8 +79,9 @@ typedef enum {
81#define VFS_RDONLY 0x0001 /* read-only vfs */ 79#define VFS_RDONLY 0x0001 /* read-only vfs */
82#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ 80#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
83#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ 81#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
84#define VFS_32BITINODES 0x0008 /* do not use inums above 32 bits */ 82#define VFS_UMOUNT 0x0008 /* unmount in progress */
85#define VFS_END 0x0008 /* max flag */ 83#define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */
84#define VFS_END 0x0010 /* max flag */
86 85
87#define SYNC_ATTR 0x0001 /* sync attributes */ 86#define SYNC_ATTR 0x0001 /* sync attributes */
88#define SYNC_CLOSE 0x0002 /* close file system down */ 87#define SYNC_CLOSE 0x0002 /* close file system down */
@@ -92,7 +91,14 @@ typedef enum {
92#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ 91#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
93#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
94#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
95#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */ 94#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */
95
96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
98#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
99#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
100#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
101#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
96 102
97typedef int (*vfs_mount_t)(bhv_desc_t *, 103typedef int (*vfs_mount_t)(bhv_desc_t *,
98 struct xfs_mount_args *, struct cred *); 104 struct xfs_mount_args *, struct cred *);
@@ -102,18 +108,19 @@ typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
102typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *); 108typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
103typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *, 109typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *,
104 struct xfs_mount_args *); 110 struct xfs_mount_args *);
105typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **); 111typedef int (*vfs_root_t)(bhv_desc_t *, struct bhv_vnode **);
106typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *); 112typedef int (*vfs_statvfs_t)(bhv_desc_t *, bhv_statvfs_t *,
113 struct bhv_vnode *);
107typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *); 114typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
108typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *); 115typedef int (*vfs_vget_t)(bhv_desc_t *, struct bhv_vnode **, struct fid *);
109typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t); 116typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
110typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); 117typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
111typedef void (*vfs_init_vnode_t)(bhv_desc_t *, 118typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
112 struct vnode *, bhv_desc_t *, int); 119 struct bhv_vnode *, bhv_desc_t *, int);
113typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); 120typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
114typedef void (*vfs_freeze_t)(bhv_desc_t *); 121typedef void (*vfs_freeze_t)(bhv_desc_t *);
115 122
116typedef struct vfsops { 123typedef struct bhv_vfsops {
117 bhv_position_t vf_position; /* behavior chain position */ 124 bhv_position_t vf_position; /* behavior chain position */
118 vfs_mount_t vfs_mount; /* mount file system */ 125 vfs_mount_t vfs_mount; /* mount file system */
119 vfs_parseargs_t vfs_parseargs; /* parse mount options */ 126 vfs_parseargs_t vfs_parseargs; /* parse mount options */
@@ -129,82 +136,82 @@ typedef struct vfsops {
129 vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ 136 vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
130 vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ 137 vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
131 vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ 138 vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
132} vfsops_t; 139} bhv_vfsops_t;
133 140
134/* 141/*
135 * VFS's. Operates on vfs structure pointers (starts at bhv head). 142 * Virtual filesystem operations, operating from head bhv.
136 */ 143 */
137#define VHEAD(v) ((v)->vfs_fbhv) 144#define VFSHEAD(v) ((v)->vfs_bh.bh_first)
138#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr)) 145#define bhv_vfs_mount(v, ma,cr) vfs_mount(VFSHEAD(v), ma,cr)
139#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f)) 146#define bhv_vfs_parseargs(v, o,ma,f) vfs_parseargs(VFSHEAD(v), o,ma,f)
140#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m)) 147#define bhv_vfs_showargs(v, m) vfs_showargs(VFSHEAD(v), m)
141#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr)) 148#define bhv_vfs_unmount(v, f,cr) vfs_unmount(VFSHEAD(v), f,cr)
142#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args)) 149#define bhv_vfs_mntupdate(v, fl,args) vfs_mntupdate(VFSHEAD(v), fl,args)
143#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp)) 150#define bhv_vfs_root(v, vpp) vfs_root(VFSHEAD(v), vpp)
144#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp)) 151#define bhv_vfs_statvfs(v, sp,vp) vfs_statvfs(VFSHEAD(v), sp,vp)
145#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr)) 152#define bhv_vfs_sync(v, flag,cr) vfs_sync(VFSHEAD(v), flag,cr)
146#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp)) 153#define bhv_vfs_vget(v, vpp,fidp) vfs_vget(VFSHEAD(v), vpp,fidp)
147#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p)) 154#define bhv_vfs_dmapiops(v, p) vfs_dmapiops(VFSHEAD(v), p)
148#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) 155#define bhv_vfs_quotactl(v, c,id,p) vfs_quotactl(VFSHEAD(v), c,id,p)
149#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) 156#define bhv_vfs_init_vnode(v, vp,b,ul) vfs_init_vnode(VFSHEAD(v), vp,b,ul)
150#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) 157#define bhv_vfs_force_shutdown(v,u,f,l) vfs_force_shutdown(VFSHEAD(v), u,f,l)
151#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) 158#define bhv_vfs_freeze(v) vfs_freeze(VFSHEAD(v))
152 159
153/* 160/*
154 * PVFS's. Operates on behavior descriptor pointers. 161 * Virtual filesystem operations, operating from next bhv.
155 */ 162 */
156#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr)) 163#define bhv_next_vfs_mount(b, ma,cr) vfs_mount(b, ma,cr)
157#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f)) 164#define bhv_next_vfs_parseargs(b, o,ma,f) vfs_parseargs(b, o,ma,f)
158#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m)) 165#define bhv_next_vfs_showargs(b, m) vfs_showargs(b, m)
159#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr)) 166#define bhv_next_vfs_unmount(b, f,cr) vfs_unmount(b, f,cr)
160#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args)) 167#define bhv_next_vfs_mntupdate(b, fl,args) vfs_mntupdate(b, fl, args)
161#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp)) 168#define bhv_next_vfs_root(b, vpp) vfs_root(b, vpp)
162#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp)) 169#define bhv_next_vfs_statvfs(b, sp,vp) vfs_statvfs(b, sp,vp)
163#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr)) 170#define bhv_next_vfs_sync(b, flag,cr) vfs_sync(b, flag,cr)
164#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp)) 171#define bhv_next_vfs_vget(b, vpp,fidp) vfs_vget(b, vpp,fidp)
165#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p)) 172#define bhv_next_vfs_dmapiops(b, p) vfs_dmapiops(b, p)
166#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) 173#define bhv_next_vfs_quotactl(b, c,id,p) vfs_quotactl(b, c,id,p)
167#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) 174#define bhv_next_vfs_init_vnode(b, vp,b2,ul) vfs_init_vnode(b, vp,b2,ul)
168#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) 175#define bhv_next_force_shutdown(b, fl,f,l) vfs_force_shutdown(b, fl,f,l)
169#define PVFS_FREEZE(b) ( vfs_freeze(b) ) 176#define bhv_next_vfs_freeze(b) vfs_freeze(b)
170 177
171extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); 178extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
172extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); 179extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
173extern int vfs_showargs(bhv_desc_t *, struct seq_file *); 180extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
174extern int vfs_unmount(bhv_desc_t *, int, struct cred *); 181extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
175extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); 182extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
176extern int vfs_root(bhv_desc_t *, struct vnode **); 183extern int vfs_root(bhv_desc_t *, struct bhv_vnode **);
177extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *); 184extern int vfs_statvfs(bhv_desc_t *, bhv_statvfs_t *, struct bhv_vnode *);
178extern int vfs_sync(bhv_desc_t *, int, struct cred *); 185extern int vfs_sync(bhv_desc_t *, int, struct cred *);
179extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *); 186extern int vfs_vget(bhv_desc_t *, struct bhv_vnode **, struct fid *);
180extern int vfs_dmapiops(bhv_desc_t *, caddr_t); 187extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
181extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); 188extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
182extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); 189extern void vfs_init_vnode(bhv_desc_t *, struct bhv_vnode *, bhv_desc_t *, int);
183extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); 190extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
184extern void vfs_freeze(bhv_desc_t *); 191extern void vfs_freeze(bhv_desc_t *);
185 192
186typedef struct bhv_vfsops { 193#define vfs_test_for_freeze(vfs) ((vfs)->vfs_super->s_frozen)
187 struct vfsops bhv_common; 194#define vfs_wait_for_freeze(vfs,l) vfs_check_frozen((vfs)->vfs_super, (l))
195
196typedef struct bhv_module_vfsops {
197 struct bhv_vfsops bhv_common;
188 void * bhv_custom; 198 void * bhv_custom;
189} bhv_vfsops_t; 199} bhv_module_vfsops_t;
190 200
191#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) 201#define vfs_bhv_lookup(v, id) (bhv_lookup_range(&(v)->vfs_bh, (id), (id)))
192#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) 202#define vfs_bhv_custom(b) (((bhv_module_vfsops_t*)BHV_OPS(b))->bhv_custom)
193#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) 203#define vfs_bhv_set_custom(b,o) ((b)->bhv_custom = (void *)(o))
194#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) 204#define vfs_bhv_clr_custom(b) ((b)->bhv_custom = NULL)
195 205
196extern vfs_t *vfs_allocate(struct super_block *); 206extern bhv_vfs_t *vfs_allocate(struct super_block *);
197extern vfs_t *vfs_from_sb(struct super_block *); 207extern bhv_vfs_t *vfs_from_sb(struct super_block *);
198extern void vfs_deallocate(vfs_t *); 208extern void vfs_deallocate(bhv_vfs_t *);
199extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); 209extern void vfs_insertbhv(bhv_vfs_t *, bhv_desc_t *, bhv_vfsops_t *, void *);
200extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
201 210
202extern void bhv_insert_all_vfsops(struct vfs *); 211extern void vfs_insertops(bhv_vfs_t *, bhv_module_vfsops_t *);
203extern void bhv_remove_all_vfsops(struct vfs *, int);
204extern void bhv_remove_vfsops(struct vfs *, int);
205 212
206#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen) 213extern void bhv_insert_all_vfsops(struct bhv_vfs *);
207#define fs_check_frozen(vfsp, level) \ 214extern void bhv_remove_all_vfsops(struct bhv_vfs *, int);
208 vfs_check_frozen(vfsp->vfs_super, level); 215extern void bhv_remove_vfsops(struct bhv_vfs *, int);
209 216
210#endif /* __XFS_VFS_H__ */ 217#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index d27c25b27ccd..6628d96b6fd6 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -39,7 +39,7 @@ vn_init(void)
39 39
40void 40void
41vn_iowait( 41vn_iowait(
42 struct vnode *vp) 42 bhv_vnode_t *vp)
43{ 43{
44 wait_queue_head_t *wq = vptosync(vp); 44 wait_queue_head_t *wq = vptosync(vp);
45 45
@@ -48,17 +48,33 @@ vn_iowait(
48 48
49void 49void
50vn_iowake( 50vn_iowake(
51 struct vnode *vp) 51 bhv_vnode_t *vp)
52{ 52{
53 if (atomic_dec_and_test(&vp->v_iocount)) 53 if (atomic_dec_and_test(&vp->v_iocount))
54 wake_up(vptosync(vp)); 54 wake_up(vptosync(vp));
55} 55}
56 56
57struct vnode * 57/*
58 * Volume managers supporting multiple paths can send back ENODEV when the
59 * final path disappears. In this case continuing to fill the page cache
60 * with dirty data which cannot be written out is evil, so prevent that.
61 */
62void
63vn_ioerror(
64 bhv_vnode_t *vp,
65 int error,
66 char *f,
67 int l)
68{
69 if (unlikely(error == -ENODEV))
70 bhv_vfs_force_shutdown(vp->v_vfsp, SHUTDOWN_DEVICE_REQ, f, l);
71}
72
73bhv_vnode_t *
58vn_initialize( 74vn_initialize(
59 struct inode *inode) 75 struct inode *inode)
60{ 76{
61 struct vnode *vp = vn_from_inode(inode); 77 bhv_vnode_t *vp = vn_from_inode(inode);
62 78
63 XFS_STATS_INC(vn_active); 79 XFS_STATS_INC(vn_active);
64 XFS_STATS_INC(vn_alloc); 80 XFS_STATS_INC(vn_alloc);
@@ -94,8 +110,8 @@ vn_initialize(
94 */ 110 */
95void 111void
96vn_revalidate_core( 112vn_revalidate_core(
97 struct vnode *vp, 113 bhv_vnode_t *vp,
98 vattr_t *vap) 114 bhv_vattr_t *vap)
99{ 115{
100 struct inode *inode = vn_to_inode(vp); 116 struct inode *inode = vn_to_inode(vp);
101 117
@@ -130,14 +146,14 @@ vn_revalidate_core(
130 */ 146 */
131int 147int
132__vn_revalidate( 148__vn_revalidate(
133 struct vnode *vp, 149 bhv_vnode_t *vp,
134 struct vattr *vattr) 150 bhv_vattr_t *vattr)
135{ 151{
136 int error; 152 int error;
137 153
138 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 154 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
139 vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS; 155 vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS;
140 VOP_GETATTR(vp, vattr, 0, NULL, error); 156 error = bhv_vop_getattr(vp, vattr, 0, NULL);
141 if (likely(!error)) { 157 if (likely(!error)) {
142 vn_revalidate_core(vp, vattr); 158 vn_revalidate_core(vp, vattr);
143 VUNMODIFY(vp); 159 VUNMODIFY(vp);
@@ -147,9 +163,9 @@ __vn_revalidate(
147 163
148int 164int
149vn_revalidate( 165vn_revalidate(
150 struct vnode *vp) 166 bhv_vnode_t *vp)
151{ 167{
152 vattr_t vattr; 168 bhv_vattr_t vattr;
153 169
154 return __vn_revalidate(vp, &vattr); 170 return __vn_revalidate(vp, &vattr);
155} 171}
@@ -157,9 +173,9 @@ vn_revalidate(
157/* 173/*
158 * Add a reference to a referenced vnode. 174 * Add a reference to a referenced vnode.
159 */ 175 */
160struct vnode * 176bhv_vnode_t *
161vn_hold( 177vn_hold(
162 struct vnode *vp) 178 bhv_vnode_t *vp)
163{ 179{
164 struct inode *inode; 180 struct inode *inode;
165 181
@@ -192,31 +208,31 @@ vn_hold(
192 * Vnode tracing code. 208 * Vnode tracing code.
193 */ 209 */
194void 210void
195vn_trace_entry(vnode_t *vp, const char *func, inst_t *ra) 211vn_trace_entry(bhv_vnode_t *vp, const char *func, inst_t *ra)
196{ 212{
197 KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); 213 KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
198} 214}
199 215
200void 216void
201vn_trace_exit(vnode_t *vp, const char *func, inst_t *ra) 217vn_trace_exit(bhv_vnode_t *vp, const char *func, inst_t *ra)
202{ 218{
203 KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); 219 KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
204} 220}
205 221
206void 222void
207vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra) 223vn_trace_hold(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
208{ 224{
209 KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); 225 KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
210} 226}
211 227
212void 228void
213vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra) 229vn_trace_ref(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
214{ 230{
215 KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); 231 KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
216} 232}
217 233
218void 234void
219vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra) 235vn_trace_rele(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
220{ 236{
221 KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); 237 KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
222} 238}
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 2a8e16c22353..c42b3221b20c 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -14,57 +14,35 @@
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Portions Copyright (c) 1989, 1993
19 * The Regents of the University of California. All rights reserved.
20 *
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
23 * are met:
24 * 1. Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * 2. Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 * 3. Neither the name of the University nor the names of its contributors
30 * may be used to endorse or promote products derived from this software
31 * without specific prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
37 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 * SUCH DAMAGE.
44 */ 17 */
45#ifndef __XFS_VNODE_H__ 18#ifndef __XFS_VNODE_H__
46#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
47 20
48struct uio; 21struct uio;
49struct file; 22struct file;
50struct vattr; 23struct bhv_vfs;
24struct bhv_vattr;
51struct xfs_iomap; 25struct xfs_iomap;
52struct attrlist_cursor_kern; 26struct attrlist_cursor_kern;
53 27
28typedef struct dentry bhv_vname_t;
29typedef __u64 bhv_vnumber_t;
54 30
55typedef xfs_ino_t vnumber_t; 31typedef enum bhv_vflags {
56typedef struct dentry vname_t; 32 VMODIFIED = 0x08, /* XFS inode state possibly differs */
57typedef bhv_head_t vn_bhv_head_t; 33 /* to the Linux inode state. */
34 VTRUNCATED = 0x40, /* truncated down so flush-on-close */
35} bhv_vflags_t;
58 36
59/* 37/*
60 * MP locking protocols: 38 * MP locking protocols:
61 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK 39 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
62 */ 40 */
63typedef struct vnode { 41typedef struct bhv_vnode {
64 __u32 v_flag; /* vnode flags (see below) */ 42 bhv_vflags_t v_flag; /* vnode flags (see above) */
65 struct vfs *v_vfsp; /* ptr to containing VFS */ 43 bhv_vfs_t *v_vfsp; /* ptr to containing VFS */
66 vnumber_t v_number; /* in-core vnode number */ 44 bhv_vnumber_t v_number; /* in-core vnode number */
67 vn_bhv_head_t v_bh; /* behavior head */ 45 bhv_head_t v_bh; /* behavior head */
68 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ 46 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
69 atomic_t v_iocount; /* outstanding I/O count */ 47 atomic_t v_iocount; /* outstanding I/O count */
70#ifdef XFS_VNODE_TRACE 48#ifdef XFS_VNODE_TRACE
@@ -72,7 +50,7 @@ typedef struct vnode {
72#endif 50#endif
73 struct inode v_inode; /* Linux inode */ 51 struct inode v_inode; /* Linux inode */
74 /* inode MUST be last */ 52 /* inode MUST be last */
75} vnode_t; 53} bhv_vnode_t;
76 54
77#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) 55#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode)
78#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) 56#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode)
@@ -80,9 +58,6 @@ typedef struct vnode {
80#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) 58#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode)
81#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) 59#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode)
82 60
83#define v_fbhv v_bh.bh_first /* first behavior */
84#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
85
86#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ 61#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
87#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ 62#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */
88#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ 63#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
@@ -104,8 +79,8 @@ typedef enum {
104/* 79/*
105 * Macros for dealing with the behavior descriptor inside of the vnode. 80 * Macros for dealing with the behavior descriptor inside of the vnode.
106 */ 81 */
107#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp)) 82#define BHV_TO_VNODE(bdp) ((bhv_vnode_t *)BHV_VOBJ(bdp))
108#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp)) 83#define BHV_TO_VNODE_NULL(bdp) ((bhv_vnode_t *)BHV_VOBJNULL(bdp))
109 84
110#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) 85#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh)))
111#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) 86#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name)
@@ -116,35 +91,29 @@ typedef enum {
116/* 91/*
117 * Vnode to Linux inode mapping. 92 * Vnode to Linux inode mapping.
118 */ 93 */
119static inline struct vnode *vn_from_inode(struct inode *inode) 94static inline struct bhv_vnode *vn_from_inode(struct inode *inode)
120{ 95{
121 return (vnode_t *)list_entry(inode, vnode_t, v_inode); 96 return container_of(inode, bhv_vnode_t, v_inode);
122} 97}
123static inline struct inode *vn_to_inode(struct vnode *vnode) 98static inline struct inode *vn_to_inode(struct bhv_vnode *vnode)
124{ 99{
125 return &vnode->v_inode; 100 return &vnode->v_inode;
126} 101}
127 102
128/* 103/*
129 * Vnode flags. 104 * Values for the vop_rwlock/rwunlock flags parameter.
130 */
131#define VMODIFIED 0x8 /* XFS inode state possibly differs */
132 /* to the Linux inode state. */
133
134/*
135 * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
136 */ 105 */
137typedef enum vrwlock { 106typedef enum bhv_vrwlock {
138 VRWLOCK_NONE, 107 VRWLOCK_NONE,
139 VRWLOCK_READ, 108 VRWLOCK_READ,
140 VRWLOCK_WRITE, 109 VRWLOCK_WRITE,
141 VRWLOCK_WRITE_DIRECT, 110 VRWLOCK_WRITE_DIRECT,
142 VRWLOCK_TRY_READ, 111 VRWLOCK_TRY_READ,
143 VRWLOCK_TRY_WRITE 112 VRWLOCK_TRY_WRITE
144} vrwlock_t; 113} bhv_vrwlock_t;
145 114
146/* 115/*
147 * Return values for VOP_INACTIVE. A return value of 116 * Return values for bhv_vop_inactive. A return value of
148 * VN_INACTIVE_NOCACHE implies that the file system behavior 117 * VN_INACTIVE_NOCACHE implies that the file system behavior
149 * has disassociated its state and bhv_desc_t from the vnode. 118 * has disassociated its state and bhv_desc_t from the vnode.
150 */ 119 */
@@ -152,18 +121,20 @@ typedef enum vrwlock {
152#define VN_INACTIVE_NOCACHE 1 121#define VN_INACTIVE_NOCACHE 1
153 122
154/* 123/*
155 * Values for the cmd code given to VOP_VNODE_CHANGE. 124 * Values for the cmd code given to vop_vnode_change.
156 */ 125 */
157typedef enum vchange { 126typedef enum bhv_vchange {
158 VCHANGE_FLAGS_FRLOCKS = 0, 127 VCHANGE_FLAGS_FRLOCKS = 0,
159 VCHANGE_FLAGS_ENF_LOCKING = 1, 128 VCHANGE_FLAGS_ENF_LOCKING = 1,
160 VCHANGE_FLAGS_TRUNCATED = 2, 129 VCHANGE_FLAGS_TRUNCATED = 2,
161 VCHANGE_FLAGS_PAGE_DIRTY = 3, 130 VCHANGE_FLAGS_PAGE_DIRTY = 3,
162 VCHANGE_FLAGS_IOEXCL_COUNT = 4 131 VCHANGE_FLAGS_IOEXCL_COUNT = 4
163} vchange_t; 132} bhv_vchange_t;
164 133
134typedef enum { L_FALSE, L_TRUE } lastclose_t;
165 135
166typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); 136typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
137typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
167typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, 138typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
168 const struct iovec *, unsigned int, 139 const struct iovec *, unsigned int,
169 loff_t *, int, struct cred *); 140 loff_t *, int, struct cred *);
@@ -181,27 +152,27 @@ typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
181 struct cred *); 152 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 153typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
183 int, unsigned int, void __user *); 154 int, unsigned int, void __user *);
184typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, 155typedef int (*vop_getattr_t)(bhv_desc_t *, struct bhv_vattr *, int,
185 struct cred *); 156 struct cred *);
186typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int, 157typedef int (*vop_setattr_t)(bhv_desc_t *, struct bhv_vattr *, int,
187 struct cred *); 158 struct cred *);
188typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *); 159typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *);
189typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **, 160typedef int (*vop_lookup_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t **,
190 int, vnode_t *, struct cred *); 161 int, bhv_vnode_t *, struct cred *);
191typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *, 162typedef int (*vop_create_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *,
192 vnode_t **, struct cred *); 163 bhv_vnode_t **, struct cred *);
193typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); 164typedef int (*vop_remove_t)(bhv_desc_t *, bhv_vname_t *, struct cred *);
194typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *, 165typedef int (*vop_link_t)(bhv_desc_t *, bhv_vnode_t *, bhv_vname_t *,
195 struct cred *);
196typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
197 struct cred *); 166 struct cred *);
198typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *, 167typedef int (*vop_rename_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *,
199 vnode_t **, struct cred *); 168 bhv_vname_t *, struct cred *);
200typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *); 169typedef int (*vop_mkdir_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *,
170 bhv_vnode_t **, struct cred *);
171typedef int (*vop_rmdir_t)(bhv_desc_t *, bhv_vname_t *, struct cred *);
201typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, 172typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
202 int *); 173 int *);
203typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *, 174typedef int (*vop_symlink_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr*,
204 char *, vnode_t **, struct cred *); 175 char *, bhv_vnode_t **, struct cred *);
205typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int, 176typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
206 struct cred *); 177 struct cred *);
207typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, 178typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
@@ -209,8 +180,8 @@ typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
209typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *); 180typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *);
210typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *); 181typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *);
211typedef int (*vop_release_t)(bhv_desc_t *); 182typedef int (*vop_release_t)(bhv_desc_t *);
212typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t); 183typedef int (*vop_rwlock_t)(bhv_desc_t *, bhv_vrwlock_t);
213typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); 184typedef void (*vop_rwunlock_t)(bhv_desc_t *, bhv_vrwlock_t);
214typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, 185typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
215 struct xfs_iomap *, int *); 186 struct xfs_iomap *, int *);
216typedef int (*vop_reclaim_t)(bhv_desc_t *); 187typedef int (*vop_reclaim_t)(bhv_desc_t *);
@@ -222,8 +193,8 @@ typedef int (*vop_attr_remove_t)(bhv_desc_t *, const char *,
222 int, struct cred *); 193 int, struct cred *);
223typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, 194typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
224 struct attrlist_cursor_kern *, struct cred *); 195 struct attrlist_cursor_kern *, struct cred *);
225typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int); 196typedef void (*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int);
226typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); 197typedef void (*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t);
227typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 198typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
228typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 199typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
229typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, 200typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
@@ -231,9 +202,10 @@ typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
231typedef int (*vop_iflush_t)(bhv_desc_t *, int); 202typedef int (*vop_iflush_t)(bhv_desc_t *, int);
232 203
233 204
234typedef struct vnodeops { 205typedef struct bhv_vnodeops {
235 bhv_position_t vn_position; /* position within behavior chain */ 206 bhv_position_t vn_position; /* position within behavior chain */
236 vop_open_t vop_open; 207 vop_open_t vop_open;
208 vop_close_t vop_close;
237 vop_read_t vop_read; 209 vop_read_t vop_read;
238 vop_write_t vop_write; 210 vop_write_t vop_write;
239 vop_sendfile_t vop_sendfile; 211 vop_sendfile_t vop_sendfile;
@@ -271,103 +243,80 @@ typedef struct vnodeops {
271 vop_pflushvp_t vop_flush_pages; 243 vop_pflushvp_t vop_flush_pages;
272 vop_release_t vop_release; 244 vop_release_t vop_release;
273 vop_iflush_t vop_iflush; 245 vop_iflush_t vop_iflush;
274} vnodeops_t; 246} bhv_vnodeops_t;
275 247
276/* 248/*
277 * VOP's. 249 * Virtual node operations, operating from head bhv.
278 */
279#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op)
280
281#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \
282 rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
283#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \
284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
287#define VOP_SPLICE_READ(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
289#define VOP_SPLICE_WRITE(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
293#define VOP_OPEN(vp, cr, rv) \
294 rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
295#define VOP_GETATTR(vp, vap, f, cr, rv) \
296 rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
297#define VOP_SETATTR(vp, vap, f, cr, rv) \
298 rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
299#define VOP_ACCESS(vp, mode, cr, rv) \
300 rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
301#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \
302 rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
303#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \
304 rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
305#define VOP_REMOVE(dvp,d,cr,rv) \
306 rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
307#define VOP_LINK(tdvp,fvp,d,cr,rv) \
308 rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
309#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \
310 rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
311#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \
312 rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
313#define VOP_RMDIR(dp,d,cr,rv) \
314 rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
315#define VOP_READDIR(vp,uiop,cr,eofp,rv) \
316 rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
317#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \
318 rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
319#define VOP_READLINK(vp,uiop,fl,cr,rv) \
320 rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
321#define VOP_FSYNC(vp,f,cr,b,e,rv) \
322 rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
323#define VOP_INACTIVE(vp, cr, rv) \
324 rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
325#define VOP_RELEASE(vp, rv) \
326 rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
327#define VOP_FID2(vp, fidp, rv) \
328 rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
329#define VOP_RWLOCK(vp,i) \
330 (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
331#define VOP_RWLOCK_TRY(vp,i) \
332 _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
333#define VOP_RWUNLOCK(vp,i) \
334 (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
335#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \
336 rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
337#define VOP_RECLAIM(vp, rv) \
338 rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
339#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \
340 rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
341#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \
342 rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
343#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \
344 rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
345#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \
346 rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
347#define VOP_LINK_REMOVED(vp, dvp, linkzero) \
348 (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
349#define VOP_VNODE_CHANGE(vp, cmd, val) \
350 (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
351/*
352 * These are page cache functions that now go thru VOPs.
353 * 'last' parameter is unused and left in for IRIX compatibility
354 */ 250 */
355#define VOP_TOSS_PAGES(vp, first, last, fiopt) \ 251#define VNHEAD(vp) ((vp)->v_bh.bh_first)
356 _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt) 252#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
357/* 253#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr)
358 * 'last' parameter is unused and left in for IRIX compatibility 254#define bhv_vop_close(vp, f,last,cr) VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
359 */ 255#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \
360#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \ 256 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
361 _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt) 257#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
362/* 258 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
363 * 'last' parameter is unused and left in for IRIX compatibility 259#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \
364 */ 260 VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
365#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \ 261#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \
366 rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt) 262 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
367#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \ 263#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \
368 rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg) 264 VOP(vop_splice_write, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
369#define VOP_IFLUSH(vp, flags, rv) \ 265#define bhv_vop_bmap(vp,of,sz,rw,b,n) \
370 rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags) 266 VOP(vop_bmap, vp)(VNHEAD(vp),of,sz,rw,b,n)
267#define bhv_vop_getattr(vp, vap,f,cr) \
268 VOP(vop_getattr, vp)(VNHEAD(vp), vap,f,cr)
269#define bhv_vop_setattr(vp, vap,f,cr) \
270 VOP(vop_setattr, vp)(VNHEAD(vp), vap,f,cr)
271#define bhv_vop_access(vp, mode,cr) VOP(vop_access, vp)(VNHEAD(vp), mode,cr)
272#define bhv_vop_lookup(vp,d,vpp,f,rdir,cr) \
273 VOP(vop_lookup, vp)(VNHEAD(vp),d,vpp,f,rdir,cr)
274#define bhv_vop_create(dvp,d,vap,vpp,cr) \
275 VOP(vop_create, dvp)(VNHEAD(dvp),d,vap,vpp,cr)
276#define bhv_vop_remove(dvp,d,cr) VOP(vop_remove, dvp)(VNHEAD(dvp),d,cr)
277#define bhv_vop_link(dvp,fvp,d,cr) VOP(vop_link, dvp)(VNHEAD(dvp),fvp,d,cr)
278#define bhv_vop_rename(fvp,fnm,tdvp,tnm,cr) \
279 VOP(vop_rename, fvp)(VNHEAD(fvp),fnm,tdvp,tnm,cr)
280#define bhv_vop_mkdir(dp,d,vap,vpp,cr) \
281 VOP(vop_mkdir, dp)(VNHEAD(dp),d,vap,vpp,cr)
282#define bhv_vop_rmdir(dp,d,cr) VOP(vop_rmdir, dp)(VNHEAD(dp),d,cr)
283#define bhv_vop_readdir(vp,uiop,cr,eofp) \
284 VOP(vop_readdir, vp)(VNHEAD(vp),uiop,cr,eofp)
285#define bhv_vop_symlink(dvp,d,vap,tnm,vpp,cr) \
286 VOP(vop_symlink, dvp)(VNHEAD(dvp),d,vap,tnm,vpp,cr)
287#define bhv_vop_readlink(vp,uiop,fl,cr) \
288 VOP(vop_readlink, vp)(VNHEAD(vp),uiop,fl,cr)
289#define bhv_vop_fsync(vp,f,cr,b,e) VOP(vop_fsync, vp)(VNHEAD(vp),f,cr,b,e)
290#define bhv_vop_inactive(vp,cr) VOP(vop_inactive, vp)(VNHEAD(vp),cr)
291#define bhv_vop_release(vp) VOP(vop_release, vp)(VNHEAD(vp))
292#define bhv_vop_fid2(vp,fidp) VOP(vop_fid2, vp)(VNHEAD(vp),fidp)
293#define bhv_vop_rwlock(vp,i) VOP(vop_rwlock, vp)(VNHEAD(vp),i)
294#define bhv_vop_rwlock_try(vp,i) VOP(vop_rwlock, vp)(VNHEAD(vp),i)
295#define bhv_vop_rwunlock(vp,i) VOP(vop_rwunlock, vp)(VNHEAD(vp),i)
296#define bhv_vop_frlock(vp,c,fl,flags,offset,fr) \
297 VOP(vop_frlock, vp)(VNHEAD(vp),c,fl,flags,offset,fr)
298#define bhv_vop_reclaim(vp) VOP(vop_reclaim, vp)(VNHEAD(vp))
299#define bhv_vop_attr_get(vp, name, val, vallenp, fl, cred) \
300 VOP(vop_attr_get, vp)(VNHEAD(vp),name,val,vallenp,fl,cred)
301#define bhv_vop_attr_set(vp, name, val, vallen, fl, cred) \
302 VOP(vop_attr_set, vp)(VNHEAD(vp),name,val,vallen,fl,cred)
303#define bhv_vop_attr_remove(vp, name, flags, cred) \
304 VOP(vop_attr_remove, vp)(VNHEAD(vp),name,flags,cred)
305#define bhv_vop_attr_list(vp, buf, buflen, fl, cursor, cred) \
306 VOP(vop_attr_list, vp)(VNHEAD(vp),buf,buflen,fl,cursor,cred)
307#define bhv_vop_link_removed(vp, dvp, linkzero) \
308 VOP(vop_link_removed, vp)(VNHEAD(vp), dvp, linkzero)
309#define bhv_vop_vnode_change(vp, cmd, val) \
310 VOP(vop_vnode_change, vp)(VNHEAD(vp), cmd, val)
311#define bhv_vop_toss_pages(vp, first, last, fiopt) \
312 VOP(vop_tosspages, vp)(VNHEAD(vp), first, last, fiopt)
313#define bhv_vop_flushinval_pages(vp, first, last, fiopt) \
314 VOP(vop_flushinval_pages, vp)(VNHEAD(vp),first,last,fiopt)
315#define bhv_vop_flush_pages(vp, first, last, flags, fiopt) \
316 VOP(vop_flush_pages, vp)(VNHEAD(vp),first,last,flags,fiopt)
317#define bhv_vop_ioctl(vp, inode, filp, fl, cmd, arg) \
318 VOP(vop_ioctl, vp)(VNHEAD(vp),inode,filp,fl,cmd,arg)
319#define bhv_vop_iflush(vp, flags) VOP(vop_iflush, vp)(VNHEAD(vp), flags)
371 320
372/* 321/*
373 * Flags for read/write calls - same values as IRIX 322 * Flags for read/write calls - same values as IRIX
@@ -377,7 +326,7 @@ typedef struct vnodeops {
377#define IO_INVIS 0x00020 /* don't update inode timestamps */ 326#define IO_INVIS 0x00020 /* don't update inode timestamps */
378 327
379/* 328/*
380 * Flags for VOP_IFLUSH call 329 * Flags for vop_iflush call
381 */ 330 */
382#define FLUSH_SYNC 1 /* wait for flush to complete */ 331#define FLUSH_SYNC 1 /* wait for flush to complete */
383#define FLUSH_INODE 2 /* flush the inode itself */ 332#define FLUSH_INODE 2 /* flush the inode itself */
@@ -385,8 +334,7 @@ typedef struct vnodeops {
385 * this inode out to disk */ 334 * this inode out to disk */
386 335
387/* 336/*
388 * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and 337 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
389 * VOP_FLUSH_PAGES.
390 */ 338 */
391#define FI_NONE 0 /* none */ 339#define FI_NONE 0 /* none */
392#define FI_REMAPF 1 /* Do a remapf prior to the operation */ 340#define FI_REMAPF 1 /* Do a remapf prior to the operation */
@@ -398,7 +346,7 @@ typedef struct vnodeops {
398 * Vnode attributes. va_mask indicates those attributes the caller 346 * Vnode attributes. va_mask indicates those attributes the caller
399 * wants to set or extract. 347 * wants to set or extract.
400 */ 348 */
401typedef struct vattr { 349typedef struct bhv_vattr {
402 int va_mask; /* bit-mask of attributes present */ 350 int va_mask; /* bit-mask of attributes present */
403 mode_t va_mode; /* file access mode and type */ 351 mode_t va_mode; /* file access mode and type */
404 xfs_nlink_t va_nlink; /* number of references to file */ 352 xfs_nlink_t va_nlink; /* number of references to file */
@@ -418,7 +366,7 @@ typedef struct vattr {
418 u_long va_nextents; /* number of extents in file */ 366 u_long va_nextents; /* number of extents in file */
419 u_long va_anextents; /* number of attr extents in file */ 367 u_long va_anextents; /* number of attr extents in file */
420 prid_t va_projid; /* project id */ 368 prid_t va_projid; /* project id */
421} vattr_t; 369} bhv_vattr_t;
422 370
423/* 371/*
424 * setattr or getattr attributes 372 * setattr or getattr attributes
@@ -492,29 +440,17 @@ typedef struct vattr {
492 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) 440 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
493 441
494extern void vn_init(void); 442extern void vn_init(void);
495extern vnode_t *vn_initialize(struct inode *); 443extern bhv_vnode_t *vn_initialize(struct inode *);
496 444extern int vn_revalidate(struct bhv_vnode *);
497/* 445extern int __vn_revalidate(struct bhv_vnode *, bhv_vattr_t *);
498 * vnode_map structures _must_ match vn_epoch and vnode structure sizes. 446extern void vn_revalidate_core(struct bhv_vnode *, bhv_vattr_t *);
499 */
500typedef struct vnode_map {
501 vfs_t *v_vfsp;
502 vnumber_t v_number; /* in-core vnode number */
503 xfs_ino_t v_ino; /* inode # */
504} vmap_t;
505
506#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \
507 (vmap).v_number = (vp)->v_number, \
508 (vmap).v_ino = (vp)->v_inode.i_ino; }
509 447
510extern int vn_revalidate(struct vnode *); 448extern void vn_iowait(struct bhv_vnode *vp);
511extern int __vn_revalidate(struct vnode *, vattr_t *); 449extern void vn_iowake(struct bhv_vnode *vp);
512extern void vn_revalidate_core(struct vnode *, vattr_t *);
513 450
514extern void vn_iowait(struct vnode *vp); 451extern void vn_ioerror(struct bhv_vnode *vp, int error, char *f, int l);
515extern void vn_iowake(struct vnode *vp);
516 452
517static inline int vn_count(struct vnode *vp) 453static inline int vn_count(struct bhv_vnode *vp)
518{ 454{
519 return atomic_read(&vn_to_inode(vp)->i_count); 455 return atomic_read(&vn_to_inode(vp)->i_count);
520} 456}
@@ -522,7 +458,7 @@ static inline int vn_count(struct vnode *vp)
522/* 458/*
523 * Vnode reference counting functions (and macros for compatibility). 459 * Vnode reference counting functions (and macros for compatibility).
524 */ 460 */
525extern vnode_t *vn_hold(struct vnode *); 461extern bhv_vnode_t *vn_hold(struct bhv_vnode *);
526 462
527#if defined(XFS_VNODE_TRACE) 463#if defined(XFS_VNODE_TRACE)
528#define VN_HOLD(vp) \ 464#define VN_HOLD(vp) \
@@ -536,7 +472,7 @@ extern vnode_t *vn_hold(struct vnode *);
536#define VN_RELE(vp) (iput(vn_to_inode(vp))) 472#define VN_RELE(vp) (iput(vn_to_inode(vp)))
537#endif 473#endif
538 474
539static inline struct vnode *vn_grab(struct vnode *vp) 475static inline struct bhv_vnode *vn_grab(struct bhv_vnode *vp)
540{ 476{
541 struct inode *inode = igrab(vn_to_inode(vp)); 477 struct inode *inode = igrab(vn_to_inode(vp));
542 return inode ? vn_from_inode(inode) : NULL; 478 return inode ? vn_from_inode(inode) : NULL;
@@ -554,32 +490,39 @@ static inline struct vnode *vn_grab(struct vnode *vp)
554 */ 490 */
555#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) 491#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
556#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) 492#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
557#define VN_FLAGSET(vp,b) vn_flagset(vp,b)
558#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b)
559 493
560static __inline__ void vn_flagset(struct vnode *vp, uint flag) 494static __inline__ void vn_flagset(struct bhv_vnode *vp, uint flag)
561{ 495{
562 spin_lock(&vp->v_lock); 496 spin_lock(&vp->v_lock);
563 vp->v_flag |= flag; 497 vp->v_flag |= flag;
564 spin_unlock(&vp->v_lock); 498 spin_unlock(&vp->v_lock);
565} 499}
566 500
567static __inline__ void vn_flagclr(struct vnode *vp, uint flag) 501static __inline__ uint vn_flagclr(struct bhv_vnode *vp, uint flag)
568{ 502{
503 uint cleared;
504
569 spin_lock(&vp->v_lock); 505 spin_lock(&vp->v_lock);
506 cleared = (vp->v_flag & flag);
570 vp->v_flag &= ~flag; 507 vp->v_flag &= ~flag;
571 spin_unlock(&vp->v_lock); 508 spin_unlock(&vp->v_lock);
509 return cleared;
572} 510}
573 511
512#define VMODIFY(vp) vn_flagset(vp, VMODIFIED)
513#define VUNMODIFY(vp) vn_flagclr(vp, VMODIFIED)
514#define VTRUNCATE(vp) vn_flagset(vp, VTRUNCATED)
515#define VUNTRUNCATE(vp) vn_flagclr(vp, VTRUNCATED)
516
574/* 517/*
575 * Dealing with bad inodes 518 * Dealing with bad inodes
576 */ 519 */
577static inline void vn_mark_bad(struct vnode *vp) 520static inline void vn_mark_bad(struct bhv_vnode *vp)
578{ 521{
579 make_bad_inode(vn_to_inode(vp)); 522 make_bad_inode(vn_to_inode(vp));
580} 523}
581 524
582static inline int VN_BAD(struct vnode *vp) 525static inline int VN_BAD(struct bhv_vnode *vp)
583{ 526{
584 return is_bad_inode(vn_to_inode(vp)); 527 return is_bad_inode(vn_to_inode(vp));
585} 528}
@@ -587,18 +530,18 @@ static inline int VN_BAD(struct vnode *vp)
587/* 530/*
588 * Extracting atime values in various formats 531 * Extracting atime values in various formats
589 */ 532 */
590static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime) 533static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime)
591{ 534{
592 bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec; 535 bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
593 bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec; 536 bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
594} 537}
595 538
596static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts) 539static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts)
597{ 540{
598 *ts = vp->v_inode.i_atime; 541 *ts = vp->v_inode.i_atime;
599} 542}
600 543
601static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) 544static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
602{ 545{
603 *tt = vp->v_inode.i_atime.tv_sec; 546 *tt = vp->v_inode.i_atime.tv_sec;
604} 547}
@@ -610,11 +553,10 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
610#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) 553#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages)
611#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ 554#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
612 PAGECACHE_TAG_DIRTY) 555 PAGECACHE_TAG_DIRTY)
613#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) 556#define VN_TRUNC(vp) ((vp)->v_flag & VTRUNCATED)
614#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED)
615 557
616/* 558/*
617 * Flags to VOP_SETATTR/VOP_GETATTR. 559 * Flags to vop_setattr/getattr.
618 */ 560 */
619#define ATTR_UTIME 0x01 /* non-default utime(2) request */ 561#define ATTR_UTIME 0x01 /* non-default utime(2) request */
620#define ATTR_DMI 0x08 /* invocation from a DMI function */ 562#define ATTR_DMI 0x08 /* invocation from a DMI function */
@@ -624,7 +566,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
624#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ 566#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
625 567
626/* 568/*
627 * Flags to VOP_FSYNC and VOP_RECLAIM. 569 * Flags to vop_fsync/reclaim.
628 */ 570 */
629#define FSYNC_NOWAIT 0 /* asynchronous flush */ 571#define FSYNC_NOWAIT 0 /* asynchronous flush */
630#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ 572#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */
@@ -643,11 +585,11 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
643#define VNODE_KTRACE_REF 4 585#define VNODE_KTRACE_REF 4
644#define VNODE_KTRACE_RELE 5 586#define VNODE_KTRACE_RELE 5
645 587
646extern void vn_trace_entry(struct vnode *, const char *, inst_t *); 588extern void vn_trace_entry(struct bhv_vnode *, const char *, inst_t *);
647extern void vn_trace_exit(struct vnode *, const char *, inst_t *); 589extern void vn_trace_exit(struct bhv_vnode *, const char *, inst_t *);
648extern void vn_trace_hold(struct vnode *, char *, int, inst_t *); 590extern void vn_trace_hold(struct bhv_vnode *, char *, int, inst_t *);
649extern void vn_trace_ref(struct vnode *, char *, int, inst_t *); 591extern void vn_trace_ref(struct bhv_vnode *, char *, int, inst_t *);
650extern void vn_trace_rele(struct vnode *, char *, int, inst_t *); 592extern void vn_trace_rele(struct bhv_vnode *, char *, int, inst_t *);
651 593
652#define VN_TRACE(vp) \ 594#define VN_TRACE(vp) \
653 vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address) 595 vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 772ac48329ea..3aa771531856 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -444,7 +442,7 @@ xfs_qm_dqalloc(
444 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, 442 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
445 &firstblock, 443 &firstblock,
446 XFS_QM_DQALLOC_SPACE_RES(mp), 444 XFS_QM_DQALLOC_SPACE_RES(mp),
447 &map, &nmaps, &flist))) { 445 &map, &nmaps, &flist, NULL))) {
448 goto error0; 446 goto error0;
449 } 447 }
450 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 448 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -559,7 +557,7 @@ xfs_qm_dqtobp(
559 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, 557 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
560 XFS_DQUOT_CLUSTER_SIZE_FSB, 558 XFS_DQUOT_CLUSTER_SIZE_FSB,
561 XFS_BMAPI_METADATA, 559 XFS_BMAPI_METADATA,
562 NULL, 0, &map, &nmaps, NULL); 560 NULL, 0, &map, &nmaps, NULL, NULL);
563 561
564 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 562 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
565 if (error) 563 if (error)
@@ -1261,7 +1259,7 @@ xfs_qm_dqflush(
1261 1259
1262 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 1260 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
1263 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1261 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
1264 xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE); 1262 xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
1265 return XFS_ERROR(EIO); 1263 return XFS_ERROR(EIO);
1266 } 1264 }
1267 1265
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index c0c629663a5c..78d3ab95c5fd 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -119,7 +119,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
119 */ 119 */
120#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ 120#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\
121 (dqp)->dq_flags |= XFS_DQ_FLOCKED; } 121 (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
122#define xfs_dqfunlock(dqp) { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \ 122#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \
123 vsema(&((dqp)->q_flock)); \ 123 vsema(&((dqp)->q_flock)); \
124 (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } 124 (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
125 125
@@ -128,7 +128,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
128#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \ 128#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \
129 &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s) 129 &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
130 130
131#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0) 131#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
132#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) 132#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
133#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 133#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
134#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 134#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 546f48af882a..5b2dcc58b244 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -248,7 +246,7 @@ xfs_qm_dquot_logitem_pushbuf(
248 * inode flush completed and the inode was taken off the AIL. 246 * inode flush completed and the inode was taken off the AIL.
249 * So, just get out. 247 * So, just get out.
250 */ 248 */
251 if ((valusema(&(dqp->q_flock)) > 0) || 249 if (!issemalocked(&(dqp->q_flock)) ||
252 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 250 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
253 qip->qli_pushbuf_flag = 0; 251 qip->qli_pushbuf_flag = 0;
254 xfs_dqunlock(dqp); 252 xfs_dqunlock(dqp);
@@ -261,7 +259,7 @@ xfs_qm_dquot_logitem_pushbuf(
261 if (bp != NULL) { 259 if (bp != NULL) {
262 if (XFS_BUF_ISDELAYWRITE(bp)) { 260 if (XFS_BUF_ISDELAYWRITE(bp)) {
263 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 261 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
264 (valusema(&(dqp->q_flock)) <= 0)); 262 issemalocked(&(dqp->q_flock)));
265 qip->qli_pushbuf_flag = 0; 263 qip->qli_pushbuf_flag = 0;
266 xfs_dqunlock(dqp); 264 xfs_dqunlock(dqp);
267 265
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7fb5eca9bd50..e23e45535c48 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_alloc.h" 28#include "xfs_alloc.h"
30#include "xfs_dmapi.h" 29#include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
33#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
39#include "xfs_dinode.h" 37#include "xfs_dinode.h"
@@ -1603,7 +1601,7 @@ xfs_qm_dqiterate(
1603 maxlblkcnt - lblkno, 1601 maxlblkcnt - lblkno,
1604 XFS_BMAPI_METADATA, 1602 XFS_BMAPI_METADATA,
1605 NULL, 1603 NULL,
1606 0, map, &nmaps, NULL); 1604 0, map, &nmaps, NULL, NULL);
1607 xfs_iunlock(qip, XFS_ILOCK_SHARED); 1605 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1608 if (error) 1606 if (error)
1609 break; 1607 break;
@@ -1905,9 +1903,7 @@ xfs_qm_quotacheck(
1905 */ 1903 */
1906 if ((error = xfs_bulkstat(mp, &lastino, &count, 1904 if ((error = xfs_bulkstat(mp, &lastino, &count,
1907 xfs_qm_dqusage_adjust, NULL, 1905 xfs_qm_dqusage_adjust, NULL,
1908 structsz, NULL, 1906 structsz, NULL, BULKSTAT_FG_IGET, &done)))
1909 BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
1910 &done)))
1911 break; 1907 break;
1912 1908
1913 } while (! done); 1909 } while (! done);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 6838b36d95a9..e95e99f7168f 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_alloc.h" 28#include "xfs_alloc.h"
30#include "xfs_dmapi.h" 29#include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
33#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
39#include "xfs_dinode.h" 37#include "xfs_dinode.h"
@@ -129,7 +127,7 @@ xfs_qm_parseargs(
129 return XFS_ERROR(EINVAL); 127 return XFS_ERROR(EINVAL);
130 } 128 }
131 129
132 PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error); 130 error = bhv_next_vfs_parseargs(BHV_NEXT(bhv), options, args, update);
133 if (!error && !referenced) 131 if (!error && !referenced)
134 bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM); 132 bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
135 return error; 133 return error;
@@ -140,9 +138,8 @@ xfs_qm_showargs(
140 struct bhv_desc *bhv, 138 struct bhv_desc *bhv,
141 struct seq_file *m) 139 struct seq_file *m)
142{ 140{
143 struct vfs *vfsp = bhvtovfs(bhv); 141 struct bhv_vfs *vfsp = bhvtovfs(bhv);
144 struct xfs_mount *mp = XFS_VFSTOM(vfsp); 142 struct xfs_mount *mp = XFS_VFSTOM(vfsp);
145 int error;
146 143
147 if (mp->m_qflags & XFS_UQUOTA_ACCT) { 144 if (mp->m_qflags & XFS_UQUOTA_ACCT) {
148 (mp->m_qflags & XFS_UQUOTA_ENFD) ? 145 (mp->m_qflags & XFS_UQUOTA_ENFD) ?
@@ -165,8 +162,7 @@ xfs_qm_showargs(
165 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 162 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
166 seq_puts(m, "," MNTOPT_NOQUOTA); 163 seq_puts(m, "," MNTOPT_NOQUOTA);
167 164
168 PVFS_SHOWARGS(BHV_NEXT(bhv), m, error); 165 return bhv_next_vfs_showargs(BHV_NEXT(bhv), m);
169 return error;
170} 166}
171 167
172STATIC int 168STATIC int
@@ -175,14 +171,67 @@ xfs_qm_mount(
175 struct xfs_mount_args *args, 171 struct xfs_mount_args *args,
176 struct cred *cr) 172 struct cred *cr)
177{ 173{
178 struct vfs *vfsp = bhvtovfs(bhv); 174 struct bhv_vfs *vfsp = bhvtovfs(bhv);
179 struct xfs_mount *mp = XFS_VFSTOM(vfsp); 175 struct xfs_mount *mp = XFS_VFSTOM(vfsp);
180 int error;
181 176
182 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA)) 177 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA))
183 xfs_qm_mount_quotainit(mp, args->flags); 178 xfs_qm_mount_quotainit(mp, args->flags);
184 PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error); 179 return bhv_next_vfs_mount(BHV_NEXT(bhv), args, cr);
185 return error; 180}
181
182/*
183 * Directory tree accounting is implemented using project quotas, where
184 * the project identifier is inherited from parent directories.
185 * A statvfs (df, etc.) of a directory that is using project quota should
186 * return a statvfs of the project, not the entire filesystem.
187 * This makes such trees appear as if they are filesystems in themselves.
188 */
189STATIC int
190xfs_qm_statvfs(
191 struct bhv_desc *bhv,
192 bhv_statvfs_t *statp,
193 struct bhv_vnode *vnode)
194{
195 xfs_mount_t *mp;
196 xfs_inode_t *ip;
197 xfs_dquot_t *dqp;
198 xfs_disk_dquot_t *dp;
199 __uint64_t limit;
200 int error;
201
202 error = bhv_next_vfs_statvfs(BHV_NEXT(bhv), statp, vnode);
203 if (error || !vnode)
204 return error;
205
206 mp = XFS_BHVTOM(bhv);
207 ip = xfs_vtoi(vnode);
208
209 if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
210 return 0;
211 if (!(mp->m_qflags & XFS_PQUOTA_ACCT))
212 return 0;
213 if (!(mp->m_qflags & XFS_OQUOTA_ENFD))
214 return 0;
215
216 if (xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp))
217 return 0;
218 dp = &dqp->q_core;
219
220 limit = dp->d_blk_softlimit ? dp->d_blk_softlimit : dp->d_blk_hardlimit;
221 if (limit && statp->f_blocks > limit) {
222 statp->f_blocks = limit;
223 statp->f_bfree = (statp->f_blocks > dp->d_bcount) ?
224 (statp->f_blocks - dp->d_bcount) : 0;
225 }
226 limit = dp->d_ino_softlimit ? dp->d_ino_softlimit : dp->d_ino_hardlimit;
227 if (limit && statp->f_files > limit) {
228 statp->f_files = limit;
229 statp->f_ffree = (statp->f_files > dp->d_icount) ?
230 (statp->f_ffree - dp->d_icount) : 0;
231 }
232
233 xfs_qm_dqput(dqp);
234 return 0;
186} 235}
187 236
188STATIC int 237STATIC int
@@ -191,7 +240,7 @@ xfs_qm_syncall(
191 int flags, 240 int flags,
192 cred_t *credp) 241 cred_t *credp)
193{ 242{
194 struct vfs *vfsp = bhvtovfs(bhv); 243 struct bhv_vfs *vfsp = bhvtovfs(bhv);
195 struct xfs_mount *mp = XFS_VFSTOM(vfsp); 244 struct xfs_mount *mp = XFS_VFSTOM(vfsp);
196 int error; 245 int error;
197 246
@@ -210,8 +259,7 @@ xfs_qm_syncall(
210 } 259 }
211 } 260 }
212 } 261 }
213 PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error); 262 return bhv_next_vfs_sync(BHV_NEXT(bhv), flags, credp);
214 return error;
215} 263}
216 264
217STATIC int 265STATIC int
@@ -346,11 +394,12 @@ STATIC struct xfs_qmops xfs_qmcore_xfs = {
346 .xfs_dqtrxops = &xfs_trans_dquot_ops, 394 .xfs_dqtrxops = &xfs_trans_dquot_ops,
347}; 395};
348 396
349struct bhv_vfsops xfs_qmops = { { 397struct bhv_module_vfsops xfs_qmops = { {
350 BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM), 398 BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM),
351 .vfs_parseargs = xfs_qm_parseargs, 399 .vfs_parseargs = xfs_qm_parseargs,
352 .vfs_showargs = xfs_qm_showargs, 400 .vfs_showargs = xfs_qm_showargs,
353 .vfs_mount = xfs_qm_mount, 401 .vfs_mount = xfs_qm_mount,
402 .vfs_statvfs = xfs_qm_statvfs,
354 .vfs_sync = xfs_qm_syncall, 403 .vfs_sync = xfs_qm_syncall,
355 .vfs_quotactl = xfs_qm_quotactl, }, 404 .vfs_quotactl = xfs_qm_quotactl, },
356}; 405};
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 0570f7733550..6f858fb81a36 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c55db463bbf2..ed620c4d1594 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,7 +26,6 @@
26#include "xfs_trans.h" 26#include "xfs_trans.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir.h"
30#include "xfs_dir2.h" 29#include "xfs_dir2.h"
31#include "xfs_alloc.h" 30#include "xfs_alloc.h"
32#include "xfs_dmapi.h" 31#include "xfs_dmapi.h"
@@ -35,7 +34,6 @@
35#include "xfs_bmap_btree.h" 34#include "xfs_bmap_btree.h"
36#include "xfs_alloc_btree.h" 35#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 36#include "xfs_ialloc_btree.h"
38#include "xfs_dir_sf.h"
39#include "xfs_dir2_sf.h" 37#include "xfs_dir2_sf.h"
40#include "xfs_attr_sf.h" 38#include "xfs_attr_sf.h"
41#include "xfs_dinode.h" 39#include "xfs_dinode.h"
@@ -91,8 +89,8 @@ xfs_qm_quotactl(
91 xfs_caddr_t addr) 89 xfs_caddr_t addr)
92{ 90{
93 xfs_mount_t *mp; 91 xfs_mount_t *mp;
92 bhv_vfs_t *vfsp;
94 int error; 93 int error;
95 struct vfs *vfsp;
96 94
97 vfsp = bhvtovfs(bdp); 95 vfsp = bhvtovfs(bdp);
98 mp = XFS_VFSTOM(vfsp); 96 mp = XFS_VFSTOM(vfsp);
@@ -1035,7 +1033,7 @@ xfs_qm_dqrele_all_inodes(
1035{ 1033{
1036 xfs_inode_t *ip, *topino; 1034 xfs_inode_t *ip, *topino;
1037 uint ireclaims; 1035 uint ireclaims;
1038 vnode_t *vp; 1036 bhv_vnode_t *vp;
1039 boolean_t vnode_refd; 1037 boolean_t vnode_refd;
1040 1038
1041 ASSERT(mp->m_quotainfo); 1039 ASSERT(mp->m_quotainfo);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 9168918db252..0242e9666e8e 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
39#include "xfs_inode.h" 37#include "xfs_inode.h"
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index b08b3d9345b7..36fbeccdc722 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -47,7 +47,7 @@ cmn_err(register int level, char *fmt, ...)
47 va_start(ap, fmt); 47 va_start(ap, fmt);
48 if (*fmt == '!') fp++; 48 if (*fmt == '!') fp++;
49 len = vsprintf(message, fp, ap); 49 len = vsprintf(message, fp, ap);
50 if (message[len-1] != '\n') 50 if (level != CE_DEBUG && message[len-1] != '\n')
51 strcat(message, "\n"); 51 strcat(message, "\n");
52 printk("%s%s", err_level[level], message); 52 printk("%s%s", err_level[level], message);
53 va_end(ap); 53 va_end(ap);
@@ -68,7 +68,7 @@ icmn_err(register int level, char *fmt, va_list ap)
68 level = XFS_MAX_ERR_LEVEL; 68 level = XFS_MAX_ERR_LEVEL;
69 spin_lock_irqsave(&xfs_err_lock,flags); 69 spin_lock_irqsave(&xfs_err_lock,flags);
70 len = vsprintf(message, fmt, ap); 70 len = vsprintf(message, fmt, ap);
71 if (message[len-1] != '\n') 71 if (level != CE_DEBUG && message[len-1] != '\n')
72 strcat(message, "\n"); 72 strcat(message, "\n");
73 spin_unlock_irqrestore(&xfs_err_lock,flags); 73 spin_unlock_irqrestore(&xfs_err_lock,flags);
74 printk("%s%s", err_level[level], message); 74 printk("%s%s", err_level[level], message);
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index e3bf58112e7e..4f54dca662a8 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -33,9 +33,6 @@ extern void cmn_err(int, char *, ...)
33 __attribute__ ((format (printf, 2, 3))); 33 __attribute__ ((format (printf, 2, 3)));
34extern void assfail(char *expr, char *f, int l); 34extern void assfail(char *expr, char *f, int l);
35 35
36#define prdev(fmt,targ,args...) \
37 printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
38
39#define ASSERT_ALWAYS(expr) \ 36#define ASSERT_ALWAYS(expr) \
40 (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 37 (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
41 38
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 2539af34eb63..4b0cb474be4c 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,12 +21,10 @@
21#include "xfs_bit.h" 21#include "xfs_bit.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir.h"
25#include "xfs_dir2.h" 24#include "xfs_dir2.h"
26#include "xfs_bmap_btree.h" 25#include "xfs_bmap_btree.h"
27#include "xfs_alloc_btree.h" 26#include "xfs_alloc_btree.h"
28#include "xfs_ialloc_btree.h" 27#include "xfs_ialloc_btree.h"
29#include "xfs_dir_sf.h"
30#include "xfs_dir2_sf.h" 28#include "xfs_dir2_sf.h"
31#include "xfs_attr_sf.h" 29#include "xfs_attr_sf.h"
32#include "xfs_dinode.h" 30#include "xfs_dinode.h"
@@ -39,15 +37,15 @@
39#include <linux/capability.h> 37#include <linux/capability.h>
40#include <linux/posix_acl_xattr.h> 38#include <linux/posix_acl_xattr.h>
41 39
42STATIC int xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *); 40STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *);
43STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); 41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
44STATIC void xfs_acl_get_endian(xfs_acl_t *); 42STATIC void xfs_acl_get_endian(xfs_acl_t *);
45STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); 43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
46STATIC int xfs_acl_invalid(xfs_acl_t *); 44STATIC int xfs_acl_invalid(xfs_acl_t *);
47STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); 45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
48STATIC void xfs_acl_get_attr(vnode_t *, xfs_acl_t *, int, int, int *); 46STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *);
49STATIC void xfs_acl_set_attr(vnode_t *, xfs_acl_t *, int, int *); 47STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *);
50STATIC int xfs_acl_allow_set(vnode_t *, int); 48STATIC int xfs_acl_allow_set(bhv_vnode_t *, int);
51 49
52kmem_zone_t *xfs_acl_zone; 50kmem_zone_t *xfs_acl_zone;
53 51
@@ -57,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
57 */ 55 */
58int 56int
59xfs_acl_vhasacl_access( 57xfs_acl_vhasacl_access(
60 vnode_t *vp) 58 bhv_vnode_t *vp)
61{ 59{
62 int error; 60 int error;
63 61
@@ -70,7 +68,7 @@ xfs_acl_vhasacl_access(
70 */ 68 */
71int 69int
72xfs_acl_vhasacl_default( 70xfs_acl_vhasacl_default(
73 vnode_t *vp) 71 bhv_vnode_t *vp)
74{ 72{
75 int error; 73 int error;
76 74
@@ -209,7 +207,7 @@ posix_acl_xfs_to_xattr(
209 207
210int 208int
211xfs_acl_vget( 209xfs_acl_vget(
212 vnode_t *vp, 210 bhv_vnode_t *vp,
213 void *acl, 211 void *acl,
214 size_t size, 212 size_t size,
215 int kind) 213 int kind)
@@ -241,10 +239,10 @@ xfs_acl_vget(
241 goto out; 239 goto out;
242 } 240 }
243 if (kind == _ACL_TYPE_ACCESS) { 241 if (kind == _ACL_TYPE_ACCESS) {
244 vattr_t va; 242 bhv_vattr_t va;
245 243
246 va.va_mask = XFS_AT_MODE; 244 va.va_mask = XFS_AT_MODE;
247 VOP_GETATTR(vp, &va, 0, sys_cred, error); 245 error = bhv_vop_getattr(vp, &va, 0, sys_cred);
248 if (error) 246 if (error)
249 goto out; 247 goto out;
250 xfs_acl_sync_mode(va.va_mode, xfs_acl); 248 xfs_acl_sync_mode(va.va_mode, xfs_acl);
@@ -260,7 +258,7 @@ out:
260 258
261int 259int
262xfs_acl_vremove( 260xfs_acl_vremove(
263 vnode_t *vp, 261 bhv_vnode_t *vp,
264 int kind) 262 int kind)
265{ 263{
266 int error; 264 int error;
@@ -268,9 +266,9 @@ xfs_acl_vremove(
268 VN_HOLD(vp); 266 VN_HOLD(vp);
269 error = xfs_acl_allow_set(vp, kind); 267 error = xfs_acl_allow_set(vp, kind);
270 if (!error) { 268 if (!error) {
271 VOP_ATTR_REMOVE(vp, kind == _ACL_TYPE_DEFAULT? 269 error = bhv_vop_attr_remove(vp, kind == _ACL_TYPE_DEFAULT?
272 SGI_ACL_DEFAULT: SGI_ACL_FILE, 270 SGI_ACL_DEFAULT: SGI_ACL_FILE,
273 ATTR_ROOT, sys_cred, error); 271 ATTR_ROOT, sys_cred);
274 if (error == ENOATTR) 272 if (error == ENOATTR)
275 error = 0; /* 'scool */ 273 error = 0; /* 'scool */
276 } 274 }
@@ -280,7 +278,7 @@ xfs_acl_vremove(
280 278
281int 279int
282xfs_acl_vset( 280xfs_acl_vset(
283 vnode_t *vp, 281 bhv_vnode_t *vp,
284 void *acl, 282 void *acl,
285 size_t size, 283 size_t size,
286 int kind) 284 int kind)
@@ -370,10 +368,10 @@ xfs_acl_iaccess(
370 368
371STATIC int 369STATIC int
372xfs_acl_allow_set( 370xfs_acl_allow_set(
373 vnode_t *vp, 371 bhv_vnode_t *vp,
374 int kind) 372 int kind)
375{ 373{
376 vattr_t va; 374 bhv_vattr_t va;
377 int error; 375 int error;
378 376
379 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 377 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
@@ -383,7 +381,7 @@ xfs_acl_allow_set(
383 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 381 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
384 return EROFS; 382 return EROFS;
385 va.va_mask = XFS_AT_UID; 383 va.va_mask = XFS_AT_UID;
386 VOP_GETATTR(vp, &va, 0, NULL, error); 384 error = bhv_vop_getattr(vp, &va, 0, NULL);
387 if (error) 385 if (error)
388 return error; 386 return error;
389 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER)) 387 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
@@ -606,7 +604,7 @@ xfs_acl_get_endian(
606 */ 604 */
607STATIC void 605STATIC void
608xfs_acl_get_attr( 606xfs_acl_get_attr(
609 vnode_t *vp, 607 bhv_vnode_t *vp,
610 xfs_acl_t *aclp, 608 xfs_acl_t *aclp,
611 int kind, 609 int kind,
612 int flags, 610 int flags,
@@ -616,9 +614,9 @@ xfs_acl_get_attr(
616 614
617 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); 615 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
618 flags |= ATTR_ROOT; 616 flags |= ATTR_ROOT;
619 VOP_ATTR_GET(vp, 617 *error = bhv_vop_attr_get(vp, kind == _ACL_TYPE_ACCESS ?
620 kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, 618 SGI_ACL_FILE : SGI_ACL_DEFAULT,
621 (char *)aclp, &len, flags, sys_cred, *error); 619 (char *)aclp, &len, flags, sys_cred);
622 if (*error || (flags & ATTR_KERNOVAL)) 620 if (*error || (flags & ATTR_KERNOVAL))
623 return; 621 return;
624 xfs_acl_get_endian(aclp); 622 xfs_acl_get_endian(aclp);
@@ -629,7 +627,7 @@ xfs_acl_get_attr(
629 */ 627 */
630STATIC void 628STATIC void
631xfs_acl_set_attr( 629xfs_acl_set_attr(
632 vnode_t *vp, 630 bhv_vnode_t *vp,
633 xfs_acl_t *aclp, 631 xfs_acl_t *aclp,
634 int kind, 632 int kind,
635 int *error) 633 int *error)
@@ -654,19 +652,19 @@ xfs_acl_set_attr(
654 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); 652 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
655 } 653 }
656 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); 654 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
657 VOP_ATTR_SET(vp, 655 *error = bhv_vop_attr_set(vp, kind == _ACL_TYPE_ACCESS ?
658 kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT, 656 SGI_ACL_FILE: SGI_ACL_DEFAULT,
659 (char *)newacl, len, ATTR_ROOT, sys_cred, *error); 657 (char *)newacl, len, ATTR_ROOT, sys_cred);
660 _ACL_FREE(newacl); 658 _ACL_FREE(newacl);
661} 659}
662 660
663int 661int
664xfs_acl_vtoacl( 662xfs_acl_vtoacl(
665 vnode_t *vp, 663 bhv_vnode_t *vp,
666 xfs_acl_t *access_acl, 664 xfs_acl_t *access_acl,
667 xfs_acl_t *default_acl) 665 xfs_acl_t *default_acl)
668{ 666{
669 vattr_t va; 667 bhv_vattr_t va;
670 int error = 0; 668 int error = 0;
671 669
672 if (access_acl) { 670 if (access_acl) {
@@ -678,7 +676,7 @@ xfs_acl_vtoacl(
678 if (!error) { 676 if (!error) {
679 /* Got the ACL, need the mode... */ 677 /* Got the ACL, need the mode... */
680 va.va_mask = XFS_AT_MODE; 678 va.va_mask = XFS_AT_MODE;
681 VOP_GETATTR(vp, &va, 0, sys_cred, error); 679 error = bhv_vop_getattr(vp, &va, 0, sys_cred);
682 } 680 }
683 681
684 if (error) 682 if (error)
@@ -701,8 +699,8 @@ xfs_acl_vtoacl(
701 */ 699 */
702int 700int
703xfs_acl_inherit( 701xfs_acl_inherit(
704 vnode_t *vp, 702 bhv_vnode_t *vp,
705 vattr_t *vap, 703 bhv_vattr_t *vap,
706 xfs_acl_t *pdaclp) 704 xfs_acl_t *pdaclp)
707{ 705{
708 xfs_acl_t *cacl; 706 xfs_acl_t *cacl;
@@ -757,11 +755,11 @@ xfs_acl_inherit(
757 */ 755 */
758STATIC int 756STATIC int
759xfs_acl_setmode( 757xfs_acl_setmode(
760 vnode_t *vp, 758 bhv_vnode_t *vp,
761 xfs_acl_t *acl, 759 xfs_acl_t *acl,
762 int *basicperms) 760 int *basicperms)
763{ 761{
764 vattr_t va; 762 bhv_vattr_t va;
765 xfs_acl_entry_t *ap; 763 xfs_acl_entry_t *ap;
766 xfs_acl_entry_t *gap = NULL; 764 xfs_acl_entry_t *gap = NULL;
767 int i, error, nomask = 1; 765 int i, error, nomask = 1;
@@ -776,7 +774,7 @@ xfs_acl_setmode(
776 * mode. The m:: bits take precedence over the g:: bits. 774 * mode. The m:: bits take precedence over the g:: bits.
777 */ 775 */
778 va.va_mask = XFS_AT_MODE; 776 va.va_mask = XFS_AT_MODE;
779 VOP_GETATTR(vp, &va, 0, sys_cred, error); 777 error = bhv_vop_getattr(vp, &va, 0, sys_cred);
780 if (error) 778 if (error)
781 return error; 779 return error;
782 780
@@ -810,8 +808,7 @@ xfs_acl_setmode(
810 if (gap && nomask) 808 if (gap && nomask)
811 va.va_mode |= gap->ae_perm << 3; 809 va.va_mode |= gap->ae_perm << 3;
812 810
813 VOP_SETATTR(vp, &va, 0, sys_cred, error); 811 return bhv_vop_setattr(vp, &va, 0, sys_cred);
814 return error;
815} 812}
816 813
817/* 814/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 538d0d65b04c..f853cf1a6270 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -50,7 +50,7 @@ typedef struct xfs_acl {
50#ifdef CONFIG_XFS_POSIX_ACL 50#ifdef CONFIG_XFS_POSIX_ACL
51 51
52struct vattr; 52struct vattr;
53struct vnode; 53struct bhv_vnode;
54struct xfs_inode; 54struct xfs_inode;
55 55
56extern struct kmem_zone *xfs_acl_zone; 56extern struct kmem_zone *xfs_acl_zone;
@@ -58,14 +58,14 @@ extern struct kmem_zone *xfs_acl_zone;
58 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) 58 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
59#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) 59#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
60 60
61extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *); 61extern int xfs_acl_inherit(struct bhv_vnode *, struct bhv_vattr *, xfs_acl_t *);
62extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); 62extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
63extern int xfs_acl_vtoacl(struct vnode *, xfs_acl_t *, xfs_acl_t *); 63extern int xfs_acl_vtoacl(struct bhv_vnode *, xfs_acl_t *, xfs_acl_t *);
64extern int xfs_acl_vhasacl_access(struct vnode *); 64extern int xfs_acl_vhasacl_access(struct bhv_vnode *);
65extern int xfs_acl_vhasacl_default(struct vnode *); 65extern int xfs_acl_vhasacl_default(struct bhv_vnode *);
66extern int xfs_acl_vset(struct vnode *, void *, size_t, int); 66extern int xfs_acl_vset(struct bhv_vnode *, void *, size_t, int);
67extern int xfs_acl_vget(struct vnode *, void *, size_t, int); 67extern int xfs_acl_vget(struct bhv_vnode *, void *, size_t, int);
68extern int xfs_acl_vremove(struct vnode *vp, int); 68extern int xfs_acl_vremove(struct bhv_vnode *, int);
69 69
70#define _ACL_TYPE_ACCESS 1 70#define _ACL_TYPE_ACCESS 1
71#define _ACL_TYPE_DEFAULT 2 71#define _ACL_TYPE_DEFAULT 2
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8558226281c4..eef6763f3a67 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -1862,7 +1860,7 @@ xfs_alloc_fix_freelist(
1862 (pag->pagf_longest - delta) : 1860 (pag->pagf_longest - delta) :
1863 (pag->pagf_flcount > 0 || pag->pagf_longest > 0); 1861 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1864 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1862 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1865 (args->minleft && 1863 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1866 (int)(pag->pagf_freeblks + pag->pagf_flcount - 1864 (int)(pag->pagf_freeblks + pag->pagf_flcount -
1867 need - args->total) < 1865 need - args->total) <
1868 (int)args->minleft)) { 1866 (int)args->minleft)) {
@@ -1898,7 +1896,7 @@ xfs_alloc_fix_freelist(
1898 longest = (longest > delta) ? (longest - delta) : 1896 longest = (longest > delta) ? (longest - delta) :
1899 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1897 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1900 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1898 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1901 (args->minleft && 1899 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1902 (int)(be32_to_cpu(agf->agf_freeblks) + 1900 (int)(be32_to_cpu(agf->agf_freeblks) +
1903 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1901 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1904 (int)args->minleft)) { 1902 (int)args->minleft)) {
@@ -1951,8 +1949,14 @@ xfs_alloc_fix_freelist(
1951 * the restrictions correctly. Can happen for free calls 1949 * the restrictions correctly. Can happen for free calls
1952 * on a completely full ag. 1950 * on a completely full ag.
1953 */ 1951 */
1954 if (targs.agbno == NULLAGBLOCK) 1952 if (targs.agbno == NULLAGBLOCK) {
1953 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1954 xfs_trans_brelse(tp, agflbp);
1955 args->agbp = NULL;
1956 return 0;
1957 }
1955 break; 1958 break;
1959 }
1956 /* 1960 /*
1957 * Put each allocated block on the list. 1961 * Put each allocated block on the list.
1958 */ 1962 */
@@ -2360,8 +2364,19 @@ xfs_alloc_vextent(
2360 if (args->agno == sagno && 2364 if (args->agno == sagno &&
2361 type == XFS_ALLOCTYPE_START_BNO) 2365 type == XFS_ALLOCTYPE_START_BNO)
2362 args->type = XFS_ALLOCTYPE_THIS_AG; 2366 args->type = XFS_ALLOCTYPE_THIS_AG;
2363 if (++(args->agno) == mp->m_sb.sb_agcount) 2367 /*
2364 args->agno = 0; 2368 * For the first allocation, we can try any AG to get
2369 * space. However, if we already have allocated a
2370 * block, we don't want to try AGs whose number is below
2371 * sagno. Otherwise, we may end up with out-of-order
2372 * locking of AGF, which might cause deadlock.
2373 */
2374 if (++(args->agno) == mp->m_sb.sb_agcount) {
2375 if (args->firstblock != NULLFSBLOCK)
2376 args->agno = sagno;
2377 else
2378 args->agno = 0;
2379 }
2365 /* 2380 /*
2366 * Reached the starting a.g., must either be done 2381 * Reached the starting a.g., must either be done
2367 * or switch to non-trylock mode. 2382 * or switch to non-trylock mode.
@@ -2443,7 +2458,7 @@ xfs_free_extent(
2443 args.minlen = args.minleft = args.minalignslop = 0; 2458 args.minlen = args.minleft = args.minalignslop = 0;
2444 down_read(&args.mp->m_peraglock); 2459 down_read(&args.mp->m_peraglock);
2445 args.pag = &args.mp->m_perag[args.agno]; 2460 args.pag = &args.mp->m_perag[args.agno];
2446 if ((error = xfs_alloc_fix_freelist(&args, 0))) 2461 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2447 goto error0; 2462 goto error0;
2448#ifdef DEBUG 2463#ifdef DEBUG
2449 ASSERT(args.agbp != NULL); 2464 ASSERT(args.agbp != NULL);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2d1f8928b267..650591f999ae 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype
41 * Flags for xfs_alloc_fix_freelist. 41 * Flags for xfs_alloc_fix_freelist.
42 */ 42 */
43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
44 45
45/* 46/*
46 * Argument structure for xfs_alloc routines. 47 * Argument structure for xfs_alloc routines.
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg {
70 char wasfromfl; /* set if allocation is from freelist */ 71 char wasfromfl; /* set if allocation is from freelist */
71 char isfl; /* set if is freelist blocks - !acctg */ 72 char isfl; /* set if is freelist blocks - !acctg */
72 char userdata; /* set if this is user data */ 73 char userdata; /* set if this is user data */
74 xfs_fsblock_t firstblock; /* io first block allocated */
73} xfs_alloc_arg_t; 75} xfs_alloc_arg_t;
74 76
75/* 77/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index a1d92da86ccd..7446556e8021 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b6e1e02bbb28..1a2101043275 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -27,7 +27,6 @@
27#include "xfs_trans.h" 27#include "xfs_trans.h"
28#include "xfs_sb.h" 28#include "xfs_sb.h"
29#include "xfs_ag.h" 29#include "xfs_ag.h"
30#include "xfs_dir.h"
31#include "xfs_dir2.h" 30#include "xfs_dir2.h"
32#include "xfs_dmapi.h" 31#include "xfs_dmapi.h"
33#include "xfs_mount.h" 32#include "xfs_mount.h"
@@ -35,7 +34,6 @@
35#include "xfs_bmap_btree.h" 34#include "xfs_bmap_btree.h"
36#include "xfs_alloc_btree.h" 35#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 36#include "xfs_ialloc_btree.h"
38#include "xfs_dir_sf.h"
39#include "xfs_dir2_sf.h" 37#include "xfs_dir2_sf.h"
40#include "xfs_attr_sf.h" 38#include "xfs_attr_sf.h"
41#include "xfs_dinode.h" 39#include "xfs_dinode.h"
@@ -1910,7 +1908,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
1910 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, 1908 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
1911 args->rmtblkcnt, 1909 args->rmtblkcnt,
1912 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 1910 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
1913 NULL, 0, map, &nmap, NULL); 1911 NULL, 0, map, &nmap, NULL, NULL);
1914 if (error) 1912 if (error)
1915 return(error); 1913 return(error);
1916 ASSERT(nmap >= 1); 1914 ASSERT(nmap >= 1);
@@ -1988,7 +1986,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
1988 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | 1986 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
1989 XFS_BMAPI_WRITE, 1987 XFS_BMAPI_WRITE,
1990 args->firstblock, args->total, &map, &nmap, 1988 args->firstblock, args->total, &map, &nmap,
1991 args->flist); 1989 args->flist, NULL);
1992 if (!error) { 1990 if (!error) {
1993 error = xfs_bmap_finish(&args->trans, args->flist, 1991 error = xfs_bmap_finish(&args->trans, args->flist,
1994 *args->firstblock, &committed); 1992 *args->firstblock, &committed);
@@ -2039,7 +2037,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2039 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno, 2037 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2040 args->rmtblkcnt, 2038 args->rmtblkcnt,
2041 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2039 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2042 args->firstblock, 0, &map, &nmap, NULL); 2040 args->firstblock, 0, &map, &nmap,
2041 NULL, NULL);
2043 if (error) { 2042 if (error) {
2044 return(error); 2043 return(error);
2045 } 2044 }
@@ -2104,7 +2103,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2104 args->rmtblkcnt, 2103 args->rmtblkcnt,
2105 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2104 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2106 args->firstblock, 0, &map, &nmap, 2105 args->firstblock, 0, &map, &nmap,
2107 args->flist); 2106 args->flist, NULL);
2108 if (error) { 2107 if (error) {
2109 return(error); 2108 return(error);
2110 } 2109 }
@@ -2142,7 +2141,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2142 XFS_BMAP_INIT(args->flist, args->firstblock); 2141 XFS_BMAP_INIT(args->flist, args->firstblock);
2143 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 2142 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2144 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2143 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2145 1, args->firstblock, args->flist, &done); 2144 1, args->firstblock, args->flist,
2145 NULL, &done);
2146 if (!error) { 2146 if (!error) {
2147 error = xfs_bmap_finish(&args->trans, args->flist, 2147 error = xfs_bmap_finish(&args->trans, args->flist,
2148 *args->firstblock, &committed); 2148 *args->firstblock, &committed);
@@ -2322,56 +2322,56 @@ xfs_attr_trace_enter(int type, char *where,
2322 2322
2323STATIC int 2323STATIC int
2324posix_acl_access_set( 2324posix_acl_access_set(
2325 vnode_t *vp, char *name, void *data, size_t size, int xflags) 2325 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2326{ 2326{
2327 return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS); 2327 return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2328} 2328}
2329 2329
2330STATIC int 2330STATIC int
2331posix_acl_access_remove( 2331posix_acl_access_remove(
2332 struct vnode *vp, char *name, int xflags) 2332 bhv_vnode_t *vp, char *name, int xflags)
2333{ 2333{
2334 return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); 2334 return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2335} 2335}
2336 2336
2337STATIC int 2337STATIC int
2338posix_acl_access_get( 2338posix_acl_access_get(
2339 vnode_t *vp, char *name, void *data, size_t size, int xflags) 2339 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2340{ 2340{
2341 return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS); 2341 return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2342} 2342}
2343 2343
2344STATIC int 2344STATIC int
2345posix_acl_access_exists( 2345posix_acl_access_exists(
2346 vnode_t *vp) 2346 bhv_vnode_t *vp)
2347{ 2347{
2348 return xfs_acl_vhasacl_access(vp); 2348 return xfs_acl_vhasacl_access(vp);
2349} 2349}
2350 2350
2351STATIC int 2351STATIC int
2352posix_acl_default_set( 2352posix_acl_default_set(
2353 vnode_t *vp, char *name, void *data, size_t size, int xflags) 2353 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2354{ 2354{
2355 return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT); 2355 return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2356} 2356}
2357 2357
2358STATIC int 2358STATIC int
2359posix_acl_default_get( 2359posix_acl_default_get(
2360 vnode_t *vp, char *name, void *data, size_t size, int xflags) 2360 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2361{ 2361{
2362 return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT); 2362 return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2363} 2363}
2364 2364
2365STATIC int 2365STATIC int
2366posix_acl_default_remove( 2366posix_acl_default_remove(
2367 struct vnode *vp, char *name, int xflags) 2367 bhv_vnode_t *vp, char *name, int xflags)
2368{ 2368{
2369 return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT); 2369 return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2370} 2370}
2371 2371
2372STATIC int 2372STATIC int
2373posix_acl_default_exists( 2373posix_acl_default_exists(
2374 vnode_t *vp) 2374 bhv_vnode_t *vp)
2375{ 2375{
2376 return xfs_acl_vhasacl_default(vp); 2376 return xfs_acl_vhasacl_default(vp);
2377} 2377}
@@ -2404,21 +2404,18 @@ STATIC struct attrnames *attr_system_names[] =
2404 2404
2405STATIC int 2405STATIC int
2406attr_generic_set( 2406attr_generic_set(
2407 struct vnode *vp, char *name, void *data, size_t size, int xflags) 2407 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2408{ 2408{
2409 int error; 2409 return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
2410
2411 VOP_ATTR_SET(vp, name, data, size, xflags, NULL, error);
2412 return -error;
2413} 2410}
2414 2411
2415STATIC int 2412STATIC int
2416attr_generic_get( 2413attr_generic_get(
2417 struct vnode *vp, char *name, void *data, size_t size, int xflags) 2414 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2418{ 2415{
2419 int error, asize = size; 2416 int error, asize = size;
2420 2417
2421 VOP_ATTR_GET(vp, name, data, &asize, xflags, NULL, error); 2418 error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
2422 if (!error) 2419 if (!error)
2423 return asize; 2420 return asize;
2424 return -error; 2421 return -error;
@@ -2426,12 +2423,9 @@ attr_generic_get(
2426 2423
2427STATIC int 2424STATIC int
2428attr_generic_remove( 2425attr_generic_remove(
2429 struct vnode *vp, char *name, int xflags) 2426 bhv_vnode_t *vp, char *name, int xflags)
2430{ 2427{
2431 int error; 2428 return -bhv_vop_attr_remove(vp, name, xflags, NULL);
2432
2433 VOP_ATTR_REMOVE(vp, name, xflags, NULL, error);
2434 return -error;
2435} 2429}
2436 2430
2437STATIC int 2431STATIC int
@@ -2459,7 +2453,7 @@ attr_generic_listadd(
2459 2453
2460STATIC int 2454STATIC int
2461attr_system_list( 2455attr_system_list(
2462 struct vnode *vp, 2456 bhv_vnode_t *vp,
2463 void *data, 2457 void *data,
2464 size_t size, 2458 size_t size,
2465 ssize_t *result) 2459 ssize_t *result)
@@ -2481,12 +2475,12 @@ attr_system_list(
2481 2475
2482int 2476int
2483attr_generic_list( 2477attr_generic_list(
2484 struct vnode *vp, void *data, size_t size, int xflags, ssize_t *result) 2478 bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2485{ 2479{
2486 attrlist_cursor_kern_t cursor = { 0 }; 2480 attrlist_cursor_kern_t cursor = { 0 };
2487 int error; 2481 int error;
2488 2482
2489 VOP_ATTR_LIST(vp, data, size, xflags, &cursor, NULL, error); 2483 error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
2490 if (error > 0) 2484 if (error > 0)
2491 return -error; 2485 return -error;
2492 *result = -error; 2486 *result = -error;
@@ -2514,7 +2508,7 @@ attr_lookup_namespace(
2514 */ 2508 */
2515STATIC int 2509STATIC int
2516attr_user_capable( 2510attr_user_capable(
2517 struct vnode *vp, 2511 bhv_vnode_t *vp,
2518 cred_t *cred) 2512 cred_t *cred)
2519{ 2513{
2520 struct inode *inode = vn_to_inode(vp); 2514 struct inode *inode = vn_to_inode(vp);
@@ -2532,7 +2526,7 @@ attr_user_capable(
2532 2526
2533STATIC int 2527STATIC int
2534attr_trusted_capable( 2528attr_trusted_capable(
2535 struct vnode *vp, 2529 bhv_vnode_t *vp,
2536 cred_t *cred) 2530 cred_t *cred)
2537{ 2531{
2538 struct inode *inode = vn_to_inode(vp); 2532 struct inode *inode = vn_to_inode(vp);
@@ -2546,7 +2540,7 @@ attr_trusted_capable(
2546 2540
2547STATIC int 2541STATIC int
2548attr_secure_capable( 2542attr_secure_capable(
2549 struct vnode *vp, 2543 bhv_vnode_t *vp,
2550 cred_t *cred) 2544 cred_t *cred)
2551{ 2545{
2552 return -ENOSECURITY; 2546 return -ENOSECURITY;
@@ -2554,7 +2548,7 @@ attr_secure_capable(
2554 2548
2555STATIC int 2549STATIC int
2556attr_system_set( 2550attr_system_set(
2557 struct vnode *vp, char *name, void *data, size_t size, int xflags) 2551 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2558{ 2552{
2559 attrnames_t *namesp; 2553 attrnames_t *namesp;
2560 int error; 2554 int error;
@@ -2573,7 +2567,7 @@ attr_system_set(
2573 2567
2574STATIC int 2568STATIC int
2575attr_system_get( 2569attr_system_get(
2576 struct vnode *vp, char *name, void *data, size_t size, int xflags) 2570 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2577{ 2571{
2578 attrnames_t *namesp; 2572 attrnames_t *namesp;
2579 2573
@@ -2585,7 +2579,7 @@ attr_system_get(
2585 2579
2586STATIC int 2580STATIC int
2587attr_system_remove( 2581attr_system_remove(
2588 struct vnode *vp, char *name, int xflags) 2582 bhv_vnode_t *vp, char *name, int xflags)
2589{ 2583{
2590 attrnames_t *namesp; 2584 attrnames_t *namesp;
2591 2585
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index b2c7b9fcded3..981633f6c077 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -36,13 +36,13 @@
36 *========================================================================*/ 36 *========================================================================*/
37 37
38struct cred; 38struct cred;
39struct vnode; 39struct bhv_vnode;
40 40
41typedef int (*attrset_t)(struct vnode *, char *, void *, size_t, int); 41typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int);
42typedef int (*attrget_t)(struct vnode *, char *, void *, size_t, int); 42typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int);
43typedef int (*attrremove_t)(struct vnode *, char *, int); 43typedef int (*attrremove_t)(struct bhv_vnode *, char *, int);
44typedef int (*attrexists_t)(struct vnode *); 44typedef int (*attrexists_t)(struct bhv_vnode *);
45typedef int (*attrcapable_t)(struct vnode *, struct cred *); 45typedef int (*attrcapable_t)(struct bhv_vnode *, struct cred *);
46 46
47typedef struct attrnames { 47typedef struct attrnames {
48 char * attr_name; 48 char * attr_name;
@@ -63,7 +63,7 @@ extern struct attrnames attr_trusted;
63extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; 63extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
64 64
65extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int); 65extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
66extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *); 66extern int attr_generic_list(struct bhv_vnode *, void *, size_t, int, ssize_t *);
67 67
68#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ 68#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
69#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ 69#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9462be86aa14..9455051f0120 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -34,7 +33,6 @@
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_alloc.h" 34#include "xfs_alloc.h"
36#include "xfs_btree.h" 35#include "xfs_btree.h"
37#include "xfs_dir_sf.h"
38#include "xfs_dir2_sf.h" 36#include "xfs_dir2_sf.h"
39#include "xfs_attr_sf.h" 37#include "xfs_attr_sf.h"
40#include "xfs_dinode.h" 38#include "xfs_dinode.h"
@@ -2990,7 +2988,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2990 nmap = 1; 2988 nmap = 1;
2991 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, 2989 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
2992 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2990 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2993 NULL, 0, &map, &nmap, NULL); 2991 NULL, 0, &map, &nmap, NULL, NULL);
2994 if (error) { 2992 if (error) {
2995 return(error); 2993 return(error);
2996 } 2994 }
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 1d8ff103201c..6e6e56fb352d 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -78,15 +78,12 @@
78 * 78 *
79 */ 79 */
80 80
81struct bhv_head_lock;
82
83/* 81/*
84 * Behavior head. Head of the chain of behaviors. 82 * Behavior head. Head of the chain of behaviors.
85 * Contained within each virtualized object data structure. 83 * Contained within each virtualized object data structure.
86 */ 84 */
87typedef struct bhv_head { 85typedef struct bhv_head {
88 struct bhv_desc *bh_first; /* first behavior in chain */ 86 struct bhv_desc *bh_first; /* first behavior in chain */
89 struct bhv_head_lock *bh_lockp; /* pointer to lock info struct */
90} bhv_head_t; 87} bhv_head_t;
91 88
92/* 89/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 26939d364bc4..3a6137539064 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -24,13 +24,11 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
33#include "xfs_dir_sf.h"
34#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h" 33#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 34#include "xfs_dinode.h"
@@ -40,13 +38,15 @@
40#include "xfs_mount.h" 38#include "xfs_mount.h"
41#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
42#include "xfs_itable.h" 40#include "xfs_itable.h"
41#include "xfs_dir2_data.h"
42#include "xfs_dir2_leaf.h"
43#include "xfs_dir2_block.h"
43#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
44#include "xfs_extfree_item.h" 45#include "xfs_extfree_item.h"
45#include "xfs_alloc.h" 46#include "xfs_alloc.h"
46#include "xfs_bmap.h" 47#include "xfs_bmap.h"
47#include "xfs_rtalloc.h" 48#include "xfs_rtalloc.h"
48#include "xfs_error.h" 49#include "xfs_error.h"
49#include "xfs_dir_leaf.h"
50#include "xfs_attr_leaf.h" 50#include "xfs_attr_leaf.h"
51#include "xfs_rw.h" 51#include "xfs_rw.h"
52#include "xfs_quota.h" 52#include "xfs_quota.h"
@@ -101,6 +101,7 @@ xfs_bmap_add_extent(
101 xfs_fsblock_t *first, /* pointer to firstblock variable */ 101 xfs_fsblock_t *first, /* pointer to firstblock variable */
102 xfs_bmap_free_t *flist, /* list of extents to be freed */ 102 xfs_bmap_free_t *flist, /* list of extents to be freed */
103 int *logflagsp, /* inode logging flags */ 103 int *logflagsp, /* inode logging flags */
104 xfs_extdelta_t *delta, /* Change made to incore extents */
104 int whichfork, /* data or attr fork */ 105 int whichfork, /* data or attr fork */
105 int rsvd); /* OK to allocate reserved blocks */ 106 int rsvd); /* OK to allocate reserved blocks */
106 107
@@ -118,6 +119,7 @@ xfs_bmap_add_extent_delay_real(
118 xfs_fsblock_t *first, /* pointer to firstblock variable */ 119 xfs_fsblock_t *first, /* pointer to firstblock variable */
119 xfs_bmap_free_t *flist, /* list of extents to be freed */ 120 xfs_bmap_free_t *flist, /* list of extents to be freed */
120 int *logflagsp, /* inode logging flags */ 121 int *logflagsp, /* inode logging flags */
122 xfs_extdelta_t *delta, /* Change made to incore extents */
121 int rsvd); /* OK to allocate reserved blocks */ 123 int rsvd); /* OK to allocate reserved blocks */
122 124
123/* 125/*
@@ -131,6 +133,7 @@ xfs_bmap_add_extent_hole_delay(
131 xfs_btree_cur_t *cur, /* if null, not a btree */ 133 xfs_btree_cur_t *cur, /* if null, not a btree */
132 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 134 xfs_bmbt_irec_t *new, /* new data to add to file extents */
133 int *logflagsp,/* inode logging flags */ 135 int *logflagsp,/* inode logging flags */
136 xfs_extdelta_t *delta, /* Change made to incore extents */
134 int rsvd); /* OK to allocate reserved blocks */ 137 int rsvd); /* OK to allocate reserved blocks */
135 138
136/* 139/*
@@ -144,6 +147,7 @@ xfs_bmap_add_extent_hole_real(
144 xfs_btree_cur_t *cur, /* if null, not a btree */ 147 xfs_btree_cur_t *cur, /* if null, not a btree */
145 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 148 xfs_bmbt_irec_t *new, /* new data to add to file extents */
146 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 xfs_extdelta_t *delta, /* Change made to incore extents */
147 int whichfork); /* data or attr fork */ 151 int whichfork); /* data or attr fork */
148 152
149/* 153/*
@@ -156,7 +160,8 @@ xfs_bmap_add_extent_unwritten_real(
156 xfs_extnum_t idx, /* extent number to update/insert */ 160 xfs_extnum_t idx, /* extent number to update/insert */
157 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 161 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
158 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 162 xfs_bmbt_irec_t *new, /* new data to add to file extents */
159 int *logflagsp); /* inode logging flags */ 163 int *logflagsp, /* inode logging flags */
164 xfs_extdelta_t *delta); /* Change made to incore extents */
160 165
161/* 166/*
162 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 167 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -203,6 +208,7 @@ xfs_bmap_del_extent(
203 xfs_btree_cur_t *cur, /* if null, not a btree */ 208 xfs_btree_cur_t *cur, /* if null, not a btree */
204 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 209 xfs_bmbt_irec_t *new, /* new data to add to file extents */
205 int *logflagsp,/* inode logging flags */ 210 int *logflagsp,/* inode logging flags */
211 xfs_extdelta_t *delta, /* Change made to incore extents */
206 int whichfork, /* data or attr fork */ 212 int whichfork, /* data or attr fork */
207 int rsvd); /* OK to allocate reserved blocks */ 213 int rsvd); /* OK to allocate reserved blocks */
208 214
@@ -510,7 +516,7 @@ xfs_bmap_add_attrfork_local(
510 dargs.total = mp->m_dirblkfsbs; 516 dargs.total = mp->m_dirblkfsbs;
511 dargs.whichfork = XFS_DATA_FORK; 517 dargs.whichfork = XFS_DATA_FORK;
512 dargs.trans = tp; 518 dargs.trans = tp;
513 error = XFS_DIR_SHORTFORM_TO_SINGLE(mp, &dargs); 519 error = xfs_dir2_sf_to_block(&dargs);
514 } else 520 } else
515 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 521 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
516 XFS_DATA_FORK); 522 XFS_DATA_FORK);
@@ -530,6 +536,7 @@ xfs_bmap_add_extent(
530 xfs_fsblock_t *first, /* pointer to firstblock variable */ 536 xfs_fsblock_t *first, /* pointer to firstblock variable */
531 xfs_bmap_free_t *flist, /* list of extents to be freed */ 537 xfs_bmap_free_t *flist, /* list of extents to be freed */
532 int *logflagsp, /* inode logging flags */ 538 int *logflagsp, /* inode logging flags */
539 xfs_extdelta_t *delta, /* Change made to incore extents */
533 int whichfork, /* data or attr fork */ 540 int whichfork, /* data or attr fork */
534 int rsvd) /* OK to use reserved data blocks */ 541 int rsvd) /* OK to use reserved data blocks */
535{ 542{
@@ -567,6 +574,15 @@ xfs_bmap_add_extent(
567 logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); 574 logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
568 } else 575 } else
569 logflags = 0; 576 logflags = 0;
577 /* DELTA: single new extent */
578 if (delta) {
579 if (delta->xed_startoff > new->br_startoff)
580 delta->xed_startoff = new->br_startoff;
581 if (delta->xed_blockcount <
582 new->br_startoff + new->br_blockcount)
583 delta->xed_blockcount = new->br_startoff +
584 new->br_blockcount;
585 }
570 } 586 }
571 /* 587 /*
572 * Any kind of new delayed allocation goes here. 588 * Any kind of new delayed allocation goes here.
@@ -576,7 +592,7 @@ xfs_bmap_add_extent(
576 ASSERT((cur->bc_private.b.flags & 592 ASSERT((cur->bc_private.b.flags &
577 XFS_BTCUR_BPRV_WASDEL) == 0); 593 XFS_BTCUR_BPRV_WASDEL) == 0);
578 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new, 594 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new,
579 &logflags, rsvd))) 595 &logflags, delta, rsvd)))
580 goto done; 596 goto done;
581 } 597 }
582 /* 598 /*
@@ -587,7 +603,7 @@ xfs_bmap_add_extent(
587 ASSERT((cur->bc_private.b.flags & 603 ASSERT((cur->bc_private.b.flags &
588 XFS_BTCUR_BPRV_WASDEL) == 0); 604 XFS_BTCUR_BPRV_WASDEL) == 0);
589 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, 605 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
590 &logflags, whichfork))) 606 &logflags, delta, whichfork)))
591 goto done; 607 goto done;
592 } else { 608 } else {
593 xfs_bmbt_irec_t prev; /* old extent at offset idx */ 609 xfs_bmbt_irec_t prev; /* old extent at offset idx */
@@ -612,17 +628,17 @@ xfs_bmap_add_extent(
612 XFS_BTCUR_BPRV_WASDEL); 628 XFS_BTCUR_BPRV_WASDEL);
613 if ((error = xfs_bmap_add_extent_delay_real(ip, 629 if ((error = xfs_bmap_add_extent_delay_real(ip,
614 idx, &cur, new, &da_new, first, flist, 630 idx, &cur, new, &da_new, first, flist,
615 &logflags, rsvd))) 631 &logflags, delta, rsvd)))
616 goto done; 632 goto done;
617 } else if (new->br_state == XFS_EXT_NORM) { 633 } else if (new->br_state == XFS_EXT_NORM) {
618 ASSERT(new->br_state == XFS_EXT_NORM); 634 ASSERT(new->br_state == XFS_EXT_NORM);
619 if ((error = xfs_bmap_add_extent_unwritten_real( 635 if ((error = xfs_bmap_add_extent_unwritten_real(
620 ip, idx, &cur, new, &logflags))) 636 ip, idx, &cur, new, &logflags, delta)))
621 goto done; 637 goto done;
622 } else { 638 } else {
623 ASSERT(new->br_state == XFS_EXT_UNWRITTEN); 639 ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
624 if ((error = xfs_bmap_add_extent_unwritten_real( 640 if ((error = xfs_bmap_add_extent_unwritten_real(
625 ip, idx, &cur, new, &logflags))) 641 ip, idx, &cur, new, &logflags, delta)))
626 goto done; 642 goto done;
627 } 643 }
628 ASSERT(*curp == cur || *curp == NULL); 644 ASSERT(*curp == cur || *curp == NULL);
@@ -635,7 +651,7 @@ xfs_bmap_add_extent(
635 ASSERT((cur->bc_private.b.flags & 651 ASSERT((cur->bc_private.b.flags &
636 XFS_BTCUR_BPRV_WASDEL) == 0); 652 XFS_BTCUR_BPRV_WASDEL) == 0);
637 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, 653 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
638 new, &logflags, whichfork))) 654 new, &logflags, delta, whichfork)))
639 goto done; 655 goto done;
640 } 656 }
641 } 657 }
@@ -700,6 +716,7 @@ xfs_bmap_add_extent_delay_real(
700 xfs_fsblock_t *first, /* pointer to firstblock variable */ 716 xfs_fsblock_t *first, /* pointer to firstblock variable */
701 xfs_bmap_free_t *flist, /* list of extents to be freed */ 717 xfs_bmap_free_t *flist, /* list of extents to be freed */
702 int *logflagsp, /* inode logging flags */ 718 int *logflagsp, /* inode logging flags */
719 xfs_extdelta_t *delta, /* Change made to incore extents */
703 int rsvd) /* OK to use reserved data block allocation */ 720 int rsvd) /* OK to use reserved data block allocation */
704{ 721{
705 xfs_btree_cur_t *cur; /* btree cursor */ 722 xfs_btree_cur_t *cur; /* btree cursor */
@@ -716,8 +733,8 @@ xfs_bmap_add_extent_delay_real(
716 /* left is 0, right is 1, prev is 2 */ 733 /* left is 0, right is 1, prev is 2 */
717 int rval=0; /* return value (logging flags) */ 734 int rval=0; /* return value (logging flags) */
718 int state = 0;/* state bits, accessed thru macros */ 735 int state = 0;/* state bits, accessed thru macros */
719 xfs_filblks_t temp; /* value for dnew calculations */ 736 xfs_filblks_t temp=0; /* value for dnew calculations */
720 xfs_filblks_t temp2; /* value for dnew calculations */ 737 xfs_filblks_t temp2=0;/* value for dnew calculations */
721 int tmp_rval; /* partial logging flags */ 738 int tmp_rval; /* partial logging flags */
722 enum { /* bit number definitions for state */ 739 enum { /* bit number definitions for state */
723 LEFT_CONTIG, RIGHT_CONTIG, 740 LEFT_CONTIG, RIGHT_CONTIG,
@@ -839,6 +856,11 @@ xfs_bmap_add_extent_delay_real(
839 goto done; 856 goto done;
840 } 857 }
841 *dnew = 0; 858 *dnew = 0;
859 /* DELTA: Three in-core extents are replaced by one. */
860 temp = LEFT.br_startoff;
861 temp2 = LEFT.br_blockcount +
862 PREV.br_blockcount +
863 RIGHT.br_blockcount;
842 break; 864 break;
843 865
844 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 866 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
@@ -872,6 +894,10 @@ xfs_bmap_add_extent_delay_real(
872 goto done; 894 goto done;
873 } 895 }
874 *dnew = 0; 896 *dnew = 0;
897 /* DELTA: Two in-core extents are replaced by one. */
898 temp = LEFT.br_startoff;
899 temp2 = LEFT.br_blockcount +
900 PREV.br_blockcount;
875 break; 901 break;
876 902
877 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 903 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
@@ -906,6 +932,10 @@ xfs_bmap_add_extent_delay_real(
906 goto done; 932 goto done;
907 } 933 }
908 *dnew = 0; 934 *dnew = 0;
935 /* DELTA: Two in-core extents are replaced by one. */
936 temp = PREV.br_startoff;
937 temp2 = PREV.br_blockcount +
938 RIGHT.br_blockcount;
909 break; 939 break;
910 940
911 case MASK2(LEFT_FILLING, RIGHT_FILLING): 941 case MASK2(LEFT_FILLING, RIGHT_FILLING):
@@ -936,6 +966,9 @@ xfs_bmap_add_extent_delay_real(
936 ASSERT(i == 1); 966 ASSERT(i == 1);
937 } 967 }
938 *dnew = 0; 968 *dnew = 0;
969 /* DELTA: The in-core extent described by new changed type. */
970 temp = new->br_startoff;
971 temp2 = new->br_blockcount;
939 break; 972 break;
940 973
941 case MASK2(LEFT_FILLING, LEFT_CONTIG): 974 case MASK2(LEFT_FILLING, LEFT_CONTIG):
@@ -978,6 +1011,10 @@ xfs_bmap_add_extent_delay_real(
978 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, 1011 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
979 XFS_DATA_FORK); 1012 XFS_DATA_FORK);
980 *dnew = temp; 1013 *dnew = temp;
1014 /* DELTA: The boundary between two in-core extents moved. */
1015 temp = LEFT.br_startoff;
1016 temp2 = LEFT.br_blockcount +
1017 PREV.br_blockcount;
981 break; 1018 break;
982 1019
983 case MASK(LEFT_FILLING): 1020 case MASK(LEFT_FILLING):
@@ -1025,6 +1062,9 @@ xfs_bmap_add_extent_delay_real(
1025 xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, 1062 xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
1026 XFS_DATA_FORK); 1063 XFS_DATA_FORK);
1027 *dnew = temp; 1064 *dnew = temp;
1065 /* DELTA: One in-core extent is split in two. */
1066 temp = PREV.br_startoff;
1067 temp2 = PREV.br_blockcount;
1028 break; 1068 break;
1029 1069
1030 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 1070 case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
@@ -1067,6 +1107,10 @@ xfs_bmap_add_extent_delay_real(
1067 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, 1107 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
1068 XFS_DATA_FORK); 1108 XFS_DATA_FORK);
1069 *dnew = temp; 1109 *dnew = temp;
1110 /* DELTA: The boundary between two in-core extents moved. */
1111 temp = PREV.br_startoff;
1112 temp2 = PREV.br_blockcount +
1113 RIGHT.br_blockcount;
1070 break; 1114 break;
1071 1115
1072 case MASK(RIGHT_FILLING): 1116 case MASK(RIGHT_FILLING):
@@ -1112,6 +1156,9 @@ xfs_bmap_add_extent_delay_real(
1112 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1156 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1113 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1157 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
1114 *dnew = temp; 1158 *dnew = temp;
1159 /* DELTA: One in-core extent is split in two. */
1160 temp = PREV.br_startoff;
1161 temp2 = PREV.br_blockcount;
1115 break; 1162 break;
1116 1163
1117 case 0: 1164 case 0:
@@ -1194,6 +1241,9 @@ xfs_bmap_add_extent_delay_real(
1194 xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, 1241 xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
1195 XFS_DATA_FORK); 1242 XFS_DATA_FORK);
1196 *dnew = temp + temp2; 1243 *dnew = temp + temp2;
1244 /* DELTA: One in-core extent is split in three. */
1245 temp = PREV.br_startoff;
1246 temp2 = PREV.br_blockcount;
1197 break; 1247 break;
1198 1248
1199 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1249 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
@@ -1209,6 +1259,13 @@ xfs_bmap_add_extent_delay_real(
1209 ASSERT(0); 1259 ASSERT(0);
1210 } 1260 }
1211 *curp = cur; 1261 *curp = cur;
1262 if (delta) {
1263 temp2 += temp;
1264 if (delta->xed_startoff > temp)
1265 delta->xed_startoff = temp;
1266 if (delta->xed_blockcount < temp2)
1267 delta->xed_blockcount = temp2;
1268 }
1212done: 1269done:
1213 *logflagsp = rval; 1270 *logflagsp = rval;
1214 return error; 1271 return error;
@@ -1235,7 +1292,8 @@ xfs_bmap_add_extent_unwritten_real(
1235 xfs_extnum_t idx, /* extent number to update/insert */ 1292 xfs_extnum_t idx, /* extent number to update/insert */
1236 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 1293 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
1237 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1294 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1238 int *logflagsp) /* inode logging flags */ 1295 int *logflagsp, /* inode logging flags */
1296 xfs_extdelta_t *delta) /* Change made to incore extents */
1239{ 1297{
1240 xfs_btree_cur_t *cur; /* btree cursor */ 1298 xfs_btree_cur_t *cur; /* btree cursor */
1241 xfs_bmbt_rec_t *ep; /* extent entry for idx */ 1299 xfs_bmbt_rec_t *ep; /* extent entry for idx */
@@ -1252,6 +1310,8 @@ xfs_bmap_add_extent_unwritten_real(
1252 /* left is 0, right is 1, prev is 2 */ 1310 /* left is 0, right is 1, prev is 2 */
1253 int rval=0; /* return value (logging flags) */ 1311 int rval=0; /* return value (logging flags) */
1254 int state = 0;/* state bits, accessed thru macros */ 1312 int state = 0;/* state bits, accessed thru macros */
1313 xfs_filblks_t temp=0;
1314 xfs_filblks_t temp2=0;
1255 enum { /* bit number definitions for state */ 1315 enum { /* bit number definitions for state */
1256 LEFT_CONTIG, RIGHT_CONTIG, 1316 LEFT_CONTIG, RIGHT_CONTIG,
1257 LEFT_FILLING, RIGHT_FILLING, 1317 LEFT_FILLING, RIGHT_FILLING,
@@ -1380,6 +1440,11 @@ xfs_bmap_add_extent_unwritten_real(
1380 RIGHT.br_blockcount, LEFT.br_state))) 1440 RIGHT.br_blockcount, LEFT.br_state)))
1381 goto done; 1441 goto done;
1382 } 1442 }
1443 /* DELTA: Three in-core extents are replaced by one. */
1444 temp = LEFT.br_startoff;
1445 temp2 = LEFT.br_blockcount +
1446 PREV.br_blockcount +
1447 RIGHT.br_blockcount;
1383 break; 1448 break;
1384 1449
1385 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 1450 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
@@ -1419,6 +1484,10 @@ xfs_bmap_add_extent_unwritten_real(
1419 LEFT.br_state))) 1484 LEFT.br_state)))
1420 goto done; 1485 goto done;
1421 } 1486 }
1487 /* DELTA: Two in-core extents are replaced by one. */
1488 temp = LEFT.br_startoff;
1489 temp2 = LEFT.br_blockcount +
1490 PREV.br_blockcount;
1422 break; 1491 break;
1423 1492
1424 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 1493 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
@@ -1459,6 +1528,10 @@ xfs_bmap_add_extent_unwritten_real(
1459 newext))) 1528 newext)))
1460 goto done; 1529 goto done;
1461 } 1530 }
1531 /* DELTA: Two in-core extents are replaced by one. */
1532 temp = PREV.br_startoff;
1533 temp2 = PREV.br_blockcount +
1534 RIGHT.br_blockcount;
1462 break; 1535 break;
1463 1536
1464 case MASK2(LEFT_FILLING, RIGHT_FILLING): 1537 case MASK2(LEFT_FILLING, RIGHT_FILLING):
@@ -1487,6 +1560,9 @@ xfs_bmap_add_extent_unwritten_real(
1487 newext))) 1560 newext)))
1488 goto done; 1561 goto done;
1489 } 1562 }
1563 /* DELTA: The in-core extent described by new changed type. */
1564 temp = new->br_startoff;
1565 temp2 = new->br_blockcount;
1490 break; 1566 break;
1491 1567
1492 case MASK2(LEFT_FILLING, LEFT_CONTIG): 1568 case MASK2(LEFT_FILLING, LEFT_CONTIG):
@@ -1534,6 +1610,10 @@ xfs_bmap_add_extent_unwritten_real(
1534 LEFT.br_state)) 1610 LEFT.br_state))
1535 goto done; 1611 goto done;
1536 } 1612 }
1613 /* DELTA: The boundary between two in-core extents moved. */
1614 temp = LEFT.br_startoff;
1615 temp2 = LEFT.br_blockcount +
1616 PREV.br_blockcount;
1537 break; 1617 break;
1538 1618
1539 case MASK(LEFT_FILLING): 1619 case MASK(LEFT_FILLING):
@@ -1574,6 +1654,9 @@ xfs_bmap_add_extent_unwritten_real(
1574 goto done; 1654 goto done;
1575 ASSERT(i == 1); 1655 ASSERT(i == 1);
1576 } 1656 }
1657 /* DELTA: One in-core extent is split in two. */
1658 temp = PREV.br_startoff;
1659 temp2 = PREV.br_blockcount;
1577 break; 1660 break;
1578 1661
1579 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 1662 case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
@@ -1617,6 +1700,10 @@ xfs_bmap_add_extent_unwritten_real(
1617 newext))) 1700 newext)))
1618 goto done; 1701 goto done;
1619 } 1702 }
1703 /* DELTA: The boundary between two in-core extents moved. */
1704 temp = PREV.br_startoff;
1705 temp2 = PREV.br_blockcount +
1706 RIGHT.br_blockcount;
1620 break; 1707 break;
1621 1708
1622 case MASK(RIGHT_FILLING): 1709 case MASK(RIGHT_FILLING):
@@ -1657,6 +1744,9 @@ xfs_bmap_add_extent_unwritten_real(
1657 goto done; 1744 goto done;
1658 ASSERT(i == 1); 1745 ASSERT(i == 1);
1659 } 1746 }
1747 /* DELTA: One in-core extent is split in two. */
1748 temp = PREV.br_startoff;
1749 temp2 = PREV.br_blockcount;
1660 break; 1750 break;
1661 1751
1662 case 0: 1752 case 0:
@@ -1710,6 +1800,9 @@ xfs_bmap_add_extent_unwritten_real(
1710 goto done; 1800 goto done;
1711 ASSERT(i == 1); 1801 ASSERT(i == 1);
1712 } 1802 }
1803 /* DELTA: One in-core extent is split in three. */
1804 temp = PREV.br_startoff;
1805 temp2 = PREV.br_blockcount;
1713 break; 1806 break;
1714 1807
1715 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1808 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
@@ -1725,6 +1818,13 @@ xfs_bmap_add_extent_unwritten_real(
1725 ASSERT(0); 1818 ASSERT(0);
1726 } 1819 }
1727 *curp = cur; 1820 *curp = cur;
1821 if (delta) {
1822 temp2 += temp;
1823 if (delta->xed_startoff > temp)
1824 delta->xed_startoff = temp;
1825 if (delta->xed_blockcount < temp2)
1826 delta->xed_blockcount = temp2;
1827 }
1728done: 1828done:
1729 *logflagsp = rval; 1829 *logflagsp = rval;
1730 return error; 1830 return error;
@@ -1753,6 +1853,7 @@ xfs_bmap_add_extent_hole_delay(
1753 xfs_btree_cur_t *cur, /* if null, not a btree */ 1853 xfs_btree_cur_t *cur, /* if null, not a btree */
1754 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1854 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1755 int *logflagsp, /* inode logging flags */ 1855 int *logflagsp, /* inode logging flags */
1856 xfs_extdelta_t *delta, /* Change made to incore extents */
1756 int rsvd) /* OK to allocate reserved blocks */ 1857 int rsvd) /* OK to allocate reserved blocks */
1757{ 1858{
1758 xfs_bmbt_rec_t *ep; /* extent record for idx */ 1859 xfs_bmbt_rec_t *ep; /* extent record for idx */
@@ -1765,7 +1866,8 @@ xfs_bmap_add_extent_hole_delay(
1765 xfs_filblks_t oldlen=0; /* old indirect size */ 1866 xfs_filblks_t oldlen=0; /* old indirect size */
1766 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1867 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1767 int state; /* state bits, accessed thru macros */ 1868 int state; /* state bits, accessed thru macros */
1768 xfs_filblks_t temp; /* temp for indirect calculations */ 1869 xfs_filblks_t temp=0; /* temp for indirect calculations */
1870 xfs_filblks_t temp2=0;
1769 enum { /* bit number definitions for state */ 1871 enum { /* bit number definitions for state */
1770 LEFT_CONTIG, RIGHT_CONTIG, 1872 LEFT_CONTIG, RIGHT_CONTIG,
1771 LEFT_DELAY, RIGHT_DELAY, 1873 LEFT_DELAY, RIGHT_DELAY,
@@ -1844,6 +1946,9 @@ xfs_bmap_add_extent_hole_delay(
1844 XFS_DATA_FORK); 1946 XFS_DATA_FORK);
1845 xfs_iext_remove(ifp, idx, 1); 1947 xfs_iext_remove(ifp, idx, 1);
1846 ip->i_df.if_lastex = idx - 1; 1948 ip->i_df.if_lastex = idx - 1;
1949 /* DELTA: Two in-core extents were replaced by one. */
1950 temp2 = temp;
1951 temp = left.br_startoff;
1847 break; 1952 break;
1848 1953
1849 case MASK(LEFT_CONTIG): 1954 case MASK(LEFT_CONTIG):
@@ -1864,6 +1969,9 @@ xfs_bmap_add_extent_hole_delay(
1864 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, 1969 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
1865 XFS_DATA_FORK); 1970 XFS_DATA_FORK);
1866 ip->i_df.if_lastex = idx - 1; 1971 ip->i_df.if_lastex = idx - 1;
1972 /* DELTA: One in-core extent grew into a hole. */
1973 temp2 = temp;
1974 temp = left.br_startoff;
1867 break; 1975 break;
1868 1976
1869 case MASK(RIGHT_CONTIG): 1977 case MASK(RIGHT_CONTIG):
@@ -1881,6 +1989,9 @@ xfs_bmap_add_extent_hole_delay(
1881 NULLSTARTBLOCK((int)newlen), temp, right.br_state); 1989 NULLSTARTBLOCK((int)newlen), temp, right.br_state);
1882 xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK); 1990 xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
1883 ip->i_df.if_lastex = idx; 1991 ip->i_df.if_lastex = idx;
1992 /* DELTA: One in-core extent grew into a hole. */
1993 temp2 = temp;
1994 temp = new->br_startoff;
1884 break; 1995 break;
1885 1996
1886 case 0: 1997 case 0:
@@ -1894,6 +2005,9 @@ xfs_bmap_add_extent_hole_delay(
1894 XFS_DATA_FORK); 2005 XFS_DATA_FORK);
1895 xfs_iext_insert(ifp, idx, 1, new); 2006 xfs_iext_insert(ifp, idx, 1, new);
1896 ip->i_df.if_lastex = idx; 2007 ip->i_df.if_lastex = idx;
2008 /* DELTA: A new in-core extent was added in a hole. */
2009 temp2 = new->br_blockcount;
2010 temp = new->br_startoff;
1897 break; 2011 break;
1898 } 2012 }
1899 if (oldlen != newlen) { 2013 if (oldlen != newlen) {
@@ -1904,6 +2018,13 @@ xfs_bmap_add_extent_hole_delay(
1904 * Nothing to do for disk quota accounting here. 2018 * Nothing to do for disk quota accounting here.
1905 */ 2019 */
1906 } 2020 }
2021 if (delta) {
2022 temp2 += temp;
2023 if (delta->xed_startoff > temp)
2024 delta->xed_startoff = temp;
2025 if (delta->xed_blockcount < temp2)
2026 delta->xed_blockcount = temp2;
2027 }
1907 *logflagsp = 0; 2028 *logflagsp = 0;
1908 return 0; 2029 return 0;
1909#undef MASK 2030#undef MASK
@@ -1925,6 +2046,7 @@ xfs_bmap_add_extent_hole_real(
1925 xfs_btree_cur_t *cur, /* if null, not a btree */ 2046 xfs_btree_cur_t *cur, /* if null, not a btree */
1926 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2047 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1927 int *logflagsp, /* inode logging flags */ 2048 int *logflagsp, /* inode logging flags */
2049 xfs_extdelta_t *delta, /* Change made to incore extents */
1928 int whichfork) /* data or attr fork */ 2050 int whichfork) /* data or attr fork */
1929{ 2051{
1930 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */ 2052 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */
@@ -1936,7 +2058,10 @@ xfs_bmap_add_extent_hole_real(
1936 xfs_ifork_t *ifp; /* inode fork pointer */ 2058 xfs_ifork_t *ifp; /* inode fork pointer */
1937 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2059 xfs_bmbt_irec_t left; /* left neighbor extent entry */
1938 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2060 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2061 int rval=0; /* return value (logging flags) */
1939 int state; /* state bits, accessed thru macros */ 2062 int state; /* state bits, accessed thru macros */
2063 xfs_filblks_t temp=0;
2064 xfs_filblks_t temp2=0;
1940 enum { /* bit number definitions for state */ 2065 enum { /* bit number definitions for state */
1941 LEFT_CONTIG, RIGHT_CONTIG, 2066 LEFT_CONTIG, RIGHT_CONTIG,
1942 LEFT_DELAY, RIGHT_DELAY, 2067 LEFT_DELAY, RIGHT_DELAY,
@@ -1993,6 +2118,7 @@ xfs_bmap_add_extent_hole_real(
1993 left.br_blockcount + new->br_blockcount + 2118 left.br_blockcount + new->br_blockcount +
1994 right.br_blockcount <= MAXEXTLEN)); 2119 right.br_blockcount <= MAXEXTLEN));
1995 2120
2121 error = 0;
1996 /* 2122 /*
1997 * Select which case we're in here, and implement it. 2123 * Select which case we're in here, and implement it.
1998 */ 2124 */
@@ -2018,25 +2144,35 @@ xfs_bmap_add_extent_hole_real(
2018 XFS_IFORK_NEXT_SET(ip, whichfork, 2144 XFS_IFORK_NEXT_SET(ip, whichfork,
2019 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2145 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2020 if (cur == NULL) { 2146 if (cur == NULL) {
2021 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); 2147 rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
2022 return 0; 2148 } else {
2149 rval = XFS_ILOG_CORE;
2150 if ((error = xfs_bmbt_lookup_eq(cur,
2151 right.br_startoff,
2152 right.br_startblock,
2153 right.br_blockcount, &i)))
2154 goto done;
2155 ASSERT(i == 1);
2156 if ((error = xfs_bmbt_delete(cur, &i)))
2157 goto done;
2158 ASSERT(i == 1);
2159 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2160 goto done;
2161 ASSERT(i == 1);
2162 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2163 left.br_startblock,
2164 left.br_blockcount +
2165 new->br_blockcount +
2166 right.br_blockcount,
2167 left.br_state)))
2168 goto done;
2023 } 2169 }
2024 *logflagsp = XFS_ILOG_CORE; 2170 /* DELTA: Two in-core extents were replaced by one. */
2025 if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff, 2171 temp = left.br_startoff;
2026 right.br_startblock, right.br_blockcount, &i))) 2172 temp2 = left.br_blockcount +
2027 return error; 2173 new->br_blockcount +
2028 ASSERT(i == 1); 2174 right.br_blockcount;
2029 if ((error = xfs_bmbt_delete(cur, &i))) 2175 break;
2030 return error;
2031 ASSERT(i == 1);
2032 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2033 return error;
2034 ASSERT(i == 1);
2035 error = xfs_bmbt_update(cur, left.br_startoff,
2036 left.br_startblock,
2037 left.br_blockcount + new->br_blockcount +
2038 right.br_blockcount, left.br_state);
2039 return error;
2040 2176
2041 case MASK(LEFT_CONTIG): 2177 case MASK(LEFT_CONTIG):
2042 /* 2178 /*
@@ -2050,19 +2186,27 @@ xfs_bmap_add_extent_hole_real(
2050 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); 2186 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
2051 ifp->if_lastex = idx - 1; 2187 ifp->if_lastex = idx - 1;
2052 if (cur == NULL) { 2188 if (cur == NULL) {
2053 *logflagsp = XFS_ILOG_FEXT(whichfork); 2189 rval = XFS_ILOG_FEXT(whichfork);
2054 return 0; 2190 } else {
2191 rval = 0;
2192 if ((error = xfs_bmbt_lookup_eq(cur,
2193 left.br_startoff,
2194 left.br_startblock,
2195 left.br_blockcount, &i)))
2196 goto done;
2197 ASSERT(i == 1);
2198 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2199 left.br_startblock,
2200 left.br_blockcount +
2201 new->br_blockcount,
2202 left.br_state)))
2203 goto done;
2055 } 2204 }
2056 *logflagsp = 0; 2205 /* DELTA: One in-core extent grew. */
2057 if ((error = xfs_bmbt_lookup_eq(cur, left.br_startoff, 2206 temp = left.br_startoff;
2058 left.br_startblock, left.br_blockcount, &i))) 2207 temp2 = left.br_blockcount +
2059 return error; 2208 new->br_blockcount;
2060 ASSERT(i == 1); 2209 break;
2061 error = xfs_bmbt_update(cur, left.br_startoff,
2062 left.br_startblock,
2063 left.br_blockcount + new->br_blockcount,
2064 left.br_state);
2065 return error;
2066 2210
2067 case MASK(RIGHT_CONTIG): 2211 case MASK(RIGHT_CONTIG):
2068 /* 2212 /*
@@ -2077,19 +2221,27 @@ xfs_bmap_add_extent_hole_real(
2077 xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork); 2221 xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
2078 ifp->if_lastex = idx; 2222 ifp->if_lastex = idx;
2079 if (cur == NULL) { 2223 if (cur == NULL) {
2080 *logflagsp = XFS_ILOG_FEXT(whichfork); 2224 rval = XFS_ILOG_FEXT(whichfork);
2081 return 0; 2225 } else {
2226 rval = 0;
2227 if ((error = xfs_bmbt_lookup_eq(cur,
2228 right.br_startoff,
2229 right.br_startblock,
2230 right.br_blockcount, &i)))
2231 goto done;
2232 ASSERT(i == 1);
2233 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2234 new->br_startblock,
2235 new->br_blockcount +
2236 right.br_blockcount,
2237 right.br_state)))
2238 goto done;
2082 } 2239 }
2083 *logflagsp = 0; 2240 /* DELTA: One in-core extent grew. */
2084 if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff, 2241 temp = new->br_startoff;
2085 right.br_startblock, right.br_blockcount, &i))) 2242 temp2 = new->br_blockcount +
2086 return error; 2243 right.br_blockcount;
2087 ASSERT(i == 1); 2244 break;
2088 error = xfs_bmbt_update(cur, new->br_startoff,
2089 new->br_startblock,
2090 new->br_blockcount + right.br_blockcount,
2091 right.br_state);
2092 return error;
2093 2245
2094 case 0: 2246 case 0:
2095 /* 2247 /*
@@ -2104,29 +2256,41 @@ xfs_bmap_add_extent_hole_real(
2104 XFS_IFORK_NEXT_SET(ip, whichfork, 2256 XFS_IFORK_NEXT_SET(ip, whichfork,
2105 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2257 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2106 if (cur == NULL) { 2258 if (cur == NULL) {
2107 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); 2259 rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
2108 return 0; 2260 } else {
2261 rval = XFS_ILOG_CORE;
2262 if ((error = xfs_bmbt_lookup_eq(cur,
2263 new->br_startoff,
2264 new->br_startblock,
2265 new->br_blockcount, &i)))
2266 goto done;
2267 ASSERT(i == 0);
2268 cur->bc_rec.b.br_state = new->br_state;
2269 if ((error = xfs_bmbt_insert(cur, &i)))
2270 goto done;
2271 ASSERT(i == 1);
2109 } 2272 }
2110 *logflagsp = XFS_ILOG_CORE; 2273 /* DELTA: A new extent was added in a hole. */
2111 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2274 temp = new->br_startoff;
2112 new->br_startblock, new->br_blockcount, &i))) 2275 temp2 = new->br_blockcount;
2113 return error; 2276 break;
2114 ASSERT(i == 0); 2277 }
2115 cur->bc_rec.b.br_state = new->br_state; 2278 if (delta) {
2116 if ((error = xfs_bmbt_insert(cur, &i))) 2279 temp2 += temp;
2117 return error; 2280 if (delta->xed_startoff > temp)
2118 ASSERT(i == 1); 2281 delta->xed_startoff = temp;
2119 return 0; 2282 if (delta->xed_blockcount < temp2)
2283 delta->xed_blockcount = temp2;
2120 } 2284 }
2285done:
2286 *logflagsp = rval;
2287 return error;
2121#undef MASK 2288#undef MASK
2122#undef MASK2 2289#undef MASK2
2123#undef STATE_SET 2290#undef STATE_SET
2124#undef STATE_TEST 2291#undef STATE_TEST
2125#undef STATE_SET_TEST 2292#undef STATE_SET_TEST
2126#undef SWITCH_STATE 2293#undef SWITCH_STATE
2127 /* NOTREACHED */
2128 ASSERT(0);
2129 return 0; /* keep gcc quite */
2130} 2294}
2131 2295
2132/* 2296/*
@@ -2598,6 +2762,7 @@ xfs_bmap_btalloc(
2598 args.mp = mp; 2762 args.mp = mp;
2599 args.fsbno = ap->rval; 2763 args.fsbno = ap->rval;
2600 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2764 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
2765 args.firstblock = ap->firstblock;
2601 blen = 0; 2766 blen = 0;
2602 if (nullfb) { 2767 if (nullfb) {
2603 args.type = XFS_ALLOCTYPE_START_BNO; 2768 args.type = XFS_ALLOCTYPE_START_BNO;
@@ -2657,7 +2822,7 @@ xfs_bmap_btalloc(
2657 else 2822 else
2658 args.minlen = ap->alen; 2823 args.minlen = ap->alen;
2659 } else if (ap->low) { 2824 } else if (ap->low) {
2660 args.type = XFS_ALLOCTYPE_FIRST_AG; 2825 args.type = XFS_ALLOCTYPE_START_BNO;
2661 args.total = args.minlen = ap->minlen; 2826 args.total = args.minlen = ap->minlen;
2662 } else { 2827 } else {
2663 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2828 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -2669,7 +2834,7 @@ xfs_bmap_btalloc(
2669 args.prod = ap->ip->i_d.di_extsize; 2834 args.prod = ap->ip->i_d.di_extsize;
2670 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) 2835 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2671 args.mod = (xfs_extlen_t)(args.prod - args.mod); 2836 args.mod = (xfs_extlen_t)(args.prod - args.mod);
2672 } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) { 2837 } else if (mp->m_sb.sb_blocksize >= NBPP) {
2673 args.prod = 1; 2838 args.prod = 1;
2674 args.mod = 0; 2839 args.mod = 0;
2675 } else { 2840 } else {
@@ -2885,6 +3050,7 @@ xfs_bmap_del_extent(
2885 xfs_btree_cur_t *cur, /* if null, not a btree */ 3050 xfs_btree_cur_t *cur, /* if null, not a btree */
2886 xfs_bmbt_irec_t *del, /* data to remove from extents */ 3051 xfs_bmbt_irec_t *del, /* data to remove from extents */
2887 int *logflagsp, /* inode logging flags */ 3052 int *logflagsp, /* inode logging flags */
3053 xfs_extdelta_t *delta, /* Change made to incore extents */
2888 int whichfork, /* data or attr fork */ 3054 int whichfork, /* data or attr fork */
2889 int rsvd) /* OK to allocate reserved blocks */ 3055 int rsvd) /* OK to allocate reserved blocks */
2890{ 3056{
@@ -3193,6 +3359,14 @@ xfs_bmap_del_extent(
3193 if (da_old > da_new) 3359 if (da_old > da_new)
3194 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new), 3360 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
3195 rsvd); 3361 rsvd);
3362 if (delta) {
3363 /* DELTA: report the original extent. */
3364 if (delta->xed_startoff > got.br_startoff)
3365 delta->xed_startoff = got.br_startoff;
3366 if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
3367 delta->xed_blockcount = got.br_startoff +
3368 got.br_blockcount;
3369 }
3196done: 3370done:
3197 *logflagsp = flags; 3371 *logflagsp = flags;
3198 return error; 3372 return error;
@@ -3279,6 +3453,7 @@ xfs_bmap_extents_to_btree(
3279 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3453 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3280 args.tp = tp; 3454 args.tp = tp;
3281 args.mp = mp; 3455 args.mp = mp;
3456 args.firstblock = *firstblock;
3282 if (*firstblock == NULLFSBLOCK) { 3457 if (*firstblock == NULLFSBLOCK) {
3283 args.type = XFS_ALLOCTYPE_START_BNO; 3458 args.type = XFS_ALLOCTYPE_START_BNO;
3284 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 3459 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -3414,6 +3589,7 @@ xfs_bmap_local_to_extents(
3414 3589
3415 args.tp = tp; 3590 args.tp = tp;
3416 args.mp = ip->i_mount; 3591 args.mp = ip->i_mount;
3592 args.firstblock = *firstblock;
3417 ASSERT((ifp->if_flags & 3593 ASSERT((ifp->if_flags &
3418 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); 3594 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3419 /* 3595 /*
@@ -3753,7 +3929,7 @@ xfs_bunmap_trace(
3753 if (ip->i_rwtrace == NULL) 3929 if (ip->i_rwtrace == NULL)
3754 return; 3930 return;
3755 ktrace_enter(ip->i_rwtrace, 3931 ktrace_enter(ip->i_rwtrace,
3756 (void *)(__psint_t)XFS_BUNMAPI, 3932 (void *)(__psint_t)XFS_BUNMAP,
3757 (void *)ip, 3933 (void *)ip,
3758 (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff), 3934 (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
3759 (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff), 3935 (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
@@ -4087,8 +4263,8 @@ xfs_bmap_finish(
4087 if (!XFS_FORCED_SHUTDOWN(mp)) 4263 if (!XFS_FORCED_SHUTDOWN(mp))
4088 xfs_force_shutdown(mp, 4264 xfs_force_shutdown(mp,
4089 (error == EFSCORRUPTED) ? 4265 (error == EFSCORRUPTED) ?
4090 XFS_CORRUPT_INCORE : 4266 SHUTDOWN_CORRUPT_INCORE :
4091 XFS_METADATA_IO_ERROR); 4267 SHUTDOWN_META_IO_ERROR);
4092 return error; 4268 return error;
4093 } 4269 }
4094 xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, 4270 xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
@@ -4538,7 +4714,8 @@ xfs_bmapi(
4538 xfs_extlen_t total, /* total blocks needed */ 4714 xfs_extlen_t total, /* total blocks needed */
4539 xfs_bmbt_irec_t *mval, /* output: map values */ 4715 xfs_bmbt_irec_t *mval, /* output: map values */
4540 int *nmap, /* i/o: mval size/count */ 4716 int *nmap, /* i/o: mval size/count */
4541 xfs_bmap_free_t *flist) /* i/o: list extents to free */ 4717 xfs_bmap_free_t *flist, /* i/o: list extents to free */
4718 xfs_extdelta_t *delta) /* o: change made to incore extents */
4542{ 4719{
4543 xfs_fsblock_t abno; /* allocated block number */ 4720 xfs_fsblock_t abno; /* allocated block number */
4544 xfs_extlen_t alen; /* allocated extent length */ 4721 xfs_extlen_t alen; /* allocated extent length */
@@ -4650,6 +4827,10 @@ xfs_bmapi(
4650 end = bno + len; 4827 end = bno + len;
4651 obno = bno; 4828 obno = bno;
4652 bma.ip = NULL; 4829 bma.ip = NULL;
4830 if (delta) {
4831 delta->xed_startoff = NULLFILEOFF;
4832 delta->xed_blockcount = 0;
4833 }
4653 while (bno < end && n < *nmap) { 4834 while (bno < end && n < *nmap) {
4654 /* 4835 /*
4655 * Reading past eof, act as though there's a hole 4836 * Reading past eof, act as though there's a hole
@@ -4886,8 +5067,8 @@ xfs_bmapi(
4886 got.br_state = XFS_EXT_UNWRITTEN; 5067 got.br_state = XFS_EXT_UNWRITTEN;
4887 } 5068 }
4888 error = xfs_bmap_add_extent(ip, lastx, &cur, &got, 5069 error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
4889 firstblock, flist, &tmp_logflags, whichfork, 5070 firstblock, flist, &tmp_logflags, delta,
4890 (flags & XFS_BMAPI_RSVBLOCKS)); 5071 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4891 logflags |= tmp_logflags; 5072 logflags |= tmp_logflags;
4892 if (error) 5073 if (error)
4893 goto error0; 5074 goto error0;
@@ -4983,8 +5164,8 @@ xfs_bmapi(
4983 } 5164 }
4984 mval->br_state = XFS_EXT_NORM; 5165 mval->br_state = XFS_EXT_NORM;
4985 error = xfs_bmap_add_extent(ip, lastx, &cur, mval, 5166 error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
4986 firstblock, flist, &tmp_logflags, whichfork, 5167 firstblock, flist, &tmp_logflags, delta,
4987 (flags & XFS_BMAPI_RSVBLOCKS)); 5168 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4988 logflags |= tmp_logflags; 5169 logflags |= tmp_logflags;
4989 if (error) 5170 if (error)
4990 goto error0; 5171 goto error0;
@@ -5073,7 +5254,14 @@ xfs_bmapi(
5073 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 5254 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5074 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); 5255 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5075 error = 0; 5256 error = 0;
5076 5257 if (delta && delta->xed_startoff != NULLFILEOFF) {
5258 /* A change was actually made.
5259 * Note that delta->xed_blockount is an offset at this
5260 * point and needs to be converted to a block count.
5261 */
5262 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5263 delta->xed_blockcount -= delta->xed_startoff;
5264 }
5077error0: 5265error0:
5078 /* 5266 /*
5079 * Log everything. Do this after conversion, there's no point in 5267 * Log everything. Do this after conversion, there's no point in
@@ -5185,6 +5373,8 @@ xfs_bunmapi(
5185 xfs_fsblock_t *firstblock, /* first allocated block 5373 xfs_fsblock_t *firstblock, /* first allocated block
5186 controls a.g. for allocs */ 5374 controls a.g. for allocs */
5187 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 5375 xfs_bmap_free_t *flist, /* i/o: list extents to free */
5376 xfs_extdelta_t *delta, /* o: change made to incore
5377 extents */
5188 int *done) /* set if not done yet */ 5378 int *done) /* set if not done yet */
5189{ 5379{
5190 xfs_btree_cur_t *cur; /* bmap btree cursor */ 5380 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5242,6 +5432,10 @@ xfs_bunmapi(
5242 bno = start + len - 1; 5432 bno = start + len - 1;
5243 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5433 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5244 &prev); 5434 &prev);
5435 if (delta) {
5436 delta->xed_startoff = NULLFILEOFF;
5437 delta->xed_blockcount = 0;
5438 }
5245 /* 5439 /*
5246 * Check to see if the given block number is past the end of the 5440 * Check to see if the given block number is past the end of the
5247 * file, back up to the last block if so... 5441 * file, back up to the last block if so...
@@ -5340,7 +5534,8 @@ xfs_bunmapi(
5340 } 5534 }
5341 del.br_state = XFS_EXT_UNWRITTEN; 5535 del.br_state = XFS_EXT_UNWRITTEN;
5342 error = xfs_bmap_add_extent(ip, lastx, &cur, &del, 5536 error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
5343 firstblock, flist, &logflags, XFS_DATA_FORK, 0); 5537 firstblock, flist, &logflags, delta,
5538 XFS_DATA_FORK, 0);
5344 if (error) 5539 if (error)
5345 goto error0; 5540 goto error0;
5346 goto nodelete; 5541 goto nodelete;
@@ -5394,7 +5589,7 @@ xfs_bunmapi(
5394 prev.br_state = XFS_EXT_UNWRITTEN; 5589 prev.br_state = XFS_EXT_UNWRITTEN;
5395 error = xfs_bmap_add_extent(ip, lastx - 1, &cur, 5590 error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
5396 &prev, firstblock, flist, &logflags, 5591 &prev, firstblock, flist, &logflags,
5397 XFS_DATA_FORK, 0); 5592 delta, XFS_DATA_FORK, 0);
5398 if (error) 5593 if (error)
5399 goto error0; 5594 goto error0;
5400 goto nodelete; 5595 goto nodelete;
@@ -5403,7 +5598,7 @@ xfs_bunmapi(
5403 del.br_state = XFS_EXT_UNWRITTEN; 5598 del.br_state = XFS_EXT_UNWRITTEN;
5404 error = xfs_bmap_add_extent(ip, lastx, &cur, 5599 error = xfs_bmap_add_extent(ip, lastx, &cur,
5405 &del, firstblock, flist, &logflags, 5600 &del, firstblock, flist, &logflags,
5406 XFS_DATA_FORK, 0); 5601 delta, XFS_DATA_FORK, 0);
5407 if (error) 5602 if (error)
5408 goto error0; 5603 goto error0;
5409 goto nodelete; 5604 goto nodelete;
@@ -5456,7 +5651,7 @@ xfs_bunmapi(
5456 goto error0; 5651 goto error0;
5457 } 5652 }
5458 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, 5653 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
5459 &tmp_logflags, whichfork, rsvd); 5654 &tmp_logflags, delta, whichfork, rsvd);
5460 logflags |= tmp_logflags; 5655 logflags |= tmp_logflags;
5461 if (error) 5656 if (error)
5462 goto error0; 5657 goto error0;
@@ -5513,6 +5708,14 @@ nodelete:
5513 ASSERT(ifp->if_ext_max == 5708 ASSERT(ifp->if_ext_max ==
5514 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 5709 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5515 error = 0; 5710 error = 0;
5711 if (delta && delta->xed_startoff != NULLFILEOFF) {
5712 /* A change was actually made.
5713 * Note that delta->xed_blockount is an offset at this
5714 * point and needs to be converted to a block count.
5715 */
5716 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5717 delta->xed_blockcount -= delta->xed_startoff;
5718 }
5516error0: 5719error0:
5517 /* 5720 /*
5518 * Log everything. Do this after conversion, there's no point in 5721 * Log everything. Do this after conversion, there's no point in
@@ -5556,7 +5759,7 @@ xfs_getbmap(
5556 __int64_t fixlen; /* length for -1 case */ 5759 __int64_t fixlen; /* length for -1 case */
5557 int i; /* extent number */ 5760 int i; /* extent number */
5558 xfs_inode_t *ip; /* xfs incore inode pointer */ 5761 xfs_inode_t *ip; /* xfs incore inode pointer */
5559 vnode_t *vp; /* corresponding vnode */ 5762 bhv_vnode_t *vp; /* corresponding vnode */
5560 int lock; /* lock state */ 5763 int lock; /* lock state */
5561 xfs_bmbt_irec_t *map; /* buffer for user's data */ 5764 xfs_bmbt_irec_t *map; /* buffer for user's data */
5562 xfs_mount_t *mp; /* file system mount point */ 5765 xfs_mount_t *mp; /* file system mount point */
@@ -5653,7 +5856,7 @@ xfs_getbmap(
5653 5856
5654 if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) { 5857 if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) {
5655 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5858 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
5656 VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); 5859 error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
5657 } 5860 }
5658 5861
5659 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); 5862 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
@@ -5689,7 +5892,8 @@ xfs_getbmap(
5689 nmap = (nexleft > subnex) ? subnex : nexleft; 5892 nmap = (nexleft > subnex) ? subnex : nexleft;
5690 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 5893 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5691 XFS_BB_TO_FSB(mp, bmv->bmv_length), 5894 XFS_BB_TO_FSB(mp, bmv->bmv_length),
5692 bmapi_flags, NULL, 0, map, &nmap, NULL); 5895 bmapi_flags, NULL, 0, map, &nmap,
5896 NULL, NULL);
5693 if (error) 5897 if (error)
5694 goto unlock_and_return; 5898 goto unlock_and_return;
5695 ASSERT(nmap <= subnex); 5899 ASSERT(nmap <= subnex);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 8e0d73d9ccc4..80e93409b78d 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -26,6 +26,20 @@ struct xfs_mount;
26struct xfs_trans; 26struct xfs_trans;
27 27
28/* 28/*
29 * DELTA: describe a change to the in-core extent list.
30 *
31 * Internally the use of xed_blockount is somewhat funky.
32 * xed_blockcount contains an offset much of the time because this
33 * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are
34 * the same underlying type).
35 */
36typedef struct xfs_extdelta
37{
38 xfs_fileoff_t xed_startoff; /* offset of range */
39 xfs_filblks_t xed_blockcount; /* blocks in range */
40} xfs_extdelta_t;
41
42/*
29 * List of extents to be free "later". 43 * List of extents to be free "later".
30 * The list is kept sorted on xbf_startblock. 44 * The list is kept sorted on xbf_startblock.
31 */ 45 */
@@ -275,7 +289,9 @@ xfs_bmapi(
275 xfs_extlen_t total, /* total blocks needed */ 289 xfs_extlen_t total, /* total blocks needed */
276 struct xfs_bmbt_irec *mval, /* output: map values */ 290 struct xfs_bmbt_irec *mval, /* output: map values */
277 int *nmap, /* i/o: mval size/count */ 291 int *nmap, /* i/o: mval size/count */
278 xfs_bmap_free_t *flist); /* i/o: list extents to free */ 292 xfs_bmap_free_t *flist, /* i/o: list extents to free */
293 xfs_extdelta_t *delta); /* o: change made to incore
294 extents */
279 295
280/* 296/*
281 * Map file blocks to filesystem blocks, simple version. 297 * Map file blocks to filesystem blocks, simple version.
@@ -309,6 +325,8 @@ xfs_bunmapi(
309 xfs_fsblock_t *firstblock, /* first allocated block 325 xfs_fsblock_t *firstblock, /* first allocated block
310 controls a.g. for allocs */ 326 controls a.g. for allocs */
311 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 327 xfs_bmap_free_t *flist, /* i/o: list extents to free */
328 xfs_extdelta_t *delta, /* o: change made to incore
329 extents */
312 int *done); /* set if not done yet */ 330 int *done); /* set if not done yet */
313 331
314/* 332/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bea44709afbe..18fb7385d719 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -1569,12 +1567,11 @@ xfs_bmbt_split(
1569 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); 1567 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
1570 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1568 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1571 args.fsbno = cur->bc_private.b.firstblock; 1569 args.fsbno = cur->bc_private.b.firstblock;
1570 args.firstblock = args.fsbno;
1572 if (args.fsbno == NULLFSBLOCK) { 1571 if (args.fsbno == NULLFSBLOCK) {
1573 args.fsbno = lbno; 1572 args.fsbno = lbno;
1574 args.type = XFS_ALLOCTYPE_START_BNO; 1573 args.type = XFS_ALLOCTYPE_START_BNO;
1575 } else if (cur->bc_private.b.flist->xbf_low) 1574 } else
1576 args.type = XFS_ALLOCTYPE_FIRST_AG;
1577 else
1578 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1575 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1579 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1576 args.mod = args.minleft = args.alignment = args.total = args.isfl =
1580 args.userdata = args.minalignslop = 0; 1577 args.userdata = args.minalignslop = 0;
@@ -2356,6 +2353,7 @@ xfs_bmbt_newroot(
2356 args.userdata = args.minalignslop = 0; 2353 args.userdata = args.minalignslop = 0;
2357 args.minlen = args.maxlen = args.prod = 1; 2354 args.minlen = args.maxlen = args.prod = 1;
2358 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 2355 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
2356 args.firstblock = args.fsbno;
2359 if (args.fsbno == NULLFSBLOCK) { 2357 if (args.fsbno == NULLFSBLOCK) {
2360#ifdef DEBUG 2358#ifdef DEBUG
2361 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { 2359 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
@@ -2365,9 +2363,7 @@ xfs_bmbt_newroot(
2365#endif 2363#endif
2366 args.fsbno = INT_GET(*pp, ARCH_CONVERT); 2364 args.fsbno = INT_GET(*pp, ARCH_CONVERT);
2367 args.type = XFS_ALLOCTYPE_START_BNO; 2365 args.type = XFS_ALLOCTYPE_START_BNO;
2368 } else if (args.wasdel) 2366 } else
2369 args.type = XFS_ALLOCTYPE_FIRST_AG;
2370 else
2371 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2367 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2372 if ((error = xfs_alloc_vextent(&args))) { 2368 if ((error = xfs_alloc_vextent(&args))) {
2373 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2369 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 52d5d095fc35..ee2255bd6562 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 5fed15682dda..a4aa53974f76 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -23,7 +23,6 @@
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_dir.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_buf_item.h" 28#include "xfs_buf_item.h"
@@ -1030,9 +1029,9 @@ xfs_buf_iodone_callbacks(
1030 if ((XFS_BUF_TARGET(bp) != lasttarg) || 1029 if ((XFS_BUF_TARGET(bp) != lasttarg) ||
1031 (time_after(jiffies, (lasttime + 5*HZ)))) { 1030 (time_after(jiffies, (lasttime + 5*HZ)))) {
1032 lasttime = jiffies; 1031 lasttime = jiffies;
1033 prdev("XFS write error in file system meta-data " 1032 cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
1034 "block 0x%llx in %s", 1033 " block 0x%llx in %s",
1035 XFS_BUF_TARGET(bp), 1034 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
1036 (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); 1035 (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
1037 } 1036 }
1038 lasttarg = XFS_BUF_TARGET(bp); 1037 lasttarg = XFS_BUF_TARGET(bp);
@@ -1108,7 +1107,7 @@ xfs_buf_error_relse(
1108 XFS_BUF_ERROR(bp,0); 1107 XFS_BUF_ERROR(bp,0);
1109 xfs_buftrace("BUF_ERROR_RELSE", bp); 1108 xfs_buftrace("BUF_ERROR_RELSE", bp);
1110 if (! XFS_FORCED_SHUTDOWN(mp)) 1109 if (! XFS_FORCED_SHUTDOWN(mp))
1111 xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); 1110 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1112 /* 1111 /*
1113 * We have to unpin the pinned buffers so do the 1112 * We have to unpin the pinned buffers so do the
1114 * callbacks. 1113 * callbacks.
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
index d0035c6e9514..7a0e482dd436 100644
--- a/fs/xfs/xfs_cap.h
+++ b/fs/xfs/xfs_cap.h
@@ -49,12 +49,12 @@ typedef struct xfs_cap_set {
49 49
50#include <linux/posix_cap_xattr.h> 50#include <linux/posix_cap_xattr.h>
51 51
52struct vnode; 52struct bhv_vnode;
53 53
54extern int xfs_cap_vhascap(struct vnode *); 54extern int xfs_cap_vhascap(struct bhv_vnode *);
55extern int xfs_cap_vset(struct vnode *, void *, size_t); 55extern int xfs_cap_vset(struct bhv_vnode *, void *, size_t);
56extern int xfs_cap_vget(struct vnode *, void *, size_t); 56extern int xfs_cap_vget(struct bhv_vnode *, void *, size_t);
57extern int xfs_cap_vremove(struct vnode *vp); 57extern int xfs_cap_vremove(struct bhv_vnode *);
58 58
59#define _CAP_EXISTS xfs_cap_vhascap 59#define _CAP_EXISTS xfs_cap_vhascap
60 60
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 8988b9051175..32ab61d17ace 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -43,7 +41,6 @@
43#include "xfs_bmap.h" 41#include "xfs_bmap.h"
44#include "xfs_attr.h" 42#include "xfs_attr.h"
45#include "xfs_attr_leaf.h" 43#include "xfs_attr_leaf.h"
46#include "xfs_dir_leaf.h"
47#include "xfs_dir2_data.h" 44#include "xfs_dir2_data.h"
48#include "xfs_dir2_leaf.h" 45#include "xfs_dir2_leaf.h"
49#include "xfs_dir2_block.h" 46#include "xfs_dir2_block.h"
@@ -159,7 +156,7 @@ xfs_da_split(xfs_da_state_t *state)
159 max = state->path.active - 1; 156 max = state->path.active - 1;
160 ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); 157 ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
161 ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || 158 ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
162 state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp)); 159 state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
163 160
164 addblk = &state->path.blk[max]; /* initial dummy value */ 161 addblk = &state->path.blk[max]; /* initial dummy value */
165 for (i = max; (i >= 0) && addblk; state->path.active--, i--) { 162 for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
@@ -199,38 +196,7 @@ xfs_da_split(xfs_da_state_t *state)
199 return(error); /* GROT: attr inconsistent */ 196 return(error); /* GROT: attr inconsistent */
200 addblk = newblk; 197 addblk = newblk;
201 break; 198 break;
202 case XFS_DIR_LEAF_MAGIC:
203 ASSERT(XFS_DIR_IS_V1(state->mp));
204 error = xfs_dir_leaf_split(state, oldblk, newblk);
205 if ((error != 0) && (error != ENOSPC)) {
206 return(error); /* GROT: dir is inconsistent */
207 }
208 if (!error) {
209 addblk = newblk;
210 break;
211 }
212 /*
213 * Entry wouldn't fit, split the leaf again.
214 */
215 state->extravalid = 1;
216 if (state->inleaf) {
217 state->extraafter = 0; /* before newblk */
218 error = xfs_dir_leaf_split(state, oldblk,
219 &state->extrablk);
220 if (error)
221 return(error); /* GROT: dir incon. */
222 addblk = newblk;
223 } else {
224 state->extraafter = 1; /* after newblk */
225 error = xfs_dir_leaf_split(state, newblk,
226 &state->extrablk);
227 if (error)
228 return(error); /* GROT: dir incon. */
229 addblk = newblk;
230 }
231 break;
232 case XFS_DIR2_LEAFN_MAGIC: 199 case XFS_DIR2_LEAFN_MAGIC:
233 ASSERT(XFS_DIR_IS_V2(state->mp));
234 error = xfs_dir2_leafn_split(state, oldblk, newblk); 200 error = xfs_dir2_leafn_split(state, oldblk, newblk);
235 if (error) 201 if (error)
236 return error; 202 return error;
@@ -363,7 +329,6 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
363 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - 329 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
364 (char *)oldroot); 330 (char *)oldroot);
365 } else { 331 } else {
366 ASSERT(XFS_DIR_IS_V2(mp));
367 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 332 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
368 leaf = (xfs_dir2_leaf_t *)oldroot; 333 leaf = (xfs_dir2_leaf_t *)oldroot;
369 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - 334 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
@@ -379,8 +344,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
379 * Set up the new root node. 344 * Set up the new root node.
380 */ 345 */
381 error = xfs_da_node_create(args, 346 error = xfs_da_node_create(args,
382 args->whichfork == XFS_DATA_FORK && 347 (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
383 XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
384 be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); 348 be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
385 if (error) 349 if (error)
386 return(error); 350 return(error);
@@ -427,10 +391,9 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
427 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 391 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
428 392
429 /* 393 /*
430 * With V2 the extra block is data or freespace. 394 * With V2 dirs the extra block is data or freespace.
431 */ 395 */
432 useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) || 396 useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
433 state->args->whichfork == XFS_ATTR_FORK);
434 newcount = 1 + useextra; 397 newcount = 1 + useextra;
435 /* 398 /*
436 * Do we have to split the node? 399 * Do we have to split the node?
@@ -624,7 +587,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
624 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 587 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
625 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 588 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
626 ASSERT(newblk->blkno != 0); 589 ASSERT(newblk->blkno != 0);
627 if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) 590 if (state->args->whichfork == XFS_DATA_FORK)
628 ASSERT(newblk->blkno >= mp->m_dirleafblk && 591 ASSERT(newblk->blkno >= mp->m_dirleafblk &&
629 newblk->blkno < mp->m_dirfreeblk); 592 newblk->blkno < mp->m_dirfreeblk);
630 593
@@ -670,7 +633,7 @@ xfs_da_join(xfs_da_state_t *state)
670 save_blk = &state->altpath.blk[ state->path.active-1 ]; 633 save_blk = &state->altpath.blk[ state->path.active-1 ];
671 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); 634 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
672 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || 635 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
673 drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp)); 636 drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
674 637
675 /* 638 /*
676 * Walk back up the tree joining/deallocating as necessary. 639 * Walk back up the tree joining/deallocating as necessary.
@@ -693,17 +656,7 @@ xfs_da_join(xfs_da_state_t *state)
693 return(0); 656 return(0);
694 xfs_attr_leaf_unbalance(state, drop_blk, save_blk); 657 xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
695 break; 658 break;
696 case XFS_DIR_LEAF_MAGIC:
697 ASSERT(XFS_DIR_IS_V1(state->mp));
698 error = xfs_dir_leaf_toosmall(state, &action);
699 if (error)
700 return(error);
701 if (action == 0)
702 return(0);
703 xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
704 break;
705 case XFS_DIR2_LEAFN_MAGIC: 659 case XFS_DIR2_LEAFN_MAGIC:
706 ASSERT(XFS_DIR_IS_V2(state->mp));
707 error = xfs_dir2_leafn_toosmall(state, &action); 660 error = xfs_dir2_leafn_toosmall(state, &action);
708 if (error) 661 if (error)
709 return error; 662 return error;
@@ -790,7 +743,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
790 ASSERT(bp != NULL); 743 ASSERT(bp != NULL);
791 blkinfo = bp->data; 744 blkinfo = bp->data;
792 if (be16_to_cpu(oldroot->hdr.level) == 1) { 745 if (be16_to_cpu(oldroot->hdr.level) == 1) {
793 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || 746 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
794 be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); 747 be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
795 } else { 748 } else {
796 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); 749 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
@@ -951,14 +904,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
951 if (count == 0) 904 if (count == 0)
952 return; 905 return;
953 break; 906 break;
954 case XFS_DIR_LEAF_MAGIC:
955 ASSERT(XFS_DIR_IS_V1(state->mp));
956 lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
957 if (count == 0)
958 return;
959 break;
960 case XFS_DIR2_LEAFN_MAGIC: 907 case XFS_DIR2_LEAFN_MAGIC:
961 ASSERT(XFS_DIR_IS_V2(state->mp));
962 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); 908 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
963 if (count == 0) 909 if (count == 0)
964 return; 910 return;
@@ -1117,10 +1063,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1117 * Descend thru the B-tree searching each level for the right 1063 * Descend thru the B-tree searching each level for the right
1118 * node to use, until the right hashval is found. 1064 * node to use, until the right hashval is found.
1119 */ 1065 */
1120 if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp)) 1066 blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
1121 blkno = state->mp->m_dirleafblk;
1122 else
1123 blkno = 0;
1124 for (blk = &state->path.blk[0], state->path.active = 1; 1067 for (blk = &state->path.blk[0], state->path.active = 1;
1125 state->path.active <= XFS_DA_NODE_MAXDEPTH; 1068 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1126 blk++, state->path.active++) { 1069 blk++, state->path.active++) {
@@ -1137,7 +1080,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1137 } 1080 }
1138 curr = blk->bp->data; 1081 curr = blk->bp->data;
1139 ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC || 1082 ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC ||
1140 be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || 1083 be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC ||
1141 be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC); 1084 be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC);
1142 1085
1143 /* 1086 /*
@@ -1190,16 +1133,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1190 blk->index = probe; 1133 blk->index = probe;
1191 blkno = be32_to_cpu(btree->before); 1134 blkno = be32_to_cpu(btree->before);
1192 } 1135 }
1193 } 1136 } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
1194 else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
1195 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1137 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1196 break; 1138 break;
1197 } 1139 } else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
1198 else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) {
1199 blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1200 break;
1201 }
1202 else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
1203 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); 1140 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1204 break; 1141 break;
1205 } 1142 }
@@ -1212,12 +1149,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1212 * next leaf and keep searching. 1149 * next leaf and keep searching.
1213 */ 1150 */
1214 for (;;) { 1151 for (;;) {
1215 if (blk->magic == XFS_DIR_LEAF_MAGIC) { 1152 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1216 ASSERT(XFS_DIR_IS_V1(state->mp));
1217 retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1218 &blk->index);
1219 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1220 ASSERT(XFS_DIR_IS_V2(state->mp));
1221 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1153 retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1222 &blk->index, state); 1154 &blk->index, state);
1223 } 1155 }
@@ -1270,7 +1202,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1270 old_info = old_blk->bp->data; 1202 old_info = old_blk->bp->data;
1271 new_info = new_blk->bp->data; 1203 new_info = new_blk->bp->data;
1272 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || 1204 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1273 old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || 1205 old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1274 old_blk->magic == XFS_ATTR_LEAF_MAGIC); 1206 old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1275 ASSERT(old_blk->magic == be16_to_cpu(old_info->magic)); 1207 ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1276 ASSERT(new_blk->magic == be16_to_cpu(new_info->magic)); 1208 ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
@@ -1280,12 +1212,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1280 case XFS_ATTR_LEAF_MAGIC: 1212 case XFS_ATTR_LEAF_MAGIC:
1281 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); 1213 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1282 break; 1214 break;
1283 case XFS_DIR_LEAF_MAGIC:
1284 ASSERT(XFS_DIR_IS_V1(state->mp));
1285 before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1286 break;
1287 case XFS_DIR2_LEAFN_MAGIC: 1215 case XFS_DIR2_LEAFN_MAGIC:
1288 ASSERT(XFS_DIR_IS_V2(state->mp));
1289 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); 1216 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1290 break; 1217 break;
1291 case XFS_DA_NODE_MAGIC: 1218 case XFS_DA_NODE_MAGIC:
@@ -1404,7 +1331,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1404 save_info = save_blk->bp->data; 1331 save_info = save_blk->bp->data;
1405 drop_info = drop_blk->bp->data; 1332 drop_info = drop_blk->bp->data;
1406 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1333 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1407 save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || 1334 save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1408 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1335 save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1409 ASSERT(save_blk->magic == be16_to_cpu(save_info->magic)); 1336 ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1410 ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic)); 1337 ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
@@ -1529,7 +1456,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1529 ASSERT(blk->bp != NULL); 1456 ASSERT(blk->bp != NULL);
1530 info = blk->bp->data; 1457 info = blk->bp->data;
1531 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || 1458 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
1532 be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || 1459 be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
1533 be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1460 be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
1534 blk->magic = be16_to_cpu(info->magic); 1461 blk->magic = be16_to_cpu(info->magic);
1535 if (blk->magic == XFS_DA_NODE_MAGIC) { 1462 if (blk->magic == XFS_DA_NODE_MAGIC) {
@@ -1548,20 +1475,13 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1548 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, 1475 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1549 NULL); 1476 NULL);
1550 break; 1477 break;
1551 case XFS_DIR_LEAF_MAGIC:
1552 ASSERT(XFS_DIR_IS_V1(state->mp));
1553 blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1554 NULL);
1555 break;
1556 case XFS_DIR2_LEAFN_MAGIC: 1478 case XFS_DIR2_LEAFN_MAGIC:
1557 ASSERT(XFS_DIR_IS_V2(state->mp));
1558 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, 1479 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1559 NULL); 1480 NULL);
1560 break; 1481 break;
1561 default: 1482 default:
1562 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC || 1483 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1563 blk->magic == 1484 blk->magic == XFS_DIR2_LEAFN_MAGIC);
1564 XFS_DIRX_LEAF_MAGIC(state->mp));
1565 break; 1485 break;
1566 } 1486 }
1567 } 1487 }
@@ -1620,7 +1540,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1620 xfs_bmbt_irec_t *mapp; 1540 xfs_bmbt_irec_t *mapp;
1621 xfs_inode_t *dp; 1541 xfs_inode_t *dp;
1622 int nmap, error, w, count, c, got, i, mapi; 1542 int nmap, error, w, count, c, got, i, mapi;
1623 xfs_fsize_t size;
1624 xfs_trans_t *tp; 1543 xfs_trans_t *tp;
1625 xfs_mount_t *mp; 1544 xfs_mount_t *mp;
1626 1545
@@ -1631,7 +1550,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1631 /* 1550 /*
1632 * For new directories adjust the file offset and block count. 1551 * For new directories adjust the file offset and block count.
1633 */ 1552 */
1634 if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) { 1553 if (w == XFS_DATA_FORK) {
1635 bno = mp->m_dirleafblk; 1554 bno = mp->m_dirleafblk;
1636 count = mp->m_dirblkfsbs; 1555 count = mp->m_dirblkfsbs;
1637 } else { 1556 } else {
@@ -1641,10 +1560,9 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1641 /* 1560 /*
1642 * Find a spot in the file space to put the new block. 1561 * Find a spot in the file space to put the new block.
1643 */ 1562 */
1644 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) { 1563 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
1645 return error; 1564 return error;
1646 } 1565 if (w == XFS_DATA_FORK)
1647 if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1648 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); 1566 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1649 /* 1567 /*
1650 * Try mapping it in one filesystem block. 1568 * Try mapping it in one filesystem block.
@@ -1655,7 +1573,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1655 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1573 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1656 XFS_BMAPI_CONTIG, 1574 XFS_BMAPI_CONTIG,
1657 args->firstblock, args->total, &map, &nmap, 1575 args->firstblock, args->total, &map, &nmap,
1658 args->flist))) { 1576 args->flist, NULL))) {
1659 return error; 1577 return error;
1660 } 1578 }
1661 ASSERT(nmap <= 1); 1579 ASSERT(nmap <= 1);
@@ -1676,7 +1594,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1676 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE| 1594 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
1677 XFS_BMAPI_METADATA, 1595 XFS_BMAPI_METADATA,
1678 args->firstblock, args->total, 1596 args->firstblock, args->total,
1679 &mapp[mapi], &nmap, args->flist))) { 1597 &mapp[mapi], &nmap, args->flist,
1598 NULL))) {
1680 kmem_free(mapp, sizeof(*mapp) * count); 1599 kmem_free(mapp, sizeof(*mapp) * count);
1681 return error; 1600 return error;
1682 } 1601 }
@@ -1705,19 +1624,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1705 if (mapp != &map) 1624 if (mapp != &map)
1706 kmem_free(mapp, sizeof(*mapp) * count); 1625 kmem_free(mapp, sizeof(*mapp) * count);
1707 *new_blkno = (xfs_dablk_t)bno; 1626 *new_blkno = (xfs_dablk_t)bno;
1708 /*
1709 * For version 1 directories, adjust the file size if it changed.
1710 */
1711 if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1712 ASSERT(mapi == 1);
1713 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1714 return error;
1715 size = XFS_FSB_TO_B(mp, bno);
1716 if (size != dp->i_d.di_size) {
1717 dp->i_d.di_size = size;
1718 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1719 }
1720 }
1721 return 0; 1627 return 0;
1722} 1628}
1723 1629
@@ -1742,7 +1648,6 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1742 int error, w, entno, level, dead_level; 1648 int error, w, entno, level, dead_level;
1743 xfs_da_blkinfo_t *dead_info, *sib_info; 1649 xfs_da_blkinfo_t *dead_info, *sib_info;
1744 xfs_da_intnode_t *par_node, *dead_node; 1650 xfs_da_intnode_t *par_node, *dead_node;
1745 xfs_dir_leafblock_t *dead_leaf;
1746 xfs_dir2_leaf_t *dead_leaf2; 1651 xfs_dir2_leaf_t *dead_leaf2;
1747 xfs_dahash_t dead_hash; 1652 xfs_dahash_t dead_hash;
1748 1653
@@ -1753,11 +1658,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1753 w = args->whichfork; 1658 w = args->whichfork;
1754 ASSERT(w == XFS_DATA_FORK); 1659 ASSERT(w == XFS_DATA_FORK);
1755 mp = ip->i_mount; 1660 mp = ip->i_mount;
1756 if (XFS_DIR_IS_V2(mp)) { 1661 lastoff = mp->m_dirfreeblk;
1757 lastoff = mp->m_dirfreeblk; 1662 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1758 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1759 } else
1760 error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
1761 if (error) 1663 if (error)
1762 return error; 1664 return error;
1763 if (unlikely(lastoff == 0)) { 1665 if (unlikely(lastoff == 0)) {
@@ -1780,14 +1682,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1780 /* 1682 /*
1781 * Get values from the moved block. 1683 * Get values from the moved block.
1782 */ 1684 */
1783 if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) { 1685 if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
1784 ASSERT(XFS_DIR_IS_V1(mp));
1785 dead_leaf = (xfs_dir_leafblock_t *)dead_info;
1786 dead_level = 0;
1787 dead_hash =
1788 INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1789 } else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
1790 ASSERT(XFS_DIR_IS_V2(mp));
1791 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 1686 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1792 dead_level = 0; 1687 dead_level = 0;
1793 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 1688 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
@@ -1842,7 +1737,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1842 xfs_da_buf_done(sib_buf); 1737 xfs_da_buf_done(sib_buf);
1843 sib_buf = NULL; 1738 sib_buf = NULL;
1844 } 1739 }
1845 par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk; 1740 par_blkno = mp->m_dirleafblk;
1846 level = -1; 1741 level = -1;
1847 /* 1742 /*
1848 * Walk down the tree looking for the parent of the moved block. 1743 * Walk down the tree looking for the parent of the moved block.
@@ -1941,8 +1836,6 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1941{ 1836{
1942 xfs_inode_t *dp; 1837 xfs_inode_t *dp;
1943 int done, error, w, count; 1838 int done, error, w, count;
1944 xfs_fileoff_t bno;
1945 xfs_fsize_t size;
1946 xfs_trans_t *tp; 1839 xfs_trans_t *tp;
1947 xfs_mount_t *mp; 1840 xfs_mount_t *mp;
1948 1841
@@ -1950,7 +1843,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1950 w = args->whichfork; 1843 w = args->whichfork;
1951 tp = args->trans; 1844 tp = args->trans;
1952 mp = dp->i_mount; 1845 mp = dp->i_mount;
1953 if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) 1846 if (w == XFS_DATA_FORK)
1954 count = mp->m_dirblkfsbs; 1847 count = mp->m_dirblkfsbs;
1955 else 1848 else
1956 count = 1; 1849 count = 1;
@@ -1961,34 +1854,17 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1961 */ 1854 */
1962 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 1855 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1963 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA, 1856 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
1964 0, args->firstblock, args->flist, 1857 0, args->firstblock, args->flist, NULL,
1965 &done)) == ENOSPC) { 1858 &done)) == ENOSPC) {
1966 if (w != XFS_DATA_FORK) 1859 if (w != XFS_DATA_FORK)
1967 goto done; 1860 break;
1968 if ((error = xfs_da_swap_lastblock(args, &dead_blkno, 1861 if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1969 &dead_buf))) 1862 &dead_buf)))
1970 goto done; 1863 break;
1971 } else if (error) 1864 } else {
1972 goto done;
1973 else
1974 break; 1865 break;
1975 }
1976 ASSERT(done);
1977 xfs_da_binval(tp, dead_buf);
1978 /*
1979 * Adjust the directory size for version 1.
1980 */
1981 if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1982 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1983 return error;
1984 size = XFS_FSB_TO_B(dp->i_mount, bno);
1985 if (size != dp->i_d.di_size) {
1986 dp->i_d.di_size = size;
1987 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1988 } 1866 }
1989 } 1867 }
1990 return 0;
1991done:
1992 xfs_da_binval(tp, dead_buf); 1868 xfs_da_binval(tp, dead_buf);
1993 return error; 1869 return error;
1994} 1870}
@@ -2049,10 +1925,7 @@ xfs_da_do_buf(
2049 xfs_dabuf_t *rbp; 1925 xfs_dabuf_t *rbp;
2050 1926
2051 mp = dp->i_mount; 1927 mp = dp->i_mount;
2052 if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) 1928 nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
2053 nfsb = mp->m_dirblkfsbs;
2054 else
2055 nfsb = 1;
2056 mappedbno = *mappedbnop; 1929 mappedbno = *mappedbnop;
2057 /* 1930 /*
2058 * Caller doesn't have a mapping. -2 means don't complain 1931 * Caller doesn't have a mapping. -2 means don't complain
@@ -2086,7 +1959,7 @@ xfs_da_do_buf(
2086 nfsb, 1959 nfsb,
2087 XFS_BMAPI_METADATA | 1960 XFS_BMAPI_METADATA |
2088 XFS_BMAPI_AFLAG(whichfork), 1961 XFS_BMAPI_AFLAG(whichfork),
2089 NULL, 0, mapp, &nmap, NULL))) 1962 NULL, 0, mapp, &nmap, NULL, NULL)))
2090 goto exit0; 1963 goto exit0;
2091 } 1964 }
2092 } else { 1965 } else {
@@ -2198,7 +2071,6 @@ xfs_da_do_buf(
2198 magic1 = be32_to_cpu(data->hdr.magic); 2071 magic1 = be32_to_cpu(data->hdr.magic);
2199 if (unlikely( 2072 if (unlikely(
2200 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2073 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2201 (magic != XFS_DIR_LEAF_MAGIC) &&
2202 (magic != XFS_ATTR_LEAF_MAGIC) && 2074 (magic != XFS_ATTR_LEAF_MAGIC) &&
2203 (magic != XFS_DIR2_LEAF1_MAGIC) && 2075 (magic != XFS_DIR2_LEAF1_MAGIC) &&
2204 (magic != XFS_DIR2_LEAFN_MAGIC) && 2076 (magic != XFS_DIR2_LEAFN_MAGIC) &&
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 243a730d5ec8..4ab865ec8b82 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -36,14 +36,10 @@ struct zone;
36 * level in the Btree, and to identify which type of block this is. 36 * level in the Btree, and to identify which type of block this is.
37 */ 37 */
38#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ 38#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
39#define XFS_DIR_LEAF_MAGIC 0xfeeb /* magic number: directory leaf blks */
40#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ 39#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
41#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ 40#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
42#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ 41#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
43 42
44#define XFS_DIRX_LEAF_MAGIC(mp) \
45 (XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC)
46
47typedef struct xfs_da_blkinfo { 43typedef struct xfs_da_blkinfo {
48 __be32 forw; /* previous block in list */ 44 __be32 forw; /* previous block in list */
49 __be32 back; /* following block in list */ 45 __be32 back; /* following block in list */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 4968a6358e61..80562b60fb95 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -54,24 +52,14 @@ xfs_swapext(
54 xfs_swapext_t __user *sxu) 52 xfs_swapext_t __user *sxu)
55{ 53{
56 xfs_swapext_t *sxp; 54 xfs_swapext_t *sxp;
57 xfs_inode_t *ip=NULL, *tip=NULL, *ips[2]; 55 xfs_inode_t *ip=NULL, *tip=NULL;
58 xfs_trans_t *tp;
59 xfs_mount_t *mp; 56 xfs_mount_t *mp;
60 xfs_bstat_t *sbp;
61 struct file *fp = NULL, *tfp = NULL; 57 struct file *fp = NULL, *tfp = NULL;
62 vnode_t *vp, *tvp; 58 bhv_vnode_t *vp, *tvp;
63 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
64 int ilf_fields, tilf_fields;
65 int error = 0; 59 int error = 0;
66 xfs_ifork_t *tempifp, *ifp, *tifp;
67 __uint64_t tmp;
68 int aforkblks = 0;
69 int taforkblks = 0;
70 char locked = 0;
71 60
72 sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); 61 sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
73 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 62 if (!sxp) {
74 if (!sxp || !tempifp) {
75 error = XFS_ERROR(ENOMEM); 63 error = XFS_ERROR(ENOMEM);
76 goto error0; 64 goto error0;
77 } 65 }
@@ -118,14 +106,56 @@ xfs_swapext(
118 106
119 mp = ip->i_mount; 107 mp = ip->i_mount;
120 108
121 sbp = &sxp->sx_stat;
122
123 if (XFS_FORCED_SHUTDOWN(mp)) { 109 if (XFS_FORCED_SHUTDOWN(mp)) {
124 error = XFS_ERROR(EIO); 110 error = XFS_ERROR(EIO);
125 goto error0; 111 goto error0;
126 } 112 }
127 113
128 locked = 1; 114 error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp);
115
116 error0:
117 if (fp != NULL)
118 fput(fp);
119 if (tfp != NULL)
120 fput(tfp);
121
122 if (sxp != NULL)
123 kmem_free(sxp, sizeof(xfs_swapext_t));
124
125 return error;
126}
127
128int
129xfs_swap_extents(
130 xfs_inode_t *ip,
131 xfs_inode_t *tip,
132 xfs_swapext_t *sxp)
133{
134 xfs_mount_t *mp;
135 xfs_inode_t *ips[2];
136 xfs_trans_t *tp;
137 xfs_bstat_t *sbp = &sxp->sx_stat;
138 bhv_vnode_t *vp, *tvp;
139 xfs_ifork_t *tempifp, *ifp, *tifp;
140 int ilf_fields, tilf_fields;
141 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
142 int error = 0;
143 int aforkblks = 0;
144 int taforkblks = 0;
145 __uint64_t tmp;
146 char locked = 0;
147
148 mp = ip->i_mount;
149
150 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
151 if (!tempifp) {
152 error = XFS_ERROR(ENOMEM);
153 goto error0;
154 }
155
156 sbp = &sxp->sx_stat;
157 vp = XFS_ITOV(ip);
158 tvp = XFS_ITOV(tip);
129 159
130 /* Lock in i_ino order */ 160 /* Lock in i_ino order */
131 if (ip->i_ino < tip->i_ino) { 161 if (ip->i_ino < tip->i_ino) {
@@ -137,6 +167,7 @@ xfs_swapext(
137 } 167 }
138 168
139 xfs_lock_inodes(ips, 2, 0, lock_flags); 169 xfs_lock_inodes(ips, 2, 0, lock_flags);
170 locked = 1;
140 171
141 /* Check permissions */ 172 /* Check permissions */
142 error = xfs_iaccess(ip, S_IWUSR, NULL); 173 error = xfs_iaccess(ip, S_IWUSR, NULL);
@@ -169,7 +200,7 @@ xfs_swapext(
169 200
170 if (VN_CACHED(tvp) != 0) { 201 if (VN_CACHED(tvp) != 0) {
171 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); 202 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
172 VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED); 203 bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED);
173 } 204 }
174 205
175 /* Verify O_DIRECT for ftmp */ 206 /* Verify O_DIRECT for ftmp */
@@ -214,7 +245,7 @@ xfs_swapext(
214 /* We need to fail if the file is memory mapped. Once we have tossed 245 /* We need to fail if the file is memory mapped. Once we have tossed
215 * all existing pages, the page fault will have no option 246 * all existing pages, the page fault will have no option
216 * but to go to the filesystem for pages. By making the page fault call 247 * but to go to the filesystem for pages. By making the page fault call
217 * VOP_READ (or write in the case of autogrow) they block on the iolock 248 * vop_read (or write in the case of autogrow) they block on the iolock
218 * until we have switched the extents. 249 * until we have switched the extents.
219 */ 250 */
220 if (VN_MAPPED(vp)) { 251 if (VN_MAPPED(vp)) {
@@ -233,7 +264,7 @@ xfs_swapext(
233 * fields change. 264 * fields change.
234 */ 265 */
235 266
236 VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); 267 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
237 268
238 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 269 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
239 if ((error = xfs_trans_reserve(tp, 0, 270 if ((error = xfs_trans_reserve(tp, 0,
@@ -360,16 +391,7 @@ xfs_swapext(
360 xfs_iunlock(ip, lock_flags); 391 xfs_iunlock(ip, lock_flags);
361 xfs_iunlock(tip, lock_flags); 392 xfs_iunlock(tip, lock_flags);
362 } 393 }
363
364 if (fp != NULL)
365 fput(fp);
366 if (tfp != NULL)
367 fput(tfp);
368
369 if (sxp != NULL)
370 kmem_free(sxp, sizeof(xfs_swapext_t));
371 if (tempifp != NULL) 394 if (tempifp != NULL)
372 kmem_free(tempifp, sizeof(xfs_ifork_t)); 395 kmem_free(tempifp, sizeof(xfs_ifork_t));
373
374 return error; 396 return error;
375} 397}
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index f678559abc45..da178205be68 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -48,6 +48,9 @@ typedef struct xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext __user *sx); 49int xfs_swapext(struct xfs_swapext __user *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp);
53
51#endif /* __KERNEL__ */ 54#endif /* __KERNEL__ */
52 55
53#endif /* __XFS_DFRAG_H__ */ 56#endif /* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 79d0d9e1fbab..b33826961c45 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -85,7 +85,6 @@ typedef struct xfs_dinode
85 union { 85 union {
86 xfs_bmdr_block_t di_bmbt; /* btree root block */ 86 xfs_bmdr_block_t di_bmbt; /* btree root block */
87 xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */ 87 xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */
88 xfs_dir_shortform_t di_dirsf; /* shortform directory */
89 xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */ 88 xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */
90 char di_c[1]; /* local contents */ 89 char di_c[1]; /* local contents */
91 xfs_dev_t di_dev; /* device for S_IFCHR/S_IFBLK */ 90 xfs_dev_t di_dev; /* device for S_IFCHR/S_IFBLK */
@@ -257,6 +256,7 @@ typedef enum xfs_dinode_fmt
257#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ 256#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
258#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ 257#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
259#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ 258#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
259#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) 262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -270,12 +270,13 @@ typedef enum xfs_dinode_fmt
270#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) 270#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) 271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) 272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
273#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
273 274
274#define XFS_DIFLAG_ANY \ 275#define XFS_DIFLAG_ANY \
275 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ 276 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
276 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ 277 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
277 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ 278 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
278 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ 279 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
279 XFS_DIFLAG_EXTSZINHERIT) 280 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
280 281
281#endif /* __XFS_DINODE_H__ */ 282#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
deleted file mode 100644
index 9cc702a839a3..000000000000
--- a/fs/xfs/xfs_dir.c
+++ /dev/null
@@ -1,1217 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_alloc.h"
34#include "xfs_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h"
38#include "xfs_dinode.h"
39#include "xfs_inode.h"
40#include "xfs_bmap.h"
41#include "xfs_dir_leaf.h"
42#include "xfs_error.h"
43
44/*
45 * xfs_dir.c
46 *
47 * Provide the external interfaces to manage directories.
48 */
49
50/*========================================================================
51 * Function prototypes for the kernel.
52 *========================================================================*/
53
54/*
55 * Functions for the dirops interfaces.
56 */
57static void xfs_dir_mount(struct xfs_mount *mp);
58
59static int xfs_dir_isempty(struct xfs_inode *dp);
60
61static int xfs_dir_init(struct xfs_trans *trans,
62 struct xfs_inode *dir,
63 struct xfs_inode *parent_dir);
64
65static int xfs_dir_createname(struct xfs_trans *trans,
66 struct xfs_inode *dp,
67 char *name_string,
68 int name_len,
69 xfs_ino_t inode_number,
70 xfs_fsblock_t *firstblock,
71 xfs_bmap_free_t *flist,
72 xfs_extlen_t total);
73
74static int xfs_dir_lookup(struct xfs_trans *tp,
75 struct xfs_inode *dp,
76 char *name_string,
77 int name_length,
78 xfs_ino_t *inode_number);
79
80static int xfs_dir_removename(struct xfs_trans *trans,
81 struct xfs_inode *dp,
82 char *name_string,
83 int name_length,
84 xfs_ino_t ino,
85 xfs_fsblock_t *firstblock,
86 xfs_bmap_free_t *flist,
87 xfs_extlen_t total);
88
89static int xfs_dir_getdents(struct xfs_trans *tp,
90 struct xfs_inode *dp,
91 struct uio *uiop,
92 int *eofp);
93
94static int xfs_dir_replace(struct xfs_trans *tp,
95 struct xfs_inode *dp,
96 char *name_string,
97 int name_length,
98 xfs_ino_t inode_number,
99 xfs_fsblock_t *firstblock,
100 xfs_bmap_free_t *flist,
101 xfs_extlen_t total);
102
103static int xfs_dir_canenter(struct xfs_trans *tp,
104 struct xfs_inode *dp,
105 char *name_string,
106 int name_length);
107
108static int xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp,
109 xfs_dinode_t *dip);
110
111xfs_dirops_t xfsv1_dirops = {
112 .xd_mount = xfs_dir_mount,
113 .xd_isempty = xfs_dir_isempty,
114 .xd_init = xfs_dir_init,
115 .xd_createname = xfs_dir_createname,
116 .xd_lookup = xfs_dir_lookup,
117 .xd_removename = xfs_dir_removename,
118 .xd_getdents = xfs_dir_getdents,
119 .xd_replace = xfs_dir_replace,
120 .xd_canenter = xfs_dir_canenter,
121 .xd_shortform_validate_ondisk = xfs_dir_shortform_validate_ondisk,
122 .xd_shortform_to_single = xfs_dir_shortform_to_leaf,
123};
124
125/*
126 * Internal routines when dirsize == XFS_LBSIZE(mp).
127 */
128STATIC int xfs_dir_leaf_lookup(xfs_da_args_t *args);
129STATIC int xfs_dir_leaf_removename(xfs_da_args_t *args, int *number_entries,
130 int *total_namebytes);
131STATIC int xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp,
132 uio_t *uio, int *eofp,
133 xfs_dirent_t *dbp,
134 xfs_dir_put_t put);
135STATIC int xfs_dir_leaf_replace(xfs_da_args_t *args);
136
137/*
138 * Internal routines when dirsize > XFS_LBSIZE(mp).
139 */
140STATIC int xfs_dir_node_addname(xfs_da_args_t *args);
141STATIC int xfs_dir_node_lookup(xfs_da_args_t *args);
142STATIC int xfs_dir_node_removename(xfs_da_args_t *args);
143STATIC int xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp,
144 uio_t *uio, int *eofp,
145 xfs_dirent_t *dbp,
146 xfs_dir_put_t put);
147STATIC int xfs_dir_node_replace(xfs_da_args_t *args);
148
149#if defined(XFS_DIR_TRACE)
150ktrace_t *xfs_dir_trace_buf;
151#endif
152
153
154/*========================================================================
155 * Overall external interface routines.
156 *========================================================================*/
157
158xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
159
160/*
161 * One-time startup routine called from xfs_init().
162 */
163void
164xfs_dir_startup(void)
165{
166 xfs_dir_hash_dot = xfs_da_hashname(".", 1);
167 xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
168}
169
170/*
171 * Initialize directory-related fields in the mount structure.
172 */
173static void
174xfs_dir_mount(xfs_mount_t *mp)
175{
176 uint shortcount, leafcount, count;
177
178 mp->m_dirversion = 1;
179 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
180 shortcount = (mp->m_attroffset -
181 (uint)sizeof(xfs_dir_sf_hdr_t)) /
182 (uint)sizeof(xfs_dir_sf_entry_t);
183 leafcount = (XFS_LBSIZE(mp) -
184 (uint)sizeof(xfs_dir_leaf_hdr_t)) /
185 ((uint)sizeof(xfs_dir_leaf_entry_t) +
186 (uint)sizeof(xfs_dir_leaf_name_t));
187 } else {
188 shortcount = (XFS_BMDR_SPACE_CALC(MINABTPTRS) -
189 (uint)sizeof(xfs_dir_sf_hdr_t)) /
190 (uint)sizeof(xfs_dir_sf_entry_t);
191 leafcount = (XFS_LBSIZE(mp) -
192 (uint)sizeof(xfs_dir_leaf_hdr_t)) /
193 ((uint)sizeof(xfs_dir_leaf_entry_t) +
194 (uint)sizeof(xfs_dir_leaf_name_t));
195 }
196 count = shortcount > leafcount ? shortcount : leafcount;
197 mp->m_dircook_elog = xfs_da_log2_roundup(count + 1);
198 ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
199 mp->m_dir_node_ents = mp->m_attr_node_ents =
200 (XFS_LBSIZE(mp) - (uint)sizeof(xfs_da_node_hdr_t)) /
201 (uint)sizeof(xfs_da_node_entry_t);
202 mp->m_dir_magicpct = (XFS_LBSIZE(mp) * 37) / 100;
203 mp->m_dirblksize = mp->m_sb.sb_blocksize;
204 mp->m_dirblkfsbs = 1;
205}
206
207/*
208 * Return 1 if directory contains only "." and "..".
209 */
210static int
211xfs_dir_isempty(xfs_inode_t *dp)
212{
213 xfs_dir_sf_hdr_t *hdr;
214
215 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
216 if (dp->i_d.di_size == 0)
217 return(1);
218 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
219 return(0);
220 hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
221 return(hdr->count == 0);
222}
223
224/*
225 * Initialize a directory with its "." and ".." entries.
226 */
227static int
228xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
229{
230 xfs_da_args_t args;
231 int error;
232
233 memset((char *)&args, 0, sizeof(args));
234 args.dp = dir;
235 args.trans = trans;
236
237 ASSERT((dir->i_d.di_mode & S_IFMT) == S_IFDIR);
238 if ((error = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino)))
239 return error;
240
241 return(xfs_dir_shortform_create(&args, parent_dir->i_ino));
242}
243
244/*
245 * Generic handler routine to add a name to a directory.
246 * Transitions directory from shortform to Btree as necessary.
247 */
248static int /* error */
249xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
250 int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
251 xfs_bmap_free_t *flist, xfs_extlen_t total)
252{
253 xfs_da_args_t args;
254 int retval, newsize, done;
255
256 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
257
258 if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum)))
259 return (retval);
260
261 XFS_STATS_INC(xs_dir_create);
262 /*
263 * Fill in the arg structure for this request.
264 */
265 args.name = name;
266 args.namelen = namelen;
267 args.hashval = xfs_da_hashname(name, namelen);
268 args.inumber = inum;
269 args.dp = dp;
270 args.firstblock = firstblock;
271 args.flist = flist;
272 args.total = total;
273 args.whichfork = XFS_DATA_FORK;
274 args.trans = trans;
275 args.justcheck = 0;
276 args.addname = args.oknoent = 1;
277
278 /*
279 * Decide on what work routines to call based on the inode size.
280 */
281 done = 0;
282 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
283 newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
284 if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) {
285 retval = xfs_dir_shortform_addname(&args);
286 done = 1;
287 } else {
288 if (total == 0)
289 return XFS_ERROR(ENOSPC);
290 retval = xfs_dir_shortform_to_leaf(&args);
291 done = retval != 0;
292 }
293 }
294 if (!done && xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
295 retval = xfs_dir_leaf_addname(&args);
296 done = retval != ENOSPC;
297 if (!done) {
298 if (total == 0)
299 return XFS_ERROR(ENOSPC);
300 retval = xfs_dir_leaf_to_node(&args);
301 done = retval != 0;
302 }
303 }
304 if (!done) {
305 retval = xfs_dir_node_addname(&args);
306 }
307 return(retval);
308}
309
310/*
311 * Generic handler routine to check if a name can be added to a directory,
312 * without adding any blocks to the directory.
313 */
314static int /* error */
315xfs_dir_canenter(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen)
316{
317 xfs_da_args_t args;
318 int retval, newsize;
319
320 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
321 /*
322 * Fill in the arg structure for this request.
323 */
324 args.name = name;
325 args.namelen = namelen;
326 args.hashval = xfs_da_hashname(name, namelen);
327 args.inumber = 0;
328 args.dp = dp;
329 args.firstblock = NULL;
330 args.flist = NULL;
331 args.total = 0;
332 args.whichfork = XFS_DATA_FORK;
333 args.trans = trans;
334 args.justcheck = args.addname = args.oknoent = 1;
335
336 /*
337 * Decide on what work routines to call based on the inode size.
338 */
339 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
340 newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
341 if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp))
342 retval = 0;
343 else
344 retval = XFS_ERROR(ENOSPC);
345 } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
346 retval = xfs_dir_leaf_addname(&args);
347 } else {
348 retval = xfs_dir_node_addname(&args);
349 }
350 return(retval);
351}
352
353/*
354 * Generic handler routine to remove a name from a directory.
355 * Transitions directory from Btree to shortform as necessary.
356 */
357static int /* error */
358xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
359 int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock,
360 xfs_bmap_free_t *flist, xfs_extlen_t total)
361{
362 xfs_da_args_t args;
363 int count, totallen, newsize, retval;
364
365 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
366 XFS_STATS_INC(xs_dir_remove);
367 /*
368 * Fill in the arg structure for this request.
369 */
370 args.name = name;
371 args.namelen = namelen;
372 args.hashval = xfs_da_hashname(name, namelen);
373 args.inumber = ino;
374 args.dp = dp;
375 args.firstblock = firstblock;
376 args.flist = flist;
377 args.total = total;
378 args.whichfork = XFS_DATA_FORK;
379 args.trans = trans;
380 args.justcheck = args.addname = args.oknoent = 0;
381
382 /*
383 * Decide on what work routines to call based on the inode size.
384 */
385 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
386 retval = xfs_dir_shortform_removename(&args);
387 } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
388 retval = xfs_dir_leaf_removename(&args, &count, &totallen);
389 if (retval == 0) {
390 newsize = XFS_DIR_SF_ALLFIT(count, totallen);
391 if (newsize <= XFS_IFORK_DSIZE(dp)) {
392 retval = xfs_dir_leaf_to_shortform(&args);
393 }
394 }
395 } else {
396 retval = xfs_dir_node_removename(&args);
397 }
398 return(retval);
399}
400
401static int /* error */
402xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
403 xfs_ino_t *inum)
404{
405 xfs_da_args_t args;
406 int retval;
407
408 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
409
410 XFS_STATS_INC(xs_dir_lookup);
411 /*
412 * Fill in the arg structure for this request.
413 */
414 args.name = name;
415 args.namelen = namelen;
416 args.hashval = xfs_da_hashname(name, namelen);
417 args.inumber = 0;
418 args.dp = dp;
419 args.firstblock = NULL;
420 args.flist = NULL;
421 args.total = 0;
422 args.whichfork = XFS_DATA_FORK;
423 args.trans = trans;
424 args.justcheck = args.addname = 0;
425 args.oknoent = 1;
426
427 /*
428 * Decide on what work routines to call based on the inode size.
429 */
430 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
431 retval = xfs_dir_shortform_lookup(&args);
432 } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
433 retval = xfs_dir_leaf_lookup(&args);
434 } else {
435 retval = xfs_dir_node_lookup(&args);
436 }
437 if (retval == EEXIST)
438 retval = 0;
439 *inum = args.inumber;
440 return(retval);
441}
442
443/*
444 * Implement readdir.
445 */
446static int /* error */
447xfs_dir_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, int *eofp)
448{
449 xfs_dirent_t *dbp;
450 int alignment, retval;
451 xfs_dir_put_t put;
452
453 XFS_STATS_INC(xs_dir_getdents);
454 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
455
456 /*
457 * If our caller has given us a single contiguous memory buffer,
458 * just work directly within that buffer. If it's in user memory,
459 * lock it down first.
460 */
461 alignment = sizeof(xfs_off_t) - 1;
462 if ((uio->uio_iovcnt == 1) &&
463 (((__psint_t)uio->uio_iov[0].iov_base & alignment) == 0) &&
464 ((uio->uio_iov[0].iov_len & alignment) == 0)) {
465 dbp = NULL;
466 put = xfs_dir_put_dirent64_direct;
467 } else {
468 dbp = kmem_alloc(sizeof(*dbp) + MAXNAMELEN, KM_SLEEP);
469 put = xfs_dir_put_dirent64_uio;
470 }
471
472 /*
473 * Decide on what work routines to call based on the inode size.
474 */
475 *eofp = 0;
476
477 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
478 retval = xfs_dir_shortform_getdents(dp, uio, eofp, dbp, put);
479 } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
480 retval = xfs_dir_leaf_getdents(trans, dp, uio, eofp, dbp, put);
481 } else {
482 retval = xfs_dir_node_getdents(trans, dp, uio, eofp, dbp, put);
483 }
484 if (dbp != NULL)
485 kmem_free(dbp, sizeof(*dbp) + MAXNAMELEN);
486
487 return(retval);
488}
489
490static int /* error */
491xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
492 xfs_ino_t inum, xfs_fsblock_t *firstblock,
493 xfs_bmap_free_t *flist, xfs_extlen_t total)
494{
495 xfs_da_args_t args;
496 int retval;
497
498 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
499
500 if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum)))
501 return retval;
502
503 /*
504 * Fill in the arg structure for this request.
505 */
506 args.name = name;
507 args.namelen = namelen;
508 args.hashval = xfs_da_hashname(name, namelen);
509 args.inumber = inum;
510 args.dp = dp;
511 args.firstblock = firstblock;
512 args.flist = flist;
513 args.total = total;
514 args.whichfork = XFS_DATA_FORK;
515 args.trans = trans;
516 args.justcheck = args.addname = args.oknoent = 0;
517
518 /*
519 * Decide on what work routines to call based on the inode size.
520 */
521 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
522 retval = xfs_dir_shortform_replace(&args);
523 } else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
524 retval = xfs_dir_leaf_replace(&args);
525 } else {
526 retval = xfs_dir_node_replace(&args);
527 }
528
529 return(retval);
530}
531
532static int
533xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
534{
535 xfs_ino_t ino;
536 int namelen_sum;
537 int count;
538 xfs_dir_shortform_t *sf;
539 xfs_dir_sf_entry_t *sfe;
540 int i;
541
542
543
544 if ((INT_GET(dp->di_core.di_mode, ARCH_CONVERT) & S_IFMT) != S_IFDIR) {
545 return 0;
546 }
547 if (INT_GET(dp->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL) {
548 return 0;
549 }
550 if (INT_GET(dp->di_core.di_size, ARCH_CONVERT) < sizeof(sf->hdr)) {
551 xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p",
552 dp);
553 return 1;
554 }
555 sf = (xfs_dir_shortform_t *)(&dp->di_u.di_dirsf);
556 ino = XFS_GET_DIR_INO8(sf->hdr.parent);
557 if (xfs_dir_ino_validate(mp, ino))
558 return 1;
559
560 count = sf->hdr.count;
561 if ((count < 0) || ((count * 10) > XFS_LITINO(mp))) {
562 xfs_fs_cmn_err(CE_WARN, mp,
563 "Invalid shortform count: dp 0x%p", dp);
564 return(1);
565 }
566
567 if (count == 0) {
568 return 0;
569 }
570
571 namelen_sum = 0;
572 sfe = &sf->list[0];
573 for (i = sf->hdr.count - 1; i >= 0; i--) {
574 ino = XFS_GET_DIR_INO8(sfe->inumber);
575 xfs_dir_ino_validate(mp, ino);
576 if (sfe->namelen >= XFS_LITINO(mp)) {
577 xfs_fs_cmn_err(CE_WARN, mp,
578 "Invalid shortform namelen: dp 0x%p", dp);
579 return 1;
580 }
581 namelen_sum += sfe->namelen;
582 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
583 }
584 if (namelen_sum >= XFS_LITINO(mp)) {
585 xfs_fs_cmn_err(CE_WARN, mp,
586 "Invalid shortform namelen: dp 0x%p", dp);
587 return 1;
588 }
589
590 return 0;
591}
592
593/*========================================================================
594 * External routines when dirsize == XFS_LBSIZE(dp->i_mount).
595 *========================================================================*/
596
597/*
598 * Add a name to the leaf directory structure
599 * This is the external routine.
600 */
601int
602xfs_dir_leaf_addname(xfs_da_args_t *args)
603{
604 int index, retval;
605 xfs_dabuf_t *bp;
606
607 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
608 XFS_DATA_FORK);
609 if (retval)
610 return(retval);
611 ASSERT(bp != NULL);
612
613 retval = xfs_dir_leaf_lookup_int(bp, args, &index);
614 if (retval == ENOENT)
615 retval = xfs_dir_leaf_add(bp, args, index);
616 xfs_da_buf_done(bp);
617 return(retval);
618}
619
620/*
621 * Remove a name from the leaf directory structure
622 * This is the external routine.
623 */
624STATIC int
625xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
626{
627 xfs_dir_leafblock_t *leaf;
628 int index, retval;
629 xfs_dabuf_t *bp;
630
631 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
632 XFS_DATA_FORK);
633 if (retval)
634 return(retval);
635 ASSERT(bp != NULL);
636 leaf = bp->data;
637 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
638 retval = xfs_dir_leaf_lookup_int(bp, args, &index);
639 if (retval == EEXIST) {
640 (void)xfs_dir_leaf_remove(args->trans, bp, index);
641 *count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
642 *totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
643 retval = 0;
644 }
645 xfs_da_buf_done(bp);
646 return(retval);
647}
648
649/*
650 * Look up a name in a leaf directory structure.
651 * This is the external routine.
652 */
653STATIC int
654xfs_dir_leaf_lookup(xfs_da_args_t *args)
655{
656 int index, retval;
657 xfs_dabuf_t *bp;
658
659 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
660 XFS_DATA_FORK);
661 if (retval)
662 return(retval);
663 ASSERT(bp != NULL);
664 retval = xfs_dir_leaf_lookup_int(bp, args, &index);
665 xfs_da_brelse(args->trans, bp);
666 return(retval);
667}
668
669/*
670 * Copy out directory entries for getdents(), for leaf directories.
671 */
672STATIC int
673xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
674 int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put)
675{
676 xfs_dabuf_t *bp;
677 int retval, eob;
678
679 retval = xfs_da_read_buf(dp->i_transp, dp, 0, -1, &bp, XFS_DATA_FORK);
680 if (retval)
681 return(retval);
682 ASSERT(bp != NULL);
683 retval = xfs_dir_leaf_getdents_int(bp, dp, 0, uio, &eob, dbp, put, -1);
684 xfs_da_brelse(trans, bp);
685 *eofp = (eob == 0);
686 return(retval);
687}
688
689/*
690 * Look up a name in a leaf directory structure, replace the inode number.
691 * This is the external routine.
692 */
693STATIC int
694xfs_dir_leaf_replace(xfs_da_args_t *args)
695{
696 int index, retval;
697 xfs_dabuf_t *bp;
698 xfs_ino_t inum;
699 xfs_dir_leafblock_t *leaf;
700 xfs_dir_leaf_entry_t *entry;
701 xfs_dir_leaf_name_t *namest;
702
703 inum = args->inumber;
704 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
705 XFS_DATA_FORK);
706 if (retval)
707 return(retval);
708 ASSERT(bp != NULL);
709 retval = xfs_dir_leaf_lookup_int(bp, args, &index);
710 if (retval == EEXIST) {
711 leaf = bp->data;
712 entry = &leaf->entries[index];
713 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
714 /* XXX - replace assert? */
715 XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber);
716 xfs_da_log_buf(args->trans, bp,
717 XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
718 xfs_da_buf_done(bp);
719 retval = 0;
720 } else
721 xfs_da_brelse(args->trans, bp);
722 return(retval);
723}
724
725
726/*========================================================================
727 * External routines when dirsize > XFS_LBSIZE(mp).
728 *========================================================================*/
729
730/*
731 * Add a name to a Btree-format directory.
732 *
733 * This will involve walking down the Btree, and may involve splitting
734 * leaf nodes and even splitting intermediate nodes up to and including
735 * the root node (a special case of an intermediate node).
736 */
737STATIC int
738xfs_dir_node_addname(xfs_da_args_t *args)
739{
740 xfs_da_state_t *state;
741 xfs_da_state_blk_t *blk;
742 int retval, error;
743
744 /*
745 * Fill in bucket of arguments/results/context to carry around.
746 */
747 state = xfs_da_state_alloc();
748 state->args = args;
749 state->mp = args->dp->i_mount;
750 state->blocksize = state->mp->m_sb.sb_blocksize;
751 state->node_ents = state->mp->m_dir_node_ents;
752
753 /*
754 * Search to see if name already exists, and get back a pointer
755 * to where it should go.
756 */
757 error = xfs_da_node_lookup_int(state, &retval);
758 if (error)
759 retval = error;
760 if (retval != ENOENT)
761 goto error;
762 blk = &state->path.blk[ state->path.active-1 ];
763 ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
764 retval = xfs_dir_leaf_add(blk->bp, args, blk->index);
765 if (retval == 0) {
766 /*
767 * Addition succeeded, update Btree hashvals.
768 */
769 if (!args->justcheck)
770 xfs_da_fixhashpath(state, &state->path);
771 } else {
772 /*
773 * Addition failed, split as many Btree elements as required.
774 */
775 if (args->total == 0) {
776 ASSERT(retval == ENOSPC);
777 goto error;
778 }
779 retval = xfs_da_split(state);
780 }
781error:
782 xfs_da_state_free(state);
783
784 return(retval);
785}
786
787/*
788 * Remove a name from a B-tree directory.
789 *
790 * This will involve walking down the Btree, and may involve joining
791 * leaf nodes and even joining intermediate nodes up to and including
792 * the root node (a special case of an intermediate node).
793 */
794STATIC int
795xfs_dir_node_removename(xfs_da_args_t *args)
796{
797 xfs_da_state_t *state;
798 xfs_da_state_blk_t *blk;
799 int retval, error;
800
801 state = xfs_da_state_alloc();
802 state->args = args;
803 state->mp = args->dp->i_mount;
804 state->blocksize = state->mp->m_sb.sb_blocksize;
805 state->node_ents = state->mp->m_dir_node_ents;
806
807 /*
808 * Search to see if name exists, and get back a pointer to it.
809 */
810 error = xfs_da_node_lookup_int(state, &retval);
811 if (error)
812 retval = error;
813 if (retval != EEXIST) {
814 xfs_da_state_free(state);
815 return(retval);
816 }
817
818 /*
819 * Remove the name and update the hashvals in the tree.
820 */
821 blk = &state->path.blk[ state->path.active-1 ];
822 ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
823 retval = xfs_dir_leaf_remove(args->trans, blk->bp, blk->index);
824 xfs_da_fixhashpath(state, &state->path);
825
826 /*
827 * Check to see if the tree needs to be collapsed.
828 */
829 error = 0;
830 if (retval) {
831 error = xfs_da_join(state);
832 }
833
834 xfs_da_state_free(state);
835 if (error)
836 return(error);
837 return(0);
838}
839
840/*
841 * Look up a filename in a int directory.
842 * Use an internal routine to actually do all the work.
843 */
844STATIC int
845xfs_dir_node_lookup(xfs_da_args_t *args)
846{
847 xfs_da_state_t *state;
848 int retval, error, i;
849
850 state = xfs_da_state_alloc();
851 state->args = args;
852 state->mp = args->dp->i_mount;
853 state->blocksize = state->mp->m_sb.sb_blocksize;
854 state->node_ents = state->mp->m_dir_node_ents;
855
856 /*
857 * Search to see if name exists,
858 * and get back a pointer to it.
859 */
860 error = xfs_da_node_lookup_int(state, &retval);
861 if (error) {
862 retval = error;
863 }
864
865 /*
866 * If not in a transaction, we have to release all the buffers.
867 */
868 for (i = 0; i < state->path.active; i++) {
869 xfs_da_brelse(args->trans, state->path.blk[i].bp);
870 state->path.blk[i].bp = NULL;
871 }
872
873 xfs_da_state_free(state);
874 return(retval);
875}
876
/*
 * Copy out directory entries for getdents() from a Btree-format directory.
 *
 * uio->uio_offset is decoded into a block number and a hash value.  If the
 * block number is non-zero that block is tried first and kept only when it
 * is a leaf whose first and last hashvals bracket the cookie hash.
 * Otherwise the tree is walked down from block 0, at each node taking the
 * first entry whose hashval is >= the cookie hash, until a non-node block
 * is reached.  If a node has no such entry, the offset is set to a cookie
 * built with XFS_DA_MAXHASH, *eofp is set to 1 and 0 is returned.
 *
 * Leaf blocks are then processed in turn, following each leaf's forw
 * link.  The function returns with *eofp == 0 as soon as the per-leaf
 * routine sets eob, and with *eofp == 1 once a leaf with no forw link has
 * been processed.
 */
877STATIC int
878xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
879 int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put)
880{
881 xfs_da_intnode_t *node;
882 xfs_da_node_entry_t *btree;
883 xfs_dir_leafblock_t *leaf = NULL;
884 xfs_dablk_t bno, nextbno;
885 xfs_dahash_t cookhash;
886 xfs_mount_t *mp;
887 int error, eob, i;
888 xfs_dabuf_t *bp;
889 xfs_daddr_t nextda;
890
891 /*
892 * Pick up our context.
893 */
894 mp = dp->i_mount;
895 bp = NULL;
896 bno = XFS_DA_COOKIE_BNO(mp, uio->uio_offset);
897 cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
898
899 xfs_dir_trace_g_du("node: start", dp, uio);
900
901 /*
902 * Re-find our place, even if we're confused about what our place is.
903 *
904 * First we check the block number from the magic cookie, it is a
905 * cache of where we ended last time. If we find a leaf block, and
906 * the starting hashval in that block is less than our desired
907 * hashval, then we run with it.
908 */
909 if (bno > 0) {
910 error = xfs_da_read_buf(trans, dp, bno, -2, &bp, XFS_DATA_FORK);
/* EFSCORRUPTED from this read is tolerated: bp is then checked for NULL. */
911 if ((error != 0) && (error != EFSCORRUPTED))
912 return(error);
913 if (bp)
914 leaf = bp->data;
915 if (bp && be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
916 xfs_dir_trace_g_dub("node: block not a leaf",
917 dp, uio, bno);
918 xfs_da_brelse(trans, bp);
919 bp = NULL;
920 }
921 if (bp && INT_GET(leaf->entries[0].hashval, ARCH_CONVERT) > cookhash) {
922 xfs_dir_trace_g_dub("node: leaf hash too large",
923 dp, uio, bno);
924 xfs_da_brelse(trans, bp);
925 bp = NULL;
926 }
927 if (bp &&
928 cookhash > INT_GET(leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT)) {
929 xfs_dir_trace_g_dub("node: leaf hash too small",
930 dp, uio, bno);
931 xfs_da_brelse(trans, bp);
932 bp = NULL;
933 }
934 }
935
936 /*
937 * If we did not find a leaf block from the blockno in the cookie,
938 * or there was no blockno in the cookie (eg: first time thru),
939 * then we start at the top of the Btree and re-find our hashval.
940 */
941 if (bp == NULL) {
942 xfs_dir_trace_g_du("node: start at root" , dp, uio);
943 bno = 0;
944 for (;;) {
945 error = xfs_da_read_buf(trans, dp, bno, -1, &bp,
946 XFS_DATA_FORK);
947 if (error)
948 return(error);
949 if (bp == NULL)
950 return(XFS_ERROR(EFSCORRUPTED));
951 node = bp->data;
/* A non-node block ends the descent with bp still held for the leaf loop. */
952 if (be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC)
953 break;
954 btree = &node->btree[0];
955 xfs_dir_trace_g_dun("node: node detail", dp, uio, node);
956 for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, i++) {
957 if (be32_to_cpu(btree->hashval) >= cookhash) {
958 bno = be32_to_cpu(btree->before);
959 break;
960 }
961 }
962 if (i == be16_to_cpu(node->hdr.count)) {
963 xfs_da_brelse(trans, bp);
964 xfs_dir_trace_g_du("node: hash beyond EOF",
965 dp, uio);
966 uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0,
967 XFS_DA_MAXHASH);
968 *eofp = 1;
969 return(0);
970 }
971 xfs_dir_trace_g_dub("node: going to block",
972 dp, uio, bno);
973 xfs_da_brelse(trans, bp);
974 }
975 }
976 ASSERT(cookhash != XFS_DA_MAXHASH);
977
978 /*
979 * We've dropped down to the (first) leaf block that contains the
980 * hashval we are interested in. Continue rolling upward thru the
981 * leaf blocks until we fill up our buffer.
982 */
983 for (;;) {
984 leaf = bp->data;
985 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC)) {
986 xfs_dir_trace_g_dul("node: not a leaf", dp, uio, leaf);
987 xfs_da_brelse(trans, bp);
988 XFS_CORRUPTION_ERROR("xfs_dir_node_getdents(1)",
989 XFS_ERRLEVEL_LOW, mp, leaf);
990 return XFS_ERROR(EFSCORRUPTED);
991 }
992 xfs_dir_trace_g_dul("node: leaf detail", dp, uio, leaf);
/* If there is a following leaf, start reading it ahead before copying out this one. */
993 if ((nextbno = be32_to_cpu(leaf->hdr.info.forw))) {
994 nextda = xfs_da_reada_buf(trans, dp, nextbno,
995 XFS_DATA_FORK);
996 } else
997 nextda = -1;
998 error = xfs_dir_leaf_getdents_int(bp, dp, bno, uio, &eob, dbp,
999 put, nextda);
1000 xfs_da_brelse(trans, bp);
1001 bno = nextbno;
1002 if (eob) {
1003 xfs_dir_trace_g_dub("node: E-O-B", dp, uio, bno);
1004 *eofp = 0;
1005 return(error);
1006 }
1007 if (bno == 0)
1008 break;
1009 error = xfs_da_read_buf(trans, dp, bno, nextda, &bp,
1010 XFS_DATA_FORK);
1011 if (error)
1012 return(error);
1013 if (unlikely(bp == NULL)) {
1014 XFS_ERROR_REPORT("xfs_dir_node_getdents(2)",
1015 XFS_ERRLEVEL_LOW, mp);
1016 return(XFS_ERROR(EFSCORRUPTED));
1017 }
1018 }
1019 *eofp = 1;
1020 xfs_dir_trace_g_du("node: E-O-F", dp, uio);
1021 return(0);
1022}
1023
1024/*
1025 * Look up a filename in an int directory, replace the inode number.
1026 * Use an internal routine to actually do the lookup.
1027 */
1028STATIC int
1029xfs_dir_node_replace(xfs_da_args_t *args)
1030{
1031 xfs_da_state_t *state;
1032 xfs_da_state_blk_t *blk;
1033 xfs_dir_leafblock_t *leaf;
1034 xfs_dir_leaf_entry_t *entry;
1035 xfs_dir_leaf_name_t *namest;
1036 xfs_ino_t inum;
1037 int retval, error, i;
1038 xfs_dabuf_t *bp;
1039
1040 state = xfs_da_state_alloc();
1041 state->args = args;
1042 state->mp = args->dp->i_mount;
1043 state->blocksize = state->mp->m_sb.sb_blocksize;
1044 state->node_ents = state->mp->m_dir_node_ents;
1045 inum = args->inumber;
1046
1047 /*
1048 * Search to see if name exists,
1049 * and get back a pointer to it.
1050 */
1051 error = xfs_da_node_lookup_int(state, &retval);
1052 if (error) {
1053 retval = error;
1054 }
1055
1056 if (retval == EEXIST) {
1057 blk = &state->path.blk[state->path.active - 1];
1058 ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
1059 bp = blk->bp;
1060 leaf = bp->data;
1061 entry = &leaf->entries[blk->index];
1062 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
1063 /* XXX - replace assert ? */
1064 XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber);
1065 xfs_da_log_buf(args->trans, bp,
1066 XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
1067 xfs_da_buf_done(bp);
1068 blk->bp = NULL;
1069 retval = 0;
1070 } else {
1071 i = state->path.active - 1;
1072 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1073 state->path.blk[i].bp = NULL;
1074 }
1075 for (i = 0; i < state->path.active - 1; i++) {
1076 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1077 state->path.blk[i].bp = NULL;
1078 }
1079
1080 xfs_da_state_free(state);
1081 return(retval);
1082}
1083
1084#if defined(XFS_DIR_TRACE)
1085/*
1086 * Add a trace buffer entry for an inode and a uio.
1087 */
1088void
1089xfs_dir_trace_g_du(char *where, xfs_inode_t *dp, uio_t *uio)
1090{
1091 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DU, where,
1092 (void *)dp, (void *)dp->i_mount,
1093 (void *)((unsigned long)(uio->uio_offset >> 32)),
1094 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1095 (void *)(unsigned long)uio->uio_resid,
1096 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
1097}
1098
1099/*
1100 * Add a trace buffer entry for an inode and a uio.
1101 */
1102void
1103xfs_dir_trace_g_dub(char *where, xfs_inode_t *dp, uio_t *uio, xfs_dablk_t bno)
1104{
1105 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUB, where,
1106 (void *)dp, (void *)dp->i_mount,
1107 (void *)((unsigned long)(uio->uio_offset >> 32)),
1108 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1109 (void *)(unsigned long)uio->uio_resid,
1110 (void *)(unsigned long)bno,
1111 NULL, NULL, NULL, NULL, NULL, NULL);
1112}
1113
1114/*
1115 * Add a trace buffer entry for an inode and a uio.
1116 */
1117void
1118xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
1119 xfs_da_intnode_t *node)
1120{
1121 int last = be16_to_cpu(node->hdr.count) - 1;
1122
1123 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUN, where,
1124 (void *)dp, (void *)dp->i_mount,
1125 (void *)((unsigned long)(uio->uio_offset >> 32)),
1126 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1127 (void *)(unsigned long)uio->uio_resid,
1128 (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw),
1129 (void *)(unsigned long)
1130 be16_to_cpu(node->hdr.count),
1131 (void *)(unsigned long)
1132 be32_to_cpu(node->btree[0].hashval),
1133 (void *)(unsigned long)
1134 be32_to_cpu(node->btree[last].hashval),
1135 NULL, NULL, NULL);
1136}
1137
1138/*
1139 * Add a trace buffer entry for an inode and a uio.
1140 */
1141void
1142xfs_dir_trace_g_dul(char *where, xfs_inode_t *dp, uio_t *uio,
1143 xfs_dir_leafblock_t *leaf)
1144{
1145 int last = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
1146
1147 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUL, where,
1148 (void *)dp, (void *)dp->i_mount,
1149 (void *)((unsigned long)(uio->uio_offset >> 32)),
1150 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1151 (void *)(unsigned long)uio->uio_resid,
1152 (void *)(unsigned long)be32_to_cpu(leaf->hdr.info.forw),
1153 (void *)(unsigned long)
1154 INT_GET(leaf->hdr.count, ARCH_CONVERT),
1155 (void *)(unsigned long)
1156 INT_GET(leaf->entries[0].hashval, ARCH_CONVERT),
1157 (void *)(unsigned long)
1158 INT_GET(leaf->entries[last].hashval, ARCH_CONVERT),
1159 NULL, NULL, NULL);
1160}
1161
1162/*
1163 * Add a trace buffer entry for an inode and a uio.
1164 */
1165void
1166xfs_dir_trace_g_due(char *where, xfs_inode_t *dp, uio_t *uio,
1167 xfs_dir_leaf_entry_t *entry)
1168{
1169 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUE, where,
1170 (void *)dp, (void *)dp->i_mount,
1171 (void *)((unsigned long)(uio->uio_offset >> 32)),
1172 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1173 (void *)(unsigned long)uio->uio_resid,
1174 (void *)(unsigned long)
1175 INT_GET(entry->hashval, ARCH_CONVERT),
1176 NULL, NULL, NULL, NULL, NULL, NULL);
1177}
1178
1179/*
1180 * Add a trace buffer entry for an inode and a uio.
1181 */
1182void
1183xfs_dir_trace_g_duc(char *where, xfs_inode_t *dp, uio_t *uio, xfs_off_t cookie)
1184{
1185 xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUC, where,
1186 (void *)dp, (void *)dp->i_mount,
1187 (void *)((unsigned long)(uio->uio_offset >> 32)),
1188 (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
1189 (void *)(unsigned long)uio->uio_resid,
1190 (void *)((unsigned long)(cookie >> 32)),
1191 (void *)((unsigned long)(cookie & 0xFFFFFFFF)),
1192 NULL, NULL, NULL, NULL, NULL);
1193}
1194
1195/*
1196 * Add a trace buffer entry for the arguments given to the routine,
1197 * generic form.
1198 */
1199void
1200xfs_dir_trace_enter(int type, char *where,
1201 void * a0, void * a1,
1202 void * a2, void * a3,
1203 void * a4, void * a5,
1204 void * a6, void * a7,
1205 void * a8, void * a9,
1206 void * a10, void * a11)
1207{
1208 ASSERT(xfs_dir_trace_buf);
1209 ktrace_enter(xfs_dir_trace_buf, (void *)(unsigned long)type,
1210 (void *)where,
1211 (void *)a0, (void *)a1, (void *)a2,
1212 (void *)a3, (void *)a4, (void *)a5,
1213 (void *)a6, (void *)a7, (void *)a8,
1214 (void *)a9, (void *)a10, (void *)a11,
1215 NULL, NULL);
1216}
1217#endif /* XFS_DIR_TRACE */
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
deleted file mode 100644
index 8cc8afb9f6c0..000000000000
--- a/fs/xfs/xfs_dir.h
+++ /dev/null
@@ -1,142 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR_H__
19#define __XFS_DIR_H__
20
21/*
22 * Large directories are structured around Btrees where all the data
23 * elements are in the leaf nodes. Filenames are hashed into an int,
24 * then that int is used as the index into the Btree. Since the hashval
25 * of a filename may not be unique, we may have duplicate keys. The
26 * internal links in the Btree are logical block offsets into the file.
27 *
28 * Small directories use a different format and are packed as tightly
29 * as possible so as to fit into the literal area of the inode.
30 */
31
32/*========================================================================
33 * Function prototypes for the kernel.
34 *========================================================================*/
35
/* Forward declarations: the function types below only use pointers to these. */
36struct uio;
37struct xfs_bmap_free;
38struct xfs_da_args;
39struct xfs_dinode;
40struct xfs_inode;
41struct xfs_mount;
42struct xfs_trans;
43
44/*
45 * Directory function types.
46 * Put in structures (xfs_dirops_t) for v1 and v2 directories.
47 */
48typedef void (*xfs_dir_mount_t)(struct xfs_mount *mp);
49typedef int (*xfs_dir_isempty_t)(struct xfs_inode *dp);
50typedef int (*xfs_dir_init_t)(struct xfs_trans *tp,
51 struct xfs_inode *dp,
52 struct xfs_inode *pdp);
/* createname, removename and replace share one parameter list. */
53typedef int (*xfs_dir_createname_t)(struct xfs_trans *tp,
54 struct xfs_inode *dp,
55 char *name,
56 int namelen,
57 xfs_ino_t inum,
58 xfs_fsblock_t *first,
59 struct xfs_bmap_free *flist,
60 xfs_extlen_t total);
/* lookup hands the inode number back through *inum. */
61typedef int (*xfs_dir_lookup_t)(struct xfs_trans *tp,
62 struct xfs_inode *dp,
63 char *name,
64 int namelen,
65 xfs_ino_t *inum);
66typedef int (*xfs_dir_removename_t)(struct xfs_trans *tp,
67 struct xfs_inode *dp,
68 char *name,
69 int namelen,
70 xfs_ino_t ino,
71 xfs_fsblock_t *first,
72 struct xfs_bmap_free *flist,
73 xfs_extlen_t total);
74typedef int (*xfs_dir_getdents_t)(struct xfs_trans *tp,
75 struct xfs_inode *dp,
76 struct uio *uio,
77 int *eofp);
78typedef int (*xfs_dir_replace_t)(struct xfs_trans *tp,
79 struct xfs_inode *dp,
80 char *name,
81 int namelen,
82 xfs_ino_t inum,
83 xfs_fsblock_t *first,
84 struct xfs_bmap_free *flist,
85 xfs_extlen_t total);
86typedef int (*xfs_dir_canenter_t)(struct xfs_trans *tp,
87 struct xfs_inode *dp,
88 char *name,
89 int namelen);
/* Unlike the types above, these two take no transaction or in-core inode. */
90typedef int (*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp,
91 struct xfs_dinode *dip);
92typedef int (*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args);
93
/*
 * Table of directory operation entry points, one slot per function type.
 * The XFS_DIR_* macros in this header call through (mp)->m_dirops, and
 * xfsv1_dirops / xfsv2_dirops are declared as instances of this table.
 */
94typedef struct xfs_dirops {
95 xfs_dir_mount_t xd_mount;
96 xfs_dir_isempty_t xd_isempty;
97 xfs_dir_init_t xd_init;
98 xfs_dir_createname_t xd_createname;
99 xfs_dir_lookup_t xd_lookup;
100 xfs_dir_removename_t xd_removename;
101 xfs_dir_getdents_t xd_getdents;
102 xfs_dir_replace_t xd_replace;
103 xfs_dir_canenter_t xd_canenter;
104 xfs_dir_shortform_validate_ondisk_t xd_shortform_validate_ondisk;
105 xfs_dir_shortform_to_single_t xd_shortform_to_single;
106} xfs_dirops_t;
107
108/*
109 * Overall external interface routines.
110 */
111void xfs_dir_startup(void); /* called exactly once */
112
/*
 * Dispatch macros: each one calls the matching slot of the mount's
 * m_dirops table.  The mp argument selects the table; it is passed on to
 * the callee only by XFS_DIR_MOUNT and XFS_DIR_SHORTFORM_VALIDATE_ONDISK.
 */
113#define XFS_DIR_MOUNT(mp) \
114 ((mp)->m_dirops.xd_mount(mp))
115#define XFS_DIR_ISEMPTY(mp,dp) \
116 ((mp)->m_dirops.xd_isempty(dp))
117#define XFS_DIR_INIT(mp,tp,dp,pdp) \
118 ((mp)->m_dirops.xd_init(tp,dp,pdp))
119#define XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \
120 ((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\
121 total))
122#define XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum) \
123 ((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum))
124#define XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total) \
125 ((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total))
126#define XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp) \
127 ((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp))
128#define XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total) \
129 ((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total))
130#define XFS_DIR_CANENTER(mp,tp,dp,name,namelen) \
131 ((mp)->m_dirops.xd_canenter(tp,dp,name,namelen))
132#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip) \
133 ((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip))
134#define XFS_DIR_SHORTFORM_TO_SINGLE(mp,args) \
135 ((mp)->m_dirops.xd_shortform_to_single(args))
136
/* Directory version tests on the mount, and the two per-version operation tables. */
137#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1)
138#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
139extern xfs_dirops_t xfsv1_dirops;
140extern xfs_dirops_t xfsv2_dirops;
141
142#endif /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 022c8398ab62..8edbe1adb95b 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,21 +24,18 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
38#include "xfs_inode.h" 36#include "xfs_inode.h"
39#include "xfs_inode_item.h" 37#include "xfs_inode_item.h"
40#include "xfs_bmap.h" 38#include "xfs_bmap.h"
41#include "xfs_dir_leaf.h"
42#include "xfs_dir2_data.h" 39#include "xfs_dir2_data.h"
43#include "xfs_dir2_leaf.h" 40#include "xfs_dir2_leaf.h"
44#include "xfs_dir2_block.h" 41#include "xfs_dir2_block.h"
@@ -46,69 +43,14 @@
46#include "xfs_dir2_trace.h" 43#include "xfs_dir2_trace.h"
47#include "xfs_error.h" 44#include "xfs_error.h"
48 45
49/*
50 * Declarations for interface routines.
51 */
52static void xfs_dir2_mount(xfs_mount_t *mp);
53static int xfs_dir2_isempty(xfs_inode_t *dp);
54static int xfs_dir2_init(xfs_trans_t *tp, xfs_inode_t *dp,
55 xfs_inode_t *pdp);
56static int xfs_dir2_createname(xfs_trans_t *tp, xfs_inode_t *dp,
57 char *name, int namelen, xfs_ino_t inum,
58 xfs_fsblock_t *first,
59 xfs_bmap_free_t *flist, xfs_extlen_t total);
60static int xfs_dir2_lookup(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
61 int namelen, xfs_ino_t *inum);
62static int xfs_dir2_removename(xfs_trans_t *tp, xfs_inode_t *dp,
63 char *name, int namelen, xfs_ino_t ino,
64 xfs_fsblock_t *first,
65 xfs_bmap_free_t *flist, xfs_extlen_t total);
66static int xfs_dir2_getdents(xfs_trans_t *tp, xfs_inode_t *dp, uio_t *uio,
67 int *eofp);
68static int xfs_dir2_replace(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
69 int namelen, xfs_ino_t inum,
70 xfs_fsblock_t *first, xfs_bmap_free_t *flist,
71 xfs_extlen_t total);
72static int xfs_dir2_canenter(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
73 int namelen);
74static int xfs_dir2_shortform_validate_ondisk(xfs_mount_t *mp,
75 xfs_dinode_t *dip);
76
77/*
78 * Utility routine declarations.
79 */
80static int xfs_dir2_put_dirent64_direct(xfs_dir2_put_args_t *pa); 46static int xfs_dir2_put_dirent64_direct(xfs_dir2_put_args_t *pa);
81static int xfs_dir2_put_dirent64_uio(xfs_dir2_put_args_t *pa); 47static int xfs_dir2_put_dirent64_uio(xfs_dir2_put_args_t *pa);
82 48
83/* 49void
84 * Directory operations vector. 50xfs_dir_mount(
85 */ 51 xfs_mount_t *mp)
86xfs_dirops_t xfsv2_dirops = {
87 .xd_mount = xfs_dir2_mount,
88 .xd_isempty = xfs_dir2_isempty,
89 .xd_init = xfs_dir2_init,
90 .xd_createname = xfs_dir2_createname,
91 .xd_lookup = xfs_dir2_lookup,
92 .xd_removename = xfs_dir2_removename,
93 .xd_getdents = xfs_dir2_getdents,
94 .xd_replace = xfs_dir2_replace,
95 .xd_canenter = xfs_dir2_canenter,
96 .xd_shortform_validate_ondisk = xfs_dir2_shortform_validate_ondisk,
97 .xd_shortform_to_single = xfs_dir2_sf_to_block,
98};
99
100/*
101 * Interface routines.
102 */
103
104/*
105 * Initialize directory-related fields in the mount structure.
106 */
107static void
108xfs_dir2_mount(
109 xfs_mount_t *mp) /* filesystem mount point */
110{ 52{
111 mp->m_dirversion = 2; 53 ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
112 ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= 54 ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
113 XFS_MAX_BLOCKSIZE); 55 XFS_MAX_BLOCKSIZE);
114 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); 56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
@@ -128,19 +70,15 @@ xfs_dir2_mount(
128/* 70/*
129 * Return 1 if directory contains only "." and "..". 71 * Return 1 if directory contains only "." and "..".
130 */ 72 */
131static int /* return code */ 73int
132xfs_dir2_isempty( 74xfs_dir_isempty(
133 xfs_inode_t *dp) /* incore inode structure */ 75 xfs_inode_t *dp)
134{ 76{
135 xfs_dir2_sf_t *sfp; /* shortform directory structure */ 77 xfs_dir2_sf_t *sfp;
136 78
137 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 79 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
138 /* 80 if (dp->i_d.di_size == 0) /* might happen during shutdown. */
139 * Might happen during shutdown.
140 */
141 if (dp->i_d.di_size == 0) {
142 return 1; 81 return 1;
143 }
144 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) 82 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
145 return 0; 83 return 0;
146 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 84 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
@@ -148,53 +86,83 @@ xfs_dir2_isempty(
148} 86}
149 87
150/* 88/*
89 * Validate a given inode number.
90 */
91int
92xfs_dir_ino_validate(
93 xfs_mount_t *mp,
94 xfs_ino_t ino)
95{
96 xfs_agblock_t agblkno;
97 xfs_agino_t agino;
98 xfs_agnumber_t agno;
99 int ino_ok;
100 int ioff;
101
102 agno = XFS_INO_TO_AGNO(mp, ino);
103 agblkno = XFS_INO_TO_AGBNO(mp, ino);
104 ioff = XFS_INO_TO_OFFSET(mp, ino);
105 agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
106 ino_ok =
107 agno < mp->m_sb.sb_agcount &&
108 agblkno < mp->m_sb.sb_agblocks &&
109 agblkno != 0 &&
110 ioff < (1 << mp->m_sb.sb_inopblog) &&
111 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
112 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
113 XFS_RANDOM_DIR_INO_VALIDATE))) {
114 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
115 (unsigned long long) ino);
116 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
117 return XFS_ERROR(EFSCORRUPTED);
118 }
119 return 0;
120}
121
122/*
151 * Initialize a directory with its "." and ".." entries. 123 * Initialize a directory with its "." and ".." entries.
152 */ 124 */
153static int /* error */ 125int
154xfs_dir2_init( 126xfs_dir_init(
155 xfs_trans_t *tp, /* transaction pointer */ 127 xfs_trans_t *tp,
156 xfs_inode_t *dp, /* incore directory inode */ 128 xfs_inode_t *dp,
157 xfs_inode_t *pdp) /* incore parent directory inode */ 129 xfs_inode_t *pdp)
158{ 130{
159 xfs_da_args_t args; /* operation arguments */ 131 xfs_da_args_t args;
160 int error; /* error return value */ 132 int error;
161 133
162 memset((char *)&args, 0, sizeof(args)); 134 memset((char *)&args, 0, sizeof(args));
163 args.dp = dp; 135 args.dp = dp;
164 args.trans = tp; 136 args.trans = tp;
165 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 137 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
166 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) { 138 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
167 return error; 139 return error;
168 }
169 return xfs_dir2_sf_create(&args, pdp->i_ino); 140 return xfs_dir2_sf_create(&args, pdp->i_ino);
170} 141}
171 142
172/* 143/*
173 Enter a name in a directory. 144 Enter a name in a directory.
174 */ 145 */
175static int /* error */ 146int
176xfs_dir2_createname( 147xfs_dir_createname(
177 xfs_trans_t *tp, /* transaction pointer */ 148 xfs_trans_t *tp,
178 xfs_inode_t *dp, /* incore directory inode */ 149 xfs_inode_t *dp,
179 char *name, /* new entry name */ 150 char *name,
180 int namelen, /* new entry name length */ 151 int namelen,
181 xfs_ino_t inum, /* new entry inode number */ 152 xfs_ino_t inum, /* new entry inode number */
182 xfs_fsblock_t *first, /* bmap's firstblock */ 153 xfs_fsblock_t *first, /* bmap's firstblock */
183 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 154 xfs_bmap_free_t *flist, /* bmap's freeblock list */
184 xfs_extlen_t total) /* bmap's total block count */ 155 xfs_extlen_t total) /* bmap's total block count */
185{ 156{
186 xfs_da_args_t args; /* operation arguments */ 157 xfs_da_args_t args;
187 int rval; /* return value */ 158 int rval;
188 int v; /* type-checking value */ 159 int v; /* type-checking value */
189 160
190 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 161 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
191 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) { 162 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
192 return rval; 163 return rval;
193 }
194 XFS_STATS_INC(xs_dir_create); 164 XFS_STATS_INC(xs_dir_create);
195 /* 165
196 * Fill in the arg structure for this request.
197 */
198 args.name = name; 166 args.name = name;
199 args.namelen = namelen; 167 args.namelen = namelen;
200 args.hashval = xfs_da_hashname(name, namelen); 168 args.hashval = xfs_da_hashname(name, namelen);
@@ -207,18 +175,16 @@ xfs_dir2_createname(
207 args.trans = tp; 175 args.trans = tp;
208 args.justcheck = 0; 176 args.justcheck = 0;
209 args.addname = args.oknoent = 1; 177 args.addname = args.oknoent = 1;
210 /* 178
211 * Decide on what work routines to call based on the inode size.
212 */
213 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 179 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
214 rval = xfs_dir2_sf_addname(&args); 180 rval = xfs_dir2_sf_addname(&args);
215 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 181 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
216 return rval; 182 return rval;
217 } else if (v) 183 else if (v)
218 rval = xfs_dir2_block_addname(&args); 184 rval = xfs_dir2_block_addname(&args);
219 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { 185 else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
220 return rval; 186 return rval;
221 } else if (v) 187 else if (v)
222 rval = xfs_dir2_leaf_addname(&args); 188 rval = xfs_dir2_leaf_addname(&args);
223 else 189 else
224 rval = xfs_dir2_node_addname(&args); 190 rval = xfs_dir2_node_addname(&args);
@@ -228,24 +194,21 @@ xfs_dir2_createname(
228/* 194/*
229 * Lookup a name in a directory, give back the inode number. 195 * Lookup a name in a directory, give back the inode number.
230 */ 196 */
231static int /* error */ 197int
232xfs_dir2_lookup( 198xfs_dir_lookup(
233 xfs_trans_t *tp, /* transaction pointer */ 199 xfs_trans_t *tp,
234 xfs_inode_t *dp, /* incore directory inode */ 200 xfs_inode_t *dp,
235 char *name, /* lookup name */ 201 char *name,
236 int namelen, /* lookup name length */ 202 int namelen,
237 xfs_ino_t *inum) /* out: inode number */ 203 xfs_ino_t *inum) /* out: inode number */
238{ 204{
239 xfs_da_args_t args; /* operation arguments */ 205 xfs_da_args_t args;
240 int rval; /* return value */ 206 int rval;
241 int v; /* type-checking value */ 207 int v; /* type-checking value */
242 208
243 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 209 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
244 XFS_STATS_INC(xs_dir_lookup); 210 XFS_STATS_INC(xs_dir_lookup);
245 211
246 /*
247 * Fill in the arg structure for this request.
248 */
249 args.name = name; 212 args.name = name;
250 args.namelen = namelen; 213 args.namelen = namelen;
251 args.hashval = xfs_da_hashname(name, namelen); 214 args.hashval = xfs_da_hashname(name, namelen);
@@ -258,18 +221,16 @@ xfs_dir2_lookup(
258 args.trans = tp; 221 args.trans = tp;
259 args.justcheck = args.addname = 0; 222 args.justcheck = args.addname = 0;
260 args.oknoent = 1; 223 args.oknoent = 1;
261 /* 224
262 * Decide on what work routines to call based on the inode size.
263 */
264 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 225 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
265 rval = xfs_dir2_sf_lookup(&args); 226 rval = xfs_dir2_sf_lookup(&args);
266 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 227 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
267 return rval; 228 return rval;
268 } else if (v) 229 else if (v)
269 rval = xfs_dir2_block_lookup(&args); 230 rval = xfs_dir2_block_lookup(&args);
270 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { 231 else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
271 return rval; 232 return rval;
272 } else if (v) 233 else if (v)
273 rval = xfs_dir2_leaf_lookup(&args); 234 rval = xfs_dir2_leaf_lookup(&args);
274 else 235 else
275 rval = xfs_dir2_node_lookup(&args); 236 rval = xfs_dir2_node_lookup(&args);
@@ -283,26 +244,24 @@ xfs_dir2_lookup(
283/* 244/*
284 * Remove an entry from a directory. 245 * Remove an entry from a directory.
285 */ 246 */
286static int /* error */ 247int
287xfs_dir2_removename( 248xfs_dir_removename(
288 xfs_trans_t *tp, /* transaction pointer */ 249 xfs_trans_t *tp,
289 xfs_inode_t *dp, /* incore directory inode */ 250 xfs_inode_t *dp,
290 char *name, /* name of entry to remove */ 251 char *name,
291 int namelen, /* name length of entry to remove */ 252 int namelen,
292 xfs_ino_t ino, /* inode number of entry to remove */ 253 xfs_ino_t ino,
293 xfs_fsblock_t *first, /* bmap's firstblock */ 254 xfs_fsblock_t *first, /* bmap's firstblock */
294 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 255 xfs_bmap_free_t *flist, /* bmap's freeblock list */
295 xfs_extlen_t total) /* bmap's total block count */ 256 xfs_extlen_t total) /* bmap's total block count */
296{ 257{
297 xfs_da_args_t args; /* operation arguments */ 258 xfs_da_args_t args;
298 int rval; /* return value */ 259 int rval;
299 int v; /* type-checking value */ 260 int v; /* type-checking value */
300 261
301 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 262 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
302 XFS_STATS_INC(xs_dir_remove); 263 XFS_STATS_INC(xs_dir_remove);
303 /* 264
304 * Fill in the arg structure for this request.
305 */
306 args.name = name; 265 args.name = name;
307 args.namelen = namelen; 266 args.namelen = namelen;
308 args.hashval = xfs_da_hashname(name, namelen); 267 args.hashval = xfs_da_hashname(name, namelen);
@@ -314,18 +273,16 @@ xfs_dir2_removename(
314 args.whichfork = XFS_DATA_FORK; 273 args.whichfork = XFS_DATA_FORK;
315 args.trans = tp; 274 args.trans = tp;
316 args.justcheck = args.addname = args.oknoent = 0; 275 args.justcheck = args.addname = args.oknoent = 0;
317 /* 276
318 * Decide on what work routines to call based on the inode size.
319 */
320 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 277 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
321 rval = xfs_dir2_sf_removename(&args); 278 rval = xfs_dir2_sf_removename(&args);
322 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 279 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
323 return rval; 280 return rval;
324 } else if (v) 281 else if (v)
325 rval = xfs_dir2_block_removename(&args); 282 rval = xfs_dir2_block_removename(&args);
326 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { 283 else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
327 return rval; 284 return rval;
328 } else if (v) 285 else if (v)
329 rval = xfs_dir2_leaf_removename(&args); 286 rval = xfs_dir2_leaf_removename(&args);
330 else 287 else
331 rval = xfs_dir2_node_removename(&args); 288 rval = xfs_dir2_node_removename(&args);
@@ -335,10 +292,10 @@ xfs_dir2_removename(
335/* 292/*
336 * Read a directory. 293 * Read a directory.
337 */ 294 */
338static int /* error */ 295int
339xfs_dir2_getdents( 296xfs_dir_getdents(
340 xfs_trans_t *tp, /* transaction pointer */ 297 xfs_trans_t *tp,
341 xfs_inode_t *dp, /* incore directory inode */ 298 xfs_inode_t *dp,
342 uio_t *uio, /* caller's buffer control */ 299 uio_t *uio, /* caller's buffer control */
343 int *eofp) /* out: eof reached */ 300 int *eofp) /* out: eof reached */
344{ 301{
@@ -367,14 +324,11 @@ xfs_dir2_getdents(
367 } 324 }
368 325
369 *eofp = 0; 326 *eofp = 0;
370 /*
371 * Decide on what work routines to call based on the inode size.
372 */
373 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 327 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
374 rval = xfs_dir2_sf_getdents(dp, uio, eofp, dbp, put); 328 rval = xfs_dir2_sf_getdents(dp, uio, eofp, dbp, put);
375 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 329 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
376 ; 330 ;
377 } else if (v) 331 else if (v)
378 rval = xfs_dir2_block_getdents(tp, dp, uio, eofp, dbp, put); 332 rval = xfs_dir2_block_getdents(tp, dp, uio, eofp, dbp, put);
379 else 333 else
380 rval = xfs_dir2_leaf_getdents(tp, dp, uio, eofp, dbp, put); 334 rval = xfs_dir2_leaf_getdents(tp, dp, uio, eofp, dbp, put);
@@ -386,29 +340,26 @@ xfs_dir2_getdents(
386/* 340/*
387 * Replace the inode number of a directory entry. 341 * Replace the inode number of a directory entry.
388 */ 342 */
389static int /* error */ 343int
390xfs_dir2_replace( 344xfs_dir_replace(
391 xfs_trans_t *tp, /* transaction pointer */ 345 xfs_trans_t *tp,
392 xfs_inode_t *dp, /* incore directory inode */ 346 xfs_inode_t *dp,
393 char *name, /* name of entry to replace */ 347 char *name, /* name of entry to replace */
394 int namelen, /* name length of entry to replace */ 348 int namelen,
395 xfs_ino_t inum, /* new inode number */ 349 xfs_ino_t inum, /* new inode number */
396 xfs_fsblock_t *first, /* bmap's firstblock */ 350 xfs_fsblock_t *first, /* bmap's firstblock */
397 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 351 xfs_bmap_free_t *flist, /* bmap's freeblock list */
398 xfs_extlen_t total) /* bmap's total block count */ 352 xfs_extlen_t total) /* bmap's total block count */
399{ 353{
400 xfs_da_args_t args; /* operation arguments */ 354 xfs_da_args_t args;
401 int rval; /* return value */ 355 int rval;
402 int v; /* type-checking value */ 356 int v; /* type-checking value */
403 357
404 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 358 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
405 359
406 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) { 360 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
407 return rval; 361 return rval;
408 } 362
409 /*
410 * Fill in the arg structure for this request.
411 */
412 args.name = name; 363 args.name = name;
413 args.namelen = namelen; 364 args.namelen = namelen;
414 args.hashval = xfs_da_hashname(name, namelen); 365 args.hashval = xfs_da_hashname(name, namelen);
@@ -420,18 +371,16 @@ xfs_dir2_replace(
420 args.whichfork = XFS_DATA_FORK; 371 args.whichfork = XFS_DATA_FORK;
421 args.trans = tp; 372 args.trans = tp;
422 args.justcheck = args.addname = args.oknoent = 0; 373 args.justcheck = args.addname = args.oknoent = 0;
423 /* 374
424 * Decide on what work routines to call based on the inode size.
425 */
426 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 375 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
427 rval = xfs_dir2_sf_replace(&args); 376 rval = xfs_dir2_sf_replace(&args);
428 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 377 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
429 return rval; 378 return rval;
430 } else if (v) 379 else if (v)
431 rval = xfs_dir2_block_replace(&args); 380 rval = xfs_dir2_block_replace(&args);
432 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { 381 else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
433 return rval; 382 return rval;
434 } else if (v) 383 else if (v)
435 rval = xfs_dir2_leaf_replace(&args); 384 rval = xfs_dir2_leaf_replace(&args);
436 else 385 else
437 rval = xfs_dir2_node_replace(&args); 386 rval = xfs_dir2_node_replace(&args);
@@ -441,21 +390,19 @@ xfs_dir2_replace(
441/* 390/*
442 * See if this entry can be added to the directory without allocating space. 391 * See if this entry can be added to the directory without allocating space.
443 */ 392 */
444static int /* error */ 393int
445xfs_dir2_canenter( 394xfs_dir_canenter(
446 xfs_trans_t *tp, /* transaction pointer */ 395 xfs_trans_t *tp,
447 xfs_inode_t *dp, /* incore directory inode */ 396 xfs_inode_t *dp,
448 char *name, /* name of entry to add */ 397 char *name, /* name of entry to add */
449 int namelen) /* name length of entry to add */ 398 int namelen)
450{ 399{
451 xfs_da_args_t args; /* operation arguments */ 400 xfs_da_args_t args;
452 int rval; /* return value */ 401 int rval;
453 int v; /* type-checking value */ 402 int v; /* type-checking value */
454 403
455 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 404 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
456 /* 405
457 * Fill in the arg structure for this request.
458 */
459 args.name = name; 406 args.name = name;
460 args.namelen = namelen; 407 args.namelen = namelen;
461 args.hashval = xfs_da_hashname(name, namelen); 408 args.hashval = xfs_da_hashname(name, namelen);
@@ -467,18 +414,16 @@ xfs_dir2_canenter(
467 args.whichfork = XFS_DATA_FORK; 414 args.whichfork = XFS_DATA_FORK;
468 args.trans = tp; 415 args.trans = tp;
469 args.justcheck = args.addname = args.oknoent = 1; 416 args.justcheck = args.addname = args.oknoent = 1;
470 /* 417
471 * Decide on what work routines to call based on the inode size.
472 */
473 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 418 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
474 rval = xfs_dir2_sf_addname(&args); 419 rval = xfs_dir2_sf_addname(&args);
475 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) { 420 else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
476 return rval; 421 return rval;
477 } else if (v) 422 else if (v)
478 rval = xfs_dir2_block_addname(&args); 423 rval = xfs_dir2_block_addname(&args);
479 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) { 424 else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
480 return rval; 425 return rval;
481 } else if (v) 426 else if (v)
482 rval = xfs_dir2_leaf_addname(&args); 427 rval = xfs_dir2_leaf_addname(&args);
483 else 428 else
484 rval = xfs_dir2_node_addname(&args); 429 rval = xfs_dir2_node_addname(&args);
@@ -486,19 +431,6 @@ xfs_dir2_canenter(
486} 431}
487 432
488/* 433/*
489 * Dummy routine for shortform inode validation.
490 * Can't really do this.
491 */
492/* ARGSUSED */
493static int /* error */
494xfs_dir2_shortform_validate_ondisk(
495 xfs_mount_t *mp, /* filesystem mount point */
496 xfs_dinode_t *dip) /* ondisk inode */
497{
498 return 0;
499}
500
501/*
502 * Utility routines. 434 * Utility routines.
503 */ 435 */
504 436
@@ -507,24 +439,24 @@ xfs_dir2_shortform_validate_ondisk(
507 * This routine is for data and free blocks, not leaf/node blocks 439 * This routine is for data and free blocks, not leaf/node blocks
508 * which are handled by xfs_da_grow_inode. 440 * which are handled by xfs_da_grow_inode.
509 */ 441 */
510int /* error */ 442int
511xfs_dir2_grow_inode( 443xfs_dir2_grow_inode(
512 xfs_da_args_t *args, /* operation arguments */ 444 xfs_da_args_t *args,
513 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ 445 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
514 xfs_dir2_db_t *dbp) /* out: block number added */ 446 xfs_dir2_db_t *dbp) /* out: block number added */
515{ 447{
516 xfs_fileoff_t bno; /* directory offset of new block */ 448 xfs_fileoff_t bno; /* directory offset of new block */
517 int count; /* count of filesystem blocks */ 449 int count; /* count of filesystem blocks */
518 xfs_inode_t *dp; /* incore directory inode */ 450 xfs_inode_t *dp; /* incore directory inode */
519 int error; /* error return value */ 451 int error;
520 int got; /* blocks actually mapped */ 452 int got; /* blocks actually mapped */
521 int i; /* temp mapping index */ 453 int i;
522 xfs_bmbt_irec_t map; /* single structure for bmap */ 454 xfs_bmbt_irec_t map; /* single structure for bmap */
523 int mapi; /* mapping index */ 455 int mapi; /* mapping index */
524 xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ 456 xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */
525 xfs_mount_t *mp; /* filesystem mount point */ 457 xfs_mount_t *mp;
526 int nmap; /* number of bmap entries */ 458 int nmap; /* number of bmap entries */
527 xfs_trans_t *tp; /* transaction pointer */ 459 xfs_trans_t *tp;
528 460
529 xfs_dir2_trace_args_s("grow_inode", args, space); 461 xfs_dir2_trace_args_s("grow_inode", args, space);
530 dp = args->dp; 462 dp = args->dp;
@@ -538,9 +470,8 @@ xfs_dir2_grow_inode(
538 /* 470 /*
539 * Find the first hole for our block. 471 * Find the first hole for our block.
540 */ 472 */
541 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) { 473 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
542 return error; 474 return error;
543 }
544 nmap = 1; 475 nmap = 1;
545 ASSERT(args->firstblock != NULL); 476 ASSERT(args->firstblock != NULL);
546 /* 477 /*
@@ -549,13 +480,9 @@ xfs_dir2_grow_inode(
549 if ((error = xfs_bmapi(tp, dp, bno, count, 480 if ((error = xfs_bmapi(tp, dp, bno, count,
550 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 481 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
551 args->firstblock, args->total, &map, &nmap, 482 args->firstblock, args->total, &map, &nmap,
552 args->flist))) { 483 args->flist, NULL)))
553 return error; 484 return error;
554 }
555 ASSERT(nmap <= 1); 485 ASSERT(nmap <= 1);
556 /*
557 * Got it in 1.
558 */
559 if (nmap == 1) { 486 if (nmap == 1) {
560 mapp = &map; 487 mapp = &map;
561 mapi = 1; 488 mapi = 1;
@@ -585,7 +512,8 @@ xfs_dir2_grow_inode(
585 if ((error = xfs_bmapi(tp, dp, b, c, 512 if ((error = xfs_bmapi(tp, dp, b, c,
586 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, 513 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
587 args->firstblock, args->total, 514 args->firstblock, args->total,
588 &mapp[mapi], &nmap, args->flist))) { 515 &mapp[mapi], &nmap, args->flist,
516 NULL))) {
589 kmem_free(mapp, sizeof(*mapp) * count); 517 kmem_free(mapp, sizeof(*mapp) * count);
590 return error; 518 return error;
591 } 519 }
@@ -645,20 +573,19 @@ xfs_dir2_grow_inode(
645/* 573/*
646 * See if the directory is a single-block form directory. 574 * See if the directory is a single-block form directory.
647 */ 575 */
648int /* error */ 576int
649xfs_dir2_isblock( 577xfs_dir2_isblock(
650 xfs_trans_t *tp, /* transaction pointer */ 578 xfs_trans_t *tp,
651 xfs_inode_t *dp, /* incore directory inode */ 579 xfs_inode_t *dp,
652 int *vp) /* out: 1 is block, 0 is not block */ 580 int *vp) /* out: 1 is block, 0 is not block */
653{ 581{
654 xfs_fileoff_t last; /* last file offset */ 582 xfs_fileoff_t last; /* last file offset */
655 xfs_mount_t *mp; /* filesystem mount point */ 583 xfs_mount_t *mp;
656 int rval; /* return value */ 584 int rval;
657 585
658 mp = dp->i_mount; 586 mp = dp->i_mount;
659 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) { 587 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
660 return rval; 588 return rval;
661 }
662 rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; 589 rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
663 ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); 590 ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
664 *vp = rval; 591 *vp = rval;
@@ -668,20 +595,19 @@ xfs_dir2_isblock(
668/* 595/*
669 * See if the directory is a single-leaf form directory. 596 * See if the directory is a single-leaf form directory.
670 */ 597 */
671int /* error */ 598int
672xfs_dir2_isleaf( 599xfs_dir2_isleaf(
673 xfs_trans_t *tp, /* transaction pointer */ 600 xfs_trans_t *tp,
674 xfs_inode_t *dp, /* incore directory inode */ 601 xfs_inode_t *dp,
675 int *vp) /* out: 1 is leaf, 0 is not leaf */ 602 int *vp) /* out: 1 is leaf, 0 is not leaf */
676{ 603{
677 xfs_fileoff_t last; /* last file offset */ 604 xfs_fileoff_t last; /* last file offset */
678 xfs_mount_t *mp; /* filesystem mount point */ 605 xfs_mount_t *mp;
679 int rval; /* return value */ 606 int rval;
680 607
681 mp = dp->i_mount; 608 mp = dp->i_mount;
682 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) { 609 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
683 return rval; 610 return rval;
684 }
685 *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); 611 *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
686 return 0; 612 return 0;
687} 613}
@@ -689,9 +615,9 @@ xfs_dir2_isleaf(
689/* 615/*
690 * Getdents put routine for 64-bit ABI, direct form. 616 * Getdents put routine for 64-bit ABI, direct form.
691 */ 617 */
692static int /* error */ 618static int
693xfs_dir2_put_dirent64_direct( 619xfs_dir2_put_dirent64_direct(
694 xfs_dir2_put_args_t *pa) /* argument bundle */ 620 xfs_dir2_put_args_t *pa)
695{ 621{
696 xfs_dirent_t *idbp; /* dirent pointer */ 622 xfs_dirent_t *idbp; /* dirent pointer */
697 iovec_t *iovp; /* io vector */ 623 iovec_t *iovp; /* io vector */
@@ -726,9 +652,9 @@ xfs_dir2_put_dirent64_direct(
726/* 652/*
727 * Getdents put routine for 64-bit ABI, uio form. 653 * Getdents put routine for 64-bit ABI, uio form.
728 */ 654 */
729static int /* error */ 655static int
730xfs_dir2_put_dirent64_uio( 656xfs_dir2_put_dirent64_uio(
731 xfs_dir2_put_args_t *pa) /* argument bundle */ 657 xfs_dir2_put_args_t *pa)
732{ 658{
733 xfs_dirent_t *idbp; /* dirent pointer */ 659 xfs_dirent_t *idbp; /* dirent pointer */
734 int namelen; /* entry name length */ 660 int namelen; /* entry name length */
@@ -764,17 +690,17 @@ xfs_dir2_put_dirent64_uio(
764 */ 690 */
765int 691int
766xfs_dir2_shrink_inode( 692xfs_dir2_shrink_inode(
767 xfs_da_args_t *args, /* operation arguments */ 693 xfs_da_args_t *args,
768 xfs_dir2_db_t db, /* directory block number */ 694 xfs_dir2_db_t db,
769 xfs_dabuf_t *bp) /* block's buffer */ 695 xfs_dabuf_t *bp)
770{ 696{
771 xfs_fileoff_t bno; /* directory file offset */ 697 xfs_fileoff_t bno; /* directory file offset */
772 xfs_dablk_t da; /* directory file offset */ 698 xfs_dablk_t da; /* directory file offset */
773 int done; /* bunmap is finished */ 699 int done; /* bunmap is finished */
774 xfs_inode_t *dp; /* incore directory inode */ 700 xfs_inode_t *dp;
775 int error; /* error return value */ 701 int error;
776 xfs_mount_t *mp; /* filesystem mount point */ 702 xfs_mount_t *mp;
777 xfs_trans_t *tp; /* transaction pointer */ 703 xfs_trans_t *tp;
778 704
779 xfs_dir2_trace_args_db("shrink_inode", args, db, bp); 705 xfs_dir2_trace_args_db("shrink_inode", args, db, bp);
780 dp = args->dp; 706 dp = args->dp;
@@ -786,7 +712,7 @@ xfs_dir2_shrink_inode(
786 */ 712 */
787 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, 713 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
788 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, 714 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
789 &done))) { 715 NULL, &done))) {
790 /* 716 /*
791 * ENOSPC actually can happen if we're in a removename with 717 * ENOSPC actually can happen if we're in a removename with
792 * no space reservation, and the resulting block removal 718 * no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7dd364b1e038..86560b6f794c 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -22,7 +22,9 @@ struct uio;
22struct xfs_dabuf; 22struct xfs_dabuf;
23struct xfs_da_args; 23struct xfs_da_args;
24struct xfs_dir2_put_args; 24struct xfs_dir2_put_args;
25struct xfs_bmap_free;
25struct xfs_inode; 26struct xfs_inode;
27struct xfs_mount;
26struct xfs_trans; 28struct xfs_trans;
27 29
28/* 30/*
@@ -73,7 +75,35 @@ typedef struct xfs_dir2_put_args {
73} xfs_dir2_put_args_t; 75} xfs_dir2_put_args_t;
74 76
75/* 77/*
76 * Other interfaces used by the rest of the dir v2 code. 78 * Generic directory interface routines
79 */
80extern void xfs_dir_startup(void);
81extern void xfs_dir_mount(struct xfs_mount *mp);
82extern int xfs_dir_isempty(struct xfs_inode *dp);
83extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
84 struct xfs_inode *pdp);
85extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
86 char *name, int namelen, xfs_ino_t inum,
87 xfs_fsblock_t *first,
88 struct xfs_bmap_free *flist, xfs_extlen_t tot);
89extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
90 char *name, int namelen, xfs_ino_t *inum);
91extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
92 char *name, int namelen, xfs_ino_t ino,
93 xfs_fsblock_t *first,
94 struct xfs_bmap_free *flist, xfs_extlen_t tot);
95extern int xfs_dir_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
96 uio_t *uio, int *eofp);
97extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
98 char *name, int namelen, xfs_ino_t inum,
99 xfs_fsblock_t *first,
100 struct xfs_bmap_free *flist, xfs_extlen_t tot);
101extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
102 char *name, int namelen);
103extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
104
105/*
106 * Utility routines for v2 directories.
77 */ 107 */
78extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, 108extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
79 xfs_dir2_db_t *dbp); 109 xfs_dir2_db_t *dbp);
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 972ded595476..9d7438bba30d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -22,19 +22,16 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir_sf.h"
32#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
36#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
37#include "xfs_dir_leaf.h"
38#include "xfs_dir2_data.h" 35#include "xfs_dir2_data.h"
39#include "xfs_dir2_leaf.h" 36#include "xfs_dir2_leaf.h"
40#include "xfs_dir2_block.h" 37#include "xfs_dir2_block.h"
@@ -51,6 +48,18 @@ static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp,
51 int *entno); 48 int *entno);
52static int xfs_dir2_block_sort(const void *a, const void *b); 49static int xfs_dir2_block_sort(const void *a, const void *b);
53 50
51static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
52
53/*
54 * One-time startup routine called from xfs_init().
55 */
56void
57xfs_dir_startup(void)
58{
59 xfs_dir_hash_dot = xfs_da_hashname(".", 1);
60 xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
61}
62
54/* 63/*
55 * Add an entry to a block directory. 64 * Add an entry to a block directory.
56 */ 65 */
@@ -400,7 +409,7 @@ xfs_dir2_block_addname(
400 /* 409 /*
401 * Create the new data entry. 410 * Create the new data entry.
402 */ 411 */
403 INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); 412 dep->inumber = cpu_to_be64(args->inumber);
404 dep->namelen = args->namelen; 413 dep->namelen = args->namelen;
405 memcpy(dep->name, args->name, args->namelen); 414 memcpy(dep->name, args->name, args->namelen);
406 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 415 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -508,7 +517,7 @@ xfs_dir2_block_getdents(
508 517
509 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 518 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
510 ptr - (char *)block); 519 ptr - (char *)block);
511 p.ino = INT_GET(dep->inumber, ARCH_CONVERT); 520 p.ino = be64_to_cpu(dep->inumber);
512#if XFS_BIG_INUMS 521#if XFS_BIG_INUMS
513 p.ino += mp->m_inoadd; 522 p.ino += mp->m_inoadd;
514#endif 523#endif
@@ -626,7 +635,7 @@ xfs_dir2_block_lookup(
626 /* 635 /*
627 * Fill in inode number, release the block. 636 * Fill in inode number, release the block.
628 */ 637 */
629 args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); 638 args->inumber = be64_to_cpu(dep->inumber);
630 xfs_da_brelse(args->trans, bp); 639 xfs_da_brelse(args->trans, bp);
631 return XFS_ERROR(EEXIST); 640 return XFS_ERROR(EEXIST);
632} 641}
@@ -844,11 +853,11 @@ xfs_dir2_block_replace(
844 */ 853 */
845 dep = (xfs_dir2_data_entry_t *) 854 dep = (xfs_dir2_data_entry_t *)
846 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 855 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
847 ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber); 856 ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
848 /* 857 /*
849 * Change the inode number to the new value. 858 * Change the inode number to the new value.
850 */ 859 */
851 INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); 860 dep->inumber = cpu_to_be64(args->inumber);
852 xfs_dir2_data_log_entry(args->trans, bp, dep); 861 xfs_dir2_data_log_entry(args->trans, bp, dep);
853 xfs_dir2_data_check(dp, bp); 862 xfs_dir2_data_check(dp, bp);
854 xfs_da_buf_done(bp); 863 xfs_da_buf_done(bp);
@@ -1130,7 +1139,7 @@ xfs_dir2_sf_to_block(
1130 */ 1139 */
1131 dep = (xfs_dir2_data_entry_t *) 1140 dep = (xfs_dir2_data_entry_t *)
1132 ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); 1141 ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
1133 INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino); 1142 dep->inumber = cpu_to_be64(dp->i_ino);
1134 dep->namelen = 1; 1143 dep->namelen = 1;
1135 dep->name[0] = '.'; 1144 dep->name[0] = '.';
1136 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1145 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1144,7 +1153,7 @@ xfs_dir2_sf_to_block(
1144 */ 1153 */
1145 dep = (xfs_dir2_data_entry_t *) 1154 dep = (xfs_dir2_data_entry_t *)
1146 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); 1155 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
1147 INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 1156 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
1148 dep->namelen = 2; 1157 dep->namelen = 2;
1149 dep->name[0] = dep->name[1] = '.'; 1158 dep->name[0] = dep->name[1] = '.';
1150 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1159 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1193,7 +1202,7 @@ xfs_dir2_sf_to_block(
1193 * Copy a real entry. 1202 * Copy a real entry.
1194 */ 1203 */
1195 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); 1204 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
1196 INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp, 1205 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp,
1197 XFS_DIR2_SF_INUMBERP(sfep))); 1206 XFS_DIR2_SF_INUMBERP(sfep)));
1198 dep->namelen = sfep->namelen; 1207 dep->namelen = sfep->namelen;
1199 memcpy(dep->name, sfep->name, dep->namelen); 1208 memcpy(dep->name, sfep->name, dep->namelen);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index bb3d03ff002b..f7c799217072 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -22,18 +22,15 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir_sf.h"
32#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
36#include "xfs_dir_leaf.h"
37#include "xfs_dir2_data.h" 34#include "xfs_dir2_data.h"
38#include "xfs_dir2_leaf.h" 35#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 36#include "xfs_dir2_block.h"
@@ -133,7 +130,7 @@ xfs_dir2_data_check(
133 */ 130 */
134 dep = (xfs_dir2_data_entry_t *)p; 131 dep = (xfs_dir2_data_entry_t *)p;
135 ASSERT(dep->namelen != 0); 132 ASSERT(dep->namelen != 0);
136 ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0); 133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
137 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == 134 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
138 (char *)dep - (char *)d); 135 (char *)dep - (char *)d);
139 count++; 136 count++;
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 0847cbb53e17..a6ae2d21c40a 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -85,11 +85,11 @@ typedef struct xfs_dir2_data_hdr {
85 * Tag appears as the last 2 bytes. 85 * Tag appears as the last 2 bytes.
86 */ 86 */
87typedef struct xfs_dir2_data_entry { 87typedef struct xfs_dir2_data_entry {
88 xfs_ino_t inumber; /* inode number */ 88 __be64 inumber; /* inode number */
89 __uint8_t namelen; /* name length */ 89 __u8 namelen; /* name length */
90 __uint8_t name[1]; /* name bytes, no null */ 90 __u8 name[1]; /* name bytes, no null */
91 /* variable offset */ 91 /* variable offset */
92 xfs_dir2_data_off_t tag; /* starting offset of us */ 92 __be16 tag; /* starting offset of us */
93} xfs_dir2_data_entry_t; 93} xfs_dir2_data_entry_t;
94 94
95/* 95/*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 0f5e2f2ce6ec..b1cf1fbf423d 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_attr_sf.h" 32#include "xfs_attr_sf.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h" 34#include "xfs_dinode.h"
37#include "xfs_inode.h" 35#include "xfs_inode.h"
@@ -407,7 +405,7 @@ xfs_dir2_leaf_addname(
407 * Initialize our new entry (at last). 405 * Initialize our new entry (at last).
408 */ 406 */
409 dep = (xfs_dir2_data_entry_t *)dup; 407 dep = (xfs_dir2_data_entry_t *)dup;
410 INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); 408 dep->inumber = cpu_to_be64(args->inumber);
411 dep->namelen = args->namelen; 409 dep->namelen = args->namelen;
412 memcpy(dep->name, args->name, dep->namelen); 410 memcpy(dep->name, args->name, dep->namelen);
413 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 411 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -884,7 +882,7 @@ xfs_dir2_leaf_getdents(
884 XFS_DIR2_BYTE_TO_DA(mp, 882 XFS_DIR2_BYTE_TO_DA(mp,
885 XFS_DIR2_LEAF_OFFSET) - map_off, 883 XFS_DIR2_LEAF_OFFSET) - map_off,
886 XFS_BMAPI_METADATA, NULL, 0, 884 XFS_BMAPI_METADATA, NULL, 0,
887 &map[map_valid], &nmap, NULL); 885 &map[map_valid], &nmap, NULL, NULL);
888 /* 886 /*
889 * Don't know if we should ignore this or 887 * Don't know if we should ignore this or
890 * try to return an error. 888 * try to return an error.
@@ -1098,7 +1096,7 @@ xfs_dir2_leaf_getdents(
1098 1096
1099 p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); 1097 p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
1100 1098
1101 p->ino = INT_GET(dep->inumber, ARCH_CONVERT); 1099 p->ino = be64_to_cpu(dep->inumber);
1102#if XFS_BIG_INUMS 1100#if XFS_BIG_INUMS
1103 p->ino += mp->m_inoadd; 1101 p->ino += mp->m_inoadd;
1104#endif 1102#endif
@@ -1319,7 +1317,7 @@ xfs_dir2_leaf_lookup(
1319 /* 1317 /*
1320 * Return the found inode number. 1318 * Return the found inode number.
1321 */ 1319 */
1322 args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); 1320 args->inumber = be64_to_cpu(dep->inumber);
1323 xfs_da_brelse(tp, dbp); 1321 xfs_da_brelse(tp, dbp);
1324 xfs_da_brelse(tp, lbp); 1322 xfs_da_brelse(tp, lbp);
1325 return XFS_ERROR(EEXIST); 1323 return XFS_ERROR(EEXIST);
@@ -1606,11 +1604,11 @@ xfs_dir2_leaf_replace(
1606 dep = (xfs_dir2_data_entry_t *) 1604 dep = (xfs_dir2_data_entry_t *)
1607 ((char *)dbp->data + 1605 ((char *)dbp->data +
1608 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); 1606 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
1609 ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT)); 1607 ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1610 /* 1608 /*
1611 * Put the new inode number in, log it. 1609 * Put the new inode number in, log it.
1612 */ 1610 */
1613 INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); 1611 dep->inumber = cpu_to_be64(args->inumber);
1614 tp = args->trans; 1612 tp = args->trans;
1615 xfs_dir2_data_log_entry(tp, dbp, dep); 1613 xfs_dir2_data_log_entry(tp, dbp, dep);
1616 xfs_da_buf_done(dbp); 1614 xfs_da_buf_done(dbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ac511ab9c52d..9ca71719b683 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -22,13 +22,11 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir_sf.h"
32#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
@@ -505,7 +503,6 @@ xfs_dir2_leafn_lookup_int(
505 XFS_DATA_FORK))) { 503 XFS_DATA_FORK))) {
506 return error; 504 return error;
507 } 505 }
508 curfdb = newfdb;
509 free = curbp->data; 506 free = curbp->data;
510 ASSERT(be32_to_cpu(free->hdr.magic) == 507 ASSERT(be32_to_cpu(free->hdr.magic) ==
511 XFS_DIR2_FREE_MAGIC); 508 XFS_DIR2_FREE_MAGIC);
@@ -527,8 +524,11 @@ xfs_dir2_leafn_lookup_int(
527 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { 524 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
528 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", 525 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
529 XFS_ERRLEVEL_LOW, mp); 526 XFS_ERRLEVEL_LOW, mp);
527 if (curfdb != newfdb)
528 xfs_da_brelse(tp, curbp);
530 return XFS_ERROR(EFSCORRUPTED); 529 return XFS_ERROR(EFSCORRUPTED);
531 } 530 }
531 curfdb = newfdb;
532 if (be16_to_cpu(free->bests[fi]) >= length) { 532 if (be16_to_cpu(free->bests[fi]) >= length) {
533 *indexp = index; 533 *indexp = index;
534 state->extravalid = 1; 534 state->extravalid = 1;
@@ -580,7 +580,7 @@ xfs_dir2_leafn_lookup_int(
580 if (dep->namelen == args->namelen && 580 if (dep->namelen == args->namelen &&
581 dep->name[0] == args->name[0] && 581 dep->name[0] == args->name[0] &&
582 memcmp(dep->name, args->name, args->namelen) == 0) { 582 memcmp(dep->name, args->name, args->namelen) == 0) {
583 args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); 583 args->inumber = be64_to_cpu(dep->inumber);
584 *indexp = index; 584 *indexp = index;
585 state->extravalid = 1; 585 state->extravalid = 1;
586 state->extrablk.bp = curbp; 586 state->extrablk.bp = curbp;
@@ -970,7 +970,7 @@ xfs_dir2_leafn_remove(
970 /* 970 /*
971 * One less used entry in the free table. 971 * One less used entry in the free table.
972 */ 972 */
973 free->hdr.nused = cpu_to_be32(-1); 973 be32_add(&free->hdr.nused, -1);
974 xfs_dir2_free_log_header(tp, fbp); 974 xfs_dir2_free_log_header(tp, fbp);
975 /* 975 /*
976 * If this was the last entry in the table, we can 976 * If this was the last entry in the table, we can
@@ -1695,7 +1695,7 @@ xfs_dir2_node_addname_int(
1695 * Fill in the new entry and log it. 1695 * Fill in the new entry and log it.
1696 */ 1696 */
1697 dep = (xfs_dir2_data_entry_t *)dup; 1697 dep = (xfs_dir2_data_entry_t *)dup;
1698 INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); 1698 dep->inumber = cpu_to_be64(args->inumber);
1699 dep->namelen = args->namelen; 1699 dep->namelen = args->namelen;
1700 memcpy(dep->name, args->name, dep->namelen); 1700 memcpy(dep->name, args->name, dep->namelen);
1701 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1701 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1905,11 +1905,11 @@ xfs_dir2_node_replace(
1905 dep = (xfs_dir2_data_entry_t *) 1905 dep = (xfs_dir2_data_entry_t *)
1906 ((char *)data + 1906 ((char *)data +
1907 XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); 1907 XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
1908 ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT)); 1908 ASSERT(inum != be64_to_cpu(dep->inumber));
1909 /* 1909 /*
1910 * Fill in the new inode number and log the entry. 1910 * Fill in the new inode number and log the entry.
1911 */ 1911 */
1912 INT_SET(dep->inumber, ARCH_CONVERT, inum); 1912 dep->inumber = cpu_to_be64(inum);
1913 xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); 1913 xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
1914 rval = 0; 1914 rval = 0;
1915 } 1915 }
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index d98a41d1fe63..0cd77b17bf92 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -22,19 +22,16 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir_sf.h"
32#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
36#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
37#include "xfs_dir_leaf.h"
38#include "xfs_error.h" 35#include "xfs_error.h"
39#include "xfs_dir2_data.h" 36#include "xfs_dir2_data.h"
40#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
@@ -117,13 +114,13 @@ xfs_dir2_block_sfsize(
117 dep->name[0] == '.' && dep->name[1] == '.'; 114 dep->name[0] == '.' && dep->name[1] == '.';
118#if XFS_BIG_INUMS 115#if XFS_BIG_INUMS
119 if (!isdot) 116 if (!isdot)
120 i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM; 117 i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
121#endif 118#endif
122 if (!isdot && !isdotdot) { 119 if (!isdot && !isdotdot) {
123 count++; 120 count++;
124 namelen += dep->namelen; 121 namelen += dep->namelen;
125 } else if (isdotdot) 122 } else if (isdotdot)
126 parent = INT_GET(dep->inumber, ARCH_CONVERT); 123 parent = be64_to_cpu(dep->inumber);
127 /* 124 /*
128 * Calculate the new size, see if we should give up yet. 125 * Calculate the new size, see if we should give up yet.
129 */ 126 */
@@ -229,13 +226,13 @@ xfs_dir2_block_to_sf(
229 * Skip . 226 * Skip .
230 */ 227 */
231 if (dep->namelen == 1 && dep->name[0] == '.') 228 if (dep->namelen == 1 && dep->name[0] == '.')
232 ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino); 229 ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino);
233 /* 230 /*
234 * Skip .., but make sure the inode number is right. 231 * Skip .., but make sure the inode number is right.
235 */ 232 */
236 else if (dep->namelen == 2 && 233 else if (dep->namelen == 2 &&
237 dep->name[0] == '.' && dep->name[1] == '.') 234 dep->name[0] == '.' && dep->name[1] == '.')
238 ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == 235 ASSERT(be64_to_cpu(dep->inumber) ==
239 XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 236 XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
240 /* 237 /*
241 * Normal entry, copy it into shortform. 238 * Normal entry, copy it into shortform.
@@ -246,7 +243,7 @@ xfs_dir2_block_to_sf(
246 (xfs_dir2_data_aoff_t) 243 (xfs_dir2_data_aoff_t)
247 ((char *)dep - (char *)block)); 244 ((char *)dep - (char *)block));
248 memcpy(sfep->name, dep->name, dep->namelen); 245 memcpy(sfep->name, dep->name, dep->namelen);
249 temp=INT_GET(dep->inumber, ARCH_CONVERT); 246 temp = be64_to_cpu(dep->inumber);
250 XFS_DIR2_SF_PUT_INUMBER(sfp, &temp, 247 XFS_DIR2_SF_PUT_INUMBER(sfp, &temp,
251 XFS_DIR2_SF_INUMBERP(sfep)); 248 XFS_DIR2_SF_INUMBERP(sfep));
252 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 249 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index c626943b4112..f3fb2ffd6f5c 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -19,11 +19,9 @@
19#include "xfs_fs.h" 19#include "xfs_fs.h"
20#include "xfs_types.h" 20#include "xfs_types.h"
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_dir.h"
23#include "xfs_dir2.h" 22#include "xfs_dir2.h"
24#include "xfs_da_btree.h" 23#include "xfs_da_btree.h"
25#include "xfs_bmap_btree.h" 24#include "xfs_bmap_btree.h"
26#include "xfs_dir_sf.h"
27#include "xfs_dir2_sf.h" 25#include "xfs_dir2_sf.h"
28#include "xfs_attr_sf.h" 26#include "xfs_attr_sf.h"
29#include "xfs_dinode.h" 27#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
deleted file mode 100644
index 6d711869262f..000000000000
--- a/fs/xfs/xfs_dir_leaf.c
+++ /dev/null
@@ -1,2213 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir_sf.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h"
38#include "xfs_inode_item.h"
39#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h"
42#include "xfs_dir_leaf.h"
43#include "xfs_error.h"
44
45/*
46 * xfs_dir_leaf.c
47 *
48 * Routines to implement leaf blocks of directories as Btrees of hashed names.
49 */
50
51/*========================================================================
52 * Function prototypes for the kernel.
53 *========================================================================*/
54
55/*
56 * Routines used for growing the Btree.
57 */
58STATIC void xfs_dir_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
59 int insertion_index,
60 int freemap_index);
61STATIC int xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer,
62 int musthave, int justcheck);
63STATIC void xfs_dir_leaf_rebalance(xfs_da_state_t *state,
64 xfs_da_state_blk_t *blk1,
65 xfs_da_state_blk_t *blk2);
66STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
67 xfs_da_state_blk_t *leaf_blk_1,
68 xfs_da_state_blk_t *leaf_blk_2,
69 int *number_entries_in_blk1,
70 int *number_namebytes_in_blk1);
71
72STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
73 xfs_dablk_t which_block,
74 struct xfs_dabuf **bpp);
75
76/*
77 * Utility routines.
78 */
79STATIC void xfs_dir_leaf_moveents(xfs_dir_leafblock_t *src_leaf,
80 int src_start,
81 xfs_dir_leafblock_t *dst_leaf,
82 int dst_start, int move_count,
83 xfs_mount_t *mp);
84
85
86/*========================================================================
87 * External routines when dirsize < XFS_IFORK_DSIZE(dp).
88 *========================================================================*/
89
90
91/*
92 * Validate a given inode number.
93 */
94int
95xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
96{
97 xfs_agblock_t agblkno;
98 xfs_agino_t agino;
99 xfs_agnumber_t agno;
100 int ino_ok;
101 int ioff;
102
103 agno = XFS_INO_TO_AGNO(mp, ino);
104 agblkno = XFS_INO_TO_AGBNO(mp, ino);
105 ioff = XFS_INO_TO_OFFSET(mp, ino);
106 agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
107 ino_ok =
108 agno < mp->m_sb.sb_agcount &&
109 agblkno < mp->m_sb.sb_agblocks &&
110 agblkno != 0 &&
111 ioff < (1 << mp->m_sb.sb_inopblog) &&
112 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
113 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
114 XFS_RANDOM_DIR_INO_VALIDATE))) {
115 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
116 (unsigned long long) ino);
117 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
118 return XFS_ERROR(EFSCORRUPTED);
119 }
120 return 0;
121}
122
123/*
124 * Create the initial contents of a shortform directory.
125 */
126int
127xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
128{
129 xfs_dir_sf_hdr_t *hdr;
130 xfs_inode_t *dp;
131
132 dp = args->dp;
133 ASSERT(dp != NULL);
134 ASSERT(dp->i_d.di_size == 0);
135 if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
136 dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */
137 dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
138 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
139 dp->i_df.if_flags |= XFS_IFINLINE;
140 }
141 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
142 ASSERT(dp->i_df.if_bytes == 0);
143 xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK);
144 hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
145 XFS_DIR_SF_PUT_DIRINO(&parent, &hdr->parent);
146
147 hdr->count = 0;
148 dp->i_d.di_size = sizeof(*hdr);
149 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
150 return 0;
151}
152
153/*
154 * Add a name to the shortform directory structure.
155 * Overflow from the inode has already been checked for.
156 */
157int
158xfs_dir_shortform_addname(xfs_da_args_t *args)
159{
160 xfs_dir_shortform_t *sf;
161 xfs_dir_sf_entry_t *sfe;
162 int i, offset, size;
163 xfs_inode_t *dp;
164
165 dp = args->dp;
166 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
167 /*
168 * Catch the case where the conversion from shortform to leaf
169 * failed part way through.
170 */
171 if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
172 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
173 return XFS_ERROR(EIO);
174 }
175 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
176 ASSERT(dp->i_df.if_u1.if_data != NULL);
177 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
178 sfe = &sf->list[0];
179 for (i = sf->hdr.count-1; i >= 0; i--) {
180 if (sfe->namelen == args->namelen &&
181 args->name[0] == sfe->name[0] &&
182 memcmp(args->name, sfe->name, args->namelen) == 0)
183 return XFS_ERROR(EEXIST);
184 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
185 }
186
187 offset = (int)((char *)sfe - (char *)sf);
188 size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
189 xfs_idata_realloc(dp, size, XFS_DATA_FORK);
190 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
191 sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
192
193 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
194 sfe->namelen = args->namelen;
195 memcpy(sfe->name, args->name, sfe->namelen);
196 sf->hdr.count++;
197
198 dp->i_d.di_size += size;
199 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
200
201 return 0;
202}
203
204/*
205 * Remove a name from the shortform directory structure.
206 */
207int
208xfs_dir_shortform_removename(xfs_da_args_t *args)
209{
210 xfs_dir_shortform_t *sf;
211 xfs_dir_sf_entry_t *sfe;
212 int base, size = 0, i;
213 xfs_inode_t *dp;
214
215 dp = args->dp;
216 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
217 /*
218 * Catch the case where the conversion from shortform to leaf
219 * failed part way through.
220 */
221 if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
222 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
223 return XFS_ERROR(EIO);
224 }
225 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
226 ASSERT(dp->i_df.if_u1.if_data != NULL);
227 base = sizeof(xfs_dir_sf_hdr_t);
228 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
229 sfe = &sf->list[0];
230 for (i = sf->hdr.count-1; i >= 0; i--) {
231 size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
232 if (sfe->namelen == args->namelen &&
233 sfe->name[0] == args->name[0] &&
234 memcmp(sfe->name, args->name, args->namelen) == 0)
235 break;
236 base += size;
237 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
238 }
239 if (i < 0) {
240 ASSERT(args->oknoent);
241 return XFS_ERROR(ENOENT);
242 }
243
244 if ((base + size) != dp->i_d.di_size) {
245 memmove(&((char *)sf)[base], &((char *)sf)[base+size],
246 dp->i_d.di_size - (base+size));
247 }
248 sf->hdr.count--;
249
250 xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
251 dp->i_d.di_size -= size;
252 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
253
254 return 0;
255}
256
257/*
258 * Look up a name in a shortform directory structure.
259 */
260int
261xfs_dir_shortform_lookup(xfs_da_args_t *args)
262{
263 xfs_dir_shortform_t *sf;
264 xfs_dir_sf_entry_t *sfe;
265 int i;
266 xfs_inode_t *dp;
267
268 dp = args->dp;
269 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
270 /*
271 * Catch the case where the conversion from shortform to leaf
272 * failed part way through.
273 */
274 if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
275 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
276 return XFS_ERROR(EIO);
277 }
278 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
279 ASSERT(dp->i_df.if_u1.if_data != NULL);
280 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
281 if (args->namelen == 2 &&
282 args->name[0] == '.' && args->name[1] == '.') {
283 XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &args->inumber);
284 return(XFS_ERROR(EEXIST));
285 }
286 if (args->namelen == 1 && args->name[0] == '.') {
287 args->inumber = dp->i_ino;
288 return(XFS_ERROR(EEXIST));
289 }
290 sfe = &sf->list[0];
291 for (i = sf->hdr.count-1; i >= 0; i--) {
292 if (sfe->namelen == args->namelen &&
293 sfe->name[0] == args->name[0] &&
294 memcmp(args->name, sfe->name, args->namelen) == 0) {
295 XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args->inumber);
296 return(XFS_ERROR(EEXIST));
297 }
298 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
299 }
300 ASSERT(args->oknoent);
301 return(XFS_ERROR(ENOENT));
302}
303
304/*
305 * Convert from using the shortform to the leaf.
306 */
307int
308xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
309{
310 xfs_inode_t *dp;
311 xfs_dir_shortform_t *sf;
312 xfs_dir_sf_entry_t *sfe;
313 xfs_da_args_t args;
314 xfs_ino_t inumber;
315 char *tmpbuffer;
316 int retval, i, size;
317 xfs_dablk_t blkno;
318 xfs_dabuf_t *bp;
319
320 dp = iargs->dp;
321 /*
322 * Catch the case where the conversion from shortform to leaf
323 * failed part way through.
324 */
325 if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
326 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
327 return XFS_ERROR(EIO);
328 }
329 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
330 ASSERT(dp->i_df.if_u1.if_data != NULL);
331 size = dp->i_df.if_bytes;
332 tmpbuffer = kmem_alloc(size, KM_SLEEP);
333 ASSERT(tmpbuffer != NULL);
334
335 memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
336
337 sf = (xfs_dir_shortform_t *)tmpbuffer;
338 XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &inumber);
339
340 xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
341 dp->i_d.di_size = 0;
342 xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
343 retval = xfs_da_grow_inode(iargs, &blkno);
344 if (retval)
345 goto out;
346
347 ASSERT(blkno == 0);
348 retval = xfs_dir_leaf_create(iargs, blkno, &bp);
349 if (retval)
350 goto out;
351 xfs_da_buf_done(bp);
352
353 args.name = ".";
354 args.namelen = 1;
355 args.hashval = xfs_dir_hash_dot;
356 args.inumber = dp->i_ino;
357 args.dp = dp;
358 args.firstblock = iargs->firstblock;
359 args.flist = iargs->flist;
360 args.total = iargs->total;
361 args.whichfork = XFS_DATA_FORK;
362 args.trans = iargs->trans;
363 args.justcheck = 0;
364 args.addname = args.oknoent = 1;
365 retval = xfs_dir_leaf_addname(&args);
366 if (retval)
367 goto out;
368
369 args.name = "..";
370 args.namelen = 2;
371 args.hashval = xfs_dir_hash_dotdot;
372 args.inumber = inumber;
373 retval = xfs_dir_leaf_addname(&args);
374 if (retval)
375 goto out;
376
377 sfe = &sf->list[0];
378 for (i = 0; i < sf->hdr.count; i++) {
379 args.name = (char *)(sfe->name);
380 args.namelen = sfe->namelen;
381 args.hashval = xfs_da_hashname((char *)(sfe->name),
382 sfe->namelen);
383 XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args.inumber);
384 retval = xfs_dir_leaf_addname(&args);
385 if (retval)
386 goto out;
387 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
388 }
389 retval = 0;
390
391out:
392 kmem_free(tmpbuffer, size);
393 return retval;
394}
395
396STATIC int
397xfs_dir_shortform_compare(const void *a, const void *b)
398{
399 xfs_dir_sf_sort_t *sa, *sb;
400
401 sa = (xfs_dir_sf_sort_t *)a;
402 sb = (xfs_dir_sf_sort_t *)b;
403 if (sa->hash < sb->hash)
404 return -1;
405 else if (sa->hash > sb->hash)
406 return 1;
407 else
408 return sa->entno - sb->entno;
409}
410
411/*
412 * Copy out directory entries for getdents(), for shortform directories.
413 */
414/*ARGSUSED*/
415int
416xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
417 xfs_dirent_t *dbp, xfs_dir_put_t put)
418{
419 xfs_dir_shortform_t *sf;
420 xfs_dir_sf_entry_t *sfe;
421 int retval, i, sbsize, nsbuf, lastresid=0, want_entno;
422 xfs_mount_t *mp;
423 xfs_dahash_t cookhash, hash;
424 xfs_dir_put_args_t p;
425 xfs_dir_sf_sort_t *sbuf, *sbp;
426
427 mp = dp->i_mount;
428 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
429 cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
430 want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
431 nsbuf = sf->hdr.count + 2;
432 sbsize = (nsbuf + 1) * sizeof(*sbuf);
433 sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
434
435 xfs_dir_trace_g_du("sf: start", dp, uio);
436
437 /*
438 * Collect all the entries into the buffer.
439 * Entry 0 is .
440 */
441 sbp->entno = 0;
442 sbp->seqno = 0;
443 sbp->hash = xfs_dir_hash_dot;
444 sbp->ino = dp->i_ino;
445 sbp->name = ".";
446 sbp->namelen = 1;
447 sbp++;
448
449 /*
450 * Entry 1 is ..
451 */
452 sbp->entno = 1;
453 sbp->seqno = 0;
454 sbp->hash = xfs_dir_hash_dotdot;
455 sbp->ino = XFS_GET_DIR_INO8(sf->hdr.parent);
456 sbp->name = "..";
457 sbp->namelen = 2;
458 sbp++;
459
460 /*
461 * Scan the directory data for the rest of the entries.
462 */
463 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
464
465 if (unlikely(
466 ((char *)sfe < (char *)sf) ||
467 ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) {
468 xfs_dir_trace_g_du("sf: corrupted", dp, uio);
469 XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents",
470 XFS_ERRLEVEL_LOW, mp, sfe);
471 kmem_free(sbuf, sbsize);
472 return XFS_ERROR(EFSCORRUPTED);
473 }
474
475 sbp->entno = i + 2;
476 sbp->seqno = 0;
477 sbp->hash = xfs_da_hashname((char *)sfe->name, sfe->namelen);
478 sbp->ino = XFS_GET_DIR_INO8(sfe->inumber);
479 sbp->name = (char *)sfe->name;
480 sbp->namelen = sfe->namelen;
481 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
482 sbp++;
483 }
484
485 /*
486 * Sort the entries on hash then entno.
487 */
488 xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
489 /*
490 * Stuff in last entry.
491 */
492 sbp->entno = nsbuf;
493 sbp->hash = XFS_DA_MAXHASH;
494 sbp->seqno = 0;
495 /*
496 * Figure out the sequence numbers in case there's a hash duplicate.
497 */
498 for (hash = sbuf->hash, sbp = sbuf + 1;
499 sbp < &sbuf[nsbuf + 1]; sbp++) {
500 if (sbp->hash == hash)
501 sbp->seqno = sbp[-1].seqno + 1;
502 else
503 hash = sbp->hash;
504 }
505
506 /*
507 * Set up put routine.
508 */
509 p.dbp = dbp;
510 p.put = put;
511 p.uio = uio;
512
513 /*
514 * Find our place.
515 */
516 for (sbp = sbuf; sbp < &sbuf[nsbuf + 1]; sbp++) {
517 if (sbp->hash > cookhash ||
518 (sbp->hash == cookhash && sbp->seqno >= want_entno))
519 break;
520 }
521
522 /*
523 * Did we fail to find anything? We stop at the last entry,
524 * the one we put maxhash into.
525 */
526 if (sbp == &sbuf[nsbuf]) {
527 kmem_free(sbuf, sbsize);
528 xfs_dir_trace_g_du("sf: hash beyond end", dp, uio);
529 uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
530 *eofp = 1;
531 return 0;
532 }
533
534 /*
535 * Loop putting entries into the user buffer.
536 */
537 while (sbp < &sbuf[nsbuf]) {
538 /*
539 * Save the first resid in a run of equal-hashval entries
540 * so that we can back them out if they don't all fit.
541 */
542 if (sbp->seqno == 0 || sbp == sbuf)
543 lastresid = uio->uio_resid;
544 XFS_PUT_COOKIE(p.cook, mp, 0, sbp[1].seqno, sbp[1].hash);
545 p.ino = sbp->ino;
546#if XFS_BIG_INUMS
547 p.ino += mp->m_inoadd;
548#endif
549 p.name = sbp->name;
550 p.namelen = sbp->namelen;
551 retval = p.put(&p);
552 if (!p.done) {
553 uio->uio_offset =
554 XFS_DA_MAKE_COOKIE(mp, 0, 0, sbp->hash);
555 kmem_free(sbuf, sbsize);
556 uio->uio_resid = lastresid;
557 xfs_dir_trace_g_du("sf: E-O-B", dp, uio);
558 return retval;
559 }
560 sbp++;
561 }
562 kmem_free(sbuf, sbsize);
563 uio->uio_offset = p.cook.o;
564 *eofp = 1;
565 xfs_dir_trace_g_du("sf: E-O-F", dp, uio);
566 return 0;
567}
568
569/*
570 * Look up a name in a shortform directory structure, replace the inode number.
571 */
572int
573xfs_dir_shortform_replace(xfs_da_args_t *args)
574{
575 xfs_dir_shortform_t *sf;
576 xfs_dir_sf_entry_t *sfe;
577 xfs_inode_t *dp;
578 int i;
579
580 dp = args->dp;
581 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
582 /*
583 * Catch the case where the conversion from shortform to leaf
584 * failed part way through.
585 */
586 if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
587 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
588 return XFS_ERROR(EIO);
589 }
590 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
591 ASSERT(dp->i_df.if_u1.if_data != NULL);
592 sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
593 if (args->namelen == 2 &&
594 args->name[0] == '.' && args->name[1] == '.') {
595 /* XXX - replace assert? */
596 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
597 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
598 return 0;
599 }
600 ASSERT(args->namelen != 1 || args->name[0] != '.');
601 sfe = &sf->list[0];
602 for (i = sf->hdr.count-1; i >= 0; i--) {
603 if (sfe->namelen == args->namelen &&
604 sfe->name[0] == args->name[0] &&
605 memcmp(args->name, sfe->name, args->namelen) == 0) {
606 ASSERT(memcmp((char *)&args->inumber,
607 (char *)&sfe->inumber, sizeof(xfs_ino_t)));
608 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
609 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
610 return 0;
611 }
612 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
613 }
614 ASSERT(args->oknoent);
615 return XFS_ERROR(ENOENT);
616}
617
618/*
619 * Convert a leaf directory to shortform structure
620 */
621int
622xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
623{
624 xfs_dir_leafblock_t *leaf;
625 xfs_dir_leaf_hdr_t *hdr;
626 xfs_dir_leaf_entry_t *entry;
627 xfs_dir_leaf_name_t *namest;
628 xfs_da_args_t args;
629 xfs_inode_t *dp;
630 xfs_ino_t parent = 0;
631 char *tmpbuffer;
632 int retval, i;
633 xfs_dabuf_t *bp;
634
635 dp = iargs->dp;
636 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
637 ASSERT(tmpbuffer != NULL);
638
639 retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
640 XFS_DATA_FORK);
641 if (retval)
642 goto out;
643 ASSERT(bp != NULL);
644 memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
645 leaf = (xfs_dir_leafblock_t *)tmpbuffer;
646 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
647 memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
648
649 /*
650 * Find and special case the parent inode number
651 */
652 hdr = &leaf->hdr;
653 entry = &leaf->entries[0];
654 for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
655 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
656 if ((entry->namelen == 2) &&
657 (namest->name[0] == '.') &&
658 (namest->name[1] == '.')) {
659 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &parent);
660 entry->nameidx = 0;
661 } else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
662 entry->nameidx = 0;
663 }
664 }
665 retval = xfs_da_shrink_inode(iargs, 0, bp);
666 if (retval)
667 goto out;
668 retval = xfs_dir_shortform_create(iargs, parent);
669 if (retval)
670 goto out;
671
672 /*
673 * Copy the rest of the filenames
674 */
675 entry = &leaf->entries[0];
676 args.dp = dp;
677 args.firstblock = iargs->firstblock;
678 args.flist = iargs->flist;
679 args.total = iargs->total;
680 args.whichfork = XFS_DATA_FORK;
681 args.trans = iargs->trans;
682 args.justcheck = 0;
683 args.addname = args.oknoent = 1;
684 for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
685 if (!entry->nameidx)
686 continue;
687 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
688 args.name = (char *)(namest->name);
689 args.namelen = entry->namelen;
690 args.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
691 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args.inumber);
692 xfs_dir_shortform_addname(&args);
693 }
694
695out:
696 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
697 return retval;
698}
699
700/*
701 * Convert from using a single leaf to a root node and a leaf.
702 */
703int
704xfs_dir_leaf_to_node(xfs_da_args_t *args)
705{
706 xfs_dir_leafblock_t *leaf;
707 xfs_da_intnode_t *node;
708 xfs_inode_t *dp;
709 xfs_dabuf_t *bp1, *bp2;
710 xfs_dablk_t blkno;
711 int retval;
712
713 dp = args->dp;
714 retval = xfs_da_grow_inode(args, &blkno);
715 ASSERT(blkno == 1);
716 if (retval)
717 return retval;
718 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
719 XFS_DATA_FORK);
720 if (retval)
721 return retval;
722 ASSERT(bp1 != NULL);
723 retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
724 XFS_DATA_FORK);
725 if (retval) {
726 xfs_da_buf_done(bp1);
727 return retval;
728 }
729 ASSERT(bp2 != NULL);
730 memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
731 xfs_da_buf_done(bp1);
732 xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
733
734 /*
735 * Set up the new root node.
736 */
737 retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
738 if (retval) {
739 xfs_da_buf_done(bp2);
740 return retval;
741 }
742 node = bp1->data;
743 leaf = bp2->data;
744 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
745 node->btree[0].hashval = cpu_to_be32(
746 INT_GET(leaf->entries[
747 INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
748 xfs_da_buf_done(bp2);
749 node->btree[0].before = cpu_to_be32(blkno);
750 node->hdr.count = cpu_to_be16(1);
751 xfs_da_log_buf(args->trans, bp1,
752 XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
753 xfs_da_buf_done(bp1);
754
755 return retval;
756}
757
758
759/*========================================================================
760 * Routines used for growing the Btree.
761 *========================================================================*/
762
763/*
764 * Create the initial contents of a leaf directory
765 * or a leaf in a node directory.
766 */
767STATIC int
768xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
769{
770 xfs_dir_leafblock_t *leaf;
771 xfs_dir_leaf_hdr_t *hdr;
772 xfs_inode_t *dp;
773 xfs_dabuf_t *bp;
774 int retval;
775
776 dp = args->dp;
777 ASSERT(dp != NULL);
778 retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
779 if (retval)
780 return retval;
781 ASSERT(bp != NULL);
782 leaf = bp->data;
783 memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
784 hdr = &leaf->hdr;
785 hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC);
786 INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
787 if (!hdr->firstused)
788 INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1);
789 INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
790 INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT));
791
792 xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
793
794 *bpp = bp;
795 return 0;
796}
797
798/*
799 * Split the leaf node, rebalance, then add the new entry.
800 */
801int
802xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
803 xfs_da_state_blk_t *newblk)
804{
805 xfs_dablk_t blkno;
806 xfs_da_args_t *args;
807 int error;
808
809 /*
810 * Allocate space for a new leaf node.
811 */
812 args = state->args;
813 ASSERT(args != NULL);
814 ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
815 error = xfs_da_grow_inode(args, &blkno);
816 if (error)
817 return error;
818 error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
819 if (error)
820 return error;
821 newblk->blkno = blkno;
822 newblk->magic = XFS_DIR_LEAF_MAGIC;
823
824 /*
825 * Rebalance the entries across the two leaves.
826 */
827 xfs_dir_leaf_rebalance(state, oldblk, newblk);
828 error = xfs_da_blk_link(state, oldblk, newblk);
829 if (error)
830 return error;
831
832 /*
833 * Insert the new entry in the correct block.
834 */
835 if (state->inleaf) {
836 error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index);
837 } else {
838 error = xfs_dir_leaf_add(newblk->bp, args, newblk->index);
839 }
840
841 /*
842 * Update last hashval in each block since we added the name.
843 */
844 oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
845 newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
846 return error;
847}
848
849/*
850 * Add a name to the leaf directory structure.
851 *
852 * Must take into account fragmented leaves and leaves where spacemap has
853 * lost some freespace information (ie: holes).
854 */
855int
856xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
857{
858 xfs_dir_leafblock_t *leaf;
859 xfs_dir_leaf_hdr_t *hdr;
860 xfs_dir_leaf_map_t *map;
861 int tablesize, entsize, sum, i, tmp, error;
862
863 leaf = bp->data;
864 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
865 ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
866 hdr = &leaf->hdr;
867 entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
868
869 /*
870 * Search through freemap for first-fit on new name length.
871 * (may need to figure in size of entry struct too)
872 */
873 tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
874 + (uint)sizeof(xfs_dir_leaf_hdr_t);
875 map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
876 for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
877 if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
878 sum += INT_GET(map->size, ARCH_CONVERT);
879 continue;
880 }
881 if (!map->size)
882 continue; /* no space in this map */
883 tmp = entsize;
884 if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
885 tmp += (uint)sizeof(xfs_dir_leaf_entry_t);
886 if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
887 if (!args->justcheck)
888 xfs_dir_leaf_add_work(bp, args, index, i);
889 return 0;
890 }
891 sum += INT_GET(map->size, ARCH_CONVERT);
892 }
893
894 /*
895 * If there are no holes in the address space of the block,
896 * and we don't have enough freespace, then compaction will do us
897 * no good and we should just give up.
898 */
899 if (!hdr->holes && (sum < entsize))
900 return XFS_ERROR(ENOSPC);
901
902 /*
903 * Compact the entries to coalesce free space.
904 * Pass the justcheck flag so the checking pass can return
905 * an error, without changing anything, if it won't fit.
906 */
907 error = xfs_dir_leaf_compact(args->trans, bp,
908 args->total == 0 ?
909 entsize +
910 (uint)sizeof(xfs_dir_leaf_entry_t) : 0,
911 args->justcheck);
912 if (error)
913 return error;
914 /*
915 * After compaction, the block is guaranteed to have only one
916 * free region, in freemap[0]. If it is not big enough, give up.
917 */
918 if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
919 (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
920 return XFS_ERROR(ENOSPC);
921
922 if (!args->justcheck)
923 xfs_dir_leaf_add_work(bp, args, index, 0);
924 return 0;
925}
926
927/*
928 * Add a name to a leaf directory structure.
929 */
930STATIC void
931xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
932 int mapindex)
933{
934 xfs_dir_leafblock_t *leaf;
935 xfs_dir_leaf_hdr_t *hdr;
936 xfs_dir_leaf_entry_t *entry;
937 xfs_dir_leaf_name_t *namest;
938 xfs_dir_leaf_map_t *map;
939 /* REFERENCED */
940 xfs_mount_t *mp;
941 int tmp, i;
942
943 leaf = bp->data;
944 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
945 hdr = &leaf->hdr;
946 ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
947 ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
948
949 /*
950 * Force open some space in the entry array and fill it in.
951 */
952 entry = &leaf->entries[index];
953 if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
954 tmp = INT_GET(hdr->count, ARCH_CONVERT) - index;
955 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
956 memmove(entry + 1, entry, tmp);
957 xfs_da_log_buf(args->trans, bp,
958 XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
959 }
960 INT_MOD(hdr->count, ARCH_CONVERT, +1);
961
962 /*
963 * Allocate space for the new string (at the end of the run).
964 */
965 map = &hdr->freemap[mapindex];
966 mp = args->trans->t_mountp;
967 ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
968 ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
969 ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
970 INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
971 INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));
972 INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
973 entry->namelen = args->namelen;
974 xfs_da_log_buf(args->trans, bp,
975 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
976
977 /*
978 * Copy the string and inode number into the new space.
979 */
980 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
981 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &namest->inumber);
982 memcpy(namest->name, args->name, args->namelen);
983 xfs_da_log_buf(args->trans, bp,
984 XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
985
986 /*
987 * Update the control info for this leaf node
988 */
989 if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
990 INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);
991 ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
992 tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
993 + (uint)sizeof(xfs_dir_leaf_hdr_t);
994 map = &hdr->freemap[0];
995 for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
996 if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
997 INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
998 INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
999 }
1000 }
1001 INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
1002 xfs_da_log_buf(args->trans, bp,
1003 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1004}
1005
1006/*
1007 * Garbage collect a leaf directory block by copying it to a new buffer.
1008 */
1009STATIC int
1010xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
1011 int justcheck)
1012{
1013 xfs_dir_leafblock_t *leaf_s, *leaf_d;
1014 xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1015 xfs_mount_t *mp;
1016 char *tmpbuffer;
1017 char *tmpbuffer2=NULL;
1018 int rval;
1019 int lbsize;
1020
1021 mp = trans->t_mountp;
1022 lbsize = XFS_LBSIZE(mp);
1023 tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
1024 ASSERT(tmpbuffer != NULL);
1025 memcpy(tmpbuffer, bp->data, lbsize);
1026
1027 /*
1028 * Make a second copy in case xfs_dir_leaf_moveents()
1029 * below destroys the original.
1030 */
1031 if (musthave || justcheck) {
1032 tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
1033 memcpy(tmpbuffer2, bp->data, lbsize);
1034 }
1035 memset(bp->data, 0, lbsize);
1036
1037 /*
1038 * Copy basic information
1039 */
1040 leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
1041 leaf_d = bp->data;
1042 hdr_s = &leaf_s->hdr;
1043 hdr_d = &leaf_d->hdr;
1044 hdr_d->info = hdr_s->info; /* struct copy */
1045 INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
1046 if (!hdr_d->firstused)
1047 INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
1048 hdr_d->namebytes = 0;
1049 hdr_d->count = 0;
1050 hdr_d->holes = 0;
1051 INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
1052 INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
1053
1054 /*
1055 * Copy all entry's in the same (sorted) order,
1056 * but allocate filenames packed and in sequence.
1057 * This changes the source (leaf_s) as well.
1058 */
1059 xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
1060
1061 if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)
1062 rval = XFS_ERROR(ENOSPC);
1063 else
1064 rval = 0;
1065
1066 if (justcheck || rval == ENOSPC) {
1067 ASSERT(tmpbuffer2);
1068 memcpy(bp->data, tmpbuffer2, lbsize);
1069 } else {
1070 xfs_da_log_buf(trans, bp, 0, lbsize - 1);
1071 }
1072
1073 kmem_free(tmpbuffer, lbsize);
1074 if (musthave || justcheck)
1075 kmem_free(tmpbuffer2, lbsize);
1076 return rval;
1077}
1078
1079/*
1080 * Redistribute the directory entries between two leaf nodes,
1081 * taking into account the size of the new entry.
1082 *
1083 * NOTE: if new block is empty, then it will get the upper half of old block.
1084 */
1085STATIC void
1086xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1087 xfs_da_state_blk_t *blk2)
1088{
1089 xfs_da_state_blk_t *tmp_blk;
1090 xfs_dir_leafblock_t *leaf1, *leaf2;
1091 xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1092 int count, totallen, max, space, swap;
1093
1094 /*
1095 * Set up environment.
1096 */
1097 ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
1098 ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
1099 leaf1 = blk1->bp->data;
1100 leaf2 = blk2->bp->data;
1101 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1102 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1103
1104 /*
1105 * Check ordering of blocks, reverse if it makes things simpler.
1106 */
1107 swap = 0;
1108 if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
1109 tmp_blk = blk1;
1110 blk1 = blk2;
1111 blk2 = tmp_blk;
1112 leaf1 = blk1->bp->data;
1113 leaf2 = blk2->bp->data;
1114 swap = 1;
1115 }
1116 hdr1 = &leaf1->hdr;
1117 hdr2 = &leaf2->hdr;
1118
1119 /*
1120 * Examine entries until we reduce the absolute difference in
1121 * byte usage between the two blocks to a minimum. Then get
1122 * the direction to copy and the number of elements to move.
1123 */
1124 state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
1125 &count, &totallen);
1126 if (swap)
1127 state->inleaf = !state->inleaf;
1128
1129 /*
1130 * Move any entries required from leaf to leaf:
1131 */
1132 if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
1133 /*
1134 * Figure the total bytes to be added to the destination leaf.
1135 */
1136 count = INT_GET(hdr1->count, ARCH_CONVERT) - count; /* number entries being moved */
1137 space = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
1138 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1139 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1140
1141 /*
1142 * leaf2 is the destination, compact it if it looks tight.
1143 */
1144 max = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1145 max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1146 if (space > max) {
1147 xfs_dir_leaf_compact(state->args->trans, blk2->bp,
1148 0, 0);
1149 }
1150
1151 /*
1152 * Move high entries from leaf1 to low end of leaf2.
1153 */
1154 xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
1155 leaf2, 0, count, state->mp);
1156
1157 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1158 state->blocksize-1);
1159 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1160 state->blocksize-1);
1161
1162 } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
1163 /*
1164 * Figure the total bytes to be added to the destination leaf.
1165 */
1166 count -= INT_GET(hdr1->count, ARCH_CONVERT); /* number entries being moved */
1167 space = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
1168 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1169 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1170
1171 /*
1172 * leaf1 is the destination, compact it if it looks tight.
1173 */
1174 max = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1175 max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1176 if (space > max) {
1177 xfs_dir_leaf_compact(state->args->trans, blk1->bp,
1178 0, 0);
1179 }
1180
1181 /*
1182 * Move low entries from leaf2 to high end of leaf1.
1183 */
1184 xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
1185 count, state->mp);
1186
1187 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1188 state->blocksize-1);
1189 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1190 state->blocksize-1);
1191 }
1192
1193 /*
1194 * Copy out last hashval in each block for B-tree code.
1195 */
1196 blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1197 blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1198
1199 /*
1200 * Adjust the expected index for insertion.
1201 * GROT: this doesn't work unless blk2 was originally empty.
1202 */
1203 if (!state->inleaf) {
1204 blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
1205 }
1206}
1207
1208/*
1209 * Examine entries until we reduce the absolute difference in
1210 * byte usage between the two blocks to a minimum.
1211 * GROT: Is this really necessary? With other than a 512 byte blocksize,
1212 * GROT: there will always be enough room in either block for a new entry.
1213 * GROT: Do a double-split for this case?
1214 */
1215STATIC int
1216xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
1217 xfs_da_state_blk_t *blk1,
1218 xfs_da_state_blk_t *blk2,
1219 int *countarg, int *namebytesarg)
1220{
1221 xfs_dir_leafblock_t *leaf1, *leaf2;
1222 xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1223 xfs_dir_leaf_entry_t *entry;
1224 int count, max, totallen, half;
1225 int lastdelta, foundit, tmp;
1226
1227 /*
1228 * Set up environment.
1229 */
1230 leaf1 = blk1->bp->data;
1231 leaf2 = blk2->bp->data;
1232 hdr1 = &leaf1->hdr;
1233 hdr2 = &leaf2->hdr;
1234 foundit = 0;
1235 totallen = 0;
1236
1237 /*
1238 * Examine entries until we reduce the absolute difference in
1239 * byte usage between the two blocks to a minimum.
1240 */
1241 max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT);
1242 half = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1243 half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen;
1244 half /= 2;
1245 lastdelta = state->blocksize;
1246 entry = &leaf1->entries[0];
1247 for (count = 0; count < max; entry++, count++) {
1248
1249#define XFS_DIR_ABS(A) (((A) < 0) ? -(A) : (A))
1250 /*
1251 * The new entry is in the first block, account for it.
1252 */
1253 if (count == blk1->index) {
1254 tmp = totallen + (uint)sizeof(*entry)
1255 + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
1256 if (XFS_DIR_ABS(half - tmp) > lastdelta)
1257 break;
1258 lastdelta = XFS_DIR_ABS(half - tmp);
1259 totallen = tmp;
1260 foundit = 1;
1261 }
1262
1263 /*
1264 * Wrap around into the second block if necessary.
1265 */
1266 if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
1267 leaf1 = leaf2;
1268 entry = &leaf1->entries[0];
1269 }
1270
1271 /*
1272 * Figure out if next leaf entry would be too much.
1273 */
1274 tmp = totallen + (uint)sizeof(*entry)
1275 + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1276 if (XFS_DIR_ABS(half - tmp) > lastdelta)
1277 break;
1278 lastdelta = XFS_DIR_ABS(half - tmp);
1279 totallen = tmp;
1280#undef XFS_DIR_ABS
1281 }
1282
1283 /*
1284 * Calculate the number of namebytes that will end up in lower block.
1285 * If new entry not in lower block, fix up the count.
1286 */
1287 totallen -=
1288 count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1289 if (foundit) {
1290 totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
1291 state->args->namelen;
1292 }
1293
1294 *countarg = count;
1295 *namebytesarg = totallen;
1296 return foundit;
1297}
1298
1299/*========================================================================
1300 * Routines used for shrinking the Btree.
1301 *========================================================================*/
1302
1303/*
1304 * Check a leaf block and its neighbors to see if the block should be
1305 * collapsed into one or the other neighbor. Always keep the block
1306 * with the smaller block number.
1307 * If the current block is over 50% full, don't try to join it, return 0.
1308 * If the block is empty, fill in the state structure and return 2.
1309 * If it can be collapsed, fill in the state structure and return 1.
1310 * If nothing can be done, return 0.
1311 */
1312int
1313xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1314{
1315 xfs_dir_leafblock_t *leaf;
1316 xfs_da_state_blk_t *blk;
1317 xfs_da_blkinfo_t *info;
1318 int count, bytes, forward, error, retval, i;
1319 xfs_dablk_t blkno;
1320 xfs_dabuf_t *bp;
1321
1322 /*
1323 * Check for the degenerate case of the block being over 50% full.
1324 * If so, it's not worth even looking to see if we might be able
1325 * to coalesce with a sibling.
1326 */
1327 blk = &state->path.blk[ state->path.active-1 ];
1328 info = blk->bp->data;
1329 ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC);
1330 leaf = (xfs_dir_leafblock_t *)info;
1331 count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1332 bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
1333 count * (uint)sizeof(xfs_dir_leaf_entry_t) +
1334 count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
1335 INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1336 if (bytes > (state->blocksize >> 1)) {
1337 *action = 0; /* blk over 50%, don't try to join */
1338 return 0;
1339 }
1340
1341 /*
1342 * Check for the degenerate case of the block being empty.
1343 * If the block is empty, we'll simply delete it, no need to
1344 * coalesce it with a sibling block. We choose (arbitrarily)
1345 * to merge with the forward block unless it is NULL.
1346 */
1347 if (count == 0) {
1348 /*
1349 * Make altpath point to the block we want to keep and
1350 * path point to the block we want to drop (this one).
1351 */
1352 forward = (info->forw != 0);
1353 memcpy(&state->altpath, &state->path, sizeof(state->path));
1354 error = xfs_da_path_shift(state, &state->altpath, forward,
1355 0, &retval);
1356 if (error)
1357 return error;
1358 if (retval) {
1359 *action = 0;
1360 } else {
1361 *action = 2;
1362 }
1363 return 0;
1364 }
1365
1366 /*
1367 * Examine each sibling block to see if we can coalesce with
1368 * at least 25% free space to spare. We need to figure out
1369 * whether to merge with the forward or the backward block.
1370 * We prefer coalescing with the lower numbered sibling so as
1371 * to shrink a directory over time.
1372 */
1373 forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); /* start with smaller blk num */
1374 for (i = 0; i < 2; forward = !forward, i++) {
1375 if (forward)
1376 blkno = be32_to_cpu(info->forw);
1377 else
1378 blkno = be32_to_cpu(info->back);
1379 if (blkno == 0)
1380 continue;
1381 error = xfs_da_read_buf(state->args->trans, state->args->dp,
1382 blkno, -1, &bp,
1383 XFS_DATA_FORK);
1384 if (error)
1385 return error;
1386 ASSERT(bp != NULL);
1387
1388 leaf = (xfs_dir_leafblock_t *)info;
1389 count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1390 bytes = state->blocksize - (state->blocksize>>2);
1391 bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1392 leaf = bp->data;
1393 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1394 count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
1395 bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1396 bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1397 bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
1398 bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
1399 if (bytes >= 0)
1400 break; /* fits with at least 25% to spare */
1401
1402 xfs_da_brelse(state->args->trans, bp);
1403 }
1404 if (i >= 2) {
1405 *action = 0;
1406 return 0;
1407 }
1408 xfs_da_buf_done(bp);
1409
1410 /*
1411 * Make altpath point to the block we want to keep (the lower
1412 * numbered block) and path point to the block we want to drop.
1413 */
1414 memcpy(&state->altpath, &state->path, sizeof(state->path));
1415 if (blkno < blk->blkno) {
1416 error = xfs_da_path_shift(state, &state->altpath, forward,
1417 0, &retval);
1418 } else {
1419 error = xfs_da_path_shift(state, &state->path, forward,
1420 0, &retval);
1421 }
1422 if (error)
1423 return error;
1424 if (retval) {
1425 *action = 0;
1426 } else {
1427 *action = 1;
1428 }
1429 return 0;
1430}
1431
1432/*
1433 * Remove a name from the leaf directory structure.
1434 *
1435 * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
1436 * If two leaves are 37% full, when combined they will leave 25% free.
1437 */
1438int
1439xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
1440{
1441 xfs_dir_leafblock_t *leaf;
1442 xfs_dir_leaf_hdr_t *hdr;
1443 xfs_dir_leaf_map_t *map;
1444 xfs_dir_leaf_entry_t *entry;
1445 xfs_dir_leaf_name_t *namest;
1446 int before, after, smallest, entsize;
1447 int tablesize, tmp, i;
1448 xfs_mount_t *mp;
1449
1450 leaf = bp->data;
1451 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1452 hdr = &leaf->hdr;
1453 mp = trans->t_mountp;
1454 ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
1455 ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT)));
1456 ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
1457 entry = &leaf->entries[index];
1458 ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
1459 ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
1460
1461 /*
1462 * Scan through free region table:
1463 * check for adjacency of free'd entry with an existing one,
1464 * find smallest free region in case we need to replace it,
1465 * adjust any map that borders the entry table,
1466 */
1467 tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
1468 + (uint)sizeof(xfs_dir_leaf_hdr_t);
1469 map = &hdr->freemap[0];
1470 tmp = INT_GET(map->size, ARCH_CONVERT);
1471 before = after = -1;
1472 smallest = XFS_DIR_LEAF_MAPSIZE - 1;
1473 entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1474 for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
1475 ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
1476 ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
1477 if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
1478 INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
1479 INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
1480 }
1481
1482 if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) {
1483 before = i;
1484 } else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
1485 after = i;
1486 } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
1487 tmp = INT_GET(map->size, ARCH_CONVERT);
1488 smallest = i;
1489 }
1490 }
1491
1492 /*
1493 * Coalesce adjacent freemap regions,
1494 * or replace the smallest region.
1495 */
1496 if ((before >= 0) || (after >= 0)) {
1497 if ((before >= 0) && (after >= 0)) {
1498 map = &hdr->freemap[before];
1499 INT_MOD(map->size, ARCH_CONVERT, entsize);
1500 INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
1501 hdr->freemap[after].base = 0;
1502 hdr->freemap[after].size = 0;
1503 } else if (before >= 0) {
1504 map = &hdr->freemap[before];
1505 INT_MOD(map->size, ARCH_CONVERT, entsize);
1506 } else {
1507 map = &hdr->freemap[after];
1508 INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
1509 INT_MOD(map->size, ARCH_CONVERT, entsize);
1510 }
1511 } else {
1512 /*
1513 * Replace smallest region (if it is smaller than free'd entry)
1514 */
1515 map = &hdr->freemap[smallest];
1516 if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
1517 INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
1518 INT_SET(map->size, ARCH_CONVERT, entsize);
1519 }
1520 }
1521
1522 /*
1523 * Did we remove the first entry?
1524 */
1525 if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT))
1526 smallest = 1;
1527 else
1528 smallest = 0;
1529
1530 /*
1531 * Compress the remaining entries and zero out the removed stuff.
1532 */
1533 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
1534 memset((char *)namest, 0, entsize);
1535 xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
1536
1537 INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
1538 tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
1539 memmove(entry, entry + 1, tmp);
1540 INT_MOD(hdr->count, ARCH_CONVERT, -1);
1541 xfs_da_log_buf(trans, bp,
1542 XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
1543 entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
1544 memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
1545
1546 /*
1547 * If we removed the first entry, re-find the first used byte
1548 * in the name area. Note that if the entry was the "firstused",
1549 * then we don't have a "hole" in our block resulting from
1550 * removing the name.
1551 */
1552 if (smallest) {
1553 tmp = XFS_LBSIZE(mp);
1554 entry = &leaf->entries[0];
1555 for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
1556 ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
1557 ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
1558 if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
1559 tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
1560 }
1561 INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
1562 if (!hdr->firstused)
1563 INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1);
1564 } else {
1565 hdr->holes = 1; /* mark as needing compaction */
1566 }
1567
1568 xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1569
1570 /*
1571 * Check if leaf is less than 50% full, caller may want to
1572 * "join" the leaf with a sibling if so.
1573 */
1574 tmp = (uint)sizeof(xfs_dir_leaf_hdr_t);
1575 tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1576 tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1577 tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1578 if (tmp < mp->m_dir_magicpct)
1579 return 1; /* leaf is < 37% full */
1580 return 0;
1581}
1582
1583/*
1584 * Move all the directory entries from drop_leaf into save_leaf.
1585 */
1586void
1587xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1588 xfs_da_state_blk_t *save_blk)
1589{
1590 xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
1591 xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
1592 xfs_mount_t *mp;
1593 char *tmpbuffer;
1594
1595 /*
1596 * Set up environment.
1597 */
1598 mp = state->mp;
1599 ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
1600 ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
1601 drop_leaf = drop_blk->bp->data;
1602 save_leaf = save_blk->bp->data;
1603 ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1604 ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1605 drop_hdr = &drop_leaf->hdr;
1606 save_hdr = &save_leaf->hdr;
1607
1608 /*
1609 * Save last hashval from dying block for later Btree fixup.
1610 */
1611 drop_blk->hashval = INT_GET(drop_leaf->entries[ drop_leaf->hdr.count-1 ].hashval, ARCH_CONVERT);
1612
1613 /*
1614 * Check if we need a temp buffer, or can we do it in place.
1615 * Note that we don't check "leaf" for holes because we will
1616 * always be dropping it, toosmall() decided that for us already.
1617 */
1618 if (save_hdr->holes == 0) {
1619 /*
1620 * dest leaf has no holes, so we add there. May need
1621 * to make some room in the entry array.
1622 */
1623 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1624 xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
1625 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1626 } else {
1627 xfs_dir_leaf_moveents(drop_leaf, 0,
1628 save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT),
1629 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1630 }
1631 } else {
1632 /*
1633 * Destination has holes, so we make a temporary copy
1634 * of the leaf and add them both to that.
1635 */
1636 tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
1637 ASSERT(tmpbuffer != NULL);
1638 memset(tmpbuffer, 0, state->blocksize);
1639 tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
1640 tmp_hdr = &tmp_leaf->hdr;
1641 tmp_hdr->info = save_hdr->info; /* struct copy */
1642 tmp_hdr->count = 0;
1643 INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
1644 if (!tmp_hdr->firstused)
1645 INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1);
1646 tmp_hdr->namebytes = 0;
1647 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1648 xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
1649 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1650 xfs_dir_leaf_moveents(save_leaf, 0,
1651 tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
1652 (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
1653 } else {
1654 xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
1655 (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
1656 xfs_dir_leaf_moveents(drop_leaf, 0,
1657 tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
1658 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1659 }
1660 memcpy(save_leaf, tmp_leaf, state->blocksize);
1661 kmem_free(tmpbuffer, state->blocksize);
1662 }
1663
1664 xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
1665 state->blocksize - 1);
1666
1667 /*
1668 * Copy out last hashval in each block for B-tree code.
1669 */
1670 save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1671}
1672
1673/*========================================================================
1674 * Routines used for finding things in the Btree.
1675 *========================================================================*/
1676
1677/*
1678 * Look up a name in a leaf directory structure.
1679 * This is the internal routine, it uses the caller's buffer.
1680 *
1681 * Note that duplicate keys are allowed, but only check within the
1682 * current leaf node. The Btree code must check in adjacent leaf nodes.
1683 *
1684 * Return in *index the index into the entry[] array of either the found
1685 * entry, or where the entry should have been (insert before that entry).
1686 *
1687 * Don't change the args->inumber unless we find the filename.
1688 */
1689int
1690xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
1691{
1692 xfs_dir_leafblock_t *leaf;
1693 xfs_dir_leaf_entry_t *entry;
1694 xfs_dir_leaf_name_t *namest;
1695 int probe, span;
1696 xfs_dahash_t hashval;
1697
1698 leaf = bp->data;
1699 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1700 ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
1701
1702 /*
1703 * Binary search. (note: small blocks will skip this loop)
1704 */
1705 hashval = args->hashval;
1706 probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
1707 for (entry = &leaf->entries[probe]; span > 4;
1708 entry = &leaf->entries[probe]) {
1709 span /= 2;
1710 if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
1711 probe += span;
1712 else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
1713 probe -= span;
1714 else
1715 break;
1716 }
1717 ASSERT((probe >= 0) && \
1718 ((!leaf->hdr.count) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
1719 ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval));
1720
1721 /*
1722 * Since we may have duplicate hashval's, find the first matching
1723 * hashval in the leaf.
1724 */
1725 while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) {
1726 entry--;
1727 probe--;
1728 }
1729 while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
1730 entry++;
1731 probe++;
1732 }
1733 if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
1734 *index = probe;
1735 ASSERT(args->oknoent);
1736 return XFS_ERROR(ENOENT);
1737 }
1738
1739 /*
1740 * Duplicate keys may be present, so search all of them for a match.
1741 */
1742 while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) {
1743 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
1744 if (entry->namelen == args->namelen &&
1745 namest->name[0] == args->name[0] &&
1746 memcmp(args->name, namest->name, args->namelen) == 0) {
1747 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
1748 *index = probe;
1749 return XFS_ERROR(EEXIST);
1750 }
1751 entry++;
1752 probe++;
1753 }
1754 *index = probe;
1755 ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
1756 return XFS_ERROR(ENOENT);
1757}
1758
1759/*========================================================================
1760 * Utility routines.
1761 *========================================================================*/
1762
1763/*
1764 * Move the indicated entries from one leaf to another.
1765 * NOTE: this routine modifies both source and destination leaves.
1766 */
1767/* ARGSUSED */
1768STATIC void
1769xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
1770 xfs_dir_leafblock_t *leaf_d, int start_d,
1771 int count, xfs_mount_t *mp)
1772{
1773 xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1774 xfs_dir_leaf_entry_t *entry_s, *entry_d;
1775 int tmp, i;
1776
1777 /*
1778 * Check for nothing to do.
1779 */
1780 if (count == 0)
1781 return;
1782
1783 /*
1784 * Set up environment.
1785 */
1786 ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1787 ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1788 hdr_s = &leaf_s->hdr;
1789 hdr_d = &leaf_d->hdr;
1790 ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
1791 ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
1792 ((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s)));
1793 ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
1794 ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
1795 ((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d)));
1796
1797 ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
1798 ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
1799 ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
1800
1801 /*
1802 * Move the entries in the destination leaf up to make a hole?
1803 */
1804 if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
1805 tmp = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
1806 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1807 entry_s = &leaf_d->entries[start_d];
1808 entry_d = &leaf_d->entries[start_d + count];
1809 memcpy(entry_d, entry_s, tmp);
1810 }
1811
1812 /*
1813 * Copy all entry's in the same (sorted) order,
1814 * but allocate filenames packed and in sequence.
1815 */
1816 entry_s = &leaf_s->entries[start_s];
1817 entry_d = &leaf_d->entries[start_d];
1818 for (i = 0; i < count; entry_s++, entry_d++, i++) {
1819 ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
1820 tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
1821 INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp));
1822 entry_d->hashval = entry_s->hashval; /* INT_: direct copy */
1823 INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
1824 entry_d->namelen = entry_s->namelen;
1825 ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
1826 memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)),
1827 XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp);
1828 ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
1829 memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
1830 0, tmp);
1831 INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
1832 INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
1833 INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
1834 INT_MOD(hdr_d->count, ARCH_CONVERT, +1);
1835 tmp = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
1836 + (uint)sizeof(xfs_dir_leaf_hdr_t);
1837 ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
1838
1839 }
1840
1841 /*
1842 * Zero out the entries we just copied.
1843 */
1844 if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
1845 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1846 entry_s = &leaf_s->entries[start_s];
1847 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1848 memset((char *)entry_s, 0, tmp);
1849 } else {
1850 /*
1851 * Move the remaining entries down to fill the hole,
1852 * then zero the entries at the top.
1853 */
1854 tmp = INT_GET(hdr_s->count, ARCH_CONVERT) - count;
1855 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1856 entry_s = &leaf_s->entries[start_s + count];
1857 entry_d = &leaf_s->entries[start_s];
1858 memcpy(entry_d, entry_s, tmp);
1859
1860 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1861 entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
1862 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1863 memset((char *)entry_s, 0, tmp);
1864 }
1865
1866 /*
1867 * Fill in the freemap information
1868 */
1869 INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t));
1870 INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t));
1871 INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
1872 INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, (hdr_d->freemap[2].base = 0));
1873 INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, (hdr_d->freemap[2].size = 0));
1874 hdr_s->holes = 1; /* leaf may not be compact */
1875}
1876
1877/*
1878 * Compare two leaf blocks "order".
1879 */
1880int
1881xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
1882{
1883 xfs_dir_leafblock_t *leaf1, *leaf2;
1884
1885 leaf1 = leaf1_bp->data;
1886 leaf2 = leaf2_bp->data;
1887 ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) &&
1888 (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC));
1889 if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) &&
1890 ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
1891 INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
1892 (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1893 INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1894 return 1;
1895 }
1896 return 0;
1897}
1898
1899/*
1900 * Pick up the last hashvalue from a leaf block.
1901 */
1902xfs_dahash_t
1903xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
1904{
1905 xfs_dir_leafblock_t *leaf;
1906
1907 leaf = bp->data;
1908 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1909 if (count)
1910 *count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1911 if (!leaf->hdr.count)
1912 return(0);
1913 return(INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1914}
1915
1916/*
1917 * Copy out directory entries for getdents(), for leaf directories.
1918 */
1919int
1920xfs_dir_leaf_getdents_int(
1921 xfs_dabuf_t *bp,
1922 xfs_inode_t *dp,
1923 xfs_dablk_t bno,
1924 uio_t *uio,
1925 int *eobp,
1926 xfs_dirent_t *dbp,
1927 xfs_dir_put_t put,
1928 xfs_daddr_t nextda)
1929{
1930 xfs_dir_leafblock_t *leaf;
1931 xfs_dir_leaf_entry_t *entry;
1932 xfs_dir_leaf_name_t *namest;
1933 int entno, want_entno, i, nextentno;
1934 xfs_mount_t *mp;
1935 xfs_dahash_t cookhash;
1936 xfs_dahash_t nexthash = 0;
1937#if (BITS_PER_LONG == 32)
1938 xfs_dahash_t lasthash = XFS_DA_MAXHASH;
1939#endif
1940 xfs_dir_put_args_t p;
1941
1942 mp = dp->i_mount;
1943 leaf = bp->data;
1944 if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
1945 *eobp = 1;
1946 return XFS_ERROR(ENOENT); /* XXX wrong code */
1947 }
1948
1949 want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
1950
1951 cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
1952
1953 xfs_dir_trace_g_dul("leaf: start", dp, uio, leaf);
1954
1955 /*
1956 * Re-find our place.
1957 */
1958 for (i = entno = 0, entry = &leaf->entries[0];
1959 i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
1960 entry++, i++) {
1961
1962 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
1963 INT_GET(entry->nameidx, ARCH_CONVERT));
1964
1965 if (unlikely(
1966 ((char *)namest < (char *)leaf) ||
1967 ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
1968 XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)",
1969 XFS_ERRLEVEL_LOW, mp, leaf);
1970 xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
1971 return XFS_ERROR(EFSCORRUPTED);
1972 }
1973 if (INT_GET(entry->hashval, ARCH_CONVERT) >= cookhash) {
1974 if ( entno < want_entno
1975 && INT_GET(entry->hashval, ARCH_CONVERT)
1976 == cookhash) {
1977 /*
1978 * Trying to get to a particular offset in a
1979 * run of equal-hashval entries.
1980 */
1981 entno++;
1982 } else if ( want_entno > 0
1983 && entno == want_entno
1984 && INT_GET(entry->hashval, ARCH_CONVERT)
1985 == cookhash) {
1986 break;
1987 } else {
1988 entno = 0;
1989 break;
1990 }
1991 }
1992 }
1993
1994 if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
1995 xfs_dir_trace_g_du("leaf: hash not found", dp, uio);
1996 if (!leaf->hdr.info.forw)
1997 uio->uio_offset =
1998 XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
1999 /*
2000 * Don't set uio_offset if there's another block:
2001 * the node code will be setting uio_offset anyway.
2002 */
2003 *eobp = 0;
2004 return 0;
2005 }
2006 xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
2007
2008 p.dbp = dbp;
2009 p.put = put;
2010 p.uio = uio;
2011
2012 /*
2013 * We're synchronized, start copying entries out to the user.
2014 */
2015 for (; entno >= 0 && i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
2016 entry++, i++, (entno = nextentno)) {
2017 int lastresid=0, retval;
2018 xfs_dircook_t lastoffset;
2019 xfs_dahash_t thishash;
2020
2021 /*
2022 * Check for a damaged directory leaf block and pick up
2023 * the inode number from this entry.
2024 */
2025 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
2026 INT_GET(entry->nameidx, ARCH_CONVERT));
2027
2028 if (unlikely(
2029 ((char *)namest < (char *)leaf) ||
2030 ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
2031 XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)",
2032 XFS_ERRLEVEL_LOW, mp, leaf);
2033 xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
2034 return XFS_ERROR(EFSCORRUPTED);
2035 }
2036
2037 xfs_dir_trace_g_duc("leaf: middle cookie ",
2038 dp, uio, p.cook.o);
2039
2040 if (i < (INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1)) {
2041 nexthash = INT_GET(entry[1].hashval, ARCH_CONVERT);
2042
2043 if (nexthash == INT_GET(entry->hashval, ARCH_CONVERT))
2044 nextentno = entno + 1;
2045 else
2046 nextentno = 0;
2047 XFS_PUT_COOKIE(p.cook, mp, bno, nextentno, nexthash);
2048 xfs_dir_trace_g_duc("leaf: middle cookie ",
2049 dp, uio, p.cook.o);
2050
2051 } else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) {
2052 xfs_dabuf_t *bp2;
2053 xfs_dir_leafblock_t *leaf2;
2054
2055 ASSERT(nextda != -1);
2056
2057 retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
2058 nextda, &bp2, XFS_DATA_FORK);
2059 if (retval)
2060 return retval;
2061
2062 ASSERT(bp2 != NULL);
2063
2064 leaf2 = bp2->data;
2065
2066 if (unlikely(
2067 (be16_to_cpu(leaf2->hdr.info.magic)
2068 != XFS_DIR_LEAF_MAGIC)
2069 || (be32_to_cpu(leaf2->hdr.info.back)
2070 != bno))) { /* GROT */
2071 XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)",
2072 XFS_ERRLEVEL_LOW, mp,
2073 leaf2);
2074 xfs_da_brelse(dp->i_transp, bp2);
2075
2076 return XFS_ERROR(EFSCORRUPTED);
2077 }
2078
2079 nexthash = INT_GET(leaf2->entries[0].hashval,
2080 ARCH_CONVERT);
2081 nextentno = -1;
2082 XFS_PUT_COOKIE(p.cook, mp, thishash, 0, nexthash);
2083 xfs_da_brelse(dp->i_transp, bp2);
2084 xfs_dir_trace_g_duc("leaf: next blk cookie",
2085 dp, uio, p.cook.o);
2086 } else {
2087 nextentno = -1;
2088 XFS_PUT_COOKIE(p.cook, mp, 0, 0, XFS_DA_MAXHASH);
2089 }
2090
2091 /*
2092 * Save off the cookie so we can fall back should the
2093 * 'put' into the outgoing buffer fails. To handle a run
2094 * of equal-hashvals, the off_t structure on 64bit
2095 * builds has entno built into the cookie to ID the
2096 * entry. On 32bit builds, we only have space for the
2097 * hashval so we can't ID specific entries within a group
2098 * of same hashval entries. For this, lastoffset is set
2099 * to the first in the run of equal hashvals so we don't
2100 * include any entries unless we can include all entries
2101 * that share the same hashval. Hopefully the buffer
2102 * provided is big enough to handle it (see pv763517).
2103 */
2104#if (BITS_PER_LONG == 32)
2105 if ((thishash = INT_GET(entry->hashval, ARCH_CONVERT))
2106 != lasthash) {
2107 XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2108 lastresid = uio->uio_resid;
2109 lasthash = thishash;
2110 } else {
2111 xfs_dir_trace_g_duc("leaf: DUP COOKIES, skipped",
2112 dp, uio, p.cook.o);
2113 }
2114#else
2115 thishash = INT_GET(entry->hashval, ARCH_CONVERT);
2116 XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2117 lastresid = uio->uio_resid;
2118#endif /* BITS_PER_LONG == 32 */
2119
2120 /*
2121 * Put the current entry into the outgoing buffer. If we fail
2122 * then restore the UIO to the first entry in the current
2123 * run of equal-hashval entries (probably one 1 entry long).
2124 */
2125 p.ino = XFS_GET_DIR_INO8(namest->inumber);
2126#if XFS_BIG_INUMS
2127 p.ino += mp->m_inoadd;
2128#endif
2129 p.name = (char *)namest->name;
2130 p.namelen = entry->namelen;
2131
2132 retval = p.put(&p);
2133
2134 if (!p.done) {
2135 uio->uio_offset = lastoffset.o;
2136 uio->uio_resid = lastresid;
2137
2138 *eobp = 1;
2139
2140 xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
2141
2142 return retval;
2143 }
2144 }
2145
2146 uio->uio_offset = p.cook.o;
2147
2148 *eobp = 0;
2149
2150 xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
2151
2152 return 0;
2153}
2154
2155/*
2156 * Format a dirent64 structure and copy it out the the user's buffer.
2157 */
2158int
2159xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
2160{
2161 iovec_t *iovp;
2162 int reclen, namelen;
2163 xfs_dirent_t *idbp;
2164 uio_t *uio;
2165
2166 namelen = pa->namelen;
2167 reclen = DIRENTSIZE(namelen);
2168 uio = pa->uio;
2169 if (reclen > uio->uio_resid) {
2170 pa->done = 0;
2171 return 0;
2172 }
2173 iovp = uio->uio_iov;
2174 idbp = (xfs_dirent_t *)iovp->iov_base;
2175 iovp->iov_base = (char *)idbp + reclen;
2176 iovp->iov_len -= reclen;
2177 uio->uio_resid -= reclen;
2178 idbp->d_reclen = reclen;
2179 idbp->d_ino = pa->ino;
2180 idbp->d_off = pa->cook.o;
2181 idbp->d_name[namelen] = '\0';
2182 pa->done = 1;
2183 memcpy(idbp->d_name, pa->name, namelen);
2184 return 0;
2185}
2186
2187/*
2188 * Format a dirent64 structure and copy it out the the user's buffer.
2189 */
2190int
2191xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
2192{
2193 int retval, reclen, namelen;
2194 xfs_dirent_t *idbp;
2195 uio_t *uio;
2196
2197 namelen = pa->namelen;
2198 reclen = DIRENTSIZE(namelen);
2199 uio = pa->uio;
2200 if (reclen > uio->uio_resid) {
2201 pa->done = 0;
2202 return 0;
2203 }
2204 idbp = pa->dbp;
2205 idbp->d_reclen = reclen;
2206 idbp->d_ino = pa->ino;
2207 idbp->d_off = pa->cook.o;
2208 idbp->d_name[namelen] = '\0';
2209 memcpy(idbp->d_name, pa->name, namelen);
2210 retval = uio_read((caddr_t)idbp, reclen, uio);
2211 pa->done = (retval == 0);
2212 return retval;
2213}
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
deleted file mode 100644
index eb8cd9a4667f..000000000000
--- a/fs/xfs/xfs_dir_leaf.h
+++ /dev/null
@@ -1,231 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR_LEAF_H__
19#define __XFS_DIR_LEAF_H__
20
21/*
22 * Directory layout, internal structure, access macros, etc.
23 *
24 * Large directories are structured around Btrees where all the data
25 * elements are in the leaf nodes. Filenames are hashed into an int,
26 * then that int is used as the index into the Btree. Since the hashval
27 * of a filename may not be unique, we may have duplicate keys. The
28 * internal links in the Btree are logical block offsets into the file.
29 */
30
31struct uio;
32struct xfs_bmap_free;
33struct xfs_dabuf;
34struct xfs_da_args;
35struct xfs_da_state;
36struct xfs_da_state_blk;
37struct xfs_dir_put_args;
38struct xfs_inode;
39struct xfs_mount;
40struct xfs_trans;
41
42/*========================================================================
43 * Directory Structure when equal to XFS_LBSIZE(mp) bytes.
44 *========================================================================*/
45
46/*
47 * This is the structure of the leaf nodes in the Btree.
48 *
49 * Struct leaf_entry's are packed from the top. Names grow from the bottom
50 * but are not packed. The freemap contains run-length-encoded entries
51 * for the free bytes after the leaf_entry's, but only the N largest such,
52 * smaller runs are dropped. When the freemap doesn't show enough space
53 * for an allocation, we compact the namelist area and try again. If we
54 * still don't have enough space, then we have to split the block.
55 *
56 * Since we have duplicate hash keys, for each key that matches, compare
57 * the actual string. The root and intermediate node search always takes
58 * the first-in-the-block key match found, so we should only have to work
59 * "forw"ard. If none matches, continue with the "forw"ard leaf nodes
60 * until the hash key changes or the filename is found.
61 *
62 * The parent directory and the self-pointer are explicitly represented
63 * (ie: there are entries for "." and "..").
64 *
65 * Note that the count being a __uint16_t limits us to something like a
66 * blocksize of 1.3MB in the face of worst case (short) filenames.
67 */
68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */
69
/*
 * One free region of a leaf block.  "base" is a byte offset measured
 * from the start of the block and "size" is the region's length in
 * bytes; the leaf code clears both fields to zero for a slot that is
 * not in use.  The leaf code reads and writes these fields through the
 * INT_GET/INT_SET/INT_MOD macros with ARCH_CONVERT.
 */
70typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */
71 __uint16_t base; /* base of free region */
72 __uint16_t size; /* run length of free region */
73} xfs_dir_leaf_map_t;
74
/*
 * Header found at the start of every leaf block.
 *
 * "namebytes" is the sum of the namelen fields of the active entries.
 * "firstused" is a byte offset from the start of the block; since the
 * field is 16 bits wide, the leaf code stores blocksize - 1 whenever
 * storing the block size itself leaves the field zero.
 * "holes" is set when a name is removed from the middle of the name
 * area or when entries are moved out of the block.
 */
75typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */
76 xfs_da_blkinfo_t info; /* block type, links, etc. */
77 __uint16_t count; /* count of active leaf_entry's */
78 __uint16_t namebytes; /* num bytes of name strings stored */
79 __uint16_t firstused; /* first used byte in name area */
80 __uint8_t holes; /* != 0 if blk needs compaction */
81 __uint8_t pad1;
82 xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];
83} xfs_dir_leaf_hdr_t;
84
/*
 * One slot of the leaf's entry table.  "nameidx" is the byte offset,
 * from the start of the block, of the xfs_dir_leaf_name_t that belongs
 * to this entry (see XFS_DIR_LEAF_NAMESTRUCT).  "namelen" counts only
 * the name's own bytes; no terminator is included in the space
 * computed by XFS_DIR_LEAF_ENTSIZE_BYENTRY.
 */
85typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */
86 xfs_dahash_t hashval; /* hash value of name */
87 __uint16_t nameidx; /* index into buffer of name */
88 __uint8_t namelen; /* length of name string */
89 __uint8_t pad2;
90} xfs_dir_leaf_entry_t;
91
/*
 * Variable-length name record stored in the name area of a leaf block.
 * name[1] is a placeholder: a record occupies
 * sizeof(xfs_dir_leaf_name_t) - 1 + namelen bytes, where namelen comes
 * from the owning xfs_dir_leaf_entry_t.
 */
92typedef struct xfs_dir_leaf_name {
93 xfs_dir_ino_t inumber; /* inode number for this key */
94 __uint8_t name[1]; /* name string itself */
95} xfs_dir_leaf_name_t;
96
/*
 * Overlay for a whole leaf block.  Only "hdr" has a fixed size;
 * entries[] really holds hdr.count elements.
 * NOTE(review): names appear to be reached through each entry's
 * nameidx byte offset (XFS_DIR_LEAF_NAMESTRUCT) rather than by
 * indexing "namelist" -- confirm against the rest of the leaf code.
 */
97typedef struct xfs_dir_leafblock {
98 xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */
99 xfs_dir_leaf_entry_t entries[1]; /* var sized array */
100 xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */
101} xfs_dir_leafblock_t;
102
103/*
104 * Length of name for which a 512-byte block filesystem
105 * can get a double split.
106 */
107#define XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN \
108 (512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \
109 (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \
110 (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1)
111
112typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa);
113
/*
 * Directory position cookie.  The same storage can be viewed either as
 * a single offset ("o", the value kept in uio_offset and d_off) or as
 * two 32-bit halves: the hash value and the packed block/entry number.
 * Which half comes first depends on XFS_NATIVE_HOST.
 * NOTE(review): this presumes xfs_off_t is as wide as the two halves
 * together -- confirm where xfs_off_t is defined.
 */
114typedef union {
115 xfs_off_t o; /* offset (cookie) */
116 /*
117 * Watch the order here (endian-ness dependent).
118 */
119 struct {
120#ifndef XFS_NATIVE_HOST
121 xfs_dahash_t h; /* hash value */
122 __uint32_t be; /* block and entry */
123#else
124 __uint32_t be; /* block and entry */
125 xfs_dahash_t h; /* hash value */
126#endif /* XFS_NATIVE_HOST */
127 } s;
128} xfs_dircook_t;
129
/*
 * Fill cookie "c" from a block number, an entry number and a hash
 * value.  "c" is named twice in the expansion, so it must be an
 * expression without side effects.
 */
130#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \
131 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
132
/*
 * Argument block handed to a "put" function (xfs_dir_put_t) for each
 * directory entry being returned by getdents.  The caller fills in
 * everything except "done"; the put function sets "done" to say whether
 * the entry was stored in the outgoing buffer.  "cook" is copied into
 * the record's d_off field by the put functions.
 */
133typedef struct xfs_dir_put_args {
134 xfs_dircook_t cook; /* cookie of (next) entry */
135 xfs_intino_t ino; /* inode number */
136 struct xfs_dirent *dbp; /* buffer pointer */
137 char *name; /* directory entry name */
138 int namelen; /* length of name */
139 int done; /* output: set if value was stored */
140 xfs_dir_put_t put; /* put function ptr (i/o) */
141 struct uio *uio; /* uio control structure */
142} xfs_dir_put_args_t;
143
144#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \
145 xfs_dir_leaf_entsize_byname(len)
146static inline int xfs_dir_leaf_entsize_byname(int len)
147{
148 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
149}
150
151#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry) \
152 xfs_dir_leaf_entsize_byentry(entry)
153static inline int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry)
154{
155 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen;
156}
157
158#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset) \
159 xfs_dir_leaf_namestruct(leafp,offset)
160static inline xfs_dir_leaf_name_t *
161xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset)
162{
163 return (xfs_dir_leaf_name_t *)&((char *)(leafp))[offset];
164}
165
166/*========================================================================
167 * Function prototypes for the kernel.
168 *========================================================================*/
169
170/*
171 * Internal routines when dirsize < XFS_LITINO(mp).
172 */
173int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent);
174int xfs_dir_shortform_addname(struct xfs_da_args *args);
175int xfs_dir_shortform_lookup(struct xfs_da_args *args);
176int xfs_dir_shortform_to_leaf(struct xfs_da_args *args);
177int xfs_dir_shortform_removename(struct xfs_da_args *args);
178int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
179 struct xfs_dirent *dbp, xfs_dir_put_t put);
180int xfs_dir_shortform_replace(struct xfs_da_args *args);
181
182/*
183 * Internal routines when dirsize == XFS_LBSIZE(mp).
184 */
185int xfs_dir_leaf_to_node(struct xfs_da_args *args);
186int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
187
188/*
189 * Routines used for growing the Btree.
190 */
191int xfs_dir_leaf_split(struct xfs_da_state *state,
192 struct xfs_da_state_blk *oldblk,
193 struct xfs_da_state_blk *newblk);
194int xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer,
195 struct xfs_da_args *args, int insertion_index);
196int xfs_dir_leaf_addname(struct xfs_da_args *args);
197int xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer,
198 struct xfs_da_args *args,
199 int *index_found_at);
200int xfs_dir_leaf_remove(struct xfs_trans *trans,
201 struct xfs_dabuf *leaf_buffer,
202 int index_to_remove);
203int xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp,
204 xfs_dablk_t bno, struct uio *uio,
205 int *eobp, struct xfs_dirent *dbp,
206 xfs_dir_put_t put, xfs_daddr_t nextda);
207
208/*
209 * Routines used for shrinking the Btree.
210 */
211int xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval);
212void xfs_dir_leaf_unbalance(struct xfs_da_state *state,
213 struct xfs_da_state_blk *drop_blk,
214 struct xfs_da_state_blk *save_blk);
215
216/*
217 * Utility routines.
218 */
219uint xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count);
220int xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp,
221 struct xfs_dabuf *leaf2_bp);
222int xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa);
223int xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa);
224int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
225
226/*
227 * Global data.
228 */
229extern xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
230
231#endif /* __XFS_DIR_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
deleted file mode 100644
index 5b20b4d3f57d..000000000000
--- a/fs/xfs/xfs_dir_sf.h
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR_SF_H__
19#define __XFS_DIR_SF_H__
20
21/*
22 * Directory layout when stored internal to an inode.
23 *
24 * Small directories are packed as tightly as possible so as to
25 * fit into the literal area of the inode.
26 */
27
28typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
29
30/*
31 * The parent directory has a dedicated field, and the self-pointer must
32 * be calculated on the fly.
33 *
34 * Entries are packed toward the top as tight as possible. The header
35 * and the elements much be memcpy'd out into a work area to get correct
36 * alignment for the inode number fields.
37 */
38typedef struct xfs_dir_sf_hdr { /* constant-structure header block */
39 xfs_dir_ino_t parent; /* parent dir inode number */
40 __uint8_t count; /* count of active entries */
41} xfs_dir_sf_hdr_t;
42
43typedef struct xfs_dir_sf_entry {
44 xfs_dir_ino_t inumber; /* referenced inode number */
45 __uint8_t namelen; /* actual length of name (no NULL) */
46 __uint8_t name[1]; /* name */
47} xfs_dir_sf_entry_t;
48
49typedef struct xfs_dir_shortform {
50 xfs_dir_sf_hdr_t hdr;
51 xfs_dir_sf_entry_t list[1]; /* variable sized array */
52} xfs_dir_shortform_t;
53
54/*
55 * We generate this then sort it, so that readdirs are returned in
56 * hash-order. Else seekdir won't work.
57 */
58typedef struct xfs_dir_sf_sort {
59 __uint8_t entno; /* .=0, ..=1, else entry# + 2 */
60 __uint8_t seqno; /* sequence # with same hash value */
61 __uint8_t namelen; /* length of name value (no null) */
62 xfs_dahash_t hash; /* this entry's hash value */
63 xfs_intino_t ino; /* this entry's inode number */
64 char *name; /* name value, pointer into buffer */
65} xfs_dir_sf_sort_t;
66
67#define XFS_DIR_SF_GET_DIRINO(from,to) xfs_dir_sf_get_dirino(from, to)
68static inline void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to)
69{
70 *(to) = XFS_GET_DIR_INO8(*from);
71}
72
73#define XFS_DIR_SF_PUT_DIRINO(from,to) xfs_dir_sf_put_dirino(from, to)
74static inline void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to)
75{
76 XFS_PUT_DIR_INO8(*(from), *(to));
77}
78
79#define XFS_DIR_SF_ENTSIZE_BYNAME(len) xfs_dir_sf_entsize_byname(len)
80static inline int xfs_dir_sf_entsize_byname(int len)
81{
82 return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (len);
83}
84
85#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep) xfs_dir_sf_entsize_byentry(sfep)
86static inline int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep)
87{
88 return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen;
89}
90
91#define XFS_DIR_SF_NEXTENTRY(sfep) xfs_dir_sf_nextentry(sfep)
92static inline xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep)
93{
94 return (xfs_dir_sf_entry_t *) \
95 ((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep));
96}
97
98#define XFS_DIR_SF_ALLFIT(count,totallen) \
99 xfs_dir_sf_allfit(count,totallen)
100static inline int xfs_dir_sf_allfit(int count, int totallen)
101{
102 return ((uint)sizeof(xfs_dir_sf_hdr_t) + \
103 ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen));
104}
105
106#if defined(XFS_DIR_TRACE)
107
108/*
109 * Kernel tracing support for directories.
110 */
111struct uio;
112struct xfs_inode;
113struct xfs_da_intnode;
114struct xfs_dinode;
115struct xfs_dir_leafblock;
116struct xfs_dir_leaf_entry;
117
118#define XFS_DIR_TRACE_SIZE 4096 /* size of global trace buffer */
119extern ktrace_t *xfs_dir_trace_buf;
120
121/*
122 * Trace record types.
123 */
124#define XFS_DIR_KTRACE_G_DU 1 /* dp, uio */
125#define XFS_DIR_KTRACE_G_DUB 2 /* dp, uio, bno */
126#define XFS_DIR_KTRACE_G_DUN 3 /* dp, uio, node */
127#define XFS_DIR_KTRACE_G_DUL 4 /* dp, uio, leaf */
128#define XFS_DIR_KTRACE_G_DUE 5 /* dp, uio, leaf entry */
129#define XFS_DIR_KTRACE_G_DUC 6 /* dp, uio, cookie */
130
131void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio);
132void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio,
133 xfs_dablk_t bno);
134void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio,
135 struct xfs_da_intnode *node);
136void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio,
137 struct xfs_dir_leafblock *leaf);
138void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio,
139 struct xfs_dir_leaf_entry *entry);
140void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio,
141 xfs_off_t cookie);
142void xfs_dir_trace_enter(int type, char *where,
143 void *a0, void *a1, void *a2, void *a3,
144 void *a4, void *a5, void *a6, void *a7,
145 void *a8, void *a9, void *a10, void *a11);
146#else
147#define xfs_dir_trace_g_du(w,d,u)
148#define xfs_dir_trace_g_dub(w,d,u,b)
149#define xfs_dir_trace_g_dun(w,d,u,n)
150#define xfs_dir_trace_g_dul(w,d,u,l)
151#define xfs_dir_trace_g_due(w,d,u,e)
152#define xfs_dir_trace_g_duc(w,d,u,c)
153#endif /* XFS_DIR_TRACE */
154
155#endif /* __XFS_DIR_SF_H__ */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 00b1540f8108..4e7865ad6f0e 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -189,6 +189,6 @@ typedef enum {
189#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) 189#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
190 190
191 191
192extern struct bhv_vfsops xfs_dmops; 192extern struct bhv_module_vfsops xfs_dmops;
193 193
194#endif /* __XFS_DMAPI_H__ */ 194#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index 629795b3b3d5..1e4a35ddf7f9 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 2a21c5024017..b95681b03d81 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,12 +22,10 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir_sf.h"
31#include "xfs_dir2_sf.h" 29#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h" 30#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index f19282ec8549..6cf6d8769b97 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_dir.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
@@ -294,6 +293,62 @@ xfs_efi_init(xfs_mount_t *mp,
294} 293}
295 294
296/* 295/*
296 * Copy an EFI format buffer from the given buf, and into the destination
297 * EFI format structure.
298 * The given buffer can be in 32 bit or 64 bit form (which have different padding),
299 * one of which will be the native format for this kernel.
300 * It will handle the conversion of formats if necessary.
301 */
302int
303xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
304{
305 xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr;
306 uint i;
307 uint len = sizeof(xfs_efi_log_format_t) +
308 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
309 uint len32 = sizeof(xfs_efi_log_format_32_t) +
310 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t);
311 uint len64 = sizeof(xfs_efi_log_format_64_t) +
312 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t);
313
314 if (buf->i_len == len) {
315 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
316 return 0;
317 } else if (buf->i_len == len32) {
318 xfs_efi_log_format_32_t *src_efi_fmt_32 =
319 (xfs_efi_log_format_32_t *)buf->i_addr;
320
321 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
322 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
323 dst_efi_fmt->efi_nextents = src_efi_fmt_32->efi_nextents;
324 dst_efi_fmt->efi_id = src_efi_fmt_32->efi_id;
325 for (i = 0; i < dst_efi_fmt->efi_nextents; i++) {
326 dst_efi_fmt->efi_extents[i].ext_start =
327 src_efi_fmt_32->efi_extents[i].ext_start;
328 dst_efi_fmt->efi_extents[i].ext_len =
329 src_efi_fmt_32->efi_extents[i].ext_len;
330 }
331 return 0;
332 } else if (buf->i_len == len64) {
333 xfs_efi_log_format_64_t *src_efi_fmt_64 =
334 (xfs_efi_log_format_64_t *)buf->i_addr;
335
336 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
337 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
338 dst_efi_fmt->efi_nextents = src_efi_fmt_64->efi_nextents;
339 dst_efi_fmt->efi_id = src_efi_fmt_64->efi_id;
340 for (i = 0; i < dst_efi_fmt->efi_nextents; i++) {
341 dst_efi_fmt->efi_extents[i].ext_start =
342 src_efi_fmt_64->efi_extents[i].ext_start;
343 dst_efi_fmt->efi_extents[i].ext_len =
344 src_efi_fmt_64->efi_extents[i].ext_len;
345 }
346 return 0;
347 }
348 return EFSCORRUPTED;
349}
350
351/*
297 * This is called by the efd item code below to release references to 352 * This is called by the efd item code below to release references to
298 * the given efi item. Each efd calls this with the number of 353 * the given efi item. Each efd calls this with the number of
299 * extents that it has logged, and when the sum of these reaches 354 * extents that it has logged, and when the sum of these reaches
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 5bf681708fec..0ea45edaab03 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -27,6 +27,24 @@ typedef struct xfs_extent {
27} xfs_extent_t; 27} xfs_extent_t;
28 28
29/* 29/*
30 * Since an xfs_extent_t has types (start:64, len: 32)
31 * there are different alignments on 32 bit and 64 bit kernels.
32 * So we provide the different variants for use by a
33 * conversion routine.
34 */
35
36typedef struct xfs_extent_32 {
37 xfs_dfsbno_t ext_start;
38 xfs_extlen_t ext_len;
39} __attribute__((packed)) xfs_extent_32_t;
40
41typedef struct xfs_extent_64 {
42 xfs_dfsbno_t ext_start;
43 xfs_extlen_t ext_len;
44 __uint32_t ext_pad;
45} xfs_extent_64_t;
46
47/*
30 * This is the structure used to lay out an efi log item in the 48 * This is the structure used to lay out an efi log item in the
31 * log. The efi_extents field is a variable size array whose 49 * log. The efi_extents field is a variable size array whose
32 * size is given by efi_nextents. 50 * size is given by efi_nextents.
@@ -39,6 +57,22 @@ typedef struct xfs_efi_log_format {
39 xfs_extent_t efi_extents[1]; /* array of extents to free */ 57 xfs_extent_t efi_extents[1]; /* array of extents to free */
40} xfs_efi_log_format_t; 58} xfs_efi_log_format_t;
41 59
60typedef struct xfs_efi_log_format_32 {
61 unsigned short efi_type; /* efi log item type */
62 unsigned short efi_size; /* size of this item */
63 uint efi_nextents; /* # extents to free */
64 __uint64_t efi_id; /* efi identifier */
65 xfs_extent_32_t efi_extents[1]; /* array of extents to free */
66} __attribute__((packed)) xfs_efi_log_format_32_t;
67
68typedef struct xfs_efi_log_format_64 {
69 unsigned short efi_type; /* efi log item type */
70 unsigned short efi_size; /* size of this item */
71 uint efi_nextents; /* # extents to free */
72 __uint64_t efi_id; /* efi identifier */
73 xfs_extent_64_t efi_extents[1]; /* array of extents to free */
74} xfs_efi_log_format_64_t;
75
42/* 76/*
43 * This is the structure used to lay out an efd log item in the 77 * This is the structure used to lay out an efd log item in the
44 * log. The efd_extents array is a variable size array whose 78 * log. The efd_extents array is a variable size array whose
@@ -52,6 +86,22 @@ typedef struct xfs_efd_log_format {
52 xfs_extent_t efd_extents[1]; /* array of extents freed */ 86 xfs_extent_t efd_extents[1]; /* array of extents freed */
53} xfs_efd_log_format_t; 87} xfs_efd_log_format_t;
54 88
89typedef struct xfs_efd_log_format_32 {
90 unsigned short efd_type; /* efd log item type */
91 unsigned short efd_size; /* size of this item */
92 uint efd_nextents; /* # of extents freed */
93 __uint64_t efd_efi_id; /* id of corresponding efi */
94 xfs_extent_32_t efd_extents[1]; /* array of extents freed */
95} __attribute__((packed)) xfs_efd_log_format_32_t;
96
97typedef struct xfs_efd_log_format_64 {
98 unsigned short efd_type; /* efd log item type */
99 unsigned short efd_size; /* size of this item */
100 uint efd_nextents; /* # of extents freed */
101 __uint64_t efd_efi_id; /* id of corresponding efi */
102 xfs_extent_64_t efd_extents[1]; /* array of extents freed */
103} xfs_efd_log_format_64_t;
104
55 105
56#ifdef __KERNEL__ 106#ifdef __KERNEL__
57 107
@@ -103,7 +153,8 @@ extern struct kmem_zone *xfs_efd_zone;
103xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); 153xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
104xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, 154xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
105 uint); 155 uint);
106 156int xfs_efi_copy_format(xfs_log_iovec_t *buf,
157 xfs_efi_log_format_t *dst_efi_fmt);
107void xfs_efi_item_free(xfs_efi_log_item_t *); 158void xfs_efi_item_free(xfs_efi_log_item_t *);
108 159
109#endif /* __KERNEL__ */ 160#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 14010f1fa82f..0f0ad1535951 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -67,14 +67,15 @@ struct fsxattr {
67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ 67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
68#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ 68#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
69#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ 69#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
70#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
70#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ 71#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
71 72
72/* 73/*
73 * Structure for XFS_IOC_GETBMAP. 74 * Structure for XFS_IOC_GETBMAP.
74 * On input, fill in bmv_offset and bmv_length of the first structure 75 * On input, fill in bmv_offset and bmv_length of the first structure
75 * to indicate the area of interest in the file, and bmv_entry with the 76 * to indicate the area of interest in the file, and bmv_entries with
76 * number of array elements given. The first structure is updated on 77 * the number of array elements given back. The first structure is
77 * return to give the offset and length for the next call. 78 * updated on return to give the offset and length for the next call.
78 */ 79 */
79#ifndef HAVE_GETBMAP 80#ifndef HAVE_GETBMAP
80struct getbmap { 81struct getbmap {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dfa3527b20a7..077629bab532 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -542,14 +540,13 @@ xfs_reserve_blocks(
542} 540}
543 541
544void 542void
545xfs_fs_log_dummy(xfs_mount_t *mp) 543xfs_fs_log_dummy(
544 xfs_mount_t *mp)
546{ 545{
547 xfs_trans_t *tp; 546 xfs_trans_t *tp;
548 xfs_inode_t *ip; 547 xfs_inode_t *ip;
549
550 548
551 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); 549 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
552 atomic_inc(&mp->m_active_trans);
553 if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { 550 if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
554 xfs_trans_cancel(tp, 0); 551 xfs_trans_cancel(tp, 0);
555 return; 552 return;
@@ -574,21 +571,22 @@ xfs_fs_goingdown(
574{ 571{
575 switch (inflags) { 572 switch (inflags) {
576 case XFS_FSOP_GOING_FLAGS_DEFAULT: { 573 case XFS_FSOP_GOING_FLAGS_DEFAULT: {
577 struct vfs *vfsp = XFS_MTOVFS(mp); 574 struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
578 struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev); 575 struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
579 576
580 if (sb && !IS_ERR(sb)) { 577 if (sb && !IS_ERR(sb)) {
581 xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); 578 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
582 thaw_bdev(sb->s_bdev, sb); 579 thaw_bdev(sb->s_bdev, sb);
583 } 580 }
584 581
585 break; 582 break;
586 } 583 }
587 case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 584 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
588 xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); 585 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
589 break; 586 break;
590 case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH: 587 case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
591 xfs_force_shutdown(mp, XFS_FORCE_UMOUNT|XFS_LOG_IO_ERROR); 588 xfs_force_shutdown(mp,
589 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
592 break; 590 break;
593 default: 591 default:
594 return XFS_ERROR(EINVAL); 592 return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index deddbd03c166..33164a85aa9d 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -1174,6 +1172,9 @@ xfs_dilocate(
1174 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1172 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1175 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1173 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1176#ifdef DEBUG 1174#ifdef DEBUG
1175 /* no diagnostics for bulkstat, ino comes from userspace */
1176 if (flags & XFS_IMAP_BULKSTAT)
1177 return XFS_ERROR(EINVAL);
1177 if (agno >= mp->m_sb.sb_agcount) { 1178 if (agno >= mp->m_sb.sb_agcount) {
1178 xfs_fs_cmn_err(CE_ALERT, mp, 1179 xfs_fs_cmn_err(CE_ALERT, mp,
1179 "xfs_dilocate: agno (%d) >= " 1180 "xfs_dilocate: agno (%d) >= "
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 60c65683462d..616eeeb6953e 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b53854325266..0724df7fabb7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -186,7 +184,7 @@ xfs_ihash_promote(
186 */ 184 */
187STATIC int 185STATIC int
188xfs_iget_core( 186xfs_iget_core(
189 vnode_t *vp, 187 bhv_vnode_t *vp,
190 xfs_mount_t *mp, 188 xfs_mount_t *mp,
191 xfs_trans_t *tp, 189 xfs_trans_t *tp,
192 xfs_ino_t ino, 190 xfs_ino_t ino,
@@ -198,7 +196,7 @@ xfs_iget_core(
198 xfs_ihash_t *ih; 196 xfs_ihash_t *ih;
199 xfs_inode_t *ip; 197 xfs_inode_t *ip;
200 xfs_inode_t *iq; 198 xfs_inode_t *iq;
201 vnode_t *inode_vp; 199 bhv_vnode_t *inode_vp;
202 ulong version; 200 ulong version;
203 int error; 201 int error;
204 /* REFERENCED */ 202 /* REFERENCED */
@@ -468,7 +466,7 @@ finish_inode:
468 * If we have a real type for an on-disk inode, we can set ops(&unlock) 466 * If we have a real type for an on-disk inode, we can set ops(&unlock)
469 * now. If it's a new inode being created, xfs_ialloc will handle it. 467 * now. If it's a new inode being created, xfs_ialloc will handle it.
470 */ 468 */
471 VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); 469 bhv_vfs_init_vnode(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
472 470
473 return 0; 471 return 0;
474} 472}
@@ -489,7 +487,7 @@ xfs_iget(
489 xfs_daddr_t bno) 487 xfs_daddr_t bno)
490{ 488{
491 struct inode *inode; 489 struct inode *inode;
492 vnode_t *vp = NULL; 490 bhv_vnode_t *vp = NULL;
493 int error; 491 int error;
494 492
495 XFS_STATS_INC(xs_ig_attempts); 493 XFS_STATS_INC(xs_ig_attempts);
@@ -543,7 +541,7 @@ retry:
543void 541void
544xfs_inode_lock_init( 542xfs_inode_lock_init(
545 xfs_inode_t *ip, 543 xfs_inode_t *ip,
546 vnode_t *vp) 544 bhv_vnode_t *vp)
547{ 545{
548 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 546 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
549 "xfsino", (long)vp->v_number); 547 "xfsino", (long)vp->v_number);
@@ -603,12 +601,10 @@ void
603xfs_iput(xfs_inode_t *ip, 601xfs_iput(xfs_inode_t *ip,
604 uint lock_flags) 602 uint lock_flags)
605{ 603{
606 vnode_t *vp = XFS_ITOV(ip); 604 bhv_vnode_t *vp = XFS_ITOV(ip);
607 605
608 vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address); 606 vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);
609
610 xfs_iunlock(ip, lock_flags); 607 xfs_iunlock(ip, lock_flags);
611
612 VN_RELE(vp); 608 VN_RELE(vp);
613} 609}
614 610
@@ -619,7 +615,7 @@ void
619xfs_iput_new(xfs_inode_t *ip, 615xfs_iput_new(xfs_inode_t *ip,
620 uint lock_flags) 616 uint lock_flags)
621{ 617{
622 vnode_t *vp = XFS_ITOV(ip); 618 bhv_vnode_t *vp = XFS_ITOV(ip);
623 struct inode *inode = vn_to_inode(vp); 619 struct inode *inode = vn_to_inode(vp);
624 620
625 vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); 621 vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
@@ -645,7 +641,7 @@ xfs_iput_new(xfs_inode_t *ip,
645void 641void
646xfs_ireclaim(xfs_inode_t *ip) 642xfs_ireclaim(xfs_inode_t *ip)
647{ 643{
648 vnode_t *vp; 644 bhv_vnode_t *vp;
649 645
650 /* 646 /*
651 * Remove from old hash list and mount list. 647 * Remove from old hash list and mount list.
@@ -1033,6 +1029,6 @@ xfs_iflock_nowait(xfs_inode_t *ip)
1033void 1029void
1034xfs_ifunlock(xfs_inode_t *ip) 1030xfs_ifunlock(xfs_inode_t *ip)
1035{ 1031{
1036 ASSERT(valusema(&(ip->i_flock)) <= 0); 1032 ASSERT(issemalocked(&(ip->i_flock)));
1037 vsema(&(ip->i_flock)); 1033 vsema(&(ip->i_flock));
1038} 1034}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 94b60dd03801..86c1bf0bba9e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -26,14 +26,12 @@
26#include "xfs_trans_priv.h" 26#include "xfs_trans_priv.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir.h"
30#include "xfs_dir2.h" 29#include "xfs_dir2.h"
31#include "xfs_dmapi.h" 30#include "xfs_dmapi.h"
32#include "xfs_mount.h" 31#include "xfs_mount.h"
33#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
39#include "xfs_dinode.h" 37#include "xfs_dinode.h"
@@ -256,13 +254,11 @@ xfs_itobp(
256 xfs_daddr_t bno, 254 xfs_daddr_t bno,
257 uint imap_flags) 255 uint imap_flags)
258{ 256{
257 xfs_imap_t imap;
259 xfs_buf_t *bp; 258 xfs_buf_t *bp;
260 int error; 259 int error;
261 xfs_imap_t imap;
262#ifdef __KERNEL__
263 int i; 260 int i;
264 int ni; 261 int ni;
265#endif
266 262
267 if (ip->i_blkno == (xfs_daddr_t)0) { 263 if (ip->i_blkno == (xfs_daddr_t)0) {
268 /* 264 /*
@@ -319,7 +315,6 @@ xfs_itobp(
319 */ 315 */
320 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 316 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
321 (int)imap.im_len, XFS_BUF_LOCK, &bp); 317 (int)imap.im_len, XFS_BUF_LOCK, &bp);
322
323 if (error) { 318 if (error) {
324#ifdef DEBUG 319#ifdef DEBUG
325 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
@@ -330,17 +325,21 @@ xfs_itobp(
330#endif /* DEBUG */ 325#endif /* DEBUG */
331 return error; 326 return error;
332 } 327 }
333#ifdef __KERNEL__ 328
334 /* 329 /*
335 * Validate the magic number and version of every inode in the buffer 330 * Validate the magic number and version of every inode in the buffer
336 * (if DEBUG kernel) or the first inode in the buffer, otherwise. 331 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
332 * No validation is done here in userspace (xfs_repair).
337 */ 333 */
338#ifdef DEBUG 334#if !defined(__KERNEL__)
335 ni = 0;
336#elif defined(DEBUG)
339 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 337 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 :
340 (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); 338 (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog);
341#else 339#else /* usual case */
342 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; 340 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1;
343#endif 341#endif
342
344 for (i = 0; i < ni; i++) { 343 for (i = 0; i < ni; i++) {
345 int di_ok; 344 int di_ok;
346 xfs_dinode_t *dip; 345 xfs_dinode_t *dip;
@@ -352,8 +351,11 @@ xfs_itobp(
352 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 351 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
353 XFS_RANDOM_ITOBP_INOTOBP))) { 352 XFS_RANDOM_ITOBP_INOTOBP))) {
354#ifdef DEBUG 353#ifdef DEBUG
355 prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", 354 if (!(imap_flags & XFS_IMAP_BULKSTAT))
356 mp->m_ddev_targp, 355 cmn_err(CE_ALERT,
356 "Device %s - bad inode magic/vsn "
357 "daddr %lld #%d (magic=%x)",
358 XFS_BUFTARG_NAME(mp->m_ddev_targp),
357 (unsigned long long)imap.im_blkno, i, 359 (unsigned long long)imap.im_blkno, i,
358 INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); 360 INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
359#endif 361#endif
@@ -363,7 +365,6 @@ xfs_itobp(
363 return XFS_ERROR(EFSCORRUPTED); 365 return XFS_ERROR(EFSCORRUPTED);
364 } 366 }
365 } 367 }
366#endif /* __KERNEL__ */
367 368
368 xfs_inobp_check(mp, bp); 369 xfs_inobp_check(mp, bp);
369 370
@@ -782,7 +783,6 @@ xfs_xlate_dinode_core(
782 783
783STATIC uint 784STATIC uint
784_xfs_dic2xflags( 785_xfs_dic2xflags(
785 xfs_dinode_core_t *dic,
786 __uint16_t di_flags) 786 __uint16_t di_flags)
787{ 787{
788 uint flags = 0; 788 uint flags = 0;
@@ -812,6 +812,8 @@ _xfs_dic2xflags(
812 flags |= XFS_XFLAG_EXTSIZE; 812 flags |= XFS_XFLAG_EXTSIZE;
813 if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 813 if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
814 flags |= XFS_XFLAG_EXTSZINHERIT; 814 flags |= XFS_XFLAG_EXTSZINHERIT;
815 if (di_flags & XFS_DIFLAG_NODEFRAG)
816 flags |= XFS_XFLAG_NODEFRAG;
815 } 817 }
816 818
817 return flags; 819 return flags;
@@ -823,16 +825,16 @@ xfs_ip2xflags(
823{ 825{
824 xfs_dinode_core_t *dic = &ip->i_d; 826 xfs_dinode_core_t *dic = &ip->i_d;
825 827
826 return _xfs_dic2xflags(dic, dic->di_flags) | 828 return _xfs_dic2xflags(dic->di_flags) |
827 (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); 829 (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
828} 830}
829 831
830uint 832uint
831xfs_dic2xflags( 833xfs_dic2xflags(
832 xfs_dinode_core_t *dic) 834 xfs_dinode_core_t *dic)
833{ 835{
834 return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | 836 return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) |
835 (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); 837 (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
836} 838}
837 839
838/* 840/*
@@ -1083,7 +1085,7 @@ xfs_ialloc(
1083{ 1085{
1084 xfs_ino_t ino; 1086 xfs_ino_t ino;
1085 xfs_inode_t *ip; 1087 xfs_inode_t *ip;
1086 vnode_t *vp; 1088 bhv_vnode_t *vp;
1087 uint flags; 1089 uint flags;
1088 int error; 1090 int error;
1089 1091
@@ -1221,6 +1223,9 @@ xfs_ialloc(
1221 di_flags |= XFS_DIFLAG_NOSYMLINKS; 1223 di_flags |= XFS_DIFLAG_NOSYMLINKS;
1222 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1224 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1223 di_flags |= XFS_DIFLAG_PROJINHERIT; 1225 di_flags |= XFS_DIFLAG_PROJINHERIT;
1226 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1227 xfs_inherit_nodefrag)
1228 di_flags |= XFS_DIFLAG_NODEFRAG;
1224 ip->i_d.di_flags |= di_flags; 1229 ip->i_d.di_flags |= di_flags;
1225 } 1230 }
1226 /* FALLTHROUGH */ 1231 /* FALLTHROUGH */
@@ -1244,8 +1249,8 @@ xfs_ialloc(
1244 */ 1249 */
1245 xfs_trans_log_inode(tp, ip, flags); 1250 xfs_trans_log_inode(tp, ip, flags);
1246 1251
1247 /* now that we have an i_mode we can set Linux inode ops (& unlock) */ 1252 /* now that we have an i_mode we can setup inode ops and unlock */
1248 VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 1253 bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
1249 1254
1250 *ipp = ip; 1255 *ipp = ip;
1251 return 0; 1256 return 0;
@@ -1285,7 +1290,7 @@ xfs_isize_check(
1285 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 1290 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
1286 map_first), 1291 map_first),
1287 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1292 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
1288 NULL)) 1293 NULL, NULL))
1289 return; 1294 return;
1290 ASSERT(nimaps == 1); 1295 ASSERT(nimaps == 1);
1291 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1296 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1421,7 +1426,7 @@ xfs_itruncate_start(
1421 xfs_fsize_t last_byte; 1426 xfs_fsize_t last_byte;
1422 xfs_off_t toss_start; 1427 xfs_off_t toss_start;
1423 xfs_mount_t *mp; 1428 xfs_mount_t *mp;
1424 vnode_t *vp; 1429 bhv_vnode_t *vp;
1425 1430
1426 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1431 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1427 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1432 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
@@ -1434,9 +1439,9 @@ xfs_itruncate_start(
1434 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 1439 vn_iowait(vp); /* wait for the completion of any pending DIOs */
1435 1440
1436 /* 1441 /*
1437 * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers 1442 * Call toss_pages or flushinval_pages to get rid of pages
1438 * overlapping the region being removed. We have to use 1443 * overlapping the region being removed. We have to use
1439 * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the 1444 * the less efficient flushinval_pages in the case that the
1440 * caller may not be able to finish the truncate without 1445 * caller may not be able to finish the truncate without
1441 * dropping the inode's I/O lock. Make sure 1446 * dropping the inode's I/O lock. Make sure
1442 * to catch any pages brought in by buffers overlapping 1447 * to catch any pages brought in by buffers overlapping
@@ -1445,10 +1450,10 @@ xfs_itruncate_start(
1445 * so that we don't toss things on the same block as 1450 * so that we don't toss things on the same block as
1446 * new_size but before it. 1451 * new_size but before it.
1447 * 1452 *
1448 * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to 1453 * Before calling toss_page or flushinval_pages, make sure to
1449 * call remapf() over the same region if the file is mapped. 1454 * call remapf() over the same region if the file is mapped.
1450 * This frees up mapped file references to the pages in the 1455 * This frees up mapped file references to the pages in the
1451 * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures 1456 * given range and for the flushinval_pages case it ensures
1452 * that we get the latest mapped changes flushed out. 1457 * that we get the latest mapped changes flushed out.
1453 */ 1458 */
1454 toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1459 toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
@@ -1466,9 +1471,9 @@ xfs_itruncate_start(
1466 last_byte); 1471 last_byte);
1467 if (last_byte > toss_start) { 1472 if (last_byte > toss_start) {
1468 if (flags & XFS_ITRUNC_DEFINITE) { 1473 if (flags & XFS_ITRUNC_DEFINITE) {
1469 VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 1474 bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
1470 } else { 1475 } else {
1471 VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 1476 bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
1472 } 1477 }
1473 } 1478 }
1474 1479
@@ -1666,12 +1671,13 @@ xfs_itruncate_finish(
1666 * runs. 1671 * runs.
1667 */ 1672 */
1668 XFS_BMAP_INIT(&free_list, &first_block); 1673 XFS_BMAP_INIT(&free_list, &first_block);
1669 error = xfs_bunmapi(ntp, ip, first_unmap_block, 1674 error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore,
1670 unmap_len, 1675 first_unmap_block, unmap_len,
1671 XFS_BMAPI_AFLAG(fork) | 1676 XFS_BMAPI_AFLAG(fork) |
1672 (sync ? 0 : XFS_BMAPI_ASYNC), 1677 (sync ? 0 : XFS_BMAPI_ASYNC),
1673 XFS_ITRUNC_MAX_EXTENTS, 1678 XFS_ITRUNC_MAX_EXTENTS,
1674 &first_block, &free_list, &done); 1679 &first_block, &free_list,
1680 NULL, &done);
1675 if (error) { 1681 if (error) {
1676 /* 1682 /*
1677 * If the bunmapi call encounters an error, 1683 * If the bunmapi call encounters an error,
@@ -1955,9 +1961,9 @@ xfs_iunlink_remove(
1955 xfs_agino_t agino; 1961 xfs_agino_t agino;
1956 xfs_agino_t next_agino; 1962 xfs_agino_t next_agino;
1957 xfs_buf_t *last_ibp; 1963 xfs_buf_t *last_ibp;
1958 xfs_dinode_t *last_dip; 1964 xfs_dinode_t *last_dip = NULL;
1959 short bucket_index; 1965 short bucket_index;
1960 int offset, last_offset; 1966 int offset, last_offset = 0;
1961 int error; 1967 int error;
1962 int agi_ok; 1968 int agi_ok;
1963 1969
@@ -2745,13 +2751,14 @@ xfs_iunpin(
2745 * the inode to become unpinned. 2751 * the inode to become unpinned.
2746 */ 2752 */
2747 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { 2753 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2748 vnode_t *vp = XFS_ITOV_NULL(ip); 2754 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
2749 2755
2750 /* make sync come back and flush this inode */ 2756 /* make sync come back and flush this inode */
2751 if (vp) { 2757 if (vp) {
2752 struct inode *inode = vn_to_inode(vp); 2758 struct inode *inode = vn_to_inode(vp);
2753 2759
2754 if (!(inode->i_state & I_NEW)) 2760 if (!(inode->i_state &
2761 (I_NEW|I_FREEING|I_CLEAR)))
2755 mark_inode_dirty_sync(inode); 2762 mark_inode_dirty_sync(inode);
2756 } 2763 }
2757 } 2764 }
@@ -2916,13 +2923,6 @@ xfs_iflush_fork(
2916 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2923 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
2917 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 2924 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
2918 } 2925 }
2919 if (whichfork == XFS_DATA_FORK) {
2920 if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) {
2921 XFS_ERROR_REPORT("xfs_iflush_fork",
2922 XFS_ERRLEVEL_LOW, mp);
2923 return XFS_ERROR(EFSCORRUPTED);
2924 }
2925 }
2926 break; 2926 break;
2927 2927
2928 case XFS_DINODE_FMT_EXTENTS: 2928 case XFS_DINODE_FMT_EXTENTS:
@@ -3006,7 +3006,7 @@ xfs_iflush(
3006 XFS_STATS_INC(xs_iflush_count); 3006 XFS_STATS_INC(xs_iflush_count);
3007 3007
3008 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3008 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
3009 ASSERT(valusema(&ip->i_flock) <= 0); 3009 ASSERT(issemalocked(&(ip->i_flock)));
3010 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3010 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3011 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3011 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3012 3012
@@ -3199,7 +3199,7 @@ xfs_iflush(
3199 3199
3200corrupt_out: 3200corrupt_out:
3201 xfs_buf_relse(bp); 3201 xfs_buf_relse(bp);
3202 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 3202 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3203 xfs_iflush_abort(ip); 3203 xfs_iflush_abort(ip);
3204 /* 3204 /*
3205 * Unlocks the flush lock 3205 * Unlocks the flush lock
@@ -3221,7 +3221,7 @@ cluster_corrupt_out:
3221 xfs_buf_relse(bp); 3221 xfs_buf_relse(bp);
3222 } 3222 }
3223 3223
3224 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 3224 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3225 3225
3226 if(!bufwasdelwri) { 3226 if(!bufwasdelwri) {
3227 /* 3227 /*
@@ -3264,7 +3264,7 @@ xfs_iflush_int(
3264 SPLDECL(s); 3264 SPLDECL(s);
3265 3265
3266 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3266 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
3267 ASSERT(valusema(&ip->i_flock) <= 0); 3267 ASSERT(issemalocked(&(ip->i_flock)));
3268 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3268 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3269 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3269 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3270 3270
@@ -3504,7 +3504,7 @@ xfs_iflush_all(
3504 xfs_mount_t *mp) 3504 xfs_mount_t *mp)
3505{ 3505{
3506 xfs_inode_t *ip; 3506 xfs_inode_t *ip;
3507 vnode_t *vp; 3507 bhv_vnode_t *vp;
3508 3508
3509 again: 3509 again:
3510 XFS_MOUNT_ILOCK(mp); 3510 XFS_MOUNT_ILOCK(mp);
@@ -4180,7 +4180,7 @@ xfs_iext_direct_to_inline(
4180 */ 4180 */
4181 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4181 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
4182 nextents * sizeof(xfs_bmbt_rec_t)); 4182 nextents * sizeof(xfs_bmbt_rec_t));
4183 kmem_free(ifp->if_u1.if_extents, KM_SLEEP); 4183 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
4184 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4184 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
4185 ifp->if_real_bytes = 0; 4185 ifp->if_real_bytes = 0;
4186} 4186}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3b544db1790b..d10b76ed1e5b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -102,9 +102,9 @@ typedef struct xfs_ifork {
102 102
103#ifdef __KERNEL__ 103#ifdef __KERNEL__
104struct bhv_desc; 104struct bhv_desc;
105struct bhv_vnode;
105struct cred; 106struct cred;
106struct ktrace; 107struct ktrace;
107struct vnode;
108struct xfs_buf; 108struct xfs_buf;
109struct xfs_bmap_free; 109struct xfs_bmap_free;
110struct xfs_bmbt_irec; 110struct xfs_bmbt_irec;
@@ -400,7 +400,7 @@ void xfs_chash_init(struct xfs_mount *);
400void xfs_chash_free(struct xfs_mount *); 400void xfs_chash_free(struct xfs_mount *);
401xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 401xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
402 struct xfs_trans *); 402 struct xfs_trans *);
403void xfs_inode_lock_init(xfs_inode_t *, struct vnode *); 403void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
404int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 404int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
405 uint, uint, xfs_inode_t **, xfs_daddr_t); 405 uint, uint, xfs_inode_t **, xfs_daddr_t);
406void xfs_iput(xfs_inode_t *, uint); 406void xfs_iput(xfs_inode_t *, uint);
@@ -461,7 +461,7 @@ void xfs_ichgtime(xfs_inode_t *, int);
461xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 461xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
462void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 462void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
463 463
464xfs_inode_t *xfs_vtoi(struct vnode *vp); 464xfs_inode_t *xfs_vtoi(struct bhv_vnode *vp);
465 465
466void xfs_synchronize_atime(xfs_inode_t *); 466void xfs_synchronize_atime(xfs_inode_t *);
467 467
@@ -509,7 +509,6 @@ extern struct kmem_zone *xfs_chashlist_zone;
509extern struct kmem_zone *xfs_ifork_zone; 509extern struct kmem_zone *xfs_ifork_zone;
510extern struct kmem_zone *xfs_inode_zone; 510extern struct kmem_zone *xfs_inode_zone;
511extern struct kmem_zone *xfs_ili_zone; 511extern struct kmem_zone *xfs_ili_zone;
512extern struct vnodeops xfs_vnodeops;
513 512
514#endif /* __KERNEL__ */ 513#endif /* __KERNEL__ */
515 514
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7497a481b2f5..f8e80d8e7237 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -25,7 +25,6 @@
25#include "xfs_buf_item.h" 25#include "xfs_buf_item.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir.h"
29#include "xfs_dir2.h" 28#include "xfs_dir2.h"
30#include "xfs_dmapi.h" 29#include "xfs_dmapi.h"
31#include "xfs_mount.h" 30#include "xfs_mount.h"
@@ -33,7 +32,6 @@
33#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
39#include "xfs_dinode.h" 37#include "xfs_dinode.h"
@@ -794,7 +792,7 @@ xfs_inode_item_pushbuf(
794 * inode flush completed and the inode was taken off the AIL. 792 * inode flush completed and the inode was taken off the AIL.
795 * So, just get out. 793 * So, just get out.
796 */ 794 */
797 if ((valusema(&(ip->i_flock)) > 0) || 795 if (!issemalocked(&(ip->i_flock)) ||
798 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 796 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
799 iip->ili_pushbuf_flag = 0; 797 iip->ili_pushbuf_flag = 0;
800 xfs_iunlock(ip, XFS_ILOCK_SHARED); 798 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -816,7 +814,7 @@ xfs_inode_item_pushbuf(
816 * If not, we can flush it async. 814 * If not, we can flush it async.
817 */ 815 */
818 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && 816 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
819 (valusema(&(ip->i_flock)) <= 0)); 817 issemalocked(&(ip->i_flock)));
820 iip->ili_pushbuf_flag = 0; 818 iip->ili_pushbuf_flag = 0;
821 xfs_iunlock(ip, XFS_ILOCK_SHARED); 819 xfs_iunlock(ip, XFS_ILOCK_SHARED);
822 xfs_buftrace("INODE ITEM PUSH", bp); 820 xfs_buftrace("INODE ITEM PUSH", bp);
@@ -864,7 +862,7 @@ xfs_inode_item_push(
864 ip = iip->ili_inode; 862 ip = iip->ili_inode;
865 863
866 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 864 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
867 ASSERT(valusema(&(ip->i_flock)) <= 0); 865 ASSERT(issemalocked(&(ip->i_flock)));
868 /* 866 /*
869 * Since we were able to lock the inode's flush lock and 867 * Since we were able to lock the inode's flush lock and
870 * we found it on the AIL, the inode must be dirty. This 868 * we found it on the AIL, the inode must be dirty. This
@@ -1084,3 +1082,52 @@ xfs_istale_done(
1084{ 1082{
1085 xfs_iflush_abort(iip->ili_inode); 1083 xfs_iflush_abort(iip->ili_inode);
1086} 1084}
1085
1086/*
1087 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
1088 * (which can have different field alignments) to the native version
1089 */
1090int
1091xfs_inode_item_format_convert(
1092 xfs_log_iovec_t *buf,
1093 xfs_inode_log_format_t *in_f)
1094{
1095 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
1096 xfs_inode_log_format_32_t *in_f32;
1097
1098 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
1099 in_f->ilf_type = in_f32->ilf_type;
1100 in_f->ilf_size = in_f32->ilf_size;
1101 in_f->ilf_fields = in_f32->ilf_fields;
1102 in_f->ilf_asize = in_f32->ilf_asize;
1103 in_f->ilf_dsize = in_f32->ilf_dsize;
1104 in_f->ilf_ino = in_f32->ilf_ino;
1105 /* copy biggest field of ilf_u */
1106 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
1107 in_f32->ilf_u.ilfu_uuid.__u_bits,
1108 sizeof(uuid_t));
1109 in_f->ilf_blkno = in_f32->ilf_blkno;
1110 in_f->ilf_len = in_f32->ilf_len;
1111 in_f->ilf_boffset = in_f32->ilf_boffset;
1112 return 0;
1113 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
1114 xfs_inode_log_format_64_t *in_f64;
1115
1116 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
1117 in_f->ilf_type = in_f64->ilf_type;
1118 in_f->ilf_size = in_f64->ilf_size;
1119 in_f->ilf_fields = in_f64->ilf_fields;
1120 in_f->ilf_asize = in_f64->ilf_asize;
1121 in_f->ilf_dsize = in_f64->ilf_dsize;
1122 in_f->ilf_ino = in_f64->ilf_ino;
1123 /* copy biggest field of ilf_u */
1124 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
1125 in_f64->ilf_u.ilfu_uuid.__u_bits,
1126 sizeof(uuid_t));
1127 in_f->ilf_blkno = in_f64->ilf_blkno;
1128 in_f->ilf_len = in_f64->ilf_len;
1129 in_f->ilf_boffset = in_f64->ilf_boffset;
1130 return 0;
1131 }
1132 return EFSCORRUPTED;
1133}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index c5dbf93b6661..5db6cd1b4cf3 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -23,25 +23,6 @@
23 * log. The size of the inline data/extents/b-tree root to be logged 23 * log. The size of the inline data/extents/b-tree root to be logged
24 * (if any) is indicated in the ilf_dsize field. Changes to this structure 24 * (if any) is indicated in the ilf_dsize field. Changes to this structure
25 * must be added on to the end. 25 * must be added on to the end.
26 *
27 * Convention for naming inode log item versions : The current version
28 * is always named XFS_LI_INODE. When an inode log item gets superseded,
29 * add the latest version of IRIX that will generate logs with that item
30 * to the version name.
31 *
32 * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first
33 * union (ilf_u) field. This was released with IRIX 5.3-XFS.
34 * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire
35 * structure. This was released with IRIX 6.0.1-XFS and IRIX 6.1.
36 * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2
37 * so a new structure definition wasn't necessary. However, we had
38 * to add a new type because the inode cluster size changed from 4K
39 * to 8K and the version number had to be rev'ved to keep older kernels
40 * from trying to recover logs with the 8K buffers in them. The logging
41 * code can handle recovery on different-sized clusters now so hopefully
42 * this'll be the last time we need to change the inode log item just
43 * for a change in the inode cluster size. This new version was
44 * released with IRIX 6.2.
45 */ 26 */
46typedef struct xfs_inode_log_format { 27typedef struct xfs_inode_log_format {
47 unsigned short ilf_type; /* inode log item type */ 28 unsigned short ilf_type; /* inode log item type */
@@ -59,18 +40,38 @@ typedef struct xfs_inode_log_format {
59 int ilf_boffset; /* off of inode in buffer */ 40 int ilf_boffset; /* off of inode in buffer */
60} xfs_inode_log_format_t; 41} xfs_inode_log_format_t;
61 42
62/* Initial version shipped with IRIX 5.3-XFS */ 43typedef struct xfs_inode_log_format_32 {
63typedef struct xfs_inode_log_format_v1 { 44 unsigned short ilf_type; /* 16: inode log item type */
64 unsigned short ilf_type; /* inode log item type */ 45 unsigned short ilf_size; /* 16: size of this item */
65 unsigned short ilf_size; /* size of this item */ 46 uint ilf_fields; /* 32: flags for fields logged */
66 uint ilf_fields; /* flags for fields logged */ 47 ushort ilf_asize; /* 32: size of attr d/ext/root */
67 uint ilf_dsize; /* size of data/ext/root */ 48 ushort ilf_dsize; /* 32: size of data/ext/root */
68 xfs_ino_t ilf_ino; /* inode number */ 49 xfs_ino_t ilf_ino; /* 64: inode number */
69 union { 50 union {
70 xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ 51 xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/
71 uuid_t ilfu_uuid; /* mount point value */ 52 uuid_t ilfu_uuid; /* 128: mount point value */
53 } ilf_u;
54 __int64_t ilf_blkno; /* 64: blkno of inode buffer */
55 int ilf_len; /* 32: len of inode buffer */
56 int ilf_boffset; /* 32: off of inode in buffer */
57} __attribute__((packed)) xfs_inode_log_format_32_t;
58
59typedef struct xfs_inode_log_format_64 {
60 unsigned short ilf_type; /* 16: inode log item type */
61 unsigned short ilf_size; /* 16: size of this item */
62 uint ilf_fields; /* 32: flags for fields logged */
63 ushort ilf_asize; /* 32: size of attr d/ext/root */
64 ushort ilf_dsize; /* 32: size of data/ext/root */
65 __uint32_t ilf_pad; /* 32: pad for 64 bit boundary */
66 xfs_ino_t ilf_ino; /* 64: inode number */
67 union {
68 xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/
69 uuid_t ilfu_uuid; /* 128: mount point value */
72 } ilf_u; 70 } ilf_u;
73} xfs_inode_log_format_t_v1; 71 __int64_t ilf_blkno; /* 64: blkno of inode buffer */
72 int ilf_len; /* 32: len of inode buffer */
73 int ilf_boffset; /* 32: off of inode in buffer */
74} xfs_inode_log_format_64_t;
74 75
75/* 76/*
76 * Flags for xfs_trans_log_inode flags field. 77 * Flags for xfs_trans_log_inode flags field.
@@ -172,6 +173,8 @@ extern void xfs_inode_item_destroy(struct xfs_inode *);
172extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 173extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
173extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); 174extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
174extern void xfs_iflush_abort(struct xfs_inode *); 175extern void xfs_iflush_abort(struct xfs_inode *);
176extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
177 xfs_inode_log_format_t *);
175 178
176#endif /* __KERNEL__ */ 179#endif /* __KERNEL__ */
177 180
diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
index a07815661a8c..06d710c9ce4b 100644
--- a/fs/xfs/xfs_iocore.c
+++ b/fs/xfs/xfs_iocore.c
@@ -24,14 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dfrag.h"
29#include "xfs_dmapi.h" 29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 30#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -58,7 +57,7 @@ xfs_size_fn(
58 57
59STATIC int 58STATIC int
60xfs_ioinit( 59xfs_ioinit(
61 struct vfs *vfsp, 60 struct bhv_vfs *vfsp,
62 struct xfs_mount_args *mntargs, 61 struct xfs_mount_args *mntargs,
63 int flags) 62 int flags)
64{ 63{
@@ -68,6 +67,7 @@ xfs_ioinit(
68xfs_ioops_t xfs_iocore_xfs = { 67xfs_ioops_t xfs_iocore_xfs = {
69 .xfs_ioinit = (xfs_ioinit_t) xfs_ioinit, 68 .xfs_ioinit = (xfs_ioinit_t) xfs_ioinit,
70 .xfs_bmapi_func = (xfs_bmapi_t) xfs_bmapi, 69 .xfs_bmapi_func = (xfs_bmapi_t) xfs_bmapi,
70 .xfs_bunmapi_func = (xfs_bunmapi_t) xfs_bunmapi,
71 .xfs_bmap_eof_func = (xfs_bmap_eof_t) xfs_bmap_eof, 71 .xfs_bmap_eof_func = (xfs_bmap_eof_t) xfs_bmap_eof,
72 .xfs_iomap_write_direct = 72 .xfs_iomap_write_direct =
73 (xfs_iomap_write_direct_t) xfs_iomap_write_direct, 73 (xfs_iomap_write_direct_t) xfs_iomap_write_direct,
@@ -84,6 +84,7 @@ xfs_ioops_t xfs_iocore_xfs = {
84 .xfs_unlock = (xfs_unlk_t) xfs_iunlock, 84 .xfs_unlock = (xfs_unlk_t) xfs_iunlock,
85 .xfs_size_func = (xfs_size_t) xfs_size_fn, 85 .xfs_size_func = (xfs_size_t) xfs_size_fn,
86 .xfs_iodone = (xfs_iodone_t) fs_noerr, 86 .xfs_iodone = (xfs_iodone_t) fs_noerr,
87 .xfs_swap_extents_func = (xfs_swap_extents_t) xfs_swap_extents,
87}; 88};
88 89
89void 90void
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d5dfedcb8922..f1949c16df15 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -252,7 +250,7 @@ xfs_iomap(
252 error = XFS_BMAPI(mp, NULL, io, offset_fsb, 250 error = XFS_BMAPI(mp, NULL, io, offset_fsb,
253 (xfs_filblks_t)(end_fsb - offset_fsb), 251 (xfs_filblks_t)(end_fsb - offset_fsb),
254 bmapi_flags, NULL, 0, &imap, 252 bmapi_flags, NULL, 0, &imap,
255 &nimaps, NULL); 253 &nimaps, NULL, NULL);
256 254
257 if (error) 255 if (error)
258 goto out; 256 goto out;
@@ -519,8 +517,8 @@ xfs_iomap_write_direct(
519 */ 517 */
520 XFS_BMAP_INIT(&free_list, &firstfsb); 518 XFS_BMAP_INIT(&free_list, &firstfsb);
521 nimaps = 1; 519 nimaps = 1;
522 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 520 error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
523 bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list); 521 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
524 if (error) 522 if (error)
525 goto error0; 523 goto error0;
526 524
@@ -610,8 +608,8 @@ xfs_iomap_eof_want_preallocate(
610 while (count_fsb > 0) { 608 while (count_fsb > 0) {
611 imaps = nimaps; 609 imaps = nimaps;
612 firstblock = NULLFSBLOCK; 610 firstblock = NULLFSBLOCK;
613 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 611 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
614 0, &firstblock, 0, imap, &imaps, NULL); 612 &firstblock, 0, imap, &imaps, NULL, NULL);
615 if (error) 613 if (error)
616 return error; 614 return error;
617 for (n = 0; n < imaps; n++) { 615 for (n = 0; n < imaps; n++) {
@@ -695,11 +693,11 @@ retry:
695 693
696 nimaps = XFS_WRITE_IMAPS; 694 nimaps = XFS_WRITE_IMAPS;
697 firstblock = NULLFSBLOCK; 695 firstblock = NULLFSBLOCK;
698 error = xfs_bmapi(NULL, ip, offset_fsb, 696 error = XFS_BMAPI(mp, NULL, io, offset_fsb,
699 (xfs_filblks_t)(last_fsb - offset_fsb), 697 (xfs_filblks_t)(last_fsb - offset_fsb),
700 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 698 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
701 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 699 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
702 &nimaps, NULL); 700 &nimaps, NULL, NULL);
703 if (error && (error != ENOSPC)) 701 if (error && (error != ENOSPC))
704 return XFS_ERROR(error); 702 return XFS_ERROR(error);
705 703
@@ -832,9 +830,9 @@ xfs_iomap_write_allocate(
832 } 830 }
833 831
834 /* Go get the actual blocks */ 832 /* Go get the actual blocks */
835 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, 833 error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
836 XFS_BMAPI_WRITE, &first_block, 1, 834 XFS_BMAPI_WRITE, &first_block, 1,
837 imap, &nimaps, &free_list); 835 imap, &nimaps, &free_list, NULL);
838 if (error) 836 if (error)
839 goto trans_cancel; 837 goto trans_cancel;
840 838
@@ -955,9 +953,9 @@ xfs_iomap_write_unwritten(
955 */ 953 */
956 XFS_BMAP_INIT(&free_list, &firstfsb); 954 XFS_BMAP_INIT(&free_list, &firstfsb);
957 nimaps = 1; 955 nimaps = 1;
958 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 956 error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
959 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 957 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
960 1, &imap, &nimaps, &free_list); 958 1, &imap, &nimaps, &free_list, NULL);
961 if (error) 959 if (error)
962 goto error_on_bmapi_transaction; 960 goto error_on_bmapi_transaction;
963 961
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 94068d014f27..46249e4d1fea 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -41,11 +39,6 @@
41#include "xfs_error.h" 39#include "xfs_error.h"
42#include "xfs_btree.h" 40#include "xfs_btree.h"
43 41
44#ifndef HAVE_USERACC
45#define useracc(ubuffer, size, flags, foo) (0)
46#define unuseracc(ubuffer, size, flags)
47#endif
48
49STATIC int 42STATIC int
50xfs_bulkstat_one_iget( 43xfs_bulkstat_one_iget(
51 xfs_mount_t *mp, /* mount point for filesystem */ 44 xfs_mount_t *mp, /* mount point for filesystem */
@@ -56,7 +49,7 @@ xfs_bulkstat_one_iget(
56{ 49{
57 xfs_dinode_core_t *dic; /* dinode core info pointer */ 50 xfs_dinode_core_t *dic; /* dinode core info pointer */
58 xfs_inode_t *ip; /* incore inode pointer */ 51 xfs_inode_t *ip; /* incore inode pointer */
59 vnode_t *vp; 52 bhv_vnode_t *vp;
60 int error; 53 int error;
61 54
62 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); 55 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -336,15 +329,6 @@ xfs_bulkstat(
336 nimask = ~(nicluster - 1); 329 nimask = ~(nicluster - 1);
337 nbcluster = nicluster >> mp->m_sb.sb_inopblog; 330 nbcluster = nicluster >> mp->m_sb.sb_inopblog;
338 /* 331 /*
339 * Lock down the user's buffer. If a buffer was not sent, as in the case
340 * disk quota code calls here, we skip this.
341 */
342 if (ubuffer &&
343 (error = useracc(ubuffer, ubcount * statstruct_size,
344 (B_READ|B_PHYS), NULL))) {
345 return error;
346 }
347 /*
348 * Allocate a page-sized buffer for inode btree records. 332 * Allocate a page-sized buffer for inode btree records.
349 * We could try allocating something smaller, but for normal 333 * We could try allocating something smaller, but for normal
350 * calls we'll always (potentially) need the whole page. 334 * calls we'll always (potentially) need the whole page.
@@ -650,8 +634,6 @@ xfs_bulkstat(
650 * Done, we're either out of filesystem or space to put the data. 634 * Done, we're either out of filesystem or space to put the data.
651 */ 635 */
652 kmem_free(irbuf, NBPC); 636 kmem_free(irbuf, NBPC);
653 if (ubuffer)
654 unuseracc(ubuffer, ubcount * statstruct_size, (B_READ|B_PHYS));
655 *ubcountp = ubelem; 637 *ubcountp = ubelem;
656 if (agno >= mp->m_sb.sb_agcount) { 638 if (agno >= mp->m_sb.sb_agcount) {
657 /* 639 /*
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 11eb4e1b18c4..be5f12e07d22 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -45,7 +45,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
45 */ 45 */
46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ 46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ 47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
48#define BULKSTAT_FG_VFSLOCKED 0x4 /* Already have vfs lock */
49 48
50/* 49/*
51 * Return stat information in bulk (by-inode) for the filesystem. 50 * Return stat information in bulk (by-inode) for the filesystem.
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 32e841d2f26d..e730328636c3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -36,7 +35,6 @@
36#include "xfs_ialloc_btree.h" 35#include "xfs_ialloc_btree.h"
37#include "xfs_log_recover.h" 36#include "xfs_log_recover.h"
38#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
39#include "xfs_dir_sf.h"
40#include "xfs_dir2_sf.h" 38#include "xfs_dir2_sf.h"
41#include "xfs_attr_sf.h" 39#include "xfs_attr_sf.h"
42#include "xfs_dinode.h" 40#include "xfs_dinode.h"
@@ -402,7 +400,7 @@ xfs_log_release_iclog(xfs_mount_t *mp,
402 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 400 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
403 401
404 if (xlog_state_release_iclog(log, iclog)) { 402 if (xlog_state_release_iclog(log, iclog)) {
405 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 403 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
406 return EIO; 404 return EIO;
407 } 405 }
408 406
@@ -498,9 +496,8 @@ xfs_log_mount(xfs_mount_t *mp,
498 * just worked. 496 * just worked.
499 */ 497 */
500 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { 498 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
501 int error; 499 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
502 vfs_t *vfsp = XFS_MTOVFS(mp); 500 int error, readonly = (vfsp->vfs_flag & VFS_RDONLY);
503 int readonly = (vfsp->vfs_flag & VFS_RDONLY);
504 501
505 if (readonly) 502 if (readonly)
506 vfsp->vfs_flag &= ~VFS_RDONLY; 503 vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -726,7 +723,7 @@ xfs_log_write(xfs_mount_t * mp,
726 return XFS_ERROR(EIO); 723 return XFS_ERROR(EIO);
727 724
728 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { 725 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
729 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 726 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
730 } 727 }
731 return error; 728 return error;
732} /* xfs_log_write */ 729} /* xfs_log_write */
@@ -816,9 +813,9 @@ xfs_log_need_covered(xfs_mount_t *mp)
816 SPLDECL(s); 813 SPLDECL(s);
817 int needed = 0, gen; 814 int needed = 0, gen;
818 xlog_t *log = mp->m_log; 815 xlog_t *log = mp->m_log;
819 vfs_t *vfsp = XFS_MTOVFS(mp); 816 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
820 817
821 if (fs_frozen(vfsp) || XFS_FORCED_SHUTDOWN(mp) || 818 if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
822 (vfsp->vfs_flag & VFS_RDONLY)) 819 (vfsp->vfs_flag & VFS_RDONLY))
823 return 0; 820 return 0;
824 821
@@ -956,7 +953,7 @@ xlog_iodone(xfs_buf_t *bp)
956 XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { 953 XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
957 xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); 954 xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
958 XFS_BUF_STALE(bp); 955 XFS_BUF_STALE(bp);
959 xfs_force_shutdown(l->l_mp, XFS_LOG_IO_ERROR); 956 xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
960 /* 957 /*
961 * This flag will be propagated to the trans-committed 958 * This flag will be propagated to the trans-committed
962 * callback routines to let them know that the log-commit 959 * callback routines to let them know that the log-commit
@@ -1261,7 +1258,7 @@ xlog_commit_record(xfs_mount_t *mp,
1261 ASSERT_ALWAYS(iclog); 1258 ASSERT_ALWAYS(iclog);
1262 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1259 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
1263 iclog, XLOG_COMMIT_TRANS))) { 1260 iclog, XLOG_COMMIT_TRANS))) {
1264 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 1261 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1265 } 1262 }
1266 return error; 1263 return error;
1267} /* xlog_commit_record */ 1264} /* xlog_commit_record */
@@ -1743,10 +1740,10 @@ xlog_write(xfs_mount_t * mp,
1743 xlog_in_core_t **commit_iclog, 1740 xlog_in_core_t **commit_iclog,
1744 uint flags) 1741 uint flags)
1745{ 1742{
1746 xlog_t *log = mp->m_log; 1743 xlog_t *log = mp->m_log;
1747 xlog_ticket_t *ticket = (xlog_ticket_t *)tic; 1744 xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
1745 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
1748 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1746 xlog_op_header_t *logop_head; /* ptr to log operation header */
1749 xlog_in_core_t *iclog; /* ptr to current in-core log */
1750 __psint_t ptr; /* copy address into data region */ 1747 __psint_t ptr; /* copy address into data region */
1751 int len; /* # xlog_write() bytes 2 still copy */ 1748 int len; /* # xlog_write() bytes 2 still copy */
1752 int index; /* region index currently copying */ 1749 int index; /* region index currently copying */
@@ -1790,7 +1787,7 @@ xlog_write(xfs_mount_t * mp,
1790 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, 1787 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
1791 "xfs_log_write: reservation ran out. Need to up reservation"); 1788 "xfs_log_write: reservation ran out. Need to up reservation");
1792 /* If we did not panic, shutdown the filesystem */ 1789 /* If we did not panic, shutdown the filesystem */
1793 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 1790 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1794#endif 1791#endif
1795 } else 1792 } else
1796 ticket->t_curr_res -= len; 1793 ticket->t_curr_res -= len;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1f0016b0b4ec..3cb678e3a132 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -193,14 +191,14 @@ xlog_header_check_dump(
193{ 191{
194 int b; 192 int b;
195 193
196 printk("%s: SB : uuid = ", __FUNCTION__); 194 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__);
197 for (b = 0; b < 16; b++) 195 for (b = 0; b < 16; b++)
198 printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]); 196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
199 printk(", fmt = %d\n", XLOG_FMT); 197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
200 printk(" log : uuid = "); 198 cmn_err(CE_DEBUG, " log : uuid = ");
201 for (b = 0; b < 16; b++) 199 for (b = 0; b < 16; b++)
202 printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]); 200 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
203 printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); 201 cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
204} 202}
205#else 203#else
206#define xlog_header_check_dump(mp, head) 204#define xlog_header_check_dump(mp, head)
@@ -282,7 +280,7 @@ xlog_recover_iodone(
282 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); 280 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
283 xfs_ioerror_alert("xlog_recover_iodone", 281 xfs_ioerror_alert("xlog_recover_iodone",
284 mp, bp, XFS_BUF_ADDR(bp)); 282 mp, bp, XFS_BUF_ADDR(bp));
285 xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); 283 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
286 } 284 }
287 XFS_BUF_SET_FSPRIVATE(bp, NULL); 285 XFS_BUF_SET_FSPRIVATE(bp, NULL);
288 XFS_BUF_CLR_IODONE_FUNC(bp); 286 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -992,6 +990,8 @@ xlog_find_zeroed(
992 xfs_daddr_t num_scan_bblks; 990 xfs_daddr_t num_scan_bblks;
993 int error, log_bbnum = log->l_logBBsize; 991 int error, log_bbnum = log->l_logBBsize;
994 992
993 *blk_no = 0;
994
995 /* check totally zeroed log */ 995 /* check totally zeroed log */
996 bp = xlog_get_bp(log, 1); 996 bp = xlog_get_bp(log, 1);
997 if (!bp) 997 if (!bp)
@@ -1889,7 +1889,7 @@ xlog_recover_do_inode_buffer(
1889 1889
1890 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1890 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
1891 next_unlinked_offset); 1891 next_unlinked_offset);
1892 INT_SET(*buffer_nextp, ARCH_CONVERT, *logged_nextp); 1892 *buffer_nextp = *logged_nextp;
1893 } 1893 }
1894 1894
1895 return 0; 1895 return 0;
@@ -2292,12 +2292,22 @@ xlog_recover_do_inode_trans(
2292 int attr_index; 2292 int attr_index;
2293 uint fields; 2293 uint fields;
2294 xfs_dinode_core_t *dicp; 2294 xfs_dinode_core_t *dicp;
2295 int need_free = 0;
2295 2296
2296 if (pass == XLOG_RECOVER_PASS1) { 2297 if (pass == XLOG_RECOVER_PASS1) {
2297 return 0; 2298 return 0;
2298 } 2299 }
2299 2300
2300 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; 2301 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2302 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
2303 } else {
2304 in_f = (xfs_inode_log_format_t *)kmem_alloc(
2305 sizeof(xfs_inode_log_format_t), KM_SLEEP);
2306 need_free = 1;
2307 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2308 if (error)
2309 goto error;
2310 }
2301 ino = in_f->ilf_ino; 2311 ino = in_f->ilf_ino;
2302 mp = log->l_mp; 2312 mp = log->l_mp;
2303 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2313 if (ITEM_TYPE(item) == XFS_LI_INODE) {
@@ -2323,8 +2333,10 @@ xlog_recover_do_inode_trans(
2323 * Inode buffers can be freed, look out for it, 2333 * Inode buffers can be freed, look out for it,
2324 * and do not replay the inode. 2334 * and do not replay the inode.
2325 */ 2335 */
2326 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) 2336 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) {
2327 return 0; 2337 error = 0;
2338 goto error;
2339 }
2328 2340
2329 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, 2341 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
2330 XFS_BUF_LOCK); 2342 XFS_BUF_LOCK);
@@ -2333,7 +2345,7 @@ xlog_recover_do_inode_trans(
2333 bp, imap.im_blkno); 2345 bp, imap.im_blkno);
2334 error = XFS_BUF_GETERROR(bp); 2346 error = XFS_BUF_GETERROR(bp);
2335 xfs_buf_relse(bp); 2347 xfs_buf_relse(bp);
2336 return error; 2348 goto error;
2337 } 2349 }
2338 error = 0; 2350 error = 0;
2339 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2351 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
@@ -2350,7 +2362,8 @@ xlog_recover_do_inode_trans(
2350 dip, bp, ino); 2362 dip, bp, ino);
2351 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", 2363 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
2352 XFS_ERRLEVEL_LOW, mp); 2364 XFS_ERRLEVEL_LOW, mp);
2353 return XFS_ERROR(EFSCORRUPTED); 2365 error = EFSCORRUPTED;
2366 goto error;
2354 } 2367 }
2355 dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr); 2368 dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr);
2356 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2369 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
@@ -2360,7 +2373,8 @@ xlog_recover_do_inode_trans(
2360 item, ino); 2373 item, ino);
2361 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", 2374 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
2362 XFS_ERRLEVEL_LOW, mp); 2375 XFS_ERRLEVEL_LOW, mp);
2363 return XFS_ERROR(EFSCORRUPTED); 2376 error = EFSCORRUPTED;
2377 goto error;
2364 } 2378 }
2365 2379
2366 /* Skip replay when the on disk inode is newer than the log one */ 2380 /* Skip replay when the on disk inode is newer than the log one */
@@ -2376,7 +2390,8 @@ xlog_recover_do_inode_trans(
2376 /* do nothing */ 2390 /* do nothing */
2377 } else { 2391 } else {
2378 xfs_buf_relse(bp); 2392 xfs_buf_relse(bp);
2379 return 0; 2393 error = 0;
2394 goto error;
2380 } 2395 }
2381 } 2396 }
2382 /* Take the opportunity to reset the flush iteration count */ 2397 /* Take the opportunity to reset the flush iteration count */
@@ -2391,7 +2406,8 @@ xlog_recover_do_inode_trans(
2391 xfs_fs_cmn_err(CE_ALERT, mp, 2406 xfs_fs_cmn_err(CE_ALERT, mp,
2392 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2407 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2393 item, dip, bp, ino); 2408 item, dip, bp, ino);
2394 return XFS_ERROR(EFSCORRUPTED); 2409 error = EFSCORRUPTED;
2410 goto error;
2395 } 2411 }
2396 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { 2412 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
2397 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2413 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -2403,7 +2419,8 @@ xlog_recover_do_inode_trans(
2403 xfs_fs_cmn_err(CE_ALERT, mp, 2419 xfs_fs_cmn_err(CE_ALERT, mp,
2404 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2420 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2405 item, dip, bp, ino); 2421 item, dip, bp, ino);
2406 return XFS_ERROR(EFSCORRUPTED); 2422 error = EFSCORRUPTED;
2423 goto error;
2407 } 2424 }
2408 } 2425 }
2409 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2426 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
@@ -2415,7 +2432,8 @@ xlog_recover_do_inode_trans(
2415 item, dip, bp, ino, 2432 item, dip, bp, ino,
2416 dicp->di_nextents + dicp->di_anextents, 2433 dicp->di_nextents + dicp->di_anextents,
2417 dicp->di_nblocks); 2434 dicp->di_nblocks);
2418 return XFS_ERROR(EFSCORRUPTED); 2435 error = EFSCORRUPTED;
2436 goto error;
2419 } 2437 }
2420 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2438 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
2421 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", 2439 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
@@ -2424,7 +2442,8 @@ xlog_recover_do_inode_trans(
2424 xfs_fs_cmn_err(CE_ALERT, mp, 2442 xfs_fs_cmn_err(CE_ALERT, mp,
2425 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2443 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
2426 item, dip, bp, ino, dicp->di_forkoff); 2444 item, dip, bp, ino, dicp->di_forkoff);
2427 return XFS_ERROR(EFSCORRUPTED); 2445 error = EFSCORRUPTED;
2446 goto error;
2428 } 2447 }
2429 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { 2448 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) {
2430 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2449 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
@@ -2433,7 +2452,8 @@ xlog_recover_do_inode_trans(
2433 xfs_fs_cmn_err(CE_ALERT, mp, 2452 xfs_fs_cmn_err(CE_ALERT, mp,
2434 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", 2453 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
2435 item->ri_buf[1].i_len, item); 2454 item->ri_buf[1].i_len, item);
2436 return XFS_ERROR(EFSCORRUPTED); 2455 error = EFSCORRUPTED;
2456 goto error;
2437 } 2457 }
2438 2458
2439 /* The core is in in-core format */ 2459 /* The core is in in-core format */
@@ -2521,7 +2541,8 @@ xlog_recover_do_inode_trans(
2521 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); 2541 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
2522 ASSERT(0); 2542 ASSERT(0);
2523 xfs_buf_relse(bp); 2543 xfs_buf_relse(bp);
2524 return XFS_ERROR(EIO); 2544 error = EIO;
2545 goto error;
2525 } 2546 }
2526 } 2547 }
2527 2548
@@ -2537,7 +2558,10 @@ write_inode_buffer:
2537 error = xfs_bwrite(mp, bp); 2558 error = xfs_bwrite(mp, bp);
2538 } 2559 }
2539 2560
2540 return (error); 2561error:
2562 if (need_free)
2563 kmem_free(in_f, sizeof(*in_f));
2564 return XFS_ERROR(error);
2541} 2565}
2542 2566
2543/* 2567/*
@@ -2674,32 +2698,32 @@ xlog_recover_do_dquot_trans(
2674 * structure into it, and adds the efi to the AIL with the given 2698 * structure into it, and adds the efi to the AIL with the given
2675 * LSN. 2699 * LSN.
2676 */ 2700 */
2677STATIC void 2701STATIC int
2678xlog_recover_do_efi_trans( 2702xlog_recover_do_efi_trans(
2679 xlog_t *log, 2703 xlog_t *log,
2680 xlog_recover_item_t *item, 2704 xlog_recover_item_t *item,
2681 xfs_lsn_t lsn, 2705 xfs_lsn_t lsn,
2682 int pass) 2706 int pass)
2683{ 2707{
2708 int error;
2684 xfs_mount_t *mp; 2709 xfs_mount_t *mp;
2685 xfs_efi_log_item_t *efip; 2710 xfs_efi_log_item_t *efip;
2686 xfs_efi_log_format_t *efi_formatp; 2711 xfs_efi_log_format_t *efi_formatp;
2687 SPLDECL(s); 2712 SPLDECL(s);
2688 2713
2689 if (pass == XLOG_RECOVER_PASS1) { 2714 if (pass == XLOG_RECOVER_PASS1) {
2690 return; 2715 return 0;
2691 } 2716 }
2692 2717
2693 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; 2718 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
2694 ASSERT(item->ri_buf[0].i_len ==
2695 (sizeof(xfs_efi_log_format_t) +
2696 ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t))));
2697 2719
2698 mp = log->l_mp; 2720 mp = log->l_mp;
2699 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2721 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
2700 memcpy((char *)&(efip->efi_format), (char *)efi_formatp, 2722 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
2701 sizeof(xfs_efi_log_format_t) + 2723 &(efip->efi_format)))) {
2702 ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t))); 2724 xfs_efi_item_free(efip);
2725 return error;
2726 }
2703 efip->efi_next_extent = efi_formatp->efi_nextents; 2727 efip->efi_next_extent = efi_formatp->efi_nextents;
2704 efip->efi_flags |= XFS_EFI_COMMITTED; 2728 efip->efi_flags |= XFS_EFI_COMMITTED;
2705 2729
@@ -2708,6 +2732,7 @@ xlog_recover_do_efi_trans(
2708 * xfs_trans_update_ail() drops the AIL lock. 2732 * xfs_trans_update_ail() drops the AIL lock.
2709 */ 2733 */
2710 xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s); 2734 xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s);
2735 return 0;
2711} 2736}
2712 2737
2713 2738
@@ -2738,9 +2763,10 @@ xlog_recover_do_efd_trans(
2738 } 2763 }
2739 2764
2740 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; 2765 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
2741 ASSERT(item->ri_buf[0].i_len == 2766 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2742 (sizeof(xfs_efd_log_format_t) + 2767 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2743 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_t)))); 2768 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
2769 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
2744 efi_id = efd_formatp->efd_efi_id; 2770 efi_id = efd_formatp->efd_efi_id;
2745 2771
2746 /* 2772 /*
@@ -2810,15 +2836,14 @@ xlog_recover_do_trans(
2810 if ((error = xlog_recover_do_buffer_trans(log, item, 2836 if ((error = xlog_recover_do_buffer_trans(log, item,
2811 pass))) 2837 pass)))
2812 break; 2838 break;
2813 } else if ((ITEM_TYPE(item) == XFS_LI_INODE) || 2839 } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
2814 (ITEM_TYPE(item) == XFS_LI_6_1_INODE) ||
2815 (ITEM_TYPE(item) == XFS_LI_5_3_INODE)) {
2816 if ((error = xlog_recover_do_inode_trans(log, item, 2840 if ((error = xlog_recover_do_inode_trans(log, item,
2817 pass))) 2841 pass)))
2818 break; 2842 break;
2819 } else if (ITEM_TYPE(item) == XFS_LI_EFI) { 2843 } else if (ITEM_TYPE(item) == XFS_LI_EFI) {
2820 xlog_recover_do_efi_trans(log, item, trans->r_lsn, 2844 if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
2821 pass); 2845 pass)))
2846 break;
2822 } else if (ITEM_TYPE(item) == XFS_LI_EFD) { 2847 } else if (ITEM_TYPE(item) == XFS_LI_EFD) {
2823 xlog_recover_do_efd_trans(log, item, pass); 2848 xlog_recover_do_efd_trans(log, item, pass);
2824 } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { 2849 } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
@@ -3419,13 +3444,13 @@ xlog_unpack_data_checksum(
3419 if (rhead->h_chksum || 3444 if (rhead->h_chksum ||
3420 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { 3445 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
3421 cmn_err(CE_DEBUG, 3446 cmn_err(CE_DEBUG,
3422 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)", 3447 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
3423 INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum); 3448 INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
3424 cmn_err(CE_DEBUG, 3449 cmn_err(CE_DEBUG,
3425"XFS: Disregard message if filesystem was created with non-DEBUG kernel"); 3450"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
3426 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 3451 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
3427 cmn_err(CE_DEBUG, 3452 cmn_err(CE_DEBUG,
3428 "XFS: LogR this is a LogV2 filesystem"); 3453 "XFS: LogR this is a LogV2 filesystem\n");
3429 } 3454 }
3430 log->l_flags |= XLOG_CHKSUM_MISMATCH; 3455 log->l_flags |= XLOG_CHKSUM_MISMATCH;
3431 } 3456 }
@@ -3798,7 +3823,7 @@ xlog_do_log_recovery(
3798 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3823 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3799 XLOG_RECOVER_PASS2); 3824 XLOG_RECOVER_PASS2);
3800#ifdef DEBUG 3825#ifdef DEBUG
3801 { 3826 if (!error) {
3802 int i; 3827 int i;
3803 3828
3804 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3829 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
@@ -3974,7 +3999,7 @@ xlog_recover_finish(
3974 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3999 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3975 } else { 4000 } else {
3976 cmn_err(CE_DEBUG, 4001 cmn_err(CE_DEBUG,
3977 "!Ending clean XFS mount for filesystem: %s", 4002 "!Ending clean XFS mount for filesystem: %s\n",
3978 log->l_mp->m_fsname); 4003 log->l_mp->m_fsname);
3979 } 4004 }
3980 return 0; 4005 return 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c0b1c2906880..4be5c0b2d296 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -196,7 +194,7 @@ xfs_mount_free(
196 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 194 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
197 195
198 if (remove_bhv) { 196 if (remove_bhv) {
199 struct vfs *vfsp = XFS_MTOVFS(mp); 197 struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
200 198
201 bhv_remove_all_vfsops(vfsp, 0); 199 bhv_remove_all_vfsops(vfsp, 0);
202 VFS_REMOVEBHV(vfsp, &mp->m_bhv); 200 VFS_REMOVEBHV(vfsp, &mp->m_bhv);
@@ -337,7 +335,7 @@ xfs_mount_validate_sb(
337 335
338xfs_agnumber_t 336xfs_agnumber_t
339xfs_initialize_perag( 337xfs_initialize_perag(
340 struct vfs *vfs, 338 bhv_vfs_t *vfs,
341 xfs_mount_t *mp, 339 xfs_mount_t *mp,
342 xfs_agnumber_t agcount) 340 xfs_agnumber_t agcount)
343{ 341{
@@ -651,14 +649,14 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
651 */ 649 */
652int 650int
653xfs_mountfs( 651xfs_mountfs(
654 vfs_t *vfsp, 652 bhv_vfs_t *vfsp,
655 xfs_mount_t *mp, 653 xfs_mount_t *mp,
656 int mfsi_flags) 654 int mfsi_flags)
657{ 655{
658 xfs_buf_t *bp; 656 xfs_buf_t *bp;
659 xfs_sb_t *sbp = &(mp->m_sb); 657 xfs_sb_t *sbp = &(mp->m_sb);
660 xfs_inode_t *rip; 658 xfs_inode_t *rip;
661 vnode_t *rvp = NULL; 659 bhv_vnode_t *rvp = NULL;
662 int readio_log, writeio_log; 660 int readio_log, writeio_log;
663 xfs_daddr_t d; 661 xfs_daddr_t d;
664 __uint64_t ret64; 662 __uint64_t ret64;
@@ -934,18 +932,7 @@ xfs_mountfs(
934 vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid; 932 vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
935 mp->m_dmevmask = 0; /* not persistent; set after each mount */ 933 mp->m_dmevmask = 0; /* not persistent; set after each mount */
936 934
937 /* 935 xfs_dir_mount(mp);
938 * Select the right directory manager.
939 */
940 mp->m_dirops =
941 XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
942 xfsv2_dirops :
943 xfsv1_dirops;
944
945 /*
946 * Initialize directory manager's entries.
947 */
948 XFS_DIR_MOUNT(mp);
949 936
950 /* 937 /*
951 * Initialize the attribute manager's entries. 938 * Initialize the attribute manager's entries.
@@ -1006,8 +993,9 @@ xfs_mountfs(
1006 993
1007 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 994 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
1008 cmn_err(CE_WARN, "XFS: corrupted root inode"); 995 cmn_err(CE_WARN, "XFS: corrupted root inode");
1009 prdev("Root inode %llu is not a directory", 996 cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
1010 mp->m_ddev_targp, (unsigned long long)rip->i_ino); 997 XFS_BUFTARG_NAME(mp->m_ddev_targp),
998 (unsigned long long)rip->i_ino);
1011 xfs_iunlock(rip, XFS_ILOCK_EXCL); 999 xfs_iunlock(rip, XFS_ILOCK_EXCL);
1012 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1000 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
1013 mp); 1001 mp);
@@ -1094,7 +1082,7 @@ xfs_mountfs(
1094int 1082int
1095xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1083xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1096{ 1084{
1097 struct vfs *vfsp = XFS_MTOVFS(mp); 1085 struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
1098#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1086#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1099 int64_t fsid; 1087 int64_t fsid;
1100#endif 1088#endif
@@ -1254,6 +1242,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1254 1242
1255 xfs_trans_log_buf(tp, bp, first, last); 1243 xfs_trans_log_buf(tp, bp, first, last);
1256} 1244}
1245
1246/*
1247 * In order to avoid ENOSPC-related deadlock caused by
1248 * out-of-order locking of AGF buffer (PV 947395), we place
1249 * constraints on the relationship among actual allocations for
1250 * data blocks, freelist blocks, and potential file data bmap
1251 * btree blocks. However, these restrictions may result in no
1252 * actual space allocated for a delayed extent, for example, a data
1253 * block in a certain AG is allocated but there is no additional
1254 * block for the additional bmap btree block due to a split of the
1255 * bmap btree of the file. The result of this may lead to an
1256 * infinite loop in xfssyncd when the file gets flushed to disk and
1257 * all delayed extents need to be actually allocated. To get around
1258 * this, we explicitly set aside a few blocks which will not be
1259 * reserved in delayed allocation. Considering the minimum number of
1260 * needed freelist blocks is 4 fsbs, a potential split of file's bmap
1261 * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
1262*/
1263#define SET_ASIDE_BLOCKS 8
1264
1257/* 1265/*
1258 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1266 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
1259 * a delta to a specified field in the in-core superblock. Simply 1267 * a delta to a specified field in the in-core superblock. Simply
@@ -1298,7 +1306,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1298 return 0; 1306 return 0;
1299 case XFS_SBS_FDBLOCKS: 1307 case XFS_SBS_FDBLOCKS:
1300 1308
1301 lcounter = (long long)mp->m_sb.sb_fdblocks; 1309 lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
1302 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1310 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1303 1311
1304 if (delta > 0) { /* Putting blocks back */ 1312 if (delta > 0) { /* Putting blocks back */
@@ -1332,7 +1340,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1332 } 1340 }
1333 } 1341 }
1334 1342
1335 mp->m_sb.sb_fdblocks = lcounter; 1343 mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
1336 return 0; 1344 return 0;
1337 case XFS_SBS_FREXTENTS: 1345 case XFS_SBS_FREXTENTS:
1338 lcounter = (long long)mp->m_sb.sb_frextents; 1346 lcounter = (long long)mp->m_sb.sb_frextents;
@@ -1713,15 +1721,14 @@ xfs_mount_log_sbunit(
1713 * is present to prevent thrashing). 1721 * is present to prevent thrashing).
1714 */ 1722 */
1715 1723
1724#ifdef CONFIG_HOTPLUG_CPU
1716/* 1725/*
1717 * hot-plug CPU notifier support. 1726 * hot-plug CPU notifier support.
1718 * 1727 *
1719 * We cannot use the hotcpu_register() function because it does 1728 * We need a notifier per filesystem as we need to be able to identify
1720 * not allow notifier instances. We need a notifier per filesystem 1729 * the filesystem to balance the counters out. This is achieved by
1721 * as we need to be able to identify the filesystem to balance 1730 * having a notifier block embedded in the xfs_mount_t and doing pointer
1722 * the counters out. This is achieved by having a notifier block 1731 * magic to get the mount pointer from the notifier block address.
1723 * embedded in the xfs_mount_t and doing pointer magic to get the
1724 * mount pointer from the notifier block address.
1725 */ 1732 */
1726STATIC int 1733STATIC int
1727xfs_icsb_cpu_notify( 1734xfs_icsb_cpu_notify(
@@ -1771,6 +1778,7 @@ xfs_icsb_cpu_notify(
1771 1778
1772 return NOTIFY_OK; 1779 return NOTIFY_OK;
1773} 1780}
1781#endif /* CONFIG_HOTPLUG_CPU */
1774 1782
1775int 1783int
1776xfs_icsb_init_counters( 1784xfs_icsb_init_counters(
@@ -1783,9 +1791,11 @@ xfs_icsb_init_counters(
1783 if (mp->m_sb_cnts == NULL) 1791 if (mp->m_sb_cnts == NULL)
1784 return -ENOMEM; 1792 return -ENOMEM;
1785 1793
1794#ifdef CONFIG_HOTPLUG_CPU
1786 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; 1795 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1787 mp->m_icsb_notifier.priority = 0; 1796 mp->m_icsb_notifier.priority = 0;
1788 register_cpu_notifier(&mp->m_icsb_notifier); 1797 register_hotcpu_notifier(&mp->m_icsb_notifier);
1798#endif /* CONFIG_HOTPLUG_CPU */
1789 1799
1790 for_each_online_cpu(i) { 1800 for_each_online_cpu(i) {
1791 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1801 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
@@ -1804,7 +1814,7 @@ xfs_icsb_destroy_counters(
1804 xfs_mount_t *mp) 1814 xfs_mount_t *mp)
1805{ 1815{
1806 if (mp->m_sb_cnts) { 1816 if (mp->m_sb_cnts) {
1807 unregister_cpu_notifier(&mp->m_icsb_notifier); 1817 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1808 free_percpu(mp->m_sb_cnts); 1818 free_percpu(mp->m_sb_cnts);
1809 } 1819 }
1810} 1820}
@@ -2018,7 +2028,7 @@ xfs_icsb_balance_counter(
2018 xfs_sb_field_t field, 2028 xfs_sb_field_t field,
2019 int flags) 2029 int flags)
2020{ 2030{
2021 uint64_t count, resid = 0; 2031 uint64_t count, resid;
2022 int weight = num_online_cpus(); 2032 int weight = num_online_cpus();
2023 int s; 2033 int s;
2024 2034
@@ -2050,6 +2060,7 @@ xfs_icsb_balance_counter(
2050 break; 2060 break;
2051 default: 2061 default:
2052 BUG(); 2062 BUG();
2063 count = resid = 0; /* quiet, gcc */
2053 break; 2064 break;
2054 } 2065 }
2055 2066
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 668ad23fd37c..b2bd4be4200a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,8 +53,8 @@ typedef struct xfs_trans_reservations {
53#else 53#else
54struct cred; 54struct cred;
55struct log; 55struct log;
56struct vfs; 56struct bhv_vfs;
57struct vnode; 57struct bhv_vnode;
58struct xfs_mount_args; 58struct xfs_mount_args;
59struct xfs_ihash; 59struct xfs_ihash;
60struct xfs_chash; 60struct xfs_chash;
@@ -63,9 +63,11 @@ struct xfs_perag;
63struct xfs_iocore; 63struct xfs_iocore;
64struct xfs_bmbt_irec; 64struct xfs_bmbt_irec;
65struct xfs_bmap_free; 65struct xfs_bmap_free;
66struct xfs_extdelta;
67struct xfs_swapext;
66 68
67extern struct vfsops xfs_vfsops; 69extern struct bhv_vfsops xfs_vfsops;
68extern struct vnodeops xfs_vnodeops; 70extern struct bhv_vnodeops xfs_vnodeops;
69 71
70#define AIL_LOCK_T lock_t 72#define AIL_LOCK_T lock_t
71#define AIL_LOCKINIT(x,y) spinlock_init(x,y) 73#define AIL_LOCKINIT(x,y) spinlock_init(x,y)
@@ -78,15 +80,15 @@ extern struct vnodeops xfs_vnodeops;
78 * Prototypes and functions for the Data Migration subsystem. 80 * Prototypes and functions for the Data Migration subsystem.
79 */ 81 */
80 82
81typedef int (*xfs_send_data_t)(int, struct vnode *, 83typedef int (*xfs_send_data_t)(int, struct bhv_vnode *,
82 xfs_off_t, size_t, int, vrwlock_t *); 84 xfs_off_t, size_t, int, bhv_vrwlock_t *);
83typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); 85typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
84typedef int (*xfs_send_destroy_t)(struct vnode *, dm_right_t); 86typedef int (*xfs_send_destroy_t)(struct bhv_vnode *, dm_right_t);
85typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct vfs *, 87typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct bhv_vfs *,
86 struct vnode *, 88 struct bhv_vnode *,
87 dm_right_t, struct vnode *, dm_right_t, 89 dm_right_t, struct bhv_vnode *, dm_right_t,
88 char *, char *, mode_t, int, int); 90 char *, char *, mode_t, int, int);
89typedef void (*xfs_send_unmount_t)(struct vfs *, struct vnode *, 91typedef void (*xfs_send_unmount_t)(struct bhv_vfs *, struct bhv_vnode *,
90 dm_right_t, mode_t, int, int); 92 dm_right_t, mode_t, int, int);
91 93
92typedef struct xfs_dmops { 94typedef struct xfs_dmops {
@@ -188,13 +190,18 @@ typedef struct xfs_qmops {
188 * Prototypes and functions for I/O core modularization. 190 * Prototypes and functions for I/O core modularization.
189 */ 191 */
190 192
191typedef int (*xfs_ioinit_t)(struct vfs *, 193typedef int (*xfs_ioinit_t)(struct bhv_vfs *,
192 struct xfs_mount_args *, int); 194 struct xfs_mount_args *, int);
193typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *, 195typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *,
194 xfs_fileoff_t, xfs_filblks_t, int, 196 xfs_fileoff_t, xfs_filblks_t, int,
195 xfs_fsblock_t *, xfs_extlen_t, 197 xfs_fsblock_t *, xfs_extlen_t,
196 struct xfs_bmbt_irec *, int *, 198 struct xfs_bmbt_irec *, int *,
197 struct xfs_bmap_free *); 199 struct xfs_bmap_free *, struct xfs_extdelta *);
200typedef int (*xfs_bunmapi_t)(struct xfs_trans *,
201 void *, xfs_fileoff_t,
202 xfs_filblks_t, int, xfs_extnum_t,
203 xfs_fsblock_t *, struct xfs_bmap_free *,
204 struct xfs_extdelta *, int *);
198typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *); 205typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
199typedef int (*xfs_iomap_write_direct_t)( 206typedef int (*xfs_iomap_write_direct_t)(
200 void *, xfs_off_t, size_t, int, 207 void *, xfs_off_t, size_t, int,
@@ -213,11 +220,14 @@ typedef void (*xfs_lock_demote_t)(void *, uint);
213typedef int (*xfs_lock_nowait_t)(void *, uint); 220typedef int (*xfs_lock_nowait_t)(void *, uint);
214typedef void (*xfs_unlk_t)(void *, unsigned int); 221typedef void (*xfs_unlk_t)(void *, unsigned int);
215typedef xfs_fsize_t (*xfs_size_t)(void *); 222typedef xfs_fsize_t (*xfs_size_t)(void *);
216typedef xfs_fsize_t (*xfs_iodone_t)(struct vfs *); 223typedef xfs_fsize_t (*xfs_iodone_t)(struct bhv_vfs *);
224typedef int (*xfs_swap_extents_t)(void *, void *,
225 struct xfs_swapext*);
217 226
218typedef struct xfs_ioops { 227typedef struct xfs_ioops {
219 xfs_ioinit_t xfs_ioinit; 228 xfs_ioinit_t xfs_ioinit;
220 xfs_bmapi_t xfs_bmapi_func; 229 xfs_bmapi_t xfs_bmapi_func;
230 xfs_bunmapi_t xfs_bunmapi_func;
221 xfs_bmap_eof_t xfs_bmap_eof_func; 231 xfs_bmap_eof_t xfs_bmap_eof_func;
222 xfs_iomap_write_direct_t xfs_iomap_write_direct; 232 xfs_iomap_write_direct_t xfs_iomap_write_direct;
223 xfs_iomap_write_delay_t xfs_iomap_write_delay; 233 xfs_iomap_write_delay_t xfs_iomap_write_delay;
@@ -230,13 +240,17 @@ typedef struct xfs_ioops {
230 xfs_unlk_t xfs_unlock; 240 xfs_unlk_t xfs_unlock;
231 xfs_size_t xfs_size_func; 241 xfs_size_t xfs_size_func;
232 xfs_iodone_t xfs_iodone; 242 xfs_iodone_t xfs_iodone;
243 xfs_swap_extents_t xfs_swap_extents_func;
233} xfs_ioops_t; 244} xfs_ioops_t;
234 245
235#define XFS_IOINIT(vfsp, args, flags) \ 246#define XFS_IOINIT(vfsp, args, flags) \
236 (*(mp)->m_io_ops.xfs_ioinit)(vfsp, args, flags) 247 (*(mp)->m_io_ops.xfs_ioinit)(vfsp, args, flags)
237#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist) \ 248#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \
238 (*(mp)->m_io_ops.xfs_bmapi_func) \ 249 (*(mp)->m_io_ops.xfs_bmapi_func) \
239 (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist) 250 (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta)
251#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \
252 (*(mp)->m_io_ops.xfs_bunmapi_func) \
253 (trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done)
240#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \ 254#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
241 (*(mp)->m_io_ops.xfs_bmap_eof_func) \ 255 (*(mp)->m_io_ops.xfs_bmap_eof_func) \
242 ((io)->io_obj, endoff, whichfork, eof) 256 ((io)->io_obj, endoff, whichfork, eof)
@@ -266,6 +280,9 @@ typedef struct xfs_ioops {
266 (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj) 280 (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
267#define XFS_IODONE(vfsp) \ 281#define XFS_IODONE(vfsp) \
268 (*(mp)->m_io_ops.xfs_iodone)(vfsp) 282 (*(mp)->m_io_ops.xfs_iodone)(vfsp)
283#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \
284 (*(mp)->m_io_ops.xfs_swap_extents_func) \
285 ((io)->io_obj, (tio)->io_obj, sxp)
269 286
270#ifdef HAVE_PERCPU_SB 287#ifdef HAVE_PERCPU_SB
271 288
@@ -386,8 +403,6 @@ typedef struct xfs_mount {
386 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 403 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
387 field governed by m_ilock */ 404 field governed by m_ilock */
388 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 405 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
389 __uint8_t m_dirversion; /* 1 or 2 */
390 xfs_dirops_t m_dirops; /* table of dir funcs */
391 int m_dirblksize; /* directory block sz--bytes */ 406 int m_dirblksize; /* directory block sz--bytes */
392 int m_dirblkfsbs; /* directory block sz--fsbs */ 407 int m_dirblkfsbs; /* directory block sz--fsbs */
393 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ 408 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -494,16 +509,7 @@ xfs_preferred_iosize(xfs_mount_t *mp)
494 509
495#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) 510#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
496#define xfs_force_shutdown(m,f) \ 511#define xfs_force_shutdown(m,f) \
497 VFS_FORCE_SHUTDOWN((XFS_MTOVFS(m)), f, __FILE__, __LINE__) 512 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
498
499/*
500 * Flags sent to xfs_force_shutdown.
501 */
502#define XFS_METADATA_IO_ERROR 0x1
503#define XFS_LOG_IO_ERROR 0x2
504#define XFS_FORCE_UMOUNT 0x4
505#define XFS_CORRUPT_INCORE 0x8 /* Corrupt in-memory data structures */
506#define XFS_SHUTDOWN_REMOTE_REQ 0x10 /* Shutdown came from remote cell */
507 513
508/* 514/*
509 * Flags for xfs_mountfs 515 * Flags for xfs_mountfs
@@ -521,7 +527,7 @@ xfs_preferred_iosize(xfs_mount_t *mp)
521 * Macros for getting from mount to vfs and back. 527 * Macros for getting from mount to vfs and back.
522 */ 528 */
523#define XFS_MTOVFS(mp) xfs_mtovfs(mp) 529#define XFS_MTOVFS(mp) xfs_mtovfs(mp)
524static inline struct vfs *xfs_mtovfs(xfs_mount_t *mp) 530static inline struct bhv_vfs *xfs_mtovfs(xfs_mount_t *mp)
525{ 531{
526 return bhvtovfs(&mp->m_bhv); 532 return bhvtovfs(&mp->m_bhv);
527} 533}
@@ -533,7 +539,7 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp)
533} 539}
534 540
535#define XFS_VFSTOM(vfs) xfs_vfstom(vfs) 541#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
536static inline xfs_mount_t *xfs_vfstom(vfs_t *vfs) 542static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs)
537{ 543{
538 return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)); 544 return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops));
539} 545}
@@ -571,7 +577,7 @@ typedef struct xfs_mod_sb {
571extern xfs_mount_t *xfs_mount_init(void); 577extern xfs_mount_t *xfs_mount_init(void);
572extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 578extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
573extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); 579extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
574extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); 580extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
575extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 581extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
576 582
577extern int xfs_unmountfs(xfs_mount_t *, struct cred *); 583extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
@@ -589,7 +595,7 @@ extern void xfs_freesb(xfs_mount_t *);
589extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 595extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
590extern int xfs_syncsub(xfs_mount_t *, int, int, int *); 596extern int xfs_syncsub(xfs_mount_t *, int, int, int *);
591extern int xfs_sync_inodes(xfs_mount_t *, int, int, int *); 597extern int xfs_sync_inodes(xfs_mount_t *, int, int, int *);
592extern xfs_agnumber_t xfs_initialize_perag(struct vfs *, xfs_mount_t *, 598extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
593 xfs_agnumber_t); 599 xfs_agnumber_t);
594extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); 600extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
595 601
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 1408a32eef88..320d63ff9ca2 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir.h"
27#include "xfs_dir2.h" 26#include "xfs_dir2.h"
28#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 7fbef974bce6..acb853b33ebb 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -365,7 +365,7 @@ typedef struct xfs_dqtrxops {
365extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 365extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
366extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 366extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
367 367
368extern struct bhv_vfsops xfs_qmops; 368extern struct bhv_module_vfsops xfs_qmops;
369 369
370#endif /* __KERNEL__ */ 370#endif /* __KERNEL__ */
371 371
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 1f148762eb28..d98171deaa1c 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -22,13 +22,11 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dir2.h" 25#include "xfs_dir2.h"
27#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir_sf.h"
32#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
@@ -40,7 +38,6 @@
40#include "xfs_refcache.h" 38#include "xfs_refcache.h"
41#include "xfs_utils.h" 39#include "xfs_utils.h"
42#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
43#include "xfs_dir_leaf.h"
44 41
45 42
46/* 43/*
@@ -87,8 +84,8 @@ STATIC int
87xfs_lock_for_rename( 84xfs_lock_for_rename(
88 xfs_inode_t *dp1, /* old (source) directory inode */ 85 xfs_inode_t *dp1, /* old (source) directory inode */
89 xfs_inode_t *dp2, /* new (target) directory inode */ 86 xfs_inode_t *dp2, /* new (target) directory inode */
90 vname_t *vname1,/* old entry name */ 87 bhv_vname_t *vname1,/* old entry name */
91 vname_t *vname2,/* new entry name */ 88 bhv_vname_t *vname2,/* new entry name */
92 xfs_inode_t **ipp1, /* inode of old entry */ 89 xfs_inode_t **ipp1, /* inode of old entry */
93 xfs_inode_t **ipp2, /* inode of new entry, if it 90 xfs_inode_t **ipp2, /* inode of new entry, if it
94 already exists, NULL otherwise. */ 91 already exists, NULL otherwise. */
@@ -225,9 +222,9 @@ xfs_lock_for_rename(
225int 222int
226xfs_rename( 223xfs_rename(
227 bhv_desc_t *src_dir_bdp, 224 bhv_desc_t *src_dir_bdp,
228 vname_t *src_vname, 225 bhv_vname_t *src_vname,
229 vnode_t *target_dir_vp, 226 bhv_vnode_t *target_dir_vp,
230 vname_t *target_vname, 227 bhv_vname_t *target_vname,
231 cred_t *credp) 228 cred_t *credp)
232{ 229{
233 xfs_trans_t *tp; 230 xfs_trans_t *tp;
@@ -242,7 +239,7 @@ xfs_rename(
242 int committed; 239 int committed;
243 xfs_inode_t *inodes[4]; 240 xfs_inode_t *inodes[4];
244 int target_ip_dropped = 0; /* dropped target_ip link? */ 241 int target_ip_dropped = 0; /* dropped target_ip link? */
245 vnode_t *src_dir_vp; 242 bhv_vnode_t *src_dir_vp;
246 int spaceres; 243 int spaceres;
247 int target_link_zero = 0; 244 int target_link_zero = 0;
248 int num_inodes; 245 int num_inodes;
@@ -398,34 +395,29 @@ xfs_rename(
398 * fit before actually inserting it. 395 * fit before actually inserting it.
399 */ 396 */
400 if (spaceres == 0 && 397 if (spaceres == 0 &&
401 (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name, 398 (error = xfs_dir_canenter(tp, target_dp, target_name,
402 target_namelen))) { 399 target_namelen)))
403 goto error_return; 400 goto error_return;
404 }
405 /* 401 /*
406 * If target does not exist and the rename crosses 402 * If target does not exist and the rename crosses
407 * directories, adjust the target directory link count 403 * directories, adjust the target directory link count
408 * to account for the ".." reference from the new entry. 404 * to account for the ".." reference from the new entry.
409 */ 405 */
410 error = XFS_DIR_CREATENAME(mp, tp, target_dp, target_name, 406 error = xfs_dir_createname(tp, target_dp, target_name,
411 target_namelen, src_ip->i_ino, 407 target_namelen, src_ip->i_ino,
412 &first_block, &free_list, spaceres); 408 &first_block, &free_list, spaceres);
413 if (error == ENOSPC) { 409 if (error == ENOSPC)
414 goto error_return; 410 goto error_return;
415 } 411 if (error)
416 if (error) {
417 goto abort_return; 412 goto abort_return;
418 }
419 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 413 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
420 414
421 if (new_parent && src_is_directory) { 415 if (new_parent && src_is_directory) {
422 error = xfs_bumplink(tp, target_dp); 416 error = xfs_bumplink(tp, target_dp);
423 if (error) { 417 if (error)
424 goto abort_return; 418 goto abort_return;
425 }
426 } 419 }
427 } else { /* target_ip != NULL */ 420 } else { /* target_ip != NULL */
428
429 /* 421 /*
430 * If target exists and it's a directory, check that both 422 * If target exists and it's a directory, check that both
431 * target and source are directories and that target can be 423 * target and source are directories and that target can be
@@ -435,7 +427,7 @@ xfs_rename(
435 /* 427 /*
436 * Make sure target dir is empty. 428 * Make sure target dir is empty.
437 */ 429 */
438 if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) || 430 if (!(xfs_dir_isempty(target_ip)) ||
439 (target_ip->i_d.di_nlink > 2)) { 431 (target_ip->i_d.di_nlink > 2)) {
440 error = XFS_ERROR(EEXIST); 432 error = XFS_ERROR(EEXIST);
441 goto error_return; 433 goto error_return;
@@ -451,12 +443,11 @@ xfs_rename(
451 * In case there is already an entry with the same 443 * In case there is already an entry with the same
452 * name at the destination directory, remove it first. 444 * name at the destination directory, remove it first.
453 */ 445 */
454 error = XFS_DIR_REPLACE(mp, tp, target_dp, target_name, 446 error = xfs_dir_replace(tp, target_dp, target_name,
455 target_namelen, src_ip->i_ino, &first_block, 447 target_namelen, src_ip->i_ino,
456 &free_list, spaceres); 448 &first_block, &free_list, spaceres);
457 if (error) { 449 if (error)
458 goto abort_return; 450 goto abort_return;
459 }
460 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 451 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
461 452
462 /* 453 /*
@@ -464,9 +455,8 @@ xfs_rename(
464 * dir no longer points to it. 455 * dir no longer points to it.
465 */ 456 */
466 error = xfs_droplink(tp, target_ip); 457 error = xfs_droplink(tp, target_ip);
467 if (error) { 458 if (error)
468 goto abort_return; 459 goto abort_return;
469 }
470 target_ip_dropped = 1; 460 target_ip_dropped = 1;
471 461
472 if (src_is_directory) { 462 if (src_is_directory) {
@@ -474,9 +464,8 @@ xfs_rename(
474 * Drop the link from the old "." entry. 464 * Drop the link from the old "." entry.
475 */ 465 */
476 error = xfs_droplink(tp, target_ip); 466 error = xfs_droplink(tp, target_ip);
477 if (error) { 467 if (error)
478 goto abort_return; 468 goto abort_return;
479 }
480 } 469 }
481 470
482 /* Do this test while we still hold the locks */ 471 /* Do this test while we still hold the locks */
@@ -488,18 +477,15 @@ xfs_rename(
488 * Remove the source. 477 * Remove the source.
489 */ 478 */
490 if (new_parent && src_is_directory) { 479 if (new_parent && src_is_directory) {
491
492 /* 480 /*
493 * Rewrite the ".." entry to point to the new 481 * Rewrite the ".." entry to point to the new
494 * directory. 482 * directory.
495 */ 483 */
496 error = XFS_DIR_REPLACE(mp, tp, src_ip, "..", 2, 484 error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino,
497 target_dp->i_ino, &first_block, 485 &first_block, &free_list, spaceres);
498 &free_list, spaceres);
499 ASSERT(error != EEXIST); 486 ASSERT(error != EEXIST);
500 if (error) { 487 if (error)
501 goto abort_return; 488 goto abort_return;
502 }
503 xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 489 xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
504 490
505 } else { 491 } else {
@@ -527,16 +513,14 @@ xfs_rename(
527 * entry that's moved no longer points to it. 513 * entry that's moved no longer points to it.
528 */ 514 */
529 error = xfs_droplink(tp, src_dp); 515 error = xfs_droplink(tp, src_dp);
530 if (error) { 516 if (error)
531 goto abort_return; 517 goto abort_return;
532 }
533 } 518 }
534 519
535 error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen, 520 error = xfs_dir_removename(tp, src_dp, src_name, src_namelen,
536 src_ip->i_ino, &first_block, &free_list, spaceres); 521 src_ip->i_ino, &first_block, &free_list, spaceres);
537 if (error) { 522 if (error)
538 goto abort_return; 523 goto abort_return;
539 }
540 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 524 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
541 525
542 /* 526 /*
@@ -609,7 +593,7 @@ xfs_rename(
609 * Let interposed file systems know about removed links. 593 * Let interposed file systems know about removed links.
610 */ 594 */
611 if (target_ip_dropped) { 595 if (target_ip_dropped) {
612 VOP_LINK_REMOVED(XFS_ITOV(target_ip), target_dir_vp, 596 bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp,
613 target_link_zero); 597 target_link_zero);
614 IRELE(target_ip); 598 IRELE(target_ip);
615 } 599 }
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5b413946b1c5..5a0b678956e0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -141,7 +139,7 @@ xfs_growfs_rt_alloc(
141 cancelflags |= XFS_TRANS_ABORT; 139 cancelflags |= XFS_TRANS_ABORT;
142 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, 140 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
143 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, 141 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
144 resblks, &map, &nmap, &flist); 142 resblks, &map, &nmap, &flist, NULL);
145 if (!error && nmap < 1) 143 if (!error && nmap < 1)
146 error = XFS_ERROR(ENOSPC); 144 error = XFS_ERROR(ENOSPC);
147 if (error) 145 if (error)
@@ -1931,7 +1929,7 @@ xfs_growfs_rt(
1931 /* 1929 /*
1932 * Initial error checking. 1930 * Initial error checking.
1933 */ 1931 */
1934 if (mp->m_rtdev_targp || mp->m_rbmip == NULL || 1932 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
1935 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1933 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1936 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1934 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
1937 return XFS_ERROR(EINVAL); 1935 return XFS_ERROR(EINVAL);
@@ -2404,10 +2402,10 @@ xfs_rtprint_range(
2404{ 2402{
2405 xfs_extlen_t i; /* block number in the extent */ 2403 xfs_extlen_t i; /* block number in the extent */
2406 2404
2407 printk("%Ld: ", (long long)start); 2405 cmn_err(CE_DEBUG, "%Ld: ", (long long)start);
2408 for (i = 0; i < len; i++) 2406 for (i = 0; i < len; i++)
2409 printk("%d", xfs_rtcheck_bit(mp, tp, start + i, 1)); 2407 cmn_err(CE_DEBUG, "%d", xfs_rtcheck_bit(mp, tp, start + i, 1));
2410 printk("\n"); 2408 cmn_err(CE_DEBUG, "\n");
2411} 2409}
2412 2410
2413/* 2411/*
@@ -2431,17 +2429,17 @@ xfs_rtprint_summary(
2431 (void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c); 2429 (void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c);
2432 if (c) { 2430 if (c) {
2433 if (!p) { 2431 if (!p) {
2434 printk("%Ld-%Ld:", 1LL << l, 2432 cmn_err(CE_DEBUG, "%Ld-%Ld:", 1LL << l,
2435 XFS_RTMIN((1LL << l) + 2433 XFS_RTMIN((1LL << l) +
2436 ((1LL << l) - 1LL), 2434 ((1LL << l) - 1LL),
2437 mp->m_sb.sb_rextents)); 2435 mp->m_sb.sb_rextents));
2438 p = 1; 2436 p = 1;
2439 } 2437 }
2440 printk(" %Ld:%d", (long long)i, c); 2438 cmn_err(CE_DEBUG, " %Ld:%d", (long long)i, c);
2441 } 2439 }
2442 } 2440 }
2443 if (p) 2441 if (p)
2444 printk("\n"); 2442 cmn_err(CE_DEBUG, "\n");
2445 } 2443 }
2446 if (sumbp) 2444 if (sumbp)
2447 xfs_trans_brelse(tp, sumbp); 2445 xfs_trans_brelse(tp, sumbp);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index a59c102cf214..defb2febaaf5 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -92,6 +90,90 @@ xfs_write_clear_setuid(
92} 90}
93 91
94/* 92/*
93 * Handle logging requirements of various synchronous types of write.
94 */
95int
96xfs_write_sync_logforce(
97 xfs_mount_t *mp,
98 xfs_inode_t *ip)
99{
100 int error = 0;
101
102 /*
103 * If we're treating this as O_DSYNC and we have not updated the
104 * size, force the log.
105 */
106 if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
107 !(ip->i_update_size)) {
108 xfs_inode_log_item_t *iip = ip->i_itemp;
109
110 /*
111 * If an allocation transaction occurred
112 * without extending the size, then we have to force
113 * the log up to the proper point to ensure that the
114 * allocation is permanent. We can't count on
115 * the fact that buffered writes lock out direct I/O
116 * writes - the direct I/O write could have extended
117 * the size nontransactionally, then finished before
118 * we started. xfs_write_file will think that the file
119 * didn't grow but the update isn't safe unless the
120 * size change is logged.
121 *
122 * Force the log if we've committed a transaction
123 * against the inode or if someone else has and
124 * the commit record hasn't gone to disk (e.g.
125 * the inode is pinned). This guarantees that
126 * all changes affecting the inode are permanent
127 * when we return.
128 */
129 if (iip && iip->ili_last_lsn) {
130 xfs_log_force(mp, iip->ili_last_lsn,
131 XFS_LOG_FORCE | XFS_LOG_SYNC);
132 } else if (xfs_ipincount(ip) > 0) {
133 xfs_log_force(mp, (xfs_lsn_t)0,
134 XFS_LOG_FORCE | XFS_LOG_SYNC);
135 }
136
137 } else {
138 xfs_trans_t *tp;
139
140 /*
141 * O_SYNC or O_DSYNC _with_ a size update are handled
142 * the same way.
143 *
144 * If the write was synchronous then we need to make
145 * sure that the inode modification time is permanent.
146 * We'll have updated the timestamp above, so here
147 * we use a synchronous transaction to log the inode.
148 * It's not fast, but it's necessary.
149 *
150 * If this is a dsync write and the size got changed
151 * non-transactionally, then we need to ensure that
152 * the size change gets logged in a synchronous
153 * transaction.
154 */
155 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
156 if ((error = xfs_trans_reserve(tp, 0,
157 XFS_SWRITE_LOG_RES(mp),
158 0, 0, 0))) {
159 /* Transaction reserve failed */
160 xfs_trans_cancel(tp, 0);
161 } else {
162 /* Transaction reserve successful */
163 xfs_ilock(ip, XFS_ILOCK_EXCL);
164 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
165 xfs_trans_ihold(tp, ip);
166 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
167 xfs_trans_set_sync(tp);
168 error = xfs_trans_commit(tp, 0, NULL);
169 xfs_iunlock(ip, XFS_ILOCK_EXCL);
170 }
171 }
172
173 return error;
174}
175
176/*
95 * Force a shutdown of the filesystem instantly while keeping 177 * Force a shutdown of the filesystem instantly while keeping
96 * the filesystem consistent. We don't do an unmount here; just shutdown 178 * the filesystem consistent. We don't do an unmount here; just shutdown
97 * the shop, make sure that absolutely nothing persistent happens to 179 * the shop, make sure that absolutely nothing persistent happens to
@@ -109,12 +191,12 @@ xfs_do_force_shutdown(
109 xfs_mount_t *mp; 191 xfs_mount_t *mp;
110 192
111 mp = XFS_BHVTOM(bdp); 193 mp = XFS_BHVTOM(bdp);
112 logerror = flags & XFS_LOG_IO_ERROR; 194 logerror = flags & SHUTDOWN_LOG_IO_ERROR;
113 195
114 if (!(flags & XFS_FORCE_UMOUNT)) { 196 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
115 cmn_err(CE_NOTE, 197 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
116 "xfs_force_shutdown(%s,0x%x) called from line %d of file %s. Return address = 0x%p", 198 "line %d of file %s. Return address = 0x%p",
117 mp->m_fsname,flags,lnnum,fname,__return_address); 199 mp->m_fsname, flags, lnnum, fname, __return_address);
118 } 200 }
119 /* 201 /*
120 * No need to duplicate efforts. 202 * No need to duplicate efforts.
@@ -125,33 +207,37 @@ xfs_do_force_shutdown(
125 /* 207 /*
126 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't 208 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
127 * queue up anybody new on the log reservations, and wakes up 209 * queue up anybody new on the log reservations, and wakes up
128 * everybody who's sleeping on log reservations and tells 210 * everybody who's sleeping on log reservations to tell them
129 * them the bad news. 211 * the bad news.
130 */ 212 */
131 if (xfs_log_force_umount(mp, logerror)) 213 if (xfs_log_force_umount(mp, logerror))
132 return; 214 return;
133 215
134 if (flags & XFS_CORRUPT_INCORE) { 216 if (flags & SHUTDOWN_CORRUPT_INCORE) {
135 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, 217 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
136 "Corruption of in-memory data detected. Shutting down filesystem: %s", 218 "Corruption of in-memory data detected. Shutting down filesystem: %s",
137 mp->m_fsname); 219 mp->m_fsname);
138 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { 220 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
139 xfs_stack_trace(); 221 xfs_stack_trace();
140 } 222 }
141 } else if (!(flags & XFS_FORCE_UMOUNT)) { 223 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
142 if (logerror) { 224 if (logerror) {
143 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, 225 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
144 "Log I/O Error Detected. Shutting down filesystem: %s", 226 "Log I/O Error Detected. Shutting down filesystem: %s",
227 mp->m_fsname);
228 } else if (flags & SHUTDOWN_DEVICE_REQ) {
229 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
230 "All device paths lost. Shutting down filesystem: %s",
145 mp->m_fsname); 231 mp->m_fsname);
146 } else if (!(flags & XFS_SHUTDOWN_REMOTE_REQ)) { 232 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
147 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 233 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
148 "I/O Error Detected. Shutting down filesystem: %s", 234 "I/O Error Detected. Shutting down filesystem: %s",
149 mp->m_fsname); 235 mp->m_fsname);
150 } 236 }
151 } 237 }
152 if (!(flags & XFS_FORCE_UMOUNT)) { 238 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
153 cmn_err(CE_ALERT, 239 cmn_err(CE_ALERT, "Please umount the filesystem, "
154 "Please umount the filesystem, and rectify the problem(s)"); 240 "and rectify the problem(s)");
155 } 241 }
156} 242}
157 243
@@ -335,7 +421,7 @@ xfs_bwrite(
335 * from bwrite and we could be tracing a buffer that has 421 * from bwrite and we could be tracing a buffer that has
336 * been reused. 422 * been reused.
337 */ 423 */
338 xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); 424 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
339 } 425 }
340 return (error); 426 return (error);
341} 427}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index e63795644478..188b296ff50c 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -75,6 +75,7 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
75 * Prototypes for functions in xfs_rw.c. 75 * Prototypes for functions in xfs_rw.c.
76 */ 76 */
77extern int xfs_write_clear_setuid(struct xfs_inode *ip); 77extern int xfs_write_clear_setuid(struct xfs_inode *ip);
78extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip);
78extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); 79extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
79extern int xfs_bioerror(struct xfs_buf *bp); 80extern int xfs_bioerror(struct xfs_buf *bp);
80extern int xfs_bioerror_relse(struct xfs_buf *bp); 81extern int xfs_bioerror_relse(struct xfs_buf *bp);
@@ -87,9 +88,10 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
87/* 88/*
88 * Prototypes for functions in xfs_vnodeops.c. 89 * Prototypes for functions in xfs_vnodeops.c.
89 */ 90 */
90extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock); 91extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
91extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock); 92extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
92extern int xfs_setattr(bhv_desc_t *bdp, vattr_t *vap, int flags, cred_t *credp); 93extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
94 cred_t *credp);
93extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, 95extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
94 xfs_off_t offset, cred_t *credp, int flags); 96 xfs_off_t offset, cred_t *credp, int flags);
95extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, 97extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8d056cef5d1f..ee2721e0de4d 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -33,7 +32,6 @@
33#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
36#include "xfs_dir_sf.h"
37#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
39#include "xfs_dinode.h" 37#include "xfs_dinode.h"
@@ -236,11 +234,8 @@ xfs_trans_alloc(
236 xfs_mount_t *mp, 234 xfs_mount_t *mp,
237 uint type) 235 uint type)
238{ 236{
239 fs_check_frozen(XFS_MTOVFS(mp), SB_FREEZE_TRANS); 237 vfs_wait_for_freeze(XFS_MTOVFS(mp), SB_FREEZE_TRANS);
240 atomic_inc(&mp->m_active_trans); 238 return _xfs_trans_alloc(mp, type);
241
242 return (_xfs_trans_alloc(mp, type));
243
244} 239}
245 240
246xfs_trans_t * 241xfs_trans_t *
@@ -250,12 +245,9 @@ _xfs_trans_alloc(
250{ 245{
251 xfs_trans_t *tp; 246 xfs_trans_t *tp;
252 247
253 ASSERT(xfs_trans_zone != NULL); 248 atomic_inc(&mp->m_active_trans);
254 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
255 249
256 /* 250 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
257 * Initialize the transaction structure.
258 */
259 tp->t_magic = XFS_TRANS_MAGIC; 251 tp->t_magic = XFS_TRANS_MAGIC;
260 tp->t_type = type; 252 tp->t_type = type;
261 tp->t_mountp = mp; 253 tp->t_mountp = mp;
@@ -263,8 +255,7 @@ _xfs_trans_alloc(
263 tp->t_busy_free = XFS_LBC_NUM_SLOTS; 255 tp->t_busy_free = XFS_LBC_NUM_SLOTS;
264 XFS_LIC_INIT(&(tp->t_items)); 256 XFS_LIC_INIT(&(tp->t_items));
265 XFS_LBC_INIT(&(tp->t_busy)); 257 XFS_LBC_INIT(&(tp->t_busy));
266 258 return tp;
267 return (tp);
268} 259}
269 260
270/* 261/*
@@ -303,7 +294,7 @@ xfs_trans_dup(
303 tp->t_blk_res = tp->t_blk_res_used; 294 tp->t_blk_res = tp->t_blk_res_used;
304 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; 295 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
305 tp->t_rtx_res = tp->t_rtx_res_used; 296 tp->t_rtx_res = tp->t_rtx_res_used;
306 PFLAGS_DUP(&tp->t_pflags, &ntp->t_pflags); 297 ntp->t_pflags = tp->t_pflags;
307 298
308 XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); 299 XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
309 300
@@ -335,14 +326,11 @@ xfs_trans_reserve(
335 uint logcount) 326 uint logcount)
336{ 327{
337 int log_flags; 328 int log_flags;
338 int error; 329 int error = 0;
339 int rsvd; 330 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
340
341 error = 0;
342 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
343 331
344 /* Mark this thread as being in a transaction */ 332 /* Mark this thread as being in a transaction */
345 PFLAGS_SET_FSTRANS(&tp->t_pflags); 333 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
346 334
347 /* 335 /*
348 * Attempt to reserve the needed disk blocks by decrementing 336 * Attempt to reserve the needed disk blocks by decrementing
@@ -353,7 +341,7 @@ xfs_trans_reserve(
353 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 341 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
354 -blocks, rsvd); 342 -blocks, rsvd);
355 if (error != 0) { 343 if (error != 0) {
356 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 344 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
357 return (XFS_ERROR(ENOSPC)); 345 return (XFS_ERROR(ENOSPC));
358 } 346 }
359 tp->t_blk_res += blocks; 347 tp->t_blk_res += blocks;
@@ -426,9 +414,9 @@ undo_blocks:
426 tp->t_blk_res = 0; 414 tp->t_blk_res = 0;
427 } 415 }
428 416
429 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 417 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
430 418
431 return (error); 419 return error;
432} 420}
433 421
434 422
@@ -819,7 +807,7 @@ shut_us_down:
819 if (commit_lsn == -1 && !shutdown) 807 if (commit_lsn == -1 && !shutdown)
820 shutdown = XFS_ERROR(EIO); 808 shutdown = XFS_ERROR(EIO);
821 } 809 }
822 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 810 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
823 xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0); 811 xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
824 xfs_trans_free_busy(tp); 812 xfs_trans_free_busy(tp);
825 xfs_trans_free(tp); 813 xfs_trans_free(tp);
@@ -846,7 +834,7 @@ shut_us_down:
846 */ 834 */
847 nvec = xfs_trans_count_vecs(tp); 835 nvec = xfs_trans_count_vecs(tp);
848 if (nvec == 0) { 836 if (nvec == 0) {
849 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 837 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
850 goto shut_us_down; 838 goto shut_us_down;
851 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { 839 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
852 log_vector = log_vector_fast; 840 log_vector = log_vector_fast;
@@ -884,7 +872,7 @@ shut_us_down:
884 * had pinned, clean up, free trans structure, and return error. 872 * had pinned, clean up, free trans structure, and return error.
885 */ 873 */
886 if (error || commit_lsn == -1) { 874 if (error || commit_lsn == -1) {
887 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 875 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
888 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); 876 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
889 return XFS_ERROR(EIO); 877 return XFS_ERROR(EIO);
890 } 878 }
@@ -926,7 +914,7 @@ shut_us_down:
926 /* 914 /*
927 * Mark this thread as no longer being in a transaction 915 * Mark this thread as no longer being in a transaction
928 */ 916 */
929 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 917 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
930 918
931 /* 919 /*
932 * Once all the items of the transaction have been copied 920 * Once all the items of the transaction have been copied
@@ -1148,7 +1136,7 @@ xfs_trans_cancel(
1148 */ 1136 */
1149 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { 1137 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
1150 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); 1138 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
1151 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 1139 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1152 } 1140 }
1153#ifdef DEBUG 1141#ifdef DEBUG
1154 if (!(flags & XFS_TRANS_ABORT)) { 1142 if (!(flags & XFS_TRANS_ABORT)) {
@@ -1182,7 +1170,7 @@ xfs_trans_cancel(
1182 } 1170 }
1183 1171
1184 /* mark this thread as no longer being in a transaction */ 1172 /* mark this thread as no longer being in a transaction */
1185 PFLAGS_RESTORE_FSTRANS(&tp->t_pflags); 1173 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1186 1174
1187 xfs_trans_free_items(tp, flags); 1175 xfs_trans_free_items(tp, flags);
1188 xfs_trans_free_busy(tp); 1176 xfs_trans_free_busy(tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 100d9a4b38ee..9dc88b380608 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -338,8 +338,6 @@ typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
338typedef struct xfs_trans { 338typedef struct xfs_trans {
339 unsigned int t_magic; /* magic number */ 339 unsigned int t_magic; /* magic number */
340 xfs_log_callback_t t_logcb; /* log callback struct */ 340 xfs_log_callback_t t_logcb; /* log callback struct */
341 struct xfs_trans *t_forw; /* async list pointers */
342 struct xfs_trans *t_back; /* async list pointers */
343 unsigned int t_type; /* transaction type */ 341 unsigned int t_type; /* transaction type */
344 unsigned int t_log_res; /* amt of log space resvd */ 342 unsigned int t_log_res; /* amt of log space resvd */
345 unsigned int t_log_count; /* count for perm log res */ 343 unsigned int t_log_count; /* count for perm log res */
@@ -364,9 +362,11 @@ typedef struct xfs_trans {
364 long t_res_fdblocks_delta; /* on-disk only chg */ 362 long t_res_fdblocks_delta; /* on-disk only chg */
365 long t_frextents_delta;/* superblock freextents chg*/ 363 long t_frextents_delta;/* superblock freextents chg*/
366 long t_res_frextents_delta; /* on-disk only chg */ 364 long t_res_frextents_delta; /* on-disk only chg */
365#ifdef DEBUG
367 long t_ag_freeblks_delta; /* debugging counter */ 366 long t_ag_freeblks_delta; /* debugging counter */
368 long t_ag_flist_delta; /* debugging counter */ 367 long t_ag_flist_delta; /* debugging counter */
369 long t_ag_btree_delta; /* debugging counter */ 368 long t_ag_btree_delta; /* debugging counter */
369#endif
370 long t_dblocks_delta;/* superblock dblocks change */ 370 long t_dblocks_delta;/* superblock dblocks change */
371 long t_agcount_delta;/* superblock agcount change */ 371 long t_agcount_delta;/* superblock agcount change */
372 long t_imaxpct_delta;/* superblock imaxpct change */ 372 long t_imaxpct_delta;/* superblock imaxpct change */
@@ -805,12 +805,9 @@ typedef struct xfs_trans {
805 ((mp)->m_sb.sb_inodesize + \ 805 ((mp)->m_sb.sb_inodesize + \
806 (mp)->m_sb.sb_sectsize * 2 + \ 806 (mp)->m_sb.sb_sectsize * 2 + \
807 (mp)->m_dirblksize + \ 807 (mp)->m_dirblksize + \
808 (XFS_DIR_IS_V1(mp) ? 0 : \ 808 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
809 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \
810 XFS_ALLOCFREE_LOG_RES(mp, 1) + \ 809 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
811 (128 * (4 + \ 810 (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
812 (XFS_DIR_IS_V1(mp) ? 0 : \
813 XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
814 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) 811 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
815 812
816#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 813#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 19ab24af1c1c..558c87ff0c41 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -22,7 +22,6 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dmapi.h" 25#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
@@ -363,9 +362,10 @@ xfs_trans_delete_ail(
363 AIL_UNLOCK(mp, s); 362 AIL_UNLOCK(mp, s);
364 else { 363 else {
365 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 364 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
366 "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); 365 "%s: attempting to delete a log item that is not in the AIL",
366 __FUNCTION__);
367 AIL_UNLOCK(mp, s); 367 AIL_UNLOCK(mp, s);
368 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 368 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
369 } 369 }
370 } 370 }
371} 371}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c74c31ebc81c..60b6b898022b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
@@ -320,7 +318,7 @@ xfs_trans_read_buf(
320 if (xfs_error_target == target) { 318 if (xfs_error_target == target) {
321 if (((xfs_req_num++) % xfs_error_mod) == 0) { 319 if (((xfs_req_num++) % xfs_error_mod) == 0) {
322 xfs_buf_relse(bp); 320 xfs_buf_relse(bp);
323 printk("Returning error!\n"); 321 cmn_err(CE_DEBUG, "Returning error!\n");
324 return XFS_ERROR(EIO); 322 return XFS_ERROR(EIO);
325 } 323 }
326 } 324 }
@@ -369,7 +367,7 @@ xfs_trans_read_buf(
369 */ 367 */
370 if (tp->t_flags & XFS_TRANS_DIRTY) 368 if (tp->t_flags & XFS_TRANS_DIRTY)
371 xfs_force_shutdown(tp->t_mountp, 369 xfs_force_shutdown(tp->t_mountp,
372 XFS_METADATA_IO_ERROR); 370 SHUTDOWN_META_IO_ERROR);
373 return error; 371 return error;
374 } 372 }
375 } 373 }
@@ -414,7 +412,7 @@ xfs_trans_read_buf(
414 xfs_ioerror_alert("xfs_trans_read_buf", mp, 412 xfs_ioerror_alert("xfs_trans_read_buf", mp,
415 bp, blkno); 413 bp, blkno);
416 if (tp->t_flags & XFS_TRANS_DIRTY) 414 if (tp->t_flags & XFS_TRANS_DIRTY)
417 xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR); 415 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
418 xfs_buf_relse(bp); 416 xfs_buf_relse(bp);
419 return error; 417 return error;
420 } 418 }
@@ -423,9 +421,9 @@ xfs_trans_read_buf(
423 if (xfs_error_target == target) { 421 if (xfs_error_target == target) {
424 if (((xfs_req_num++) % xfs_error_mod) == 0) { 422 if (((xfs_req_num++) % xfs_error_mod) == 0) {
425 xfs_force_shutdown(tp->t_mountp, 423 xfs_force_shutdown(tp->t_mountp,
426 XFS_METADATA_IO_ERROR); 424 SHUTDOWN_META_IO_ERROR);
427 xfs_buf_relse(bp); 425 xfs_buf_relse(bp);
428 printk("Returning error in trans!\n"); 426 cmn_err(CE_DEBUG, "Returning trans error!\n");
429 return XFS_ERROR(EIO); 427 return XFS_ERROR(EIO);
430 } 428 }
431 } 429 }
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 7d7d627f25df..b290270dd4a6 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -22,7 +22,6 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_dir.h"
26#include "xfs_dmapi.h" 25#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 7c5894d59f81..b8db1d5cde5a 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -24,14 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h" 33#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 35#include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 1117d600d741..2912aac07c7b 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -493,7 +493,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
493 break; 493 break;
494 } else { 494 } else {
495 /* out-of-order vacancy */ 495 /* out-of-order vacancy */
496 printk("OOO vacancy lbcp 0x%p\n", lbcp); 496 cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp);
497 ASSERT(0); 497 ASSERT(0);
498 } 498 }
499 } 499 }
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index 7fe3792b18df..4ea2e5074bdd 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -30,8 +30,7 @@
30 XFS_EXTENTADD_SPACE_RES(mp,w)) 30 XFS_EXTENTADD_SPACE_RES(mp,w))
31#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1) 31#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
32#define XFS_DAENTER_DBS(mp,w) \ 32#define XFS_DAENTER_DBS(mp,w) \
33 (XFS_DA_NODE_MAXDEPTH + \ 33 (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
34 ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0))
35#define XFS_DAENTER_BLOCKS(mp,w) \ 34#define XFS_DAENTER_BLOCKS(mp,w) \
36 (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w)) 35 (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w))
37#define XFS_DAENTER_BMAP1B(mp,w) \ 36#define XFS_DAENTER_BMAP1B(mp,w) \
@@ -41,10 +40,7 @@
41#define XFS_DAENTER_SPACE_RES(mp,w) \ 40#define XFS_DAENTER_SPACE_RES(mp,w) \
42 (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w)) 41 (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w))
43#define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w) 42#define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w)
44#define XFS_DIRENTER_MAX_SPLIT(mp,nl) \ 43#define XFS_DIRENTER_MAX_SPLIT(mp,nl) 1
45 (((mp)->m_sb.sb_blocksize == 512 && \
46 XFS_DIR_IS_V1(mp) && \
47 (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 2 : 1)
48#define XFS_DIRENTER_SPACE_RES(mp,nl) \ 44#define XFS_DIRENTER_SPACE_RES(mp,nl) \
49 (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \ 45 (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \
50 XFS_DIRENTER_MAX_SPLIT(mp,nl)) 46 XFS_DIRENTER_MAX_SPLIT(mp,nl))
@@ -57,8 +53,7 @@
57 * Space reservation values for various transactions. 53 * Space reservation values for various transactions.
58 */ 54 */
59#define XFS_ADDAFORK_SPACE_RES(mp) \ 55#define XFS_ADDAFORK_SPACE_RES(mp) \
60 ((mp)->m_dirblkfsbs + \ 56 ((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
61 (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)))
62#define XFS_ATTRRM_SPACE_RES(mp) \ 57#define XFS_ATTRRM_SPACE_RES(mp) \
63 XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) 58 XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
64/* This macro is not used - see inline code in xfs_attr_set */ 59/* This macro is not used - see inline code in xfs_attr_set */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 34654ec6ae10..9014d7e44488 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -24,12 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_dir_sf.h"
33#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h" 32#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 33#include "xfs_dinode.h"
@@ -51,10 +49,10 @@
51 */ 49 */
52int 50int
53xfs_get_dir_entry( 51xfs_get_dir_entry(
54 vname_t *dentry, 52 bhv_vname_t *dentry,
55 xfs_inode_t **ipp) 53 xfs_inode_t **ipp)
56{ 54{
57 vnode_t *vp; 55 bhv_vnode_t *vp;
58 56
59 vp = VNAME_TO_VNODE(dentry); 57 vp = VNAME_TO_VNODE(dentry);
60 58
@@ -69,11 +67,11 @@ int
69xfs_dir_lookup_int( 67xfs_dir_lookup_int(
70 bhv_desc_t *dir_bdp, 68 bhv_desc_t *dir_bdp,
71 uint lock_mode, 69 uint lock_mode,
72 vname_t *dentry, 70 bhv_vname_t *dentry,
73 xfs_ino_t *inum, 71 xfs_ino_t *inum,
74 xfs_inode_t **ipp) 72 xfs_inode_t **ipp)
75{ 73{
76 vnode_t *dir_vp; 74 bhv_vnode_t *dir_vp;
77 xfs_inode_t *dp; 75 xfs_inode_t *dp;
78 int error; 76 int error;
79 77
@@ -82,8 +80,7 @@ xfs_dir_lookup_int(
82 80
83 dp = XFS_BHVTOI(dir_bdp); 81 dp = XFS_BHVTOI(dir_bdp);
84 82
85 error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp, 83 error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum);
86 VNAME(dentry), VNAMELEN(dentry), inum);
87 if (!error) { 84 if (!error) {
88 /* 85 /*
89 * Unlock the directory. We do this because we can't 86 * Unlock the directory. We do this because we can't
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 472661a3b6d8..fe953e98afa7 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -23,9 +23,10 @@
23#define ITRACE(ip) vn_trace_ref(XFS_ITOV(ip), __FILE__, __LINE__, \ 23#define ITRACE(ip) vn_trace_ref(XFS_ITOV(ip), __FILE__, __LINE__, \
24 (inst_t *)__return_address) 24 (inst_t *)__return_address)
25 25
26extern int xfs_rename (bhv_desc_t *, vname_t *, vnode_t *, vname_t *, cred_t *); 26extern int xfs_rename (bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *,
27extern int xfs_get_dir_entry (vname_t *, xfs_inode_t **); 27 bhv_vname_t *, cred_t *);
28extern int xfs_dir_lookup_int (bhv_desc_t *, uint, vname_t *, xfs_ino_t *, 28extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
29extern int xfs_dir_lookup_int (bhv_desc_t *, uint, bhv_vname_t *, xfs_ino_t *,
29 xfs_inode_t **); 30 xfs_inode_t **);
30extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); 31extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
31extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 32extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 36ea1b2094f2..6c96391f3f1a 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h" 27#include "xfs_dir2.h"
29#include "xfs_dmapi.h" 28#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
@@ -32,7 +31,6 @@
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
35#include "xfs_dir_sf.h"
36#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 35#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 36#include "xfs_dinode.h"
@@ -131,9 +129,6 @@ xfs_init(void)
131#ifdef XFS_BMBT_TRACE 129#ifdef XFS_BMBT_TRACE
132 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP); 130 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
133#endif 131#endif
134#ifdef XFS_DIR_TRACE
135 xfs_dir_trace_buf = ktrace_alloc(XFS_DIR_TRACE_SIZE, KM_SLEEP);
136#endif
137#ifdef XFS_ATTR_TRACE 132#ifdef XFS_ATTR_TRACE
138 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP); 133 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
139#endif 134#endif
@@ -177,9 +172,6 @@ xfs_cleanup(void)
177#ifdef XFS_ATTR_TRACE 172#ifdef XFS_ATTR_TRACE
178 ktrace_free(xfs_attr_trace_buf); 173 ktrace_free(xfs_attr_trace_buf);
179#endif 174#endif
180#ifdef XFS_DIR_TRACE
181 ktrace_free(xfs_dir_trace_buf);
182#endif
183#ifdef XFS_BMBT_TRACE 175#ifdef XFS_BMBT_TRACE
184 ktrace_free(xfs_bmbt_trace_buf); 176 ktrace_free(xfs_bmbt_trace_buf);
185#endif 177#endif
@@ -212,7 +204,7 @@ xfs_cleanup(void)
212 */ 204 */
213STATIC int 205STATIC int
214xfs_start_flags( 206xfs_start_flags(
215 struct vfs *vfs, 207 struct bhv_vfs *vfs,
216 struct xfs_mount_args *ap, 208 struct xfs_mount_args *ap,
217 struct xfs_mount *mp) 209 struct xfs_mount *mp)
218{ 210{
@@ -337,7 +329,7 @@ xfs_start_flags(
337 */ 329 */
338STATIC int 330STATIC int
339xfs_finish_flags( 331xfs_finish_flags(
340 struct vfs *vfs, 332 struct bhv_vfs *vfs,
341 struct xfs_mount_args *ap, 333 struct xfs_mount_args *ap,
342 struct xfs_mount *mp) 334 struct xfs_mount *mp)
343{ 335{
@@ -423,7 +415,7 @@ xfs_mount(
423 struct xfs_mount_args *args, 415 struct xfs_mount_args *args,
424 cred_t *credp) 416 cred_t *credp)
425{ 417{
426 struct vfs *vfsp = bhvtovfs(bhvp); 418 struct bhv_vfs *vfsp = bhvtovfs(bhvp);
427 struct bhv_desc *p; 419 struct bhv_desc *p;
428 struct xfs_mount *mp = XFS_BHVTOM(bhvp); 420 struct xfs_mount *mp = XFS_BHVTOM(bhvp);
429 struct block_device *ddev, *logdev, *rtdev; 421 struct block_device *ddev, *logdev, *rtdev;
@@ -552,10 +544,10 @@ xfs_unmount(
552 int flags, 544 int flags,
553 cred_t *credp) 545 cred_t *credp)
554{ 546{
555 struct vfs *vfsp = bhvtovfs(bdp); 547 bhv_vfs_t *vfsp = bhvtovfs(bdp);
556 xfs_mount_t *mp = XFS_BHVTOM(bdp); 548 xfs_mount_t *mp = XFS_BHVTOM(bdp);
557 xfs_inode_t *rip; 549 xfs_inode_t *rip;
558 vnode_t *rvp; 550 bhv_vnode_t *rvp;
559 int unmount_event_wanted = 0; 551 int unmount_event_wanted = 0;
560 int unmount_event_flags = 0; 552 int unmount_event_flags = 0;
561 int xfs_unmountfs_needed = 0; 553 int xfs_unmountfs_needed = 0;
@@ -665,9 +657,8 @@ xfs_mntupdate(
665 int *flags, 657 int *flags,
666 struct xfs_mount_args *args) 658 struct xfs_mount_args *args)
667{ 659{
668 struct vfs *vfsp = bhvtovfs(bdp); 660 bhv_vfs_t *vfsp = bhvtovfs(bdp);
669 xfs_mount_t *mp = XFS_BHVTOM(bdp); 661 xfs_mount_t *mp = XFS_BHVTOM(bdp);
670 int error;
671 662
672 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */ 663 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
673 if (vfsp->vfs_flag & VFS_RDONLY) 664 if (vfsp->vfs_flag & VFS_RDONLY)
@@ -679,7 +670,7 @@ xfs_mntupdate(
679 mp->m_flags &= ~XFS_MOUNT_BARRIER; 670 mp->m_flags &= ~XFS_MOUNT_BARRIER;
680 } 671 }
681 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ 672 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
682 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); 673 bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
683 xfs_quiesce_fs(mp); 674 xfs_quiesce_fs(mp);
684 xfs_log_unmount_write(mp); 675 xfs_log_unmount_write(mp);
685 xfs_unmountfs_writesb(mp); 676 xfs_unmountfs_writesb(mp);
@@ -702,7 +693,7 @@ xfs_unmount_flush(
702 xfs_inode_t *rip = mp->m_rootip; 693 xfs_inode_t *rip = mp->m_rootip;
703 xfs_inode_t *rbmip; 694 xfs_inode_t *rbmip;
704 xfs_inode_t *rsumip = NULL; 695 xfs_inode_t *rsumip = NULL;
705 vnode_t *rvp = XFS_ITOV(rip); 696 bhv_vnode_t *rvp = XFS_ITOV(rip);
706 int error; 697 int error;
707 698
708 xfs_ilock(rip, XFS_ILOCK_EXCL); 699 xfs_ilock(rip, XFS_ILOCK_EXCL);
@@ -781,9 +772,9 @@ fscorrupt_out2:
781STATIC int 772STATIC int
782xfs_root( 773xfs_root(
783 bhv_desc_t *bdp, 774 bhv_desc_t *bdp,
784 vnode_t **vpp) 775 bhv_vnode_t **vpp)
785{ 776{
786 vnode_t *vp; 777 bhv_vnode_t *vp;
787 778
788 vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip); 779 vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip);
789 VN_HOLD(vp); 780 VN_HOLD(vp);
@@ -801,8 +792,8 @@ xfs_root(
801STATIC int 792STATIC int
802xfs_statvfs( 793xfs_statvfs(
803 bhv_desc_t *bdp, 794 bhv_desc_t *bdp,
804 xfs_statfs_t *statp, 795 bhv_statvfs_t *statp,
805 vnode_t *vp) 796 bhv_vnode_t *vp)
806{ 797{
807 __uint64_t fakeinos; 798 __uint64_t fakeinos;
808 xfs_extlen_t lsize; 799 xfs_extlen_t lsize;
@@ -900,7 +891,7 @@ xfs_sync(
900/* 891/*
901 * xfs sync routine for internal use 892 * xfs sync routine for internal use
902 * 893 *
903 * This routine supports all of the flags defined for the generic VFS_SYNC 894 * This routine supports all of the flags defined for the generic vfs_sync
904 * interface as explained above under xfs_sync. In the interests of not 895 * interface as explained above under xfs_sync. In the interests of not
905 * changing interfaces within the 6.5 family, additional internally- 896 * changing interfaces within the 6.5 family, additional internally-
906 * required functions are specified within a separate xflags parameter, 897 * required functions are specified within a separate xflags parameter,
@@ -917,7 +908,7 @@ xfs_sync_inodes(
917 xfs_inode_t *ip = NULL; 908 xfs_inode_t *ip = NULL;
918 xfs_inode_t *ip_next; 909 xfs_inode_t *ip_next;
919 xfs_buf_t *bp; 910 xfs_buf_t *bp;
920 vnode_t *vp = NULL; 911 bhv_vnode_t *vp = NULL;
921 int error; 912 int error;
922 int last_error; 913 int last_error;
923 uint64_t fflag; 914 uint64_t fflag;
@@ -1156,9 +1147,9 @@ xfs_sync_inodes(
1156 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1147 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1157 1148
1158 if (XFS_FORCED_SHUTDOWN(mp)) { 1149 if (XFS_FORCED_SHUTDOWN(mp)) {
1159 VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); 1150 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1160 } else { 1151 } else {
1161 VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF); 1152 bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
1162 } 1153 }
1163 1154
1164 xfs_ilock(ip, XFS_ILOCK_SHARED); 1155 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1178,8 +1169,8 @@ xfs_sync_inodes(
1178 * across calls to the buffer cache. 1169 * across calls to the buffer cache.
1179 */ 1170 */
1180 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1171 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1181 VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 1172 error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
1182 fflag, FI_NONE, error); 1173 -1, fflag, FI_NONE);
1183 xfs_ilock(ip, XFS_ILOCK_SHARED); 1174 xfs_ilock(ip, XFS_ILOCK_SHARED);
1184 } 1175 }
1185 1176
@@ -1231,9 +1222,7 @@ xfs_sync_inodes(
1231 * marker and free it. 1222 * marker and free it.
1232 */ 1223 */
1233 XFS_MOUNT_ILOCK(mp); 1224 XFS_MOUNT_ILOCK(mp);
1234
1235 IPOINTER_REMOVE(ip, mp); 1225 IPOINTER_REMOVE(ip, mp);
1236
1237 XFS_MOUNT_IUNLOCK(mp); 1226 XFS_MOUNT_IUNLOCK(mp);
1238 1227
1239 ASSERT(!(lock_flags & 1228 ASSERT(!(lock_flags &
@@ -1421,7 +1410,7 @@ xfs_sync_inodes(
1421/* 1410/*
1422 * xfs sync routine for internal use 1411 * xfs sync routine for internal use
1423 * 1412 *
1424 * This routine supports all of the flags defined for the generic VFS_SYNC 1413 * This routine supports all of the flags defined for the generic vfs_sync
1425 * interface as explained above under xfs_sync. In the interests of not 1414 * interface as explained above under xfs_sync. In the interests of not
1426 * changing interfaces within the 6.5 family, additional internally- 1415 * changing interfaces within the 6.5 family, additional internally-
1427 * required functions are specified within a separate xflags parameter, 1416 * required functions are specified within a separate xflags parameter,
@@ -1574,7 +1563,7 @@ xfs_syncsub(
1574STATIC int 1563STATIC int
1575xfs_vget( 1564xfs_vget(
1576 bhv_desc_t *bdp, 1565 bhv_desc_t *bdp,
1577 vnode_t **vpp, 1566 bhv_vnode_t **vpp,
1578 fid_t *fidp) 1567 fid_t *fidp)
1579{ 1568{
1580 xfs_mount_t *mp = XFS_BHVTOM(bdp); 1569 xfs_mount_t *mp = XFS_BHVTOM(bdp);
@@ -1657,10 +1646,10 @@ xfs_vget(
1657#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ 1646#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
1658 1647
1659STATIC unsigned long 1648STATIC unsigned long
1660suffix_strtoul(const char *cp, char **endp, unsigned int base) 1649suffix_strtoul(char *s, char **endp, unsigned int base)
1661{ 1650{
1662 int last, shift_left_factor = 0; 1651 int last, shift_left_factor = 0;
1663 char *value = (char *)cp; 1652 char *value = s;
1664 1653
1665 last = strlen(value) - 1; 1654 last = strlen(value) - 1;
1666 if (value[last] == 'K' || value[last] == 'k') { 1655 if (value[last] == 'K' || value[last] == 'k') {
@@ -1676,7 +1665,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base)
1676 value[last] = '\0'; 1665 value[last] = '\0';
1677 } 1666 }
1678 1667
1679 return simple_strtoul(cp, endp, base) << shift_left_factor; 1668 return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
1680} 1669}
1681 1670
1682STATIC int 1671STATIC int
@@ -1686,7 +1675,7 @@ xfs_parseargs(
1686 struct xfs_mount_args *args, 1675 struct xfs_mount_args *args,
1687 int update) 1676 int update)
1688{ 1677{
1689 struct vfs *vfsp = bhvtovfs(bhv); 1678 bhv_vfs_t *vfsp = bhvtovfs(bhv);
1690 char *this_char, *value, *eov; 1679 char *this_char, *value, *eov;
1691 int dsunit, dswidth, vol_dsunit, vol_dswidth; 1680 int dsunit, dswidth, vol_dsunit, vol_dswidth;
1692 int iosize; 1681 int iosize;
@@ -1708,42 +1697,48 @@ xfs_parseargs(
1708 1697
1709 if (!strcmp(this_char, MNTOPT_LOGBUFS)) { 1698 if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
1710 if (!value || !*value) { 1699 if (!value || !*value) {
1711 printk("XFS: %s option requires an argument\n", 1700 cmn_err(CE_WARN,
1701 "XFS: %s option requires an argument",
1712 this_char); 1702 this_char);
1713 return EINVAL; 1703 return EINVAL;
1714 } 1704 }
1715 args->logbufs = simple_strtoul(value, &eov, 10); 1705 args->logbufs = simple_strtoul(value, &eov, 10);
1716 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 1706 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
1717 if (!value || !*value) { 1707 if (!value || !*value) {
1718 printk("XFS: %s option requires an argument\n", 1708 cmn_err(CE_WARN,
1709 "XFS: %s option requires an argument",
1719 this_char); 1710 this_char);
1720 return EINVAL; 1711 return EINVAL;
1721 } 1712 }
1722 args->logbufsize = suffix_strtoul(value, &eov, 10); 1713 args->logbufsize = suffix_strtoul(value, &eov, 10);
1723 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 1714 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
1724 if (!value || !*value) { 1715 if (!value || !*value) {
1725 printk("XFS: %s option requires an argument\n", 1716 cmn_err(CE_WARN,
1717 "XFS: %s option requires an argument",
1726 this_char); 1718 this_char);
1727 return EINVAL; 1719 return EINVAL;
1728 } 1720 }
1729 strncpy(args->logname, value, MAXNAMELEN); 1721 strncpy(args->logname, value, MAXNAMELEN);
1730 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 1722 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
1731 if (!value || !*value) { 1723 if (!value || !*value) {
1732 printk("XFS: %s option requires an argument\n", 1724 cmn_err(CE_WARN,
1725 "XFS: %s option requires an argument",
1733 this_char); 1726 this_char);
1734 return EINVAL; 1727 return EINVAL;
1735 } 1728 }
1736 strncpy(args->mtpt, value, MAXNAMELEN); 1729 strncpy(args->mtpt, value, MAXNAMELEN);
1737 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 1730 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
1738 if (!value || !*value) { 1731 if (!value || !*value) {
1739 printk("XFS: %s option requires an argument\n", 1732 cmn_err(CE_WARN,
1733 "XFS: %s option requires an argument",
1740 this_char); 1734 this_char);
1741 return EINVAL; 1735 return EINVAL;
1742 } 1736 }
1743 strncpy(args->rtname, value, MAXNAMELEN); 1737 strncpy(args->rtname, value, MAXNAMELEN);
1744 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 1738 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
1745 if (!value || !*value) { 1739 if (!value || !*value) {
1746 printk("XFS: %s option requires an argument\n", 1740 cmn_err(CE_WARN,
1741 "XFS: %s option requires an argument",
1747 this_char); 1742 this_char);
1748 return EINVAL; 1743 return EINVAL;
1749 } 1744 }
@@ -1752,7 +1747,8 @@ xfs_parseargs(
1752 args->iosizelog = (uint8_t) iosize; 1747 args->iosizelog = (uint8_t) iosize;
1753 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 1748 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
1754 if (!value || !*value) { 1749 if (!value || !*value) {
1755 printk("XFS: %s option requires an argument\n", 1750 cmn_err(CE_WARN,
1751 "XFS: %s option requires an argument",
1756 this_char); 1752 this_char);
1757 return EINVAL; 1753 return EINVAL;
1758 } 1754 }
@@ -1761,7 +1757,8 @@ xfs_parseargs(
1761 args->iosizelog = ffs(iosize) - 1; 1757 args->iosizelog = ffs(iosize) - 1;
1762 } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { 1758 } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
1763 if (!value || !*value) { 1759 if (!value || !*value) {
1764 printk("XFS: %s option requires an argument\n", 1760 cmn_err(CE_WARN,
1761 "XFS: %s option requires an argument",
1765 this_char); 1762 this_char);
1766 return EINVAL; 1763 return EINVAL;
1767 } 1764 }
@@ -1782,7 +1779,8 @@ xfs_parseargs(
1782 } else if (!strcmp(this_char, MNTOPT_INO64)) { 1779 } else if (!strcmp(this_char, MNTOPT_INO64)) {
1783 args->flags |= XFSMNT_INO64; 1780 args->flags |= XFSMNT_INO64;
1784#if !XFS_BIG_INUMS 1781#if !XFS_BIG_INUMS
1785 printk("XFS: %s option not allowed on this system\n", 1782 cmn_err(CE_WARN,
1783 "XFS: %s option not allowed on this system",
1786 this_char); 1784 this_char);
1787 return EINVAL; 1785 return EINVAL;
1788#endif 1786#endif
@@ -1792,14 +1790,16 @@ xfs_parseargs(
1792 args->flags |= XFSMNT_SWALLOC; 1790 args->flags |= XFSMNT_SWALLOC;
1793 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 1791 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
1794 if (!value || !*value) { 1792 if (!value || !*value) {
1795 printk("XFS: %s option requires an argument\n", 1793 cmn_err(CE_WARN,
1794 "XFS: %s option requires an argument",
1796 this_char); 1795 this_char);
1797 return EINVAL; 1796 return EINVAL;
1798 } 1797 }
1799 dsunit = simple_strtoul(value, &eov, 10); 1798 dsunit = simple_strtoul(value, &eov, 10);
1800 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 1799 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
1801 if (!value || !*value) { 1800 if (!value || !*value) {
1802 printk("XFS: %s option requires an argument\n", 1801 cmn_err(CE_WARN,
1802 "XFS: %s option requires an argument",
1803 this_char); 1803 this_char);
1804 return EINVAL; 1804 return EINVAL;
1805 } 1805 }
@@ -1807,7 +1807,8 @@ xfs_parseargs(
1807 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 1807 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
1808 args->flags &= ~XFSMNT_32BITINODES; 1808 args->flags &= ~XFSMNT_32BITINODES;
1809#if !XFS_BIG_INUMS 1809#if !XFS_BIG_INUMS
1810 printk("XFS: %s option not allowed on this system\n", 1810 cmn_err(CE_WARN,
1811 "XFS: %s option not allowed on this system",
1811 this_char); 1812 this_char);
1812 return EINVAL; 1813 return EINVAL;
1813#endif 1814#endif
@@ -1831,36 +1832,41 @@ xfs_parseargs(
1831 args->flags &= ~XFSMNT_ATTR2; 1832 args->flags &= ~XFSMNT_ATTR2;
1832 } else if (!strcmp(this_char, "osyncisdsync")) { 1833 } else if (!strcmp(this_char, "osyncisdsync")) {
1833 /* no-op, this is now the default */ 1834 /* no-op, this is now the default */
1834printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); 1835 cmn_err(CE_WARN,
1836 "XFS: osyncisdsync is now the default, option is deprecated.");
1835 } else if (!strcmp(this_char, "irixsgid")) { 1837 } else if (!strcmp(this_char, "irixsgid")) {
1836printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n"); 1838 cmn_err(CE_WARN,
1839 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
1837 } else { 1840 } else {
1838 printk("XFS: unknown mount option [%s].\n", this_char); 1841 cmn_err(CE_WARN,
1842 "XFS: unknown mount option [%s].", this_char);
1839 return EINVAL; 1843 return EINVAL;
1840 } 1844 }
1841 } 1845 }
1842 1846
1843 if (args->flags & XFSMNT_NORECOVERY) { 1847 if (args->flags & XFSMNT_NORECOVERY) {
1844 if ((vfsp->vfs_flag & VFS_RDONLY) == 0) { 1848 if ((vfsp->vfs_flag & VFS_RDONLY) == 0) {
1845 printk("XFS: no-recovery mounts must be read-only.\n"); 1849 cmn_err(CE_WARN,
1850 "XFS: no-recovery mounts must be read-only.");
1846 return EINVAL; 1851 return EINVAL;
1847 } 1852 }
1848 } 1853 }
1849 1854
1850 if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { 1855 if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
1851 printk( 1856 cmn_err(CE_WARN,
1852 "XFS: sunit and swidth options incompatible with the noalign option\n"); 1857 "XFS: sunit and swidth options incompatible with the noalign option");
1853 return EINVAL; 1858 return EINVAL;
1854 } 1859 }
1855 1860
1856 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 1861 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
1857 printk("XFS: sunit and swidth must be specified together\n"); 1862 cmn_err(CE_WARN,
1863 "XFS: sunit and swidth must be specified together");
1858 return EINVAL; 1864 return EINVAL;
1859 } 1865 }
1860 1866
1861 if (dsunit && (dswidth % dsunit != 0)) { 1867 if (dsunit && (dswidth % dsunit != 0)) {
1862 printk( 1868 cmn_err(CE_WARN,
1863 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n", 1869 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)",
1864 dswidth, dsunit); 1870 dswidth, dsunit);
1865 return EINVAL; 1871 return EINVAL;
1866 } 1872 }
@@ -1907,7 +1913,7 @@ xfs_showargs(
1907 }; 1913 };
1908 struct proc_xfs_info *xfs_infop; 1914 struct proc_xfs_info *xfs_infop;
1909 struct xfs_mount *mp = XFS_BHVTOM(bhv); 1915 struct xfs_mount *mp = XFS_BHVTOM(bhv);
1910 struct vfs *vfsp = XFS_MTOVFS(mp); 1916 struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
1911 1917
1912 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { 1918 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
1913 if (mp->m_flags & xfs_infop->flag) 1919 if (mp->m_flags & xfs_infop->flag)
@@ -1967,7 +1973,7 @@ xfs_freeze(
1967} 1973}
1968 1974
1969 1975
1970vfsops_t xfs_vfsops = { 1976bhv_vfsops_t xfs_vfsops = {
1971 BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS), 1977 BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
1972 .vfs_parseargs = xfs_parseargs, 1978 .vfs_parseargs = xfs_parseargs,
1973 .vfs_showargs = xfs_showargs, 1979 .vfs_showargs = xfs_showargs,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7027ae68ee38..23cfa5837728 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -16,8 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 18
19#include <linux/capability.h>
20
21#include "xfs.h" 19#include "xfs.h"
22#include "xfs_fs.h" 20#include "xfs_fs.h"
23#include "xfs_types.h" 21#include "xfs_types.h"
@@ -27,7 +25,6 @@
27#include "xfs_trans.h" 25#include "xfs_trans.h"
28#include "xfs_sb.h" 26#include "xfs_sb.h"
29#include "xfs_ag.h" 27#include "xfs_ag.h"
30#include "xfs_dir.h"
31#include "xfs_dir2.h" 28#include "xfs_dir2.h"
32#include "xfs_dmapi.h" 29#include "xfs_dmapi.h"
33#include "xfs_mount.h" 30#include "xfs_mount.h"
@@ -35,13 +32,11 @@
35#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
36#include "xfs_alloc_btree.h" 33#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 34#include "xfs_ialloc_btree.h"
38#include "xfs_dir_sf.h"
39#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
40#include "xfs_attr_sf.h" 36#include "xfs_attr_sf.h"
41#include "xfs_dinode.h" 37#include "xfs_dinode.h"
42#include "xfs_inode.h" 38#include "xfs_inode.h"
43#include "xfs_inode_item.h" 39#include "xfs_inode_item.h"
44#include "xfs_dir_leaf.h"
45#include "xfs_itable.h" 40#include "xfs_itable.h"
46#include "xfs_btree.h" 41#include "xfs_btree.h"
47#include "xfs_ialloc.h" 42#include "xfs_ialloc.h"
@@ -58,32 +53,14 @@
58#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
59#include "xfs_mac.h" 54#include "xfs_mac.h"
60 55
61
62/*
63 * The maximum pathlen is 1024 bytes. Since the minimum file system
64 * blocksize is 512 bytes, we can get a max of 2 extents back from
65 * bmapi.
66 */
67#define SYMLINK_MAPS 2
68
69/*
70 * For xfs, we check that the file isn't too big to be opened by this kernel.
71 * No other open action is required for regular files. Devices are handled
72 * through the specfs file system, pipes through fifofs. Device and
73 * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
74 * when a new vnode is first looked up or created.
75 */
76STATIC int 56STATIC int
77xfs_open( 57xfs_open(
78 bhv_desc_t *bdp, 58 bhv_desc_t *bdp,
79 cred_t *credp) 59 cred_t *credp)
80{ 60{
81 int mode; 61 int mode;
82 vnode_t *vp; 62 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
83 xfs_inode_t *ip; 63 xfs_inode_t *ip = XFS_BHVTOI(bdp);
84
85 vp = BHV_TO_VNODE(bdp);
86 ip = XFS_BHVTOI(bdp);
87 64
88 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 65 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
89 return XFS_ERROR(EIO); 66 return XFS_ERROR(EIO);
@@ -101,6 +78,35 @@ xfs_open(
101 return 0; 78 return 0;
102} 79}
103 80
81STATIC int
82xfs_close(
83 bhv_desc_t *bdp,
84 int flags,
85 lastclose_t lastclose,
86 cred_t *credp)
87{
88 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
89 xfs_inode_t *ip = XFS_BHVTOI(bdp);
90
91 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
92 return XFS_ERROR(EIO);
93
94 if (lastclose != L_TRUE || !VN_ISREG(vp))
95 return 0;
96
97 /*
98 * If we previously truncated this file and removed old data in
99 * the process, we want to initiate "early" writeout on the last
100 * close. This is an attempt to combat the notorious NULL files
101 * problem which is particularly noticeable from a truncate down,
102 * buffered (re-)write (delalloc), followed by a crash. What we
103 * are effectively doing here is significantly reducing the time
104 * window where we'd otherwise be exposed to that problem.
105 */
106 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
107 return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
108 return 0;
109}
104 110
105/* 111/*
106 * xfs_getattr 112 * xfs_getattr
@@ -108,13 +114,13 @@ xfs_open(
108STATIC int 114STATIC int
109xfs_getattr( 115xfs_getattr(
110 bhv_desc_t *bdp, 116 bhv_desc_t *bdp,
111 vattr_t *vap, 117 bhv_vattr_t *vap,
112 int flags, 118 int flags,
113 cred_t *credp) 119 cred_t *credp)
114{ 120{
115 xfs_inode_t *ip; 121 xfs_inode_t *ip;
116 xfs_mount_t *mp; 122 xfs_mount_t *mp;
117 vnode_t *vp; 123 bhv_vnode_t *vp;
118 124
119 vp = BHV_TO_VNODE(bdp); 125 vp = BHV_TO_VNODE(bdp);
120 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 126 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -241,7 +247,7 @@ xfs_getattr(
241int 247int
242xfs_setattr( 248xfs_setattr(
243 bhv_desc_t *bdp, 249 bhv_desc_t *bdp,
244 vattr_t *vap, 250 bhv_vattr_t *vap,
245 int flags, 251 int flags,
246 cred_t *credp) 252 cred_t *credp)
247{ 253{
@@ -255,7 +261,7 @@ xfs_setattr(
255 uid_t uid=0, iuid=0; 261 uid_t uid=0, iuid=0;
256 gid_t gid=0, igid=0; 262 gid_t gid=0, igid=0;
257 int timeflags = 0; 263 int timeflags = 0;
258 vnode_t *vp; 264 bhv_vnode_t *vp;
259 xfs_prid_t projid=0, iprojid=0; 265 xfs_prid_t projid=0, iprojid=0;
260 int mandlock_before, mandlock_after; 266 int mandlock_before, mandlock_after;
261 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 267 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
@@ -347,7 +353,6 @@ xfs_setattr(
347 */ 353 */
348 tp = NULL; 354 tp = NULL;
349 lock_flags = XFS_ILOCK_EXCL; 355 lock_flags = XFS_ILOCK_EXCL;
350 ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1);
351 if (flags & ATTR_NOLOCK) 356 if (flags & ATTR_NOLOCK)
352 need_iolock = 0; 357 need_iolock = 0;
353 if (!(mask & XFS_AT_SIZE)) { 358 if (!(mask & XFS_AT_SIZE)) {
@@ -666,9 +671,17 @@ xfs_setattr(
666 ((ip->i_d.di_nlink != 0 || 671 ((ip->i_d.di_nlink != 0 ||
667 !(mp->m_flags & XFS_MOUNT_WSYNC)) 672 !(mp->m_flags & XFS_MOUNT_WSYNC))
668 ? 1 : 0)); 673 ? 1 : 0));
669 if (code) { 674 if (code)
670 goto abort_return; 675 goto abort_return;
671 } 676 /*
677 * Truncated "down", so we're removing references
678 * to old data here - if we now delay flushing for
679 * a long time, we expose ourselves unduly to the
680 * notorious NULL files problem. So, we mark this
681 * vnode and flush it when the file is closed, and
682 * do not wait the usual (long) time for writeout.
683 */
684 VTRUNCATE(vp);
672 } 685 }
673 /* 686 /*
674 * Have to do this even if the file's size doesn't change. 687 * Have to do this even if the file's size doesn't change.
@@ -800,6 +813,8 @@ xfs_setattr(
800 di_flags |= XFS_DIFLAG_NODUMP; 813 di_flags |= XFS_DIFLAG_NODUMP;
801 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 814 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
802 di_flags |= XFS_DIFLAG_PROJINHERIT; 815 di_flags |= XFS_DIFLAG_PROJINHERIT;
816 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
817 di_flags |= XFS_DIFLAG_NODEFRAG;
803 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 818 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
804 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 819 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
805 di_flags |= XFS_DIFLAG_RTINHERIT; 820 di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -869,7 +884,7 @@ xfs_setattr(
869 */ 884 */
870 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); 885 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
871 if (mandlock_before != mandlock_after) { 886 if (mandlock_before != mandlock_after) {
872 VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, 887 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING,
873 mandlock_after); 888 mandlock_after);
874 } 889 }
875 890
@@ -936,6 +951,13 @@ xfs_access(
936 951
937 952
938/* 953/*
954 * The maximum pathlen is 1024 bytes. Since the minimum file system
955 * blocksize is 512 bytes, we can get a max of 2 extents back from
956 * bmapi.
957 */
958#define SYMLINK_MAPS 2
959
960/*
939 * xfs_readlink 961 * xfs_readlink
940 * 962 *
941 */ 963 */
@@ -950,7 +972,7 @@ xfs_readlink(
950 int count; 972 int count;
951 xfs_off_t offset; 973 xfs_off_t offset;
952 int pathlen; 974 int pathlen;
953 vnode_t *vp; 975 bhv_vnode_t *vp;
954 int error = 0; 976 int error = 0;
955 xfs_mount_t *mp; 977 xfs_mount_t *mp;
956 int nmaps; 978 int nmaps;
@@ -1000,7 +1022,7 @@ xfs_readlink(
1000 nmaps = SYMLINK_MAPS; 1022 nmaps = SYMLINK_MAPS;
1001 1023
1002 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 1024 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
1003 0, NULL, 0, mval, &nmaps, NULL); 1025 0, NULL, 0, mval, &nmaps, NULL, NULL);
1004 1026
1005 if (error) { 1027 if (error) {
1006 goto error_return; 1028 goto error_return;
@@ -1208,8 +1230,8 @@ xfs_inactive_free_eofblocks(
1208 1230
1209 nimaps = 1; 1231 nimaps = 1;
1210 xfs_ilock(ip, XFS_ILOCK_SHARED); 1232 xfs_ilock(ip, XFS_ILOCK_SHARED);
1211 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, 1233 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
1212 NULL, 0, &imap, &nimaps, NULL); 1234 NULL, 0, &imap, &nimaps, NULL, NULL);
1213 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1235 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1214 1236
1215 if (!error && (nimaps != 0) && 1237 if (!error && (nimaps != 0) &&
@@ -1338,7 +1360,7 @@ xfs_inactive_symlink_rmt(
1338 nmaps = ARRAY_SIZE(mval); 1360 nmaps = ARRAY_SIZE(mval);
1339 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 1361 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
1340 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 1362 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
1341 &free_list))) 1363 &free_list, NULL)))
1342 goto error0; 1364 goto error0;
1343 /* 1365 /*
1344 * Invalidate the block(s). 1366 * Invalidate the block(s).
@@ -1353,7 +1375,7 @@ xfs_inactive_symlink_rmt(
1353 * Unmap the dead block(s) to the free_list. 1375 * Unmap the dead block(s) to the free_list.
1354 */ 1376 */
1355 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 1377 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
1356 &first_block, &free_list, &done))) 1378 &first_block, &free_list, NULL, &done)))
1357 goto error1; 1379 goto error1;
1358 ASSERT(done); 1380 ASSERT(done);
1359 /* 1381 /*
@@ -1469,9 +1491,6 @@ xfs_inactive_symlink_local(
1469 return 0; 1491 return 0;
1470} 1492}
1471 1493
1472/*
1473 *
1474 */
1475STATIC int 1494STATIC int
1476xfs_inactive_attrs( 1495xfs_inactive_attrs(
1477 xfs_inode_t *ip, 1496 xfs_inode_t *ip,
@@ -1524,16 +1543,16 @@ xfs_release(
1524 bhv_desc_t *bdp) 1543 bhv_desc_t *bdp)
1525{ 1544{
1526 xfs_inode_t *ip; 1545 xfs_inode_t *ip;
1527 vnode_t *vp; 1546 bhv_vnode_t *vp;
1528 xfs_mount_t *mp; 1547 xfs_mount_t *mp;
1529 int error; 1548 int error;
1530 1549
1531 vp = BHV_TO_VNODE(bdp); 1550 vp = BHV_TO_VNODE(bdp);
1532 ip = XFS_BHVTOI(bdp); 1551 ip = XFS_BHVTOI(bdp);
1552 mp = ip->i_mount;
1533 1553
1534 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { 1554 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
1535 return 0; 1555 return 0;
1536 }
1537 1556
1538 /* If this is a read-only mount, don't do this (would generate I/O) */ 1557 /* If this is a read-only mount, don't do this (would generate I/O) */
1539 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1558 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
@@ -1545,8 +1564,6 @@ xfs_release(
1545 return 0; 1564 return 0;
1546#endif 1565#endif
1547 1566
1548 mp = ip->i_mount;
1549
1550 if (ip->i_d.di_nlink != 0) { 1567 if (ip->i_d.di_nlink != 0) {
1551 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1568 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1552 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1569 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1579,8 +1596,8 @@ xfs_inactive(
1579 cred_t *credp) 1596 cred_t *credp)
1580{ 1597{
1581 xfs_inode_t *ip; 1598 xfs_inode_t *ip;
1582 vnode_t *vp; 1599 bhv_vnode_t *vp;
1583 xfs_bmap_free_t free_list; 1600 xfs_bmap_free_t free_list;
1584 xfs_fsblock_t first_block; 1601 xfs_fsblock_t first_block;
1585 int committed; 1602 int committed;
1586 xfs_trans_t *tp; 1603 xfs_trans_t *tp;
@@ -1760,7 +1777,7 @@ xfs_inactive(
1760 cmn_err(CE_NOTE, 1777 cmn_err(CE_NOTE,
1761 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1778 "xfs_inactive: xfs_ifree() returned an error = %d on %s",
1762 error, mp->m_fsname); 1779 error, mp->m_fsname);
1763 xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); 1780 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1764 } 1781 }
1765 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1782 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
1766 } else { 1783 } else {
@@ -1795,17 +1812,17 @@ xfs_inactive(
1795STATIC int 1812STATIC int
1796xfs_lookup( 1813xfs_lookup(
1797 bhv_desc_t *dir_bdp, 1814 bhv_desc_t *dir_bdp,
1798 vname_t *dentry, 1815 bhv_vname_t *dentry,
1799 vnode_t **vpp, 1816 bhv_vnode_t **vpp,
1800 int flags, 1817 int flags,
1801 vnode_t *rdir, 1818 bhv_vnode_t *rdir,
1802 cred_t *credp) 1819 cred_t *credp)
1803{ 1820{
1804 xfs_inode_t *dp, *ip; 1821 xfs_inode_t *dp, *ip;
1805 xfs_ino_t e_inum; 1822 xfs_ino_t e_inum;
1806 int error; 1823 int error;
1807 uint lock_mode; 1824 uint lock_mode;
1808 vnode_t *dir_vp; 1825 bhv_vnode_t *dir_vp;
1809 1826
1810 dir_vp = BHV_TO_VNODE(dir_bdp); 1827 dir_vp = BHV_TO_VNODE(dir_bdp);
1811 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 1828 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
@@ -1832,15 +1849,15 @@ xfs_lookup(
1832STATIC int 1849STATIC int
1833xfs_create( 1850xfs_create(
1834 bhv_desc_t *dir_bdp, 1851 bhv_desc_t *dir_bdp,
1835 vname_t *dentry, 1852 bhv_vname_t *dentry,
1836 vattr_t *vap, 1853 bhv_vattr_t *vap,
1837 vnode_t **vpp, 1854 bhv_vnode_t **vpp,
1838 cred_t *credp) 1855 cred_t *credp)
1839{ 1856{
1840 char *name = VNAME(dentry); 1857 char *name = VNAME(dentry);
1841 vnode_t *dir_vp; 1858 bhv_vnode_t *dir_vp;
1842 xfs_inode_t *dp, *ip; 1859 xfs_inode_t *dp, *ip;
1843 vnode_t *vp=NULL; 1860 bhv_vnode_t *vp = NULL;
1844 xfs_trans_t *tp; 1861 xfs_trans_t *tp;
1845 xfs_mount_t *mp; 1862 xfs_mount_t *mp;
1846 xfs_dev_t rdev; 1863 xfs_dev_t rdev;
@@ -1938,8 +1955,7 @@ xfs_create(
1938 if (error) 1955 if (error)
1939 goto error_return; 1956 goto error_return;
1940 1957
1941 if (resblks == 0 && 1958 if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
1942 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
1943 goto error_return; 1959 goto error_return;
1944 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; 1960 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
1945 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, 1961 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
@@ -1970,9 +1986,9 @@ xfs_create(
1970 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1986 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1971 dp_joined_to_trans = B_TRUE; 1987 dp_joined_to_trans = B_TRUE;
1972 1988
1973 error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, 1989 error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
1974 &first_block, &free_list, 1990 &first_block, &free_list, resblks ?
1975 resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1991 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1976 if (error) { 1992 if (error) {
1977 ASSERT(error != ENOSPC); 1993 ASSERT(error != ENOSPC);
1978 goto abort_return; 1994 goto abort_return;
@@ -2026,7 +2042,7 @@ xfs_create(
2026 * Propagate the fact that the vnode changed after the 2042 * Propagate the fact that the vnode changed after the
2027 * xfs_inode locks have been released. 2043 * xfs_inode locks have been released.
2028 */ 2044 */
2029 VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); 2045 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3);
2030 2046
2031 *vpp = vp; 2047 *vpp = vp;
2032 2048
@@ -2107,7 +2123,7 @@ int xfs_rm_attempts;
2107STATIC int 2123STATIC int
2108xfs_lock_dir_and_entry( 2124xfs_lock_dir_and_entry(
2109 xfs_inode_t *dp, 2125 xfs_inode_t *dp,
2110 vname_t *dentry, 2126 bhv_vname_t *dentry,
2111 xfs_inode_t *ip) /* inode of entry 'name' */ 2127 xfs_inode_t *ip) /* inode of entry 'name' */
2112{ 2128{
2113 int attempts; 2129 int attempts;
@@ -2321,10 +2337,10 @@ int remove_which_error_return = 0;
2321STATIC int 2337STATIC int
2322xfs_remove( 2338xfs_remove(
2323 bhv_desc_t *dir_bdp, 2339 bhv_desc_t *dir_bdp,
2324 vname_t *dentry, 2340 bhv_vname_t *dentry,
2325 cred_t *credp) 2341 cred_t *credp)
2326{ 2342{
2327 vnode_t *dir_vp; 2343 bhv_vnode_t *dir_vp;
2328 char *name = VNAME(dentry); 2344 char *name = VNAME(dentry);
2329 xfs_inode_t *dp, *ip; 2345 xfs_inode_t *dp, *ip;
2330 xfs_trans_t *tp = NULL; 2346 xfs_trans_t *tp = NULL;
@@ -2448,8 +2464,8 @@ xfs_remove(
2448 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2464 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2449 */ 2465 */
2450 XFS_BMAP_INIT(&free_list, &first_block); 2466 XFS_BMAP_INIT(&free_list, &first_block);
2451 error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, 2467 error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino,
2452 &first_block, &free_list, 0); 2468 &first_block, &free_list, 0);
2453 if (error) { 2469 if (error) {
2454 ASSERT(error != ENOENT); 2470 ASSERT(error != ENOENT);
2455 REMOVE_DEBUG_TRACE(__LINE__); 2471 REMOVE_DEBUG_TRACE(__LINE__);
@@ -2511,7 +2527,7 @@ xfs_remove(
2511 /* 2527 /*
2512 * Let interposed file systems know about removed links. 2528 * Let interposed file systems know about removed links.
2513 */ 2529 */
2514 VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); 2530 bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero);
2515 2531
2516 IRELE(ip); 2532 IRELE(ip);
2517 2533
@@ -2564,8 +2580,8 @@ xfs_remove(
2564STATIC int 2580STATIC int
2565xfs_link( 2581xfs_link(
2566 bhv_desc_t *target_dir_bdp, 2582 bhv_desc_t *target_dir_bdp,
2567 vnode_t *src_vp, 2583 bhv_vnode_t *src_vp,
2568 vname_t *dentry, 2584 bhv_vname_t *dentry,
2569 cred_t *credp) 2585 cred_t *credp)
2570{ 2586{
2571 xfs_inode_t *tdp, *sip; 2587 xfs_inode_t *tdp, *sip;
@@ -2577,7 +2593,7 @@ xfs_link(
2577 xfs_fsblock_t first_block; 2593 xfs_fsblock_t first_block;
2578 int cancel_flags; 2594 int cancel_flags;
2579 int committed; 2595 int committed;
2580 vnode_t *target_dir_vp; 2596 bhv_vnode_t *target_dir_vp;
2581 int resblks; 2597 int resblks;
2582 char *target_name = VNAME(dentry); 2598 char *target_name = VNAME(dentry);
2583 int target_namelen; 2599 int target_namelen;
@@ -2587,8 +2603,7 @@ xfs_link(
2587 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2603 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
2588 2604
2589 target_namelen = VNAMELEN(dentry); 2605 target_namelen = VNAMELEN(dentry);
2590 if (VN_ISDIR(src_vp)) 2606 ASSERT(!VN_ISDIR(src_vp));
2591 return XFS_ERROR(EPERM);
2592 2607
2593 sip = xfs_vtoi(src_vp); 2608 sip = xfs_vtoi(src_vp);
2594 tdp = XFS_BHVTOI(target_dir_bdp); 2609 tdp = XFS_BHVTOI(target_dir_bdp);
@@ -2668,13 +2683,12 @@ xfs_link(
2668 } 2683 }
2669 2684
2670 if (resblks == 0 && 2685 if (resblks == 0 &&
2671 (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, 2686 (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen)))
2672 target_namelen)))
2673 goto error_return; 2687 goto error_return;
2674 2688
2675 XFS_BMAP_INIT(&free_list, &first_block); 2689 XFS_BMAP_INIT(&free_list, &first_block);
2676 2690
2677 error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, 2691 error = xfs_dir_createname(tp, tdp, target_name, target_namelen,
2678 sip->i_ino, &first_block, &free_list, 2692 sip->i_ino, &first_block, &free_list,
2679 resblks); 2693 resblks);
2680 if (error) 2694 if (error)
@@ -2684,9 +2698,8 @@ xfs_link(
2684 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2698 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
2685 2699
2686 error = xfs_bumplink(tp, sip); 2700 error = xfs_bumplink(tp, sip);
2687 if (error) { 2701 if (error)
2688 goto abort_return; 2702 goto abort_return;
2689 }
2690 2703
2691 /* 2704 /*
2692 * If this is a synchronous mount, make sure that the 2705 * If this is a synchronous mount, make sure that the
@@ -2704,9 +2717,8 @@ xfs_link(
2704 } 2717 }
2705 2718
2706 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2719 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2707 if (error) { 2720 if (error)
2708 goto std_return; 2721 goto std_return;
2709 }
2710 2722
2711 /* Fall through to std_return with error = 0. */ 2723 /* Fall through to std_return with error = 0. */
2712std_return: 2724std_return:
@@ -2727,6 +2739,8 @@ std_return:
2727 xfs_trans_cancel(tp, cancel_flags); 2739 xfs_trans_cancel(tp, cancel_flags);
2728 goto std_return; 2740 goto std_return;
2729} 2741}
2742
2743
2730/* 2744/*
2731 * xfs_mkdir 2745 * xfs_mkdir
2732 * 2746 *
@@ -2734,15 +2748,15 @@ std_return:
2734STATIC int 2748STATIC int
2735xfs_mkdir( 2749xfs_mkdir(
2736 bhv_desc_t *dir_bdp, 2750 bhv_desc_t *dir_bdp,
2737 vname_t *dentry, 2751 bhv_vname_t *dentry,
2738 vattr_t *vap, 2752 bhv_vattr_t *vap,
2739 vnode_t **vpp, 2753 bhv_vnode_t **vpp,
2740 cred_t *credp) 2754 cred_t *credp)
2741{ 2755{
2742 char *dir_name = VNAME(dentry); 2756 char *dir_name = VNAME(dentry);
2743 xfs_inode_t *dp; 2757 xfs_inode_t *dp;
2744 xfs_inode_t *cdp; /* inode of created dir */ 2758 xfs_inode_t *cdp; /* inode of created dir */
2745 vnode_t *cvp; /* vnode of created dir */ 2759 bhv_vnode_t *cvp; /* vnode of created dir */
2746 xfs_trans_t *tp; 2760 xfs_trans_t *tp;
2747 xfs_mount_t *mp; 2761 xfs_mount_t *mp;
2748 int cancel_flags; 2762 int cancel_flags;
@@ -2750,7 +2764,7 @@ xfs_mkdir(
2750 int committed; 2764 int committed;
2751 xfs_bmap_free_t free_list; 2765 xfs_bmap_free_t free_list;
2752 xfs_fsblock_t first_block; 2766 xfs_fsblock_t first_block;
2753 vnode_t *dir_vp; 2767 bhv_vnode_t *dir_vp;
2754 boolean_t dp_joined_to_trans; 2768 boolean_t dp_joined_to_trans;
2755 boolean_t created = B_FALSE; 2769 boolean_t created = B_FALSE;
2756 int dm_event_sent = 0; 2770 int dm_event_sent = 0;
@@ -2840,7 +2854,7 @@ xfs_mkdir(
2840 goto error_return; 2854 goto error_return;
2841 2855
2842 if (resblks == 0 && 2856 if (resblks == 0 &&
2843 (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) 2857 (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen)))
2844 goto error_return; 2858 goto error_return;
2845 /* 2859 /*
2846 * create the directory inode. 2860 * create the directory inode.
@@ -2867,9 +2881,9 @@ xfs_mkdir(
2867 2881
2868 XFS_BMAP_INIT(&free_list, &first_block); 2882 XFS_BMAP_INIT(&free_list, &first_block);
2869 2883
2870 error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, 2884 error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino,
2871 cdp->i_ino, &first_block, &free_list, 2885 &first_block, &free_list, resblks ?
2872 resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2886 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
2873 if (error) { 2887 if (error) {
2874 ASSERT(error != ENOSPC); 2888 ASSERT(error != ENOSPC);
2875 goto error1; 2889 goto error1;
@@ -2883,16 +2897,14 @@ xfs_mkdir(
2883 */ 2897 */
2884 dp->i_gen++; 2898 dp->i_gen++;
2885 2899
2886 error = XFS_DIR_INIT(mp, tp, cdp, dp); 2900 error = xfs_dir_init(tp, cdp, dp);
2887 if (error) { 2901 if (error)
2888 goto error2; 2902 goto error2;
2889 }
2890 2903
2891 cdp->i_gen = 1; 2904 cdp->i_gen = 1;
2892 error = xfs_bumplink(tp, dp); 2905 error = xfs_bumplink(tp, dp);
2893 if (error) { 2906 if (error)
2894 goto error2; 2907 goto error2;
2895 }
2896 2908
2897 cvp = XFS_ITOV(cdp); 2909 cvp = XFS_ITOV(cdp);
2898 2910
@@ -2969,7 +2981,7 @@ std_return:
2969STATIC int 2981STATIC int
2970xfs_rmdir( 2982xfs_rmdir(
2971 bhv_desc_t *dir_bdp, 2983 bhv_desc_t *dir_bdp,
2972 vname_t *dentry, 2984 bhv_vname_t *dentry,
2973 cred_t *credp) 2985 cred_t *credp)
2974{ 2986{
2975 char *name = VNAME(dentry); 2987 char *name = VNAME(dentry);
@@ -2982,7 +2994,7 @@ xfs_rmdir(
2982 xfs_fsblock_t first_block; 2994 xfs_fsblock_t first_block;
2983 int cancel_flags; 2995 int cancel_flags;
2984 int committed; 2996 int committed;
2985 vnode_t *dir_vp; 2997 bhv_vnode_t *dir_vp;
2986 int dm_di_mode = 0; 2998 int dm_di_mode = 0;
2987 int last_cdp_link; 2999 int last_cdp_link;
2988 int namelen; 3000 int namelen;
@@ -3101,16 +3113,15 @@ xfs_rmdir(
3101 error = XFS_ERROR(ENOTEMPTY); 3113 error = XFS_ERROR(ENOTEMPTY);
3102 goto error_return; 3114 goto error_return;
3103 } 3115 }
3104 if (!XFS_DIR_ISEMPTY(mp, cdp)) { 3116 if (!xfs_dir_isempty(cdp)) {
3105 error = XFS_ERROR(ENOTEMPTY); 3117 error = XFS_ERROR(ENOTEMPTY);
3106 goto error_return; 3118 goto error_return;
3107 } 3119 }
3108 3120
3109 error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, 3121 error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino,
3110 &first_block, &free_list, resblks); 3122 &first_block, &free_list, resblks);
3111 if (error) { 3123 if (error)
3112 goto error1; 3124 goto error1;
3113 }
3114 3125
3115 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3126 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3116 3127
@@ -3181,7 +3192,7 @@ xfs_rmdir(
3181 /* 3192 /*
3182 * Let interposed file systems know about removed links. 3193 * Let interposed file systems know about removed links.
3183 */ 3194 */
3184 VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); 3195 bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link);
3185 3196
3186 IRELE(cdp); 3197 IRELE(cdp);
3187 3198
@@ -3209,8 +3220,6 @@ xfs_rmdir(
3209 3220
3210 3221
3211/* 3222/*
3212 * xfs_readdir
3213 *
3214 * Read dp's entries starting at uiop->uio_offset and translate them into 3223 * Read dp's entries starting at uiop->uio_offset and translate them into
3215 * bufsize bytes worth of struct dirents starting at bufbase. 3224 * bufsize bytes worth of struct dirents starting at bufbase.
3216 */ 3225 */
@@ -3230,28 +3239,23 @@ xfs_readdir(
3230 (inst_t *)__return_address); 3239 (inst_t *)__return_address);
3231 dp = XFS_BHVTOI(dir_bdp); 3240 dp = XFS_BHVTOI(dir_bdp);
3232 3241
3233 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { 3242 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
3234 return XFS_ERROR(EIO); 3243 return XFS_ERROR(EIO);
3235 }
3236 3244
3237 lock_mode = xfs_ilock_map_shared(dp); 3245 lock_mode = xfs_ilock_map_shared(dp);
3238 error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); 3246 error = xfs_dir_getdents(tp, dp, uiop, eofp);
3239 xfs_iunlock_map_shared(dp, lock_mode); 3247 xfs_iunlock_map_shared(dp, lock_mode);
3240 return error; 3248 return error;
3241} 3249}
3242 3250
3243 3251
3244/*
3245 * xfs_symlink
3246 *
3247 */
3248STATIC int 3252STATIC int
3249xfs_symlink( 3253xfs_symlink(
3250 bhv_desc_t *dir_bdp, 3254 bhv_desc_t *dir_bdp,
3251 vname_t *dentry, 3255 bhv_vname_t *dentry,
3252 vattr_t *vap, 3256 bhv_vattr_t *vap,
3253 char *target_path, 3257 char *target_path,
3254 vnode_t **vpp, 3258 bhv_vnode_t **vpp,
3255 cred_t *credp) 3259 cred_t *credp)
3256{ 3260{
3257 xfs_trans_t *tp; 3261 xfs_trans_t *tp;
@@ -3263,7 +3267,7 @@ xfs_symlink(
3263 xfs_bmap_free_t free_list; 3267 xfs_bmap_free_t free_list;
3264 xfs_fsblock_t first_block; 3268 xfs_fsblock_t first_block;
3265 boolean_t dp_joined_to_trans; 3269 boolean_t dp_joined_to_trans;
3266 vnode_t *dir_vp; 3270 bhv_vnode_t *dir_vp;
3267 uint cancel_flags; 3271 uint cancel_flags;
3268 int committed; 3272 int committed;
3269 xfs_fileoff_t first_fsb; 3273 xfs_fileoff_t first_fsb;
@@ -3308,7 +3312,7 @@ xfs_symlink(
3308 int len, total; 3312 int len, total;
3309 char *path; 3313 char *path;
3310 3314
3311 for(total = 0, path = target_path; total < pathlen;) { 3315 for (total = 0, path = target_path; total < pathlen;) {
3312 /* 3316 /*
3313 * Skip any slashes. 3317 * Skip any slashes.
3314 */ 3318 */
@@ -3402,7 +3406,7 @@ xfs_symlink(
3402 * Check for ability to enter directory entry, if no space reserved. 3406 * Check for ability to enter directory entry, if no space reserved.
3403 */ 3407 */
3404 if (resblks == 0 && 3408 if (resblks == 0 &&
3405 (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) 3409 (error = xfs_dir_canenter(tp, dp, link_name, link_namelen)))
3406 goto error_return; 3410 goto error_return;
3407 /* 3411 /*
3408 * Initialize the bmap freelist prior to calling either 3412 * Initialize the bmap freelist prior to calling either
@@ -3457,7 +3461,7 @@ xfs_symlink(
3457 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 3461 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
3458 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 3462 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
3459 &first_block, resblks, mval, &nmaps, 3463 &first_block, resblks, mval, &nmaps,
3460 &free_list); 3464 &free_list, NULL);
3461 if (error) { 3465 if (error) {
3462 goto error1; 3466 goto error1;
3463 } 3467 }
@@ -3489,11 +3493,10 @@ xfs_symlink(
3489 /* 3493 /*
3490 * Create the directory entry for the symlink. 3494 * Create the directory entry for the symlink.
3491 */ 3495 */
3492 error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, 3496 error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino,
3493 ip->i_ino, &first_block, &free_list, resblks); 3497 &first_block, &free_list, resblks);
3494 if (error) { 3498 if (error)
3495 goto error1; 3499 goto error1;
3496 }
3497 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3500 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3498 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3501 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
3499 3502
@@ -3541,7 +3544,7 @@ std_return:
3541 } 3544 }
3542 3545
3543 if (!error) { 3546 if (!error) {
3544 vnode_t *vp; 3547 bhv_vnode_t *vp;
3545 3548
3546 ASSERT(ip); 3549 ASSERT(ip);
3547 vp = XFS_ITOV(ip); 3550 vp = XFS_ITOV(ip);
@@ -3606,10 +3609,10 @@ xfs_fid2(
3606int 3609int
3607xfs_rwlock( 3610xfs_rwlock(
3608 bhv_desc_t *bdp, 3611 bhv_desc_t *bdp,
3609 vrwlock_t locktype) 3612 bhv_vrwlock_t locktype)
3610{ 3613{
3611 xfs_inode_t *ip; 3614 xfs_inode_t *ip;
3612 vnode_t *vp; 3615 bhv_vnode_t *vp;
3613 3616
3614 vp = BHV_TO_VNODE(bdp); 3617 vp = BHV_TO_VNODE(bdp);
3615 if (VN_ISDIR(vp)) 3618 if (VN_ISDIR(vp))
@@ -3637,10 +3640,10 @@ xfs_rwlock(
3637void 3640void
3638xfs_rwunlock( 3641xfs_rwunlock(
3639 bhv_desc_t *bdp, 3642 bhv_desc_t *bdp,
3640 vrwlock_t locktype) 3643 bhv_vrwlock_t locktype)
3641{ 3644{
3642 xfs_inode_t *ip; 3645 xfs_inode_t *ip;
3643 vnode_t *vp; 3646 bhv_vnode_t *vp;
3644 3647
3645 vp = BHV_TO_VNODE(bdp); 3648 vp = BHV_TO_VNODE(bdp);
3646 if (VN_ISDIR(vp)) 3649 if (VN_ISDIR(vp))
@@ -3744,7 +3747,6 @@ xfs_inode_flush(
3744 return error; 3747 return error;
3745} 3748}
3746 3749
3747
3748int 3750int
3749xfs_set_dmattrs ( 3751xfs_set_dmattrs (
3750 bhv_desc_t *bdp, 3752 bhv_desc_t *bdp,
@@ -3785,16 +3787,12 @@ xfs_set_dmattrs (
3785 return error; 3787 return error;
3786} 3788}
3787 3789
3788
3789/*
3790 * xfs_reclaim
3791 */
3792STATIC int 3790STATIC int
3793xfs_reclaim( 3791xfs_reclaim(
3794 bhv_desc_t *bdp) 3792 bhv_desc_t *bdp)
3795{ 3793{
3796 xfs_inode_t *ip; 3794 xfs_inode_t *ip;
3797 vnode_t *vp; 3795 bhv_vnode_t *vp;
3798 3796
3799 vp = BHV_TO_VNODE(bdp); 3797 vp = BHV_TO_VNODE(bdp);
3800 ip = XFS_BHVTOI(bdp); 3798 ip = XFS_BHVTOI(bdp);
@@ -3849,7 +3847,7 @@ xfs_finish_reclaim(
3849 int sync_mode) 3847 int sync_mode)
3850{ 3848{
3851 xfs_ihash_t *ih = ip->i_hash; 3849 xfs_ihash_t *ih = ip->i_hash;
3852 vnode_t *vp = XFS_ITOV_NULL(ip); 3850 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
3853 int error; 3851 int error;
3854 3852
3855 if (vp && VN_BAD(vp)) 3853 if (vp && VN_BAD(vp))
@@ -4116,10 +4114,10 @@ retry:
4116 * Issue the xfs_bmapi() call to allocate the blocks 4114 * Issue the xfs_bmapi() call to allocate the blocks
4117 */ 4115 */
4118 XFS_BMAP_INIT(&free_list, &firstfsb); 4116 XFS_BMAP_INIT(&free_list, &firstfsb);
4119 error = xfs_bmapi(tp, ip, startoffset_fsb, 4117 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
4120 allocatesize_fsb, bmapi_flag, 4118 allocatesize_fsb, bmapi_flag,
4121 &firstfsb, 0, imapp, &nimaps, 4119 &firstfsb, 0, imapp, &nimaps,
4122 &free_list); 4120 &free_list, NULL);
4123 if (error) { 4121 if (error) {
4124 goto error0; 4122 goto error0;
4125 } 4123 }
@@ -4199,8 +4197,8 @@ xfs_zero_remaining_bytes(
4199 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4197 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
4200 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4198 offset_fsb = XFS_B_TO_FSBT(mp, offset);
4201 nimap = 1; 4199 nimap = 1;
4202 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, 4200 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0,
4203 &nimap, NULL); 4201 NULL, 0, &imap, &nimap, NULL, NULL);
4204 if (error || nimap < 1) 4202 if (error || nimap < 1)
4205 break; 4203 break;
4206 ASSERT(imap.br_blockcount >= 1); 4204 ASSERT(imap.br_blockcount >= 1);
@@ -4259,7 +4257,7 @@ xfs_free_file_space(
4259 xfs_off_t len, 4257 xfs_off_t len,
4260 int attr_flags) 4258 int attr_flags)
4261{ 4259{
4262 vnode_t *vp; 4260 bhv_vnode_t *vp;
4263 int committed; 4261 int committed;
4264 int done; 4262 int done;
4265 xfs_off_t end_dmi_offset; 4263 xfs_off_t end_dmi_offset;
@@ -4308,7 +4306,6 @@ xfs_free_file_space(
4308 return error; 4306 return error;
4309 } 4307 }
4310 4308
4311 ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
4312 if (attr_flags & ATTR_NOLOCK) 4309 if (attr_flags & ATTR_NOLOCK)
4313 need_iolock = 0; 4310 need_iolock = 0;
4314 if (need_iolock) { 4311 if (need_iolock) {
@@ -4326,7 +4323,7 @@ xfs_free_file_space(
4326 if (VN_CACHED(vp) != 0) { 4323 if (VN_CACHED(vp) != 0) {
4327 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4324 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
4328 ctooff(offtoct(ioffset)), -1); 4325 ctooff(offtoct(ioffset)), -1);
4329 VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), 4326 bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)),
4330 -1, FI_REMAPF_LOCKED); 4327 -1, FI_REMAPF_LOCKED);
4331 } 4328 }
4332 4329
@@ -4338,8 +4335,8 @@ xfs_free_file_space(
4338 */ 4335 */
4339 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 4336 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
4340 nimap = 1; 4337 nimap = 1;
4341 error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, 4338 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb,
4342 &imap, &nimap, NULL); 4339 1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
4343 if (error) 4340 if (error)
4344 goto out_unlock_iolock; 4341 goto out_unlock_iolock;
4345 ASSERT(nimap == 0 || nimap == 1); 4342 ASSERT(nimap == 0 || nimap == 1);
@@ -4353,8 +4350,8 @@ xfs_free_file_space(
4353 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 4350 startoffset_fsb += mp->m_sb.sb_rextsize - mod;
4354 } 4351 }
4355 nimap = 1; 4352 nimap = 1;
4356 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, 4353 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1,
4357 &imap, &nimap, NULL); 4354 1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
4358 if (error) 4355 if (error)
4359 goto out_unlock_iolock; 4356 goto out_unlock_iolock;
4360 ASSERT(nimap == 0 || nimap == 1); 4357 ASSERT(nimap == 0 || nimap == 1);
@@ -4426,9 +4423,9 @@ xfs_free_file_space(
4426 * issue the bunmapi() call to free the blocks 4423 * issue the bunmapi() call to free the blocks
4427 */ 4424 */
4428 XFS_BMAP_INIT(&free_list, &firstfsb); 4425 XFS_BMAP_INIT(&free_list, &firstfsb);
4429 error = xfs_bunmapi(tp, ip, startoffset_fsb, 4426 error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
4430 endoffset_fsb - startoffset_fsb, 4427 endoffset_fsb - startoffset_fsb,
4431 0, 2, &firstfsb, &free_list, &done); 4428 0, 2, &firstfsb, &free_list, NULL, &done);
4432 if (error) { 4429 if (error) {
4433 goto error0; 4430 goto error0;
4434 } 4431 }
@@ -4488,8 +4485,8 @@ xfs_change_file_space(
4488 xfs_off_t startoffset; 4485 xfs_off_t startoffset;
4489 xfs_off_t llen; 4486 xfs_off_t llen;
4490 xfs_trans_t *tp; 4487 xfs_trans_t *tp;
4491 vattr_t va; 4488 bhv_vattr_t va;
4492 vnode_t *vp; 4489 bhv_vnode_t *vp;
4493 4490
4494 vp = BHV_TO_VNODE(bdp); 4491 vp = BHV_TO_VNODE(bdp);
4495 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4492 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -4642,9 +4639,10 @@ xfs_change_file_space(
4642 return error; 4639 return error;
4643} 4640}
4644 4641
4645vnodeops_t xfs_vnodeops = { 4642bhv_vnodeops_t xfs_vnodeops = {
4646 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4643 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
4647 .vop_open = xfs_open, 4644 .vop_open = xfs_open,
4645 .vop_close = xfs_close,
4648 .vop_read = xfs_read, 4646 .vop_read = xfs_read,
4649#ifdef HAVE_SENDFILE 4647#ifdef HAVE_SENDFILE
4650 .vop_sendfile = xfs_sendfile, 4648 .vop_sendfile = xfs_sendfile,