author     Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>    2008-05-19 01:09:05 -0400
committer  Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>    2008-05-19 01:09:05 -0400
commit     c203e45f069af47ca7623e4dcd8c00bfba2722e4 (patch)
tree       4563115b6565dcfd97015c1c9366fb3d07cabf19 /fs
parent     a94477da38e0b261a7ecea71f4c95a3bcd5be69c (diff)
parent     b8291ad07a7f3b5b990900f0001198ac23ba893e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/fid.h | 15
-rw-r--r--  fs/9p/v9fs.c | 57
-rw-r--r--  fs/9p/v9fs.h | 85
-rw-r--r--  fs/9p/vfs_addr.c | 2
-rw-r--r--  fs/9p/vfs_dir.c | 2
-rw-r--r--  fs/9p/vfs_file.c | 11
-rw-r--r--  fs/9p/vfs_inode.c | 50
-rw-r--r--  fs/9p/vfs_super.c | 35
-rw-r--r--  fs/Kconfig | 3
-rw-r--r--  fs/Kconfig.binfmt | 2
-rw-r--r--  fs/adfs/adfs.h | 2
-rw-r--r--  fs/adfs/dir_f.c | 4
-rw-r--r--  fs/affs/affs.h | 4
-rw-r--r--  fs/affs/file.c | 50
-rw-r--r--  fs/affs/inode.c | 34
-rw-r--r--  fs/affs/namei.c | 6
-rw-r--r--  fs/affs/super.c | 21
-rw-r--r--  fs/afs/afs_cm.h | 3
-rw-r--r--  fs/afs/cell.c | 2
-rw-r--r--  fs/afs/cmservice.c | 133
-rw-r--r--  fs/afs/dir.c | 4
-rw-r--r--  fs/afs/internal.h | 8
-rw-r--r--  fs/afs/proc.c | 33
-rw-r--r--  fs/aio.c | 79
-rw-r--r--  fs/anon_inodes.c | 13
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/expire.c | 26
-rw-r--r--  fs/autofs4/root.c | 40
-rw-r--r--  fs/autofs4/waitq.c | 2
-rw-r--r--  fs/befs/endian.h | 2
-rw-r--r--  fs/befs/linuxvfs.c | 6
-rw-r--r--  fs/bfs/bfs.h | 2
-rw-r--r--  fs/binfmt_aout.c | 12
-rw-r--r--  fs/binfmt_elf.c | 30
-rw-r--r--  fs/binfmt_elf_fdpic.c | 14
-rw-r--r--  fs/binfmt_em86.c | 2
-rw-r--r--  fs/binfmt_flat.c | 9
-rw-r--r--  fs/binfmt_misc.c | 6
-rw-r--r--  fs/binfmt_script.c | 2
-rw-r--r--  fs/bio.c | 93
-rw-r--r--  fs/buffer.c | 24
-rw-r--r--  fs/char_dev.c | 1
-rw-r--r--  fs/cifs/CHANGES | 3
-rw-r--r--  fs/cifs/asn1.c | 10
-rw-r--r--  fs/cifs/cifs_debug.c | 4
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 25
-rw-r--r--  fs/cifs/cifsacl.c | 16
-rw-r--r--  fs/cifs/cifsfs.c | 72
-rw-r--r--  fs/cifs/cifsfs.h | 10
-rw-r--r--  fs/cifs/cifsglob.h | 44
-rw-r--r--  fs/cifs/cifspdu.h | 3
-rw-r--r--  fs/cifs/cifsproto.h | 15
-rw-r--r--  fs/cifs/cifssmb.c | 54
-rw-r--r--  fs/cifs/connect.c | 232
-rw-r--r--  fs/cifs/dir.c | 34
-rw-r--r--  fs/cifs/dns_resolve.c | 62
-rw-r--r--  fs/cifs/fcntl.c | 2
-rw-r--r--  fs/cifs/file.c | 80
-rw-r--r--  fs/cifs/inode.c | 81
-rw-r--r--  fs/cifs/link.c | 2
-rw-r--r--  fs/cifs/misc.c | 33
-rw-r--r--  fs/cifs/netmisc.c | 32
-rw-r--r--  fs/cifs/readdir.c | 12
-rw-r--r--  fs/cifs/smbencrypt.c | 8
-rw-r--r--  fs/cifs/xattr.c | 2
-rw-r--r--  fs/coda/coda_linux.c | 2
-rw-r--r--  fs/coda/dir.c | 6
-rw-r--r--  fs/compat.c | 15
-rw-r--r--  fs/compat_ioctl.c | 4
-rw-r--r--  fs/configfs/file.c | 2
-rw-r--r--  fs/configfs/inode.c | 2
-rw-r--r--  fs/configfs/mount.c | 2
-rw-r--r--  fs/configfs/symlink.c | 4
-rw-r--r--  fs/debugfs/file.c | 2
-rw-r--r--  fs/devpts/inode.c | 43
-rw-r--r--  fs/dlm/lockspace.c | 2
-rw-r--r--  fs/dnotify.c | 11
-rw-r--r--  fs/dquot.c | 10
-rw-r--r--  fs/drop_caches.c | 14
-rw-r--r--  fs/ecryptfs/Makefile | 2
-rw-r--r--  fs/ecryptfs/crypto.c | 33
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 102
-rw-r--r--  fs/ecryptfs/file.c | 2
-rw-r--r--  fs/ecryptfs/inode.c | 19
-rw-r--r--  fs/ecryptfs/keystore.c | 89
-rw-r--r--  fs/ecryptfs/main.c | 2
-rw-r--r--  fs/ecryptfs/messaging.c | 524
-rw-r--r--  fs/ecryptfs/miscdev.c | 600
-rw-r--r--  fs/ecryptfs/mmap.c | 18
-rw-r--r--  fs/ecryptfs/netlink.c | 33
-rw-r--r--  fs/ecryptfs/read_write.c | 16
-rw-r--r--  fs/eventfd.c | 15
-rw-r--r--  fs/eventpoll.c | 57
-rw-r--r--  fs/exec.c | 24
-rw-r--r--  fs/exportfs/expfs.c | 10
-rw-r--r--  fs/ext3/inode.c | 14
-rw-r--r--  fs/ext3/xattr.c | 5
-rw-r--r--  fs/ext4/acl.c | 12
-rw-r--r--  fs/ext4/balloc.c | 50
-rw-r--r--  fs/ext4/bitmap.c | 2
-rw-r--r--  fs/ext4/dir.c | 4
-rw-r--r--  fs/ext4/ext4.h | 1205
-rw-r--r--  fs/ext4/ext4_extents.h | 232
-rw-r--r--  fs/ext4/ext4_i.h | 167
-rw-r--r--  fs/ext4/ext4_jbd2.c | 14
-rw-r--r--  fs/ext4/ext4_jbd2.h | 231
-rw-r--r--  fs/ext4/ext4_sb.h | 148
-rw-r--r--  fs/ext4/extents.c | 354
-rw-r--r--  fs/ext4/file.c | 6
-rw-r--r--  fs/ext4/fsync.c | 7
-rw-r--r--  fs/ext4/hash.c | 2
-rw-r--r--  fs/ext4/ialloc.c | 44
-rw-r--r--  fs/ext4/inode.c | 57
-rw-r--r--  fs/ext4/ioctl.c | 16
-rw-r--r--  fs/ext4/mballoc.c | 531
-rw-r--r--  fs/ext4/mballoc.h | 304
-rw-r--r--  fs/ext4/migrate.c | 43
-rw-r--r--  fs/ext4/namei.c | 44
-rw-r--r--  fs/ext4/resize.c | 83
-rw-r--r--  fs/ext4/super.c | 146
-rw-r--r--  fs/ext4/symlink.c | 2
-rw-r--r--  fs/ext4/xattr.c | 45
-rw-r--r--  fs/ext4/xattr.h | 7
-rw-r--r--  fs/ext4/xattr_security.c | 4
-rw-r--r--  fs/ext4/xattr_trusted.c | 4
-rw-r--r--  fs/ext4/xattr_user.c | 4
-rw-r--r--  fs/fat/cache.c | 6
-rw-r--r--  fs/fat/fatent.c | 2
-rw-r--r--  fs/fat/file.c | 2
-rw-r--r--  fs/fat/inode.c | 8
-rw-r--r--  fs/fcntl.c | 1
-rw-r--r--  fs/file.c | 23
-rw-r--r--  fs/file_table.c | 1
-rw-r--r--  fs/freevxfs/vxfs_extern.h | 5
-rw-r--r--  fs/freevxfs/vxfs_immed.c | 1
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 5
-rw-r--r--  fs/fs-writeback.c | 78
-rw-r--r--  fs/fuse/control.c | 2
-rw-r--r--  fs/fuse/dev.c | 23
-rw-r--r--  fs/fuse/dir.c | 86
-rw-r--r--  fs/fuse/file.c | 635
-rw-r--r--  fs/fuse/fuse_i.h | 55
-rw-r--r--  fs/fuse/inode.c | 95
-rw-r--r--  fs/gfs2/locking/dlm/sysfs.c | 2
-rw-r--r--  fs/gfs2/util.h | 18
-rw-r--r--  fs/hfs/btree.c | 10
-rw-r--r--  fs/hfs/mdb.c | 2
-rw-r--r--  fs/hfs/super.c | 6
-rw-r--r--  fs/hfsplus/btree.c | 10
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 4
-rw-r--r--  fs/hfsplus/inode.c | 5
-rw-r--r--  fs/hfsplus/options.c | 3
-rw-r--r--  fs/hfsplus/super.c | 4
-rw-r--r--  fs/hfsplus/wrapper.c | 2
-rw-r--r--  fs/hppfs/Makefile | 6
-rw-r--r--  fs/hppfs/hppfs.c (renamed from fs/hppfs/hppfs_kern.c) | 82
-rw-r--r--  fs/hugetlbfs/inode.c | 2
-rw-r--r--  fs/inode.c | 11
-rw-r--r--  fs/inotify_user.c | 2
-rw-r--r--  fs/ioctl.c | 4
-rw-r--r--  fs/isofs/dir.c | 8
-rw-r--r--  fs/isofs/isofs.h | 12
-rw-r--r--  fs/isofs/namei.c | 7
-rw-r--r--  fs/jbd/commit.c | 2
-rw-r--r--  fs/jbd2/commit.c | 21
-rw-r--r--  fs/jbd2/journal.c | 59
-rw-r--r--  fs/jbd2/revoke.c | 165
-rw-r--r--  fs/jbd2/transaction.c | 41
-rw-r--r--  fs/jffs2/build.c | 31
-rw-r--r--  fs/jffs2/debug.h | 8
-rw-r--r--  fs/jffs2/dir.c | 42
-rw-r--r--  fs/jffs2/erase.c | 9
-rw-r--r--  fs/jffs2/fs.c | 14
-rw-r--r--  fs/jffs2/gc.c | 8
-rw-r--r--  fs/jffs2/nodelist.h | 5
-rw-r--r--  fs/jffs2/nodemgmt.c | 2
-rw-r--r--  fs/jffs2/os-linux.h | 2
-rw-r--r--  fs/jffs2/readinode.c | 16
-rw-r--r--  fs/jffs2/scan.c | 9
-rw-r--r--  fs/jffs2/super.c | 15
-rw-r--r--  fs/jffs2/wbuf.c | 2
-rw-r--r--  fs/jffs2/write.c | 17
-rw-r--r--  fs/jffs2/xattr.c | 8
-rw-r--r--  fs/jfs/jfs_debug.c | 4
-rw-r--r--  fs/lockd/clntproc.c | 2
-rw-r--r--  fs/lockd/svclock.c | 2
-rw-r--r--  fs/locks.c | 20
-rw-r--r--  fs/msdos/namei.c | 2
-rw-r--r--  fs/namei.c | 9
-rw-r--r--  fs/namespace.c | 17
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 39
-rw-r--r--  fs/nfs/client.c | 20
-rw-r--r--  fs/nfs/super.c | 26
-rw-r--r--  fs/nfsd/nfs4callback.c | 4
-rw-r--r--  fs/nfsd/nfsctl.c | 4
-rw-r--r--  fs/ntfs/debug.h | 6
-rw-r--r--  fs/ntfs/mft.c | 6
-rw-r--r--  fs/ocfs2/cluster/sys.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c | 8
-rw-r--r--  fs/ocfs2/dlm/dlmfs.c | 2
-rw-r--r--  fs/ocfs2/file.c | 4
-rw-r--r--  fs/ocfs2/localalloc.c | 4
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 2
-rw-r--r--  fs/ocfs2/stack_user.c | 2
-rw-r--r--  fs/ocfs2/symlink.c | 2
-rw-r--r--  fs/open.c | 1
-rw-r--r--  fs/partitions/ldm.c | 8
-rw-r--r--  fs/pipe.c | 21
-rw-r--r--  fs/proc/array.c | 8
-rw-r--r--  fs/proc/base.c | 114
-rw-r--r--  fs/proc/generic.c | 149
-rw-r--r--  fs/proc/inode.c | 69
-rw-r--r--  fs/proc/internal.h | 4
-rw-r--r--  fs/proc/nommu.c | 2
-rw-r--r--  fs/proc/proc_misc.c | 68
-rw-r--r--  fs/proc/proc_net.c | 11
-rw-r--r--  fs/proc/proc_sysctl.c | 52
-rw-r--r--  fs/proc/proc_tty.c | 87
-rw-r--r--  fs/proc/root.c | 14
-rw-r--r--  fs/proc/task_mmu.c | 36
-rw-r--r--  fs/proc/task_nommu.c | 35
-rw-r--r--  fs/quota_v2.c | 4
-rw-r--r--  fs/ramfs/file-mmu.c | 3
-rw-r--r--  fs/ramfs/inode.c | 2
-rw-r--r--  fs/ramfs/internal.h | 1
-rw-r--r--  fs/reiserfs/journal.c | 50
-rw-r--r--  fs/reiserfs/procfs.c | 9
-rw-r--r--  fs/select.c | 15
-rw-r--r--  fs/signalfd.c | 17
-rw-r--r--  fs/smbfs/smb_debug.h | 6
-rw-r--r--  fs/splice.c | 29
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/sync.c | 2
-rw-r--r--  fs/sysfs/file.c | 2
-rw-r--r--  fs/sysfs/inode.c | 4
-rw-r--r--  fs/sysfs/mount.c | 2
-rw-r--r--  fs/sysv/sysv.h | 8
-rw-r--r--  fs/timerfd.c | 12
-rw-r--r--  fs/udf/namei.c | 145
-rw-r--r--  fs/udf/partition.c | 4
-rw-r--r--  fs/udf/super.c | 5
-rw-r--r--  fs/udf/udfdecl.h | 1
-rw-r--r--  fs/ufs/ufs.h | 1
-rw-r--r--  fs/utimes.c | 17
-rw-r--r--  fs/vfat/namei.c | 2
-rw-r--r--  fs/xattr.c | 41
246 files changed, 7307 insertions, 2866 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 26e07df783b9..c3bbd6af996d 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,6 +22,21 @@
22 22
23#include <linux/list.h> 23#include <linux/list.h>
24 24
25/**
26 * struct v9fs_dentry - 9p private data stored in dentry d_fsdata
27 * @lock: protects the fidlist
28 * @fidlist: list of FIDs currently associated with this dentry
29 *
30 * This structure defines the 9p private data associated with
31 * a particular dentry. In particular, this private data is used
32 * to lookup which 9P FID handle should be used for a particular VFS
33 * operation. FID handles are associated with dentries instead of
34 * inodes in order to more closely map functionality to the Plan 9
35 * expected behavior for FID reclaimation and tracking.
36 *
37 * See Also: Mapping FIDs to Linux VFS model in
38 * Design and Implementation of the Linux 9P File System documentation
39 */
25struct v9fs_dentry { 40struct v9fs_dentry {
26 spinlock_t lock; /* protect fidlist */ 41 spinlock_t lock; /* protect fidlist */
27 struct list_head fidlist; 42 struct list_head fidlist;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 9b0f0222e8bb..047c791427aa 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -71,19 +71,19 @@ static match_table_t tokens = {
71 71
72/** 72/**
73 * v9fs_parse_options - parse mount options into session structure 73 * v9fs_parse_options - parse mount options into session structure
74 * @options: options string passed from mount
75 * @v9ses: existing v9fs session information 74 * @v9ses: existing v9fs session information
76 * 75 *
76 * Return 0 upon success, -ERRNO upon failure.
77 */ 77 */
78 78
79static void v9fs_parse_options(struct v9fs_session_info *v9ses) 79static int v9fs_parse_options(struct v9fs_session_info *v9ses)
80{ 80{
81 char *options; 81 char *options;
82 substring_t args[MAX_OPT_ARGS]; 82 substring_t args[MAX_OPT_ARGS];
83 char *p; 83 char *p;
84 int option = 0; 84 int option = 0;
85 char *s, *e; 85 char *s, *e;
86 int ret; 86 int ret = 0;
87 87
88 /* setup defaults */ 88 /* setup defaults */
89 v9ses->afid = ~0; 89 v9ses->afid = ~0;
@@ -91,19 +91,26 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
91 v9ses->cache = 0; 91 v9ses->cache = 0;
92 92
93 if (!v9ses->options) 93 if (!v9ses->options)
94 return; 94 return 0;
95 95
96 options = kstrdup(v9ses->options, GFP_KERNEL); 96 options = kstrdup(v9ses->options, GFP_KERNEL);
97 if (!options) {
98 P9_DPRINTK(P9_DEBUG_ERROR,
99 "failed to allocate copy of option string\n");
100 return -ENOMEM;
101 }
102
97 while ((p = strsep(&options, ",")) != NULL) { 103 while ((p = strsep(&options, ",")) != NULL) {
98 int token; 104 int token;
99 if (!*p) 105 if (!*p)
100 continue; 106 continue;
101 token = match_token(p, tokens, args); 107 token = match_token(p, tokens, args);
102 if (token < Opt_uname) { 108 if (token < Opt_uname) {
103 ret = match_int(&args[0], &option); 109 int r = match_int(&args[0], &option);
104 if (ret < 0) { 110 if (r < 0) {
105 P9_DPRINTK(P9_DEBUG_ERROR, 111 P9_DPRINTK(P9_DEBUG_ERROR,
106 "integer field, but no integer?\n"); 112 "integer field, but no integer?\n");
113 ret = r;
107 continue; 114 continue;
108 } 115 }
109 } 116 }
@@ -125,10 +132,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
125 v9ses->afid = option; 132 v9ses->afid = option;
126 break; 133 break;
127 case Opt_uname: 134 case Opt_uname:
128 match_strcpy(v9ses->uname, &args[0]); 135 match_strlcpy(v9ses->uname, &args[0], PATH_MAX);
129 break; 136 break;
130 case Opt_remotename: 137 case Opt_remotename:
131 match_strcpy(v9ses->aname, &args[0]); 138 match_strlcpy(v9ses->aname, &args[0], PATH_MAX);
132 break; 139 break;
133 case Opt_nodevmap: 140 case Opt_nodevmap:
134 v9ses->nodev = 1; 141 v9ses->nodev = 1;
@@ -139,6 +146,13 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
139 146
140 case Opt_access: 147 case Opt_access:
141 s = match_strdup(&args[0]); 148 s = match_strdup(&args[0]);
149 if (!s) {
150 P9_DPRINTK(P9_DEBUG_ERROR,
151 "failed to allocate copy"
152 " of option argument\n");
153 ret = -ENOMEM;
154 break;
155 }
142 v9ses->flags &= ~V9FS_ACCESS_MASK; 156 v9ses->flags &= ~V9FS_ACCESS_MASK;
143 if (strcmp(s, "user") == 0) 157 if (strcmp(s, "user") == 0)
144 v9ses->flags |= V9FS_ACCESS_USER; 158 v9ses->flags |= V9FS_ACCESS_USER;
@@ -158,6 +172,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
158 } 172 }
159 } 173 }
160 kfree(options); 174 kfree(options);
175 return ret;
161} 176}
162 177
163/** 178/**
@@ -173,6 +188,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
173{ 188{
174 int retval = -EINVAL; 189 int retval = -EINVAL;
175 struct p9_fid *fid; 190 struct p9_fid *fid;
191 int rc;
176 192
177 v9ses->uname = __getname(); 193 v9ses->uname = __getname();
178 if (!v9ses->uname) 194 if (!v9ses->uname)
@@ -190,8 +206,21 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
190 v9ses->uid = ~0; 206 v9ses->uid = ~0;
191 v9ses->dfltuid = V9FS_DEFUID; 207 v9ses->dfltuid = V9FS_DEFUID;
192 v9ses->dfltgid = V9FS_DEFGID; 208 v9ses->dfltgid = V9FS_DEFGID;
193 v9ses->options = kstrdup(data, GFP_KERNEL); 209 if (data) {
194 v9fs_parse_options(v9ses); 210 v9ses->options = kstrdup(data, GFP_KERNEL);
211 if (!v9ses->options) {
212 P9_DPRINTK(P9_DEBUG_ERROR,
213 "failed to allocate copy of option string\n");
214 retval = -ENOMEM;
215 goto error;
216 }
217 }
218
219 rc = v9fs_parse_options(v9ses);
220 if (rc < 0) {
221 retval = rc;
222 goto error;
223 }
195 224
196 v9ses->clnt = p9_client_create(dev_name, v9ses->options); 225 v9ses->clnt = p9_client_create(dev_name, v9ses->options);
197 226
@@ -233,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
233 return fid; 262 return fid;
234 263
235error: 264error:
236 v9fs_session_close(v9ses);
237 return ERR_PTR(retval); 265 return ERR_PTR(retval);
238} 266}
239 267
@@ -256,9 +284,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
256} 284}
257 285
258/** 286/**
259 * v9fs_session_cancel - mark transport as disconnected 287 * v9fs_session_cancel - terminate a session
260 * and cancel all pending requests. 288 * @v9ses: session to terminate
289 *
290 * mark transport as disconnected and cancel all pending requests.
261 */ 291 */
292
262void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 293void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
263 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); 294 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
264 p9_client_disconnect(v9ses->clnt); 295 p9_client_disconnect(v9ses->clnt);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 7d3a1018db52..a7d567192998 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -21,18 +21,69 @@
21 * 21 *
22 */ 22 */
23 23
24/* 24/**
25 * Session structure provides information for an opened session 25 * enum p9_session_flags - option flags for each 9P session
26 * 26 * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions
27 */ 27 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
28 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
29 * @V9FS_ACCESS_ANY: use a single attach for all users
30 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
31 *
32 * Session flags reflect options selected by users at mount time
33 */
34enum p9_session_flags {
35 V9FS_EXTENDED = 0x01,
36 V9FS_ACCESS_SINGLE = 0x02,
37 V9FS_ACCESS_USER = 0x04,
38 V9FS_ACCESS_ANY = 0x06,
39 V9FS_ACCESS_MASK = 0x06,
40};
41
42/* possible values of ->cache */
43/**
44 * enum p9_cache_modes - user specified cache preferences
45 * @CACHE_NONE: do not cache data, dentries, or directory contents (default)
46 * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency
47 *
48 * eventually support loose, tight, time, session, default always none
49 */
50
51enum p9_cache_modes {
52 CACHE_NONE,
53 CACHE_LOOSE,
54};
55
56/**
57 * struct v9fs_session_info - per-instance session information
58 * @flags: session options of type &p9_session_flags
59 * @nodev: set to 1 to disable device mapping
60 * @debug: debug level
61 * @afid: authentication handle
62 * @cache: cache mode of type &p9_cache_modes
63 * @options: copy of options string given by user
64 * @uname: string user name to mount hierarchy as
65 * @aname: mount specifier for remote hierarchy
66 * @maxdata: maximum data to be sent/recvd per protocol message
67 * @dfltuid: default numeric userid to mount hierarchy as
68 * @dfltgid: default numeric groupid to mount hierarchy as
69 * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
70 * @clnt: reference to 9P network client instantiated for this session
71 * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
72 *
73 * This structure holds state for each session instance established during
74 * a sys_mount() .
75 *
76 * Bugs: there seems to be a lot of state which could be condensed and/or
77 * removed.
78 */
28 79
29struct v9fs_session_info { 80struct v9fs_session_info {
30 /* options */ 81 /* options */
31 unsigned char flags; /* session flags */ 82 unsigned char flags;
32 unsigned char nodev; /* set to 1 if no disable device mapping */ 83 unsigned char nodev;
33 unsigned short debug; /* debug level */ 84 unsigned short debug;
34 unsigned int afid; /* authentication fid */ 85 unsigned int afid;
35 unsigned int cache; /* cache mode */ 86 unsigned int cache;
36 87
37 char *options; /* copy of mount options */ 88 char *options; /* copy of mount options */
38 char *uname; /* user name to mount as */ 89 char *uname; /* user name to mount as */
@@ -45,22 +96,6 @@ struct v9fs_session_info {
45 struct dentry *debugfs_dir; 96 struct dentry *debugfs_dir;
46}; 97};
47 98
48/* session flags */
49enum {
50 V9FS_EXTENDED = 0x01, /* 9P2000.u */
51 V9FS_ACCESS_MASK = 0x06, /* access mask */
52 V9FS_ACCESS_SINGLE = 0x02, /* only one user can access the files */
53 V9FS_ACCESS_USER = 0x04, /* attache per user */
54 V9FS_ACCESS_ANY = 0x06, /* use the same attach for all users */
55};
56
57/* possible values of ->cache */
58/* eventually support loose, tight, time, session, default always none */
59enum {
60 CACHE_NONE, /* default */
61 CACHE_LOOSE, /* no consistency */
62};
63
64extern struct dentry *v9fs_debugfs_root; 99extern struct dentry *v9fs_debugfs_root;
65 100
66struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 101struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6248f0e727a3..97d3aed57983 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -43,7 +43,7 @@
43/** 43/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 44 * v9fs_vfs_readpage - read an entire page in from 9P
45 * 45 *
46 * @file: file being read 46 * @filp: file being read
47 * @page: structure to page 47 * @page: structure to page
48 * 48 *
49 */ 49 */
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0924d4477da3..88e3787c6ea9 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat)
60 60
61/** 61/**
62 * v9fs_dir_readdir - read a directory 62 * v9fs_dir_readdir - read a directory
63 * @filep: opened file structure 63 * @filp: opened file structure
64 * @dirent: directory structure ??? 64 * @dirent: directory structure ???
65 * @filldir: function to populate directory structure ??? 65 * @filldir: function to populate directory structure ???
66 * 66 *
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a616fff8906d..0d55affe37d4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
90 90
91/** 91/**
92 * v9fs_file_lock - lock a file (or directory) 92 * v9fs_file_lock - lock a file (or directory)
93 * @inode: inode to be opened 93 * @filp: file to be locked
94 * @file: file being opened 94 * @cmd: lock command
95 * @fl: file lock structure
95 * 96 *
96 * XXX - this looks like a local only lock, we should extend into 9P 97 * Bugs: this looks like a local only lock, we should extend into 9P
97 * by using open exclusive 98 * by using open exclusive
98 */ 99 */
99 100
@@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
118 119
119/** 120/**
120 * v9fs_file_read - read from a file 121 * v9fs_file_read - read from a file
121 * @filep: file pointer to read 122 * @filp: file pointer to read
122 * @data: data buffer to read data into 123 * @data: data buffer to read data into
123 * @count: size of buffer 124 * @count: size of buffer
124 * @offset: offset at which to read data 125 * @offset: offset at which to read data
@@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count,
142 143
143/** 144/**
144 * v9fs_file_write - write to a file 145 * v9fs_file_write - write to a file
145 * @filep: file pointer to write 146 * @filp: file pointer to write
146 * @data: data buffer to write data from 147 * @data: data buffer to write data from
147 * @count: size of buffer 148 * @count: size of buffer
148 * @offset: offset at which to write data 149 * @offset: offset at which to write data
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6a28842052ea..40fa807bd929 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
129 return res; 129 return res;
130} 130}
131 131
132/**
133 * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits
134 * @uflags: flags to convert
135 *
136 */
137
132int v9fs_uflags2omode(int uflags) 138int v9fs_uflags2omode(int uflags)
133{ 139{
134 int ret; 140 int ret;
@@ -312,6 +318,14 @@ error:
312} 318}
313*/ 319*/
314 320
321/**
322 * v9fs_inode_from_fid - populate an inode by issuing a attribute request
323 * @v9ses: session information
324 * @fid: fid to issue attribute request for
325 * @sb: superblock on which to create inode
326 *
327 */
328
315static struct inode * 329static struct inode *
316v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 330v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
317 struct super_block *sb) 331 struct super_block *sb)
@@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file)
384 398
385/** 399/**
386 * v9fs_create - Create a file 400 * v9fs_create - Create a file
401 * @v9ses: session information
402 * @dir: directory that dentry is being created in
387 * @dentry: dentry that is being created 403 * @dentry: dentry that is being created
388 * @perm: create permissions 404 * @perm: create permissions
389 * @mode: open mode 405 * @mode: open mode
406 * @extension: 9p2000.u extension string to support devices, etc.
390 * 407 *
391 */ 408 */
392static struct p9_fid * 409static struct p9_fid *
@@ -461,7 +478,7 @@ error:
461 478
462/** 479/**
463 * v9fs_vfs_create - VFS hook to create files 480 * v9fs_vfs_create - VFS hook to create files
464 * @inode: directory inode that is being created 481 * @dir: directory inode that is being created
465 * @dentry: dentry that is being deleted 482 * @dentry: dentry that is being deleted
466 * @mode: create permissions 483 * @mode: create permissions
467 * @nd: path information 484 * @nd: path information
@@ -519,7 +536,7 @@ error:
519 536
520/** 537/**
521 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory 538 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
522 * @inode: inode that is being unlinked 539 * @dir: inode that is being unlinked
523 * @dentry: dentry that is being unlinked 540 * @dentry: dentry that is being unlinked
524 * @mode: mode for new directory 541 * @mode: mode for new directory
525 * 542 *
@@ -703,9 +720,9 @@ done:
703 720
704/** 721/**
705 * v9fs_vfs_getattr - retrieve file metadata 722 * v9fs_vfs_getattr - retrieve file metadata
706 * @mnt - mount information 723 * @mnt: mount information
707 * @dentry - file to get attributes on 724 * @dentry: file to get attributes on
708 * @stat - metadata structure to populate 725 * @stat: metadata structure to populate
709 * 726 *
710 */ 727 */
711 728
@@ -928,7 +945,7 @@ done:
928/** 945/**
929 * v9fs_vfs_readlink - read a symlink's location 946 * v9fs_vfs_readlink - read a symlink's location
930 * @dentry: dentry for symlink 947 * @dentry: dentry for symlink
931 * @buf: buffer to load symlink location into 948 * @buffer: buffer to load symlink location into
932 * @buflen: length of buffer 949 * @buflen: length of buffer
933 * 950 *
934 */ 951 */
@@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
996 * v9fs_vfs_put_link - release a symlink path 1013 * v9fs_vfs_put_link - release a symlink path
997 * @dentry: dentry for symlink 1014 * @dentry: dentry for symlink
998 * @nd: nameidata 1015 * @nd: nameidata
1016 * @p: unused
999 * 1017 *
1000 */ 1018 */
1001 1019
1002static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 1020static void
1021v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1003{ 1022{
1004 char *s = nd_get_link(nd); 1023 char *s = nd_get_link(nd);
1005 1024
@@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1008 __putname(s); 1027 __putname(s);
1009} 1028}
1010 1029
1030/**
1031 * v9fs_vfs_mkspecial - create a special file
1032 * @dir: inode to create special file in
1033 * @dentry: dentry to create
1034 * @mode: mode to create special file
1035 * @extension: 9p2000.u format extension string representing special file
1036 *
1037 */
1038
1011static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1039static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1012 int mode, const char *extension) 1040 int mode, const char *extension)
1013{ 1041{
@@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1037 * @dentry: dentry for symlink 1065 * @dentry: dentry for symlink
1038 * @symname: symlink data 1066 * @symname: symlink data
1039 * 1067 *
1040 * See 9P2000.u RFC for more information 1068 * See Also: 9P2000.u RFC for more information
1041 * 1069 *
1042 */ 1070 */
1043 1071
@@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1058 * 1086 *
1059 */ 1087 */
1060 1088
1061/* XXX - lots of code dup'd from symlink and creates,
1062 * figure out a better reuse strategy
1063 */
1064
1065static int 1089static int
1066v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, 1090v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1067 struct dentry *dentry) 1091 struct dentry *dentry)
@@ -1098,7 +1122,7 @@ clunk_fid:
1098 * @dir: inode destination for new link 1122 * @dir: inode destination for new link
1099 * @dentry: dentry for file 1123 * @dentry: dentry for file
1100 * @mode: mode for creation 1124 * @mode: mode for creation
1101 * @dev_t: device associated with special file 1125 * @rdev: device associated with special file
1102 * 1126 *
1103 */ 1127 */
1104 1128
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index a452ac67fc94..bf59c3960494 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
75 * v9fs_fill_super - populate superblock with info 75 * v9fs_fill_super - populate superblock with info
76 * @sb: superblock 76 * @sb: superblock
77 * @v9ses: session information 77 * @v9ses: session information
78 * @flags: flags propagated from v9fs_get_sb()
78 * 79 *
79 */ 80 */
80 81
@@ -127,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
127 fid = v9fs_session_init(v9ses, dev_name, data); 128 fid = v9fs_session_init(v9ses, dev_name, data);
128 if (IS_ERR(fid)) { 129 if (IS_ERR(fid)) {
129 retval = PTR_ERR(fid); 130 retval = PTR_ERR(fid);
130 fid = NULL; 131 goto close_session;
131 kfree(v9ses);
132 v9ses = NULL;
133 goto error;
134 } 132 }
135 133
136 st = p9_client_stat(fid); 134 st = p9_client_stat(fid);
137 if (IS_ERR(st)) { 135 if (IS_ERR(st)) {
138 retval = PTR_ERR(st); 136 retval = PTR_ERR(st);
139 goto error; 137 goto clunk_fid;
140 } 138 }
141 139
142 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 140 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
143 if (IS_ERR(sb)) { 141 if (IS_ERR(sb)) {
144 retval = PTR_ERR(sb); 142 retval = PTR_ERR(sb);
145 goto error; 143 goto free_stat;
146 } 144 }
147 v9fs_fill_super(sb, v9ses, flags); 145 v9fs_fill_super(sb, v9ses, flags);
148 146
149 inode = v9fs_get_inode(sb, S_IFDIR | mode); 147 inode = v9fs_get_inode(sb, S_IFDIR | mode);
150 if (IS_ERR(inode)) { 148 if (IS_ERR(inode)) {
151 retval = PTR_ERR(inode); 149 retval = PTR_ERR(inode);
152 goto error; 150 goto release_sb;
153 } 151 }
154 152
155 inode->i_uid = uid; 153 inode->i_uid = uid;
@@ -158,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
158 root = d_alloc_root(inode); 156 root = d_alloc_root(inode);
159 if (!root) { 157 if (!root) {
160 retval = -ENOMEM; 158 retval = -ENOMEM;
161 goto error; 159 goto release_sb;
162 } 160 }
163 161
164 sb->s_root = root; 162 sb->s_root = root;
@@ -169,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
169 167
170 return simple_set_mnt(mnt, sb); 168 return simple_set_mnt(mnt, sb);
171 169
172error: 170release_sb:
173 kfree(st);
174 if (fid)
175 p9_client_clunk(fid);
176
177 if (v9ses) {
178 v9fs_session_close(v9ses);
179 kfree(v9ses);
180 }
181
182 if (sb) { 171 if (sb) {
183 up_write(&sb->s_umount); 172 up_write(&sb->s_umount);
184 deactivate_super(sb); 173 deactivate_super(sb);
185 } 174 }
186 175
176free_stat:
177 kfree(st);
178
179clunk_fid:
180 p9_client_clunk(fid);
181
182close_session:
183 v9fs_session_close(v9ses);
184 kfree(v9ses);
185
187 return retval; 186 return retval;
188} 187}
189 188
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d6..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
1005 1005
1006config HUGETLBFS 1006config HUGETLBFS
1007 bool "HugeTLB file system support" 1007 bool "HugeTLB file system support"
1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN 1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
1009 (S390 && 64BIT) || BROKEN
1009 help 1010 help
1010 hugetlbfs is a filesystem backing for HugeTLB pages, based on 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 ramfs. For architectures that support it, say Y here and read 1012 ramfs. For architectures that support it, say Y here and read
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 853845abcca6..55e8ee1900a5 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 tristate "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c43..831157502d5a 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
75/* Misc */ 75/* Misc */
76void __adfs_error(struct super_block *sb, const char *function, 76void __adfs_error(struct super_block *sb, const char *function,
77 const char *fmt, ...); 77 const char *fmt, ...);
78#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) 78#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
79 79
80/* super.c */ 80/* super.c */
81 81
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index b9b2b27b68c3..ea7df2146921 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir)
122 ptr.ptr8 = bufoff(bh, i); 122 ptr.ptr8 = bufoff(bh, i);
123 end.ptr8 = ptr.ptr8 + last - i; 123 end.ptr8 = ptr.ptr8 + last - i;
124 124
125 do 125 do {
126 dircheck = *ptr.ptr8++ ^ ror13(dircheck); 126 dircheck = *ptr.ptr8++ ^ ror13(dircheck);
127 while (ptr.ptr8 < end.ptr8); 127 } while (ptr.ptr8 < end.ptr8);
128 } 128 }
129 129
130 /* 130 /*
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index d5bd497ab9cb..223b1917093e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -48,7 +48,7 @@ struct affs_ext_key {
48 * affs fs inode data in memory 48 * affs fs inode data in memory
49 */ 49 */
50struct affs_inode_info { 50struct affs_inode_info {
51 u32 i_opencnt; 51 atomic_t i_opencnt;
52 struct semaphore i_link_lock; /* Protects internal inode access. */ 52 struct semaphore i_link_lock; /* Protects internal inode access. */
53 struct semaphore i_ext_lock; /* Protects internal inode access. */ 53 struct semaphore i_ext_lock; /* Protects internal inode access. */
54#define i_hash_lock i_ext_lock 54#define i_hash_lock i_ext_lock
@@ -170,8 +170,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
170extern unsigned long affs_parent_ino(struct inode *dir); 170extern unsigned long affs_parent_ino(struct inode *dir);
171extern struct inode *affs_new_inode(struct inode *dir); 171extern struct inode *affs_new_inode(struct inode *dir);
172extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); 172extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
173extern void affs_put_inode(struct inode *inode);
174extern void affs_drop_inode(struct inode *inode);
175extern void affs_delete_inode(struct inode *inode); 173extern void affs_delete_inode(struct inode *inode);
176extern void affs_clear_inode(struct inode *inode); 174extern void affs_clear_inode(struct inode *inode);
177extern struct inode *affs_iget(struct super_block *sb, 175extern struct inode *affs_iget(struct super_block *sb,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6e0c9399200e..6eac7bdeec94 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp)
48{ 48{
49 if (atomic_read(&filp->f_count) != 1) 49 if (atomic_read(&filp->f_count) != 1)
50 return 0; 50 return 0;
51 pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt); 51 pr_debug("AFFS: open(%lu,%d)\n",
52 AFFS_I(inode)->i_opencnt++; 52 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
53 atomic_inc(&AFFS_I(inode)->i_opencnt);
53 return 0; 54 return 0;
54} 55}
55 56
@@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp)
58{ 59{
59 if (atomic_read(&filp->f_count) != 0) 60 if (atomic_read(&filp->f_count) != 0)
60 return 0; 61 return 0;
61 pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt); 62 pr_debug("AFFS: release(%lu, %d)\n",
62 AFFS_I(inode)->i_opencnt--; 63 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
63 if (!AFFS_I(inode)->i_opencnt) 64
65 if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
66 mutex_lock(&inode->i_mutex);
67 if (inode->i_size != AFFS_I(inode)->mmu_private)
68 affs_truncate(inode);
64 affs_free_prealloc(inode); 69 affs_free_prealloc(inode);
70 mutex_unlock(&inode->i_mutex);
71 }
65 72
66 return 0; 73 return 0;
67} 74}
@@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext)
180 /* inline the simplest case: same extended block as last time */ 187 /* inline the simplest case: same extended block as last time */
181 struct buffer_head *bh = AFFS_I(inode)->i_ext_bh; 188 struct buffer_head *bh = AFFS_I(inode)->i_ext_bh;
182 if (ext == AFFS_I(inode)->i_ext_last) 189 if (ext == AFFS_I(inode)->i_ext_last)
183 atomic_inc(&bh->b_count); 190 get_bh(bh);
184 else 191 else
185 /* we have to do more (not inlined) */ 192 /* we have to do more (not inlined) */
186 bh = affs_get_extblock_slow(inode, ext); 193 bh = affs_get_extblock_slow(inode, ext);
@@ -306,7 +313,7 @@ store_ext:
306 affs_brelse(AFFS_I(inode)->i_ext_bh); 313 affs_brelse(AFFS_I(inode)->i_ext_bh);
307 AFFS_I(inode)->i_ext_last = ext; 314 AFFS_I(inode)->i_ext_last = ext;
308 AFFS_I(inode)->i_ext_bh = bh; 315 AFFS_I(inode)->i_ext_bh = bh;
309 atomic_inc(&bh->b_count); 316 get_bh(bh);
310 317
311 return bh; 318 return bh;
312 319
@@ -324,9 +331,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
324 331
325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); 332 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
326 333
327 334 BUG_ON(block > (sector_t)0x7fffffffUL);
328 if (block > (sector_t)0x7fffffffUL)
329 BUG();
330 335
331 if (block >= AFFS_I(inode)->i_blkcnt) { 336 if (block >= AFFS_I(inode)->i_blkcnt) {
332 if (block > AFFS_I(inode)->i_blkcnt || !create) 337 if (block > AFFS_I(inode)->i_blkcnt || !create)
@@ -493,8 +498,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
493 u32 tmp; 498 u32 tmp;
494 499
495 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 500 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
496 if (from > to || to > PAGE_CACHE_SIZE) 501 BUG_ON(from > to || to > PAGE_CACHE_SIZE);
497 BUG();
498 kmap(page); 502 kmap(page);
499 data = page_address(page); 503 data = page_address(page);
500 bsize = AFFS_SB(sb)->s_data_blksize; 504 bsize = AFFS_SB(sb)->s_data_blksize;
@@ -507,8 +511,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
507 if (IS_ERR(bh)) 511 if (IS_ERR(bh))
508 return PTR_ERR(bh); 512 return PTR_ERR(bh);
509 tmp = min(bsize - boff, to - from); 513 tmp = min(bsize - boff, to - from);
510 if (from + tmp > to || tmp > bsize) 514 BUG_ON(from + tmp > to || tmp > bsize);
511 BUG();
512 memcpy(data + from, AFFS_DATA(bh) + boff, tmp); 515 memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
513 affs_brelse(bh); 516 affs_brelse(bh);
514 bidx++; 517 bidx++;
@@ -540,10 +543,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
540 if (IS_ERR(bh)) 543 if (IS_ERR(bh))
541 return PTR_ERR(bh); 544 return PTR_ERR(bh);
542 tmp = min(bsize - boff, newsize - size); 545 tmp = min(bsize - boff, newsize - size);
543 if (boff + tmp > bsize || tmp > bsize) 546 BUG_ON(boff + tmp > bsize || tmp > bsize);
544 BUG();
545 memset(AFFS_DATA(bh) + boff, 0, tmp); 547 memset(AFFS_DATA(bh) + boff, 0, tmp);
546 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 548 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
547 affs_fix_checksum(sb, bh); 549 affs_fix_checksum(sb, bh);
548 mark_buffer_dirty_inode(bh, inode); 550 mark_buffer_dirty_inode(bh, inode);
549 size += tmp; 551 size += tmp;
@@ -560,8 +562,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
560 if (IS_ERR(bh)) 562 if (IS_ERR(bh))
561 goto out; 563 goto out;
562 tmp = min(bsize, newsize - size); 564 tmp = min(bsize, newsize - size);
563 if (tmp > bsize) 565 BUG_ON(tmp > bsize);
564 BUG();
565 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 566 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
566 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); 567 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
567 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); 568 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
@@ -683,10 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
683 if (IS_ERR(bh)) 684 if (IS_ERR(bh))
684 return PTR_ERR(bh); 685 return PTR_ERR(bh);
685 tmp = min(bsize - boff, to - from); 686 tmp = min(bsize - boff, to - from);
686 if (boff + tmp > bsize || tmp > bsize) 687 BUG_ON(boff + tmp > bsize || tmp > bsize);
687 BUG();
688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
689 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 689 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
690 affs_fix_checksum(sb, bh); 690 affs_fix_checksum(sb, bh);
691 mark_buffer_dirty_inode(bh, inode); 691 mark_buffer_dirty_inode(bh, inode);
692 written += tmp; 692 written += tmp;
@@ -732,8 +732,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
732 if (IS_ERR(bh)) 732 if (IS_ERR(bh))
733 goto out; 733 goto out;
734 tmp = min(bsize, to - from); 734 tmp = min(bsize, to - from);
735 if (tmp > bsize) 735 BUG_ON(tmp > bsize);
736 BUG();
737 memcpy(AFFS_DATA(bh), data + from, tmp); 736 memcpy(AFFS_DATA(bh), data + from, tmp);
738 if (buffer_new(bh)) { 737 if (buffer_new(bh)) {
739 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 738 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -834,6 +833,8 @@ affs_truncate(struct inode *inode)
834 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); 833 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
835 if (!res) 834 if (!res)
836 res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); 835 res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
836 else
837 inode->i_size = AFFS_I(inode)->mmu_private;
837 mark_inode_dirty(inode); 838 mark_inode_dirty(inode);
838 return; 839 return;
839 } else if (inode->i_size == AFFS_I(inode)->mmu_private) 840 } else if (inode->i_size == AFFS_I(inode)->mmu_private)
@@ -869,6 +870,7 @@ affs_truncate(struct inode *inode)
869 blk++; 870 blk++;
870 } else 871 } else
871 AFFS_HEAD(ext_bh)->first_data = 0; 872 AFFS_HEAD(ext_bh)->first_data = 0;
873 AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i);
872 size = AFFS_SB(sb)->s_hashsize; 874 size = AFFS_SB(sb)->s_hashsize;
873 if (size > blkcnt - blk + i) 875 if (size > blkcnt - blk + i)
874 size = blkcnt - blk + i; 876 size = blkcnt - blk + i;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27fe6cbe43ae..a13b334a3910 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
58 AFFS_I(inode)->i_extcnt = 1; 58 AFFS_I(inode)->i_extcnt = 1;
59 AFFS_I(inode)->i_ext_last = ~1; 59 AFFS_I(inode)->i_ext_last = ~1;
60 AFFS_I(inode)->i_protect = prot; 60 AFFS_I(inode)->i_protect = prot;
61 AFFS_I(inode)->i_opencnt = 0; 61 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
62 AFFS_I(inode)->i_blkcnt = 0; 62 AFFS_I(inode)->i_blkcnt = 0;
63 AFFS_I(inode)->i_lc = NULL; 63 AFFS_I(inode)->i_lc = NULL;
64 AFFS_I(inode)->i_lc_size = 0; 64 AFFS_I(inode)->i_lc_size = 0;
@@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
108 inode->i_mode |= S_IFDIR; 108 inode->i_mode |= S_IFDIR;
109 } else 109 } else
110 inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR; 110 inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR;
111 if (tail->link_chain)
112 inode->i_nlink = 2;
113 /* Maybe it should be controlled by mount parameter? */ 111 /* Maybe it should be controlled by mount parameter? */
114 //inode->i_mode |= S_ISVTX; 112 //inode->i_mode |= S_ISVTX;
115 inode->i_op = &affs_dir_inode_operations; 113 inode->i_op = &affs_dir_inode_operations;
@@ -245,31 +243,12 @@ out:
245} 243}
246 244
247void 245void
248affs_put_inode(struct inode *inode)
249{
250 pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
251 affs_free_prealloc(inode);
252}
253
254void
255affs_drop_inode(struct inode *inode)
256{
257 mutex_lock(&inode->i_mutex);
258 if (inode->i_size != AFFS_I(inode)->mmu_private)
259 affs_truncate(inode);
260 mutex_unlock(&inode->i_mutex);
261
262 generic_drop_inode(inode);
263}
264
265void
266affs_delete_inode(struct inode *inode) 246affs_delete_inode(struct inode *inode)
267{ 247{
268 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 248 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
269 truncate_inode_pages(&inode->i_data, 0); 249 truncate_inode_pages(&inode->i_data, 0);
270 inode->i_size = 0; 250 inode->i_size = 0;
271 if (S_ISREG(inode->i_mode)) 251 affs_truncate(inode);
272 affs_truncate(inode);
273 clear_inode(inode); 252 clear_inode(inode);
274 affs_free_block(inode->i_sb, inode->i_ino); 253 affs_free_block(inode->i_sb, inode->i_ino);
275} 254}
@@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode)
277void 256void
278affs_clear_inode(struct inode *inode) 257affs_clear_inode(struct inode *inode)
279{ 258{
280 unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc; 259 unsigned long cache_page;
281 260
282 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 261 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
262
263 affs_free_prealloc(inode);
264 cache_page = (unsigned long)AFFS_I(inode)->i_lc;
283 if (cache_page) { 265 if (cache_page) {
284 pr_debug("AFFS: freeing ext cache\n"); 266 pr_debug("AFFS: freeing ext cache\n");
285 AFFS_I(inode)->i_lc = NULL; 267 AFFS_I(inode)->i_lc = NULL;
@@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir)
316 inode->i_ino = block; 298 inode->i_ino = block;
317 inode->i_nlink = 1; 299 inode->i_nlink = 1;
318 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 300 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
319 AFFS_I(inode)->i_opencnt = 0; 301 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
320 AFFS_I(inode)->i_blkcnt = 0; 302 AFFS_I(inode)->i_blkcnt = 0;
321 AFFS_I(inode)->i_lc = NULL; 303 AFFS_I(inode)->i_lc = NULL;
322 AFFS_I(inode)->i_lc_size = 0; 304 AFFS_I(inode)->i_lc_size = 0;
@@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
369 switch (type) { 351 switch (type) {
370 case ST_LINKFILE: 352 case ST_LINKFILE:
371 case ST_LINKDIR: 353 case ST_LINKDIR:
372 inode_bh = bh;
373 retval = -ENOSPC; 354 retval = -ENOSPC;
374 block = affs_alloc_block(dir, dir->i_ino); 355 block = affs_alloc_block(dir, dir->i_ino);
375 if (!block) 356 if (!block)
376 goto err; 357 goto err;
377 retval = -EIO; 358 retval = -EIO;
359 inode_bh = bh;
378 bh = affs_getzeroblk(sb, block); 360 bh = affs_getzeroblk(sb, block);
379 if (!bh) 361 if (!bh)
380 goto err; 362 goto err;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 2218f1ee71ce..cfcf1b6cf82b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
234int 234int
235affs_unlink(struct inode *dir, struct dentry *dentry) 235affs_unlink(struct inode *dir, struct dentry *dentry)
236{ 236{
237 pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino, 237 pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
238 dentry->d_inode->i_ino,
238 (int)dentry->d_name.len, dentry->d_name.name); 239 (int)dentry->d_name.len, dentry->d_name.name);
239 240
240 return affs_remove_header(dentry); 241 return affs_remove_header(dentry);
@@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
302int 303int
303affs_rmdir(struct inode *dir, struct dentry *dentry) 304affs_rmdir(struct inode *dir, struct dentry *dentry)
304{ 305{
305 pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino, 306 pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
307 dentry->d_inode->i_ino,
306 (int)dentry->d_name.len, dentry->d_name.name); 308 (int)dentry->d_name.len, dentry->d_name.name);
307 309
308 return affs_remove_header(dentry); 310 return affs_remove_header(dentry);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d2dc047cb479..d214837d5e42 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep;
71 71
72static struct inode *affs_alloc_inode(struct super_block *sb) 72static struct inode *affs_alloc_inode(struct super_block *sb)
73{ 73{
74 struct affs_inode_info *ei; 74 struct affs_inode_info *i;
75 ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); 75
76 if (!ei) 76 i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
77 if (!i)
77 return NULL; 78 return NULL;
78 ei->vfs_inode.i_version = 1; 79
79 return &ei->vfs_inode; 80 i->vfs_inode.i_version = 1;
81 i->i_lc = NULL;
82 i->i_ext_bh = NULL;
83 i->i_pa_cnt = 0;
84
85 return &i->vfs_inode;
80} 86}
81 87
82static void affs_destroy_inode(struct inode *inode) 88static void affs_destroy_inode(struct inode *inode)
@@ -114,8 +120,6 @@ static const struct super_operations affs_sops = {
114 .alloc_inode = affs_alloc_inode, 120 .alloc_inode = affs_alloc_inode,
115 .destroy_inode = affs_destroy_inode, 121 .destroy_inode = affs_destroy_inode,
116 .write_inode = affs_write_inode, 122 .write_inode = affs_write_inode,
117 .put_inode = affs_put_inode,
118 .drop_inode = affs_drop_inode,
119 .delete_inode = affs_delete_inode, 123 .delete_inode = affs_delete_inode,
120 .clear_inode = affs_clear_inode, 124 .clear_inode = affs_clear_inode,
121 .put_super = affs_put_super, 125 .put_super = affs_put_super,
@@ -199,7 +203,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
199 case Opt_prefix: 203 case Opt_prefix:
200 /* Free any previous prefix */ 204 /* Free any previous prefix */
201 kfree(*prefix); 205 kfree(*prefix);
202 *prefix = NULL;
203 *prefix = match_strdup(&args[0]); 206 *prefix = match_strdup(&args[0]);
204 if (!*prefix) 207 if (!*prefix)
205 return 0; 208 return 0;
@@ -233,6 +236,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 236 break;
234 case Opt_volume: { 237 case Opt_volume: {
235 char *vol = match_strdup(&args[0]); 238 char *vol = match_strdup(&args[0]);
239 if (!vol)
240 return 0;
236 strlcpy(volume, vol, 32); 241 strlcpy(volume, vol, 32);
237 kfree(vol); 242 kfree(vol);
238 break; 243 break;
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7b4d4fab4c80..255f5dd6040c 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -24,7 +24,8 @@ enum AFS_CM_Operations {
24 CBGetXStatsVersion = 209, /* get version of extended statistics */ 24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */ 25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ 26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */ 27 CBProbeUuid = 214, /* check the client hasn't rebooted */
28 CBTellMeAboutYourself = 65538, /* get client capabilities */
28}; 29};
29 30
30#define AFS_CAP_ERROR_TRANSLATION 0x1 31#define AFS_CAP_ERROR_TRANSLATION 0x1
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 584bb0f9c36a..5e1df14e16b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,7 +20,7 @@
20DECLARE_RWSEM(afs_proc_cells_sem); 20DECLARE_RWSEM(afs_proc_cells_sem);
21LIST_HEAD(afs_proc_cells); 21LIST_HEAD(afs_proc_cells);
22 22
23static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 23static LIST_HEAD(afs_cells);
24static DEFINE_RWLOCK(afs_cells_lock); 24static DEFINE_RWLOCK(afs_cells_lock);
25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 47b71c8947f9..eb765489164f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
26 struct sk_buff *, bool); 26 struct sk_buff *, bool);
27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); 27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); 28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
29static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, 29static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
30 bool); 30static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
31 struct sk_buff *, bool);
31static void afs_cm_destructor(struct afs_call *); 32static void afs_cm_destructor(struct afs_call *);
32 33
33/* 34/*
@@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = {
71}; 72};
72 73
73/* 74/*
74 * CB.GetCapabilities operation type 75 * CB.ProbeUuid operation type
75 */ 76 */
76static const struct afs_call_type afs_SRXCBGetCapabilites = { 77static const struct afs_call_type afs_SRXCBProbeUuid = {
77 .name = "CB.GetCapabilities", 78 .name = "CB.ProbeUuid",
78 .deliver = afs_deliver_cb_get_capabilities, 79 .deliver = afs_deliver_cb_probe_uuid,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor,
82};
83
84/*
85 * CB.TellMeAboutYourself operation type
86 */
87static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
88 .name = "CB.TellMeAboutYourself",
89 .deliver = afs_deliver_cb_tell_me_about_yourself,
79 .abort_to_error = afs_abort_to_error, 90 .abort_to_error = afs_abort_to_error,
80 .destructor = afs_cm_destructor, 91 .destructor = afs_cm_destructor,
81}; 92};
@@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
103 case CBProbe: 114 case CBProbe:
104 call->type = &afs_SRXCBProbe; 115 call->type = &afs_SRXCBProbe;
105 return true; 116 return true;
106 case CBGetCapabilities: 117 case CBTellMeAboutYourself:
107 call->type = &afs_SRXCBGetCapabilites; 118 call->type = &afs_SRXCBTellMeAboutYourself;
108 return true; 119 return true;
109 default: 120 default:
110 return false; 121 return false;
@@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
393} 404}
394 405
395/* 406/*
407 * allow the fileserver to quickly find out if the fileserver has been rebooted
408 */
409static void SRXAFSCB_ProbeUuid(struct work_struct *work)
410{
411 struct afs_call *call = container_of(work, struct afs_call, work);
412 struct afs_uuid *r = call->request;
413
414 struct {
415 __be32 match;
416 } reply;
417
418 _enter("");
419
420
421 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
422 reply.match = htonl(0);
423 else
424 reply.match = htonl(1);
425
426 afs_send_simple_reply(call, &reply, sizeof(reply));
427 _leave("");
428}
429
430/*
431 * deliver request data to a CB.ProbeUuid call
432 */
433static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
434 bool last)
435{
436 struct afs_uuid *r;
437 unsigned loop;
438 __be32 *b;
439 int ret;
440
441 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
442
443 if (skb->len > 0)
444 return -EBADMSG;
445 if (!last)
446 return 0;
447
448 switch (call->unmarshall) {
449 case 0:
450 call->offset = 0;
451 call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
452 if (!call->buffer)
453 return -ENOMEM;
454 call->unmarshall++;
455
456 case 1:
457 _debug("extract UUID");
458 ret = afs_extract_data(call, skb, last, call->buffer,
459 11 * sizeof(__be32));
460 switch (ret) {
461 case 0: break;
462 case -EAGAIN: return 0;
463 default: return ret;
464 }
465
466 _debug("unmarshall UUID");
467 call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
468 if (!call->request)
469 return -ENOMEM;
470
471 b = call->buffer;
472 r = call->request;
473 r->time_low = ntohl(b[0]);
474 r->time_mid = ntohl(b[1]);
475 r->time_hi_and_version = ntohl(b[2]);
476 r->clock_seq_hi_and_reserved = ntohl(b[3]);
477 r->clock_seq_low = ntohl(b[4]);
478
479 for (loop = 0; loop < 6; loop++)
480 r->node[loop] = ntohl(b[loop + 5]);
481
482 call->offset = 0;
483 call->unmarshall++;
484
485 case 2:
486 _debug("trailer");
487 if (skb->len != 0)
488 return -EBADMSG;
489 break;
490 }
491
492 if (!last)
493 return 0;
494
495 call->state = AFS_CALL_REPLYING;
496
497 INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
498 schedule_work(&call->work);
499 return 0;
500}
501
502/*
396 * allow the fileserver to ask about the cache manager's capabilities 503 * allow the fileserver to ask about the cache manager's capabilities
397 */ 504 */
398static void SRXAFSCB_GetCapabilities(struct work_struct *work) 505static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
399{ 506{
400 struct afs_interface *ifs; 507 struct afs_interface *ifs;
401 struct afs_call *call = container_of(work, struct afs_call, work); 508 struct afs_call *call = container_of(work, struct afs_call, work);
@@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
456} 563}
457 564
458/* 565/*
459 * deliver request data to a CB.GetCapabilities call 566 * deliver request data to a CB.TellMeAboutYourself call
460 */ 567 */
461static int afs_deliver_cb_get_capabilities(struct afs_call *call, 568static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
462 struct sk_buff *skb, bool last) 569 struct sk_buff *skb, bool last)
463{ 570{
464 _enter(",{%u},%d", skb->len, last); 571 _enter(",{%u},%d", skb->len, last);
465 572
@@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call,
471 /* no unmarshalling required */ 578 /* no unmarshalling required */
472 call->state = AFS_CALL_REPLYING; 579 call->state = AFS_CALL_REPLYING;
473 580
474 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); 581 INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
475 schedule_work(&call->work); 582 schedule_work(&call->work);
476 return 0; 583 return 0;
477} 584}
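
The CB.ProbeUuid and CB.TellMeAboutYourself handlers above follow the same shape as the other cache-manager calls: the delivery routine unmarshals into call->request, flips the call to AFS_CALL_REPLYING, and defers the actual reply to a workqueue. A minimal sketch of that defer-to-workqueue idiom, with made-up names (my_request, my_reply_worker, my_deliver) that are not part of the AFS code:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_request {
	struct work_struct work;
	unsigned int payload;
};

/* runs in process context, free to block while building the reply */
static void my_reply_worker(struct work_struct *work)
{
	struct my_request *req = container_of(work, struct my_request, work);

	/* ... marshal and send the reply here ... */
	kfree(req);
}

/* called from the (non-blocking) delivery path */
static int my_deliver(unsigned int payload)
{
	struct my_request *req = kmalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	req->payload = payload;

	INIT_WORK(&req->work, my_reply_worker);
	schedule_work(&req->work);
	return 0;
}
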
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc4..dfda03d4397d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
140 140
141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { 141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
143 __FUNCTION__, dir->i_ino, qty, 143 __func__, dir->i_ino, qty,
144 ntohs(dbuf->blocks[0].pagehdr.npages)); 144 ntohs(dbuf->blocks[0].pagehdr.npages));
145 goto error; 145 goto error;
146 } 146 }
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
159 for (tmp = 0; tmp < qty; tmp++) { 159 for (tmp = 0; tmp < qty; tmp++) {
160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
162 __FUNCTION__, dir->i_ino, tmp, qty, 162 __func__, dir->i_ino, tmp, qty,
163 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 163 ntohs(dbuf->blocks[tmp].pagehdr.magic));
164 goto error; 164 goto error;
165 } 165 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de72..7102824ba847 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
757{ 757{
758} 758}
759 759
760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) 762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
763 763
764 764
@@ -791,8 +791,8 @@ do { \
791} while (0) 791} while (0)
792 792
793#else 793#else
794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) 796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
797#endif 797#endif
798 798
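
The __FUNCTION__ to __func__ conversions here and in the later hunks (autofs4, bfs, buffer.c) swap the old GCC-specific spelling for the C99 predefined identifier; both expand to the name of the enclosing function. A trivial illustration (demo() is a made-up function):

#include <linux/kernel.h>

static void demo(void)
{
	/* prints "demo: called" with either spelling */
	printk(KERN_DEBUG "%s: called\n", __func__);
}
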
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 846c7615ac9e..9f7d1ae70269 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = {
41 .write = afs_proc_cells_write, 41 .write = afs_proc_cells_write,
42 .llseek = seq_lseek, 42 .llseek = seq_lseek,
43 .release = seq_release, 43 .release = seq_release,
44 .owner = THIS_MODULE,
44}; 45};
45 46
46static int afs_proc_rootcell_open(struct inode *inode, struct file *file); 47static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
@@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = {
56 .read = afs_proc_rootcell_read, 57 .read = afs_proc_rootcell_read,
57 .write = afs_proc_rootcell_write, 58 .write = afs_proc_rootcell_write,
58 .llseek = no_llseek, 59 .llseek = no_llseek,
59 .release = afs_proc_rootcell_release 60 .release = afs_proc_rootcell_release,
61 .owner = THIS_MODULE,
60}; 62};
61 63
62static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); 64static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
@@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
80 .read = seq_read, 82 .read = seq_read,
81 .llseek = seq_lseek, 83 .llseek = seq_lseek,
82 .release = afs_proc_cell_volumes_release, 84 .release = afs_proc_cell_volumes_release,
85 .owner = THIS_MODULE,
83}; 86};
84 87
85static int afs_proc_cell_vlservers_open(struct inode *inode, 88static int afs_proc_cell_vlservers_open(struct inode *inode,
@@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
104 .read = seq_read, 107 .read = seq_read,
105 .llseek = seq_lseek, 108 .llseek = seq_lseek,
106 .release = afs_proc_cell_vlservers_release, 109 .release = afs_proc_cell_vlservers_release,
110 .owner = THIS_MODULE,
107}; 111};
108 112
109static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 113static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
@@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
127 .read = seq_read, 131 .read = seq_read,
128 .llseek = seq_lseek, 132 .llseek = seq_lseek,
129 .release = afs_proc_cell_servers_release, 133 .release = afs_proc_cell_servers_release,
134 .owner = THIS_MODULE,
130}; 135};
131 136
132/* 137/*
@@ -143,17 +148,13 @@ int afs_proc_init(void)
143 goto error_dir; 148 goto error_dir;
144 proc_afs->owner = THIS_MODULE; 149 proc_afs->owner = THIS_MODULE;
145 150
146 p = create_proc_entry("cells", 0, proc_afs); 151 p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
147 if (!p) 152 if (!p)
148 goto error_cells; 153 goto error_cells;
149 p->proc_fops = &afs_proc_cells_fops;
150 p->owner = THIS_MODULE;
151 154
152 p = create_proc_entry("rootcell", 0, proc_afs); 155 p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
153 if (!p) 156 if (!p)
154 goto error_rootcell; 157 goto error_rootcell;
155 p->proc_fops = &afs_proc_rootcell_fops;
156 p->owner = THIS_MODULE;
157 158
158 _leave(" = 0"); 159 _leave(" = 0");
159 return 0; 160 return 0;
@@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell)
395 if (!cell->proc_dir) 396 if (!cell->proc_dir)
396 goto error_dir; 397 goto error_dir;
397 398
398 p = create_proc_entry("servers", 0, cell->proc_dir); 399 p = proc_create_data("servers", 0, cell->proc_dir,
400 &afs_proc_cell_servers_fops, cell);
399 if (!p) 401 if (!p)
400 goto error_servers; 402 goto error_servers;
401 p->proc_fops = &afs_proc_cell_servers_fops;
402 p->owner = THIS_MODULE;
403 p->data = cell;
404 403
405 p = create_proc_entry("vlservers", 0, cell->proc_dir); 404 p = proc_create_data("vlservers", 0, cell->proc_dir,
405 &afs_proc_cell_vlservers_fops, cell);
406 if (!p) 406 if (!p)
407 goto error_vlservers; 407 goto error_vlservers;
408 p->proc_fops = &afs_proc_cell_vlservers_fops;
409 p->owner = THIS_MODULE;
410 p->data = cell;
411 408
412 p = create_proc_entry("volumes", 0, cell->proc_dir); 409 p = proc_create_data("volumes", 0, cell->proc_dir,
410 &afs_proc_cell_volumes_fops, cell);
413 if (!p) 411 if (!p)
414 goto error_volumes; 412 goto error_volumes;
415 p->proc_fops = &afs_proc_cell_volumes_fops;
416 p->owner = THIS_MODULE;
417 p->data = cell;
418 413
419 _leave(" = 0"); 414 _leave(" = 0");
420 return 0; 415 return 0;
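
The proc hunks above replace the create_proc_entry() plus manual proc_fops/owner/data assignment sequence with proc_create() and proc_create_data(), which publish the entry with its file_operations and private data already set. A rough sketch of creating such an entry with a seq_file read path; the name "example" and all example_* identifiers are placeholders, not kernel APIs:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char example_msg[] = "hello";

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", (char *)m->private);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is whatever was passed to proc_create_data() */
	return single_open(file, example_show, PDE(inode)->data);
}

static const struct file_operations example_fops = {
	.owner	 = THIS_MODULE,
	.open	 = example_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};

static int __init example_proc_init(void)
{
	if (!proc_create_data("example", 0, NULL, &example_fops, example_msg))
		return -ENOMEM;
	return 0;
}
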
diff --git a/fs/aio.c b/fs/aio.c
index ae94e1dea266..b5253e77eb2f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194
195/* __put_ioctx
196 * Called when the last user of an aio context has gone away,
197 * and the struct needs to be freed.
198 */
199static void __put_ioctx(struct kioctx *ctx)
200{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active);
204
205 cancel_delayed_work(&ctx->wq);
206 cancel_work_sync(&ctx->wq.work);
207 aio_free_ring(ctx);
208 mmdrop(ctx->mm);
209 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219}
220
221#define get_ioctx(kioctx) do { \
222 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
223 atomic_inc(&(kioctx)->users); \
224} while (0)
225#define put_ioctx(kioctx) do { \
226 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
227 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
228 __put_ioctx(kioctx); \
229} while (0)
230
194/* ioctx_alloc 231/* ioctx_alloc
195 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 232 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
196 */ 233 */
@@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
240 if (ctx->max_reqs == 0) 277 if (ctx->max_reqs == 0)
241 goto out_cleanup; 278 goto out_cleanup;
242 279
243 /* now link into global list. kludge. FIXME */ 280 /* now link into global list. */
244 write_lock(&mm->ioctx_list_lock); 281 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list; 282 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx; 283 mm->ioctx_list = ctx;
@@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm)
361 } 398 }
362} 399}
363 400
364/* __put_ioctx
365 * Called when the last user of an aio context has gone away,
366 * and the struct needs to be freed.
367 */
368void __put_ioctx(struct kioctx *ctx)
369{
370 unsigned nr_events = ctx->max_reqs;
371
372 BUG_ON(ctx->reqs_active);
373
374 cancel_delayed_work(&ctx->wq);
375 cancel_work_sync(&ctx->wq.work);
376 aio_free_ring(ctx);
377 mmdrop(ctx->mm);
378 ctx->mm = NULL;
379 pr_debug("__put_ioctx: freeing %p\n", ctx);
380 kmem_cache_free(kioctx_cachep, ctx);
381
382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
388}
389
390/* aio_get_req 401/* aio_get_req
391 * Allocate a slot for an aio request. Increments the users count 402 * Allocate a slot for an aio request. Increments the users count
392 * of the kioctx so that the kioctx stays around until all requests are 403 * of the kioctx so that the kioctx stays around until all requests are
@@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req)
542 return ret; 553 return ret;
543} 554}
544 555
545/* Lookup an ioctx id. ioctx_list is lockless for reads. 556static struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 * FIXME: this is O(n) and is only suitable for development.
547 */
548struct kioctx *lookup_ioctx(unsigned long ctx_id)
549{ 557{
550 struct kioctx *ioctx; 558 struct kioctx *ioctx;
551 struct mm_struct *mm; 559 struct mm_struct *mm;
@@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data)
1070 1078
1071static inline void init_timeout(struct aio_timeout *to) 1079static inline void init_timeout(struct aio_timeout *to)
1072{ 1080{
1073 init_timer(&to->timer); 1081 setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
1074 to->timer.data = (unsigned long)to;
1075 to->timer.function = timeout_func;
1076 to->timed_out = 0; 1082 to->timed_out = 0;
1077 to->p = current; 1083 to->p = current;
1078} 1084}
@@ -1205,6 +1211,7 @@ retry:
1205 if (timeout) 1211 if (timeout)
1206 clear_timeout(&to); 1212 clear_timeout(&to);
1207out: 1213out:
1214 destroy_timer_on_stack(&to.timer);
1208 return i ? i : ret; 1215 return i ? i : ret;
1209} 1216}
1210 1217
@@ -1552,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1552 return 1; 1559 return 1;
1553} 1560}
1554 1561
1555int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1562static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1556 struct iocb *iocb) 1563 struct iocb *iocb)
1557{ 1564{
1558 struct kiocb *req; 1565 struct kiocb *req;
@@ -1593,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1593 * event using the eventfd_signal() function. 1600 * event using the eventfd_signal() function.
1594 */ 1601 */
1595 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1602 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1596 if (unlikely(IS_ERR(req->ki_eventfd))) { 1603 if (IS_ERR(req->ki_eventfd)) {
1597 ret = PTR_ERR(req->ki_eventfd); 1604 ret = PTR_ERR(req->ki_eventfd);
1598 goto out_put_req; 1605 goto out_put_req;
1599 } 1606 }
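
In the aio timeout setup, the open-coded init_timer() plus field assignments become setup_timer_on_stack(), and destroy_timer_on_stack() is added on the way out so that the timer object debugging code can track timers living on the stack. A hedged sketch of how the pair is used, with invented names:

#include <linux/timer.h>
#include <linux/jiffies.h>

static void my_timeout(unsigned long data)
{
	/* 'data' carries whatever was passed to setup_timer_on_stack() */
	*(int *)data = 1;
}

static void wait_with_timeout(void)
{
	struct timer_list timer;
	int expired = 0;

	setup_timer_on_stack(&timer, my_timeout, (unsigned long)&expired);
	mod_timer(&timer, jiffies + HZ);	/* fire in one second */

	/* ... sleep or poll until the work is done or 'expired' is set ... */

	del_timer_sync(&timer);
	destroy_timer_on_stack(&timer);	/* must precede leaving the frame */
}
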
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e085..977ef208c051 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
 57 * anonymous inode, and a dentry that describes the "class" 57 * anonymous inode, and a dentry that describes the "class"
58 * of the file 58 * of the file
59 * 59 *
60 * @pfd: [out] pointer to the file descriptor
61 * @dpinode: [out] pointer to the inode
62 * @pfile: [out] pointer to the file struct
63 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
64 * @fops [in] file operations for the new file 61 * @fops [in] file operations for the new file
65 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * that do not need to have a full-fledged inode in order to operate correctly. 65 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, 66 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 67 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 68 * setup. Returns new descriptor or -error.
72 */ 69 */
73int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 70int anon_inode_getfd(const char *name, const struct file_operations *fops,
74 const char *name, const struct file_operations *fops,
75 void *priv) 71 void *priv)
76{ 72{
77 struct qstr this; 73 struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
125 121
126 fd_install(fd, file); 122 fd_install(fd, file);
127 123
128 *pfd = fd; 124 return fd;
129 *pinode = anon_inode_inode;
130 *pfile = file;
131 return 0;
132 125
133err_dput: 126err_dput:
134 dput(dentry); 127 dput(dentry);
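
anon_inode_getfd() loses its three output parameters and now simply returns the installed descriptor or a negative errno, so callers collapse to a single call plus an error check. A hypothetical caller under the new signature; my_file_fops, my_create_fd and the "[my-ctx]" name are assumptions:

#include <linux/module.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>

static const struct file_operations my_file_fops = {
	.owner = THIS_MODULE,
	/* .read/.poll/.release as needed for the "class" */
};

static int my_create_fd(void *ctx)
{
	int fd;

	fd = anon_inode_getfd("[my-ctx]", &my_file_fops, ctx);
	if (fd < 0)
		return fd;	/* e.g. -ENFILE or -ENOMEM */

	/* the descriptor is already installed in the caller's file table */
	return fd;
}
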
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d9..c3d352d7fa93 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37#ifdef DEBUG 37#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) 38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
39#else 39#else
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9ca..894fee54d4d8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
73 status = 0; 73 status = 0;
74done: 74done:
75 DPRINTK("returning = %d", status); 75 DPRINTK("returning = %d", status);
76 mntput(mnt);
77 dput(dentry); 76 dput(dentry);
77 mntput(mnt);
78 return status; 78 return status;
79} 79}
80 80
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
333 /* Can we expire this guy */ 333 /* Can we expire this guy */
334 if (autofs4_can_expire(dentry, timeout, do_now)) { 334 if (autofs4_can_expire(dentry, timeout, do_now)) {
335 expired = dentry; 335 expired = dentry;
336 break; 336 goto found;
337 } 337 }
338 goto next; 338 goto next;
339 } 339 }
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
352 inf->flags |= AUTOFS_INF_EXPIRING; 352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock); 353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 354 expired = dentry;
355 break; 355 goto found;
356 } 356 }
357 spin_unlock(&sbi->fs_lock); 357 spin_unlock(&sbi->fs_lock);
358 /* 358 /*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 364 if (expired) {
365 dput(dentry); 365 dput(dentry);
366 break; 366 goto found;
367 } 367 }
368 } 368 }
369next: 369next:
@@ -371,18 +371,16 @@ next:
371 spin_lock(&dcache_lock); 371 spin_lock(&dcache_lock);
372 next = next->next; 372 next = next->next;
373 } 373 }
374
375 if (expired) {
376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock);
379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 spin_unlock(&dcache_lock);
381 return expired;
382 }
383 spin_unlock(&dcache_lock); 374 spin_unlock(&dcache_lock);
384
385 return NULL; 375 return NULL;
376
377found:
378 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name);
380 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock);
383 return expired;
386} 384}
387 385
388/* Perform an expiry operation */ 386/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a54a946a50ae..edf5b6bddb52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
146 146
147 if (d_mountpoint(dentry)) { 147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL; 148 struct file *fp = NULL;
149 struct vfsmount *fp_mnt = mntget(mnt); 149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150 struct dentry *fp_dentry = dget(dentry);
151 150
152 if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { 151 path_get(&fp_path);
153 dput(fp_dentry); 152
154 mntput(fp_mnt); 153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file); 155 dcache_dir_close(inode, file);
156 goto out; 156 goto out;
157 } 157 }
158 158
159 fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); 159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp); 160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) { 161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file); 162 dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 242{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 244 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 int status = 0; 245 struct dentry *new;
246 int status;
246 247
247 /* Block on any pending expiry here; invalidate the dentry 248 /* Block on any pending expiry here; invalidate the dentry
248 when expiration is done to trigger mount request with a new 249 when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
318 spin_lock(&dentry->d_lock); 319 spin_lock(&dentry->d_lock);
319 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
320 spin_unlock(&dentry->d_lock); 321 spin_unlock(&dentry->d_lock);
321 return status; 322
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0;
322} 344}
323 345
324/* For autofs direct mounts the follow link triggers the mount */ 346/* For autofs direct mounts the follow link triggers the mount */
@@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
533 goto next; 555 goto next;
534 556
535 if (d_unhashed(dentry)) { 557 if (d_unhashed(dentry)) {
536 struct autofs_info *ino = autofs4_dentry_ino(dentry);
537 struct inode *inode = dentry->d_inode; 558 struct inode *inode = dentry->d_inode;
538 559
560 ino = autofs4_dentry_ino(dentry);
539 list_del_init(&ino->rehash); 561 list_del_init(&ino->rehash);
540 dget(dentry); 562 dget(dentry);
541 /* 563 /*
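
The autofs4_dir_open() hunk (and the dput()/mntput() reordering in expire.c above) moves to the struct path helpers: path_get() and path_put() treat the {vfsmount, dentry} pair as one object and release the dentry before the mount. A small sketch, assuming the caller already holds valid mnt and dentry references:

#include <linux/path.h>
#include <linux/namei.h>
#include <linux/dcache.h>
#include <linux/mount.h>

static void use_path(struct vfsmount *mnt, struct dentry *dentry)
{
	struct path p = { .mnt = mnt, .dentry = dentry };

	path_get(&p);		/* takes a reference on both mnt and dentry */

	/* ... follow mounts, open the dentry, etc. ... */

	path_put(&p);		/* dput() first, then mntput() */
}
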
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c2..75e5955c3f6d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
172 len += tmp->d_name.len + 1; 172 len += tmp->d_name.len + 1;
173 173
174 if (--len > NAME_MAX) { 174 if (!len || --len > NAME_MAX) {
175 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
176 return 0; 176 return 0;
177 } 177 }
diff --git a/fs/befs/endian.h b/fs/befs/endian.h
index e254a20869f4..6cb84d896d05 100644
--- a/fs/befs/endian.h
+++ b/fs/befs/endian.h
@@ -9,7 +9,7 @@
9#ifndef LINUX_BEFS_ENDIAN 9#ifndef LINUX_BEFS_ENDIAN
10#define LINUX_BEFS_ENDIAN 10#define LINUX_BEFS_ENDIAN
11 11
12#include <linux/byteorder/generic.h> 12#include <asm/byteorder.h>
13 13
14static inline u64 14static inline u64
15fs64_to_cpu(const struct super_block *sb, fs64 n) 15fs64_to_cpu(const struct super_block *sb, fs64 n)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 82123ff3e1dd..e8717de3bab3 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
489{ 489{
490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
492 char *p = nd_get_link(nd); 492 char *link = nd_get_link(nd);
493 if (!IS_ERR(p)) 493 if (!IS_ERR(link))
494 kfree(p); 494 kfree(link);
495 } 495 }
496} 496}
497 497
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d23908..70f5d3a8eede 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
42 42
43 43
44#define printf(format, args...) \ 44#define printf(format, args...) \
45 printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) 45 printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a1bb2244cac7..ba4cddb92f1d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
372 372
373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); 373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374 } else { 374 } else {
375 static unsigned long error_time, error_time2;
376 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 375 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
377 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) 376 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
378 { 377 {
379 printk(KERN_NOTICE "executable not page aligned\n"); 378 printk(KERN_NOTICE "executable not page aligned\n");
380 error_time2 = jiffies;
381 } 379 }
382 380
383 if ((fd_offset & ~PAGE_MASK) != 0 && 381 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
384 (jiffies-error_time) > 5*HZ)
385 { 382 {
386 printk(KERN_WARNING 383 printk(KERN_WARNING
387 "fd_offset is not page aligned. Please convert program: %s\n", 384 "fd_offset is not page aligned. Please convert program: %s\n",
388 bprm->file->f_path.dentry->d_name.name); 385 bprm->file->f_path.dentry->d_name.name);
389 error_time = jiffies;
390 } 386 }
391 387
392 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 388 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -495,15 +491,13 @@ static int load_aout_library(struct file *file)
495 start_addr = ex.a_entry & 0xfffff000; 491 start_addr = ex.a_entry & 0xfffff000;
496 492
497 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 493 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
498 static unsigned long error_time;
499 loff_t pos = N_TXTOFF(ex); 494 loff_t pos = N_TXTOFF(ex);
500 495
501 if ((jiffies-error_time) > 5*HZ) 496 if (printk_ratelimit())
502 { 497 {
503 printk(KERN_WARNING 498 printk(KERN_WARNING
504 "N_TXTOFF is not page aligned. Please convert library: %s\n", 499 "N_TXTOFF is not page aligned. Please convert library: %s\n",
505 file->f_path.dentry->d_name.name); 500 file->f_path.dentry->d_name.name);
506 error_time = jiffies;
507 } 501 }
508 down_write(&current->mm->mmap_sem); 502 down_write(&current->mm->mmap_sem);
509 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 503 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
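
The a.out loader drops its per-site "static unsigned long error_time; if (jiffies - error_time > 5*HZ)" throttling in favour of printk_ratelimit(), which applies the kernel-wide printk rate limit. In sketch form (warn_misaligned() is a made-up helper):

#include <linux/kernel.h>

static void warn_misaligned(const char *name)
{
	/* at most a burst of messages per ratelimit interval, globally */
	if (printk_ratelimit())
		printk(KERN_WARNING "%s is not page aligned\n", name);
}
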
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9924581df6f6..b25707fee2cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file,
1255static void fill_elf_header(struct elfhdr *elf, int segs, 1255static void fill_elf_header(struct elfhdr *elf, int segs,
1256 u16 machine, u32 flags, u8 osabi) 1256 u16 machine, u32 flags, u8 osabi)
1257{ 1257{
1258 memset(elf, 0, sizeof(*elf));
1259
1258 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1260 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1259 elf->e_ident[EI_CLASS] = ELF_CLASS; 1261 elf->e_ident[EI_CLASS] = ELF_CLASS;
1260 elf->e_ident[EI_DATA] = ELF_DATA; 1262 elf->e_ident[EI_DATA] = ELF_DATA;
1261 elf->e_ident[EI_VERSION] = EV_CURRENT; 1263 elf->e_ident[EI_VERSION] = EV_CURRENT;
1262 elf->e_ident[EI_OSABI] = ELF_OSABI; 1264 elf->e_ident[EI_OSABI] = ELF_OSABI;
1263 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1264 1265
1265 elf->e_type = ET_CORE; 1266 elf->e_type = ET_CORE;
1266 elf->e_machine = machine; 1267 elf->e_machine = machine;
1267 elf->e_version = EV_CURRENT; 1268 elf->e_version = EV_CURRENT;
1268 elf->e_entry = 0;
1269 elf->e_phoff = sizeof(struct elfhdr); 1269 elf->e_phoff = sizeof(struct elfhdr);
1270 elf->e_shoff = 0;
1271 elf->e_flags = flags; 1270 elf->e_flags = flags;
1272 elf->e_ehsize = sizeof(struct elfhdr); 1271 elf->e_ehsize = sizeof(struct elfhdr);
1273 elf->e_phentsize = sizeof(struct elf_phdr); 1272 elf->e_phentsize = sizeof(struct elf_phdr);
1274 elf->e_phnum = segs; 1273 elf->e_phnum = segs;
1275 elf->e_shentsize = 0; 1274
1276 elf->e_shnum = 0;
1277 elf->e_shstrndx = 0;
1278 return; 1275 return;
1279} 1276}
1280 1277
@@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1725 1722
1726 info->thread_status_size = 0; 1723 info->thread_status_size = 0;
1727 if (signr) { 1724 if (signr) {
1728 struct elf_thread_status *tmp; 1725 struct elf_thread_status *ets;
1729 rcu_read_lock(); 1726 rcu_read_lock();
1730 do_each_thread(g, p) 1727 do_each_thread(g, p)
1731 if (current->mm == p->mm && current != p) { 1728 if (current->mm == p->mm && current != p) {
1732 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1729 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1733 if (!tmp) { 1730 if (!ets) {
1734 rcu_read_unlock(); 1731 rcu_read_unlock();
1735 return 0; 1732 return 0;
1736 } 1733 }
1737 tmp->thread = p; 1734 ets->thread = p;
1738 list_add(&tmp->list, &info->thread_list); 1735 list_add(&ets->list, &info->thread_list);
1739 } 1736 }
1740 while_each_thread(g, p); 1737 while_each_thread(g, p);
1741 rcu_read_unlock(); 1738 rcu_read_unlock();
1742 list_for_each(t, &info->thread_list) { 1739 list_for_each(t, &info->thread_list) {
1743 struct elf_thread_status *tmp;
1744 int sz; 1740 int sz;
1745 1741
1746 tmp = list_entry(t, struct elf_thread_status, list); 1742 ets = list_entry(t, struct elf_thread_status, list);
1747 sz = elf_dump_thread_status(signr, tmp); 1743 sz = elf_dump_thread_status(signr, ets);
1748 info->thread_status_size += sz; 1744 info->thread_status_size += sz;
1749 } 1745 }
1750 } 1746 }
@@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2000 1996
2001 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 1997 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2002 struct page *page; 1998 struct page *page;
2003 struct vm_area_struct *vma; 1999 struct vm_area_struct *tmp_vma;
2004 2000
2005 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2001 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2006 &page, &vma) <= 0) { 2002 &page, &tmp_vma) <= 0) {
2007 DUMP_SEEK(PAGE_SIZE); 2003 DUMP_SEEK(PAGE_SIZE);
2008 } else { 2004 } else {
2009 if (page == ZERO_PAGE(0)) { 2005 if (page == ZERO_PAGE(0)) {
@@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2013 } 2009 }
2014 } else { 2010 } else {
2015 void *kaddr; 2011 void *kaddr;
2016 flush_cache_page(vma, addr, 2012 flush_cache_page(tmp_vma, addr,
2017 page_to_pfn(page)); 2013 page_to_pfn(page));
2018 kaddr = kmap(page); 2014 kaddr = kmap(page);
2019 if ((size += PAGE_SIZE) > limit || 2015 if ((size += PAGE_SIZE) > limit ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32649f2a1654..ddd35d873391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
136 136
137 retval = kernel_read(file, params->hdr.e_phoff, 137 retval = kernel_read(file, params->hdr.e_phoff,
138 (char *) params->phdrs, size); 138 (char *) params->phdrs, size);
139 if (retval < 0) 139 if (unlikely(retval != size))
140 return retval; 140 return retval < 0 ? retval : -ENOEXEC;
141 141
142 /* determine stack size for this binary */ 142 /* determine stack size for this binary */
143 phdr = params->phdrs; 143 phdr = params->phdrs;
@@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
218 phdr->p_offset, 218 phdr->p_offset,
219 interpreter_name, 219 interpreter_name,
220 phdr->p_filesz); 220 phdr->p_filesz);
221 if (retval < 0) 221 if (unlikely(retval != phdr->p_filesz)) {
222 if (retval >= 0)
223 retval = -ENOEXEC;
222 goto error; 224 goto error;
225 }
223 226
224 retval = -ENOENT; 227 retval = -ENOENT;
225 if (interpreter_name[phdr->p_filesz - 1] != '\0') 228 if (interpreter_name[phdr->p_filesz - 1] != '\0')
@@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 248
246 retval = kernel_read(interpreter, 0, bprm->buf, 249 retval = kernel_read(interpreter, 0, bprm->buf,
247 BINPRM_BUF_SIZE); 250 BINPRM_BUF_SIZE);
248 if (retval < 0) 251 if (unlikely(retval != BINPRM_BUF_SIZE)) {
252 if (retval >= 0)
253 retval = -ENOEXEC;
249 goto error; 254 goto error;
255 }
250 256
251 interp_params.hdr = *((struct elfhdr *) bprm->buf); 257 interp_params.hdr = *((struct elfhdr *) bprm->buf);
252 break; 258 break;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f95ae9789c91..f9c88d0c8ced 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang++; /* Well, the bang-shell is implicit... */ 46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 0498b181dd52..3b40d45a3a16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm,
531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
532 532
533 down_write(&current->mm->mmap_sem); 533 down_write(&current->mm->mmap_sem);
534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); 534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
535 MAP_PRIVATE|MAP_EXECUTABLE, 0);
535 up_write(&current->mm->mmap_sem); 536 up_write(&current->mm->mmap_sem);
536 if (!textpos || textpos >= (unsigned long) -4096) { 537 if (!textpos || textpos >= (unsigned long) -4096) {
537 if (!textpos) 538 if (!textpos)
@@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void)
932 return register_binfmt(&flat_format); 933 return register_binfmt(&flat_format);
933} 934}
934 935
935static void __exit exit_flat_binfmt(void)
936{
937 unregister_binfmt(&flat_format);
938}
939
940/****************************************************************************/ 936/****************************************************************************/
941 937
942core_initcall(init_flat_binfmt); 938core_initcall(init_flat_binfmt);
943module_exit(exit_flat_binfmt);
944 939
945/****************************************************************************/ 940/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index dbf0ac0523de..7191306367c5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
115 if (!enabled) 115 if (!enabled)
116 goto _ret; 116 goto _ret;
117 117
118 retval = -ENOEXEC;
119 if (bprm->misc_bang)
120 goto _ret;
121
122 bprm->misc_bang = 1;
123
118 /* to keep locking time low, we copy the interpreter string */ 124 /* to keep locking time low, we copy the interpreter string */
119 read_lock(&entries_lock); 125 read_lock(&entries_lock);
120 fmt = check_file(bprm); 126 fmt = check_file(bprm);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index ab33939b12a7..9e3963f7ebf1 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
29 * Sorta complicated, but hopefully it will work. -TYT 29 * Sorta complicated, but hopefully it will work. -TYT
30 */ 30 */
31 31
32 bprm->sh_bang++; 32 bprm->sh_bang = 1;
33 allow_write_access(bprm->file); 33 allow_write_access(bprm->file);
34 fput(bprm->file); 34 fput(bprm->file);
35 bprm->file = NULL; 35 bprm->file = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df03..78562574cb52 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
158 158
159 bio_init(bio); 159 bio_init(bio);
160 if (likely(nr_iovecs)) { 160 if (likely(nr_iovecs)) {
161 unsigned long idx = 0; /* shut up gcc */ 161 unsigned long uninitialized_var(idx);
162 162
163 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 163 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
164 if (unlikely(!bvl)) { 164 if (unlikely(!bvl)) {
@@ -937,6 +937,96 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
937 return ERR_PTR(-EINVAL); 937 return ERR_PTR(-EINVAL);
938} 938}
939 939
940static void bio_copy_kern_endio(struct bio *bio, int err)
941{
942 struct bio_vec *bvec;
943 const int read = bio_data_dir(bio) == READ;
944 char *p = bio->bi_private;
945 int i;
946
947 __bio_for_each_segment(bvec, bio, i, 0) {
948 char *addr = page_address(bvec->bv_page);
949
950 if (read && !err)
951 memcpy(p, addr, bvec->bv_len);
952
953 __free_page(bvec->bv_page);
954 p += bvec->bv_len;
955 }
956
957 bio_put(bio);
958}
959
960/**
961 * bio_copy_kern - copy kernel address into bio
962 * @q: the struct request_queue for the bio
963 * @data: pointer to buffer to copy
964 * @len: length in bytes
965 * @gfp_mask: allocation flags for bio and page allocation
966 * @reading: data direction is READ
967 *
968 * copy the kernel address into a bio suitable for io to a block
969 * device. Returns an error pointer in case of error.
970 */
971struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
972 gfp_t gfp_mask, int reading)
973{
974 unsigned long kaddr = (unsigned long)data;
975 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
976 unsigned long start = kaddr >> PAGE_SHIFT;
977 const int nr_pages = end - start;
978 struct bio *bio;
979 struct bio_vec *bvec;
980 int i, ret;
981
982 bio = bio_alloc(gfp_mask, nr_pages);
983 if (!bio)
984 return ERR_PTR(-ENOMEM);
985
986 while (len) {
987 struct page *page;
988 unsigned int bytes = PAGE_SIZE;
989
990 if (bytes > len)
991 bytes = len;
992
993 page = alloc_page(q->bounce_gfp | gfp_mask);
994 if (!page) {
995 ret = -ENOMEM;
996 goto cleanup;
997 }
998
999 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
1000 ret = -EINVAL;
1001 goto cleanup;
1002 }
1003
1004 len -= bytes;
1005 }
1006
1007 if (!reading) {
1008 void *p = data;
1009
1010 bio_for_each_segment(bvec, bio, i) {
1011 char *addr = page_address(bvec->bv_page);
1012
1013 memcpy(addr, p, bvec->bv_len);
1014 p += bvec->bv_len;
1015 }
1016 }
1017
1018 bio->bi_private = data;
1019 bio->bi_end_io = bio_copy_kern_endio;
1020 return bio;
1021cleanup:
1022 bio_for_each_segment(bvec, bio, i)
1023 __free_page(bvec->bv_page);
1024
1025 bio_put(bio);
1026
1027 return ERR_PTR(ret);
1028}
1029
940/* 1030/*
941 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 1031 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
942 * for performing direct-IO in BIOs. 1032 * for performing direct-IO in BIOs.
@@ -1273,6 +1363,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user); 1363EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user); 1364EXPORT_SYMBOL(bio_unmap_user);
1275EXPORT_SYMBOL(bio_map_kern); 1365EXPORT_SYMBOL(bio_map_kern);
1366EXPORT_SYMBOL(bio_copy_kern);
1276EXPORT_SYMBOL(bio_pair_release); 1367EXPORT_SYMBOL(bio_pair_release);
1277EXPORT_SYMBOL(bio_split); 1368EXPORT_SYMBOL(bio_split);
1278EXPORT_SYMBOL(bio_split_pool); 1369EXPORT_SYMBOL(bio_split_pool);
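
The new bio_copy_kern() is the bounce-buffer counterpart to bio_map_kern(): the data is copied into freshly allocated pages, and bio_copy_kern_endio() copies it back on a read and frees the pages. A hypothetical caller; 'q', 'buf' and make_kernel_bio() are assumptions, not existing block-layer code:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static struct bio *make_kernel_bio(struct request_queue *q, void *buf,
				   unsigned int len, int reading)
{
	struct bio *bio;

	/* 'reading' selects the copy-back direction in the end_io handler */
	bio = bio_copy_kern(q, buf, len, GFP_KERNEL, reading);
	if (IS_ERR(bio))
		return bio;		/* ERR_PTR(-ENOMEM) or ERR_PTR(-EINVAL) */

	if (!reading)
		bio->bi_rw |= (1 << BIO_RW);	/* mark it as a write */
	return bio;
}
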
diff --git a/fs/buffer.c b/fs/buffer.c
index 3db4a26adc44..a073f3f4f013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1101 1101
1102 printk(KERN_ERR "%s: requested out-of-range block %llu for " 1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1103 "device %s\n", 1103 "device %s\n",
1104 __FUNCTION__, (unsigned long long)block, 1104 __func__, (unsigned long long)block,
1105 bdevname(bdev, b)); 1105 bdevname(bdev, b));
1106 return -EIO; 1106 return -EIO;
1107 } 1107 }
@@ -2211,8 +2211,8 @@ out:
2211 return err; 2211 return err;
2212} 2212}
2213 2213
2214int cont_expand_zero(struct file *file, struct address_space *mapping, 2214static int cont_expand_zero(struct file *file, struct address_space *mapping,
2215 loff_t pos, loff_t *bytes) 2215 loff_t pos, loff_t *bytes)
2216{ 2216{
2217 struct inode *inode = mapping->host; 2217 struct inode *inode = mapping->host;
2218 unsigned blocksize = 1 << inode->i_blkbits; 2218 unsigned blocksize = 1 << inode->i_blkbits;
@@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2328 return 0; 2328 return 0;
2329} 2329}
2330 2330
2331int generic_commit_write(struct file *file, struct page *page,
2332 unsigned from, unsigned to)
2333{
2334 struct inode *inode = page->mapping->host;
2335 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2336 __block_commit_write(inode,page,from,to);
2337 /*
2338 * No need to use i_size_read() here, the i_size
2339 * cannot change under us because we hold i_mutex.
2340 */
2341 if (pos > inode->i_size) {
2342 i_size_write(inode, pos);
2343 mark_inode_dirty(inode);
2344 }
2345 return 0;
2346}
2347
2348/* 2331/*
2349 * block_page_mkwrite() is not allowed to change the file size as it gets 2332 * block_page_mkwrite() is not allowed to change the file size as it gets
2350 * called from a page fault handler when a page is first dirtied. Hence we must 2333 * called from a page fault handler when a page is first dirtied. Hence we must
@@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync);
3315EXPORT_SYMBOL(file_fsync); 3298EXPORT_SYMBOL(file_fsync);
3316EXPORT_SYMBOL(fsync_bdev); 3299EXPORT_SYMBOL(fsync_bdev);
3317EXPORT_SYMBOL(generic_block_bmap); 3300EXPORT_SYMBOL(generic_block_bmap);
3318EXPORT_SYMBOL(generic_commit_write);
3319EXPORT_SYMBOL(generic_cont_expand_simple); 3301EXPORT_SYMBOL(generic_cont_expand_simple);
3320EXPORT_SYMBOL(init_buffer); 3302EXPORT_SYMBOL(init_buffer);
3321EXPORT_SYMBOL(invalidate_bdev); 3303EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 038674aa88a7..68e510b88457 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,6 @@ static struct char_device_struct {
55 unsigned int baseminor; 55 unsigned int baseminor;
56 int minorct; 56 int minorct;
57 char name[64]; 57 char name[64];
58 struct file_operations *fops;
59 struct cdev *cdev; /* will die */ 58 struct cdev *cdev; /* will die */
60} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
61 60
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 05c9da6181c3..8355e918fddf 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,6 @@
1Version 1.53
2------------
3
1Version 1.52 4Version 1.52
2------------ 5------------
3Fix oops on second mount to server when null auth is used. 6Fix oops on second mount to server when null auth is used.
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index bcda2c6b6a04..cb52cbbe45ff 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -460,8 +460,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
460 unsigned char *sequence_end; 460 unsigned char *sequence_end;
461 unsigned long *oid = NULL; 461 unsigned long *oid = NULL;
462 unsigned int cls, con, tag, oidlen, rc; 462 unsigned int cls, con, tag, oidlen, rc;
463 int use_ntlmssp = FALSE; 463 bool use_ntlmssp = false;
464 int use_kerberos = FALSE; 464 bool use_kerberos = false;
465 465
466 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ 466 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
467 467
@@ -561,15 +561,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
561 if (compare_oid(oid, oidlen, 561 if (compare_oid(oid, oidlen,
562 MSKRB5_OID, 562 MSKRB5_OID,
563 MSKRB5_OID_LEN)) 563 MSKRB5_OID_LEN))
564 use_kerberos = TRUE; 564 use_kerberos = true;
565 else if (compare_oid(oid, oidlen, 565 else if (compare_oid(oid, oidlen,
566 KRB5_OID, 566 KRB5_OID,
567 KRB5_OID_LEN)) 567 KRB5_OID_LEN))
568 use_kerberos = TRUE; 568 use_kerberos = true;
569 else if (compare_oid(oid, oidlen, 569 else if (compare_oid(oid, oidlen,
570 NTLMSSP_OID, 570 NTLMSSP_OID,
571 NTLMSSP_OID_LEN)) 571 NTLMSSP_OID_LEN))
572 use_ntlmssp = TRUE; 572 use_ntlmssp = true;
573 573
574 kfree(oid); 574 kfree(oid);
575 } 575 }
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0228ed06069e..cc950f69e51e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -468,7 +468,7 @@ cifs_proc_init(void)
468{ 468{
469 struct proc_dir_entry *pde; 469 struct proc_dir_entry *pde;
470 470
471 proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); 471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 472 if (proc_fs_cifs == NULL)
473 return; 473 return;
474 474
@@ -559,7 +559,7 @@ cifs_proc_clean(void)
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 560 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("cifs", proc_root_fs); 562 remove_proc_entry("fs/cifs", NULL);
563} 563}
564 564
565static int 565static int
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 95024c066d89..f6fdecf6598c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -93,15 +93,11 @@ static char *cifs_get_share_name(const char *node_name)
93 /* find sharename end */ 93 /* find sharename end */
94 pSep++; 94 pSep++;
95 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); 95 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
96 if (!pSep) { 96 if (pSep) {
97 cERROR(1, ("%s:2 cant find share name in node name: %s", 97 /* trim path up to sharename end
98 __func__, node_name)); 98 * now we have share name in UNC */
99 kfree(UNC); 99 *pSep = 0;
100 return NULL;
101 } 100 }
102 /* trim path up to sharename end
103 * * now we have share name in UNC */
104 *pSep = 0;
105 101
106 return UNC; 102 return UNC;
107} 103}
@@ -188,7 +184,7 @@ static char *compose_mount_options(const char *sb_mountdata,
188 tkn_e = strchr(tkn_e+1, '\\'); 184 tkn_e = strchr(tkn_e+1, '\\');
189 if (tkn_e) { 185 if (tkn_e) {
190 strcat(mountdata, ",prefixpath="); 186 strcat(mountdata, ",prefixpath=");
191 strcat(mountdata, tkn_e); 187 strcat(mountdata, tkn_e+1);
192 } 188 }
193 } 189 }
194 190
@@ -244,7 +240,8 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry)
244 return NULL; 240 return NULL;
245 241
246 if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { 242 if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) {
247 /* we should use full path name to correct working with DFS */ 243 int i;
244 /* we should use full path name for correct working with DFS */
248 l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + 245 l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) +
249 strnlen(search_path, MAX_PATHCONF) + 1; 246 strnlen(search_path, MAX_PATHCONF) + 1;
250 tmp_path = kmalloc(l_max_len, GFP_KERNEL); 247 tmp_path = kmalloc(l_max_len, GFP_KERNEL);
@@ -253,8 +250,14 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry)
253 return NULL; 250 return NULL;
254 } 251 }
255 strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); 252 strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len);
256 strcat(tmp_path, search_path);
257 tmp_path[l_max_len-1] = 0; 253 tmp_path[l_max_len-1] = 0;
254 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
255 for (i = 0; i < l_max_len; i++) {
256 if (tmp_path[i] == '\\')
257 tmp_path[i] = '/';
258 }
259 strncat(tmp_path, search_path, l_max_len - strlen(tmp_path));
260
258 full_path = tmp_path; 261 full_path = tmp_path;
259 kfree(search_path); 262 kfree(search_path);
260 } else { 263 } else {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index e99d4faf5f02..34902cff5400 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -559,7 +559,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
559 const char *path, const __u16 *pfid) 559 const char *path, const __u16 *pfid)
560{ 560{
561 struct cifsFileInfo *open_file = NULL; 561 struct cifsFileInfo *open_file = NULL;
562 int unlock_file = FALSE; 562 bool unlock_file = false;
563 int xid; 563 int xid;
564 int rc = -EIO; 564 int rc = -EIO;
565 __u16 fid; 565 __u16 fid;
@@ -586,10 +586,10 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
586 cifs_sb = CIFS_SB(sb); 586 cifs_sb = CIFS_SB(sb);
587 587
588 if (open_file) { 588 if (open_file) {
589 unlock_file = TRUE; 589 unlock_file = true;
590 fid = open_file->netfid; 590 fid = open_file->netfid;
591 } else if (pfid == NULL) { 591 } else if (pfid == NULL) {
592 int oplock = FALSE; 592 int oplock = 0;
593 /* open file */ 593 /* open file */
594 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 594 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
595 READ_CONTROL, 0, &fid, &oplock, NULL, 595 READ_CONTROL, 0, &fid, &oplock, NULL,
@@ -604,7 +604,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
604 604
605 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 605 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
606 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 606 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
607 if (unlock_file == TRUE) /* find_readable_file increments ref count */ 607 if (unlock_file == true) /* find_readable_file increments ref count */
608 atomic_dec(&open_file->wrtPending); 608 atomic_dec(&open_file->wrtPending);
609 else if (pfid == NULL) /* if opened above we have to close the handle */ 609 else if (pfid == NULL) /* if opened above we have to close the handle */
610 CIFSSMBClose(xid, cifs_sb->tcon, fid); 610 CIFSSMBClose(xid, cifs_sb->tcon, fid);
@@ -619,7 +619,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
619 struct inode *inode, const char *path) 619 struct inode *inode, const char *path)
620{ 620{
621 struct cifsFileInfo *open_file; 621 struct cifsFileInfo *open_file;
622 int unlock_file = FALSE; 622 bool unlock_file = false;
623 int xid; 623 int xid;
624 int rc = -EIO; 624 int rc = -EIO;
625 __u16 fid; 625 __u16 fid;
@@ -640,10 +640,10 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
640 640
641 open_file = find_readable_file(CIFS_I(inode)); 641 open_file = find_readable_file(CIFS_I(inode));
642 if (open_file) { 642 if (open_file) {
643 unlock_file = TRUE; 643 unlock_file = true;
644 fid = open_file->netfid; 644 fid = open_file->netfid;
645 } else { 645 } else {
646 int oplock = FALSE; 646 int oplock = 0;
647 /* open file */ 647 /* open file */
648 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 648 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
649 WRITE_DAC, 0, &fid, &oplock, NULL, 649 WRITE_DAC, 0, &fid, &oplock, NULL,
@@ -658,7 +658,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
658 658
659 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 659 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
660 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 660 cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
661 if (unlock_file == TRUE) 661 if (unlock_file)
662 atomic_dec(&open_file->wrtPending); 662 atomic_dec(&open_file->wrtPending);
663 else 663 else
664 CIFSSMBClose(xid, cifs_sb->tcon, fid); 664 CIFSSMBClose(xid, cifs_sb->tcon, fid);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 39c2cbdface7..427a7c695896 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -222,50 +222,50 @@ static int
222cifs_statfs(struct dentry *dentry, struct kstatfs *buf) 222cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
223{ 223{
224 struct super_block *sb = dentry->d_sb; 224 struct super_block *sb = dentry->d_sb;
225 int xid; 225 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
226 struct cifsTconInfo *tcon = cifs_sb->tcon;
226 int rc = -EOPNOTSUPP; 227 int rc = -EOPNOTSUPP;
227 struct cifs_sb_info *cifs_sb; 228 int xid;
228 struct cifsTconInfo *pTcon;
229 229
230 xid = GetXid(); 230 xid = GetXid();
231 231
232 cifs_sb = CIFS_SB(sb);
233 pTcon = cifs_sb->tcon;
234
235 buf->f_type = CIFS_MAGIC_NUMBER; 232 buf->f_type = CIFS_MAGIC_NUMBER;
236 233
237 /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */ 234 /*
238 buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would 235 * PATH_MAX may be too long - it would presumably be total path,
 239 presumably be total path, but note 236 * but note that some servers (including Samba 3) have a shorter
 240 that some servers (including Samba 3) 237 * maximum path.
241 have a shorter maximum path */ 238 *
239 * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO.
240 */
241 buf->f_namelen = PATH_MAX;
242 buf->f_files = 0; /* undefined */ 242 buf->f_files = 0; /* undefined */
243 buf->f_ffree = 0; /* unlimited */ 243 buf->f_ffree = 0; /* unlimited */
244 244
245/* BB we could add a second check for a QFS Unix capability bit */ 245 /*
246/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ 246 * We could add a second check for a QFS Unix capability bit
247 if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & 247 */
248 le64_to_cpu(pTcon->fsUnixInfo.Capability))) 248 if ((tcon->ses->capabilities & CAP_UNIX) &&
249 rc = CIFSSMBQFSPosixInfo(xid, pTcon, buf); 249 (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
250 250 rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
251 /* Only need to call the old QFSInfo if failed 251
252 on newer one */ 252 /*
253 if (rc) 253 * Only need to call the old QFSInfo if failed on newer one,
254 if (pTcon->ses->capabilities & CAP_NT_SMBS) 254 * e.g. by OS/2.
255 rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ 255 **/
256 256 if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
257 /* Some old Windows servers also do not support level 103, retry with 257 rc = CIFSSMBQFSInfo(xid, tcon, buf);
258 older level one if old server failed the previous call or we 258
259 bypassed it because we detected that this was an older LANMAN sess */ 259 /*
260 * Some old Windows servers also do not support level 103, retry with
261 * older level one if old server failed the previous call or we
262 * bypassed it because we detected that this was an older LANMAN sess
263 */
260 if (rc) 264 if (rc)
261 rc = SMBOldQFSInfo(xid, pTcon, buf); 265 rc = SMBOldQFSInfo(xid, tcon, buf);
262 /* int f_type; 266
263 __fsid_t f_fsid;
264 int f_namelen; */
265 /* BB get from info in tcon struct at mount time call to QFSAttrInfo */
266 FreeXid(xid); 267 FreeXid(xid);
267 return 0; /* always return success? what if volume is no 268 return 0;
268 longer available? */
269} 269}
270 270
271static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) 271static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
@@ -306,8 +306,8 @@ cifs_alloc_inode(struct super_block *sb)
306 /* Until the file is open and we have gotten oplock 306 /* Until the file is open and we have gotten oplock
307 info back from the server, can not assume caching of 307 info back from the server, can not assume caching of
308 file data or metadata */ 308 file data or metadata */
309 cifs_inode->clientCanCacheRead = FALSE; 309 cifs_inode->clientCanCacheRead = false;
310 cifs_inode->clientCanCacheAll = FALSE; 310 cifs_inode->clientCanCacheAll = false;
311 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 311 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
312 312
313 /* Can not set i_flags here - they get immediately overwritten 313 /* Can not set i_flags here - they get immediately overwritten
@@ -940,7 +940,7 @@ static int cifs_oplock_thread(void *dummyarg)
940 rc = CIFSSMBLock(0, pTcon, netfid, 940 rc = CIFSSMBLock(0, pTcon, netfid,
941 0 /* len */ , 0 /* offset */, 0, 941 0 /* len */ , 0 /* offset */, 0,
942 0, LOCKING_ANDX_OPLOCK_RELEASE, 942 0, LOCKING_ANDX_OPLOCK_RELEASE,
943 0 /* wait flag */); 943 false /* wait flag */);
944 cFYI(1, ("Oplock release rc = %d", rc)); 944 cFYI(1, ("Oplock release rc = %d", rc));
945 } 945 }
946 } else 946 } else
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index e1dd9f32e1d7..cd1301a09b3b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,14 +24,6 @@
24 24
25#define ROOT_I 2 25#define ROOT_I 2
26 26
27#ifndef FALSE
28#define FALSE 0
29#endif
30
31#ifndef TRUE
32#define TRUE 1
33#endif
34
35extern struct file_system_type cifs_fs_type; 27extern struct file_system_type cifs_fs_type;
36extern const struct address_space_operations cifs_addr_ops; 28extern const struct address_space_operations cifs_addr_ops;
37extern const struct address_space_operations cifs_addr_ops_smallbuf; 29extern const struct address_space_operations cifs_addr_ops_smallbuf;
@@ -110,5 +102,5 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep,
110extern const struct export_operations cifs_export_ops; 102extern const struct export_operations cifs_export_ops;
111#endif /* EXPERIMENTAL */ 103#endif /* EXPERIMENTAL */
112 104
113#define CIFS_VERSION "1.52" 105#define CIFS_VERSION "1.53"
114#endif /* _CIFSFS_H */ 106#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 69a2e1942542..b7d9f698e63e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -57,14 +57,6 @@
57 57
58#include "cifspdu.h" 58#include "cifspdu.h"
59 59
60#ifndef FALSE
61#define FALSE 0
62#endif
63
64#ifndef TRUE
65#define TRUE 1
66#endif
67
68#ifndef XATTR_DOS_ATTRIB 60#ifndef XATTR_DOS_ATTRIB
69#define XATTR_DOS_ATTRIB "user.DOSATTRIB" 61#define XATTR_DOS_ATTRIB "user.DOSATTRIB"
70#endif 62#endif
@@ -147,7 +139,7 @@ struct TCP_Server_Info {
147 enum protocolEnum protocolType; 139 enum protocolEnum protocolType;
148 char versionMajor; 140 char versionMajor;
149 char versionMinor; 141 char versionMinor;
150 unsigned svlocal:1; /* local server or remote */ 142 bool svlocal:1; /* local server or remote */
151 atomic_t socketUseCount; /* number of open cifs sessions on socket */ 143 atomic_t socketUseCount; /* number of open cifs sessions on socket */
152 atomic_t inFlight; /* number of requests on the wire to server */ 144 atomic_t inFlight; /* number of requests on the wire to server */
153#ifdef CONFIG_CIFS_STATS2 145#ifdef CONFIG_CIFS_STATS2
@@ -286,10 +278,10 @@ struct cifsTconInfo {
286 FILE_SYSTEM_DEVICE_INFO fsDevInfo; 278 FILE_SYSTEM_DEVICE_INFO fsDevInfo;
287 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ 279 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
288 FILE_SYSTEM_UNIX_INFO fsUnixInfo; 280 FILE_SYSTEM_UNIX_INFO fsUnixInfo;
289 unsigned ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ 281 bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */
290 unsigned retry:1; 282 bool retry:1;
291 unsigned nocase:1; 283 bool nocase:1;
292 unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol 284 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
293 for this mount even if server would support */ 285 for this mount even if server would support */
294 /* BB add field for back pointer to sb struct(s)? */ 286 /* BB add field for back pointer to sb struct(s)? */
295}; 287};
@@ -317,10 +309,10 @@ struct cifs_search_info {
317 char *srch_entries_start; 309 char *srch_entries_start;
318 char *presume_name; 310 char *presume_name;
319 unsigned int resume_name_len; 311 unsigned int resume_name_len;
320 unsigned endOfSearch:1; 312 bool endOfSearch:1;
321 unsigned emptyDir:1; 313 bool emptyDir:1;
322 unsigned unicode:1; 314 bool unicode:1;
323 unsigned smallBuf:1; /* so we know which buf_release function to call */ 315 bool smallBuf:1; /* so we know which buf_release function to call */
324}; 316};
325 317
326struct cifsFileInfo { 318struct cifsFileInfo {
@@ -335,9 +327,9 @@ struct cifsFileInfo {
335 struct inode *pInode; /* needed for oplock break */ 327 struct inode *pInode; /* needed for oplock break */
336 struct mutex lock_mutex; 328 struct mutex lock_mutex;
337 struct list_head llist; /* list of byte range locks we have. */ 329 struct list_head llist; /* list of byte range locks we have. */
338 unsigned closePend:1; /* file is marked to close */ 330 bool closePend:1; /* file is marked to close */
339 unsigned invalidHandle:1; /* file closed via session abend */ 331 bool invalidHandle:1; /* file closed via session abend */
340 unsigned messageMode:1; /* for pipes: message vs byte mode */ 332 bool messageMode:1; /* for pipes: message vs byte mode */
341 atomic_t wrtPending; /* handle in use - defer close */ 333 atomic_t wrtPending; /* handle in use - defer close */
342 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 334 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
343 char *search_resume_name; /* BB removeme BB */ 335 char *search_resume_name; /* BB removeme BB */
@@ -356,9 +348,9 @@ struct cifsInodeInfo {
356 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 348 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
357 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ 349 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/
358 unsigned long time; /* jiffies of last update/check of inode */ 350 unsigned long time; /* jiffies of last update/check of inode */
359 unsigned clientCanCacheRead:1; /* read oplock */ 351 bool clientCanCacheRead:1; /* read oplock */
360 unsigned clientCanCacheAll:1; /* read and writebehind oplock */ 352 bool clientCanCacheAll:1; /* read and writebehind oplock */
361 unsigned oplockPending:1; 353 bool oplockPending:1;
362 struct inode vfs_inode; 354 struct inode vfs_inode;
363}; 355};
364 356
@@ -426,9 +418,9 @@ struct mid_q_entry {
426 struct smb_hdr *resp_buf; /* response buffer */ 418 struct smb_hdr *resp_buf; /* response buffer */
427 int midState; /* wish this were enum but can not pass to wait_event */ 419 int midState; /* wish this were enum but can not pass to wait_event */
428 __u8 command; /* smb command code */ 420 __u8 command; /* smb command code */
429 unsigned largeBuf:1; /* if valid response, is pointer to large buf */ 421 bool largeBuf:1; /* if valid response, is pointer to large buf */
430 unsigned multiRsp:1; /* multiple trans2 responses for one request */ 422 bool multiRsp:1; /* multiple trans2 responses for one request */
431 unsigned multiEnd:1; /* both received */ 423 bool multiEnd:1; /* both received */
432}; 424};
433 425
434struct oplock_q_entry { 426struct oplock_q_entry {
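The cifsglob.h hunks above switch the one-bit flag members from "unsigned name:1" to "bool name:1". A minimal standalone sketch (plain C99, using an invented layout rather than the real cifs structures) showing that bool bit-fields still pack into a single word while giving the flags honest true/false semantics:

#include <stdbool.h>
#include <stdio.h>

/* illustrative layout only -- not the actual cifsFileInfo */
struct file_flags {
        bool closePend:1;       /* file is marked to close */
        bool invalidHandle:1;   /* file closed via session abend */
        bool messageMode:1;     /* for pipes: message vs byte mode */
};

int main(void)
{
        struct file_flags f = { .invalidHandle = true };

        if (f.invalidHandle && !f.closePend)
                printf("handle invalid, close not pending\n");
        printf("flags struct occupies %zu byte(s)\n", sizeof(f));
        return 0;
}

Any non-zero value stored in a bool:1 member reads back as true, which is what lets the later hunks drop the explicit "== TRUE" / "== FALSE" comparisons at the call sites.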
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 9f49c2f3582c..c43bf4b7a556 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -340,6 +340,7 @@
340#define OPEN_NO_RECALL 0x00400000 340#define OPEN_NO_RECALL 0x00400000
341#define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */ 341#define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */
342#define CREATE_OPTIONS_MASK 0x007FFFFF 342#define CREATE_OPTIONS_MASK 0x007FFFFF
343#define CREATE_OPTION_READONLY 0x10000000
343#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ 344#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */
344 345
345/* ImpersonationLevel flags */ 346/* ImpersonationLevel flags */
@@ -2050,7 +2051,7 @@ typedef struct {
2050 to 0xFFFF00 */ 2051 to 0xFFFF00 */
2051#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 2052#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080
2052#define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ 2053#define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */
2053#define CIFS_UNIX_TRANPSORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ 2054#define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */
2054#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and 2055#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and
2055 QFS PROXY call */ 2056 QFS PROXY call */
2056#ifdef CONFIG_CIFS_POSIX 2057#ifdef CONFIG_CIFS_POSIX
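The new CREATE_OPTION_READONLY bit is defined above CREATE_OPTIONS_MASK (0x007FFFFF), so, like CREATE_OPTION_SPECIAL, it acts as a client-internal hint that is masked off before the create options go on the wire (the cifssmb.c hunks below keep the existing "create_options & CREATE_OPTIONS_MASK" when filling pSMB->CreateOptions). A small standalone sketch of that split between wire bits and local hints; the helper function and the low example bit are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define CREATE_OPTIONS_MASK     0x007FFFFF
#define CREATE_OPTION_READONLY  0x10000000      /* client-side hint only */
#define CREATE_OPTION_SPECIAL   0x20000000      /* not sent over wire */

/* hypothetical helper: keep only the bits that may be sent to the server */
static uint32_t wire_create_options(uint32_t opts)
{
        return opts & CREATE_OPTIONS_MASK;      /* high "hint" bits dropped */
}

int main(void)
{
        uint32_t opts = 0x00000001 /* some low wire option bit */ |
                        CREATE_OPTION_READONLY;

        printf("requested 0x%08x, sent 0x%08x, readonly hint: %d\n",
               (unsigned)opts, (unsigned)wire_create_options(opts),
               !!(opts & CREATE_OPTION_READONLY));
        return 0;
}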
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 50f9fdae19b3..d481f6c5a2be 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -59,8 +59,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
59 struct smb_hdr *out_buf, 59 struct smb_hdr *out_buf,
60 int *bytes_returned); 60 int *bytes_returned);
61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); 61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
62extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); 62extern bool is_valid_oplock_break(struct smb_hdr *smb,
63extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 63 struct TCP_Server_Info *);
64extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
64extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); 65extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *);
65#ifdef CONFIG_CIFS_EXPERIMENTAL 66#ifdef CONFIG_CIFS_EXPERIMENTAL
66extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *); 67extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *);
@@ -69,7 +70,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
69extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 70extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
70extern int decode_negTokenInit(unsigned char *security_blob, int length, 71extern int decode_negTokenInit(unsigned char *security_blob, int length,
71 enum securityEnum *secType); 72 enum securityEnum *secType);
72extern int cifs_inet_pton(int, char *source, void *dst); 73extern int cifs_inet_pton(const int, const char *source, void *dst);
73extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 74extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
74extern void header_assemble(struct smb_hdr *, char /* command */ , 75extern void header_assemble(struct smb_hdr *, char /* command */ ,
75 const struct cifsTconInfo *, int /* length of 76 const struct cifsTconInfo *, int /* length of
@@ -187,12 +188,12 @@ extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
187#endif /* possibly unneeded function */ 188#endif /* possibly unneeded function */
188extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, 189extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
189 const char *fileName, __u64 size, 190 const char *fileName, __u64 size,
190 int setAllocationSizeFlag, 191 bool setAllocationSizeFlag,
191 const struct nls_table *nls_codepage, 192 const struct nls_table *nls_codepage,
192 int remap_special_chars); 193 int remap_special_chars);
193extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 194extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
194 __u64 size, __u16 fileHandle, __u32 opener_pid, 195 __u64 size, __u16 fileHandle, __u32 opener_pid,
195 int AllocSizeFlag); 196 bool AllocSizeFlag);
196extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, 197extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
197 char *full_path, __u64 mode, __u64 uid, 198 char *full_path, __u64 mode, __u64 uid,
198 __u64 gid, dev_t dev, 199 __u64 gid, dev_t dev,
@@ -291,11 +292,11 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
291 const __u16 netfid, const __u64 len, 292 const __u16 netfid, const __u64 len,
292 const __u64 offset, const __u32 numUnlock, 293 const __u64 offset, const __u32 numUnlock,
293 const __u32 numLock, const __u8 lockType, 294 const __u32 numLock, const __u8 lockType,
294 const int waitFlag); 295 const bool waitFlag);
295extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 296extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
296 const __u16 smb_file_id, const int get_flag, 297 const __u16 smb_file_id, const int get_flag,
297 const __u64 len, struct file_lock *, 298 const __u64 len, struct file_lock *,
298 const __u16 lock_type, const int waitFlag); 299 const __u16 lock_type, const bool waitFlag);
299extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 300extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon);
300extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 301extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses);
301 302
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4728fa982a4e..95fbba4ea7d4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -95,7 +95,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
96 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 96 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
97 if (open_file) 97 if (open_file)
98 open_file->invalidHandle = TRUE; 98 open_file->invalidHandle = true;
99 } 99 }
100 write_unlock(&GlobalSMBSeslock); 100 write_unlock(&GlobalSMBSeslock);
101 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 101 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -141,7 +141,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
141 if (tcon->ses->server->tcpStatus == 141 if (tcon->ses->server->tcpStatus ==
142 CifsNeedReconnect) { 142 CifsNeedReconnect) {
143 /* on "soft" mounts we wait once */ 143 /* on "soft" mounts we wait once */
144 if ((tcon->retry == FALSE) || 144 if (!tcon->retry ||
145 (tcon->ses->status == CifsExiting)) { 145 (tcon->ses->status == CifsExiting)) {
146 cFYI(1, ("gave up waiting on " 146 cFYI(1, ("gave up waiting on "
147 "reconnect in smb_init")); 147 "reconnect in smb_init"));
@@ -289,7 +289,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
289 if (tcon->ses->server->tcpStatus == 289 if (tcon->ses->server->tcpStatus ==
290 CifsNeedReconnect) { 290 CifsNeedReconnect) {
291 /* on "soft" mounts we wait once */ 291 /* on "soft" mounts we wait once */
292 if ((tcon->retry == FALSE) || 292 if (!tcon->retry ||
293 (tcon->ses->status == CifsExiting)) { 293 (tcon->ses->status == CifsExiting)) {
294 cFYI(1, ("gave up waiting on " 294 cFYI(1, ("gave up waiting on "
295 "reconnect in smb_init")); 295 "reconnect in smb_init"));
@@ -1224,11 +1224,8 @@ OldOpenRetry:
1224 else /* BB FIXME BB */ 1224 else /* BB FIXME BB */
1225 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); 1225 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/);
1226 1226
1227 /* if ((omode & S_IWUGO) == 0) 1227 if (create_options & CREATE_OPTION_READONLY)
1228 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ 1228 pSMB->FileAttributes |= cpu_to_le16(ATTR_READONLY);
1229 /* Above line causes problems due to vfs splitting create into two
1230 pieces - need to set mode after file created not while it is
1231 being created */
1232 1229
1233 /* BB FIXME BB */ 1230 /* BB FIXME BB */
1234/* pSMB->CreateOptions = cpu_to_le32(create_options & 1231/* pSMB->CreateOptions = cpu_to_le32(create_options &
@@ -1331,17 +1328,16 @@ openRetry:
1331 pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM); 1328 pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM);
1332 else 1329 else
1333 pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL); 1330 pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL);
1331
1334 /* XP does not handle ATTR_POSIX_SEMANTICS */ 1332 /* XP does not handle ATTR_POSIX_SEMANTICS */
1335 /* but it helps speed up case sensitive checks for other 1333 /* but it helps speed up case sensitive checks for other
1336 servers such as Samba */ 1334 servers such as Samba */
1337 if (tcon->ses->capabilities & CAP_UNIX) 1335 if (tcon->ses->capabilities & CAP_UNIX)
1338 pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS); 1336 pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS);
1339 1337
1340 /* if ((omode & S_IWUGO) == 0) 1338 if (create_options & CREATE_OPTION_READONLY)
1341 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ 1339 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);
1342 /* Above line causes problems due to vfs splitting create into two 1340
1343 pieces - need to set mode after file created not while it is
1344 being created */
1345 pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL); 1341 pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL);
1346 pSMB->CreateDisposition = cpu_to_le32(openDisposition); 1342 pSMB->CreateDisposition = cpu_to_le32(openDisposition);
1347 pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); 1343 pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK);
@@ -1686,7 +1682,7 @@ int
1686CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 1682CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1687 const __u16 smb_file_id, const __u64 len, 1683 const __u16 smb_file_id, const __u64 len,
1688 const __u64 offset, const __u32 numUnlock, 1684 const __u64 offset, const __u32 numUnlock,
1689 const __u32 numLock, const __u8 lockType, const int waitFlag) 1685 const __u32 numLock, const __u8 lockType, const bool waitFlag)
1690{ 1686{
1691 int rc = 0; 1687 int rc = 0;
1692 LOCK_REQ *pSMB = NULL; 1688 LOCK_REQ *pSMB = NULL;
@@ -1695,7 +1691,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1695 int timeout = 0; 1691 int timeout = 0;
1696 __u16 count; 1692 __u16 count;
1697 1693
1698 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", waitFlag, numLock)); 1694 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock));
1699 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); 1695 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
1700 1696
1701 if (rc) 1697 if (rc)
@@ -1706,7 +1702,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1706 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { 1702 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
1707 timeout = CIFS_ASYNC_OP; /* no response expected */ 1703 timeout = CIFS_ASYNC_OP; /* no response expected */
1708 pSMB->Timeout = 0; 1704 pSMB->Timeout = 0;
1709 } else if (waitFlag == TRUE) { 1705 } else if (waitFlag) {
1710 timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ 1706 timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */
1711 pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */ 1707 pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */
1712 } else { 1708 } else {
@@ -1756,7 +1752,7 @@ int
1756CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 1752CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1757 const __u16 smb_file_id, const int get_flag, const __u64 len, 1753 const __u16 smb_file_id, const int get_flag, const __u64 len,
1758 struct file_lock *pLockData, const __u16 lock_type, 1754 struct file_lock *pLockData, const __u16 lock_type,
1759 const int waitFlag) 1755 const bool waitFlag)
1760{ 1756{
1761 struct smb_com_transaction2_sfi_req *pSMB = NULL; 1757 struct smb_com_transaction2_sfi_req *pSMB = NULL;
1762 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; 1758 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
@@ -3581,9 +3577,9 @@ findFirstRetry:
3581 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3577 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3582 if (rc == 0) { 3578 if (rc == 0) {
3583 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) 3579 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3584 psrch_inf->unicode = TRUE; 3580 psrch_inf->unicode = true;
3585 else 3581 else
3586 psrch_inf->unicode = FALSE; 3582 psrch_inf->unicode = false;
3587 3583
3588 psrch_inf->ntwrk_buf_start = (char *)pSMBr; 3584 psrch_inf->ntwrk_buf_start = (char *)pSMBr;
3589 psrch_inf->smallBuf = 0; 3585 psrch_inf->smallBuf = 0;
@@ -3594,9 +3590,9 @@ findFirstRetry:
3594 le16_to_cpu(pSMBr->t2.ParameterOffset)); 3590 le16_to_cpu(pSMBr->t2.ParameterOffset));
3595 3591
3596 if (parms->EndofSearch) 3592 if (parms->EndofSearch)
3597 psrch_inf->endOfSearch = TRUE; 3593 psrch_inf->endOfSearch = true;
3598 else 3594 else
3599 psrch_inf->endOfSearch = FALSE; 3595 psrch_inf->endOfSearch = false;
3600 3596
3601 psrch_inf->entries_in_buffer = 3597 psrch_inf->entries_in_buffer =
3602 le16_to_cpu(parms->SearchCount); 3598 le16_to_cpu(parms->SearchCount);
@@ -3624,7 +3620,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3624 3620
3625 cFYI(1, ("In FindNext")); 3621 cFYI(1, ("In FindNext"));
3626 3622
3627 if (psrch_inf->endOfSearch == TRUE) 3623 if (psrch_inf->endOfSearch)
3628 return -ENOENT; 3624 return -ENOENT;
3629 3625
3630 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3626 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3682,7 +3678,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3682 cifs_stats_inc(&tcon->num_fnext); 3678 cifs_stats_inc(&tcon->num_fnext);
3683 if (rc) { 3679 if (rc) {
3684 if (rc == -EBADF) { 3680 if (rc == -EBADF) {
3685 psrch_inf->endOfSearch = TRUE; 3681 psrch_inf->endOfSearch = true;
3686 rc = 0; /* search probably was closed at end of search*/ 3682 rc = 0; /* search probably was closed at end of search*/
3687 } else 3683 } else
3688 cFYI(1, ("FindNext returned = %d", rc)); 3684 cFYI(1, ("FindNext returned = %d", rc));
@@ -3692,9 +3688,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3692 if (rc == 0) { 3688 if (rc == 0) {
3693 /* BB fixme add lock for file (srch_info) struct here */ 3689 /* BB fixme add lock for file (srch_info) struct here */
3694 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) 3690 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3695 psrch_inf->unicode = TRUE; 3691 psrch_inf->unicode = true;
3696 else 3692 else
3697 psrch_inf->unicode = FALSE; 3693 psrch_inf->unicode = false;
3698 response_data = (char *) &pSMBr->hdr.Protocol + 3694 response_data = (char *) &pSMBr->hdr.Protocol +
3699 le16_to_cpu(pSMBr->t2.ParameterOffset); 3695 le16_to_cpu(pSMBr->t2.ParameterOffset);
3700 parms = (T2_FNEXT_RSP_PARMS *)response_data; 3696 parms = (T2_FNEXT_RSP_PARMS *)response_data;
@@ -3709,9 +3705,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3709 psrch_inf->ntwrk_buf_start = (char *)pSMB; 3705 psrch_inf->ntwrk_buf_start = (char *)pSMB;
3710 psrch_inf->smallBuf = 0; 3706 psrch_inf->smallBuf = 0;
3711 if (parms->EndofSearch) 3707 if (parms->EndofSearch)
3712 psrch_inf->endOfSearch = TRUE; 3708 psrch_inf->endOfSearch = true;
3713 else 3709 else
3714 psrch_inf->endOfSearch = FALSE; 3710 psrch_inf->endOfSearch = false;
3715 psrch_inf->entries_in_buffer = 3711 psrch_inf->entries_in_buffer =
3716 le16_to_cpu(parms->SearchCount); 3712 le16_to_cpu(parms->SearchCount);
3717 psrch_inf->index_of_last_entry += 3713 psrch_inf->index_of_last_entry +=
@@ -4586,7 +4582,7 @@ QFSPosixRetry:
4586 4582
4587int 4583int
4588CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, 4584CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName,
4589 __u64 size, int SetAllocation, 4585 __u64 size, bool SetAllocation,
4590 const struct nls_table *nls_codepage, int remap) 4586 const struct nls_table *nls_codepage, int remap)
4591{ 4587{
4592 struct smb_com_transaction2_spi_req *pSMB = NULL; 4588 struct smb_com_transaction2_spi_req *pSMB = NULL;
@@ -4675,7 +4671,7 @@ SetEOFRetry:
4675 4671
4676int 4672int
4677CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, 4673CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4678 __u16 fid, __u32 pid_of_opener, int SetAllocation) 4674 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4679{ 4675{
4680 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4676 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4681 char *data_offset; 4677 char *data_offset;
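In both open paths above, the old commented-out "omode & S_IWUGO" test is replaced by checking the new CREATE_OPTION_READONLY bit and OR-ing ATTR_READONLY into FileAttributes, with cpu_to_le16() in the legacy open and cpu_to_le32() in the NT create because the two request formats carry the attribute field at different widths. A hedged userspace sketch of the same pattern; htole16()/htole32() from <endian.h> stand in for the kernel's cpu_to_le16()/cpu_to_le32(), and the ATTR_READONLY value is assumed to be the DOS read-only bit:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define CREATE_OPTION_READONLY 0x10000000
#define ATTR_READONLY          0x0001   /* assumed DOS read-only attribute */

int main(void)
{
        uint32_t create_options = CREATE_OPTION_READONLY;
        uint16_t attrs_le16 = 0;        /* legacy open: 16-bit attribute field */
        uint32_t attrs_le32 = 0;        /* NT create: 32-bit attribute field */

        if (create_options & CREATE_OPTION_READONLY) {
                attrs_le16 |= htole16(ATTR_READONLY);
                attrs_le32 |= htole32(ATTR_READONLY);
        }
        printf("le16 attrs 0x%04x, le32 attrs 0x%08x\n",
               (unsigned)attrs_le16, (unsigned)attrs_le32);
        return 0;
}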
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e17106730168..f428bf3bf1a9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -49,8 +49,6 @@
49#define CIFS_PORT 445 49#define CIFS_PORT 445
50#define RFC1001_PORT 139 50#define RFC1001_PORT 139
51 51
52static DECLARE_COMPLETION(cifsd_complete);
53
54extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 52extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
55 unsigned char *p24); 53 unsigned char *p24);
56 54
@@ -71,23 +69,23 @@ struct smb_vol {
71 mode_t file_mode; 69 mode_t file_mode;
72 mode_t dir_mode; 70 mode_t dir_mode;
73 unsigned secFlg; 71 unsigned secFlg;
74 unsigned rw:1; 72 bool rw:1;
75 unsigned retry:1; 73 bool retry:1;
76 unsigned intr:1; 74 bool intr:1;
77 unsigned setuids:1; 75 bool setuids:1;
78 unsigned override_uid:1; 76 bool override_uid:1;
79 unsigned override_gid:1; 77 bool override_gid:1;
80 unsigned noperm:1; 78 bool noperm:1;
81 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ 79 bool no_psx_acl:1; /* set if posix acl support should be disabled */
82 unsigned cifs_acl:1; 80 bool cifs_acl:1;
83 unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/ 81 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
84 unsigned server_ino:1; /* use inode numbers from server ie UniqueId */ 82 bool server_ino:1; /* use inode numbers from server ie UniqueId */
85 unsigned direct_io:1; 83 bool direct_io:1;
86 unsigned remap:1; /* set to remap seven reserved chars in filenames */ 84 bool remap:1; /* set to remap seven reserved chars in filenames */
87 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ 85 bool posix_paths:1; /* unset to not ask for posix pathnames. */
88 unsigned no_linux_ext:1; 86 bool no_linux_ext:1;
89 unsigned sfu_emul:1; 87 bool sfu_emul:1;
90 unsigned nullauth:1; /* attempt to authenticate with null user */ 88 bool nullauth:1; /* attempt to authenticate with null user */
91 unsigned nocase; /* request case insensitive filenames */ 89 unsigned nocase; /* request case insensitive filenames */
92 unsigned nobrl; /* disable sending byte range locks to srv */ 90 unsigned nobrl; /* disable sending byte range locks to srv */
93 unsigned int rsize; 91 unsigned int rsize;
@@ -345,18 +343,16 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
345 struct task_struct *task_to_wake = NULL; 343 struct task_struct *task_to_wake = NULL;
346 struct mid_q_entry *mid_entry; 344 struct mid_q_entry *mid_entry;
347 char temp; 345 char temp;
348 int isLargeBuf = FALSE; 346 bool isLargeBuf = false;
349 int isMultiRsp; 347 bool isMultiRsp;
350 int reconnect; 348 int reconnect;
351 349
352 current->flags |= PF_MEMALLOC; 350 current->flags |= PF_MEMALLOC;
353 server->tsk = current; /* save process info to wake at shutdown */
354 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 351 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
355 write_lock(&GlobalSMBSeslock); 352 write_lock(&GlobalSMBSeslock);
356 atomic_inc(&tcpSesAllocCount); 353 atomic_inc(&tcpSesAllocCount);
357 length = tcpSesAllocCount.counter; 354 length = tcpSesAllocCount.counter;
358 write_unlock(&GlobalSMBSeslock); 355 write_unlock(&GlobalSMBSeslock);
359 complete(&cifsd_complete);
360 if (length > 1) 356 if (length > 1)
361 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 357 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
362 GFP_KERNEL); 358 GFP_KERNEL);
@@ -390,8 +386,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
390 } else /* if existing small buf clear beginning */ 386 } else /* if existing small buf clear beginning */
391 memset(smallbuf, 0, sizeof(struct smb_hdr)); 387 memset(smallbuf, 0, sizeof(struct smb_hdr));
392 388
393 isLargeBuf = FALSE; 389 isLargeBuf = false;
394 isMultiRsp = FALSE; 390 isMultiRsp = false;
395 smb_buffer = smallbuf; 391 smb_buffer = smallbuf;
396 iov.iov_base = smb_buffer; 392 iov.iov_base = smb_buffer;
397 iov.iov_len = 4; 393 iov.iov_len = 4;
@@ -517,7 +513,7 @@ incomplete_rcv:
517 reconnect = 0; 513 reconnect = 0;
518 514
519 if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { 515 if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) {
520 isLargeBuf = TRUE; 516 isLargeBuf = true;
521 memcpy(bigbuf, smallbuf, 4); 517 memcpy(bigbuf, smallbuf, 4);
522 smb_buffer = bigbuf; 518 smb_buffer = bigbuf;
523 } 519 }
@@ -582,16 +578,18 @@ incomplete_rcv:
582 (mid_entry->command == smb_buffer->Command)) { 578 (mid_entry->command == smb_buffer->Command)) {
583 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 579 if (check2ndT2(smb_buffer,server->maxBuf) > 0) {
584 /* We have a multipart transact2 resp */ 580 /* We have a multipart transact2 resp */
585 isMultiRsp = TRUE; 581 isMultiRsp = true;
586 if (mid_entry->resp_buf) { 582 if (mid_entry->resp_buf) {
587 /* merge response - fix up 1st*/ 583 /* merge response - fix up 1st*/
588 if (coalesce_t2(smb_buffer, 584 if (coalesce_t2(smb_buffer,
589 mid_entry->resp_buf)) { 585 mid_entry->resp_buf)) {
590 mid_entry->multiRsp = 1; 586 mid_entry->multiRsp =
587 true;
591 break; 588 break;
592 } else { 589 } else {
593 /* all parts received */ 590 /* all parts received */
594 mid_entry->multiEnd = 1; 591 mid_entry->multiEnd =
592 true;
595 goto multi_t2_fnd; 593 goto multi_t2_fnd;
596 } 594 }
597 } else { 595 } else {
@@ -603,17 +601,15 @@ incomplete_rcv:
603 /* Have first buffer */ 601 /* Have first buffer */
604 mid_entry->resp_buf = 602 mid_entry->resp_buf =
605 smb_buffer; 603 smb_buffer;
606 mid_entry->largeBuf = 1; 604 mid_entry->largeBuf =
605 true;
607 bigbuf = NULL; 606 bigbuf = NULL;
608 } 607 }
609 } 608 }
610 break; 609 break;
611 } 610 }
612 mid_entry->resp_buf = smb_buffer; 611 mid_entry->resp_buf = smb_buffer;
613 if (isLargeBuf) 612 mid_entry->largeBuf = isLargeBuf;
614 mid_entry->largeBuf = 1;
615 else
616 mid_entry->largeBuf = 0;
617multi_t2_fnd: 613multi_t2_fnd:
618 task_to_wake = mid_entry->tsk; 614 task_to_wake = mid_entry->tsk;
619 mid_entry->midState = MID_RESPONSE_RECEIVED; 615 mid_entry->midState = MID_RESPONSE_RECEIVED;
@@ -638,8 +634,8 @@ multi_t2_fnd:
638 smallbuf = NULL; 634 smallbuf = NULL;
639 } 635 }
640 wake_up_process(task_to_wake); 636 wake_up_process(task_to_wake);
641 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) 637 } else if (!is_valid_oplock_break(smb_buffer, server) &&
642 && (isMultiRsp == FALSE)) { 638 !isMultiRsp) {
643 cERROR(1, ("No task to wake, unknown frame received! " 639 cERROR(1, ("No task to wake, unknown frame received! "
644 "NumMids %d", midCount.counter)); 640 "NumMids %d", midCount.counter));
645 cifs_dump_mem("Received Data is: ", (char *)smb_buffer, 641 cifs_dump_mem("Received Data is: ", (char *)smb_buffer,
@@ -654,10 +650,20 @@ multi_t2_fnd:
654 650
655 spin_lock(&GlobalMid_Lock); 651 spin_lock(&GlobalMid_Lock);
656 server->tcpStatus = CifsExiting; 652 server->tcpStatus = CifsExiting;
657 server->tsk = NULL; 653 spin_unlock(&GlobalMid_Lock);
654
655 /* don't exit until kthread_stop is called */
656 set_current_state(TASK_UNINTERRUPTIBLE);
657 while (!kthread_should_stop()) {
658 schedule();
659 set_current_state(TASK_UNINTERRUPTIBLE);
660 }
661 set_current_state(TASK_RUNNING);
662
658 /* check if we have blocked requests that need to free */ 663 /* check if we have blocked requests that need to free */
659 /* Note that cifs_max_pending is normally 50, but 664 /* Note that cifs_max_pending is normally 50, but
660 can be set at module install time to as little as two */ 665 can be set at module install time to as little as two */
666 spin_lock(&GlobalMid_Lock);
661 if (atomic_read(&server->inFlight) >= cifs_max_pending) 667 if (atomic_read(&server->inFlight) >= cifs_max_pending)
662 atomic_set(&server->inFlight, cifs_max_pending - 1); 668 atomic_set(&server->inFlight, cifs_max_pending - 1);
663 /* We do not want to set the max_pending too low or we 669 /* We do not want to set the max_pending too low or we
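With the cifsd_complete completion removed, cifs_demultiplex_thread now parks itself once tcpStatus reaches CifsExiting and only returns after kthread_stop() has been called, so the stopping side can no longer race with the thread's exit. A sketch of that park-until-stopped idiom as kernel-thread code (an invented worker function, not the cifs thread itself, assuming the standard kthread API):

#include <linux/kthread.h>
#include <linux/sched.h>

static int worker_fn(void *data)
{
        /* ... main service loop runs until shutdown is detected ... */

        /* don't exit until kthread_stop() is called */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_UNINTERRUPTIBLE);
        }
        set_current_state(TASK_RUNNING);
        return 0;
}

Because the thread is guaranteed to still exist when kthread_stop() runs, the later cifs_mount() hunks can drop the "tsk = srvTcp->tsk; if (tsk)" copy-then-test dance and call kthread_stop(srvTcp->tsk) directly.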
@@ -825,7 +831,7 @@ cifs_parse_mount_options(char *options, const char *devname,
825 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); 831 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
826 832
827 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 833 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
828 vol->rw = TRUE; 834 vol->rw = true;
829 /* default is always to request posix paths. */ 835 /* default is always to request posix paths. */
830 vol->posix_paths = 1; 836 vol->posix_paths = 1;
831 837
@@ -1181,7 +1187,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1181 } else if (strnicmp(data, "guest", 5) == 0) { 1187 } else if (strnicmp(data, "guest", 5) == 0) {
1182 /* ignore */ 1188 /* ignore */
1183 } else if (strnicmp(data, "rw", 2) == 0) { 1189 } else if (strnicmp(data, "rw", 2) == 0) {
1184 vol->rw = TRUE; 1190 vol->rw = true;
1185 } else if ((strnicmp(data, "suid", 4) == 0) || 1191 } else if ((strnicmp(data, "suid", 4) == 0) ||
1186 (strnicmp(data, "nosuid", 6) == 0) || 1192 (strnicmp(data, "nosuid", 6) == 0) ||
1187 (strnicmp(data, "exec", 4) == 0) || 1193 (strnicmp(data, "exec", 4) == 0) ||
@@ -1197,7 +1203,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1197 is ok to just ignore them */ 1203 is ok to just ignore them */
1198 continue; 1204 continue;
1199 } else if (strnicmp(data, "ro", 2) == 0) { 1205 } else if (strnicmp(data, "ro", 2) == 0) {
1200 vol->rw = FALSE; 1206 vol->rw = false;
1201 } else if (strnicmp(data, "hard", 4) == 0) { 1207 } else if (strnicmp(data, "hard", 4) == 0) {
1202 vol->retry = 1; 1208 vol->retry = 1;
1203 } else if (strnicmp(data, "soft", 4) == 0) { 1209 } else if (strnicmp(data, "soft", 4) == 0) {
@@ -1305,6 +1311,9 @@ cifs_parse_mount_options(char *options, const char *devname,
1305 "begin with // or \\\\ \n"); 1311 "begin with // or \\\\ \n");
1306 return 1; 1312 return 1;
1307 } 1313 }
1314 value = strpbrk(vol->UNC+2, "/\\");
1315 if (value)
1316 *value = '\\';
1308 } else { 1317 } else {
1309 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1318 printk(KERN_WARNING "CIFS: UNC name too long\n");
1310 return 1; 1319 return 1;
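The added strpbrk() call flips the first '/' or '\' after the leading "//" of the UNC name to a backslash, presumably so code that later splits the UNC (such as the DNS upcall path) can count on a backslash between host and share regardless of which separator the user typed. A standalone illustration with a made-up name:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char unc[] = "//server/share";  /* user typed forward slashes */
        char *sep;

        /* skip the leading "//", then flip the next separator to '\\' */
        sep = strpbrk(unc + 2, "/\\");
        if (sep)
                *sep = '\\';

        printf("%s\n", unc);    /* prints "//server\share" */
        return 0;
}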
@@ -1318,42 +1327,43 @@ cifs_parse_mount_options(char *options, const char *devname,
1318 1327
1319static struct cifsSesInfo * 1328static struct cifsSesInfo *
1320cifs_find_tcp_session(struct in_addr *target_ip_addr, 1329cifs_find_tcp_session(struct in_addr *target_ip_addr,
1321 struct in6_addr *target_ip6_addr, 1330 struct in6_addr *target_ip6_addr,
1322 char *userName, struct TCP_Server_Info **psrvTcp) 1331 char *userName, struct TCP_Server_Info **psrvTcp)
1323{ 1332{
1324 struct list_head *tmp; 1333 struct list_head *tmp;
1325 struct cifsSesInfo *ses; 1334 struct cifsSesInfo *ses;
1335
1326 *psrvTcp = NULL; 1336 *psrvTcp = NULL;
1327 read_lock(&GlobalSMBSeslock);
1328 1337
1338 read_lock(&GlobalSMBSeslock);
1329 list_for_each(tmp, &GlobalSMBSessionList) { 1339 list_for_each(tmp, &GlobalSMBSessionList) {
1330 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 1340 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
1331 if (ses->server) { 1341 if (!ses->server)
1332 if ((target_ip_addr && 1342 continue;
1333 (ses->server->addr.sockAddr.sin_addr.s_addr 1343
1334 == target_ip_addr->s_addr)) || (target_ip6_addr 1344 if (target_ip_addr &&
1335 && memcmp(&ses->server->addr.sockAddr6.sin6_addr, 1345 ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr)
1336 target_ip6_addr, sizeof(*target_ip6_addr)))) { 1346 continue;
1337 /* BB lock server and tcp session and increment 1347 else if (target_ip6_addr &&
1338 use count here?? */ 1348 memcmp(&ses->server->addr.sockAddr6.sin6_addr,
1339 1349 target_ip6_addr, sizeof(*target_ip6_addr)))
1340 /* found a match on the TCP session */ 1350 continue;
1341 *psrvTcp = ses->server; 1351 /* BB lock server and tcp session; increment use count here?? */
1342 1352
1343 /* BB check if reconnection needed */ 1353 /* found a match on the TCP session */
1344 if (strncmp 1354 *psrvTcp = ses->server;
1345 (ses->userName, userName, 1355
1346 MAX_USERNAME_SIZE) == 0){ 1356 /* BB check if reconnection needed */
1347 read_unlock(&GlobalSMBSeslock); 1357 if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) {
1348 /* Found exact match on both TCP and 1358 read_unlock(&GlobalSMBSeslock);
1349 SMB sessions */ 1359 /* Found exact match on both TCP and
1350 return ses; 1360 SMB sessions */
1351 } 1361 return ses;
1352 }
1353 } 1362 }
1354 /* else tcp and smb sessions need reconnection */ 1363 /* else tcp and smb sessions need reconnection */
1355 } 1364 }
1356 read_unlock(&GlobalSMBSeslock); 1365 read_unlock(&GlobalSMBSeslock);
1366
1357 return NULL; 1367 return NULL;
1358} 1368}
1359 1369
@@ -1362,45 +1372,43 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName)
1362{ 1372{
1363 struct list_head *tmp; 1373 struct list_head *tmp;
1364 struct cifsTconInfo *tcon; 1374 struct cifsTconInfo *tcon;
1375 __be32 old_ip;
1365 1376
1366 read_lock(&GlobalSMBSeslock); 1377 read_lock(&GlobalSMBSeslock);
1378
1367 list_for_each(tmp, &GlobalTreeConnectionList) { 1379 list_for_each(tmp, &GlobalTreeConnectionList) {
1368 cFYI(1, ("Next tcon")); 1380 cFYI(1, ("Next tcon"));
1369 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 1381 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
1370 if (tcon->ses) { 1382 if (!tcon->ses || !tcon->ses->server)
1371 if (tcon->ses->server) { 1383 continue;
1372 cFYI(1, 1384
1373 ("old ip addr: %x == new ip %x ?", 1385 old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr;
1374 tcon->ses->server->addr.sockAddr.sin_addr. 1386 cFYI(1, ("old ip addr: %x == new ip %x ?",
1375 s_addr, new_target_ip_addr)); 1387 old_ip, new_target_ip_addr));
1376 if (tcon->ses->server->addr.sockAddr.sin_addr. 1388
1377 s_addr == new_target_ip_addr) { 1389 if (old_ip != new_target_ip_addr)
1378 /* BB lock tcon, server and tcp session and increment use count here? */ 1390 continue;
1379 /* found a match on the TCP session */ 1391
1380 /* BB check if reconnection needed */ 1392 /* BB lock tcon, server, tcp session and increment use count? */
1381 cFYI(1, 1393 /* found a match on the TCP session */
1382 ("IP match, old UNC: %s new: %s", 1394 /* BB check if reconnection needed */
1383 tcon->treeName, uncName)); 1395 cFYI(1, ("IP match, old UNC: %s new: %s",
1384 if (strncmp 1396 tcon->treeName, uncName));
1385 (tcon->treeName, uncName, 1397
1386 MAX_TREE_SIZE) == 0) { 1398 if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE))
1387 cFYI(1, 1399 continue;
1388 ("and old usr: %s new: %s", 1400
1389 tcon->treeName, uncName)); 1401 cFYI(1, ("and old usr: %s new: %s",
1390 if (strncmp 1402 tcon->treeName, uncName));
1391 (tcon->ses->userName, 1403
1392 userName, 1404 if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE))
1393 MAX_USERNAME_SIZE) == 0) { 1405 continue;
1394 read_unlock(&GlobalSMBSeslock); 1406
1395 /* matched smb session 1407 /* matched smb session (user name) */
1396 (user name */ 1408 read_unlock(&GlobalSMBSeslock);
1397 return tcon; 1409 return tcon;
1398 }
1399 }
1400 }
1401 }
1402 }
1403 } 1410 }
1411
1404 read_unlock(&GlobalSMBSeslock); 1412 read_unlock(&GlobalSMBSeslock);
1405 return NULL; 1413 return NULL;
1406} 1414}
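Both lookup loops (cifs_find_tcp_session and find_unc) are rewritten from deeply nested if-blocks into a flat "skip and continue" style: each mismatch bails out of the current iteration early, so the match path reads straight down and the unlock/return happens in one obvious place. A generic sketch of the same refactor; the struct and match criteria are invented for illustration:

#include <stdio.h>
#include <string.h>

struct conn {
        unsigned int ip;
        const char *user;
        const char *tree;
};

/* return the first entry matching ip, user and tree, or NULL */
static struct conn *find_conn(struct conn *tbl, int n, unsigned int ip,
                              const char *user, const char *tree)
{
        int i;

        for (i = 0; i < n; i++) {
                if (tbl[i].ip != ip)
                        continue;
                if (strcmp(tbl[i].user, user) != 0)
                        continue;
                if (strcmp(tbl[i].tree, tree) != 0)
                        continue;
                return &tbl[i]; /* all criteria matched */
        }
        return NULL;
}

int main(void)
{
        struct conn tbl[] = {
                { 1, "alice", "\\\\srv\\a" },
                { 2, "bob",   "\\\\srv\\b" },
        };

        printf("%s\n", find_conn(tbl, 2, 2, "bob", "\\\\srv\\b") ?
               "found" : "missing");
        return 0;
}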
@@ -1982,7 +1990,6 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1982 kfree(srvTcp->hostname); 1990 kfree(srvTcp->hostname);
1983 goto out; 1991 goto out;
1984 } 1992 }
1985 wait_for_completion(&cifsd_complete);
1986 rc = 0; 1993 rc = 0;
1987 memcpy(srvTcp->workstation_RFC1001_name, 1994 memcpy(srvTcp->workstation_RFC1001_name,
1988 volume_info.source_rfc1001_name, 16); 1995 volume_info.source_rfc1001_name, 16);
@@ -2189,15 +2196,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2189 srvTcp->tcpStatus = CifsExiting; 2196 srvTcp->tcpStatus = CifsExiting;
2190 spin_unlock(&GlobalMid_Lock); 2197 spin_unlock(&GlobalMid_Lock);
2191 if (srvTcp->tsk) { 2198 if (srvTcp->tsk) {
2192 struct task_struct *tsk;
2193 /* If we could verify that kthread_stop would 2199 /* If we could verify that kthread_stop would
2194 always wake up processes blocked in 2200 always wake up processes blocked in
2195 tcp in recv_mesg then we could remove the 2201 tcp in recv_mesg then we could remove the
2196 send_sig call */ 2202 send_sig call */
2197 force_sig(SIGKILL, srvTcp->tsk); 2203 force_sig(SIGKILL, srvTcp->tsk);
2198 tsk = srvTcp->tsk; 2204 kthread_stop(srvTcp->tsk);
2199 if (tsk)
2200 kthread_stop(tsk);
2201 } 2205 }
2202 } 2206 }
2203 /* If find_unc succeeded then rc == 0 so we can not end */ 2207 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2213,23 +2217,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2213 if ((temp_rc == -ESHUTDOWN) && 2217 if ((temp_rc == -ESHUTDOWN) &&
2214 (pSesInfo->server) && 2218 (pSesInfo->server) &&
2215 (pSesInfo->server->tsk)) { 2219 (pSesInfo->server->tsk)) {
2216 struct task_struct *tsk;
2217 force_sig(SIGKILL, 2220 force_sig(SIGKILL,
2218 pSesInfo->server->tsk); 2221 pSesInfo->server->tsk);
2219 tsk = pSesInfo->server->tsk; 2222 kthread_stop(pSesInfo->server->tsk);
2220 if (tsk)
2221 kthread_stop(tsk);
2222 } 2223 }
2223 } else { 2224 } else {
2224 cFYI(1, ("No session or bad tcon")); 2225 cFYI(1, ("No session or bad tcon"));
2225 if ((pSesInfo->server) && 2226 if ((pSesInfo->server) &&
2226 (pSesInfo->server->tsk)) { 2227 (pSesInfo->server->tsk)) {
2227 struct task_struct *tsk;
2228 force_sig(SIGKILL, 2228 force_sig(SIGKILL,
2229 pSesInfo->server->tsk); 2229 pSesInfo->server->tsk);
2230 tsk = pSesInfo->server->tsk; 2230 kthread_stop(pSesInfo->server->tsk);
2231 if (tsk)
2232 kthread_stop(tsk);
2233 } 2231 }
2234 } 2232 }
2235 sesInfoFree(pSesInfo); 2233 sesInfoFree(pSesInfo);
@@ -2602,7 +2600,7 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings,
2602 2600
2603static int 2601static int
2604CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, 2602CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2605 struct cifsSesInfo *ses, int *pNTLMv2_flag, 2603 struct cifsSesInfo *ses, bool *pNTLMv2_flag,
2606 const struct nls_table *nls_codepage) 2604 const struct nls_table *nls_codepage)
2607{ 2605{
2608 struct smb_hdr *smb_buffer; 2606 struct smb_hdr *smb_buffer;
@@ -2625,7 +2623,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2625 if (ses == NULL) 2623 if (ses == NULL)
2626 return -EINVAL; 2624 return -EINVAL;
2627 domain = ses->domainName; 2625 domain = ses->domainName;
2628 *pNTLMv2_flag = FALSE; 2626 *pNTLMv2_flag = false;
2629 smb_buffer = cifs_buf_get(); 2627 smb_buffer = cifs_buf_get();
2630 if (smb_buffer == NULL) { 2628 if (smb_buffer == NULL) {
2631 return -ENOMEM; 2629 return -ENOMEM;
@@ -2778,7 +2776,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2778 CIFS_CRYPTO_KEY_SIZE); 2776 CIFS_CRYPTO_KEY_SIZE);
2779 if (SecurityBlob2->NegotiateFlags & 2777 if (SecurityBlob2->NegotiateFlags &
2780 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) 2778 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2))
2781 *pNTLMv2_flag = TRUE; 2779 *pNTLMv2_flag = true;
2782 2780
2783 if ((SecurityBlob2->NegotiateFlags & 2781 if ((SecurityBlob2->NegotiateFlags &
2784 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) 2782 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN))
@@ -2939,7 +2937,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2939} 2937}
2940static int 2938static int
2941CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, 2939CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2942 char *ntlm_session_key, int ntlmv2_flag, 2940 char *ntlm_session_key, bool ntlmv2_flag,
2943 const struct nls_table *nls_codepage) 2941 const struct nls_table *nls_codepage)
2944{ 2942{
2945 struct smb_hdr *smb_buffer; 2943 struct smb_hdr *smb_buffer;
@@ -3556,8 +3554,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3556 cifs_sb->prepath = NULL; 3554 cifs_sb->prepath = NULL;
3557 kfree(tmp); 3555 kfree(tmp);
3558 if (ses) 3556 if (ses)
3559 schedule_timeout_interruptible(msecs_to_jiffies(500));
3560 if (ses)
3561 sesInfoFree(ses); 3557 sesInfoFree(ses);
3562 3558
3563 FreeXid(xid); 3559 FreeXid(xid);
@@ -3569,7 +3565,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3569{ 3565{
3570 int rc = 0; 3566 int rc = 0;
3571 char ntlm_session_key[CIFS_SESS_KEY_SIZE]; 3567 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3572 int ntlmv2_flag = FALSE; 3568 bool ntlmv2_flag = false;
3573 int first_time = 0; 3569 int first_time = 0;
3574 3570
3575 /* what if server changes its buffer size after dropping the session? */ 3571 /* what if server changes its buffer size after dropping the session? */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 0f5c62ba4038..e4e0078a0526 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -119,6 +119,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
119{ 119{
120 int rc = -ENOENT; 120 int rc = -ENOENT;
121 int xid; 121 int xid;
122 int create_options = CREATE_NOT_DIR;
122 int oplock = 0; 123 int oplock = 0;
123 int desiredAccess = GENERIC_READ | GENERIC_WRITE; 124 int desiredAccess = GENERIC_READ | GENERIC_WRITE;
124 __u16 fileHandle; 125 __u16 fileHandle;
@@ -130,7 +131,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
130 struct cifsFileInfo *pCifsFile = NULL; 131 struct cifsFileInfo *pCifsFile = NULL;
131 struct cifsInodeInfo *pCifsInode; 132 struct cifsInodeInfo *pCifsInode;
132 int disposition = FILE_OVERWRITE_IF; 133 int disposition = FILE_OVERWRITE_IF;
133 int write_only = FALSE; 134 bool write_only = false;
134 135
135 xid = GetXid(); 136 xid = GetXid();
136 137
@@ -152,7 +153,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
152 if (oflags & FMODE_WRITE) { 153 if (oflags & FMODE_WRITE) {
153 desiredAccess |= GENERIC_WRITE; 154 desiredAccess |= GENERIC_WRITE;
154 if (!(oflags & FMODE_READ)) 155 if (!(oflags & FMODE_READ))
155 write_only = TRUE; 156 write_only = true;
156 } 157 }
157 158
158 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 159 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -176,9 +177,19 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
176 FreeXid(xid); 177 FreeXid(xid);
177 return -ENOMEM; 178 return -ENOMEM;
178 } 179 }
180
181 mode &= ~current->fs->umask;
182
183 /*
184 * if we're not using unix extensions, see if we need to set
185 * ATTR_READONLY on the create call
186 */
187 if (!pTcon->unix_ext && (mode & S_IWUGO) == 0)
188 create_options |= CREATE_OPTION_READONLY;
189
179 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 190 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
180 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 191 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
181 desiredAccess, CREATE_NOT_DIR, 192 desiredAccess, create_options,
182 &fileHandle, &oplock, buf, cifs_sb->local_nls, 193 &fileHandle, &oplock, buf, cifs_sb->local_nls,
183 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 194 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
184 else 195 else
@@ -187,7 +198,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
187 if (rc == -EIO) { 198 if (rc == -EIO) {
188 /* old server, retry the open legacy style */ 199 /* old server, retry the open legacy style */
189 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 200 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
190 desiredAccess, CREATE_NOT_DIR, 201 desiredAccess, create_options,
191 &fileHandle, &oplock, buf, cifs_sb->local_nls, 202 &fileHandle, &oplock, buf, cifs_sb->local_nls,
192 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 203 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
193 } 204 }
@@ -197,7 +208,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
197 /* If Open reported that we actually created a file 208 /* If Open reported that we actually created a file
198 then we now have to set the mode if possible */ 209 then we now have to set the mode if possible */
199 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 210 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
200 mode &= ~current->fs->umask;
201 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 211 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
202 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 212 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
203 (__u64)current->fsuid, 213 (__u64)current->fsuid,
@@ -254,7 +264,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
254 d_instantiate(direntry, newinode); 264 d_instantiate(direntry, newinode);
255 } 265 }
256 if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || 266 if ((nd == NULL /* nfsd case - nfs srv does not set nd */) ||
257 ((nd->flags & LOOKUP_OPEN) == FALSE)) { 267 (!(nd->flags & LOOKUP_OPEN))) {
258 /* mknod case - do not leave file open */ 268 /* mknod case - do not leave file open */
259 CIFSSMBClose(xid, pTcon, fileHandle); 269 CIFSSMBClose(xid, pTcon, fileHandle);
260 } else if (newinode) { 270 } else if (newinode) {
@@ -266,8 +276,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
266 pCifsFile->netfid = fileHandle; 276 pCifsFile->netfid = fileHandle;
267 pCifsFile->pid = current->tgid; 277 pCifsFile->pid = current->tgid;
268 pCifsFile->pInode = newinode; 278 pCifsFile->pInode = newinode;
269 pCifsFile->invalidHandle = FALSE; 279 pCifsFile->invalidHandle = false;
270 pCifsFile->closePend = FALSE; 280 pCifsFile->closePend = false;
271 init_MUTEX(&pCifsFile->fh_sem); 281 init_MUTEX(&pCifsFile->fh_sem);
272 mutex_init(&pCifsFile->lock_mutex); 282 mutex_init(&pCifsFile->lock_mutex);
273 INIT_LIST_HEAD(&pCifsFile->llist); 283 INIT_LIST_HEAD(&pCifsFile->llist);
@@ -280,7 +290,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
280 pCifsInode = CIFS_I(newinode); 290 pCifsInode = CIFS_I(newinode);
281 if (pCifsInode) { 291 if (pCifsInode) {
282 /* if readable file instance put first in list*/ 292 /* if readable file instance put first in list*/
283 if (write_only == TRUE) { 293 if (write_only) {
284 list_add_tail(&pCifsFile->flist, 294 list_add_tail(&pCifsFile->flist,
285 &pCifsInode->openFileList); 295 &pCifsInode->openFileList);
286 } else { 296 } else {
@@ -288,12 +298,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
288 &pCifsInode->openFileList); 298 &pCifsInode->openFileList);
289 } 299 }
290 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 300 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
291 pCifsInode->clientCanCacheAll = TRUE; 301 pCifsInode->clientCanCacheAll = true;
292 pCifsInode->clientCanCacheRead = TRUE; 302 pCifsInode->clientCanCacheRead = true;
293 cFYI(1, ("Exclusive Oplock inode %p", 303 cFYI(1, ("Exclusive Oplock inode %p",
294 newinode)); 304 newinode));
295 } else if ((oplock & 0xF) == OPLOCK_READ) 305 } else if ((oplock & 0xF) == OPLOCK_READ)
296 pCifsInode->clientCanCacheRead = TRUE; 306 pCifsInode->clientCanCacheRead = true;
297 } 307 }
298 write_unlock(&GlobalSMBSeslock); 308 write_unlock(&GlobalSMBSeslock);
299 } 309 }
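cifs_create() now masks the requested mode with the process umask up front, and when unix extensions are off and the masked mode grants no write permission it asks the server for a read-only file via CREATE_OPTION_READONLY. A userspace-flavoured sketch of that decision; S_IWUGO is spelled out so the example builds outside the kernel, and the CREATE_NOT_DIR value is assumed for illustration:

#include <stdio.h>
#include <sys/stat.h>

#define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH)  /* kernel shorthand */
#define CREATE_NOT_DIR         0x00000040      /* value assumed for example */
#define CREATE_OPTION_READONLY 0x10000000

/* decide create options for a new file, mirroring the dir.c logic */
static unsigned int create_opts(mode_t mode, mode_t umask_bits, int unix_ext)
{
        unsigned int opts = CREATE_NOT_DIR;

        mode &= ~umask_bits;
        if (!unix_ext && (mode & S_IWUGO) == 0)
                opts |= CREATE_OPTION_READONLY;
        return opts;
}

int main(void)
{
        printf("0444, no unix ext -> 0x%08x\n", create_opts(0444, 022, 0));
        printf("0644, no unix ext -> 0x%08x\n", create_opts(0644, 022, 0));
        return 0;
}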
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 7cc86c418182..939e256f8497 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -55,6 +55,32 @@ struct key_type key_type_dns_resolver = {
55 .match = user_match, 55 .match = user_match,
56}; 56};
57 57
58/* Checks if supplied name is IP address
59 * returns:
60 * 1 - name is IP
61 * 0 - name is not IP
62 */
63static int is_ip(const char *name)
64{
65 int rc;
66 struct sockaddr_in sin_server;
67 struct sockaddr_in6 sin_server6;
68
69 rc = cifs_inet_pton(AF_INET, name,
70 &sin_server.sin_addr.s_addr);
71
72 if (rc <= 0) {
73 /* not ipv4 address, try ipv6 */
74 rc = cifs_inet_pton(AF_INET6, name,
75 &sin_server6.sin6_addr.in6_u);
76 if (rc > 0)
77 return 1;
78 } else {
79 return 1;
80 }
81 /* we failed translating address */
82 return 0;
83}
58 84
59/* Resolves server name to ip address. 85/* Resolves server name to ip address.
60 * input: 86 * input:
@@ -67,8 +93,9 @@ int
67dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) 93dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
68{ 94{
69 int rc = -EAGAIN; 95 int rc = -EAGAIN;
70 struct key *rkey; 96 struct key *rkey = ERR_PTR(-EAGAIN);
71 char *name; 97 char *name;
98 char *data = NULL;
72 int len; 99 int len;
73 100
74 if (!ip_addr || !unc) 101 if (!ip_addr || !unc)
@@ -97,26 +124,41 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
97 memcpy(name, unc+2, len); 124 memcpy(name, unc+2, len);
98 name[len] = 0; 125 name[len] = 0;
99 126
127 if (is_ip(name)) {
128 cFYI(1, ("%s: it is IP, skipping dns upcall: %s",
129 __func__, name));
130 data = name;
131 goto skip_upcall;
132 }
133
100 rkey = request_key(&key_type_dns_resolver, name, ""); 134 rkey = request_key(&key_type_dns_resolver, name, "");
101 if (!IS_ERR(rkey)) { 135 if (!IS_ERR(rkey)) {
102 len = strlen(rkey->payload.data); 136 data = rkey->payload.data;
103 *ip_addr = kmalloc(len+1, GFP_KERNEL); 137 cFYI(1, ("%s: resolved: %s to %s", __func__,
104 if (*ip_addr) {
105 memcpy(*ip_addr, rkey->payload.data, len);
106 (*ip_addr)[len] = '\0';
107 cFYI(1, ("%s: resolved: %s to %s", __func__,
108 rkey->description, 138 rkey->description,
109 *ip_addr 139 *ip_addr
110 )); 140 ));
141 } else {
142 cERROR(1, ("%s: unable to resolve: %s", __func__, name));
143 goto out;
144 }
145
146skip_upcall:
147 if (data) {
148 len = strlen(data);
149 *ip_addr = kmalloc(len+1, GFP_KERNEL);
150 if (*ip_addr) {
151 memcpy(*ip_addr, data, len);
152 (*ip_addr)[len] = '\0';
111 rc = 0; 153 rc = 0;
112 } else { 154 } else {
113 rc = -ENOMEM; 155 rc = -ENOMEM;
114 } 156 }
115 key_put(rkey); 157 if (!IS_ERR(rkey))
116 } else { 158 key_put(rkey);
117 cERROR(1, ("%s: unable to resolve: %s", __func__, name));
118 } 159 }
119 160
161out:
120 kfree(name); 162 kfree(name);
121 return rc; 163 return rc;
122} 164}
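The new is_ip() helper short-circuits the dns_resolver upcall when the share name already holds a literal address: if the string parses as IPv4 or IPv6 it is copied straight into *ip_addr. A standalone sketch of the same check using libc inet_pton(), which shares the "> 0 means parsed" convention the kernel code relies on with cifs_inet_pton():

#include <arpa/inet.h>
#include <stdio.h>

/* 1 if name is a literal IPv4/IPv6 address, 0 otherwise */
static int is_ip(const char *name)
{
        struct in_addr v4;
        struct in6_addr v6;

        if (inet_pton(AF_INET, name, &v4) > 0)
                return 1;
        if (inet_pton(AF_INET6, name, &v6) > 0)
                return 1;
        return 0;
}

int main(void)
{
        printf("%d %d %d\n", is_ip("192.168.1.10"), is_ip("fe80::1"),
               is_ip("fileserver.example.com"));    /* prints "1 1 0" */
        return 0;
}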
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 7d1d5aa4c430..5a57581eb4b2 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -68,7 +68,7 @@ int cifs_dir_notify(struct file *file, unsigned long arg)
68{ 68{
69 int xid; 69 int xid;
70 int rc = -EINVAL; 70 int rc = -EINVAL;
71 int oplock = FALSE; 71 int oplock = 0;
72 struct cifs_sb_info *cifs_sb; 72 struct cifs_sb_info *cifs_sb;
73 struct cifsTconInfo *pTcon; 73 struct cifsTconInfo *pTcon;
74 char *full_path = NULL; 74 char *full_path = NULL;
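Note that oplock keeps its int type and is now initialized to 0 rather than FALSE: it is not a boolean but a request/level word that the open call fills in and the caller then masks, for example "oplock & 0xF" against OPLOCK_EXCLUSIVE or OPLOCK_READ in the file.c hunks below. A compact sketch of that masking convention; the numeric values here are illustrative, not the real SMB constants:

#include <stdio.h>

#define REQ_OPLOCK        2     /* illustrative request value */
#define OPLOCK_EXCLUSIVE  8     /* illustrative level in low nibble */
#define OPLOCK_READ       2

static void report(int oplock)
{
        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE)
                printf("can cache reads and writes\n");
        else if ((oplock & 0xF) == OPLOCK_READ)
                printf("can cache reads only\n");
        else
                printf("no caching\n");
}

int main(void)
{
        report(0);                 /* oplocks disabled: plain 0, not "false" */
        report(OPLOCK_EXCLUSIVE);
        return 0;
}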
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 40b690073fc1..31a0a33b9d95 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -51,8 +51,8 @@ static inline struct cifsFileInfo *cifs_init_private(
51 INIT_LIST_HEAD(&private_data->llist); 51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */ 52 private_data->pfile = file; /* needed for writepage */
53 private_data->pInode = inode; 53 private_data->pInode = inode;
54 private_data->invalidHandle = FALSE; 54 private_data->invalidHandle = false;
55 private_data->closePend = FALSE; 55 private_data->closePend = false;
56 /* we have to track num writers to the inode, since writepages 56 /* we have to track num writers to the inode, since writepages
57 does not tell us which handle the write is for so there can 57 does not tell us which handle the write is for so there can
58 be a close (overlapping with write) of the filehandle that 58 be a close (overlapping with write) of the filehandle that
@@ -148,12 +148,12 @@ client_can_cache:
148 full_path, buf, inode->i_sb, xid, NULL); 148 full_path, buf, inode->i_sb, xid, NULL);
149 149
150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
151 pCifsInode->clientCanCacheAll = TRUE; 151 pCifsInode->clientCanCacheAll = true;
152 pCifsInode->clientCanCacheRead = TRUE; 152 pCifsInode->clientCanCacheRead = true;
153 cFYI(1, ("Exclusive Oplock granted on inode %p", 153 cFYI(1, ("Exclusive Oplock granted on inode %p",
154 file->f_path.dentry->d_inode)); 154 file->f_path.dentry->d_inode));
155 } else if ((*oplock & 0xF) == OPLOCK_READ) 155 } else if ((*oplock & 0xF) == OPLOCK_READ)
156 pCifsInode->clientCanCacheRead = TRUE; 156 pCifsInode->clientCanCacheRead = true;
157 157
158 return rc; 158 return rc;
159} 159}
@@ -247,7 +247,7 @@ int cifs_open(struct inode *inode, struct file *file)
247 if (oplockEnabled) 247 if (oplockEnabled)
248 oplock = REQ_OPLOCK; 248 oplock = REQ_OPLOCK;
249 else 249 else
250 oplock = FALSE; 250 oplock = 0;
251 251
252 /* BB pass O_SYNC flag through on file attributes .. BB */ 252 /* BB pass O_SYNC flag through on file attributes .. BB */
253 253
@@ -339,7 +339,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile)
339 return rc; 339 return rc;
340} 340}
341 341
342static int cifs_reopen_file(struct file *file, int can_flush) 342static int cifs_reopen_file(struct file *file, bool can_flush)
343{ 343{
344 int rc = -EACCES; 344 int rc = -EACCES;
345 int xid, oplock; 345 int xid, oplock;
@@ -360,7 +360,7 @@ static int cifs_reopen_file(struct file *file, int can_flush)
360 360
361 xid = GetXid(); 361 xid = GetXid();
362 down(&pCifsFile->fh_sem); 362 down(&pCifsFile->fh_sem);
363 if (pCifsFile->invalidHandle == FALSE) { 363 if (!pCifsFile->invalidHandle) {
364 up(&pCifsFile->fh_sem); 364 up(&pCifsFile->fh_sem);
365 FreeXid(xid); 365 FreeXid(xid);
366 return 0; 366 return 0;
@@ -404,7 +404,7 @@ reopen_error_exit:
404 if (oplockEnabled) 404 if (oplockEnabled)
405 oplock = REQ_OPLOCK; 405 oplock = REQ_OPLOCK;
406 else 406 else
407 oplock = FALSE; 407 oplock = 0;
408 408
409 /* Can not refresh inode by passing in file_info buf to be returned 409 /* Can not refresh inode by passing in file_info buf to be returned
410 by SMBOpen and then calling get_inode_info with returned buf 410 by SMBOpen and then calling get_inode_info with returned buf
@@ -422,7 +422,7 @@ reopen_error_exit:
422 cFYI(1, ("oplock: %d", oplock)); 422 cFYI(1, ("oplock: %d", oplock));
423 } else { 423 } else {
424 pCifsFile->netfid = netfid; 424 pCifsFile->netfid = netfid;
425 pCifsFile->invalidHandle = FALSE; 425 pCifsFile->invalidHandle = false;
426 up(&pCifsFile->fh_sem); 426 up(&pCifsFile->fh_sem);
427 pCifsInode = CIFS_I(inode); 427 pCifsInode = CIFS_I(inode);
428 if (pCifsInode) { 428 if (pCifsInode) {
@@ -432,8 +432,8 @@ reopen_error_exit:
432 CIFS_I(inode)->write_behind_rc = rc; 432 CIFS_I(inode)->write_behind_rc = rc;
433 /* temporarily disable caching while we 433 /* temporarily disable caching while we
434 go to server to get inode info */ 434 go to server to get inode info */
435 pCifsInode->clientCanCacheAll = FALSE; 435 pCifsInode->clientCanCacheAll = false;
436 pCifsInode->clientCanCacheRead = FALSE; 436 pCifsInode->clientCanCacheRead = false;
437 if (pTcon->unix_ext) 437 if (pTcon->unix_ext)
438 rc = cifs_get_inode_info_unix(&inode, 438 rc = cifs_get_inode_info_unix(&inode,
439 full_path, inode->i_sb, xid); 439 full_path, inode->i_sb, xid);
@@ -448,16 +448,16 @@ reopen_error_exit:
448 we can not go to the server to get the new inod 448 we can not go to the server to get the new inod
449 info */ 449 info */
450 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 450 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
451 pCifsInode->clientCanCacheAll = TRUE; 451 pCifsInode->clientCanCacheAll = true;
452 pCifsInode->clientCanCacheRead = TRUE; 452 pCifsInode->clientCanCacheRead = true;
453 cFYI(1, ("Exclusive Oplock granted on inode %p", 453 cFYI(1, ("Exclusive Oplock granted on inode %p",
454 file->f_path.dentry->d_inode)); 454 file->f_path.dentry->d_inode));
455 } else if ((oplock & 0xF) == OPLOCK_READ) { 455 } else if ((oplock & 0xF) == OPLOCK_READ) {
456 pCifsInode->clientCanCacheRead = TRUE; 456 pCifsInode->clientCanCacheRead = true;
457 pCifsInode->clientCanCacheAll = FALSE; 457 pCifsInode->clientCanCacheAll = false;
458 } else { 458 } else {
459 pCifsInode->clientCanCacheRead = FALSE; 459 pCifsInode->clientCanCacheRead = false;
460 pCifsInode->clientCanCacheAll = FALSE; 460 pCifsInode->clientCanCacheAll = false;
461 } 461 }
462 cifs_relock_file(pCifsFile); 462 cifs_relock_file(pCifsFile);
463 } 463 }
@@ -484,7 +484,7 @@ int cifs_close(struct inode *inode, struct file *file)
484 if (pSMBFile) { 484 if (pSMBFile) {
485 struct cifsLockInfo *li, *tmp; 485 struct cifsLockInfo *li, *tmp;
486 486
487 pSMBFile->closePend = TRUE; 487 pSMBFile->closePend = true;
488 if (pTcon) { 488 if (pTcon) {
489 /* no sense reconnecting to close a file that is 489 /* no sense reconnecting to close a file that is
490 already closed */ 490 already closed */
@@ -553,8 +553,8 @@ int cifs_close(struct inode *inode, struct file *file)
553 cFYI(1, ("closing last open instance for inode %p", inode)); 553 cFYI(1, ("closing last open instance for inode %p", inode));
554 /* if the file is not open we do not know if we can cache info 554 /* if the file is not open we do not know if we can cache info
555 on this inode, much less write behind and read ahead */ 555 on this inode, much less write behind and read ahead */
556 CIFS_I(inode)->clientCanCacheRead = FALSE; 556 CIFS_I(inode)->clientCanCacheRead = false;
557 CIFS_I(inode)->clientCanCacheAll = FALSE; 557 CIFS_I(inode)->clientCanCacheAll = false;
558 } 558 }
559 read_unlock(&GlobalSMBSeslock); 559 read_unlock(&GlobalSMBSeslock);
560 if ((rc == 0) && CIFS_I(inode)->write_behind_rc) 560 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
@@ -583,9 +583,9 @@ int cifs_closedir(struct inode *inode, struct file *file)
583 pTcon = cifs_sb->tcon; 583 pTcon = cifs_sb->tcon;
584 584
585 cFYI(1, ("Freeing private data in close dir")); 585 cFYI(1, ("Freeing private data in close dir"));
586 if ((pCFileStruct->srch_inf.endOfSearch == FALSE) && 586 if (!pCFileStruct->srch_inf.endOfSearch &&
587 (pCFileStruct->invalidHandle == FALSE)) { 587 !pCFileStruct->invalidHandle) {
588 pCFileStruct->invalidHandle = TRUE; 588 pCFileStruct->invalidHandle = true;
589 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 589 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
590 cFYI(1, ("Closing uncompleted readdir with rc %d", 590 cFYI(1, ("Closing uncompleted readdir with rc %d",
591 rc)); 591 rc));
@@ -637,12 +637,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
637 __u32 numLock = 0; 637 __u32 numLock = 0;
638 __u32 numUnlock = 0; 638 __u32 numUnlock = 0;
639 __u64 length; 639 __u64 length;
640 int wait_flag = FALSE; 640 bool wait_flag = false;
641 struct cifs_sb_info *cifs_sb; 641 struct cifs_sb_info *cifs_sb;
642 struct cifsTconInfo *pTcon; 642 struct cifsTconInfo *pTcon;
643 __u16 netfid; 643 __u16 netfid;
644 __u8 lockType = LOCKING_ANDX_LARGE_FILES; 644 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
645 int posix_locking; 645 bool posix_locking;
646 646
647 length = 1 + pfLock->fl_end - pfLock->fl_start; 647 length = 1 + pfLock->fl_end - pfLock->fl_start;
648 rc = -EACCES; 648 rc = -EACCES;
@@ -659,7 +659,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
659 cFYI(1, ("Flock")); 659 cFYI(1, ("Flock"));
660 if (pfLock->fl_flags & FL_SLEEP) { 660 if (pfLock->fl_flags & FL_SLEEP) {
661 cFYI(1, ("Blocking lock")); 661 cFYI(1, ("Blocking lock"));
662 wait_flag = TRUE; 662 wait_flag = true;
663 } 663 }
664 if (pfLock->fl_flags & FL_ACCESS) 664 if (pfLock->fl_flags & FL_ACCESS)
665 cFYI(1, ("Process suspended by mandatory locking - " 665 cFYI(1, ("Process suspended by mandatory locking - "
@@ -794,7 +794,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
794 stored_rc = CIFSSMBLock(xid, pTcon, 794 stored_rc = CIFSSMBLock(xid, pTcon,
795 netfid, 795 netfid,
796 li->length, li->offset, 796 li->length, li->offset,
797 1, 0, li->type, FALSE); 797 1, 0, li->type, false);
798 if (stored_rc) 798 if (stored_rc)
799 rc = stored_rc; 799 rc = stored_rc;
800 800
@@ -866,7 +866,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
866 filemap_fdatawait from here so tell 866 filemap_fdatawait from here so tell
867 reopen_file not to flush data to server 867 reopen_file not to flush data to server
868 now */ 868 now */
869 rc = cifs_reopen_file(file, FALSE); 869 rc = cifs_reopen_file(file, false);
870 if (rc != 0) 870 if (rc != 0)
871 break; 871 break;
872 } 872 }
@@ -966,7 +966,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
966 filemap_fdatawait from here so tell 966 filemap_fdatawait from here so tell
967 reopen_file not to flush data to 967 reopen_file not to flush data to
968 server now */ 968 server now */
969 rc = cifs_reopen_file(file, FALSE); 969 rc = cifs_reopen_file(file, false);
970 if (rc != 0) 970 if (rc != 0)
971 break; 971 break;
972 } 972 }
@@ -1093,7 +1093,7 @@ refind_writable:
1093 1093
1094 read_unlock(&GlobalSMBSeslock); 1094 read_unlock(&GlobalSMBSeslock);
1095 /* Had to unlock since following call can block */ 1095 /* Had to unlock since following call can block */
1096 rc = cifs_reopen_file(open_file->pfile, FALSE); 1096 rc = cifs_reopen_file(open_file->pfile, false);
1097 if (!rc) { 1097 if (!rc) {
1098 if (!open_file->closePend) 1098 if (!open_file->closePend)
1099 return open_file; 1099 return open_file;
@@ -1608,7 +1608,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1608 int buf_type = CIFS_NO_BUFFER; 1608 int buf_type = CIFS_NO_BUFFER;
1609 if ((open_file->invalidHandle) && 1609 if ((open_file->invalidHandle) &&
1610 (!open_file->closePend)) { 1610 (!open_file->closePend)) {
1611 rc = cifs_reopen_file(file, TRUE); 1611 rc = cifs_reopen_file(file, true);
1612 if (rc != 0) 1612 if (rc != 0)
1613 break; 1613 break;
1614 } 1614 }
@@ -1693,7 +1693,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1693 while (rc == -EAGAIN) { 1693 while (rc == -EAGAIN) {
1694 if ((open_file->invalidHandle) && 1694 if ((open_file->invalidHandle) &&
1695 (!open_file->closePend)) { 1695 (!open_file->closePend)) {
1696 rc = cifs_reopen_file(file, TRUE); 1696 rc = cifs_reopen_file(file, true);
1697 if (rc != 0) 1697 if (rc != 0)
1698 break; 1698 break;
1699 } 1699 }
@@ -1850,7 +1850,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1850 while (rc == -EAGAIN) { 1850 while (rc == -EAGAIN) {
1851 if ((open_file->invalidHandle) && 1851 if ((open_file->invalidHandle) &&
1852 (!open_file->closePend)) { 1852 (!open_file->closePend)) {
1853 rc = cifs_reopen_file(file, TRUE); 1853 rc = cifs_reopen_file(file, true);
1854 if (rc != 0) 1854 if (rc != 0)
1855 break; 1855 break;
1856 } 1856 }
@@ -2009,10 +2009,10 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2009 refreshing the inode only on increases in the file size 2009 refreshing the inode only on increases in the file size
2010 but this is tricky to do without racing with writebehind 2010 but this is tricky to do without racing with writebehind
2011 page caching in the current Linux kernel design */ 2011 page caching in the current Linux kernel design */
2012int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) 2012bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2013{ 2013{
2014 if (!cifsInode) 2014 if (!cifsInode)
2015 return 1; 2015 return true;
2016 2016
2017 if (is_inode_writable(cifsInode)) { 2017 if (is_inode_writable(cifsInode)) {
2018 /* This inode is open for write at least once */ 2018 /* This inode is open for write at least once */
@@ -2022,15 +2022,15 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2022 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 2022 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2023 /* since no page cache to corrupt on directio 2023 /* since no page cache to corrupt on directio
2024 we can change size safely */ 2024 we can change size safely */
2025 return 1; 2025 return true;
2026 } 2026 }
2027 2027
2028 if (i_size_read(&cifsInode->vfs_inode) < end_of_file) 2028 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2029 return 1; 2029 return true;
2030 2030
2031 return 0; 2031 return false;
2032 } else 2032 } else
2033 return 1; 2033 return true;
2034} 2034}
2035 2035
2036static int cifs_prepare_write(struct file *file, struct page *page, 2036static int cifs_prepare_write(struct file *file, struct page *page,
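
The fs/cifs/file.c hunks above are largely a mechanical conversion of the driver-private TRUE/FALSE int flags to C99 bool: comparisons such as x == FALSE become !x, and helpers like cifs_reopen_file() and is_size_safe_to_change() change their flag parameters and return types to bool. A minimal userspace sketch of the same pattern, using a hypothetical file_state struct rather than the real cifsFileInfo:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for a driver struct that used int flags plus
 * private TRUE/FALSE macros; bool documents intent and keeps the flag
 * out of accidental arithmetic. */
struct file_state {
	bool invalidHandle;	/* was: int invalidHandle = FALSE; */
	bool closePend;
};

static bool needs_reopen(const struct file_state *f)
{
	/* was: (f->invalidHandle == TRUE) && (f->closePend == FALSE) */
	return f->invalidHandle && !f->closePend;
}

int main(void)
{
	struct file_state f = { .invalidHandle = true, .closePend = false };

	printf("needs_reopen: %s\n", needs_reopen(&f) ? "yes" : "no");
	return 0;
}

The same shape covers the oplock bookkeeping above: clientCanCacheAll and clientCanCacheRead become plain bool assignments instead of TRUE/FALSE stores.
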
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e1031b9e2c55..fcbdbb6ad7bf 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -281,7 +281,7 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
281 struct cifs_sb_info *cifs_sb, int xid) 281 struct cifs_sb_info *cifs_sb, int xid)
282{ 282{
283 int rc; 283 int rc;
284 int oplock = FALSE; 284 int oplock = 0;
285 __u16 netfid; 285 __u16 netfid;
286 struct cifsTconInfo *pTcon = cifs_sb->tcon; 286 struct cifsTconInfo *pTcon = cifs_sb->tcon;
287 char buf[24]; 287 char buf[24];
@@ -389,7 +389,7 @@ int cifs_get_inode_info(struct inode **pinode,
389 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 389 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
390 const unsigned char *full_path = NULL; 390 const unsigned char *full_path = NULL;
391 char *buf = NULL; 391 char *buf = NULL;
392 int adjustTZ = FALSE; 392 bool adjustTZ = false;
393 bool is_dfs_referral = false; 393 bool is_dfs_referral = false;
394 394
395 pTcon = cifs_sb->tcon; 395 pTcon = cifs_sb->tcon;
@@ -425,7 +425,7 @@ try_again_CIFSSMBQPathInfo:
425 pfindData, cifs_sb->local_nls, 425 pfindData, cifs_sb->local_nls,
426 cifs_sb->mnt_cifs_flags & 426 cifs_sb->mnt_cifs_flags &
427 CIFS_MOUNT_MAP_SPECIAL_CHR); 427 CIFS_MOUNT_MAP_SPECIAL_CHR);
428 adjustTZ = TRUE; 428 adjustTZ = true;
429 } 429 }
430 } 430 }
431 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ 431 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
@@ -703,7 +703,7 @@ psx_del_no_retry:
703 } else if (rc == -ENOENT) { 703 } else if (rc == -ENOENT) {
704 d_drop(direntry); 704 d_drop(direntry);
705 } else if (rc == -ETXTBSY) { 705 } else if (rc == -ETXTBSY) {
706 int oplock = FALSE; 706 int oplock = 0;
707 __u16 netfid; 707 __u16 netfid;
708 708
709 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, 709 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE,
@@ -736,7 +736,7 @@ psx_del_no_retry:
736 rc = -EOPNOTSUPP; 736 rc = -EOPNOTSUPP;
737 737
738 if (rc == -EOPNOTSUPP) { 738 if (rc == -EOPNOTSUPP) {
739 int oplock = FALSE; 739 int oplock = 0;
740 __u16 netfid; 740 __u16 netfid;
741 /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, 741 /* rc = CIFSSMBSetAttrLegacy(xid, pTcon,
742 full_path, 742 full_path,
@@ -774,7 +774,7 @@ psx_del_no_retry:
774 if (direntry->d_inode) 774 if (direntry->d_inode)
775 drop_nlink(direntry->d_inode); 775 drop_nlink(direntry->d_inode);
776 } else if (rc == -ETXTBSY) { 776 } else if (rc == -ETXTBSY) {
777 int oplock = FALSE; 777 int oplock = 0;
778 __u16 netfid; 778 __u16 netfid;
779 779
780 rc = CIFSSMBOpen(xid, pTcon, full_path, 780 rc = CIFSSMBOpen(xid, pTcon, full_path,
@@ -974,8 +974,8 @@ mkdir_get_info:
974 * failed to get it from the server or was set bogus */ 974 * failed to get it from the server or was set bogus */
975 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 975 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
976 direntry->d_inode->i_nlink = 2; 976 direntry->d_inode->i_nlink = 2;
977 mode &= ~current->fs->umask;
977 if (pTcon->unix_ext) { 978 if (pTcon->unix_ext) {
978 mode &= ~current->fs->umask;
979 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 979 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
980 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 980 CIFSSMBUnixSetPerms(xid, pTcon, full_path,
981 mode, 981 mode,
@@ -994,9 +994,16 @@ mkdir_get_info:
994 CIFS_MOUNT_MAP_SPECIAL_CHR); 994 CIFS_MOUNT_MAP_SPECIAL_CHR);
995 } 995 }
996 } else { 996 } else {
997 /* BB to be implemented via Windows secrty descriptors 997 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
998 eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, 998 (mode & S_IWUGO) == 0) {
999 -1, -1, local_nls); */ 999 FILE_BASIC_INFO pInfo;
1000 memset(&pInfo, 0, sizeof(pInfo));
1001 pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
1002 CIFSSMBSetTimes(xid, pTcon, full_path,
1003 &pInfo, cifs_sb->local_nls,
1004 cifs_sb->mnt_cifs_flags &
1005 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 }
1000 if (direntry->d_inode) { 1007 if (direntry->d_inode) {
1001 direntry->d_inode->i_mode = mode; 1008 direntry->d_inode->i_mode = mode;
1002 direntry->d_inode->i_mode |= S_IFDIR; 1009 direntry->d_inode->i_mode |= S_IFDIR;
@@ -1149,7 +1156,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1149 cFYI(1, ("rename rc %d", rc)); 1156 cFYI(1, ("rename rc %d", rc));
1150 1157
1151 if ((rc == -EIO) || (rc == -EEXIST)) { 1158 if ((rc == -EIO) || (rc == -EEXIST)) {
1152 int oplock = FALSE; 1159 int oplock = 0;
1153 __u16 netfid; 1160 __u16 netfid;
1154 1161
1155 /* BB FIXME Is Generic Read correct for rename? */ 1162 /* BB FIXME Is Generic Read correct for rename? */
@@ -1186,7 +1193,7 @@ int cifs_revalidate(struct dentry *direntry)
1186 struct cifsInodeInfo *cifsInode; 1193 struct cifsInodeInfo *cifsInode;
1187 loff_t local_size; 1194 loff_t local_size;
1188 struct timespec local_mtime; 1195 struct timespec local_mtime;
1189 int invalidate_inode = FALSE; 1196 bool invalidate_inode = false;
1190 1197
1191 if (direntry->d_inode == NULL) 1198 if (direntry->d_inode == NULL)
1192 return -ENOENT; 1199 return -ENOENT;
@@ -1268,7 +1275,7 @@ int cifs_revalidate(struct dentry *direntry)
1268 only ones who could have modified the file and the 1275 only ones who could have modified the file and the
1269 server copy is staler than ours */ 1276 server copy is staler than ours */
1270 } else { 1277 } else {
1271 invalidate_inode = TRUE; 1278 invalidate_inode = true;
1272 } 1279 }
1273 } 1280 }
1274 1281
@@ -1402,24 +1409,25 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1402 int rc = -EACCES; 1409 int rc = -EACCES;
1403 struct cifsFileInfo *open_file = NULL; 1410 struct cifsFileInfo *open_file = NULL;
1404 FILE_BASIC_INFO time_buf; 1411 FILE_BASIC_INFO time_buf;
1405 int set_time = FALSE; 1412 bool set_time = false;
1406 int set_dosattr = FALSE; 1413 bool set_dosattr = false;
1407 __u64 mode = 0xFFFFFFFFFFFFFFFFULL; 1414 __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
1408 __u64 uid = 0xFFFFFFFFFFFFFFFFULL; 1415 __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
1409 __u64 gid = 0xFFFFFFFFFFFFFFFFULL; 1416 __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
1410 struct cifsInodeInfo *cifsInode; 1417 struct cifsInodeInfo *cifsInode;
1418 struct inode *inode = direntry->d_inode;
1411 1419
1412 xid = GetXid(); 1420 xid = GetXid();
1413 1421
1414 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1422 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1415 direntry->d_name.name, attrs->ia_valid)); 1423 direntry->d_name.name, attrs->ia_valid));
1416 1424
1417 cifs_sb = CIFS_SB(direntry->d_inode->i_sb); 1425 cifs_sb = CIFS_SB(inode->i_sb);
1418 pTcon = cifs_sb->tcon; 1426 pTcon = cifs_sb->tcon;
1419 1427
1420 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1428 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1421 /* check if we have permission to change attrs */ 1429 /* check if we have permission to change attrs */
1422 rc = inode_change_ok(direntry->d_inode, attrs); 1430 rc = inode_change_ok(inode, attrs);
1423 if (rc < 0) { 1431 if (rc < 0) {
1424 FreeXid(xid); 1432 FreeXid(xid);
1425 return rc; 1433 return rc;
@@ -1432,7 +1440,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1432 FreeXid(xid); 1440 FreeXid(xid);
1433 return -ENOMEM; 1441 return -ENOMEM;
1434 } 1442 }
1435 cifsInode = CIFS_I(direntry->d_inode); 1443 cifsInode = CIFS_I(inode);
1436 1444
1437 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1445 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1438 /* 1446 /*
@@ -1443,9 +1451,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1443 will be truncated anyway? Also, should we error out here if 1451 will be truncated anyway? Also, should we error out here if
1444 the flush returns error? 1452 the flush returns error?
1445 */ 1453 */
1446 rc = filemap_write_and_wait(direntry->d_inode->i_mapping); 1454 rc = filemap_write_and_wait(inode->i_mapping);
1447 if (rc != 0) { 1455 if (rc != 0) {
1448 CIFS_I(direntry->d_inode)->write_behind_rc = rc; 1456 cifsInode->write_behind_rc = rc;
1449 rc = 0; 1457 rc = 0;
1450 } 1458 }
1451 } 1459 }
@@ -1464,7 +1472,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1464 __u16 nfid = open_file->netfid; 1472 __u16 nfid = open_file->netfid;
1465 __u32 npid = open_file->pid; 1473 __u32 npid = open_file->pid;
1466 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, 1474 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
1467 nfid, npid, FALSE); 1475 nfid, npid, false);
1468 atomic_dec(&open_file->wrtPending); 1476 atomic_dec(&open_file->wrtPending);
1469 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1477 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1470 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1478 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
@@ -1484,14 +1492,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1484 it was found or because there was an error setting 1492 it was found or because there was an error setting
1485 it by handle */ 1493 it by handle */
1486 rc = CIFSSMBSetEOF(xid, pTcon, full_path, 1494 rc = CIFSSMBSetEOF(xid, pTcon, full_path,
1487 attrs->ia_size, FALSE, 1495 attrs->ia_size, false,
1488 cifs_sb->local_nls, 1496 cifs_sb->local_nls,
1489 cifs_sb->mnt_cifs_flags & 1497 cifs_sb->mnt_cifs_flags &
1490 CIFS_MOUNT_MAP_SPECIAL_CHR); 1498 CIFS_MOUNT_MAP_SPECIAL_CHR);
1491 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); 1499 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1492 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1500 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1493 __u16 netfid; 1501 __u16 netfid;
1494 int oplock = FALSE; 1502 int oplock = 0;
1495 1503
1496 rc = SMBLegacyOpen(xid, pTcon, full_path, 1504 rc = SMBLegacyOpen(xid, pTcon, full_path,
1497 FILE_OPEN, 1505 FILE_OPEN,
@@ -1516,14 +1524,13 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1516 1524
1517 /* Server is ok setting allocation size implicitly - no need 1525 /* Server is ok setting allocation size implicitly - no need
1518 to call: 1526 to call:
1519 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, 1527 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
1520 cifs_sb->local_nls); 1528 cifs_sb->local_nls);
1521 */ 1529 */
1522 1530
1523 if (rc == 0) { 1531 if (rc == 0) {
1524 rc = cifs_vmtruncate(direntry->d_inode, attrs->ia_size); 1532 rc = cifs_vmtruncate(inode, attrs->ia_size);
1525 cifs_truncate_page(direntry->d_inode->i_mapping, 1533 cifs_truncate_page(inode->i_mapping, inode->i_size);
1526 direntry->d_inode->i_size);
1527 } else 1534 } else
1528 goto cifs_setattr_exit; 1535 goto cifs_setattr_exit;
1529 } 1536 }
@@ -1557,14 +1564,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1557 rc = 0; 1564 rc = 0;
1558#ifdef CONFIG_CIFS_EXPERIMENTAL 1565#ifdef CONFIG_CIFS_EXPERIMENTAL
1559 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1566 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
1560 rc = mode_to_acl(direntry->d_inode, full_path, mode); 1567 rc = mode_to_acl(inode, full_path, mode);
1561 else if ((mode & S_IWUGO) == 0) { 1568 else if ((mode & S_IWUGO) == 0) {
1562#else 1569#else
1563 if ((mode & S_IWUGO) == 0) { 1570 if ((mode & S_IWUGO) == 0) {
1564#endif 1571#endif
1565 /* not writeable */ 1572 /* not writeable */
1566 if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1573 if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1567 set_dosattr = TRUE; 1574 set_dosattr = true;
1568 time_buf.Attributes = 1575 time_buf.Attributes =
1569 cpu_to_le32(cifsInode->cifsAttrs | 1576 cpu_to_le32(cifsInode->cifsAttrs |
1570 ATTR_READONLY); 1577 ATTR_READONLY);
@@ -1574,28 +1581,24 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1574 not be able to write to it - so if any write 1581 not be able to write to it - so if any write
1575 bit is enabled for user or group or other we 1582 bit is enabled for user or group or other we
1576 need to at least try to remove r/o dos attr */ 1583 need to at least try to remove r/o dos attr */
1577 set_dosattr = TRUE; 1584 set_dosattr = true;
1578 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & 1585 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1579 (~ATTR_READONLY)); 1586 (~ATTR_READONLY));
1580 /* Windows ignores set to zero */ 1587 /* Windows ignores set to zero */
1581 if (time_buf.Attributes == 0) 1588 if (time_buf.Attributes == 0)
1582 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); 1589 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1583 } 1590 }
1584#ifdef CONFIG_CIFS_EXPERIMENTAL
1585 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
1586 mode_to_acl(direntry->d_inode, full_path, mode);
1587#endif
1588 } 1591 }
1589 1592
1590 if (attrs->ia_valid & ATTR_ATIME) { 1593 if (attrs->ia_valid & ATTR_ATIME) {
1591 set_time = TRUE; 1594 set_time = true;
1592 time_buf.LastAccessTime = 1595 time_buf.LastAccessTime =
1593 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); 1596 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
1594 } else 1597 } else
1595 time_buf.LastAccessTime = 0; 1598 time_buf.LastAccessTime = 0;
1596 1599
1597 if (attrs->ia_valid & ATTR_MTIME) { 1600 if (attrs->ia_valid & ATTR_MTIME) {
1598 set_time = TRUE; 1601 set_time = true;
1599 time_buf.LastWriteTime = 1602 time_buf.LastWriteTime =
1600 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); 1603 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1601 } else 1604 } else
@@ -1606,7 +1609,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1606 server times */ 1609 server times */
1607 1610
1608 if (set_time && (attrs->ia_valid & ATTR_CTIME)) { 1611 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1609 set_time = TRUE; 1612 set_time = true;
1610 /* Although Samba throws this field away 1613 /* Although Samba throws this field away
1611 it may be useful to Windows - but we do 1614 it may be useful to Windows - but we do
1612 not want to set ctime unless some other 1615 not want to set ctime unless some other
@@ -1630,7 +1633,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1630 rc = -EOPNOTSUPP; 1633 rc = -EOPNOTSUPP;
1631 1634
1632 if (rc == -EOPNOTSUPP) { 1635 if (rc == -EOPNOTSUPP) {
1633 int oplock = FALSE; 1636 int oplock = 0;
1634 __u16 netfid; 1637 __u16 netfid;
1635 1638
1636 cFYI(1, ("calling SetFileInfo since SetPathInfo for " 1639 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
@@ -1669,7 +1672,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1669 /* do not need local check to inode_check_ok since the server does 1672 /* do not need local check to inode_check_ok since the server does
1670 that */ 1673 that */
1671 if (!rc) 1674 if (!rc)
1672 rc = inode_setattr(direntry->d_inode, attrs); 1675 rc = inode_setattr(inode, attrs);
1673cifs_setattr_exit: 1676cifs_setattr_exit:
1674 kfree(full_path); 1677 kfree(full_path);
1675 FreeXid(xid); 1678 FreeXid(xid);
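
Two behavioural points in the fs/cifs/inode.c mkdir hunk above: the umask is now applied to the requested mode before the unix_ext check, and on servers without Unix extensions (when CIFS ACLs are not in use) a directory whose mode carries no write bit gets the DOS read-only attribute via CIFSSMBSetTimes. A small sketch of just that mode test; the 0x0001 value for the DOS read-only bit is assumed here, and S_IWUGO is spelled out locally because it is a kernel-only macro:

#include <stdio.h>
#include <sys/stat.h>

#define DOS_ATTR_READONLY 0x0001		/* value assumed for this sketch */
#define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH)	/* kernel macro, expanded here */

/* Which DOS attribute should a new directory get for a given mode?
 * Mirrors the "umask first, then no-write-bit => read-only" rule above. */
static unsigned int dos_attrs_for_mode(mode_t mode, mode_t umask_bits)
{
	mode &= ~umask_bits;
	if ((mode & S_IWUGO) == 0)
		return DOS_ATTR_READONLY;
	return 0;
}

int main(void)
{
	printf("mode 0755 -> attrs 0x%x\n", dos_attrs_for_mode(0755, 022));
	printf("mode 0555 -> attrs 0x%x\n", dos_attrs_for_mode(0555, 022));
	return 0;
}
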
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index d4e7ec93285f..1c2c3ce5020b 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -230,7 +230,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
230 struct inode *inode = direntry->d_inode; 230 struct inode *inode = direntry->d_inode;
231 int rc = -EACCES; 231 int rc = -EACCES;
232 int xid; 232 int xid;
233 int oplock = FALSE; 233 int oplock = 0;
234 struct cifs_sb_info *cifs_sb; 234 struct cifs_sb_info *cifs_sb;
235 struct cifsTconInfo *pTcon; 235 struct cifsTconInfo *pTcon;
236 char *full_path = NULL; 236 char *full_path = NULL;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a42d9fedbb2..1d69b8014e0b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -496,7 +496,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
496 } 496 }
497 return 0; 497 return 0;
498} 498}
499int 499
500bool
500is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) 501is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
501{ 502{
502 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; 503 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
@@ -522,17 +523,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
522 pnotify->Action)); /* BB removeme BB */ 523 pnotify->Action)); /* BB removeme BB */
523 /* cifs_dump_mem("Rcvd notify Data: ",buf, 524 /* cifs_dump_mem("Rcvd notify Data: ",buf,
524 sizeof(struct smb_hdr)+60); */ 525 sizeof(struct smb_hdr)+60); */
525 return TRUE; 526 return true;
526 } 527 }
527 if (pSMBr->hdr.Status.CifsError) { 528 if (pSMBr->hdr.Status.CifsError) {
528 cFYI(1, ("notify err 0x%d", 529 cFYI(1, ("notify err 0x%d",
529 pSMBr->hdr.Status.CifsError)); 530 pSMBr->hdr.Status.CifsError));
530 return TRUE; 531 return true;
531 } 532 }
532 return FALSE; 533 return false;
533 } 534 }
534 if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) 535 if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX)
535 return FALSE; 536 return false;
536 if (pSMB->hdr.Flags & SMBFLG_RESPONSE) { 537 if (pSMB->hdr.Flags & SMBFLG_RESPONSE) {
537 /* no sense logging error on invalid handle on oplock 538 /* no sense logging error on invalid handle on oplock
538 break - harmless race between close request and oplock 539 break - harmless race between close request and oplock
@@ -541,21 +542,21 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
541 if ((NT_STATUS_INVALID_HANDLE) == 542 if ((NT_STATUS_INVALID_HANDLE) ==
542 le32_to_cpu(pSMB->hdr.Status.CifsError)) { 543 le32_to_cpu(pSMB->hdr.Status.CifsError)) {
543 cFYI(1, ("invalid handle on oplock break")); 544 cFYI(1, ("invalid handle on oplock break"));
544 return TRUE; 545 return true;
545 } else if (ERRbadfid == 546 } else if (ERRbadfid ==
546 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { 547 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) {
547 return TRUE; 548 return true;
548 } else { 549 } else {
549 return FALSE; /* on valid oplock brk we get "request" */ 550 return false; /* on valid oplock brk we get "request" */
550 } 551 }
551 } 552 }
552 if (pSMB->hdr.WordCount != 8) 553 if (pSMB->hdr.WordCount != 8)
553 return FALSE; 554 return false;
554 555
555 cFYI(1, ("oplock type 0x%d level 0x%d", 556 cFYI(1, ("oplock type 0x%d level 0x%d",
556 pSMB->LockType, pSMB->OplockLevel)); 557 pSMB->LockType, pSMB->OplockLevel));
557 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 558 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
558 return FALSE; 559 return false;
559 560
560 /* look up tcon based on tid & uid */ 561 /* look up tcon based on tid & uid */
561 read_lock(&GlobalSMBSeslock); 562 read_lock(&GlobalSMBSeslock);
@@ -573,11 +574,11 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
573 ("file id match, oplock break")); 574 ("file id match, oplock break"));
574 pCifsInode = 575 pCifsInode =
575 CIFS_I(netfile->pInode); 576 CIFS_I(netfile->pInode);
576 pCifsInode->clientCanCacheAll = FALSE; 577 pCifsInode->clientCanCacheAll = false;
577 if (pSMB->OplockLevel == 0) 578 if (pSMB->OplockLevel == 0)
578 pCifsInode->clientCanCacheRead 579 pCifsInode->clientCanCacheRead
579 = FALSE; 580 = false;
580 pCifsInode->oplockPending = TRUE; 581 pCifsInode->oplockPending = true;
581 AllocOplockQEntry(netfile->pInode, 582 AllocOplockQEntry(netfile->pInode,
582 netfile->netfid, 583 netfile->netfid,
583 tcon); 584 tcon);
@@ -585,17 +586,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
585 ("about to wake up oplock thread")); 586 ("about to wake up oplock thread"));
586 if (oplockThread) 587 if (oplockThread)
587 wake_up_process(oplockThread); 588 wake_up_process(oplockThread);
588 return TRUE; 589 return true;
589 } 590 }
590 } 591 }
591 read_unlock(&GlobalSMBSeslock); 592 read_unlock(&GlobalSMBSeslock);
592 cFYI(1, ("No matching file for oplock break")); 593 cFYI(1, ("No matching file for oplock break"));
593 return TRUE; 594 return true;
594 } 595 }
595 } 596 }
596 read_unlock(&GlobalSMBSeslock); 597 read_unlock(&GlobalSMBSeslock);
597 cFYI(1, ("Can not process oplock break for non-existent connection")); 598 cFYI(1, ("Can not process oplock break for non-existent connection"));
598 return TRUE; 599 return true;
599} 600}
600 601
601void 602void
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 3b5a5ce882b6..00f4cff400b3 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -132,47 +132,17 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
132 {0, 0} 132 {0, 0}
133}; 133};
134 134
135
136/* if the mount helper is missing we need to reverse the 1st slash
137 from '/' to backslash in order to format the UNC properly for
138 ip address parsing and for tree connect (unless the user
139 remembered to put the UNC name in properly). Fortunately we do
140 not have to call this twice (we check for IPv4 addresses
141 first, so it is already converted by the time we
142 try IPv6 addresses */
143static int canonicalize_unc(char *cp)
144{
145 int i;
146
147 for (i = 0; i <= 46 /* INET6_ADDRSTRLEN */ ; i++) {
148 if (cp[i] == 0)
149 break;
150 if (cp[i] == '\\')
151 break;
152 if (cp[i] == '/') {
153 cFYI(DBG2, ("change slash to \\ in malformed UNC"));
154 cp[i] = '\\';
155 return 1;
156 }
157 }
158 return 0;
159}
160
161/* Convert string containing dotted ip address to binary form */ 135/* Convert string containing dotted ip address to binary form */
162/* returns 0 if invalid address */ 136/* returns 0 if invalid address */
163 137
164int 138int
165cifs_inet_pton(int address_family, char *cp, void *dst) 139cifs_inet_pton(const int address_family, const char *cp, void *dst)
166{ 140{
167 int ret = 0; 141 int ret = 0;
168 142
169 /* calculate length by finding first slash or NULL */ 143 /* calculate length by finding first slash or NULL */
170 if (address_family == AF_INET) { 144 if (address_family == AF_INET) {
171 ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); 145 ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL);
172 if (ret == 0) {
173 if (canonicalize_unc(cp))
174 ret = in4_pton(cp, -1, dst, '\\', NULL);
175 }
176 } else if (address_family == AF_INET6) { 146 } else if (address_family == AF_INET6) {
177 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 147 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
178 } 148 }
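
The fs/cifs/netmisc.c hunk removes canonicalize_unc() and its slash fix-up for malformed UNCs; cifs_inet_pton() is now a thin, const-correct wrapper that converts the address portion up to the first backslash via in4_pton()/in6_pton(). A rough userspace analogue of that "convert up to a delimiter" behaviour, using inet_pton() on a copied prefix since the libc routine has no delimiter argument:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Convert the address prefix of something like "192.168.1.10\share":
 * stop at the delimiter, then hand the prefix to inet_pton(). */
static int pton_until(int af, const char *cp, char delim, void *dst)
{
	char buf[INET6_ADDRSTRLEN];
	const char *end = strchr(cp, delim);
	size_t len = end ? (size_t)(end - cp) : strlen(cp);

	if (len >= sizeof(buf))
		return 0;
	memcpy(buf, cp, len);
	buf[len] = '\0';
	return inet_pton(af, buf, dst) == 1;
}

int main(void)
{
	struct in_addr a4;
	struct in6_addr a6;

	printf("v4: %d\n", pton_until(AF_INET, "192.168.1.10\\share", '\\', &a4));
	printf("v6: %d\n", pton_until(AF_INET6, "fe80::1\\share", '\\', &a6));
	return 0;
}
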
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 32b445edc882..34ec32100c72 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -447,8 +447,8 @@ static int initiate_cifs_search(const int xid, struct file *file)
447 if (file->private_data == NULL) 447 if (file->private_data == NULL)
448 return -ENOMEM; 448 return -ENOMEM;
449 cifsFile = file->private_data; 449 cifsFile = file->private_data;
450 cifsFile->invalidHandle = TRUE; 450 cifsFile->invalidHandle = true;
451 cifsFile->srch_inf.endOfSearch = FALSE; 451 cifsFile->srch_inf.endOfSearch = false;
452 452
453 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 453 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
454 if (cifs_sb == NULL) 454 if (cifs_sb == NULL)
@@ -485,7 +485,7 @@ ffirst_retry:
485 cifs_sb->mnt_cifs_flags & 485 cifs_sb->mnt_cifs_flags &
486 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 486 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
487 if (rc == 0) 487 if (rc == 0)
488 cifsFile->invalidHandle = FALSE; 488 cifsFile->invalidHandle = false;
489 if ((rc == -EOPNOTSUPP) && 489 if ((rc == -EOPNOTSUPP) &&
490 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { 490 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
491 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 491 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
@@ -670,7 +670,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
670 (index_to_find < first_entry_in_buffer)) { 670 (index_to_find < first_entry_in_buffer)) {
671 /* close and restart search */ 671 /* close and restart search */
672 cFYI(1, ("search backing up - close and restart search")); 672 cFYI(1, ("search backing up - close and restart search"));
673 cifsFile->invalidHandle = TRUE; 673 cifsFile->invalidHandle = true;
674 CIFSFindClose(xid, pTcon, cifsFile->netfid); 674 CIFSFindClose(xid, pTcon, cifsFile->netfid);
675 kfree(cifsFile->search_resume_name); 675 kfree(cifsFile->search_resume_name);
676 cifsFile->search_resume_name = NULL; 676 cifsFile->search_resume_name = NULL;
@@ -692,7 +692,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
692 } 692 }
693 693
694 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 694 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
695 (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)) { 695 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
696 cFYI(1, ("calling findnext2")); 696 cFYI(1, ("calling findnext2"));
697 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 697 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
698 &cifsFile->srch_inf); 698 &cifsFile->srch_inf);
@@ -1038,7 +1038,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1038 break; 1038 break;
1039 } 1039 }
1040 } /* else { 1040 } /* else {
1041 cifsFile->invalidHandle = TRUE; 1041 cifsFile->invalidHandle = true;
1042 CIFSFindClose(xid, pTcon, cifsFile->netfid); 1042 CIFSFindClose(xid, pTcon, cifsFile->netfid);
1043 } 1043 }
1044 kfree(cifsFile->search_resume_name); 1044 kfree(cifsFile->search_resume_name);
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 58bbfd992cc0..ff3232fa1015 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -35,11 +35,11 @@
35#include "cifs_debug.h" 35#include "cifs_debug.h"
36#include "cifsencrypt.h" 36#include "cifsencrypt.h"
37 37
38#ifndef FALSE 38#ifndef false
39#define FALSE 0 39#define false 0
40#endif 40#endif
41#ifndef TRUE 41#ifndef true
42#define TRUE 1 42#define true 1
43#endif 43#endif
44 44
45/* following came from the other byteorder.h to avoid include conflicts */ 45/* following came from the other byteorder.h to avoid include conflicts */
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 8cd6a445b017..e9527eedc639 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -264,7 +264,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
264#ifdef CONFIG_CIFS_EXPERIMENTAL 264#ifdef CONFIG_CIFS_EXPERIMENTAL
265 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 265 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
266 __u16 fid; 266 __u16 fid;
267 int oplock = FALSE; 267 int oplock = 0;
268 struct cifs_ntsd *pacl = NULL; 268 struct cifs_ntsd *pacl = NULL;
269 __u32 buflen = 0; 269 __u32 buflen = 0;
270 if (experimEnabled) 270 if (experimEnabled)
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 95a54253c047..e1c854890f94 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
134 unsigned int valid; 134 unsigned int valid;
135 135
136 /* clean out */ 136 /* clean out */
137 vattr->va_mode = (umode_t) -1; 137 vattr->va_mode = -1;
138 vattr->va_uid = (vuid_t) -1; 138 vattr->va_uid = (vuid_t) -1;
139 vattr->va_gid = (vgid_t) -1; 139 vattr->va_gid = (vgid_t) -1;
140 vattr->va_size = (off_t) -1; 140 vattr->va_size = (off_t) -1;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index f89ff083079b..3d2580e00a3e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
345} 345}
346 346
347/* destruction routines: unlink, rmdir */ 347/* destruction routines: unlink, rmdir */
348int coda_unlink(struct inode *dir, struct dentry *de) 348static int coda_unlink(struct inode *dir, struct dentry *de)
349{ 349{
350 int error; 350 int error;
351 const char *name = de->d_name.name; 351 const char *name = de->d_name.name;
@@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de)
365 return 0; 365 return 0;
366} 366}
367 367
368int coda_rmdir(struct inode *dir, struct dentry *de) 368static int coda_rmdir(struct inode *dir, struct dentry *de)
369{ 369{
370 const char *name = de->d_name.name; 370 const char *name = de->d_name.name;
371 int len = de->d_name.len; 371 int len = de->d_name.len;
@@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
424 424
425 425
426/* file operations for directories */ 426/* file operations for directories */
427int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) 427static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
428{ 428{
429 struct coda_file_info *cfi; 429 struct coda_file_info *cfi;
430 struct file *host_file; 430 struct file *host_file;
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad30..332a869d2c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/namei.h> 25#include <linux/namei.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/vfs.h> 28#include <linux/vfs.h>
28#include <linux/ioctl.h> 29#include <linux/ioctl.h>
29#include <linux/init.h> 30#include <linux/init.h>
@@ -1634,7 +1635,7 @@ sticky:
1634 return ret; 1635 return ret;
1635} 1636}
1636 1637
1637#ifdef TIF_RESTORE_SIGMASK 1638#ifdef HAVE_SET_RESTORE_SIGMASK
1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, 1639asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1640 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1641 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1721,7 @@ sticky:
1720 if (sigmask) { 1721 if (sigmask) {
1721 memcpy(&current->saved_sigmask, &sigsaved, 1722 memcpy(&current->saved_sigmask, &sigsaved,
1722 sizeof(sigsaved)); 1723 sizeof(sigsaved));
1723 set_thread_flag(TIF_RESTORE_SIGMASK); 1724 set_restore_sigmask();
1724 } 1725 }
1725 } else if (sigmask) 1726 } else if (sigmask)
1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1727 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1791 if (sigmask) { 1792 if (sigmask) {
1792 memcpy(&current->saved_sigmask, &sigsaved, 1793 memcpy(&current->saved_sigmask, &sigsaved,
1793 sizeof(sigsaved)); 1794 sizeof(sigsaved));
1794 set_thread_flag(TIF_RESTORE_SIGMASK); 1795 set_restore_sigmask();
1795 } 1796 }
1796 ret = -ERESTARTNOHAND; 1797 ret = -ERESTARTNOHAND;
1797 } else if (sigmask) 1798 } else if (sigmask)
@@ -1825,7 +1826,7 @@ sticky:
1825 1826
1826 return ret; 1827 return ret;
1827} 1828}
1828#endif /* TIF_RESTORE_SIGMASK */ 1829#endif /* HAVE_SET_RESTORE_SIGMASK */
1829 1830
1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1831#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1831/* Stuff for NFS server syscalls... */ 1832/* Stuff for NFS server syscalls... */
@@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
2080 2081
2081#ifdef CONFIG_EPOLL 2082#ifdef CONFIG_EPOLL
2082 2083
2083#ifdef TIF_RESTORE_SIGMASK 2084#ifdef HAVE_SET_RESTORE_SIGMASK
2084asmlinkage long compat_sys_epoll_pwait(int epfd, 2085asmlinkage long compat_sys_epoll_pwait(int epfd,
2085 struct compat_epoll_event __user *events, 2086 struct compat_epoll_event __user *events,
2086 int maxevents, int timeout, 2087 int maxevents, int timeout,
@@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2117 if (err == -EINTR) { 2118 if (err == -EINTR) {
2118 memcpy(&current->saved_sigmask, &sigsaved, 2119 memcpy(&current->saved_sigmask, &sigsaved,
2119 sizeof(sigsaved)); 2120 sizeof(sigsaved));
2120 set_thread_flag(TIF_RESTORE_SIGMASK); 2121 set_restore_sigmask();
2121 } else 2122 } else
2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 2123 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2123 } 2124 }
2124 2125
2125 return err; 2126 return err;
2126} 2127}
2127#endif /* TIF_RESTORE_SIGMASK */ 2128#endif /* HAVE_SET_RESTORE_SIGMASK */
2128 2129
2129#endif /* CONFIG_EPOLL */ 2130#endif /* CONFIG_EPOLL */
2130 2131
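
The fs/compat.c changes above switch the compat pselect/ppoll/epoll_pwait paths from setting TIF_RESTORE_SIGMASK by hand to the set_restore_sigmask() helper, keyed on HAVE_SET_RESTORE_SIGMASK rather than the raw thread flag. The userspace contract is unchanged: the temporary signal mask is installed atomically for the duration of the wait and the original mask comes back when the syscall returns. A small sketch of that contract from the caller's side, using pselect():

#include <signal.h>
#include <stdio.h>
#include <sys/select.h>
#include <time.h>
#include <unistd.h>

static void on_sigint(int sig) { (void)sig; }	/* just interrupt the wait */

int main(void)
{
	sigset_t blocked, during_wait;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	fd_set rfds;
	int n;

	signal(SIGINT, on_sigint);

	/* SIGINT is blocked everywhere except inside the pselect() wait. */
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGINT);
	sigprocmask(SIG_BLOCK, &blocked, NULL);

	sigemptyset(&during_wait);		/* empty mask while sleeping */

	FD_ZERO(&rfds);
	FD_SET(STDIN_FILENO, &rfds);

	/* The kernel installs during_wait for the wait and restores the old
	 * mask on return; that restore is what the sigmask machinery above
	 * takes care of on the kernel side. */
	n = pselect(STDIN_FILENO + 1, &rfds, NULL, NULL, &ts, &during_wait);
	printf("pselect returned %d; SIGINT is blocked again here\n", n);
	return 0;
}
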
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16b..97dba0d92348 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
1046 struct inode *inode = file->f_path.dentry->d_inode; 1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct vc_data *vc; 1047 struct vc_data *vc;
1048 1048
1049 if (file->f_op->ioctl != tty_ioctl) 1049 if (file->f_op->unlocked_ioctl != tty_ioctl)
1050 return -EINVAL; 1050 return -EINVAL;
1051 1051
1052 tty = (struct tty_struct *)file->private_data; 1052 tty = (struct tty_struct *)file->private_data;
1053 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 1053 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1054 return -EINVAL; 1054 return -EINVAL;
1055 1055
1056 if (tty->driver->ioctl != vt_ioctl) 1056 if (tty->ops->ioctl != vt_ioctl)
1057 return -EINVAL; 1057 return -EINVAL;
1058 1058
1059 vc = (struct vc_data *)tty->driver_data; 1059 vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a180..2b6cb23dd14e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
115 goto out; 115 goto out;
116 } 116 }
117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
118 __FUNCTION__, count, *ppos, buffer->page); 118 __func__, count, *ppos, buffer->page);
119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
120 buffer->count); 120 buffer->count);
121out: 121out:
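
The configfs, dlm and ecryptfs hunks in this series also sweep __FUNCTION__ over to __func__: the former is a GCC extension kept for compatibility, the latter is the standard C99 predefined identifier. A trivial standalone illustration:

#include <stdio.h>

static void demo_debug(void)
{
	/* __func__ is standard C99; __FUNCTION__ is the older GCC-only
	 * spelling the hunks above migrate away from. */
	printf("%s: could not get root dentry!\n", __func__);
}

int main(void)
{
	demo_debug();
	return 0;
}
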
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778ee..b9a1d810346d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
49 .ra_pages = 0, /* No readahead */ 49 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
51}; 51};
52 52
53static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37d..8421cea7d8c7 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
92 92
93 root = d_alloc_root(inode); 93 root = d_alloc_root(inode);
94 if (!root) { 94 if (!root) {
95 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 95 pr_debug("%s: could not get root dentry!\n",__func__);
96 iput(inode); 96 iput(inode);
97 return -ENOMEM; 97 return -ENOMEM;
98 } 98 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff2..2a731ef5f305 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
210 if (size > PATH_MAX) 210 if (size > PATH_MAX)
211 return -ENAMETOOLONG; 211 return -ENAMETOOLONG;
212 212
213 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 213 pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
214 214
215 for (s = path; depth--; s += 3) 215 for (s = path; depth--; s += 3)
216 strcpy(s,"../"); 216 strcpy(s,"../");
217 217
218 fill_item_path(target, path, size); 218 fill_item_path(target, path, size);
219 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 219 pr_debug("%s: path = '%s'\n", __func__, path);
220 220
221 return 0; 221 return 0;
222} 222}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f5..159a5efd6a8a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 * 13 *
14 */ 14 */
15 15
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e1207874..285b64a8b06e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h>
21#include <linux/idr.h>
20#include <linux/devpts_fs.h> 22#include <linux/devpts_fs.h>
21#include <linux/parser.h> 23#include <linux/parser.h>
22#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
26 28
27#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
28 30
31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock);
34
29static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
30static struct dentry *devpts_root; 36static struct dentry *devpts_root;
31 37
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
171 return lookup_one_len(s, root, sprintf(s, "%d", num)); 177 return lookup_one_len(s, root, sprintf(s, "%d", num));
172} 178}
173 179
180int devpts_new_index(void)
181{
182 int index;
183 int idr_ret;
184
185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM;
188 }
189
190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
192 if (idr_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN)
195 goto retry;
196 return -EIO;
197 }
198
199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO;
203 }
204 mutex_unlock(&allocated_ptys_lock);
205 return index;
206}
207
208void devpts_kill_index(int idx)
209{
210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock);
213}
214
174int devpts_pty_new(struct tty_struct *tty) 215int devpts_pty_new(struct tty_struct *tty)
175{ 216{
176 int number = tty->index; 217 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
177 struct tty_driver *driver = tty->driver; 218 struct tty_driver *driver = tty->driver;
178 dev_t device = MKDEV(driver->major, driver->minor_start+number); 219 dev_t device = MKDEV(driver->major, driver->minor_start+number);
179 struct dentry *dentry; 220 struct dentry *dentry;
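
The devpts hunk moves pty index management into the filesystem: devpts_new_index() hands out the lowest free index from an IDR under a mutex and rejects anything at or above pty_limit, and devpts_kill_index() gives it back. A userspace analogue of the same lowest-free-index-under-a-lock idea, using a plain bitmap and a pthread mutex instead of the kernel IDR (names and the limit are made up for the sketch; build with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define PTY_LIMIT 4096				/* stands in for the pty_limit sysctl */

static unsigned char in_use[PTY_LIMIT];		/* stands in for the IDR */
static pthread_mutex_t index_lock = PTHREAD_MUTEX_INITIALIZER;

static int new_index(void)
{
	int i, idx = -ENOSPC;

	pthread_mutex_lock(&index_lock);
	for (i = 0; i < PTY_LIMIT; i++) {
		if (!in_use[i]) {
			in_use[i] = 1;
			idx = i;		/* lowest free slot, like idr_get_new() */
			break;
		}
	}
	pthread_mutex_unlock(&index_lock);
	return idx;
}

static void kill_index(int idx)
{
	pthread_mutex_lock(&index_lock);
	in_use[idx] = 0;
	pthread_mutex_unlock(&index_lock);
}

int main(void)
{
	int a = new_index(), b = new_index();

	printf("allocated %d and %d\n", a, b);
	kill_index(a);
	printf("freed %d, next allocation gives %d\n", a, new_index());
	kill_index(b);
	return 0;
}
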
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515d..499e16759e96 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
200 200
201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
202 if (!dlm_kset) { 202 if (!dlm_kset) {
203 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 203 printk(KERN_WARNING "%s: can not create kset\n", __func__);
204 return -ENOMEM; 204 return -ENOMEM;
205 } 205 }
206 return 0; 206 return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de0..676073b8dda5 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/fdtable.h>
23 24
24int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
25 26
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
66 struct dnotify_struct **prev; 67 struct dnotify_struct **prev;
67 struct inode *inode; 68 struct inode *inode;
68 fl_owner_t id = current->files; 69 fl_owner_t id = current->files;
70 struct file *f;
69 int error = 0; 71 int error = 0;
70 72
71 if ((arg & ~DN_MULTISHOT) == 0) { 73 if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 94 prev = &odn->dn_next;
93 } 95 }
94 96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 107 if (error)
97 goto out_free; 108 goto out_free;
diff --git a/fs/dquot.c b/fs/dquot.c
index dfba1623cccb..5ac77da19959 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1491,6 +1491,16 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
1491 1491
1492 /* We need to serialize quota_off() for device */ 1492 /* We need to serialize quota_off() for device */
1493 mutex_lock(&dqopt->dqonoff_mutex); 1493 mutex_lock(&dqopt->dqonoff_mutex);
1494
1495 /*
1496 * Skip everything if there's nothing to do. We have to do this because
1497 * sometimes we are called when fill_super() failed and calling
1498 * sync_fs() in such cases does no good.
1499 */
1500 if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
1501 mutex_unlock(&dqopt->dqonoff_mutex);
1502 return 0;
1503 }
1494 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1504 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1495 toputinode[cnt] = NULL; 1505 toputinode[cnt] = NULL;
1496 if (type != -1 && cnt != type) 1506 if (type != -1 && cnt != type)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 59375efcf39d..3e5637fc3779 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,18 +14,26 @@ int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb)
16{ 16{
17 struct inode *inode; 17 struct inode *inode, *toput_inode = NULL;
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0)
24 continue;
25 __iget(inode);
26 spin_unlock(&inode_lock);
23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
28 iput(toput_inode);
29 toput_inode = inode;
30 spin_lock(&inode_lock);
24 } 31 }
25 spin_unlock(&inode_lock); 32 spin_unlock(&inode_lock);
33 iput(toput_inode);
26} 34}
27 35
28void drop_pagecache(void) 36static void drop_pagecache(void)
29{ 37{
30 struct super_block *sb; 38 struct super_block *sb;
31 39
@@ -45,7 +53,7 @@ restart:
45 spin_unlock(&sb_lock); 53 spin_unlock(&sb_lock);
46} 54}
47 55
48void drop_slab(void) 56static void drop_slab(void)
49{ 57{
50 int nr_objects; 58 int nr_objects;
51 59
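
The drop_caches change above fixes the iteration in drop_pagecache_sb(): each inode is pinned with __iget() before inode_lock is dropped, the invalidation runs unlocked, and the previous inode's reference is released with iput() only once it is no longer the iteration cursor; drop_pagecache() and drop_slab() also become static. A deliberately simplified userspace sketch of that walk-a-locked-list-with-a-deferred-release shape (single-threaded, nodes are never freed, and the concurrent-removal handling the kernel gets from its I_FREEING checks is out of scope; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int pins;		/* reference count, protected by list_lock */
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void unpin(struct node *n)	/* analogue of iput(); called unlocked */
{
	if (!n)
		return;
	pthread_mutex_lock(&list_lock);
	n->pins--;
	pthread_mutex_unlock(&list_lock);
}

static void slow_work(struct node *n)	/* may block; must not hold list_lock */
{
	printf("invalidating node %d\n", n->id);
}

static void walk(struct node *head)
{
	struct node *n, *to_unpin = NULL;

	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next) {
		n->pins++;			/* like __iget(): pin the cursor */
		pthread_mutex_unlock(&list_lock);

		slow_work(n);
		unpin(to_unpin);		/* previous cursor released here */
		to_unpin = n;

		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
	unpin(to_unpin);
}

int main(void)
{
	struct node c = { NULL, 0, 3 }, b = { &c, 0, 2 }, a = { &b, 0, 1 };

	walk(&a);
	return 0;
}
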
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 768857015516..1e34a7fd4884 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a066e109ad9c..cd62d75b2cc0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst,
119 if (rc) { 119 if (rc) {
120 printk(KERN_ERR 120 printk(KERN_ERR
121 "%s: Error initializing crypto hash; rc = [%d]\n", 121 "%s: Error initializing crypto hash; rc = [%d]\n",
122 __FUNCTION__, rc); 122 __func__, rc);
123 goto out; 123 goto out;
124 } 124 }
125 rc = crypto_hash_update(&desc, &sg, len); 125 rc = crypto_hash_update(&desc, &sg, len);
126 if (rc) { 126 if (rc) {
127 printk(KERN_ERR 127 printk(KERN_ERR
128 "%s: Error updating crypto hash; rc = [%d]\n", 128 "%s: Error updating crypto hash; rc = [%d]\n",
129 __FUNCTION__, rc); 129 __func__, rc);
130 goto out; 130 goto out;
131 } 131 }
132 rc = crypto_hash_final(&desc, dst); 132 rc = crypto_hash_final(&desc, dst);
133 if (rc) { 133 if (rc) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "%s: Error finalizing crypto hash; rc = [%d]\n", 135 "%s: Error finalizing crypto hash; rc = [%d]\n",
136 __FUNCTION__, rc); 136 __func__, rc);
137 goto out; 137 goto out;
138 } 138 }
139out: 139out:
@@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
437 if (rc < 0) { 437 if (rc < 0) {
438 printk(KERN_ERR "%s: Error attempting to encrypt page with " 438 printk(KERN_ERR "%s: Error attempting to encrypt page with "
439 "page->index = [%ld], extent_offset = [%ld]; " 439 "page->index = [%ld], extent_offset = [%ld]; "
440 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 440 "rc = [%d]\n", __func__, page->index, extent_offset,
441 rc); 441 rc);
442 goto out; 442 goto out;
443 } 443 }
@@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page)
487 0, PAGE_CACHE_SIZE); 487 0, PAGE_CACHE_SIZE);
488 if (rc) 488 if (rc)
489 printk(KERN_ERR "%s: Error attempting to copy " 489 printk(KERN_ERR "%s: Error attempting to copy "
490 "page at index [%ld]\n", __FUNCTION__, 490 "page at index [%ld]\n", __func__,
491 page->index); 491 page->index);
492 goto out; 492 goto out;
493 } 493 }
@@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page)
508 extent_offset); 508 extent_offset);
509 if (rc) { 509 if (rc) {
510 printk(KERN_ERR "%s: Error encrypting extent; " 510 printk(KERN_ERR "%s: Error encrypting extent; "
511 "rc = [%d]\n", __FUNCTION__, rc); 511 "rc = [%d]\n", __func__, rc);
512 goto out; 512 goto out;
513 } 513 }
514 ecryptfs_lower_offset_for_extent( 514 ecryptfs_lower_offset_for_extent(
@@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page,
569 if (rc < 0) { 569 if (rc < 0) {
570 printk(KERN_ERR "%s: Error attempting to decrypt to page with " 570 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
571 "page->index = [%ld], extent_offset = [%ld]; " 571 "page->index = [%ld], extent_offset = [%ld]; "
572 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 572 "rc = [%d]\n", __func__, page->index, extent_offset,
573 rc); 573 rc);
574 goto out; 574 goto out;
575 } 575 }
@@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page)
622 ecryptfs_inode); 622 ecryptfs_inode);
623 if (rc) 623 if (rc)
624 printk(KERN_ERR "%s: Error attempting to copy " 624 printk(KERN_ERR "%s: Error attempting to copy "
625 "page at index [%ld]\n", __FUNCTION__, 625 "page at index [%ld]\n", __func__,
626 page->index); 626 page->index);
627 goto out; 627 goto out;
628 } 628 }
@@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page)
656 extent_offset); 656 extent_offset);
657 if (rc) { 657 if (rc) {
658 printk(KERN_ERR "%s: Error encrypting extent; " 658 printk(KERN_ERR "%s: Error encrypting extent; "
659 "rc = [%d]\n", __FUNCTION__, rc); 659 "rc = [%d]\n", __func__, rc);
660 goto out; 660 goto out;
661 } 661 }
662 } 662 }
@@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
1215 ecryptfs_inode); 1215 ecryptfs_inode);
1216 if (rc) { 1216 if (rc) {
1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1218 __FUNCTION__, rc); 1218 __func__, rc);
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt,
1246 (*written) = 6; 1246 (*written) = 6;
1247} 1247}
1248 1248
1249struct kmem_cache *ecryptfs_header_cache_0;
1250struct kmem_cache *ecryptfs_header_cache_1; 1249struct kmem_cache *ecryptfs_header_cache_1;
1251struct kmem_cache *ecryptfs_header_cache_2; 1250struct kmem_cache *ecryptfs_header_cache_2;
1252 1251
@@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1320 0, crypt_stat->num_header_bytes_at_front); 1319 0, crypt_stat->num_header_bytes_at_front);
1321 if (rc) 1320 if (rc)
1322 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1323 "information to lower file; rc = [%d]\n", __FUNCTION__, 1322 "information to lower file; rc = [%d]\n", __func__,
1324 rc); 1323 rc);
1325 return rc; 1324 return rc;
1326} 1325}
@@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365 } 1364 }
1366 } else { 1365 } else {
1367 printk(KERN_WARNING "%s: Encrypted flag not set\n", 1366 printk(KERN_WARNING "%s: Encrypted flag not set\n",
1368 __FUNCTION__); 1367 __func__);
1369 rc = -EINVAL; 1368 rc = -EINVAL;
1370 goto out; 1369 goto out;
1371 } 1370 }
1372 /* Released in this function */ 1371 /* Released in this function */
1373 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1372 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL);
1374 if (!virt) { 1373 if (!virt) {
1375 printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); 1374 printk(KERN_ERR "%s: Out of memory\n", __func__);
1376 rc = -ENOMEM; 1375 rc = -ENOMEM;
1377 goto out; 1376 goto out;
1378 } 1377 }
@@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1380 ecryptfs_dentry); 1379 ecryptfs_dentry);
1381 if (unlikely(rc)) { 1380 if (unlikely(rc)) {
1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1381 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1383 __FUNCTION__, rc); 1382 __func__, rc);
1384 goto out_free; 1383 goto out_free;
1385 } 1384 }
1386 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1385 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
@@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 ecryptfs_dentry, virt); 1390 ecryptfs_dentry, virt);
1392 if (rc) { 1391 if (rc) {
1393 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1392 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1394 "rc = [%d]\n", __FUNCTION__, rc); 1393 "rc = [%d]\n", __func__, rc);
1395 goto out_free; 1394 goto out_free;
1396 } 1395 }
1397out_free: 1396out_free:
@@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1585 if (!page_virt) { 1584 if (!page_virt) {
1586 rc = -ENOMEM; 1585 rc = -ENOMEM;
1587 printk(KERN_ERR "%s: Unable to allocate page_virt\n", 1586 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1588 __FUNCTION__); 1587 __func__);
1589 goto out; 1588 goto out;
1590 } 1589 }
1591 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1590 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
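The crypto.c hunks above replace the GCC-specific __FUNCTION__ identifier with the C99-standard __func__ in the error paths. For context, here is a minimal, self-contained userspace sketch of the logging pattern those call sites follow; the helper name demo_read_block and its failure value are hypothetical stand-ins, not eCryptfs code.

#include <stdio.h>

/* Hypothetical helper standing in for a lower-filesystem read. */
static int demo_read_block(void)
{
	return -5;	/* pretend the lower read failed with -EIO */
}

static int demo_caller(void)
{
	int rc = demo_read_block();

	if (rc)
		/* __func__ is standard C99; __FUNCTION__ is a GCC alias. */
		printf("%s: Error reading block; rc = [%d]\n", __func__, rc);
	return rc;
}

int main(void)
{
	return demo_caller() ? 1 : 0;
}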
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5007f788da01..951ee33a022d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2007 International Business Machines Corp. 7 * Copyright (C) 2004-2008 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com> 9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu> 10 * Tyler Hicks <tyhicks@ou.edu>
@@ -34,6 +34,7 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h>
37 38
38/* Version verification for shared data structures w/ userspace */ 39/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 40#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -49,11 +50,13 @@
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 50#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
54 | ECRYPTFS_VERSIONING_PUBKEY \ 56 | ECRYPTFS_VERSIONING_PUBKEY \
55 | ECRYPTFS_VERSIONING_XATTR \ 57 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY) 58 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC)
57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
59#define ECRYPTFS_SALT_SIZE 8 62#define ECRYPTFS_SALT_SIZE 8
@@ -73,17 +76,14 @@
73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 76#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
74#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ 77#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
75#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) 78#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
76#define ECRYPTFS_NLMSG_HELO 100
77#define ECRYPTFS_NLMSG_QUIT 101
78#define ECRYPTFS_NLMSG_REQUEST 102
79#define ECRYPTFS_NLMSG_RESPONSE 103
80#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
81#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
82#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
83#define ECRYPTFS_TRANSPORT_NETLINK 0 82#define ECRYPTFS_TRANSPORT_NETLINK 0
84#define ECRYPTFS_TRANSPORT_CONNECTOR 1 83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
85#define ECRYPTFS_TRANSPORT_RELAYFS 2 84#define ECRYPTFS_TRANSPORT_RELAYFS 2
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK 85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 87#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 88
89#define RFC2440_CIPHER_DES3_EDE 0x02 89#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item {
366}; 366};
367 367
368struct ecryptfs_message { 368struct ecryptfs_message {
369 /* Can never be greater than ecryptfs_message_buf_len */
370 /* Used to find the parent msg_ctx */
371 /* Inherits from msg_ctx->index */
369 u32 index; 372 u32 index;
370 u32 data_len; 373 u32 data_len;
371 u8 data[]; 374 u8 data[];
372}; 375};
373 376
374struct ecryptfs_msg_ctx { 377struct ecryptfs_msg_ctx {
375#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 378#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01
376#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 379#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02
377#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 380#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03
378 u32 state; 381#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04
379 unsigned int index; 382 u8 state;
380 unsigned int counter; 383#define ECRYPTFS_MSG_HELO 100
384#define ECRYPTFS_MSG_QUIT 101
385#define ECRYPTFS_MSG_REQUEST 102
386#define ECRYPTFS_MSG_RESPONSE 103
387 u8 type;
388 u32 index;
389 /* Counter converts to a sequence number. Each message sent
390 * out for which we expect a response has an associated
391 * sequence number. The response must have the same sequence
392 * number as the counter for the msg_stc for the message to be
393 * valid. */
394 u32 counter;
395 size_t msg_size;
381 struct ecryptfs_message *msg; 396 struct ecryptfs_message *msg;
382 struct task_struct *task; 397 struct task_struct *task;
383 struct list_head node; 398 struct list_head node;
399 struct list_head daemon_out_list;
384 struct mutex mux; 400 struct mutex mux;
385}; 401};
386 402
387extern unsigned int ecryptfs_transport; 403extern unsigned int ecryptfs_transport;
388 404
389struct ecryptfs_daemon_id { 405struct ecryptfs_daemon;
390 pid_t pid; 406
391 uid_t uid; 407struct ecryptfs_daemon {
392 struct hlist_node id_chain; 408#define ECRYPTFS_DAEMON_IN_READ 0x00000001
409#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
410#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004
411#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
412 u32 flags;
413 u32 num_queued_msg_ctx;
414 struct pid *pid;
415 uid_t euid;
416 struct user_namespace *user_ns;
417 struct task_struct *task;
418 struct mutex mux;
419 struct list_head msg_ctx_out_queue;
420 wait_queue_head_t wait;
421 struct hlist_node euid_chain;
393}; 422};
394 423
424extern struct mutex ecryptfs_daemon_hash_mux;
425
395static inline struct ecryptfs_file_info * 426static inline struct ecryptfs_file_info *
396ecryptfs_file_to_private(struct file *file) 427ecryptfs_file_to_private(struct file *file)
397{ 428{
@@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
500} 531}
501 532
502#define ecryptfs_printk(type, fmt, arg...) \ 533#define ecryptfs_printk(type, fmt, arg...) \
503 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); 534 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
504void __ecryptfs_printk(const char *fmt, ...); 535void __ecryptfs_printk(const char *fmt, ...);
505 536
506extern const struct file_operations ecryptfs_main_fops; 537extern const struct file_operations ecryptfs_main_fops;
@@ -581,10 +612,13 @@ int
581ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 612ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
582 size_t size, int flags); 613 size_t size, int flags);
583int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 614int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
584int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 615int ecryptfs_process_helo(unsigned int transport, uid_t euid,
585int ecryptfs_process_quit(uid_t uid, pid_t pid); 616 struct user_namespace *user_ns, struct pid *pid);
586int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 617int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
587 pid_t pid, u32 seq); 618 struct pid *pid);
619int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
620 struct user_namespace *user_ns, struct pid *pid,
621 u32 seq);
588int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 622int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
589 struct ecryptfs_msg_ctx **msg_ctx); 623 struct ecryptfs_msg_ctx **msg_ctx);
590int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 624int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport);
593void ecryptfs_release_messaging(unsigned int transport); 627void ecryptfs_release_messaging(unsigned int transport);
594 628
595int ecryptfs_send_netlink(char *data, int data_len, 629int ecryptfs_send_netlink(char *data, int data_len,
596 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 630 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
597 u16 msg_flags, pid_t daemon_pid); 631 u16 msg_flags, struct pid *daemon_pid);
598int ecryptfs_init_netlink(void); 632int ecryptfs_init_netlink(void);
599void ecryptfs_release_netlink(void); 633void ecryptfs_release_netlink(void);
600 634
601int ecryptfs_send_connector(char *data, int data_len, 635int ecryptfs_send_connector(char *data, int data_len,
602 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 636 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
603 u16 msg_flags, pid_t daemon_pid); 637 u16 msg_flags, struct pid *daemon_pid);
604int ecryptfs_init_connector(void); 638int ecryptfs_init_connector(void);
605void ecryptfs_release_connector(void); 639void ecryptfs_release_connector(void);
606void 640void
@@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
642 size_t offset_in_page, size_t size, 676 size_t offset_in_page, size_t size,
643 struct inode *ecryptfs_inode); 677 struct inode *ecryptfs_inode);
644struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 678struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
679int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
680int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
681 struct user_namespace *user_ns);
682int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
683 size_t *length_size);
684int ecryptfs_write_packet_length(char *dest, size_t size,
685 size_t *packet_size_length);
686int ecryptfs_init_ecryptfs_miscdev(void);
687void ecryptfs_destroy_ecryptfs_miscdev(void);
688int ecryptfs_send_miscdev(char *data, size_t data_size,
689 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
690 u16 msg_flags, struct ecryptfs_daemon *daemon);
691void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
692int
693ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
694 struct user_namespace *user_ns, struct pid *pid);
645 695
646#endif /* #ifndef ECRYPTFS_KERNEL_H */ 696#endif /* #ifndef ECRYPTFS_KERNEL_H */
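The header changes above advertise the new miscdev transport to userspace through an ECRYPTFS_VERSIONING_DEVMISC capability bit folded into ECRYPTFS_VERSIONING_MASK. A hedged sketch of how a consumer of that mask might test for the bit follows; the constants are copied from the hunk above, while the version variable and the printed strings are illustrative only.

#include <stdio.h>

#define ECRYPTFS_VERSIONING_XATTR   0x00000010
#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040

int main(void)
{
	/* Pretend this value came from the kernel's advertised version mask. */
	unsigned int version = ECRYPTFS_VERSIONING_XATTR
			       | ECRYPTFS_VERSIONING_MULTKEY
			       | ECRYPTFS_VERSIONING_DEVMISC;

	if (version & ECRYPTFS_VERSIONING_DEVMISC)
		printf("kernel offers the miscdev transport\n");
	else
		printf("fall back to an older transport\n");
	return 0;
}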
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b8f5ed4adea..2258b8f654a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
195 file, ecryptfs_inode_to_private(inode)->lower_file); 195 file, ecryptfs_inode_to_private(inode)->lower_file);
196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
198 mutex_lock(&crypt_stat->cs_mutex);
198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
200 mutex_unlock(&crypt_stat->cs_mutex);
199 rc = 0; 201 rc = 0;
200 goto out; 202 goto out;
201 } 203 }
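The file.c hunk brackets the flag clear with crypt_stat->cs_mutex so concurrent openers do not race on the read-modify-write of crypt_stat->flags. Below is a small userspace analogue of that lock-around-flag-update shape using pthreads; the struct and flag names are illustrative, not the eCryptfs definitions.

#include <pthread.h>
#include <stdio.h>

#define DEMO_ENCRYPTED 0x1	/* stands in for ECRYPTFS_ENCRYPTED */

struct demo_crypt_stat {
	unsigned int flags;
	pthread_mutex_t cs_mutex;
};

static void demo_mark_unencrypted(struct demo_crypt_stat *cs)
{
	/* Take the stat lock around the read-modify-write of flags. */
	pthread_mutex_lock(&cs->cs_mutex);
	cs->flags &= ~DEMO_ENCRYPTED;
	pthread_mutex_unlock(&cs->cs_mutex);
}

int main(void)
{
	struct demo_crypt_stat cs = {
		.flags = DEMO_ENCRYPTED,
		.cs_mutex = PTHREAD_MUTEX_INITIALIZER,
	};

	demo_mark_unencrypted(&cs);
	printf("flags now [0x%x]\n", cs.flags);
	return 0;
}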
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23861152101..c92cc1c00aae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -37,17 +37,11 @@ static struct dentry *lock_parent(struct dentry *dentry)
37{ 37{
38 struct dentry *dir; 38 struct dentry *dir;
39 39
40 dir = dget(dentry->d_parent); 40 dir = dget_parent(dentry);
41 mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT); 41 mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT);
42 return dir; 42 return dir;
43} 43}
44 44
45static void unlock_parent(struct dentry *dentry)
46{
47 mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
48 dput(dentry->d_parent);
49}
50
51static void unlock_dir(struct dentry *dir) 45static void unlock_dir(struct dentry *dir)
52{ 46{
53 mutex_unlock(&dir->d_inode->i_mutex); 47 mutex_unlock(&dir->d_inode->i_mutex);
@@ -111,7 +105,7 @@ ecryptfs_do_create(struct inode *directory_inode,
111 105
112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 106 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
113 lower_dir_dentry = lock_parent(lower_dentry); 107 lower_dir_dentry = lock_parent(lower_dentry);
114 if (unlikely(IS_ERR(lower_dir_dentry))) { 108 if (IS_ERR(lower_dir_dentry)) {
115 ecryptfs_printk(KERN_ERR, "Error locking directory of " 109 ecryptfs_printk(KERN_ERR, "Error locking directory of "
116 "dentry\n"); 110 "dentry\n");
117 rc = PTR_ERR(lower_dir_dentry); 111 rc = PTR_ERR(lower_dir_dentry);
@@ -121,7 +115,7 @@ ecryptfs_do_create(struct inode *directory_inode,
121 ecryptfs_dentry, mode, nd); 115 ecryptfs_dentry, mode, nd);
122 if (rc) { 116 if (rc) {
123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 117 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
124 "rc = [%d]\n", __FUNCTION__, rc); 118 "rc = [%d]\n", __func__, rc);
125 goto out_lock; 119 goto out_lock;
126 } 120 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 121 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
@@ -426,8 +420,9 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
426 int rc = 0; 420 int rc = 0;
427 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 421 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
428 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); 422 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
423 struct dentry *lower_dir_dentry;
429 424
430 lock_parent(lower_dentry); 425 lower_dir_dentry = lock_parent(lower_dentry);
431 rc = vfs_unlink(lower_dir_inode, lower_dentry); 426 rc = vfs_unlink(lower_dir_inode, lower_dentry);
432 if (rc) { 427 if (rc) {
433 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); 428 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
@@ -439,7 +434,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
439 dentry->d_inode->i_ctime = dir->i_ctime; 434 dentry->d_inode->i_ctime = dir->i_ctime;
440 d_drop(dentry); 435 d_drop(dentry);
441out_unlock: 436out_unlock:
442 unlock_parent(lower_dentry); 437 unlock_dir(lower_dir_dentry);
443 return rc; 438 return rc;
444} 439}
445 440
@@ -908,7 +903,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 903 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
909 ia->ia_valid &= ~ATTR_MODE; 904 ia->ia_valid &= ~ATTR_MODE;
910 905
906 mutex_lock(&lower_dentry->d_inode->i_mutex);
911 rc = notify_change(lower_dentry, ia); 907 rc = notify_change(lower_dentry, ia);
908 mutex_unlock(&lower_dentry->d_inode->i_mutex);
912out: 909out:
913 fsstack_copy_attr_all(inode, lower_inode, NULL); 910 fsstack_copy_attr_all(inode, lower_inode, NULL);
914 return rc; 911 return rc;
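The inode.c changes drop the unlock_parent() helper: lock_parent() now pins the parent via dget_parent() and returns it, and callers such as ecryptfs_unlink() keep that returned dentry and release it through unlock_dir(). The following userspace sketch shows only the acquire-returns-handle / release-takes-handle pairing that callers now follow; the demo_* names are hypothetical and the real kernel locking (mutex_lock_nested, dget/dput) is not reproduced.

#include <pthread.h>
#include <stdio.h>

struct demo_dir {
	const char *name;
	pthread_mutex_t i_mutex;
};

static struct demo_dir *demo_lock_parent(struct demo_dir *parent)
{
	pthread_mutex_lock(&parent->i_mutex);
	return parent;	/* caller must hand this back to demo_unlock_dir() */
}

static void demo_unlock_dir(struct demo_dir *dir)
{
	pthread_mutex_unlock(&dir->i_mutex);
}

int main(void)
{
	struct demo_dir parent = {
		.name = "lower-parent",
		.i_mutex = PTHREAD_MUTEX_INITIALIZER,
	};
	struct demo_dir *locked = demo_lock_parent(&parent);

	printf("unlinking under [%s]\n", locked->name);
	demo_unlock_dir(locked);
	return 0;
}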
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 682b1b2482c2..e82b457180be 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,7 +65,7 @@ static int process_request_key_err(long err_code)
65} 65}
66 66
67/** 67/**
68 * parse_packet_length 68 * ecryptfs_parse_packet_length
69 * @data: Pointer to memory containing length at offset 69 * @data: Pointer to memory containing length at offset
70 * @size: This function writes the decoded size to this memory 70 * @size: This function writes the decoded size to this memory
71 * address; zero on error 71 * address; zero on error
@@ -73,8 +73,8 @@ static int process_request_key_err(long err_code)
73 * 73 *
74 * Returns zero on success; non-zero on error 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
78{ 78{
79 int rc = 0; 79 int rc = 0;
80 80
@@ -105,7 +105,7 @@ out:
105} 105}
106 106
107/** 107/**
108 * write_packet_length 108 * ecryptfs_write_packet_length
109 * @dest: The byte array target into which to write the length. Must 109 * @dest: The byte array target into which to write the length. Must
110 * have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
@@ -114,8 +114,8 @@ out:
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
117static int write_packet_length(char *dest, size_t size, 117int ecryptfs_write_packet_length(char *dest, size_t size,
118 size_t *packet_size_length) 118 size_t *packet_size_length)
119{ 119{
120 int rc = 0; 120 int rc = 0;
121 121
@@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
162 goto out; 162 goto out;
163 } 163 }
164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; 164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
165 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 165 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
166 &packet_size_len); 166 &packet_size_len);
167 if (rc) { 167 if (rc) {
168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
169 "header; cannot generate packet length\n"); 169 "header; cannot generate packet length\n");
@@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
172 i += packet_size_len; 172 i += packet_size_len;
173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
174 i += ECRYPTFS_SIG_SIZE_HEX; 174 i += ECRYPTFS_SIG_SIZE_HEX;
175 rc = write_packet_length(&message[i], session_key->encrypted_key_size, 175 rc = ecryptfs_write_packet_length(&message[i],
176 &packet_size_len); 176 session_key->encrypted_key_size,
177 &packet_size_len);
177 if (rc) { 178 if (rc) {
178 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
179 "header; cannot generate packet length\n"); 180 "header; cannot generate packet length\n");
@@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
225 rc = -EIO; 226 rc = -EIO;
226 goto out; 227 goto out;
227 } 228 }
228 rc = parse_packet_length(&data[i], &m_size, &data_len); 229 rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len);
229 if (rc) { 230 if (rc) {
230 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
231 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
@@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
304 goto out; 305 goto out;
305 } 306 }
306 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; 307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
307 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 308 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
308 &packet_size_len); 309 &packet_size_len);
309 if (rc) { 310 if (rc) {
310 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
311 "header; cannot generate packet length\n"); 312 "header; cannot generate packet length\n");
@@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
315 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
316 i += ECRYPTFS_SIG_SIZE_HEX; 317 i += ECRYPTFS_SIG_SIZE_HEX;
317 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ 318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
318 rc = write_packet_length(&message[i], crypt_stat->key_size + 3, 319 rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
319 &packet_size_len); 320 &packet_size_len);
320 if (rc) { 321 if (rc) {
321 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
322 "header; cannot generate packet length\n"); 323 "header; cannot generate packet length\n");
@@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
357 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
358 if (message_len < 4) { 359 if (message_len < 4) {
359 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4);
360 goto out; 363 goto out;
361 } 364 }
362 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { 365 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
363 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
364 rc = -EIO; 366 rc = -EIO;
367 printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n",
368 __func__);
365 goto out; 369 goto out;
366 } 370 }
367 if (data[i++]) { 371 if (data[i++]) {
368 ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
369 " [%d]\n", data[i-1]);
370 rc = -EIO; 372 rc = -EIO;
373 printk(KERN_ERR "%s: Status indicator has non zero "
374 "value [%d]\n", __func__, data[i-1]);
375
371 goto out; 376 goto out;
372 } 377 }
373 rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); 378 rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size,
379 &data_len);
374 if (rc) { 380 if (rc) {
375 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 381 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
376 "rc = [%d]\n", rc); 382 "rc = [%d]\n", rc);
@@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
378 } 384 }
379 i += data_len; 385 i += data_len;
380 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
381 ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
382 message_len, (i + key_rec->enc_key_size));
383 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size));
384 goto out; 390 goto out;
385 } 391 }
386 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
387 ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
388 "the maximum key size [%d]\n",
389 key_rec->enc_key_size,
390 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
391 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
392 goto out; 398 goto out;
393 } 399 }
394 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); 400 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
@@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
446 &netlink_message, &netlink_message_length); 452 &netlink_message, &netlink_message_length);
447 if (rc) { 453 if (rc) {
448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
449 goto out; 455 goto out;
450 } 456 }
451 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
@@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
570 goto out; 576 goto out;
571 } 577 }
572 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 578 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
573 rc = parse_packet_length(&data[(*packet_size)], &body_size, 579 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
574 &length_size); 580 &length_size);
575 if (rc) { 581 if (rc) {
576 printk(KERN_WARNING "Error parsing packet length; " 582 printk(KERN_WARNING "Error parsing packet length; "
577 "rc = [%d]\n", rc); 583 "rc = [%d]\n", rc);
@@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
704 goto out; 710 goto out;
705 } 711 }
706 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 712 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
707 rc = parse_packet_length(&data[(*packet_size)], &body_size, 713 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
708 &length_size); 714 &length_size);
709 if (rc) { 715 if (rc) {
710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", 716 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
711 rc); 717 rc);
@@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
852 rc = -EINVAL; 858 rc = -EINVAL;
853 goto out; 859 goto out;
854 } 860 }
855 rc = parse_packet_length(&data[(*packet_size)], &body_size, 861 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
856 &length_size); 862 &length_size);
857 if (rc) { 863 if (rc) {
858 printk(KERN_WARNING "Invalid tag 11 packet format\n"); 864 printk(KERN_WARNING "Invalid tag 11 packet format\n");
859 goto out; 865 goto out;
@@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
1405 auth_tok->token.private_key.key_size; 1411 auth_tok->token.private_key.key_size;
1406 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); 1412 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1407 if (rc) { 1413 if (rc) {
1408 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " 1414 printk(KERN_ERR "Failed to encrypt session key via a key "
1409 "via a pki"); 1415 "module; rc = [%d]\n", rc);
1410 goto out; 1416 goto out;
1411 } 1417 }
1412 if (ecryptfs_verbosity > 0) { 1418 if (ecryptfs_verbosity > 0) {
@@ -1430,8 +1436,9 @@ encrypted_session_key_set:
1430 goto out; 1436 goto out;
1431 } 1437 }
1432 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1438 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1433 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1439 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1434 &packet_size_length); 1440 (max_packet_size - 4),
1441 &packet_size_length);
1435 if (rc) { 1442 if (rc) {
1436 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1443 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1437 "header; cannot generate packet length\n"); 1444 "header; cannot generate packet length\n");
@@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1489 goto out; 1496 goto out;
1490 } 1497 }
1491 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1498 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1492 rc = write_packet_length(&dest[(*packet_length)], 1499 rc = ecryptfs_write_packet_length(&dest[(*packet_length)],
1493 (max_packet_size - 4), &packet_size_length); 1500 (max_packet_size - 4),
1501 &packet_size_length);
1494 if (rc) { 1502 if (rc) {
1495 printk(KERN_ERR "Error generating tag 11 packet header; cannot " 1503 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1496 "generate packet length. rc = [%d]\n", rc); 1504 "generate packet length. rc = [%d]\n", rc);
@@ -1682,8 +1690,9 @@ encrypted_session_key_set:
1682 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1690 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1683 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) 1691 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1684 * to get the number of octets in the actual Tag 3 packet */ 1692 * to get the number of octets in the actual Tag 3 packet */
1685 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1693 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1686 &packet_size_length); 1694 (max_packet_size - 4),
1695 &packet_size_length);
1687 if (rc) { 1696 if (rc) {
1688 printk(KERN_ERR "Error generating tag 3 packet header; cannot " 1697 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1689 "generate packet length. rc = [%d]\n", rc); 1698 "generate packet length. rc = [%d]\n", rc);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac9500a92..d603631601eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
219 if (rc) { 219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the " 220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; " 221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); 222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out; 223 goto out;
224 } 224 }
225out: 225out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 9cc2aec27b0d..1b5c20058acb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * eCryptfs: Linux filesystem encryption layer 2 * eCryptfs: Linux filesystem encryption layer
3 * 3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp. 4 * Copyright (C) 2004-2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu> 6 * Tyler Hicks <tyhicks@ou.edu>
7 * 7 *
@@ -20,19 +20,21 @@
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/user_namespace.h>
24#include <linux/nsproxy.h>
23#include "ecryptfs_kernel.h" 25#include "ecryptfs_kernel.h"
24 26
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 27static LIST_HEAD(ecryptfs_msg_ctx_free_list);
26static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); 28static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
27static struct mutex ecryptfs_msg_ctx_lists_mux; 29static struct mutex ecryptfs_msg_ctx_lists_mux;
28 30
29static struct hlist_head *ecryptfs_daemon_id_hash; 31static struct hlist_head *ecryptfs_daemon_hash;
30static struct mutex ecryptfs_daemon_id_hash_mux; 32struct mutex ecryptfs_daemon_hash_mux;
31static int ecryptfs_hash_buckets; 33static int ecryptfs_hash_buckets;
32#define ecryptfs_uid_hash(uid) \ 34#define ecryptfs_uid_hash(uid) \
33 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 35 hash_long((unsigned long)uid, ecryptfs_hash_buckets)
34 36
35static unsigned int ecryptfs_msg_counter; 37static u32 ecryptfs_msg_counter;
36static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 38static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37 39
38/** 40/**
@@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
40 * @msg_ctx: The context that was acquired from the free list 42 * @msg_ctx: The context that was acquired from the free list
41 * 43 *
42 * Acquires a context element from the free list and locks the mutex 44 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon 45 * on the context. Sets the msg_ctx task to current. Returns zero on
44 * failure to acquire a free context element. Be sure to lock the 46 * success; non-zero on error or upon failure to acquire a free
45 * list mutex before calling. 47 * context element. Must be called with ecryptfs_msg_ctx_lists_mux
48 * held.
46 */ 49 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) 50static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{ 51{
@@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
50 int rc; 53 int rc;
51 54
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) { 55 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free " 56 printk(KERN_WARNING "%s: The eCryptfs free "
54 "context list is empty. It may be helpful to " 57 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len " 58 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current " 59 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len); 60 "value of [%d]\n", __func__, ecryptfs_message_buf_len);
58 rc = -ENOMEM; 61 rc = -ENOMEM;
59 goto out; 62 goto out;
60 } 63 }
@@ -75,8 +78,7 @@ out:
75 * ecryptfs_msg_ctx_free_to_alloc 78 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list 79 * @msg_ctx: The context to move from the free list to the alloc list
77 * 80 *
78 * Be sure to lock the list mutex and the context mutex before 81 * Must be called with ecryptfs_msg_ctx_lists_mux held.
79 * calling.
80 */ 82 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) 83static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{ 84{
@@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
89 * ecryptfs_msg_ctx_alloc_to_free 91 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list 92 * @msg_ctx: The context to move from the alloc list to the free list
91 * 93 *
92 * Be sure to lock the list mutex and the context mutex before 94 * Must be called with ecryptfs_msg_ctx_lists_mux held.
93 * calling.
94 */ 95 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 96void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{ 97{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 98 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg) 99 if (msg_ctx->msg)
99 kfree(msg_ctx->msg); 100 kfree(msg_ctx->msg);
101 msg_ctx->msg = NULL;
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101} 103}
102 104
103/** 105/**
104 * ecryptfs_find_daemon_id 106 * ecryptfs_find_daemon_by_euid
105 * @uid: The user id which maps to the desired daemon id 107 * @euid: The effective user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id 108 * @user_ns: The namespace in which @euid applies
107 * pointer 109 * @daemon: If return value is zero, points to the desired daemon pointer
108 * 110 *
109 * Search the hash list for the given user id. Returns zero if the 111 * Must be called with ecryptfs_daemon_hash_mux held.
110 * user id exists in the list; non-zero otherwise. The daemon id hash 112 *
111 * mutex should be held before calling this function. 113 * Search the hash list for the given user id.
114 *
115 * Returns zero if the user id exists in the list; non-zero otherwise.
112 */ 116 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) 117int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
118 struct user_namespace *user_ns)
114{ 119{
115 struct hlist_node *elem; 120 struct hlist_node *elem;
116 int rc; 121 int rc;
117 122
118 hlist_for_each_entry(*id, elem, 123 hlist_for_each_entry(*daemon, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], 124 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
120 id_chain) { 125 euid_chain) {
121 if ((*id)->uid == uid) { 126 if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
122 rc = 0; 127 rc = 0;
123 goto out; 128 goto out;
124 } 129 }
@@ -128,181 +133,325 @@ out:
128 return rc; 133 return rc;
129} 134}
130 135
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, 136static int
132 pid_t pid) 137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message
145 *
146 * A raw message is one that does not include an ecryptfs_message
147 * struct. It simply has a type.
148 *
149 * Must be called with ecryptfs_daemon_hash_mux held.
150 *
151 * Returns zero on success; non-zero otherwise
152 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type,
154 struct ecryptfs_daemon *daemon)
133{ 155{
156 struct ecryptfs_msg_ctx *msg_ctx;
134 int rc; 157 int rc;
135 158
136 switch(transport) { 159 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK: 160 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); 161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0,
162 daemon->pid);
163 break;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
139 break; 179 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR: 180 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS: 181 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default: 182 default:
143 rc = -ENOSYS; 183 rc = -ENOSYS;
144 } 184 }
185out:
186 return rc;
187}
188
189/**
190 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
191 * @daemon: Pointer to set to newly allocated daemon struct
192 * @euid: Effective user id for the daemon
193 * @user_ns: The namespace in which @euid applies
194 * @pid: Process id for the daemon
195 *
196 * Must be called ceremoniously while in possession of
197 * ecryptfs_sacred_daemon_hash_mux
198 *
199 * Returns zero on success; non-zero otherwise
200 */
201int
202ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
203 struct user_namespace *user_ns, struct pid *pid)
204{
205 int rc = 0;
206
207 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
208 if (!(*daemon)) {
209 rc = -ENOMEM;
210 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
211 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
212 goto out;
213 }
214 (*daemon)->euid = euid;
215 (*daemon)->user_ns = get_user_ns(user_ns);
216 (*daemon)->pid = get_pid(pid);
217 (*daemon)->task = current;
218 mutex_init(&(*daemon)->mux);
219 INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
220 init_waitqueue_head(&(*daemon)->wait);
221 (*daemon)->num_queued_msg_ctx = 0;
222 hlist_add_head(&(*daemon)->euid_chain,
223 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
224out:
145 return rc; 225 return rc;
146} 226}
147 227
148/** 228/**
149 * ecryptfs_process_helo 229 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.) 230 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message 231 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies
152 * @pid: The process ID for the userspace program that sent the 233 * @pid: The process ID for the userspace program that sent the
153 * message 234 * message
154 * 235 *
155 * Adds the uid and pid values to the daemon id hash. If a uid 236 * Adds the euid and pid values to the daemon euid hash. If an euid
156 * already has a daemon pid registered, the daemon will be 237 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list. 238 * unregistered before the new daemon is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list; 239 * Returns zero after adding a new daemon to the hash list;
159 * non-zero otherwise. 240 * non-zero otherwise.
160 */ 241 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) 242int ecryptfs_process_helo(unsigned int transport, uid_t euid,
243 struct user_namespace *user_ns, struct pid *pid)
162{ 244{
163 struct ecryptfs_daemon_id *new_id; 245 struct ecryptfs_daemon *new_daemon;
164 struct ecryptfs_daemon_id *old_id; 246 struct ecryptfs_daemon *old_daemon;
165 int rc; 247 int rc;
166 248
167 mutex_lock(&ecryptfs_daemon_id_hash_mux); 249 mutex_lock(&ecryptfs_daemon_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); 250 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
169 if (!new_id) { 251 if (rc != 0) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user [%d]\n",
173 pid, uid);
174 goto unlock;
175 }
176 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
177 printk(KERN_WARNING "Received request from user [%d] " 252 printk(KERN_WARNING "Received request from user [%d] "
178 "to register daemon [%d]; unregistering daemon " 253 "to register daemon [0x%p]; unregistering daemon "
179 "[%d]\n", uid, pid, old_id->pid); 254 "[0x%p]\n", euid, pid, old_daemon->pid);
180 hlist_del(&old_id->id_chain); 255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT,
181 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, 256 old_daemon);
182 old_id->pid);
183 if (rc) 257 if (rc)
184 printk(KERN_WARNING "Failed to send QUIT " 258 printk(KERN_WARNING "Failed to send QUIT "
185 "message to daemon [%d]; rc = [%d]\n", 259 "message to daemon [0x%p]; rc = [%d]\n",
186 old_id->pid, rc); 260 old_daemon->pid, rc);
187 kfree(old_id); 261 hlist_del(&old_daemon->euid_chain);
262 kfree(old_daemon);
188 } 263 }
189 new_id->uid = uid; 264 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
190 new_id->pid = pid; 265 if (rc)
191 hlist_add_head(&new_id->id_chain, 266 printk(KERN_ERR "%s: The gods are displeased with this attempt "
192 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); 267 "to create a new daemon object for euid [%d]; pid "
193 rc = 0; 268 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
194unlock: 269 mutex_unlock(&ecryptfs_daemon_hash_mux);
195 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 270 return rc;
271}
272
273/**
274 * ecryptfs_exorcise_daemon - Destroy the daemon struct
275 *
276 * Must be called ceremoniously while in possession of
277 * ecryptfs_daemon_hash_mux and the daemon's own mux.
278 */
279int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
280{
281 struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp;
282 int rc = 0;
283
284 mutex_lock(&daemon->mux);
285 if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
286 || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
287 rc = -EBUSY;
288 printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
289 "[0x%p], but it is in the midst of a read or a poll\n",
290 __func__, daemon->pid);
291 mutex_unlock(&daemon->mux);
292 goto out;
293 }
294 list_for_each_entry_safe(msg_ctx, msg_ctx_tmp,
295 &daemon->msg_ctx_out_queue, daemon_out_list) {
296 list_del(&msg_ctx->daemon_out_list);
297 daemon->num_queued_msg_ctx--;
298 printk(KERN_WARNING "%s: Warning: dropping message that is in "
299 "the out queue of a dying daemon\n", __func__);
300 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
301 }
302 hlist_del(&daemon->euid_chain);
303 if (daemon->task)
304 wake_up_process(daemon->task);
305 if (daemon->pid)
306 put_pid(daemon->pid);
307 if (daemon->user_ns)
308 put_user_ns(daemon->user_ns);
309 mutex_unlock(&daemon->mux);
310 memset(daemon, 0, sizeof(*daemon));
311 kfree(daemon);
312out:
196 return rc; 313 return rc;
197} 314}
198 315
199/** 316/**
200 * ecryptfs_process_quit 317 * ecryptfs_process_quit
201 * @uid: The user ID owner of the message 318 * @euid: The user ID owner of the message
319 * @user_ns: The namespace in which @euid applies
202 * @pid: The process ID for the userspace program that sent the 320 * @pid: The process ID for the userspace program that sent the
203 * message 321 * message
204 * 322 *
205 * Deletes the corresponding daemon id for the given uid and pid, if 323 * Deletes the corresponding daemon for the given euid and pid, if
206 * it is the registered that is requesting the deletion. Returns zero 324 * it is the registered that is requesting the deletion. Returns zero
207 * after deleting the desired daemon id; non-zero otherwise. 325 * after deleting the desired daemon; non-zero otherwise.
208 */ 326 */
209int ecryptfs_process_quit(uid_t uid, pid_t pid) 327int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
328 struct pid *pid)
210{ 329{
211 struct ecryptfs_daemon_id *id; 330 struct ecryptfs_daemon *daemon;
212 int rc; 331 int rc;
213 332
214 mutex_lock(&ecryptfs_daemon_id_hash_mux); 333 mutex_lock(&ecryptfs_daemon_hash_mux);
215 if (ecryptfs_find_daemon_id(uid, &id)) { 334 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
335 if (rc || !daemon) {
216 rc = -EINVAL; 336 rc = -EINVAL;
217 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " 337 printk(KERN_ERR "Received request from user [%d] to "
218 "unregister unrecognized daemon [%d]\n", uid, 338 "unregister unrecognized daemon [0x%p]\n", euid, pid);
219 pid); 339 goto out_unlock;
220 goto unlock;
221 } 340 }
222 if (id->pid != pid) { 341 rc = ecryptfs_exorcise_daemon(daemon);
223 rc = -EINVAL; 342out_unlock:
224 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " 343 mutex_unlock(&ecryptfs_daemon_hash_mux);
225 "with pid [%d] to unregister daemon [%d]\n",
226 uid, pid, id->pid);
227 goto unlock;
228 }
229 hlist_del(&id->id_chain);
230 kfree(id);
231 rc = 0;
232unlock:
233 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
234 return rc; 344 return rc;
235} 345}
236 346
237/** 347/**
238 * ecryptfs_process_reponse 348 * ecryptfs_process_reponse
239 * @msg: The ecryptfs message received; the caller should sanity check 349 * @msg: The ecryptfs message received; the caller should sanity check
240 * msg->data_len 350 * msg->data_len and free the memory
241 * @pid: The process ID of the userspace application that sent the 351 * @pid: The process ID of the userspace application that sent the
242 * message 352 * message
243 * @seq: The sequence number of the message 353 * @seq: The sequence number of the message; must match the sequence
354 * number for the existing message context waiting for this
355 * response
356 *
357 * Processes a response message after sending an operation request to
358 * userspace. Some other process is awaiting this response. Before
359 * sending out its first communications, the other process allocated a
360 * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The
361 * response message contains this index so that we can copy over the
362 * response message into the msg_ctx that the process holds a
363 * reference to. The other process is going to wake up, check to see
364 * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then
365 * proceed to read off and process the response message. Returns zero
366 * upon delivery to desired context element; non-zero upon delivery
367 * failure or error.
244 * 368 *
245 * Processes a response message after sending a operation request to 369 * Returns zero on success; non-zero otherwise
246 * userspace. Returns zero upon delivery to desired context element;
247 * non-zero upon delivery failure or error.
248 */ 370 */
249int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 371int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
250 pid_t pid, u32 seq) 372 struct user_namespace *user_ns, struct pid *pid,
373 u32 seq)
251{ 374{
252 struct ecryptfs_daemon_id *id; 375 struct ecryptfs_daemon *daemon;
253 struct ecryptfs_msg_ctx *msg_ctx; 376 struct ecryptfs_msg_ctx *msg_ctx;
254 int msg_size; 377 size_t msg_size;
378 struct nsproxy *nsproxy;
379 struct user_namespace *current_user_ns;
255 int rc; 380 int rc;
256 381
257 if (msg->index >= ecryptfs_message_buf_len) { 382 if (msg->index >= ecryptfs_message_buf_len) {
258 rc = -EINVAL; 383 rc = -EINVAL;
259 ecryptfs_printk(KERN_ERR, "Attempt to reference " 384 printk(KERN_ERR "%s: Attempt to reference "
260 "context buffer at index [%d]; maximum " 385 "context buffer at index [%d]; maximum "
261 "allowable is [%d]\n", msg->index, 386 "allowable is [%d]\n", __func__, msg->index,
262 (ecryptfs_message_buf_len - 1)); 387 (ecryptfs_message_buf_len - 1));
263 goto out; 388 goto out;
264 } 389 }
265 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; 390 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
266 mutex_lock(&msg_ctx->mux); 391 mutex_lock(&msg_ctx->mux);
267 if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { 392 mutex_lock(&ecryptfs_daemon_hash_mux);
393 rcu_read_lock();
394 nsproxy = task_nsproxy(msg_ctx->task);
395 if (nsproxy == NULL) {
268 rc = -EBADMSG; 396 rc = -EBADMSG;
269 ecryptfs_printk(KERN_WARNING, "User [%d] received a " 397 printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
270 "message response from process [%d] but does " 398 "message.\n", __func__);
271 "not have a registered daemon\n", 399 rcu_read_unlock();
272 msg_ctx->task->euid, pid); 400 mutex_unlock(&ecryptfs_daemon_hash_mux);
273 goto wake_up; 401 goto wake_up;
274 } 402 }
275 if (msg_ctx->task->euid != uid) { 403 current_user_ns = nsproxy->user_ns;
404 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
405 current_user_ns);
406 rcu_read_unlock();
407 mutex_unlock(&ecryptfs_daemon_hash_mux);
408 if (rc) {
409 rc = -EBADMSG;
410 printk(KERN_WARNING "%s: User [%d] received a "
411 "message response from process [0x%p] but does "
412 "not have a registered daemon\n", __func__,
413 msg_ctx->task->euid, pid);
414 goto wake_up;
415 }
416 if (msg_ctx->task->euid != euid) {
276 rc = -EBADMSG; 417 rc = -EBADMSG;
277 ecryptfs_printk(KERN_WARNING, "Received message from user " 418 printk(KERN_WARNING "%s: Received message from user "
278 "[%d]; expected message from user [%d]\n", 419 "[%d]; expected message from user [%d]\n", __func__,
279 uid, msg_ctx->task->euid); 420 euid, msg_ctx->task->euid);
280 goto unlock; 421 goto unlock;
281 } 422 }
282 if (id->pid != pid) { 423 if (current_user_ns != user_ns) {
283 rc = -EBADMSG; 424 rc = -EBADMSG;
284 ecryptfs_printk(KERN_ERR, "User [%d] received a " 425 printk(KERN_WARNING "%s: Received message from user_ns "
285 "message response from an unrecognized " 426 "[0x%p]; expected message from user_ns [0x%p]\n",
286 "process [%d]\n", msg_ctx->task->euid, pid); 427 __func__, user_ns, nsproxy->user_ns);
428 goto unlock;
429 }
430 if (daemon->pid != pid) {
431 rc = -EBADMSG;
432 printk(KERN_ERR "%s: User [%d] sent a message response "
433 "from an unrecognized process [0x%p]\n",
434 __func__, msg_ctx->task->euid, pid);
287 goto unlock; 435 goto unlock;
288 } 436 }
289 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 437 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
290 rc = -EINVAL; 438 rc = -EINVAL;
291 ecryptfs_printk(KERN_WARNING, "Desired context element is not " 439 printk(KERN_WARNING "%s: Desired context element is not "
292 "pending a response\n"); 440 "pending a response\n", __func__);
293 goto unlock; 441 goto unlock;
294 } else if (msg_ctx->counter != seq) { 442 } else if (msg_ctx->counter != seq) {
295 rc = -EINVAL; 443 rc = -EINVAL;
296 ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " 444 printk(KERN_WARNING "%s: Invalid message sequence; "
297 "expected [%d]; received [%d]\n", 445 "expected [%d]; received [%d]\n", __func__,
298 msg_ctx->counter, seq); 446 msg_ctx->counter, seq);
299 goto unlock; 447 goto unlock;
300 } 448 }
301 msg_size = sizeof(*msg) + msg->data_len; 449 msg_size = (sizeof(*msg) + msg->data_len);
302 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 450 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
303 if (!msg_ctx->msg) { 451 if (!msg_ctx->msg) {
304 rc = -ENOMEM; 452 rc = -ENOMEM;
305 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 453 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
454 "GFP_KERNEL memory\n", __func__, msg_size);
306 goto unlock; 455 goto unlock;
307 } 456 }
308 memcpy(msg_ctx->msg, msg, msg_size); 457 memcpy(msg_ctx->msg, msg, msg_size);
@@ -317,34 +466,38 @@ out:
317} 466}
318 467
319/** 468/**
320 * ecryptfs_send_message 469 * ecryptfs_send_message_locked
321 * @transport: The transport over which to send the message (i.e., 470 * @transport: The transport over which to send the message (i.e.,
322 * netlink) 471 * netlink)
323 * @data: The data to send 472 * @data: The data to send
324 * @data_len: The length of data 473 * @data_len: The length of data
325 * @msg_ctx: The message context allocated for the send 474 * @msg_ctx: The message context allocated for the send
475 *
476 * Must be called with ecryptfs_daemon_hash_mux held.
477 *
478 * Returns zero on success; non-zero otherwise
326 */ 479 */
327int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 480static int
328 struct ecryptfs_msg_ctx **msg_ctx) 481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx)
329{ 483{
330 struct ecryptfs_daemon_id *id; 484 struct ecryptfs_daemon *daemon;
331 int rc; 485 int rc;
332 486
333 mutex_lock(&ecryptfs_daemon_id_hash_mux); 487 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
334 if (ecryptfs_find_daemon_id(current->euid, &id)) { 488 current->nsproxy->user_ns);
335 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 489 if (rc || !daemon) {
336 rc = -ENOTCONN; 490 rc = -ENOTCONN;
337 ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " 491 printk(KERN_ERR "%s: User [%d] does not have a daemon "
338 "registered\n", current->euid); 492 "registered\n", __func__, current->euid);
339 goto out; 493 goto out;
340 } 494 }
341 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
342 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 495 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
343 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); 496 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
344 if (rc) { 497 if (rc) {
345 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 498 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
346 ecryptfs_printk(KERN_WARNING, "Could not claim a free " 499 printk(KERN_WARNING "%s: Could not claim a free "
347 "context element\n"); 500 "context element\n", __func__);
348 goto out; 501 goto out;
349 } 502 }
350 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
@@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
352 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
353 switch (transport) { 506 switch (transport) {
354 case ECRYPTFS_TRANSPORT_NETLINK: 507 case ECRYPTFS_TRANSPORT_NETLINK:
355 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, 508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
356 ECRYPTFS_NLMSG_REQUEST, 0, id->pid); 509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
357 break; 514 break;
358 case ECRYPTFS_TRANSPORT_CONNECTOR: 515 case ECRYPTFS_TRANSPORT_CONNECTOR:
359 case ECRYPTFS_TRANSPORT_RELAYFS: 516 case ECRYPTFS_TRANSPORT_RELAYFS:
360 default: 517 default:
361 rc = -ENOSYS; 518 rc = -ENOSYS;
362 } 519 }
363 if (rc) { 520 if (rc)
364 printk(KERN_ERR "Error attempting to send message to userspace " 521 printk(KERN_ERR "%s: Error attempting to send message to "
365 "daemon; rc = [%d]\n", rc); 522 "userspace daemon; rc = [%d]\n", __func__, rc);
366 }
367out: 523out:
368 return rc; 524 return rc;
369} 525}
370 526
371/** 527/**
528 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send
532 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send
534 *
535 * Grabs ecryptfs_daemon_hash_mux.
536 *
537 * Returns zero on success; non-zero otherwise
538 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx)
541{
542 int rc;
543
544 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len,
546 ECRYPTFS_MSG_REQUEST, msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc;
549}
550
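For context (not part of this patch): a minimal sketch of how an in-kernel caller would drive this pair of interfaces, using only the prototypes visible in this hunk and the caller-frees contract noted below. The helper name example_request_reply() is hypothetical.

/* Sketch only; assumes "ecryptfs_kernel.h" provides the prototypes used here. */
static int example_request_reply(unsigned int transport, char *req, int req_len)
{
	struct ecryptfs_msg_ctx *msg_ctx;
	struct ecryptfs_message *reply = NULL;
	int rc;

	rc = ecryptfs_send_message(transport, req, req_len, &msg_ctx);
	if (rc)
		return rc;		/* e.g. -ENOTCONN: no daemon registered */
	rc = ecryptfs_wait_for_response(msg_ctx, &reply);
	if (rc)
		return rc;		/* timed out or errored; reply not set */
	/* ... consume reply->data[0 .. reply->data_len - 1] ... */
	kfree(reply);			/* the response buffer belongs to the caller */
	return 0;
}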
551/**
372 * ecryptfs_wait_for_response 552 * ecryptfs_wait_for_response
373 * @msg_ctx: The context that was assigned when sending a message 553 * @msg_ctx: The context that was assigned when sending a message
374 * @msg: The incoming message from userspace; not set if rc != 0 554 * @msg: The incoming message from userspace; not set if rc != 0
@@ -377,7 +557,7 @@ out:
377 * of time exceeds ecryptfs_message_wait_timeout. If zero is 557 * of time exceeds ecryptfs_message_wait_timeout. If zero is
378 * returned, msg will point to a valid message from userspace; a 558 * returned, msg will point to a valid message from userspace; a
379 * non-zero value is returned upon failure to receive a message or an 559 * non-zero value is returned upon failure to receive a message or an
380 * error occurs. 560 * error occurs. The caller must free @msg on success.
381 */ 561 */
382int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
383 struct ecryptfs_message **msg) 563 struct ecryptfs_message **msg)
@@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport)
413 593
414 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { 594 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
415 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; 595 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
416 ecryptfs_printk(KERN_WARNING, "Specified number of users is " 596 printk(KERN_WARNING "%s: Specified number of users is "
417 "too large, defaulting to [%d] users\n", 597 "too large, defaulting to [%d] users\n", __func__,
418 ecryptfs_number_of_users); 598 ecryptfs_number_of_users);
419 } 599 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 600 mutex_init(&ecryptfs_daemon_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 601 mutex_lock(&ecryptfs_daemon_hash_mux);
422 ecryptfs_hash_buckets = 1; 602 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 603 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++; 604 ecryptfs_hash_buckets++;
425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 605 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
426 * ecryptfs_hash_buckets, GFP_KERNEL); 606 * ecryptfs_hash_buckets), GFP_KERNEL);
427 if (!ecryptfs_daemon_id_hash) { 607 if (!ecryptfs_daemon_hash) {
428 rc = -ENOMEM; 608 rc = -ENOMEM;
429 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 609 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
430 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 610 mutex_unlock(&ecryptfs_daemon_hash_mux);
431 goto out; 611 goto out;
432 } 612 }
433 for (i = 0; i < ecryptfs_hash_buckets; i++) 613 for (i = 0; i < ecryptfs_hash_buckets; i++)
434 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); 614 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
435 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 615 mutex_unlock(&ecryptfs_daemon_hash_mux);
436
437 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 616 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
438 * ecryptfs_message_buf_len), GFP_KERNEL); 617 * ecryptfs_message_buf_len),
618 GFP_KERNEL);
439 if (!ecryptfs_msg_ctx_arr) { 619 if (!ecryptfs_msg_ctx_arr) {
440 rc = -ENOMEM; 620 rc = -ENOMEM;
441 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 621 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
442 goto out; 622 goto out;
443 } 623 }
444 mutex_init(&ecryptfs_msg_ctx_lists_mux); 624 mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport)
446 ecryptfs_msg_counter = 0; 626 ecryptfs_msg_counter = 0;
447 for (i = 0; i < ecryptfs_message_buf_len; i++) { 627 for (i = 0; i < ecryptfs_message_buf_len; i++) {
448 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); 628 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
629 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list);
449 mutex_init(&ecryptfs_msg_ctx_arr[i].mux); 630 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
450 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); 631 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
451 ecryptfs_msg_ctx_arr[i].index = i; 632 ecryptfs_msg_ctx_arr[i].index = i;
@@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport)
464 if (rc) 645 if (rc)
465 ecryptfs_release_messaging(transport); 646 ecryptfs_release_messaging(transport);
466 break; 647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
467 case ECRYPTFS_TRANSPORT_CONNECTOR: 653 case ECRYPTFS_TRANSPORT_CONNECTOR:
468 case ECRYPTFS_TRANSPORT_RELAYFS: 654 case ECRYPTFS_TRANSPORT_RELAYFS:
469 default: 655 default:
@@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport)
488 kfree(ecryptfs_msg_ctx_arr); 674 kfree(ecryptfs_msg_ctx_arr);
489 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 675 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
490 } 676 }
491 if (ecryptfs_daemon_id_hash) { 677 if (ecryptfs_daemon_hash) {
492 struct hlist_node *elem; 678 struct hlist_node *elem;
493 struct ecryptfs_daemon_id *id; 679 struct ecryptfs_daemon *daemon;
494 int i; 680 int i;
495 681
496 mutex_lock(&ecryptfs_daemon_id_hash_mux); 682 mutex_lock(&ecryptfs_daemon_hash_mux);
497 for (i = 0; i < ecryptfs_hash_buckets; i++) { 683 for (i = 0; i < ecryptfs_hash_buckets; i++) {
498 hlist_for_each_entry(id, elem, 684 int rc;
499 &ecryptfs_daemon_id_hash[i], 685
500 id_chain) { 686 hlist_for_each_entry(daemon, elem,
501 hlist_del(elem); 687 &ecryptfs_daemon_hash[i],
502 kfree(id); 688 euid_chain) {
689 rc = ecryptfs_exorcise_daemon(daemon);
690 if (rc)
691 printk(KERN_ERR "%s: Error whilst "
692 "attempting to destroy daemon; "
693 "rc = [%d]. Dazed and confused, "
694 "but trying to continue.\n",
695 __func__, rc);
503 } 696 }
504 } 697 }
505 kfree(ecryptfs_daemon_id_hash); 698 kfree(ecryptfs_daemon_hash);
506 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 699 mutex_unlock(&ecryptfs_daemon_hash_mux);
507 } 700 }
508 switch(transport) { 701 switch(transport) {
509 case ECRYPTFS_TRANSPORT_NETLINK: 702 case ECRYPTFS_TRANSPORT_NETLINK:
510 ecryptfs_release_netlink(); 703 ecryptfs_release_netlink();
511 break; 704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
512 case ECRYPTFS_TRANSPORT_CONNECTOR: 708 case ECRYPTFS_TRANSPORT_CONNECTOR:
513 case ECRYPTFS_TRANSPORT_RELAYFS: 709 case ECRYPTFS_TRANSPORT_RELAYFS:
514 default: 710 default:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
new file mode 100644
index 000000000000..6560da1a58ce
--- /dev/null
+++ b/fs/ecryptfs/miscdev.c
@@ -0,0 +1,600 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <linux/fs.h>
23#include <linux/hash.h>
24#include <linux/random.h>
25#include <linux/miscdevice.h>
26#include <linux/poll.h>
27#include <linux/wait.h>
28#include <linux/module.h>
29#include "ecryptfs_kernel.h"
30
31static atomic_t ecryptfs_num_miscdev_opens;
32
33/**
34 * ecryptfs_miscdev_poll
35 * @file: dev file (ignored)
36 * @pt: dev poll table (ignored)
37 *
38 * Returns the poll mask
39 */
40static unsigned int
41ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{
43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0;
45 int rc;
46
47 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
55 printk(KERN_WARNING "%s: Attempt to poll on zombified "
56 "daemon\n", __func__);
57 goto out_unlock_daemon;
58 }
59 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ)
60 goto out_unlock_daemon;
61 if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)
62 goto out_unlock_daemon;
63 daemon->flags |= ECRYPTFS_DAEMON_IN_POLL;
64 mutex_unlock(&daemon->mux);
65 poll_wait(file, &daemon->wait, pt);
66 mutex_lock(&daemon->mux);
67 if (!list_empty(&daemon->msg_ctx_out_queue))
68 mask |= POLLIN | POLLRDNORM;
69out_unlock_daemon:
70 daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
71 mutex_unlock(&daemon->mux);
72 return mask;
73}
74
75/**
76 * ecryptfs_miscdev_open
77 * @inode: inode of miscdev handle (ignored)
78 * @file: file for miscdev handle (ignored)
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{
85 struct ecryptfs_daemon *daemon = NULL;
86 int rc;
87
88 mutex_lock(&ecryptfs_daemon_hash_mux);
89 rc = try_module_get(THIS_MODULE);
90 if (rc == 0) {
91 rc = -EIO;
92 printk(KERN_ERR "%s: Error attempting to increment module use "
93 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list;
95 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
97 current->nsproxy->user_ns);
98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid,
100 current->nsproxy->user_ns,
101 task_pid(current));
102 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
104 "rc = [%d]\n", __func__, rc);
105 goto out_module_put_unlock_daemon_list;
106 }
107 }
108 mutex_lock(&daemon->mux);
109 if (daemon->pid != task_pid(current)) {
110 rc = -EINVAL;
111 printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
112 "but pid [0x%p] has attempted to open the handle "
113 "instead\n", __func__, daemon->pid, daemon->euid,
114 task_pid(current));
115 goto out_unlock_daemon;
116 }
117 if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
118 rc = -EBUSY;
119 printk(KERN_ERR "%s: Miscellaneous device handle may only be "
120 "opened once per daemon; pid [0x%p] already has this "
121 "handle open\n", __func__, daemon->pid);
122 goto out_unlock_daemon;
123 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
125 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon:
127 mutex_unlock(&daemon->mux);
128out_module_put_unlock_daemon_list:
129 if (rc)
130 module_put(THIS_MODULE);
131out_unlock_daemon_list:
132 mutex_unlock(&ecryptfs_daemon_hash_mux);
133 return rc;
134}
135
136/**
137 * ecryptfs_miscdev_release
138 * @inode: inode of fs/ecryptfs/euid handle (ignored)
139 * @file: file for fs/ecryptfs/euid handle (ignored)
140 *
141 * This deregisters the daemon associated with the calling euid; the
142 * daemon must reopen the handle in order to register again.
143 *
144 * Returns zero on success; non-zero otherwise
145 */
146static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{
149 struct ecryptfs_daemon *daemon = NULL;
150 int rc;
151
152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens);
161 mutex_unlock(&daemon->mux);
162 rc = ecryptfs_exorcise_daemon(daemon);
163 if (rc) {
164 printk(KERN_CRIT "%s: Fatal error whilst attempting to "
165 "shut down daemon; rc = [%d]. Please report this "
166 "bug.\n", __func__, rc);
167 BUG();
168 }
169 module_put(THIS_MODULE);
170 mutex_unlock(&ecryptfs_daemon_hash_mux);
171 return rc;
172}
173
174/**
175 * ecryptfs_send_miscdev
176 * @data: Data to send to daemon; may be NULL
177 * @data_size: Amount of data to send to daemon
178 * @msg_ctx: Message context, which is used to handle the reply. If
179 * this is NULL, then we do not expect a reply.
180 * @msg_type: Type of message
181 * @msg_flags: Flags for message
182 * @daemon: eCryptfs daemon object
183 *
184 * Add msg_ctx to queue and then, if it exists, notify the blocked
185 * process about the data being available. Must be called with
186 * ecryptfs_daemon_hash_mux held.
187 *
188 * Returns zero on success; non-zero otherwise
189 */
190int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{
194 int rc = 0;
195
196 mutex_lock(&msg_ctx->mux);
197 if (data) {
198 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
199 GFP_KERNEL);
200 if (!msg_ctx->msg) {
201 rc = -ENOMEM;
202 printk(KERN_ERR "%s: Out of memory whilst attempting "
203 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
204 (sizeof(*msg_ctx->msg) + data_size));
205 goto out_unlock;
206 }
207 msg_ctx->msg->index = msg_ctx->index;
208 msg_ctx->msg->data_len = data_size;
209 memcpy(msg_ctx->msg->data, data, data_size);
210 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
211 } else {
212 msg_ctx->msg = NULL;
213 msg_ctx->msg_size = 0;
214 }
215 msg_ctx->type = msg_type;
216
217 mutex_lock(&daemon->mux);
218 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
219 daemon->num_queued_msg_ctx++;
220 wake_up_interruptible(&daemon->wait);
221 mutex_unlock(&daemon->mux);
222out_unlock:
223 mutex_unlock(&msg_ctx->mux);
224 return rc;
225}
226
227/**
228 * ecryptfs_miscdev_read - format and send message from queue
229 * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
230 * @buf: User buffer into which to copy the next message on the daemon queue
231 * @count: Amount of space available in @buf
232 * @ppos: Offset in file (ignored)
233 *
234 * Pulls the oldest message from the daemon queue, formats it for
235 * being sent via a miscdevfs handle, and copies it into @buf
236 *
237 * Returns the number of bytes copied into the user buffer
238 */
239static ssize_t
240ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
241 loff_t *ppos)
242{
243 struct ecryptfs_daemon *daemon;
244 struct ecryptfs_msg_ctx *msg_ctx;
245 size_t packet_length_size;
246 u32 counter_nbo;
247 char packet_length[3];
248 size_t i;
249 size_t total_length;
250 int rc;
251
252 mutex_lock(&ecryptfs_daemon_hash_mux);
253 /* TODO: Just use file->private_data? */
254 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
255 current->nsproxy->user_ns);
256 BUG_ON(rc || !daemon);
257 mutex_lock(&daemon->mux);
258 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
259 rc = 0;
260 mutex_unlock(&ecryptfs_daemon_hash_mux);
261 printk(KERN_WARNING "%s: Attempt to read from zombified "
262 "daemon\n", __func__);
263 goto out_unlock_daemon;
264 }
265 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
266 rc = 0;
267 mutex_unlock(&ecryptfs_daemon_hash_mux);
268 goto out_unlock_daemon;
269 }
270 /* This daemon will not go away so long as this flag is set */
271 daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
272 mutex_unlock(&ecryptfs_daemon_hash_mux);
273check_list:
274 if (list_empty(&daemon->msg_ctx_out_queue)) {
275 mutex_unlock(&daemon->mux);
276 rc = wait_event_interruptible(
277 daemon->wait, !list_empty(&daemon->msg_ctx_out_queue));
278 mutex_lock(&daemon->mux);
279 if (rc < 0) {
280 rc = 0;
281 goto out_unlock_daemon;
282 }
283 }
284 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
285 rc = 0;
286 goto out_unlock_daemon;
287 }
288 if (list_empty(&daemon->msg_ctx_out_queue)) {
289 /* Something else jumped in since the
290 * wait_event_interruptible() and removed the
291 * message from the queue; try again */
292 goto check_list;
293 }
294 BUG_ON(current->euid != daemon->euid);
295 BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
296 BUG_ON(task_pid(current) != daemon->pid);
297 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
298 struct ecryptfs_msg_ctx, daemon_out_list);
299 BUG_ON(!msg_ctx);
300 mutex_lock(&msg_ctx->mux);
301 if (msg_ctx->msg) {
302 rc = ecryptfs_write_packet_length(packet_length,
303 msg_ctx->msg_size,
304 &packet_length_size);
305 if (rc) {
306 rc = 0;
307 printk(KERN_WARNING "%s: Error writing packet length; "
308 "rc = [%d]\n", __func__, rc);
309 goto out_unlock_msg_ctx;
310 }
311 } else {
312 packet_length_size = 0;
313 msg_ctx->msg_size = 0;
314 }
315 /* miscdevfs packet format:
316 * Octet 0: Type
317 * Octets 1-4: network byte order msg_ctx->counter
318 * Octets 5-N0: Size of struct ecryptfs_message to follow
319 * Octets N0-N1: struct ecryptfs_message (including data)
320 *
321 * Octets 5-N1 not written if the packet type does not
322 * include a message */
323 total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size);
324 if (count < total_length) {
325 rc = 0;
326 printk(KERN_WARNING "%s: Only given user buffer of "
327 "size [%Zd], but we need [%Zd] to read the "
328 "pending message\n", __func__, count, total_length);
329 goto out_unlock_msg_ctx;
330 }
331 i = 0;
332 buf[i++] = msg_ctx->type;
333 counter_nbo = cpu_to_be32(msg_ctx->counter);
334 memcpy(&buf[i], (char *)&counter_nbo, 4);
335 i += 4;
336 if (msg_ctx->msg) {
337 memcpy(&buf[i], packet_length, packet_length_size);
338 i += packet_length_size;
339 rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size);
340 if (rc) {
341 printk(KERN_ERR "%s: copy_to_user returned error "
342 "[%d]\n", __func__, rc);
343 goto out_unlock_msg_ctx;
344 }
345 i += msg_ctx->msg_size;
346 }
347 rc = i;
348 list_del(&msg_ctx->daemon_out_list);
349 kfree(msg_ctx->msg);
350 msg_ctx->msg = NULL;
351 /* We do not expect a reply from the userspace daemon for any
352 * message type other than ECRYPTFS_MSG_REQUEST */
353 if (msg_ctx->type != ECRYPTFS_MSG_REQUEST)
354 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
355out_unlock_msg_ctx:
356 mutex_unlock(&msg_ctx->mux);
357out_unlock_daemon:
358 daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ;
359 mutex_unlock(&daemon->mux);
360 return rc;
361}
362
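Illustration only (not part of this patch): a hypothetical userspace reader that decodes the fixed part of the packet header documented in the comment above. The /dev/ecryptfs path and the one-/two-octet length encoding (matching ecryptfs_write_packet_length()) are assumptions, not definitions taken from this diff.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char buf[4096];
	uint32_t counter;
	size_t msg_size = 0, i = 0;
	ssize_t n;
	int fd = open("/dev/ecryptfs", O_RDWR);

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));	/* blocks until the kernel queues a packet */
	if (n < 5) {
		close(fd);
		return 1;
	}
	printf("type = %u\n", (unsigned int)buf[i++]);	/* octet 0: packet type */
	memcpy(&counter, &buf[i], 4);		/* octets 1-4: counter, network byte order */
	counter = ntohl(counter);
	i += 4;
	if (n > 5) {				/* optional length octets + ecryptfs_message */
		if (buf[i] < 192) {		/* one-octet length form */
			msg_size = buf[i++];
		} else {			/* two-octet length form */
			msg_size = (((size_t)buf[i] - 192) << 8) + buf[i + 1] + 192;
			i += 2;
		}
	}
	printf("counter = %u, struct ecryptfs_message size = %zu\n",
	       (unsigned int)counter, msg_size);
	close(fd);
	return 0;
}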
363/**
364 * ecryptfs_miscdev_helo
365 * @euid: effective user id of the process sending the helo packet
366 * @user_ns: The namespace in which @euid applies
367 * @pid: process id of the process sending the helo packet
368 *
369 * Returns zero on success; non-zero otherwise
370 */
371static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
372 struct pid *pid)
373{
374 int rc;
375
376 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
377 pid);
378 if (rc)
379 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
380 return rc;
381}
382
383/**
384 * ecryptfs_miscdev_quit
385 * @euid: effective user id of the process sending the quit packet
386 * @user_ns: The namespace in which @euid applies
387 * @pid: process id of the process sending the quit packet
388 *
389 * Returns zero on success; non-zero otherwise
390 */
391static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
392 struct pid *pid)
393{
394 int rc;
395
396 rc = ecryptfs_process_quit(euid, user_ns, pid);
397 if (rc)
398 printk(KERN_WARNING
399 "Error processing QUIT message; rc = [%d]\n", rc);
400 return rc;
401}
402
403/**
404 * ecryptfs_miscdev_response - process response to message previously sent to daemon
405 * @data: Bytes comprising struct ecryptfs_message
406 * @data_size: sizeof(struct ecryptfs_message) + data len
407 * @euid: Effective user id of the process sending the miscdev response
408 * @user_ns: The namespace in which @euid applies
409 * @pid: Process id of the process sending the miscdev response
410 * @seq: Sequence number for miscdev response packet
411 *
412 * Returns zero on success; non-zero otherwise
413 */
414static int ecryptfs_miscdev_response(char *data, size_t data_size,
415 uid_t euid, struct user_namespace *user_ns,
416 struct pid *pid, u32 seq)
417{
418 struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
419 int rc;
420
421 if ((sizeof(*msg) + msg->data_len) != data_size) {
422 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
423 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
424 (sizeof(*msg) + msg->data_len), data_size);
425 rc = -EINVAL;
426 goto out;
427 }
428 rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
429 if (rc)
430 printk(KERN_ERR
431 "Error processing response message; rc = [%d]\n", rc);
432out:
433 return rc;
434}
435
436/**
437 * ecryptfs_miscdev_write - handle write to daemon miscdev handle
438 * @file: File for misc dev handle (ignored)
439 * @buf: Buffer containing user data
440 * @count: Amount of data in @buf
441 * @ppos: Pointer to offset in file (ignored)
442 *
443 * miscdevfs packet format:
444 * Octet 0: Type
445 * Octets 1-4: network byte order msg_ctx->counter (0's for non-response)
446 * Octets 5-N0: Size of struct ecryptfs_message to follow
447 * Octets N0-N1: struct ecryptfs_message (including data)
448 *
449 * Returns the number of bytes read from @buf
450 */
451static ssize_t
452ecryptfs_miscdev_write(struct file *file, const char __user *buf,
453 size_t count, loff_t *ppos)
454{
455 u32 counter_nbo, seq;
456 size_t packet_size, packet_size_length, i;
457 ssize_t sz = 0;
458 char *data;
459 int rc;
460
461 if (count == 0)
462 goto out;
463 data = kmalloc(count, GFP_KERNEL);
464 if (!data) {
465 printk(KERN_ERR "%s: Out of memory whilst attempting to "
466 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
467 goto out;
468 }
469 rc = copy_from_user(data, buf, count);
470 if (rc) {
471 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
472 __func__, rc);
473 goto out_free;
474 }
475 sz = count;
476 i = 0;
477 switch (data[i++]) {
478 case ECRYPTFS_MSG_RESPONSE:
479 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
480 printk(KERN_WARNING "%s: Minimum acceptable packet "
481 "size is [%Zd], but amount of data written is "
482 "only [%Zd]. Discarding response packet.\n",
483 __func__,
484 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
485 count);
486 goto out_free;
487 }
488 memcpy((char *)&counter_nbo, &data[i], 4);
489 seq = be32_to_cpu(counter_nbo);
490 i += 4;
491 rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
492 &packet_size_length);
493 if (rc) {
494 printk(KERN_WARNING "%s: Error parsing packet length; "
495 "rc = [%d]\n", __func__, rc);
496 goto out_free;
497 }
498 i += packet_size_length;
499 if ((1 + 4 + packet_size_length + packet_size) != count) {
500 printk(KERN_WARNING "%s: (1 + 4 + packet_size_length([%Zd])"
501 " + packet_size([%Zd]))([%Zd]) != "
502 "count([%Zd]). Invalid packet format.\n",
503 __func__, packet_size_length, packet_size,
504 (1 + 4 + packet_size_length + packet_size), count);
505 goto out_free;
506 }
507 rc = ecryptfs_miscdev_response(&data[i], packet_size,
508 current->euid,
509 current->nsproxy->user_ns,
510 task_pid(current), seq);
511 if (rc)
512 printk(KERN_WARNING "%s: Failed to deliver miscdev "
513 "response to requesting operation; rc = [%d]\n",
514 __func__, rc);
515 break;
516 case ECRYPTFS_MSG_HELO:
517 rc = ecryptfs_miscdev_helo(current->euid,
518 current->nsproxy->user_ns,
519 task_pid(current));
520 if (rc) {
521 printk(KERN_ERR "%s: Error attempting to process "
522 "helo from pid [0x%p]; rc = [%d]\n", __func__,
523 task_pid(current), rc);
524 goto out_free;
525 }
526 break;
527 case ECRYPTFS_MSG_QUIT:
528 rc = ecryptfs_miscdev_quit(current->euid,
529 current->nsproxy->user_ns,
530 task_pid(current));
531 if (rc) {
532 printk(KERN_ERR "%s: Error attempting to process "
533 "quit from pid [0x%p]; rc = [%d]\n", __func__,
534 task_pid(current), rc);
535 goto out_free;
536 }
537 break;
538 default:
539 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
540 "message of unrecognized type [%d]\n",
541 data[0]);
542 break;
543 }
544out_free:
545 kfree(data);
546out:
547 return sz;
548}
549
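Companion illustration (also not part of this patch): a hypothetical helper a daemon could use to build and write one packet in the layout documented above. The struct ecryptfs_message layout, the host-byte-order header fields, and the two-octet length form are assumptions inferred from the kernel-side code; the type argument should be the real ECRYPTFS_MSG_RESPONSE value from ecryptfs_kernel.h.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

struct ecryptfs_message {		/* assumed to mirror the kernel's layout */
	uint32_t index;
	uint32_t data_len;
	uint8_t data[];
};

static ssize_t send_packet(int fd, uint8_t type, uint32_t counter,
			   uint32_t msg_index, const void *payload,
			   uint32_t payload_len)
{
	unsigned char pkt[4096];
	size_t msg_size = sizeof(struct ecryptfs_message) + payload_len;
	uint32_t counter_nbo = htonl(counter);	/* octets 1-4 are big endian */
	size_t i = 0;

	if (1 + 4 + 2 + msg_size > sizeof(pkt))
		return -1;
	pkt[i++] = type;			/* octet 0: packet type */
	memcpy(&pkt[i], &counter_nbo, 4);	/* counter echoed from the request */
	i += 4;
	if (msg_size < 192) {			/* one-octet length form */
		pkt[i++] = (unsigned char)msg_size;
	} else {				/* two-octet length form */
		pkt[i++] = (unsigned char)(((msg_size - 192) >> 8) + 192);
		pkt[i++] = (unsigned char)((msg_size - 192) & 0xff);
	}
	memcpy(&pkt[i], &msg_index, 4);		/* ecryptfs_message.index, host order */
	memcpy(&pkt[i + 4], &payload_len, 4);	/* ecryptfs_message.data_len */
	if (payload_len)
		memcpy(&pkt[i + 8], payload, payload_len);
	i += msg_size;
	return write(fd, pkt, i);		/* one write must cover one whole packet */
}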
550
551static const struct file_operations ecryptfs_miscdev_fops = {
552 .open = ecryptfs_miscdev_open,
553 .poll = ecryptfs_miscdev_poll,
554 .read = ecryptfs_miscdev_read,
555 .write = ecryptfs_miscdev_write,
556 .release = ecryptfs_miscdev_release,
557};
558
559static struct miscdevice ecryptfs_miscdev = {
560 .minor = MISC_DYNAMIC_MINOR,
561 .name = "ecryptfs",
562 .fops = &ecryptfs_miscdev_fops
563};
564
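Finally, a hypothetical daemon skeleton tying the handle operations together (not part of this patch): opening the node registers the daemon per ecryptfs_miscdev_open() above, poll() then blocks until a message is queued, and closing the handle deregisters it. The /dev/ecryptfs path assumes the usual udev naming for the misc device declared above.

#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int ecryptfs_daemon_loop(void)
{
	struct pollfd pfd;
	int fd = open("/dev/ecryptfs", O_RDWR);	/* open() registers this daemon */

	if (fd < 0)
		return -1;
	pfd.fd = fd;
	pfd.events = POLLIN;
	while (poll(&pfd, 1, -1) >= 0) {
		if (pfd.revents & POLLIN) {
			/* read one packet (see the reader sketch after
			 * ecryptfs_miscdev_read()) and answer any
			 * ECRYPTFS_MSG_REQUEST with send_packet() */
		}
	}
	close(fd);	/* release() deregisters the daemon */
	return 0;
}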
565/**
566 * ecryptfs_init_ecryptfs_miscdev
567 *
568 * Messages sent to the userspace daemon from the kernel are placed on
569 * a queue associated with the daemon. The next read against the
570 * miscdev handle by that daemon will return the oldest message placed
571 * on the message queue for the daemon.
572 *
573 * Returns zero on success; non-zero otherwise
574 */
575int ecryptfs_init_ecryptfs_miscdev(void)
576{
577 int rc;
578
579 atomic_set(&ecryptfs_num_miscdev_opens, 0);
580 mutex_lock(&ecryptfs_daemon_hash_mux);
581 rc = misc_register(&ecryptfs_miscdev);
582 if (rc)
583 printk(KERN_ERR "%s: Failed to register miscellaneous device "
584 "for communications with userspace daemons; rc = [%d]\n",
585 __func__, rc);
586 mutex_unlock(&ecryptfs_daemon_hash_mux);
587 return rc;
588}
589
590/**
591 * ecryptfs_destroy_ecryptfs_miscdev
592 *
593 * All of the daemons must be exorcised prior to calling this
594 * function.
595 */
596void ecryptfs_destroy_ecryptfs_miscdev(void)
597{
598 BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0);
599 misc_deregister(&ecryptfs_miscdev);
600}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6df1debdccce..2b6fe1e6e8ba 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
153 flush_dcache_page(page); 153 flush_dcache_page(page);
154 if (rc) { 154 if (rc) {
155 printk(KERN_ERR "%s: Error reading xattr " 155 printk(KERN_ERR "%s: Error reading xattr "
156 "region; rc = [%d]\n", __FUNCTION__, rc); 156 "region; rc = [%d]\n", __func__, rc);
157 goto out; 157 goto out;
158 } 158 }
159 } else { 159 } else {
@@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
169 if (rc) { 169 if (rc) {
170 printk(KERN_ERR "%s: Error attempting to read " 170 printk(KERN_ERR "%s: Error attempting to read "
171 "extent at offset [%lld] in the lower " 171 "extent at offset [%lld] in the lower "
172 "file; rc = [%d]\n", __FUNCTION__, 172 "file; rc = [%d]\n", __func__,
173 lower_offset, rc); 173 lower_offset, rc);
174 goto out; 174 goto out;
175 } 175 }
@@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
212 "the encrypted content from the lower " 212 "the encrypted content from the lower "
213 "file whilst inserting the metadata " 213 "file whilst inserting the metadata "
214 "from the xattr into the header; rc = " 214 "from the xattr into the header; rc = "
215 "[%d]\n", __FUNCTION__, rc); 215 "[%d]\n", __func__, rc);
216 goto out; 216 goto out;
217 } 217 }
218 218
@@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
293 if (rc) { 293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read " 294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n", 295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc); 296 __func__, rc);
297 ClearPageUptodate(page); 297 ClearPageUptodate(page);
298 goto out; 298 goto out;
299 } else 299 } else
@@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
308 "from the lower file whilst " 308 "from the lower file whilst "
309 "inserting the metadata from " 309 "inserting the metadata from "
310 "the xattr into the header; rc " 310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc); 311 "= [%d]\n", __func__, rc);
312 ClearPageUptodate(page); 312 ClearPageUptodate(page);
313 goto out; 313 goto out;
314 } 314 }
@@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
320 if (rc) { 320 if (rc) {
321 printk(KERN_ERR "%s: Error reading " 321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n", 322 "page; rc = [%d]\n",
323 __FUNCTION__, rc); 323 __func__, rc);
324 ClearPageUptodate(page); 324 ClearPageUptodate(page);
325 goto out; 325 goto out;
326 } 326 }
@@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
331 if (rc) { 331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page " 332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n", 333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc); 334 __func__, page->index, rc);
335 ClearPageUptodate(page); 335 ClearPageUptodate(page);
336 goto out; 336 goto out;
337 } 337 }
@@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
348 if (rc) { 348 if (rc) {
349 printk(KERN_ERR "%s: Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
350 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
351 " rc = [%d]\n", __FUNCTION__, 351 " rc = [%d]\n", __func__,
352 prev_page_end_size, rc); 352 prev_page_end_size, rc);
353 goto out; 353 goto out;
354 } 354 }
@@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
389 kfree(file_size_virt); 389 kfree(file_size_virt);
390 if (rc) 390 if (rc)
391 printk(KERN_ERR "%s: Error writing file size to header; " 391 printk(KERN_ERR "%s: Error writing file size to header; "
392 "rc = [%d]\n", __FUNCTION__, rc); 392 "rc = [%d]\n", __func__, rc);
393out: 393out:
394 return rc; 394 return rc;
395} 395}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index f638a698dc52..e0abad62b395 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock;
44 * upon sending the message; non-zero upon error. 44 * upon sending the message; non-zero upon error.
45 */ 45 */
46int ecryptfs_send_netlink(char *data, int data_len, 46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, pid_t daemon_pid) 48 u16 msg_flags, struct pid *daemon_pid)
49{ 49{
50 struct sk_buff *skb; 50 struct sk_buff *skb;
51 struct nlmsghdr *nlh; 51 struct nlmsghdr *nlh;
@@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); 60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out; 61 goto out;
62 } 62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, 63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len); 64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags; 65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) { 66 if (msg_ctx && payload_len) {
@@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
69 msg->data_len = data_len; 69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len); 70 memcpy(msg->data, data, data_len);
71 } 71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); 72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) { 73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " 74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc); 75 "message; rc = [%d]\n", rc);
@@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb); 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
102 int rc; 103 int rc;
103 104
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { 105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
@@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
107 "incorrectly specified data length\n"); 108 "incorrectly specified data length\n");
108 goto out; 109 goto out;
109 } 110 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, 111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); 112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
112 if (rc) 115 if (rc)
113 printk(KERN_ERR 116 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc); 117 "Error processing response message; rc = [%d]\n", rc);
@@ -126,11 +129,13 @@ out:
126 */ 129 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb) 130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{ 131{
132 struct pid *pid;
129 int rc; 133 int rc;
130 134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, 136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid, 137 NETLINK_CREDS(skb)->uid, NULL, pid);
133 NETLINK_CREDS(skb)->pid); 138 put_pid(pid);
134 if (rc) 139 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); 140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc; 141 return rc;
@@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb)
147 */ 152 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb) 153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{ 154{
155 struct pid *pid;
150 int rc; 156 int rc;
151 157
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, 158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
153 NETLINK_CREDS(skb)->pid); 159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
154 if (rc) 161 if (rc)
155 printk(KERN_WARNING 162 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc); 163 "Error processing QUIT message; rc = [%d]\n", rc);
@@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176 goto free; 183 goto free;
177 } 184 }
178 switch (nlh->nlmsg_type) { 185 switch (nlh->nlmsg_type) {
179 case ECRYPTFS_NLMSG_RESPONSE: 186 case ECRYPTFS_MSG_RESPONSE:
180 if (ecryptfs_process_nl_response(skb)) { 187 if (ecryptfs_process_nl_response(skb)) {
181 ecryptfs_printk(KERN_WARNING, "Failed to " 188 ecryptfs_printk(KERN_WARNING, "Failed to "
182 "deliver netlink response to " 189 "deliver netlink response to "
183 "requesting operation\n"); 190 "requesting operation\n");
184 } 191 }
185 break; 192 break;
186 case ECRYPTFS_NLMSG_HELO: 193 case ECRYPTFS_MSG_HELO:
187 if (ecryptfs_process_nl_helo(skb)) { 194 if (ecryptfs_process_nl_helo(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to " 195 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "fulfill HELO request\n"); 196 "fulfill HELO request\n");
190 } 197 }
191 break; 198 break;
192 case ECRYPTFS_NLMSG_QUIT: 199 case ECRYPTFS_MSG_QUIT:
193 if (ecryptfs_process_nl_quit(skb)) { 200 if (ecryptfs_process_nl_quit(skb)) {
194 ecryptfs_printk(KERN_WARNING, "Failed to " 201 ecryptfs_printk(KERN_WARNING, "Failed to "
195 "fulfill QUIT request\n"); 202 "fulfill QUIT request\n");
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0c4928623bbc..ebf55150be56 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
55 set_fs(fs_save); 55 set_fs(fs_save);
56 if (octets_written < 0) { 56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; " 57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size); 58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL; 59 rc = -EINVAL;
60 } 60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 61 mutex_unlock(&inode_info->lower_file_mutex);
@@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
153 rc = PTR_ERR(ecryptfs_page); 153 rc = PTR_ERR(ecryptfs_page);
154 printk(KERN_ERR "%s: Error getting page at " 154 printk(KERN_ERR "%s: Error getting page at "
155 "index [%ld] from eCryptfs inode " 155 "index [%ld] from eCryptfs inode "
156 "mapping; rc = [%d]\n", __FUNCTION__, 156 "mapping; rc = [%d]\n", __func__,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
@@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
165 if (rc) { 165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting " 166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n", 167 "page; rc = [%d]\n",
168 __FUNCTION__, rc); 168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page); 169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page); 170 page_cache_release(ecryptfs_page);
171 goto out; 171 goto out;
@@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
202 page_cache_release(ecryptfs_page); 202 page_cache_release(ecryptfs_page);
203 if (rc) { 203 if (rc) {
204 printk(KERN_ERR "%s: Error encrypting " 204 printk(KERN_ERR "%s: Error encrypting "
205 "page; rc = [%d]\n", __FUNCTION__, rc); 205 "page; rc = [%d]\n", __func__, rc);
206 goto out; 206 goto out;
207 } 207 }
208 pos += num_bytes; 208 pos += num_bytes;
@@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
254 set_fs(fs_save); 254 set_fs(fs_save);
255 if (octets_read < 0) { 255 if (octets_read < 0) {
256 printk(KERN_ERR "%s: octets_read = [%td]; " 256 printk(KERN_ERR "%s: octets_read = [%td]; "
257 "expected [%td]\n", __FUNCTION__, octets_read, size); 257 "expected [%td]\n", __func__, octets_read, size);
258 rc = -EINVAL; 258 rc = -EINVAL;
259 } 259 }
260 mutex_unlock(&inode_info->lower_file_mutex); 260 mutex_unlock(&inode_info->lower_file_mutex);
@@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 printk(KERN_ERR "%s: Attempt to read data past the end of the " 327 printk(KERN_ERR "%s: Attempt to read data past the end of the "
328 "file; offset = [%lld]; size = [%td]; " 328 "file; offset = [%lld]; size = [%td]; "
329 "ecryptfs_file_size = [%lld]\n", 329 "ecryptfs_file_size = [%lld]\n",
330 __FUNCTION__, offset, size, ecryptfs_file_size); 330 __func__, offset, size, ecryptfs_file_size);
331 goto out; 331 goto out;
332 } 332 }
333 pos = offset; 333 pos = offset;
@@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
345 rc = PTR_ERR(ecryptfs_page); 345 rc = PTR_ERR(ecryptfs_page);
346 printk(KERN_ERR "%s: Error getting page at " 346 printk(KERN_ERR "%s: Error getting page at "
347 "index [%ld] from eCryptfs inode " 347 "index [%ld] from eCryptfs inode "
348 "mapping; rc = [%d]\n", __FUNCTION__, 348 "mapping; rc = [%d]\n", __func__,
349 ecryptfs_page_idx, rc); 349 ecryptfs_page_idx, rc);
350 goto out; 350 goto out;
351 } 351 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page); 352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) { 353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting " 354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __FUNCTION__, rc); 355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page); 356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page); 357 page_cache_release(ecryptfs_page);
358 goto out; 358 goto out;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50ac..343942deeec1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd(unsigned int count)
202{ 202{
203 int error, fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207 205
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx) 207 if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
216 * When we call this, the initialization must be complete, since 214 * When we call this, the initialization must be complete, since
217 * anon_inode_getfd() will install the fd. 215 * anon_inode_getfd() will install the fd.
218 */ 216 */
219 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", 217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
220 &eventfd_fops, ctx); 218 if (fd < 0)
221 if (!error) 219 kfree(ctx);
222 return fd; 220 return fd;
223
224 kfree(ctx);
225 return error;
226} 221}
227 222
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a415f42d32cf..990c01d2d66b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258} 258}
259 259
260/* Special initialization for the RB tree node to detect linkage */
261static inline void ep_rb_initnode(struct rb_node *n)
262{
263 rb_set_parent(n, n);
264}
265
266/* Removes a node from the RB tree and marks it for a fast is-linked check */
267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
268{
269 rb_erase(n, r);
270 rb_set_parent(n, n);
271}
272
273/* Fast check to verify that the item is linked to the main RB tree */
274static inline int ep_rb_linked(struct rb_node *n)
275{
276 return rb_parent(n) != n;
277}
278
279/* Tells us if the item is currently linked */ 260/* Tells us if the item is currently linked */
280static inline int ep_is_linked(struct list_head *p) 261static inline int ep_is_linked(struct list_head *p)
281{ 262{
@@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p)
283} 264}
284 265
285/* Get the "struct epitem" from a wait queue pointer */ 266/* Get the "struct epitem" from a wait queue pointer */
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p) 267static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
287{ 268{
288 return container_of(p, struct eppoll_entry, wait)->base; 269 return container_of(p, struct eppoll_entry, wait)->base;
289} 270}
290 271
291/* Get the "struct epitem" from an epoll queue wrapper */ 272/* Get the "struct epitem" from an epoll queue wrapper */
292static inline struct epitem * ep_item_from_epqueue(poll_table *p) 273static inline struct epitem *ep_item_from_epqueue(poll_table *p)
293{ 274{
294 return container_of(p, struct ep_pqueue, pt)->epi; 275 return container_of(p, struct ep_pqueue, pt)->epi;
295} 276}
@@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
411 list_del_init(&epi->fllink); 392 list_del_init(&epi->fllink);
412 spin_unlock(&file->f_ep_lock); 393 spin_unlock(&file->f_ep_lock);
413 394
414 if (ep_rb_linked(&epi->rbn)) 395 rb_erase(&epi->rbn, &ep->rbr);
415 ep_rb_erase(&epi->rbn, &ep->rbr);
416 396
417 spin_lock_irqsave(&ep->lock, flags); 397 spin_lock_irqsave(&ep->lock, flags);
418 if (ep_is_linked(&epi->rdllink)) 398 if (ep_is_linked(&epi->rdllink))
@@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
728 goto error_return; 708 goto error_return;
729 709
730 /* Item initialization follow here ... */ 710 /* Item initialization follow here ... */
731 ep_rb_initnode(&epi->rbn);
732 INIT_LIST_HEAD(&epi->rdllink); 711 INIT_LIST_HEAD(&epi->rdllink);
733 INIT_LIST_HEAD(&epi->fllink); 712 INIT_LIST_HEAD(&epi->fllink);
734 INIT_LIST_HEAD(&epi->pwqlist); 713 INIT_LIST_HEAD(&epi->pwqlist);
@@ -1071,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
1071{ 1050{
1072 int error, fd = -1; 1051 int error, fd = -1;
1073 struct eventpoll *ep; 1052 struct eventpoll *ep;
1074 struct inode *inode;
1075 struct file *file;
1076 1053
1077 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1078 current, size)); 1055 current, size));
@@ -1082,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
1082 * structure ( "struct eventpoll" ). 1059 * structure ( "struct eventpoll" ).
1083 */ 1060 */
1084 error = -EINVAL; 1061 error = -EINVAL;
1085 if (size <= 0 || (error = ep_alloc(&ep)) != 0) 1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
1063 fd = error;
1086 goto error_return; 1064 goto error_return;
1065 }
1087 1066
1088 /* 1067 /*
1089 * Creates all the items needed to setup an eventpoll file. That is, 1068 * Creates all the items needed to setup an eventpoll file. That is,
1090 * a file structure, and inode and a free file descriptor. 1069 * a file structure and a free file descriptor.
1091 */ 1070 */
1092 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", 1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
1093 &eventpoll_fops, ep); 1072 if (fd < 0)
1094 if (error) 1073 ep_free(ep);
1095 goto error_free;
1096 1074
1075error_return:
1097 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1098 current, size, fd)); 1077 current, size, fd));
1099 1078
1100 return fd; 1079 return fd;
1101
1102error_free:
1103 ep_free(ep);
1104error_return:
1105 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1106 current, size, error));
1107 return error;
1108} 1080}
1109 1081
1110/* 1082/*
@@ -1262,7 +1234,7 @@ error_return:
1262 return error; 1234 return error;
1263} 1235}
1264 1236
1265#ifdef TIF_RESTORE_SIGMASK 1237#ifdef HAVE_SET_RESTORE_SIGMASK
1266 1238
1267/* 1239/*
1268 * Implement the event wait interface for the eventpoll file. It is the kernel 1240 * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1300,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1300 if (error == -EINTR) { 1272 if (error == -EINTR) {
1301 memcpy(&current->saved_sigmask, &sigsaved, 1273 memcpy(&current->saved_sigmask, &sigsaved,
1302 sizeof(sigsaved)); 1274 sizeof(sigsaved));
1303 set_thread_flag(TIF_RESTORE_SIGMASK); 1275 set_restore_sigmask();
1304 } else 1276 } else
1305 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1277 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1306 } 1278 }
@@ -1308,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1308 return error; 1280 return error;
1309} 1281}
1310 1282
1311#endif /* #ifdef TIF_RESTORE_SIGMASK */ 1283#endif /* HAVE_SET_RESTORE_SIGMASK */
1312 1284
1313static int __init eventpoll_init(void) 1285static int __init eventpoll_init(void)
1314{ 1286{
@@ -1330,4 +1302,3 @@ static int __init eventpoll_init(void)
1330 return 0; 1302 return 0;
1331} 1303}
1332fs_initcall(eventpoll_init); 1304fs_initcall(eventpoll_init);
1333
diff --git a/fs/exec.c b/fs/exec.c
index b152029f18f6..1f8a24aa1f8b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
28#include <linux/a.out.h> 29#include <linux/a.out.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
@@ -735,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
735 tsk->active_mm = mm; 736 tsk->active_mm = mm;
736 activate_mm(active_mm, mm); 737 activate_mm(active_mm, mm);
737 task_unlock(tsk); 738 task_unlock(tsk);
739 mm_update_next_owner(old_mm);
738 arch_pick_mmap_layout(mm); 740 arch_pick_mmap_layout(mm);
739 if (old_mm) { 741 if (old_mm) {
740 up_read(&old_mm->mmap_sem); 742 up_read(&old_mm->mmap_sem);
@@ -765,9 +767,7 @@ static int de_thread(struct task_struct *tsk)
765 767
766 /* 768 /*
767 * Kill all other threads in the thread group. 769 * Kill all other threads in the thread group.
768 * We must hold tasklist_lock to call zap_other_threads.
769 */ 770 */
770 read_lock(&tasklist_lock);
771 spin_lock_irq(lock); 771 spin_lock_irq(lock);
772 if (signal_group_exit(sig)) { 772 if (signal_group_exit(sig)) {
773 /* 773 /*
@@ -775,21 +775,10 @@ static int de_thread(struct task_struct *tsk)
775 * return so that the signal is processed. 775 * return so that the signal is processed.
776 */ 776 */
777 spin_unlock_irq(lock); 777 spin_unlock_irq(lock);
778 read_unlock(&tasklist_lock);
779 return -EAGAIN; 778 return -EAGAIN;
780 } 779 }
781
782 /*
783 * child_reaper ignores SIGKILL, change it now.
784 * Reparenting needs write_lock on tasklist_lock,
785 * so it is safe to do it under read_lock.
786 */
787 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
788 task_active_pid_ns(tsk)->child_reaper = tsk;
789
790 sig->group_exit_task = tsk; 780 sig->group_exit_task = tsk;
791 zap_other_threads(tsk); 781 zap_other_threads(tsk);
792 read_unlock(&tasklist_lock);
793 782
794 /* Account for the thread group leader hanging around: */ 783 /* Account for the thread group leader hanging around: */
795 count = thread_group_leader(tsk) ? 1 : 2; 784 count = thread_group_leader(tsk) ? 1 : 2;
@@ -810,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
810 if (!thread_group_leader(tsk)) { 799 if (!thread_group_leader(tsk)) {
811 leader = tsk->group_leader; 800 leader = tsk->group_leader;
812 801
813 sig->notify_count = -1; 802 sig->notify_count = -1; /* for exit_notify() */
814 for (;;) { 803 for (;;) {
815 write_lock_irq(&tasklist_lock); 804 write_lock_irq(&tasklist_lock);
816 if (likely(leader->exit_state)) 805 if (likely(leader->exit_state))
@@ -820,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
820 schedule(); 809 schedule();
821 } 810 }
822 811
812 if (unlikely(task_child_reaper(tsk) == leader))
813 task_active_pid_ns(tsk)->child_reaper = tsk;
823 /* 814 /*
824 * The only record we have of the real-time age of a 815 * The only record we have of the real-time age of a
825 * process, regardless of execs it's done, is start_time. 816 * process, regardless of execs it's done, is start_time.
@@ -963,6 +954,8 @@ int flush_old_exec(struct linux_binprm * bprm)
963 if (retval) 954 if (retval)
964 goto out; 955 goto out;
965 956
957 set_mm_exe_file(bprm->mm, bprm->file);
958
966 /* 959 /*
967 * Release all of the old mmap stuff 960 * Release all of the old mmap stuff
968 */ 961 */
@@ -1268,7 +1261,6 @@ int do_execve(char * filename,
1268{ 1261{
1269 struct linux_binprm *bprm; 1262 struct linux_binprm *bprm;
1270 struct file *file; 1263 struct file *file;
1271 unsigned long env_p;
1272 struct files_struct *displaced; 1264 struct files_struct *displaced;
1273 int retval; 1265 int retval;
1274 1266
@@ -1321,11 +1313,9 @@ int do_execve(char * filename,
1321 if (retval < 0) 1313 if (retval < 0)
1322 goto out; 1314 goto out;
1323 1315
1324 env_p = bprm->p;
1325 retval = copy_strings(bprm->argc, argv, bprm); 1316 retval = copy_strings(bprm->argc, argv, bprm);
1326 if (retval < 0) 1317 if (retval < 0)
1327 goto out; 1318 goto out;
1328 bprm->argv_len = env_p - bprm->p;
1329 1319
1330 retval = search_binary_handler(bprm,regs); 1320 retval = search_binary_handler(bprm,regs);
1331 if (retval >= 0) { 1321 if (retval >= 0) {
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44eca..cc91227d3bb8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
150 if (IS_ERR(ppd)) { 150 if (IS_ERR(ppd)) {
151 err = PTR_ERR(ppd); 151 err = PTR_ERR(ppd);
152 dprintk("%s: get_parent of %ld failed, err %d\n", 152 dprintk("%s: get_parent of %ld failed, err %d\n",
153 __FUNCTION__, pd->d_inode->i_ino, err); 153 __func__, pd->d_inode->i_ino, err);
154 dput(pd); 154 dput(pd);
155 break; 155 break;
156 } 156 }
157 157
158 dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, 158 dprintk("%s: find name of %lu in %lu\n", __func__,
159 pd->d_inode->i_ino, ppd->d_inode->i_ino); 159 pd->d_inode->i_ino, ppd->d_inode->i_ino);
160 err = exportfs_get_name(mnt, ppd, nbuf, pd); 160 err = exportfs_get_name(mnt, ppd, nbuf, pd);
161 if (err) { 161 if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
168 continue; 168 continue;
169 break; 169 break;
170 } 170 }
171 dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); 171 dprintk("%s: found name: %s\n", __func__, nbuf);
172 mutex_lock(&ppd->d_inode->i_mutex); 172 mutex_lock(&ppd->d_inode->i_mutex);
173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
174 mutex_unlock(&ppd->d_inode->i_mutex); 174 mutex_unlock(&ppd->d_inode->i_mutex);
175 if (IS_ERR(npd)) { 175 if (IS_ERR(npd)) {
176 err = PTR_ERR(npd); 176 err = PTR_ERR(npd);
177 dprintk("%s: lookup failed: %d\n", 177 dprintk("%s: lookup failed: %d\n",
178 __FUNCTION__, err); 178 __func__, err);
179 dput(ppd); 179 dput(ppd);
180 dput(pd); 180 dput(pd);
181 break; 181 break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
188 if (npd == pd) 188 if (npd == pd)
189 noprogress = 0; 189 noprogress = 0;
190 else 190 else
191 printk("%s: npd != pd\n", __FUNCTION__); 191 printk("%s: npd != pd\n", __func__);
192 dput(npd); 192 dput(npd);
193 dput(ppd); 193 dput(ppd);
194 if (IS_ROOT(pd)) { 194 if (IS_ROOT(pd)) {
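
The expfs.c changes above are a mechanical switch from the GCC-specific __FUNCTION__ to the C99-standard __func__ predefined identifier; both expand to the name of the enclosing function. A minimal user-space sketch:

#include <stdio.h>

/* __func__ behaves as if each function body contained
 *     static const char __func__[] = "function-name";
 * __FUNCTION__ is the older GCC spelling of the same thing. */
static void demo(void)
{
    printf("%s: hello\n", __func__);   /* prints "demo: hello" */
}

int main(void)
{
    demo();
    return 0;
}
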
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091bf..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
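
In the ->write_end() prototype the copied parameter is unsigned, so the old "if (copied < 0)" test in the ext3 hunks above could never fire, which appears to be why the helper's return value is now captured in the signed ret2 first. A stand-alone sketch of the signedness pitfall (fake_write_end() is a made-up stand-in, not kernel code):

#include <stdio.h>

static int fake_write_end(void)
{
    return -5;                  /* stand-in for a negative error code */
}

int main(void)
{
    unsigned copied = fake_write_end();   /* wraps to a large positive value */
    int ret2 = fake_write_end();
    int ret = 0;

    if (copied < 0)             /* always false: copied is unsigned      */
        ret = (int)copied;      /* (compilers typically warn about this) */
    if (ret2 < 0)               /* works: ret2 is signed                 */
        ret = ret2;

    printf("ret = %d\n", ret);  /* prints -5 */
    return 0;
}
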
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d4a4f0e9ff69..175414ac2210 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1000,6 +1000,11 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1000 i.value = NULL; 1000 i.value = NULL;
1001 error = ext3_xattr_block_set(handle, inode, &i, &bs); 1001 error = ext3_xattr_block_set(handle, inode, &i, &bs);
1002 } else if (error == -ENOSPC) { 1002 } else if (error == -ENOSPC) {
1003 if (EXT3_I(inode)->i_file_acl && !bs.s.base) {
1004 error = ext3_xattr_block_find(inode, &i, &bs);
1005 if (error)
1006 goto cleanup;
1007 }
1003 error = ext3_xattr_block_set(handle, inode, &i, &bs); 1008 error = ext3_xattr_block_set(handle, inode, &i, &bs);
1004 if (error) 1009 if (error)
1005 goto cleanup; 1010 goto cleanup;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..30494c5da843 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,17 +281,17 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
288 return NULL; 288 return NULL;
289 } 289 }
290 if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { 290 ext4_valid_block_bitmap(sb, desc, block_group, bh);
291 put_bh(bh); 291 /*
292 return NULL; 292 * file system mounted not to panic on error,
293 } 293 * continue with corrupt bitmap
294 294 */
295 return bh; 295 return bh;
296} 296}
297/* 297/*
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1772,7 +1770,12 @@ allocated:
1772 "Allocating block in system zone - " 1770 "Allocating block in system zone - "
1773 "blocks from %llu, length %lu", 1771 "blocks from %llu, length %lu",
1774 ret_block, num); 1772 ret_block, num);
1775 goto out; 1773 /*
1774 * claim_block marked the blocks we allocated
1775 * as in use. So we may want to selectively
1776 * mark some of the blocks as free
1777 */
1778 goto retry_alloc;
1776 } 1779 }
1777 1780
1778 performed_allocation = 1; 1781 performed_allocation = 1;
@@ -1798,7 +1801,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1801 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1802 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1803 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1804 "b_committed_data\n", __func__);
1802 } 1805 }
1803 } 1806 }
1804 } 1807 }
@@ -1823,8 +1826,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1826 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1827 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1828 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1829 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1830 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1831 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1832 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
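
Two of the balloc.c hunks above replace the open-coded cpu_to_le16(le16_to_cpu(x) +/- n) sequence with the kernel's le16_add_cpu() helper. A stand-alone illustration of the pattern; the endianness helpers below are simplified user-space stand-ins (identity on a little-endian host), not the kernel implementations:

#include <stdint.h>
#include <stdio.h>

typedef uint16_t le16;

static le16     cpu_to_le16(uint16_t v) { return v; }  /* little-endian host assumed */
static uint16_t le16_to_cpu(le16 v)     { return v; }

/* Equivalent of the helper: read, adjust in CPU byte order, write back. */
static void le16_add_cpu(le16 *var, int16_t val)
{
    *var = cpu_to_le16(le16_to_cpu(*var) + val);
}

int main(void)
{
    le16 free_blocks = cpu_to_le16(100);

    /* Open-coded form removed by the diff: */
    free_blocks = cpu_to_le16(le16_to_cpu(free_blocks) - 4);

    /* Helper form introduced by the diff: */
    le16_add_cpu(&free_blocks, -4);

    printf("free blocks now %u\n", le16_to_cpu(free_blocks));   /* 92 */
    return 0;
}
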
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 000000000000..8158083f7ac0
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
1/*
2 * ext4.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_H
17#define _EXT4_H
18
19#include <linux/types.h>
20#include <linux/blkdev.h>
21#include <linux/magic.h>
22#include "ext4_i.h"
23
24/*
25 * The second extended filesystem constants/structures
26 */
27
28/*
29 * Define EXT4FS_DEBUG to produce debug messages
30 */
31#undef EXT4FS_DEBUG
32
33/*
34 * Define EXT4_RESERVATION to reserve data blocks for expanding files
35 */
36#define EXT4_DEFAULT_RESERVE_BLOCKS 8
37/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
38#define EXT4_MAX_RESERVE_BLOCKS 1027
39#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
40
41/*
42 * Debug code
43 */
44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \
46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \
49 printk (KERN_DEBUG f, ## a); \
50 } while (0)
51#else
52#define ext4_debug(f, a...) do {} while (0)
53#endif
54
55#define EXT4_MULTIBLOCK_ALLOCATOR 1
56
57/* prefer goal again. length */
58#define EXT4_MB_HINT_MERGE 1
59/* blocks already reserved */
60#define EXT4_MB_HINT_RESERVED 2
61/* metadata is being allocated */
62#define EXT4_MB_HINT_METADATA 4
63/* first blocks in the file */
64#define EXT4_MB_HINT_FIRST 8
65/* search for the best chunk */
66#define EXT4_MB_HINT_BEST 16
67/* data is being allocated */
68#define EXT4_MB_HINT_DATA 32
69/* don't preallocate (for tails) */
70#define EXT4_MB_HINT_NOPREALLOC 64
71/* allocate for locality group */
72#define EXT4_MB_HINT_GROUP_ALLOC 128
73/* allocate goal blocks or none */
74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512
77
78struct ext4_allocation_request {
79 /* target inode for block we're allocating */
80 struct inode *inode;
81 /* logical block in target inode */
82 ext4_lblk_t logical;
83 /* phys. target (a hint) */
84 ext4_fsblk_t goal;
85 /* the closest logical allocated block to the left */
86 ext4_lblk_t lleft;
87 /* phys. block for ^^^ */
88 ext4_fsblk_t pleft;
89 /* the closest logical allocated block to the right */
90 ext4_lblk_t lright;
91 /* phys. block for ^^^ */
92 ext4_fsblk_t pright;
93 /* how many blocks we want to allocate */
94 unsigned long len;
95 /* flags. see above EXT4_MB_HINT_* */
96 unsigned long flags;
97};
98
99/*
100 * Special inodes numbers
101 */
102#define EXT4_BAD_INO 1 /* Bad blocks inode */
103#define EXT4_ROOT_INO 2 /* Root inode */
104#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
105#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
106#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
107#define EXT4_JOURNAL_INO 8 /* Journal inode */
108
109/* First non-reserved inode for old ext4 filesystems */
110#define EXT4_GOOD_OLD_FIRST_INO 11
111
112/*
113 * Maximal count of links to a file
114 */
115#define EXT4_LINK_MAX 65000
116
117/*
118 * Macro-instructions used to manage several block sizes
119 */
120#define EXT4_MIN_BLOCK_SIZE 1024
121#define EXT4_MAX_BLOCK_SIZE 65536
122#define EXT4_MIN_BLOCK_LOG_SIZE 10
123#ifdef __KERNEL__
124# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
125#else
126# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
127#endif
128#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
129#ifdef __KERNEL__
130# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
131#else
132# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
133#endif
134#ifdef __KERNEL__
135#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
136#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
137#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
138#else
139#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
140 EXT4_GOOD_OLD_INODE_SIZE : \
141 (s)->s_inode_size)
142#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
143 EXT4_GOOD_OLD_FIRST_INO : \
144 (s)->s_first_ino)
145#endif
146#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
147
148/*
149 * Structure of a blocks group descriptor
150 */
151struct ext4_group_desc
152{
153 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
154 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
155 __le32 bg_inode_table_lo; /* Inodes table block */
156 __le16 bg_free_blocks_count; /* Free blocks count */
157 __le16 bg_free_inodes_count; /* Free inodes count */
158 __le16 bg_used_dirs_count; /* Directories count */
159 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
160 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
161 __le16 bg_itable_unused; /* Unused inodes count */
162 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
163 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
164 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
165 __le32 bg_inode_table_hi; /* Inodes table block MSB */
166 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
167 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
168 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
169 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
170 __u32 bg_reserved2[3];
171};
172
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
176
177#ifdef __KERNEL__
178#include "ext4_sb.h"
179#endif
180/*
181 * Macro-instructions used to manage group descriptors
182 */
183#define EXT4_MIN_DESC_SIZE 32
184#define EXT4_MIN_DESC_SIZE_64BIT 64
185#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
186#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
187#ifdef __KERNEL__
188# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
189# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
190# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
191# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
192#else
193# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
194# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
195# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
196#endif
197
198/*
199 * Constants relative to the data blocks
200 */
201#define EXT4_NDIR_BLOCKS 12
202#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
203#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
204#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
205#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
206
207/*
208 * Inode flags
209 */
210#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
211#define EXT4_UNRM_FL 0x00000002 /* Undelete */
212#define EXT4_COMPR_FL 0x00000004 /* Compress file */
213#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
214#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
215#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
216#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
217#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
218/* Reserved for compression usage... */
219#define EXT4_DIRTY_FL 0x00000100
220#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
221#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
222#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
223/* End compression flags --- maybe not all used */
224#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
225#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
226#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
227#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
228#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
234
235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
236#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
237
238/*
239 * Inode dynamic state flags
240 */
241#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
242#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
243#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
244#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
245
246/* Used to pass group descriptor data when online resize is done */
247struct ext4_new_group_input {
248 __u32 group; /* Group number for this data */
249 __u64 block_bitmap; /* Absolute block number of block bitmap */
250 __u64 inode_bitmap; /* Absolute block number of inode bitmap */
251 __u64 inode_table; /* Absolute block number of inode table start */
252 __u32 blocks_count; /* Total number of blocks in this group */
253 __u16 reserved_blocks; /* Number of reserved blocks in this group */
254 __u16 unused;
255};
256
257/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
258struct ext4_new_group_data {
259 __u32 group;
260 __u64 block_bitmap;
261 __u64 inode_bitmap;
262 __u64 inode_table;
263 __u32 blocks_count;
264 __u16 reserved_blocks;
265 __u16 unused;
266 __u32 free_blocks_count;
267};
268
269/*
270 * Following is used by preallocation code to tell get_blocks() that we
271 * want uninitialzed extents.
272 */
273#define EXT4_CREATE_UNINITIALIZED_EXT 2
274
275/*
276 * ioctl commands
277 */
278#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
279#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
280#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
281#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
282#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
283#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
284#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
285#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
286#ifdef CONFIG_JBD2_DEBUG
287#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
288#endif
289#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
290#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
291#define EXT4_IOC_MIGRATE _IO('f', 7)
292
293/*
294 * ioctl commands in 32 bit emulation
295 */
296#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
297#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
298#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
299#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
300#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
301#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
302#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
303#ifdef CONFIG_JBD2_DEBUG
304#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
305#endif
306#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
307#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
308
309
310/*
311 * Mount options
312 */
313struct ext4_mount_options {
314 unsigned long s_mount_opt;
315 uid_t s_resuid;
316 gid_t s_resgid;
317 unsigned long s_commit_interval;
318#ifdef CONFIG_QUOTA
319 int s_jquota_fmt;
320 char *s_qf_names[MAXQUOTAS];
321#endif
322};
323
324/*
325 * Structure of an inode on the disk
326 */
327struct ext4_inode {
328 __le16 i_mode; /* File mode */
329 __le16 i_uid; /* Low 16 bits of Owner Uid */
330 __le32 i_size_lo; /* Size in bytes */
331 __le32 i_atime; /* Access time */
332 __le32 i_ctime; /* Inode Change time */
333 __le32 i_mtime; /* Modification time */
334 __le32 i_dtime; /* Deletion Time */
335 __le16 i_gid; /* Low 16 bits of Group Id */
336 __le16 i_links_count; /* Links count */
337 __le32 i_blocks_lo; /* Blocks count */
338 __le32 i_flags; /* File flags */
339 union {
340 struct {
341 __le32 l_i_version;
342 } linux1;
343 struct {
344 __u32 h_i_translator;
345 } hurd1;
346 struct {
347 __u32 m_i_reserved1;
348 } masix1;
349 } osd1; /* OS dependent 1 */
350 __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
351 __le32 i_generation; /* File version (for NFS) */
352 __le32 i_file_acl_lo; /* File ACL */
353 __le32 i_size_high;
354 __le32 i_obso_faddr; /* Obsoleted fragment address */
355 union {
356 struct {
357 __le16 l_i_blocks_high; /* were l_i_reserved1 */
358 __le16 l_i_file_acl_high;
359 __le16 l_i_uid_high; /* these 2 fields */
360 __le16 l_i_gid_high; /* were reserved2[0] */
361 __u32 l_i_reserved2;
362 } linux2;
363 struct {
364 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
365 __u16 h_i_mode_high;
366 __u16 h_i_uid_high;
367 __u16 h_i_gid_high;
368 __u32 h_i_author;
369 } hurd2;
370 struct {
371 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
372 __le16 m_i_file_acl_high;
373 __u32 m_i_reserved2[2];
374 } masix2;
375 } osd2; /* OS dependent 2 */
376 __le16 i_extra_isize;
377 __le16 i_pad1;
378 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
379 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
380 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
381 __le32 i_crtime; /* File Creation time */
382 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
383 __le32 i_version_hi; /* high 32 bits for 64-bit version */
384};
385
386
387#define EXT4_EPOCH_BITS 2
388#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
389#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
390
391/*
392 * Extended fields will fit into an inode if the filesystem was formatted
393 * with large inodes (-I 256 or larger) and there are not currently any EAs
394 * consuming all of the available space. For new inodes we always reserve
395 * enough space for the kernel's known extended fields, but for inodes
396 * created with an old kernel this might not have been the case. None of
397 * the extended inode fields is critical for correct filesystem operation.
398 * This macro checks if a certain field fits in the inode. Note that
399 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
400 */
401#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
402 ((offsetof(typeof(*ext4_inode), field) + \
403 sizeof((ext4_inode)->field)) \
404 <= (EXT4_GOOD_OLD_INODE_SIZE + \
405 (einode)->i_extra_isize)) \
406
407static inline __le32 ext4_encode_extra_time(struct timespec *time)
408{
409 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
410 time->tv_sec >> 32 : 0) |
411 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
412}
413
414static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
415{
416 if (sizeof(time->tv_sec) > 4)
417 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
418 << 32;
419 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
420}
421
422#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
423do { \
424 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
425 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
426 (raw_inode)->xtime ## _extra = \
427 ext4_encode_extra_time(&(inode)->xtime); \
428} while (0)
429
430#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
431do { \
432 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
433 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
434 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
435 (raw_inode)->xtime ## _extra = \
436 ext4_encode_extra_time(&(einode)->xtime); \
437} while (0)
438
439#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
440do { \
441 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
442 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
443 ext4_decode_extra_time(&(inode)->xtime, \
444 raw_inode->xtime ## _extra); \
445} while (0)
446
447#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
448do { \
449 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
450 (einode)->xtime.tv_sec = \
451 (signed)le32_to_cpu((raw_inode)->xtime); \
452 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
453 ext4_decode_extra_time(&(einode)->xtime, \
454 raw_inode->xtime ## _extra); \
455} while (0)
456
457#define i_disk_version osd1.linux1.l_i_version
458
459#if defined(__KERNEL__) || defined(__linux__)
460#define i_reserved1 osd1.linux1.l_i_reserved1
461#define i_file_acl_high osd2.linux2.l_i_file_acl_high
462#define i_blocks_high osd2.linux2.l_i_blocks_high
463#define i_uid_low i_uid
464#define i_gid_low i_gid
465#define i_uid_high osd2.linux2.l_i_uid_high
466#define i_gid_high osd2.linux2.l_i_gid_high
467#define i_reserved2 osd2.linux2.l_i_reserved2
468
469#elif defined(__GNU__)
470
471#define i_translator osd1.hurd1.h_i_translator
472#define i_uid_high osd2.hurd2.h_i_uid_high
473#define i_gid_high osd2.hurd2.h_i_gid_high
474#define i_author osd2.hurd2.h_i_author
475
476#elif defined(__masix__)
477
478#define i_reserved1 osd1.masix1.m_i_reserved1
479#define i_file_acl_high osd2.masix2.m_i_file_acl_high
480#define i_reserved2 osd2.masix2.m_i_reserved2
481
482#endif /* defined(__KERNEL__) || defined(__linux__) */
483
484/*
485 * File system states
486 */
487#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
488#define EXT4_ERROR_FS 0x0002 /* Errors detected */
489#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
490
491/*
492 * Misc. filesystem flags
493 */
494#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
495#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
496#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
497
498/*
499 * Mount flags
500 */
501#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
502#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
503#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
504#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
505#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
506#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
507#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
508#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
509#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
510#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
511#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
512#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
513#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
514#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
515#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
516#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
517#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
518#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
519#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
520#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
521#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
522#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
523#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
524#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
525#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
526#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
527#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
528#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
529#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
530/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
531#ifndef _LINUX_EXT2_FS_H
532#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
533#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
534#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
535 EXT4_MOUNT_##opt)
536#else
537#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
538#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
539#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
540#endif
541
542#define ext4_set_bit ext2_set_bit
543#define ext4_set_bit_atomic ext2_set_bit_atomic
544#define ext4_clear_bit ext2_clear_bit
545#define ext4_clear_bit_atomic ext2_clear_bit_atomic
546#define ext4_test_bit ext2_test_bit
547#define ext4_find_first_zero_bit ext2_find_first_zero_bit
548#define ext4_find_next_zero_bit ext2_find_next_zero_bit
549#define ext4_find_next_bit ext2_find_next_bit
550
551/*
552 * Maximal mount counts between two filesystem checks
553 */
554#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
555#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
556
557/*
558 * Behaviour when detecting errors
559 */
560#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
561#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
562#define EXT4_ERRORS_PANIC 3 /* Panic */
563#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
564
565/*
566 * Structure of the super block
567 */
568struct ext4_super_block {
569/*00*/ __le32 s_inodes_count; /* Inodes count */
570 __le32 s_blocks_count_lo; /* Blocks count */
571 __le32 s_r_blocks_count_lo; /* Reserved blocks count */
572 __le32 s_free_blocks_count_lo; /* Free blocks count */
573/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
574 __le32 s_first_data_block; /* First Data Block */
575 __le32 s_log_block_size; /* Block size */
576 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
577/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
578 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
579 __le32 s_inodes_per_group; /* # Inodes per group */
580 __le32 s_mtime; /* Mount time */
581/*30*/ __le32 s_wtime; /* Write time */
582 __le16 s_mnt_count; /* Mount count */
583 __le16 s_max_mnt_count; /* Maximal mount count */
584 __le16 s_magic; /* Magic signature */
585 __le16 s_state; /* File system state */
586 __le16 s_errors; /* Behaviour when detecting errors */
587 __le16 s_minor_rev_level; /* minor revision level */
588/*40*/ __le32 s_lastcheck; /* time of last check */
589 __le32 s_checkinterval; /* max. time between checks */
590 __le32 s_creator_os; /* OS */
591 __le32 s_rev_level; /* Revision level */
592/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
593 __le16 s_def_resgid; /* Default gid for reserved blocks */
594 /*
595 * These fields are for EXT4_DYNAMIC_REV superblocks only.
596 *
597 * Note: the difference between the compatible feature set and
598 * the incompatible feature set is that if there is a bit set
599 * in the incompatible feature set that the kernel doesn't
600 * know about, it should refuse to mount the filesystem.
601 *
602 * e2fsck's requirements are more strict; if it doesn't know
603 * about a feature in either the compatible or incompatible
604 * feature set, it must abort and not try to meddle with
605 * things it doesn't understand...
606 */
607 __le32 s_first_ino; /* First non-reserved inode */
608 __le16 s_inode_size; /* size of inode structure */
609 __le16 s_block_group_nr; /* block group # of this superblock */
610 __le32 s_feature_compat; /* compatible feature set */
611/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
612 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
613/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
614/*78*/ char s_volume_name[16]; /* volume name */
615/*88*/ char s_last_mounted[64]; /* directory where last mounted */
616/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
617 /*
618 * Performance hints. Directory preallocation should only
619 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
620 */
621 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
622 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
623 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
624 /*
625 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
626 */
627/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
628/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
629 __le32 s_journal_dev; /* device number of journal file */
630 __le32 s_last_orphan; /* start of list of inodes to delete */
631 __le32 s_hash_seed[4]; /* HTREE hash seed */
632 __u8 s_def_hash_version; /* Default hash version to use */
633 __u8 s_reserved_char_pad;
634 __le16 s_desc_size; /* size of group descriptor */
635/*100*/ __le32 s_default_mount_opts;
636 __le32 s_first_meta_bg; /* First metablock block group */
637 __le32 s_mkfs_time; /* When the filesystem was created */
638 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
639 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
640/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
641 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
642 __le32 s_free_blocks_count_hi; /* Free blocks count */
643 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
644 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
645 __le32 s_flags; /* Miscellaneous flags */
646 __le16 s_raid_stride; /* RAID stride */
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */
651};
652
653#ifdef __KERNEL__
654static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
655{
656 return sb->s_fs_info;
657}
658static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
659{
660 return container_of(inode, struct ext4_inode_info, vfs_inode);
661}
662
663static inline struct timespec ext4_current_time(struct inode *inode)
664{
665 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
666 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
667}
668
669
670static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
671{
672 return ino == EXT4_ROOT_INO ||
673 ino == EXT4_JOURNAL_INO ||
674 ino == EXT4_RESIZE_INO ||
675 (ino >= EXT4_FIRST_INO(sb) &&
676 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
677}
678#else
679/* Assume that user mode programs are passing in an ext4fs superblock, not
680 * a kernel struct super_block. This will allow us to call the feature-test
681 * macros from user land. */
682#define EXT4_SB(sb) (sb)
683#endif
684
685#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
686
687/*
688 * Codes for operating systems
689 */
690#define EXT4_OS_LINUX 0
691#define EXT4_OS_HURD 1
692#define EXT4_OS_MASIX 2
693#define EXT4_OS_FREEBSD 3
694#define EXT4_OS_LITES 4
695
696/*
697 * Revision levels
698 */
699#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
700#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
701
702#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
703#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
704
705#define EXT4_GOOD_OLD_INODE_SIZE 128
706
707/*
708 * Feature set definitions
709 */
710
711#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
712 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
713#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
714 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
715#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
716 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
717#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
718 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
719#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
720 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
721#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
722 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
723#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
724 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
725#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
726 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
727#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
728 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
729
730#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
731#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
732#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
733#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
734#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
735#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
736
737#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
738#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
739#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
740#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
741#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
742#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
743#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
744
745#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
746#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
747#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
748#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
749#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
750#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
751#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
752#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
753#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
754
755#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
756#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
757 EXT4_FEATURE_INCOMPAT_RECOVER| \
758 EXT4_FEATURE_INCOMPAT_META_BG| \
759 EXT4_FEATURE_INCOMPAT_EXTENTS| \
760 EXT4_FEATURE_INCOMPAT_64BIT| \
761 EXT4_FEATURE_INCOMPAT_FLEX_BG)
762#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
763 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
764 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
765 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
766 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
767 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
768 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
769
770/*
771 * Default values for user and/or group using reserved blocks
772 */
773#define EXT4_DEF_RESUID 0
774#define EXT4_DEF_RESGID 0
775
776/*
777 * Default mount options
778 */
779#define EXT4_DEFM_DEBUG 0x0001
780#define EXT4_DEFM_BSDGROUPS 0x0002
781#define EXT4_DEFM_XATTR_USER 0x0004
782#define EXT4_DEFM_ACL 0x0008
783#define EXT4_DEFM_UID16 0x0010
784#define EXT4_DEFM_JMODE 0x0060
785#define EXT4_DEFM_JMODE_DATA 0x0020
786#define EXT4_DEFM_JMODE_ORDERED 0x0040
787#define EXT4_DEFM_JMODE_WBACK 0x0060
788
789/*
790 * Structure of a directory entry
791 */
792#define EXT4_NAME_LEN 255
793
794struct ext4_dir_entry {
795 __le32 inode; /* Inode number */
796 __le16 rec_len; /* Directory entry length */
797 __le16 name_len; /* Name length */
798 char name[EXT4_NAME_LEN]; /* File name */
799};
800
801/*
802 * The new version of the directory entry. Since EXT4 structures are
803 * stored in intel byte order, and the name_len field could never be
804 * bigger than 255 chars, it's safe to reclaim the extra byte for the
805 * file_type field.
806 */
807struct ext4_dir_entry_2 {
808 __le32 inode; /* Inode number */
809 __le16 rec_len; /* Directory entry length */
810 __u8 name_len; /* Name length */
811 __u8 file_type;
812 char name[EXT4_NAME_LEN]; /* File name */
813};
814
815/*
816 * Ext4 directory file types. Only the low 3 bits are used. The
817 * other bits are reserved for now.
818 */
819#define EXT4_FT_UNKNOWN 0
820#define EXT4_FT_REG_FILE 1
821#define EXT4_FT_DIR 2
822#define EXT4_FT_CHRDEV 3
823#define EXT4_FT_BLKDEV 4
824#define EXT4_FT_FIFO 5
825#define EXT4_FT_SOCK 6
826#define EXT4_FT_SYMLINK 7
827
828#define EXT4_FT_MAX 8
829
830/*
831 * EXT4_DIR_PAD defines the directory entries boundaries
832 *
833 * NOTE: It must be a multiple of 4
834 */
835#define EXT4_DIR_PAD 4
836#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
837#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
838 ~EXT4_DIR_ROUND)
839#define EXT4_MAX_REC_LEN ((1<<16)-1)
840
841static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
842{
843 unsigned len = le16_to_cpu(dlen);
844
845 if (len == EXT4_MAX_REC_LEN)
846 return 1 << 16;
847 return len;
848}
849
850static inline __le16 ext4_rec_len_to_disk(unsigned len)
851{
852 if (len == (1 << 16))
853 return cpu_to_le16(EXT4_MAX_REC_LEN);
854 else if (len > (1 << 16))
855 BUG();
856 return cpu_to_le16(len);
857}
858
859/*
860 * Hash Tree Directory indexing
861 * (c) Daniel Phillips, 2001
862 */
863
864#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
865 EXT4_FEATURE_COMPAT_DIR_INDEX) && \
866 (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
867#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
868#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
869
870/* Legal values for the dx_root hash_version field: */
871
872#define DX_HASH_LEGACY 0
873#define DX_HASH_HALF_MD4 1
874#define DX_HASH_TEA 2
875
876#ifdef __KERNEL__
877
878/* hash info structure used by the directory hash */
879struct dx_hash_info
880{
881 u32 hash;
882 u32 minor_hash;
883 int hash_version;
884 u32 *seed;
885};
886
887#define EXT4_HTREE_EOF 0x7fffffff
888
889/*
890 * Control parameters used by ext4_htree_next_block
891 */
892#define HASH_NB_ALWAYS 1
893
894
895/*
896 * Describe an inode's exact location on disk and in memory
897 */
898struct ext4_iloc
899{
900 struct buffer_head *bh;
901 unsigned long offset;
902 ext4_group_t block_group;
903};
904
905static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
906{
907 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
908}
909
910/*
911 * This structure is stuffed into the struct file's private_data field
912 * for directories. It is where we put information so that we can do
913 * readdir operations in hash tree order.
914 */
915struct dir_private_info {
916 struct rb_root root;
917 struct rb_node *curr_node;
918 struct fname *extra_fname;
919 loff_t last_pos;
920 __u32 curr_hash;
921 __u32 curr_minor_hash;
922 __u32 next_hash;
923};
924
925/* calculate the first block number of the group */
926static inline ext4_fsblk_t
927ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
928{
929 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
930 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
931}
932
933/*
934 * Special error return code only used by dx_probe() and its callers.
935 */
936#define ERR_BAD_DX_DIR -75000
937
938void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
939 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
940
941/*
942 * Function prototypes
943 */
944
945/*
946 * Ok, these declarations are also in <linux/kernel.h> but none of the
947 * ext4 source programs needs to include it so they are duplicated here.
948 */
949# define NORET_TYPE /**/
950# define ATTRIB_NORET __attribute__((noreturn))
951# define NORET_AND noreturn,
952
953/* balloc.c */
954extern unsigned int ext4_block_group(struct super_block *sb,
955 ext4_fsblk_t blocknr);
956extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
957 ext4_fsblk_t blocknr);
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
970 ext4_fsblk_t block, unsigned long count,
971 unsigned long *pdquot_freed_blocks);
972extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
973extern void ext4_check_blocks_bitmap (struct super_block *);
974extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
975 ext4_group_t block_group,
976 struct buffer_head ** bh);
977extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
978extern void ext4_init_block_alloc_info(struct inode *);
979extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
980
981/* dir.c */
982extern int ext4_check_dir_entry(const char *, struct inode *,
983 struct ext4_dir_entry_2 *,
984 struct buffer_head *, unsigned long);
985extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
986 __u32 minor_hash,
987 struct ext4_dir_entry_2 *dirent);
988extern void ext4_htree_free_dir_info(struct dir_private_info *p);
989
990/* fsync.c */
991extern int ext4_sync_file (struct file *, struct dentry *, int);
992
993/* hash.c */
994extern int ext4fs_dirhash(const char *name, int len, struct
995 dx_hash_info *hinfo);
996
997/* ialloc.c */
998extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
999extern void ext4_free_inode (handle_t *, struct inode *);
1000extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
1001extern unsigned long ext4_count_free_inodes (struct super_block *);
1002extern unsigned long ext4_count_dirs (struct super_block *);
1003extern void ext4_check_inodes_bitmap (struct super_block *);
1004extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
1005
1006/* mballoc.c */
1007extern long ext4_mb_stats;
1008extern long ext4_mb_max_to_scan;
1009extern int ext4_mb_init(struct super_block *, int);
1010extern int ext4_mb_release(struct super_block *);
1011extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1012 struct ext4_allocation_request *, int *);
1013extern int ext4_mb_reserve_blocks(struct super_block *, int);
1014extern void ext4_mb_discard_inode_preallocations(struct inode *);
1015extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *);
1019
1020
1021/* inode.c */
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1025 ext4_lblk_t, int, int *);
1026struct buffer_head *ext4_bread(handle_t *, struct inode *,
1027 ext4_lblk_t, int, int *);
1028int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1029 ext4_lblk_t iblock, unsigned long maxblocks,
1030 struct buffer_head *bh_result,
1031 int create, int extend_disksize);
1032
1033extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *);
1036extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1042extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1048 struct address_space *mapping, loff_t from);
1049
1050/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1053
1054/* migrate.c */
1055extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
1056 unsigned long);
1057/* namei.c */
1058extern int ext4_orphan_add(handle_t *, struct inode *);
1059extern int ext4_orphan_del(handle_t *, struct inode *);
1060extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1061 __u32 start_minor_hash, __u32 *next_hash);
1062
1063/* resize.c */
1064extern int ext4_group_add(struct super_block *sb,
1065 struct ext4_new_group_data *input);
1066extern int ext4_group_extend(struct super_block *sb,
1067 struct ext4_super_block *es,
1068 ext4_fsblk_t n_blocks_count);
1069
1070/* super.c */
1071extern void ext4_error (struct super_block *, const char *, const char *, ...)
1072 __attribute__ ((format (printf, 3, 4)));
1073extern void __ext4_std_error (struct super_block *, const char *, int);
1074extern void ext4_abort (struct super_block *, const char *, const char *, ...)
1075 __attribute__ ((format (printf, 3, 4)));
1076extern void ext4_warning (struct super_block *, const char *, const char *, ...)
1077 __attribute__ ((format (printf, 3, 4)));
1078extern void ext4_update_dynamic_rev (struct super_block *sb);
1079extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1080 __u32 compat);
1081extern int ext4_update_rocompat_feature(handle_t *handle,
1082 struct super_block *sb, __u32 rocompat);
1083extern int ext4_update_incompat_feature(handle_t *handle,
1084 struct super_block *sb, __u32 incompat);
1085extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
1086 struct ext4_group_desc *bg);
1087extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1088 struct ext4_group_desc *bg);
1089extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1090 struct ext4_group_desc *bg);
1091extern void ext4_block_bitmap_set(struct super_block *sb,
1092 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1093extern void ext4_inode_bitmap_set(struct super_block *sb,
1094 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1095extern void ext4_inode_table_set(struct super_block *sb,
1096 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1097
1098static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1099{
1100 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
1101 le32_to_cpu(es->s_blocks_count_lo);
1102}
1103
1104static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
1105{
1106 return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
1107 le32_to_cpu(es->s_r_blocks_count_lo);
1108}
1109
1110static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
1111{
1112 return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
1113 le32_to_cpu(es->s_free_blocks_count_lo);
1114}
1115
1116static inline void ext4_blocks_count_set(struct ext4_super_block *es,
1117 ext4_fsblk_t blk)
1118{
1119 es->s_blocks_count_lo = cpu_to_le32((u32)blk);
1120 es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
1121}
1122
1123static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
1124 ext4_fsblk_t blk)
1125{
1126 es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
1127 es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
1128}
1129
1130static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1131 ext4_fsblk_t blk)
1132{
1133 es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
1134 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
1135}
1136
1137static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1138{
1139 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1140 le32_to_cpu(raw_inode->i_size_lo);
1141}
1142
1143static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1144{
1145 raw_inode->i_size_lo = cpu_to_le32(i_size);
1146 raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
1147}
1148
1149static inline
1150struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1151 ext4_group_t group)
1152{
1153 struct ext4_group_info ***grp_info;
1154 long indexv, indexh;
1155 grp_info = EXT4_SB(sb)->s_group_info;
1156 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
1157 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
1158 return grp_info[indexv][indexh];
1159}
1160
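For illustration, a worked example of the two-level lookup above (hypothetical numbers, assuming 4 KiB blocks and the default 32-byte group descriptors, so EXT4_DESC_PER_BLOCK(sb) is 128 and EXT4_DESC_PER_BLOCK_BITS(sb) is 7):

	/* hypothetical: group 300 resolves to s_group_info[2][44] */
	indexv = 300 >> 7;	/* 300 / 128 = 2 */
	indexh = 300 & 127;	/* 300 % 128 = 44 */
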
1161
1162#define ext4_std_error(sb, errno) \
1163do { \
1164 if ((errno)) \
1165 __ext4_std_error((sb), __FUNCTION__, (errno)); \
1166} while (0)
1167
1168/*
1169 * Inodes and files operations
1170 */
1171
1172/* dir.c */
1173extern const struct file_operations ext4_dir_operations;
1174
1175/* file.c */
1176extern const struct inode_operations ext4_file_inode_operations;
1177extern const struct file_operations ext4_file_operations;
1178
1179/* namei.c */
1180extern const struct inode_operations ext4_dir_inode_operations;
1181extern const struct inode_operations ext4_special_inode_operations;
1182
1183/* symlink.c */
1184extern const struct inode_operations ext4_symlink_inode_operations;
1185extern const struct inode_operations ext4_fast_symlink_inode_operations;
1186
1187/* extents.c */
1188extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1189extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1190extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *);
1195extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1198 loff_t len);
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create,
1202 int extend_disksize);
1203#endif /* __KERNEL__ */
1204
1205#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 000000000000..75333b595fab
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19#ifndef _EXT4_EXTENTS
20#define _EXT4_EXTENTS
21
22#include "ext4.h"
23
24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
26 * becomes very small, so index split, in-depth growing and
27 * other hard changes happen much more often.
28 * This is for debug purposes only.
29 */
30#define AGGRESSIVE_TEST_
31
32/*
33 * With EXTENTS_STATS defined, the number of blocks and extents
34 * are collected in the truncate path. They'll be shown at
35 * umount time.
36 */
37#define EXTENTS_STATS__
38
39/*
40 * If CHECK_BINSEARCH is defined, then the results of the binary search
41 * will also be checked by linear search.
42 */
43#define CHECK_BINSEARCH__
44
45/*
46 * If EXT_DEBUG is defined you can use the 'extdebug' mount option
47 * to get lots of info about what's going on.
48 */
49#define EXT_DEBUG__
50#ifdef EXT_DEBUG
51#define ext_debug(a...) printk(a)
52#else
53#define ext_debug(a...)
54#endif
55
56/*
57 * If EXT_STATS is defined then stats numbers are collected.
58 * These number will be displayed at umount time.
59 */
60#define EXT_STATS_
61
62
63/*
64 * ext4_inode has i_block array (60 bytes total).
65 * The first 12 bytes store ext4_extent_header;
66 * the remainder stores an array of ext4_extent.
67 */
68
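A quick worked check of the layout just described: struct ext4_extent_header and struct ext4_extent (both defined below) are 12 bytes each, so the in-inode root holds (60 - 12) / 12 = 4 entries, while a 4 KiB tree block holds (4096 - 12) / 12 = 340.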
69/*
70 * This is the extent on-disk structure.
71 * It's used at the bottom of the tree.
72 */
73struct ext4_extent {
74 __le32 ee_block; /* first logical block extent covers */
75 __le16 ee_len; /* number of blocks covered by extent */
76 __le16 ee_start_hi; /* high 16 bits of physical block */
77 __le32 ee_start_lo; /* low 32 bits of physical block */
78};
79
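A minimal sketch of how the two halves of the physical block number recombine (illustrative expression only; the real helpers live alongside idx_pblock()/ext4_ext_store_pblock() declared further down and are implemented in the .c files):

	/* illustration: reassemble the 48-bit start block of extent 'ex' */
	ext4_fsblk_t start = le32_to_cpu(ex->ee_start_lo) |
			     ((ext4_fsblk_t)le16_to_cpu(ex->ee_start_hi) << 32);
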
80/*
81 * This is the index on-disk structure.
82 * It's used at all the levels except the bottom.
83 */
84struct ext4_extent_idx {
85 __le32 ei_block; /* index covers logical blocks from 'block' */
86 __le32 ei_leaf_lo; /* pointer to the physical block of the next *
87 * level. leaf or next index could be there */
88 __le16 ei_leaf_hi; /* high 16 bits of physical block */
89 __u16 ei_unused;
90};
91
92/*
93 * Each block (leaves and indexes), even the inode-stored one, has a header.
94 */
95struct ext4_extent_header {
96 __le16 eh_magic; /* probably will support different formats */
97 __le16 eh_entries; /* number of valid entries */
98 __le16 eh_max; /* capacity of store in entries */
99 __le16 eh_depth; /* does the tree have real underlying blocks? */
100 __le32 eh_generation; /* generation of the tree */
101};
102
103#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
104
105/*
106 * Array of ext4_ext_path contains path to some extent.
107 * Creation/lookup routines use it for traversal/splitting/etc.
108 * Truncate uses it to simulate recursive walking.
109 */
110struct ext4_ext_path {
111 ext4_fsblk_t p_block;
112 __u16 p_depth;
113 struct ext4_extent *p_ext;
114 struct ext4_extent_idx *p_idx;
115 struct ext4_extent_header *p_hdr;
116 struct buffer_head *p_bh;
117};
118
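A hedged sketch of the usual lookup/teardown pattern built on this path array (illustrative fragment; error handling trimmed):

	struct ext4_ext_path *path;
	int depth;

	path = ext4_ext_find_extent(inode, iblock, NULL);
	if (IS_ERR(path))
		return PTR_ERR(path);
	depth = ext_depth(inode);
	/* path[depth].p_ext is the leaf-level extent (if any) around iblock */
	ext4_ext_drop_refs(path);	/* drop the buffer_heads pinned in the path */
	kfree(path);
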
119/*
120 * structure for external API
121 */
122
123#define EXT4_EXT_CACHE_NO 0
124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2
126
127
128#define EXT_MAX_BLOCK 0xffffffff
129
130/*
131 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
132 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
133 * MSB of the ee_len field in the extent data structure to signify whether this
134 * particular extent is an initialized extent or an uninitialized (i.e.
135 * preallocated) one.
136 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
137 * uninitialized extent.
138 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
139 * uninitialized one. In other words, if MSB of ee_len is set, it is an
140 * uninitialized extent with only one special scenario when ee_len = 0x8000.
141 * In this case we can not have an uninitialized extent of zero length and
142 * thus we treat it as a special case of an initialized extent with 0x8000 length.
143 * This way we get better extent-to-group alignment for initialized extents.
144 * Hence, the maximum number of blocks we can have in an *initialized*
145 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
146 */
147#define EXT_INIT_MAX_LEN (1UL << 15)
148#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
149
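Worked examples of the ee_len encoding described above (hypothetical values, decoded exactly as by ext4_ext_is_uninitialized()/ext4_ext_get_actual_len() below):

	ee_len = 0x0005  ->  initialized,    5 blocks
	ee_len = 0x8005  ->  uninitialized,  5 blocks  (0x8005 - 0x8000)
	ee_len = 0x8000  ->  initialized,    32768 blocks (the special case)
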
150
151#define EXT_FIRST_EXTENT(__hdr__) \
152 ((struct ext4_extent *) (((char *) (__hdr__)) + \
153 sizeof(struct ext4_extent_header)))
154#define EXT_FIRST_INDEX(__hdr__) \
155 ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
156 sizeof(struct ext4_extent_header)))
157#define EXT_HAS_FREE_INDEX(__path__) \
158 (le16_to_cpu((__path__)->p_hdr->eh_entries) \
159 < le16_to_cpu((__path__)->p_hdr->eh_max))
160#define EXT_LAST_EXTENT(__hdr__) \
161 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
162#define EXT_LAST_INDEX(__hdr__) \
163 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
164#define EXT_MAX_EXTENT(__hdr__) \
165 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
166#define EXT_MAX_INDEX(__hdr__) \
167 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
168
169static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
170{
171 return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
172}
173
174static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
175{
176 return (struct ext4_extent_header *) bh->b_data;
177}
178
179static inline unsigned short ext_depth(struct inode *inode)
180{
181 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
182}
183
184static inline void ext4_ext_tree_changed(struct inode *inode)
185{
186 EXT4_I(inode)->i_ext_generation++;
187}
188
189static inline void
190ext4_ext_invalidate_cache(struct inode *inode)
191{
192 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
193}
194
195static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
196{
197 /* We can not have an uninitialized extent of zero length! */
198 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
199 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
200}
201
202static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
203{
204 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
205 return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
206}
207
208static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
209{
210 return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
211 le16_to_cpu(ext->ee_len) :
212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
213}
214
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *);
218extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
219extern int ext4_ext_try_to_merge(struct inode *inode,
220 struct ext4_ext_path *path,
221 struct ext4_extent *);
222extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
223extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
224extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
225 struct ext4_ext_path *);
226extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
227 ext4_lblk_t *, ext4_fsblk_t *);
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _EXT4_EXTENTS */
232
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 000000000000..26a4ae255d79
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned long ext4_group_t;
35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it is the physical block number of the block which was most-recently
62 * allocated to this file. This gives us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end
70
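A minimal sketch of how the two last_alloc_* fields are typically consulted when choosing an allocation goal (hypothetical fragment; iblock and goal are placeholders, the real logic lives in the block allocators):

	/* detect a linearly ascending write and try to stay contiguous */
	struct ext4_block_alloc_info *bai = EXT4_I(inode)->i_block_alloc_info;

	if (bai && iblock == bai->last_alloc_logical_block + 1)
		goal = bai->last_alloc_physical_block + 1;
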
71/*
72 * storage for cached extent
73 */
74struct ext4_ext_cache {
75 ext4_fsblk_t ec_start;
76 ext4_lblk_t ec_block;
77 __u32 ec_len; /* must be 32bit to return holes */
78 __u32 ec_type;
79};
80
81/*
82 * fourth extended file system inode data in memory
83 */
84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */
86 __u32 i_flags;
87 ext4_fsblk_t i_file_acl;
88 __u32 i_dtime;
89
90 /*
91 * i_block_group is the number of the block group which contains
92 * this file's inode. Constant across the lifetime of the inode,
93 * it is used for making block allocation decisions - we try to
94 * place a file's data blocks near its inode block, and new inodes
95 * near to their parent directory's inode.
96 */
97 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */
99
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR
105 /*
106 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention
108 * between readers of EAs and writers of regular file data, so
109 * instead we synchronize on xattr_sem when reading or changing
110 * EAs.
111 */
112 struct rw_semaphore xattr_sem;
113#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
115 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl;
117#endif
118
119 struct list_head i_orphan; /* unlinked but open inodes */
120
121 /*
122 * i_disksize keeps track of what the inode size is ON DISK, not
123 * in memory. During truncate, i_size is set to the new size by
124 * the VFS prior to calling ext4_truncate(), but the filesystem won't
125 * set i_disksize to 0 until the truncate is actually under way.
126 *
127 * The intent is that i_disksize always represents the blocks which
128 * are used by this file. This allows recovery to restart truncate
129 * on orphans if we crash during truncate. We actually write i_disksize
130 * into the on-disk inode when writing inodes out, instead of i_size.
131 *
132 * The only time when i_disksize and i_size may be different is when
133 * a truncate is in progress. The only things which change i_disksize
134 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
135 */
136 loff_t i_disksize;
137
138 /* on-disk additional length */
139 __u16 i_extra_isize;
140
141 /*
142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have i_data_sem.
150 */
151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode;
153
154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
161
162 /* mballoc */
163 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock;
165};
166
167#endif /* _EXT4_I */
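As a concrete illustration of the i_disksize rule documented above (hypothetical scenario, not text from the file): truncating a 10 MB file to 1 MB makes the VFS set i_size to 1 MB first, while i_disksize stays at 10 MB until ext4_truncate() is actually under way; the orphan-list entry then lets recovery finish the truncate after a crash instead of exposing stale blocks.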
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 000000000000..9255a7d28b24
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
1/*
2 * ext4_jbd2.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions.
13 */
14
15#ifndef _EXT4_JBD2_H
16#define _EXT4_JBD2_H
17
18#include <linux/fs.h>
19#include <linux/jbd2.h>
20#include "ext4.h"
21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23
24/* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction.
30 *
31 * For extents-enabled fs we may have to allocate and modify up to
32 * 5 levels of tree + root which are stored in the inode. */
33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U)
37
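The 8U in the non-extents case can be read straight off the comment above as a worked sum: 1 inode + 1 bitmap buffer + 3 indirection blocks + 1 group descriptor + 1 superblock + 1 data block = 8 credits; the 27U figure is the same accounting sized for an extent tree of up to 5 levels plus the in-inode root.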
38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode
40 * and the superblock, which are already accounted for. */
41
42#define EXT4_XATTR_TRANS_BLOCKS 6U
43
44/* Define the minimum size for a transaction which modifies data. This
45 * needs to take into account the fact that we may end up modifying two
46 * quota files too (one for the group, one for the user quota). The
47 * superblock only gets updated once, of course, so don't bother
48 * counting that again for the quota updates. */
49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53
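Plugging the macros in gives a quick worked figure (assuming a non-extents filesystem): with journalled quota enabled this is 8 + 6 - 2 + 2*2 = 16 credits per data-modifying transaction, and 8 + 6 - 2 + 0 = 12 without quota.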
54/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */
57
58#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
59
60/* Define an arbitrary limit for the amount of data we will anticipate
61 * writing to any given transaction. For unbounded transactions such as
62 * write(2) and truncate(2) we can write more than this, but we always
63 * start off at the maximum transaction size and grow the transaction
64 * optimistically as we go. */
65
66#define EXT4_MAX_TRANS_DATA 64U
67
68/* We break up a large truncate or write transaction once the handle's
69 * buffer credits get this low; we then need either to extend the
70 * transaction or to start a new one. Reserve enough space here for
71 * inode, bitmap, superblock, group and indirection updates for at least
72 * one block, plus two quota updates. Quota allocations are not
73 * needed. */
74
75#define EXT4_RESERVE_TRANS_BLOCKS 12U
76
77#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
78
79#ifdef CONFIG_QUOTA
80/* Amount of blocks needed for quota update - we know that the structure was
81 * allocated so we need to update only inode+data */
82#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
83/* Amount of blocks needed for quota insert/delete - we do some block writes
84 * but inode, sb and group updates are done only once */
85#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
86 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
87#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
88 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
89#else
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
91#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
92#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
93#endif
94
95int
96ext4_mark_iloc_dirty(handle_t *handle,
97 struct inode *inode,
98 struct ext4_iloc *iloc);
99
100/*
101 * On success, we end up with an outstanding reference count against
102 * iloc->bh. This _must_ be cleaned up later.
103 */
104
105int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
106 struct ext4_iloc *iloc);
107
108int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
109
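A hedged sketch of the usual calling pattern for the two helpers above (illustrative only; as used elsewhere in ext4, ext4_mark_iloc_dirty() is expected to drop the iloc.bh reference taken by ext4_reserve_inode_write()):

	struct ext4_iloc iloc;
	int err;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		return err;
	/* ... update the raw inode through iloc here ... */
	return ext4_mark_iloc_dirty(handle, inode, &iloc);	/* releases iloc.bh */
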
110/*
111 * Wrapper functions with which ext4 calls into JBD. The intent here is
112 * to allow these to be turned into appropriate stubs so ext4 can control
113 * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't
114 * been done yet.
115 */
116
117static inline void ext4_journal_release_buffer(handle_t *handle,
118 struct buffer_head *bh)
119{
120 jbd2_journal_release_buffer(handle, bh);
121}
122
123void ext4_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err);
125
126int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
127 struct buffer_head *bh);
128
129int __ext4_journal_get_write_access(const char *where, handle_t *handle,
130 struct buffer_head *bh);
131
132int __ext4_journal_forget(const char *where, handle_t *handle,
133 struct buffer_head *bh);
134
135int __ext4_journal_revoke(const char *where, handle_t *handle,
136 ext4_fsblk_t blocknr, struct buffer_head *bh);
137
138int __ext4_journal_get_create_access(const char *where,
139 handle_t *handle, struct buffer_head *bh);
140
141int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh);
143
144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__FUNCTION__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle);
161
162static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
163{
164 return ext4_journal_start_sb(inode->i_sb, nblocks);
165}
166
167#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__FUNCTION__, (handle))
169
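A minimal usage sketch of the start/stop wrappers above (hypothetical fragment; the credit count is only an example):

	handle_t *handle;
	int err;

	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	/* ... get write access to buffers, modify them, dirty the metadata ... */
	err = ext4_journal_stop(handle);
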
170static inline handle_t *ext4_journal_current_handle(void)
171{
172 return journal_current_handle();
173}
174
175static inline int ext4_journal_extend(handle_t *handle, int nblocks)
176{
177 return jbd2_journal_extend(handle, nblocks);
178}
179
180static inline int ext4_journal_restart(handle_t *handle, int nblocks)
181{
182 return jbd2_journal_restart(handle, nblocks);
183}
184
185static inline int ext4_journal_blocks_per_page(struct inode *inode)
186{
187 return jbd2_journal_blocks_per_page(inode);
188}
189
190static inline int ext4_journal_force_commit(journal_t *journal)
191{
192 return jbd2_journal_force_commit(journal);
193}
194
195/* super.c */
196int ext4_force_commit(struct super_block *sb);
197
198static inline int ext4_should_journal_data(struct inode *inode)
199{
200 if (!S_ISREG(inode->i_mode))
201 return 1;
202 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
203 return 1;
204 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
205 return 1;
206 return 0;
207}
208
209static inline int ext4_should_order_data(struct inode *inode)
210{
211 if (!S_ISREG(inode->i_mode))
212 return 0;
213 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
214 return 0;
215 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
216 return 1;
217 return 0;
218}
219
220static inline int ext4_should_writeback_data(struct inode *inode)
221{
222 if (!S_ISREG(inode->i_mode))
223 return 0;
224 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
225 return 0;
226 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
227 return 1;
228 return 0;
229}
230
231#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 000000000000..5802e69f2191
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 spinlock_t s_next_gen_lock;
56 u32 s_next_generation;
57 u32 s_hash_seed[4];
58 int s_def_hash_version;
59 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter;
62 struct blockgroup_lock s_blockgroup_lock;
63
64 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68
69 /* Journaling */
70 struct inode * s_journal_inode;
71 struct journal_s * s_journal;
72 struct list_head s_orphan;
73 unsigned long s_commit_interval;
74 struct block_device *journal_bdev;
75#ifdef CONFIG_JBD2_DEBUG
76 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
77 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
78#endif
79#ifdef CONFIG_QUOTA
80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
81 int s_jquota_fmt; /* Format of quota to use */
82#endif
83 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84
85#ifdef EXTENTS_STATS
86 /* ext4 extents stats */
87 unsigned long s_ext_min;
88 unsigned long s_ext_max;
89 unsigned long s_depth_max;
90 spinlock_t s_ext_stats_lock;
91 unsigned long s_ext_blocks;
92 unsigned long s_ext_extents;
93#endif
94
95 /* for buddy allocator */
96 struct ext4_group_info ***s_group_info;
97 struct inode *s_buddy_cache;
98 long s_blocks_reserved;
99 spinlock_t s_reserve_lock;
100 struct list_head s_active_transaction;
101 struct list_head s_closed_transaction;
102 struct list_head s_committed_transaction;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets, *s_mb_maxs;
106
107 /* tunables */
108 unsigned long s_stripe;
109 unsigned long s_mb_stream_request;
110 unsigned long s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan;
112 unsigned long s_mb_stats;
113 unsigned long s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start;
118
119 /* history to debug policy */
120 struct ext4_mb_history *s_mb_history;
121 int s_mb_history_cur;
122 int s_mb_history_max;
123 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146};
147
148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If the extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If the extent has less than EXT4_EXT_ZERO_LEN blocks, zero out directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * a fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successful insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
2405 * to insert an extent in the middle, zero out directly;
2406 * otherwise give the extent a chance to merge to the left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks because
2893 * If blocksize = 4096 offset = 3072 and len = 2048
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
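To make the new comment above concrete (worked example, not part of the patch): with a 4 KiB block size (blkbits = 12), offset = 3072 and len = 2048 cover bytes 3072..5119, i.e. logical blocks 0 and 1; block = 3072 >> 12 = 0 and max_blocks = (EXT4_BLOCK_ALIGN(5120, 12) >> 12) - 0 = 2, whereas a naive len >> blkbits would have given 0.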
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
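The comment added above the EXT4_BLOCK_ALIGN() call is the heart of the max_blocks computation: a request can straddle block boundaries, so len cannot simply be shifted down by blkbits. A minimal userspace sketch of the same arithmetic; block_align() below is an assumed stand-in for EXT4_BLOCK_ALIGN(), not the kernel macro itself:

    #include <stdio.h>

    /* assumed equivalent of EXT4_BLOCK_ALIGN(): round a byte count up to a block boundary */
    static unsigned long long block_align(unsigned long long size, unsigned blkbits)
    {
        return (size + (1ULL << blkbits) - 1) & ~((1ULL << blkbits) - 1);
    }

    int main(void)
    {
        unsigned blkbits = 12;                         /* blocksize = 4096 */
        unsigned long long offset = 3072, len = 2048;  /* the example from the comment */

        unsigned long long block = offset >> blkbits;  /* 0 */
        unsigned long long max_blocks =
            (block_align(len + offset, blkbits) >> blkbits) - block;

        /* len >> blkbits would give 0, yet the request touches blocks 0 and 1 */
        printf("block=%llu max_blocks=%llu naive=%llu\n",
               block, max_blocks, len >> blkbits);
        return 0;
    }

With the values from the comment this prints block=0 max_blocks=2 naive=0, which is why the aligned end minus the starting block is used instead of a plain shift of len.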
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
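The three added lines give fdatasync() a fast path: when only data integrity was requested and the inode carries no datasync-critical dirty state, no journal commit is forced. A small userspace model of that decision; the flag value and helper below are illustrative stand-ins, not the kernel definitions:

    #include <stdio.h>

    #define I_DIRTY_DATASYNC 0x2   /* stand-in: metadata that even fdatasync() must flush */

    /* model of the new check: a pure datasync with no datasync-critical
     * metadata pending can return without starting a commit */
    static int needs_commit(unsigned int i_state, int datasync)
    {
        if (datasync && !(i_state & I_DIRTY_DATASYNC))
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("datasync, only atime dirty -> commit? %d\n", needs_commit(0x0, 1));
        printf("datasync, i_size changed   -> commit? %d\n",
               needs_commit(I_DIRTY_DATASYNC, 1));
        return 0;
    }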
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
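Replacing the open-coded cpu_to_le16(le16_to_cpu(x) +/- n) sequences with le16_add_cpu() is mechanical, but it is worth seeing what the helper does to the on-disk little-endian counters. A userspace model, using glibc's htole16()/le16toh() as stand-ins for the kernel conversion macros:

    #include <endian.h>    /* htole16()/le16toh() stand in for cpu_to_le16()/le16_to_cpu() */
    #include <stdint.h>
    #include <stdio.h>

    /* model of le16_add_cpu(): add a CPU-order delta to a little-endian field in place */
    static void le16_add_cpu_model(uint16_t *var, int16_t val)
    {
        *var = htole16((uint16_t)(le16toh(*var) + val));
    }

    int main(void)
    {
        uint16_t bg_free_inodes_count = htole16(100);   /* on-disk representation */

        le16_add_cpu_model(&bg_free_inodes_count, 1);   /* an inode was freed */
        le16_add_cpu_model(&bg_free_inodes_count, -1);  /* an inode was allocated */
        printf("free inodes: %u\n", le16toh(bg_free_inodes_count));
        return 0;
    }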
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
 753 /* set extent flag only for directory and file */ 743 /* set extent flag only for directory, file and normal symlink */
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
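With the descriptor lookup handed to ext4_get_group_desc(), what remains of ext4_get_inode_block() is the arithmetic that maps an inode number to its slot in the group's inode table. A worked userspace example of that mapping; the geometry (8192 inodes per group, 256-byte inodes, 4 KiB blocks) is illustrative:

    #include <stdio.h>

    int main(void)
    {
        unsigned long inodes_per_group = 8192;
        unsigned long inode_size = 256, blocksize = 4096;
        unsigned long ino = 12345;

        unsigned long block_group = (ino - 1) / inodes_per_group;  /* 1 */
        unsigned long index       = (ino - 1) % inodes_per_group;  /* 4152 */
        unsigned long byte_off    = index * inode_size;
        unsigned long table_block = byte_off / blocksize;  /* block within the inode table */
        unsigned long in_block    = byte_off % blocksize;  /* offset inside that block */

        /* prints: group=1 table_block=259 offset_in_block=2048 */
        printf("group=%lu table_block=%lu offset_in_block=%lu\n",
               block_group, table_block, in_block);
        return 0;
    }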
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
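The ioctl.c hunks convert ext4_ioctl() from the BKL-protected .ioctl entry point to .unlocked_ioctl: the inode parameter disappears and is recovered from the struct file, and the compat path now just translates the pointer and forwards to the native handler. A hedged sketch of the general shape of such a conversion; the foo_* names and the FOO_IOC_GETSIZE command are placeholders, not ext4 code:

    #include <linux/fs.h>
    #include <linux/compat.h>
    #include <linux/ioctl.h>
    #include <linux/uaccess.h>

    #define FOO_IOC_GETSIZE _IOR('f', 1, loff_t)   /* hypothetical command */

    static long foo_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
        struct inode *inode = filp->f_dentry->d_inode;  /* no longer passed as a parameter */

        switch (cmd) {
        case FOO_IOC_GETSIZE:
            /* runs without the BKL; take whatever lock the command needs */
            return put_user(i_size_read(inode), (loff_t __user *)arg);
        default:
            return -ENOTTY;
        }
    }

    #ifdef CONFIG_COMPAT
    static long foo_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
        /* translate 32-bit commands here if they differ, then reuse the native handler */
        return foo_unlocked_ioctl(filp, cmd, (unsigned long) compat_ptr(arg));
    }
    #endif

    static const struct file_operations foo_fops = {
        .unlocked_ioctl = foo_unlocked_ioctl,
    #ifdef CONFIG_COMPAT
        .compat_ioctl   = foo_compat_ioctl,
    #endif
    };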
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ef97f19c2f9d..873ad9b3418c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
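The 288 lines removed here (tunables, mb_debug, the allocation-context and preallocation structures, and the small inline helpers) are dropped in favour of the new "mballoc.h" include at the top of the file, which presumably now carries these definitions. One of those helpers, ext4_grp_offs_to_block(), is the formula later hunks rely on to turn a (group, offset) pair into a physical block; a worked example with illustrative geometry:

    #include <stdio.h>

    int main(void)
    {
        /* illustrative layout: 32768 blocks per group, first data block 0 */
        unsigned long long blocks_per_group = 32768, first_data_block = 0;
        unsigned long long fe_group = 3, fe_start = 100;

        /* same formula as ext4_grp_offs_to_block() */
        unsigned long long block =
            fe_group * blocks_per_group + fe_start + first_data_block;

        printf("physical block = %llu\n", block);   /* 98404 */
        return 0;
    }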
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2449 int i; 2155 int i;
2450 2156
2451 if (sbi->s_mb_proc != NULL) { 2157 if (sbi->s_mb_proc != NULL) {
2452 struct proc_dir_entry *p; 2158 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
2453 p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); 2159 &ext4_mb_seq_history_fops, sb);
2454 if (p) { 2160 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
2455 p->proc_fops = &ext4_mb_seq_history_fops; 2161 &ext4_mb_seq_groups_fops, sb);
2456 p->data = sb;
2457 }
2458 p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
2459 if (p) {
2460 p->proc_fops = &ext4_mb_seq_groups_fops;
2461 p->data = sb;
2462 }
2463 } 2162 }
2464 2163
2465 sbi->s_mb_history_max = 1000; 2164 sbi->s_mb_history_max = 1000;
@@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2472 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2473} 2172}
2474 2173
2475static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2476{ 2176{
2477 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2478 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2572 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2573 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2574 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2575 i--;
2576 goto err_freebuddy; 2275 goto err_freebuddy;
2577 } 2276 }
2578 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2579 if (desc == NULL) { 2278 if (desc == NULL) {
2580 printk(KERN_ERR 2279 printk(KERN_ERR
2581 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2582 goto err_freebuddy; 2282 goto err_freebuddy;
2583 } 2283 }
2584 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2618 return 0; 2318 return 0;
2619 2319
2620err_freebuddy: 2320err_freebuddy:
2621 while (i >= 0) { 2321 while (i-- > 0)
2622 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2623 i--;
2624 }
2625 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2626err_freemeta: 2324err_freemeta:
2627 while (--i >= 0) 2325 while (i-- > 0)
2628 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2629 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2630err_freesgi: 2328err_freesgi:
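Both error-path loops now use the `while (i-- > 0)` idiom, which releases exactly the i entries that were set up before the failure, and the added `i++` before the descriptor-read goto accounts for the group-info slot that had already been allocated in that iteration. A userspace illustration of the idiom:

    #include <stdio.h>
    #include <stdlib.h>

    /* on entry to cleanup, i is the number of slots successfully set up,
     * so the loop frees item[0] .. item[i-1] and nothing else */
    int main(void)
    {
        enum { N = 5 };
        void *item[N];
        int i;

        for (i = 0; i < N; i++) {
            item[i] = malloc(16);
            if (!item[i])
                goto cleanup;              /* i slots were allocated before this one */
        }
        printf("all %d allocated\n", N);   /* here i == N, so cleanup frees them all */
    cleanup:
        while (i-- > 0)
            free(item[i]);
        return 0;
    }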
@@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2808 return 0; 2506 return 0;
2809} 2507}
2810 2508
2811static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2812{ 2511{
2813 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2814 int err; 2513 int err;
@@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
2867 mb_debug("freed %u blocks in %u structures\n", count, count2); 2566 mb_debug("freed %u blocks in %u structures\n", count, count2);
2868} 2567}
2869 2568
2870#define EXT4_ROOT "ext4"
2871#define EXT4_MB_STATS_NAME "stats" 2569#define EXT4_MB_STATS_NAME "stats"
2872#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" 2570#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2873#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" 2571#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
@@ -2941,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2941 struct proc_dir_entry *proc; 2639 struct proc_dir_entry *proc;
2942 char devname[64]; 2640 char devname[64];
2943 2641
2944 snprintf(devname, sizeof(devname) - 1, "%s", 2642 bdevname(sb->s_bdev, devname);
2945 bdevname(sb->s_bdev, devname));
2946 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); 2643 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2947 2644
2948 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); 2645 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
@@ -2976,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2976 if (sbi->s_mb_proc == NULL) 2673 if (sbi->s_mb_proc == NULL)
2977 return -EINVAL; 2674 return -EINVAL;
2978 2675
2979 snprintf(devname, sizeof(devname) - 1, "%s", 2676 bdevname(sb->s_bdev, devname);
2980 bdevname(sb->s_bdev, devname));
2981 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2677 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
2982 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2678 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
2983 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2679 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -3007,9 +2703,9 @@ int __init init_ext4_mballoc(void)
3007 return -ENOMEM; 2703 return -ENOMEM;
3008 } 2704 }
3009#ifdef CONFIG_PROC_FS 2705#ifdef CONFIG_PROC_FS
3010 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2706 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
3011 if (proc_root_ext4 == NULL) 2707 if (proc_root_ext4 == NULL)
3012 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2708 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
3013#endif 2709#endif
3014 return 0; 2710 return 0;
3015} 2711}
@@ -3020,7 +2716,7 @@ void exit_ext4_mballoc(void)
3020 kmem_cache_destroy(ext4_pspace_cachep); 2716 kmem_cache_destroy(ext4_pspace_cachep);
3021 kmem_cache_destroy(ext4_ac_cachep); 2717 kmem_cache_destroy(ext4_ac_cachep);
3022#ifdef CONFIG_PROC_FS 2718#ifdef CONFIG_PROC_FS
3023 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2719 remove_proc_entry("fs/ext4", NULL);
3024#endif 2720#endif
3025} 2721}
3026 2722
@@ -3029,7 +2725,8 @@ void exit_ext4_mballoc(void)
3029 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2725 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3030 * Returns 0 if success or error code 2726 * Returns 0 if success or error code
3031 */ 2727 */
3032static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2728static noinline_for_stack int
2729ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3033 handle_t *handle) 2730 handle_t *handle)
3034{ 2731{
3035 struct buffer_head *bitmap_bh = NULL; 2732 struct buffer_head *bitmap_bh = NULL;
@@ -3039,7 +2736,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3039 struct ext4_sb_info *sbi; 2736 struct ext4_sb_info *sbi;
3040 struct super_block *sb; 2737 struct super_block *sb;
3041 ext4_fsblk_t block; 2738 ext4_fsblk_t block;
3042 int err; 2739 int err, len;
3043 2740
3044 BUG_ON(ac->ac_status != AC_STATUS_FOUND); 2741 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3045 BUG_ON(ac->ac_b_ex.fe_len <= 0); 2742 BUG_ON(ac->ac_b_ex.fe_len <= 0);
@@ -3073,14 +2770,27 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3073 + ac->ac_b_ex.fe_start 2770 + ac->ac_b_ex.fe_start
3074 + le32_to_cpu(es->s_first_data_block); 2771 + le32_to_cpu(es->s_first_data_block);
3075 2772
3076 if (block == ext4_block_bitmap(sb, gdp) || 2773 len = ac->ac_b_ex.fe_len;
3077 block == ext4_inode_bitmap(sb, gdp) || 2774 if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
3078 in_range(block, ext4_inode_table(sb, gdp), 2775 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
3079 EXT4_SB(sb)->s_itb_per_group)) { 2776 in_range(block, ext4_inode_table(sb, gdp),
3080 2777 EXT4_SB(sb)->s_itb_per_group) ||
3081 ext4_error(sb, __FUNCTION__, 2778 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2779 EXT4_SB(sb)->s_itb_per_group)) {
2780 ext4_error(sb, __func__,
3082 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3083 block); 2782 block);
2783 /* File system mounted not to panic on error
2784 * Fix the bitmap and repeat the block allocation
2785 * We leak some of the blocks here.
2786 */
2787 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
2788 bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2789 ac->ac_b_ex.fe_len);
2790 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2791 if (!err)
2792 err = -EAGAIN;
2793 goto out_err;
3084 } 2794 }
3085#ifdef AGGRESSIVE_CHECK 2795#ifdef AGGRESSIVE_CHECK
3086 { 2796 {
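The single-block comparisons against the block bitmap, inode bitmap and inode table become range checks, because an allocation of fe_len blocks can overlap a system zone without starting exactly on one of those blocks; on overlap the bits are re-marked and the allocation retried via -EAGAIN instead of silently handing out metadata blocks. A userspace example using the same in_range() helper; the block numbers are made up:

    #include <stdio.h>

    /* the helper mballoc uses */
    #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)

    int main(void)
    {
        /* illustrative group layout */
        unsigned long long block_bitmap = 1000;
        unsigned long long inode_table  = 1002, itb_per_group = 512;

        /* candidate extent: 8 blocks starting just below the metadata, i.e. 998..1005 */
        unsigned long long block = 998, len = 8;

        int bad = in_range(block_bitmap, block, len)                     /* bitmap inside extent  */
               || in_range(block, inode_table, itb_per_group)            /* extent start in table */
               || in_range(block + len - 1, inode_table, itb_per_group); /* extent end in table   */

        /* the old check only looked at `block` itself (998) and would have missed this */
        printf("overlaps system zone: %s\n", bad ? "yes" : "no");
        return 0;
    }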
@@ -3102,9 +2812,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3102 ac->ac_b_ex.fe_group, 2812 ac->ac_b_ex.fe_group,
3103 gdp)); 2813 gdp));
3104 } 2814 }
3105 gdp->bg_free_blocks_count = 2815 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3106 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3107 - ac->ac_b_ex.fe_len);
3108 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2816 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3109 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2817 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3110 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2818 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3138,7 +2846,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2846 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3139 else 2847 else
3140 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2848 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3141 mb_debug("#%u: goal %lu blocks for locality group\n", 2849 mb_debug("#%u: goal %u blocks for locality group\n",
3142 current->pid, ac->ac_g_ex.fe_len); 2850 current->pid, ac->ac_g_ex.fe_len);
3143} 2851}
3144 2852
@@ -3146,15 +2854,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3146 * Normalization means making request better in terms of 2854 * Normalization means making request better in terms of
3147 * size and alignment 2855 * size and alignment
3148 */ 2856 */
3149static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2857static noinline_for_stack void
2858ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3150 struct ext4_allocation_request *ar) 2859 struct ext4_allocation_request *ar)
3151{ 2860{
3152 int bsbits, max; 2861 int bsbits, max;
3153 ext4_lblk_t end; 2862 ext4_lblk_t end;
3154 struct list_head *cur;
3155 loff_t size, orig_size, start_off; 2863 loff_t size, orig_size, start_off;
3156 ext4_lblk_t start, orig_start; 2864 ext4_lblk_t start, orig_start;
3157 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2865 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2866 struct ext4_prealloc_space *pa;
3158 2867
3159 /* do normalize only data requests, metadata requests 2868 /* do normalize only data requests, metadata requests
3160 do not need preallocation */ 2869 do not need preallocation */
@@ -3184,12 +2893,11 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3184 if (size < i_size_read(ac->ac_inode)) 2893 if (size < i_size_read(ac->ac_inode))
3185 size = i_size_read(ac->ac_inode); 2894 size = i_size_read(ac->ac_inode);
3186 2895
3187 /* max available blocks in a free group */ 2896 /* max size of free chunks */
3188 max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - 2897 max = 2 << bsbits;
3189 EXT4_SB(ac->ac_sb)->s_itb_per_group;
3190 2898
3191#define NRL_CHECK_SIZE(req, size, max,bits) \ 2899#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3192 (req <= (size) || max <= ((size) >> bits)) 2900 (req <= (size) || max <= (chunk_size))
3193 2901
3194 /* first, try to predict filesize */ 2902 /* first, try to predict filesize */
3195 /* XXX: should this table be tunable? */ 2903 /* XXX: should this table be tunable? */
@@ -3208,16 +2916,16 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3208 size = 512 * 1024; 2916 size = 512 * 1024;
3209 } else if (size <= 1024 * 1024) { 2917 } else if (size <= 1024 * 1024) {
3210 size = 1024 * 1024; 2918 size = 1024 * 1024;
3211 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { 2919 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3212 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2920 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3213 (20 - bsbits)) << 20; 2921 (21 - bsbits)) << 21;
3214 size = 1024 * 1024; 2922 size = 2 * 1024 * 1024;
3215 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { 2923 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3216 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2924 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3217 (22 - bsbits)) << 22; 2925 (22 - bsbits)) << 22;
3218 size = 4 * 1024 * 1024; 2926 size = 4 * 1024 * 1024;
3219 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, 2927 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3220 (8<<20)>>bsbits, max, bsbits)) { 2928 (8<<20)>>bsbits, max, 8 * 1024)) {
3221 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2929 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3222 (23 - bsbits)) << 23; 2930 (23 - bsbits)) << 23;
3223 size = 8 * 1024 * 1024; 2931 size = 8 * 1024 * 1024;
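Each rung of the rewritten ladder rounds the request's logical start down to its chunk boundary in bytes, and NRL_CHECK_SIZE() now compares against an explicit chunk size rather than a group-derived maximum shifted by bsbits. A worked example of the new 2 MiB rung with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned bsbits = 12;                      /* 4 KiB blocks */
        unsigned long long fe_logical = 1000;      /* logical block of the request */

        /* the 2 MiB rung: start_off = (fe_logical >> (21 - bsbits)) << 21 */
        unsigned long long byte_off  = fe_logical << bsbits;                /* 4096000 */
        unsigned long long start_off = (fe_logical >> (21 - bsbits)) << 21; /* 2097152 */
        unsigned long long size      = 2 * 1024 * 1024;

        /* start_off is simply byte_off rounded down to a 2 MiB boundary */
        printf("byte_off=%llu start_off=%llu size=%llu\n", byte_off, start_off, size);
        return 0;
    }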
@@ -3240,12 +2948,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3240 2948
3241 /* check we don't cross already preallocated blocks */ 2949 /* check we don't cross already preallocated blocks */
3242 rcu_read_lock(); 2950 rcu_read_lock();
3243 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2951 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244 struct ext4_prealloc_space *pa;
3245 unsigned long pa_end; 2952 unsigned long pa_end;
3246 2953
3247 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3248
3249 if (pa->pa_deleted) 2954 if (pa->pa_deleted)
3250 continue; 2955 continue;
3251 spin_lock(&pa->pa_lock); 2956 spin_lock(&pa->pa_lock);
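This hunk and the later ones in ext4_mb_use_preallocated() swap list_for_each_rcu() plus a manual list_entry() for list_for_each_entry_rcu(), which folds the container lookup into the iterator and lets the struct list_head *cur temporary go away. The shape of the change, shown as a kernel-style fragment rather than runnable code (ei and the pa_inode_list member are as in the surrounding diff, and both loops sit inside rcu_read_lock()/rcu_read_unlock()):

    /* before: walk raw list_head nodes, recover the container by hand */
    struct list_head *cur;
    list_for_each_rcu(cur, &ei->i_prealloc_list) {
        struct ext4_prealloc_space *pa =
            list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
        /* ... use pa ... */
    }

    /* after: the iterator hands back the container directly */
    struct ext4_prealloc_space *pa;
    list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
        /* ... use pa ... */
    }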
@@ -3287,10 +2992,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3287 2992
3288 /* XXX: extra loop to check we really don't overlap preallocations */ 2993 /* XXX: extra loop to check we really don't overlap preallocations */
3289 rcu_read_lock(); 2994 rcu_read_lock();
3290 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2995 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3291 struct ext4_prealloc_space *pa;
3292 unsigned long pa_end; 2996 unsigned long pa_end;
3293 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3294 spin_lock(&pa->pa_lock); 2997 spin_lock(&pa->pa_lock);
3295 if (pa->pa_deleted == 0) { 2998 if (pa->pa_deleted == 0) {
3296 pa_end = pa->pa_lstart + pa->pa_len; 2999 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3382,7 +3085,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3382 BUG_ON(pa->pa_free < len); 3085 BUG_ON(pa->pa_free < len);
3383 pa->pa_free -= len; 3086 pa->pa_free -= len;
3384 3087
3385 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3088 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3386} 3089}
3387 3090
3388/* 3091/*
@@ -3412,12 +3115,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3412/* 3115/*
3413 * search goal blocks in preallocated space 3116 * search goal blocks in preallocated space
3414 */ 3117 */
3415static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3118static noinline_for_stack int
3119ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3416{ 3120{
3417 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3121 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3418 struct ext4_locality_group *lg; 3122 struct ext4_locality_group *lg;
3419 struct ext4_prealloc_space *pa; 3123 struct ext4_prealloc_space *pa;
3420 struct list_head *cur;
3421 3124
3422 /* only data can be preallocated */ 3125 /* only data can be preallocated */
3423 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3126 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3425,8 +3128,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3425 3128
3426 /* first, try per-file preallocation */ 3129 /* first, try per-file preallocation */
3427 rcu_read_lock(); 3130 rcu_read_lock();
3428 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3131 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3429 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3430 3132
3431 /* all fields in this condition don't change, 3133 /* all fields in this condition don't change,
3432 * so we can skip locking for them */ 3134 * so we can skip locking for them */
@@ -3458,8 +3160,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3458 return 0; 3160 return 0;
3459 3161
3460 rcu_read_lock(); 3162 rcu_read_lock();
3461 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3163 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3462 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3463 spin_lock(&pa->pa_lock); 3164 spin_lock(&pa->pa_lock);
3464 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3165 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3465 atomic_inc(&pa->pa_count); 3166 atomic_inc(&pa->pa_count);
@@ -3579,7 +3280,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3579/* 3280/*
3580 * creates new preallocated space for given inode 3281 * creates new preallocated space for given inode
3581 */ 3282 */
3582static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3283static noinline_for_stack int
3284ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3583{ 3285{
3584 struct super_block *sb = ac->ac_sb; 3286 struct super_block *sb = ac->ac_sb;
3585 struct ext4_prealloc_space *pa; 3287 struct ext4_prealloc_space *pa;
@@ -3666,7 +3368,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3666/* 3368/*
3667 * creates new preallocated space for locality group inodes belongs to 3369 * creates new preallocated space for locality group inodes belongs to
3668 */ 3370 */
3669static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3371static noinline_for_stack int
3372ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3670{ 3373{
3671 struct super_block *sb = ac->ac_sb; 3374 struct super_block *sb = ac->ac_sb;
3672 struct ext4_locality_group *lg; 3375 struct ext4_locality_group *lg;
@@ -3739,11 +3442,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3739 * the caller MUST hold group/inode locks. 3442 * the caller MUST hold group/inode locks.
3740 * TODO: optimize the case when there are no in-core structures yet 3443 * TODO: optimize the case when there are no in-core structures yet
3741 */ 3444 */
3742static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3445static noinline_for_stack int
3743 struct buffer_head *bitmap_bh, 3446ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3744 struct ext4_prealloc_space *pa) 3447 struct ext4_prealloc_space *pa,
3448 struct ext4_allocation_context *ac)
3745{ 3449{
3746 struct ext4_allocation_context *ac;
3747 struct super_block *sb = e4b->bd_sb; 3450 struct super_block *sb = e4b->bd_sb;
3748 struct ext4_sb_info *sbi = EXT4_SB(sb); 3451 struct ext4_sb_info *sbi = EXT4_SB(sb);
3749 unsigned long end; 3452 unsigned long end;
@@ -3759,8 +3462,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3759 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3462 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3760 end = bit + pa->pa_len; 3463 end = bit + pa->pa_len;
3761 3464
3762 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3763
3764 if (ac) { 3465 if (ac) {
3765 ac->ac_sb = sb; 3466 ac->ac_sb = sb;
3766 ac->ac_inode = pa->pa_inode; 3467 ac->ac_inode = pa->pa_inode;
@@ -3797,7 +3498,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 pa, (unsigned long) pa->pa_lstart, 3498 pa, (unsigned long) pa->pa_lstart,
3798 (unsigned long) pa->pa_pstart, 3499 (unsigned long) pa->pa_pstart,
3799 (unsigned long) pa->pa_len); 3500 (unsigned long) pa->pa_len);
3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3501 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3801 free, pa->pa_free); 3502 free, pa->pa_free);
3802 /* 3503 /*
3803 * pa is already deleted so we use the value obtained 3504 * pa is already deleted so we use the value obtained
@@ -3805,22 +3506,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3805 */ 3506 */
3806 } 3507 }
3807 atomic_add(free, &sbi->s_mb_discarded); 3508 atomic_add(free, &sbi->s_mb_discarded);
3808 if (ac)
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 3509
3811 return err; 3510 return err;
3812} 3511}
3813 3512
3814static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3513static noinline_for_stack int
3815 struct ext4_prealloc_space *pa) 3514ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3515 struct ext4_prealloc_space *pa,
3516 struct ext4_allocation_context *ac)
3816{ 3517{
3817 struct ext4_allocation_context *ac;
3818 struct super_block *sb = e4b->bd_sb; 3518 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group; 3519 ext4_group_t group;
3820 ext4_grpblk_t bit; 3520 ext4_grpblk_t bit;
3821 3521
3822 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3823
3824 if (ac) 3522 if (ac)
3825 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3523 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3826 3524
@@ -3838,7 +3536,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3838 ac->ac_b_ex.fe_len = pa->pa_len; 3536 ac->ac_b_ex.fe_len = pa->pa_len;
3839 ac->ac_b_ex.fe_logical = 0; 3537 ac->ac_b_ex.fe_logical = 0;
3840 ext4_mb_store_history(ac); 3538 ext4_mb_store_history(ac);
3841 kmem_cache_free(ext4_ac_cachep, ac);
3842 } 3539 }
3843 3540
3844 return 0; 3541 return 0;
@@ -3853,12 +3550,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3853 * - how many do we discard 3550 * - how many do we discard
3854 * 1) how many requested 3551 * 1) how many requested
3855 */ 3552 */
3856static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3553static noinline_for_stack int
3554ext4_mb_discard_group_preallocations(struct super_block *sb,
3857 ext4_group_t group, int needed) 3555 ext4_group_t group, int needed)
3858{ 3556{
3859 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3557 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3860 struct buffer_head *bitmap_bh = NULL; 3558 struct buffer_head *bitmap_bh = NULL;
3861 struct ext4_prealloc_space *pa, *tmp; 3559 struct ext4_prealloc_space *pa, *tmp;
3560 struct ext4_allocation_context *ac;
3862 struct list_head list; 3561 struct list_head list;
3863 struct ext4_buddy e4b; 3562 struct ext4_buddy e4b;
3864 int err; 3563 int err;
@@ -3886,6 +3585,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3886 grp = ext4_get_group_info(sb, group); 3585 grp = ext4_get_group_info(sb, group);
3887 INIT_LIST_HEAD(&list); 3586 INIT_LIST_HEAD(&list);
3888 3587
3588 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3889repeat: 3589repeat:
3890 ext4_lock_group(sb, group); 3590 ext4_lock_group(sb, group);
3891 list_for_each_entry_safe(pa, tmp, 3591 list_for_each_entry_safe(pa, tmp,
@@ -3940,9 +3640,9 @@ repeat:
3940 spin_unlock(pa->pa_obj_lock); 3640 spin_unlock(pa->pa_obj_lock);
3941 3641
3942 if (pa->pa_linear) 3642 if (pa->pa_linear)
3943 ext4_mb_release_group_pa(&e4b, pa); 3643 ext4_mb_release_group_pa(&e4b, pa, ac);
3944 else 3644 else
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3645 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3946 3646
3947 list_del(&pa->u.pa_tmp_list); 3647 list_del(&pa->u.pa_tmp_list);
3948 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3648 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3950,6 +3650,8 @@ repeat:
3950 3650
3951out: 3651out:
3952 ext4_unlock_group(sb, group); 3652 ext4_unlock_group(sb, group);
3653 if (ac)
3654 kmem_cache_free(ext4_ac_cachep, ac);
3953 ext4_mb_release_desc(&e4b); 3655 ext4_mb_release_desc(&e4b);
3954 put_bh(bitmap_bh); 3656 put_bh(bitmap_bh);
3955 return free; 3657 return free;
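
The hunks above move the kmem_cache_alloc() of the scratch ext4_allocation_context out of the two release helpers and into their caller, which allocates it once, hands it to every release call, and frees it after the loop. A minimal standalone sketch of that shape (all toy_* names invented here, not the ext4 API) might look like this:

/*
 * Toy illustration of the restructuring above: instead of each release
 * helper allocating its own scratch context, the caller allocates one
 * context, passes it to every release call, and frees it once at the end.
 * A NULL context is tolerated, just as the patched helpers tolerate a
 * failed allocation.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_context {
	int op;				/* stands in for ac->ac_op etc. */
};

static void release_one(int pa_index, struct toy_context *ctx)
{
	if (ctx)
		ctx->op = pa_index;	/* record history only when we have a ctx */
	printf("released prealloc space %d\n", pa_index);
}

static void discard_all(int count)
{
	struct toy_context *ctx = malloc(sizeof(*ctx));	/* one allocation */
	int i;

	for (i = 0; i < count; i++)
		release_one(i, ctx);	/* reuse the same scratch context */

	free(ctx);			/* one free; free(NULL) is a no-op */
}

int main(void)
{
	discard_all(3);
	return 0;
}
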
@@ -3970,6 +3672,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3970 struct super_block *sb = inode->i_sb; 3672 struct super_block *sb = inode->i_sb;
3971 struct buffer_head *bitmap_bh = NULL; 3673 struct buffer_head *bitmap_bh = NULL;
3972 struct ext4_prealloc_space *pa, *tmp; 3674 struct ext4_prealloc_space *pa, *tmp;
3675 struct ext4_allocation_context *ac;
3973 ext4_group_t group = 0; 3676 ext4_group_t group = 0;
3974 struct list_head list; 3677 struct list_head list;
3975 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
@@ -3984,6 +3687,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3984 3687
3985 INIT_LIST_HEAD(&list); 3688 INIT_LIST_HEAD(&list);
3986 3689
3690 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3987repeat: 3691repeat:
3988 /* first, collect all pa's in the inode */ 3692 /* first, collect all pa's in the inode */
3989 spin_lock(&ei->i_prealloc_lock); 3693 spin_lock(&ei->i_prealloc_lock);
@@ -4048,7 +3752,7 @@ repeat:
4048 3752
4049 ext4_lock_group(sb, group); 3753 ext4_lock_group(sb, group);
4050 list_del(&pa->pa_group_list); 3754 list_del(&pa->pa_group_list);
4051 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3755 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4052 ext4_unlock_group(sb, group); 3756 ext4_unlock_group(sb, group);
4053 3757
4054 ext4_mb_release_desc(&e4b); 3758 ext4_mb_release_desc(&e4b);
@@ -4057,6 +3761,8 @@ repeat:
4057 list_del(&pa->u.pa_tmp_list); 3761 list_del(&pa->u.pa_tmp_list);
4058 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3762 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4059 } 3763 }
3764 if (ac)
3765 kmem_cache_free(ext4_ac_cachep, ac);
4060} 3766}
4061 3767
4062/* 3768/*
@@ -4116,7 +3822,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3822 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4117 start, pa->pa_len); 3823 start, pa->pa_len);
4118 } 3824 }
4119 ext4_lock_group(sb, i); 3825 ext4_unlock_group(sb, i);
4120 3826
4121 if (grp->bb_free == 0) 3827 if (grp->bb_free == 0)
4122 continue; 3828 continue;
@@ -4175,7 +3881,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4175 mutex_lock(&ac->ac_lg->lg_mutex); 3881 mutex_lock(&ac->ac_lg->lg_mutex);
4176} 3882}
4177 3883
4178static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3884static noinline_for_stack int
3885ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4179 struct ext4_allocation_request *ar) 3886 struct ext4_allocation_request *ar)
4180{ 3887{
4181 struct super_block *sb = ar->inode->i_sb; 3888 struct super_block *sb = ar->inode->i_sb;
@@ -4338,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4338 4045
4339 ac->ac_op = EXT4_MB_HISTORY_ALLOC; 4046 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4340 ext4_mb_normalize_request(ac, ar); 4047 ext4_mb_normalize_request(ac, ar);
4341
4342repeat: 4048repeat:
4343 /* allocate space in core */ 4049 /* allocate space in core */
4344 ext4_mb_regular_allocator(ac); 4050 ext4_mb_regular_allocator(ac);
@@ -4352,10 +4058,21 @@ repeat:
4352 } 4058 }
4353 4059
4354 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4060 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4355 ext4_mb_mark_diskspace_used(ac, handle); 4061 *errp = ext4_mb_mark_diskspace_used(ac, handle);
4356 *errp = 0; 4062 if (*errp == -EAGAIN) {
4357 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4063 ac->ac_b_ex.fe_group = 0;
4358 ar->len = ac->ac_b_ex.fe_len; 4064 ac->ac_b_ex.fe_start = 0;
4065 ac->ac_b_ex.fe_len = 0;
4066 ac->ac_status = AC_STATUS_CONTINUE;
4067 goto repeat;
4068 } else if (*errp) {
4069 ac->ac_b_ex.fe_len = 0;
4070 ar->len = 0;
4071 ext4_mb_show_ac(ac);
4072 } else {
4073 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4074 ar->len = ac->ac_b_ex.fe_len;
4075 }
4359 } else { 4076 } else {
4360 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); 4077 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4361 if (freed) 4078 if (freed)
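
The reworked success path above retries the whole search when ext4_mb_mark_diskspace_used() returns -EAGAIN: the best-found extent is cleared and control jumps back to the repeat label, while any other error propagates to the caller. A condensed, hypothetical model of that control flow (toy names, not the real allocator) is:

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Pretend the on-disk bitmap disagreed once, then accepted the request. */
static int mark_used(int candidate)
{
	(void)candidate;
	return attempts++ == 0 ? -EAGAIN : 0;
}

static int allocate(void)
{
	int candidate, err;

repeat:
	candidate = 42;			/* the "regular allocator" finds a candidate */
	err = mark_used(candidate);
	if (err == -EAGAIN) {
		candidate = 0;		/* throw the stale candidate away */
		goto repeat;		/* and search again */
	}
	if (err)
		return err;		/* any other error propagates to the caller */
	printf("allocated block %d after %d attempt(s)\n", candidate, attempts);
	return 0;
}

int main(void)
{
	return allocate();
}
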
@@ -4406,7 +4123,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4406 ext4_mb_free_committed_blocks(sb); 4123 ext4_mb_free_committed_blocks(sb);
4407} 4124}
4408 4125
4409static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4126static noinline_for_stack int
4127ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4410 ext4_group_t group, ext4_grpblk_t block, int count) 4128 ext4_group_t group, ext4_grpblk_t block, int count)
4411{ 4129{
4412 struct ext4_group_info *db = e4b->bd_info; 4130 struct ext4_group_info *db = e4b->bd_info;
@@ -4497,7 +4215,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4497 if (block < le32_to_cpu(es->s_first_data_block) || 4215 if (block < le32_to_cpu(es->s_first_data_block) ||
4498 block + count < block || 4216 block + count < block ||
4499 block + count > ext4_blocks_count(es)) { 4217 block + count > ext4_blocks_count(es)) {
4500 ext4_error(sb, __FUNCTION__, 4218 ext4_error(sb, __func__,
4501 "Freeing blocks not in datazone - " 4219 "Freeing blocks not in datazone - "
4502 "block = %lu, count = %lu", block, count); 4220 "block = %lu, count = %lu", block, count);
4503 goto error_return; 4221 goto error_return;
@@ -4538,9 +4256,11 @@ do_more:
4538 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4256 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4539 EXT4_SB(sb)->s_itb_per_group)) { 4257 EXT4_SB(sb)->s_itb_per_group)) {
4540 4258
4541 ext4_error(sb, __FUNCTION__, 4259 ext4_error(sb, __func__,
4542 "Freeing blocks in system zone - " 4260 "Freeing blocks in system zone - "
4543 "Block = %lu, count = %lu", block, count); 4261 "Block = %lu, count = %lu", block, count);
4262 /* err = 0. ext4_std_error should be a no op */
4263 goto error_return;
4544 } 4264 }
4545 4265
4546 BUFFER_TRACE(bitmap_bh, "getting write access"); 4266 BUFFER_TRACE(bitmap_bh, "getting write access");
@@ -4596,8 +4316,7 @@ do_more:
4596 } 4316 }
4597 4317
4598 spin_lock(sb_bgl_lock(sbi, block_group)); 4318 spin_lock(sb_bgl_lock(sbi, block_group));
4599 gdp->bg_free_blocks_count = 4319 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4600 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4601 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4320 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4602 spin_unlock(sb_bgl_lock(sbi, block_group)); 4321 spin_unlock(sb_bgl_lock(sbi, block_group));
4603 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4322 percpu_counter_add(&sbi->s_freeblocks_counter, count);
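
The last hunk replaces the open-coded cpu_to_le16(le16_to_cpu(x) + count) update with le16_add_cpu(). As a rough standalone sketch, the two forms are equivalent; the helpers are reimplemented here so the example compiles on its own and are not the kernel versions:

#include <stdint.h>
#include <stdio.h>

typedef uint16_t __le16;		/* an on-disk little-endian quantity */

static __le16 cpu_to_le16(uint16_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return (__le16)((uint16_t)(v >> 8) | (uint16_t)(v << 8));
#else
	return (__le16)v;		/* little-endian host: identity */
#endif
}

static uint16_t le16_to_cpu(__le16 v)
{
	return cpu_to_le16(v);		/* the byte swap is its own inverse */
}

/* The helper: read in CPU order, add, write back in little-endian order. */
static void le16_add_cpu(__le16 *var, int16_t val)
{
	*var = cpu_to_le16((uint16_t)(le16_to_cpu(*var) + val));
}

int main(void)
{
	__le16 free_blocks = cpu_to_le16(100);

	/* The old open-coded form and the new helper do the same update. */
	free_blocks = cpu_to_le16((uint16_t)(le16_to_cpu(free_blocks) + 8));
	le16_add_cpu(&free_blocks, 8);

	printf("free blocks now %u\n", (unsigned)le16_to_cpu(free_blocks));
	return 0;
}
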
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK the allocator runs consistency checks over
27 * structures. These checks slow things down a lot.
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
37/*
38 */
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, whose purpose is to pack requests
83 * as close to each other as possible to produce smooth I/O traffic.
84 * We use locality group prealloc space for stream requests.
85 * We can tune the same via /proc/fs/ext4/<partition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the best found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
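
ext4_grp_offs_to_block() at the end of the new header is plain arithmetic: the physical block is the group number times blocks-per-group, plus the offset within the group, plus the filesystem's first data block. A tiny worked example with made-up superblock values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t blocks_per_group = 32768;	/* EXT4_BLOCKS_PER_GROUP(sb) */
	uint64_t first_data_block = 1;		/* s_first_data_block (1 KiB blocks) */
	uint64_t group = 5;			/* fe_group */
	uint64_t start = 1200;			/* fe_start, offset inside the group */

	uint64_t block = group * blocks_per_group + start + first_data_block;

	printf("group %llu, offset %llu -> physical block %llu\n",
	       (unsigned long long)group,
	       (unsigned long long)start,
	       (unsigned long long)block);
	return 0;
}
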
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared, a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate.
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map built with the tmp inode. 366 * We have the extent map built with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap writes to holes. If we have allocated new blocks we fail
526 * the migrate. New block allocation will clear the EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data, free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
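
The migrate.c changes above hinge on a simple race check: the migrate path sets EXT4_EXT_MIGRATE before its long scan, any new block allocation clears it, and the final swap re-checks the flag under i_data_sem, failing with -EAGAIN if it is gone. The following toy model (pthreads standing in for i_data_sem, all names invented) shows the same pattern outside the kernel:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define TOY_EXT_MIGRATE	0x1

static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int flags;		/* models EXT4_I(inode)->i_flags */

/* A concurrent "block allocation" clears the flag under the lock. */
static void *writer(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&data_sem);
	flags &= ~TOY_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
	return NULL;
}

/* The final swap re-checks the flag under the write lock. */
static int migrate_commit(void)
{
	int ret = 0;

	pthread_rwlock_wrlock(&data_sem);
	if (!(flags & TOY_EXT_MIGRATE))
		ret = -EAGAIN;		/* someone allocated: fail the migrate */
	else
		flags &= ~TOY_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
	return ret;
}

int main(void)
{
	pthread_t t;

	pthread_rwlock_rdlock(&data_sem);
	flags |= TOY_EXT_MIGRATE;	/* mark the migrate as in progress */
	pthread_rwlock_unlock(&data_sem);

	pthread_create(&t, NULL, writer, NULL);
	pthread_join(&t, NULL);

	printf("commit -> %d (negative means the migrate must be failed)\n",
	       migrate_commit());
	return 0;
}
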
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
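
Earlier in this namei.c diff, ext4_append() now checks the return value of ext4_journal_get_write_access() and releases the buffer on failure, so callers only ever see a usable buffer or NULL plus an error code. A standalone sketch of that acquire/verify/release-on-error shape, with invented toy_* stand-ins rather than the real buffer-head API:

#include <stdio.h>
#include <stdlib.h>

struct toy_buffer {
	int data;
};

static struct toy_buffer *get_buffer(int *err)
{
	struct toy_buffer *bh = calloc(1, sizeof(*bh));

	if (!bh)
		*err = -12;		/* -ENOMEM */
	return bh;
}

static int get_write_access(struct toy_buffer *bh)
{
	(void)bh;
	return 0;			/* pretend the journal said yes */
}

static struct toy_buffer *append_block(int *err)
{
	struct toy_buffer *bh = get_buffer(err);

	if (bh) {
		*err = get_write_access(bh);
		if (*err) {
			free(bh);	/* brelse() in the real code */
			bh = NULL;	/* caller sees NULL + *err, never a stale bh */
		}
	}
	return bh;
}

int main(void)
{
	int err = 0;
	struct toy_buffer *bh = append_block(&err);

	printf("bh=%p err=%d\n", (void *)bh, err);
	free(bh);
	return 0;
}
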
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
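
Most of the resize.c (and namei.c) churn above is the mechanical switch from the GCC-specific __FUNCTION__ to the standard C99 __func__ identifier passed into ext4_warning(). A quick sketch of how such a warning helper uses it, with ext4_warning() stubbed out by printf and all toy_* names invented:

#include <stdio.h>

/* Stand-in for ext4_warning(sb, __func__, ...); printf replaces the
 * real superblock plumbing. */
#define toy_warning(fn, fmt, ...) \
	printf("TOY-fs warning (%s): " fmt "\n", fn, ##__VA_ARGS__)

static void cannot_shrink(void)
{
	/* __func__ expands to "cannot_shrink" here, portably, per C99. */
	toy_warning(__func__, "can't shrink FS - resize aborted");
}

int main(void)
{
	cannot_shrink();
	return 0;
}
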
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759bad..09d9359c8055 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
945 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
946 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
947 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
948 {Opt_err, NULL},
949 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
950}; 949};
951 950
952static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -980,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb,
980 int data_opt = 0; 979 int data_opt = 0;
981 int option; 980 int option;
982#ifdef CONFIG_QUOTA 981#ifdef CONFIG_QUOTA
983 int qtype; 982 int qtype, qfmt;
984 char *qname; 983 char *qname;
985#endif 984#endif
986 985
@@ -1163,9 +1162,11 @@ static int parse_options (char *options, struct super_block *sb,
1163 case Opt_grpjquota: 1162 case Opt_grpjquota:
1164 qtype = GRPQUOTA; 1163 qtype = GRPQUOTA;
1165set_qf_name: 1164set_qf_name:
1166 if (sb_any_quota_enabled(sb)) { 1165 if ((sb_any_quota_enabled(sb) ||
1166 sb_any_quota_suspended(sb)) &&
1167 !sbi->s_qf_names[qtype]) {
1167 printk(KERN_ERR 1168 printk(KERN_ERR
1168 "EXT4-fs: Cannot change journalled " 1169 "EXT4-fs: Cannot change journaled "
1169 "quota options when quota turned on.\n"); 1170 "quota options when quota turned on.\n");
1170 return 0; 1171 return 0;
1171 } 1172 }
@@ -1201,9 +1202,11 @@ set_qf_name:
1201 case Opt_offgrpjquota: 1202 case Opt_offgrpjquota:
1202 qtype = GRPQUOTA; 1203 qtype = GRPQUOTA;
1203clear_qf_name: 1204clear_qf_name:
1204 if (sb_any_quota_enabled(sb)) { 1205 if ((sb_any_quota_enabled(sb) ||
1206 sb_any_quota_suspended(sb)) &&
1207 sbi->s_qf_names[qtype]) {
1205 printk(KERN_ERR "EXT4-fs: Cannot change " 1208 printk(KERN_ERR "EXT4-fs: Cannot change "
1206 "journalled quota options when " 1209 "journaled quota options when "
1207 "quota turned on.\n"); 1210 "quota turned on.\n");
1208 return 0; 1211 return 0;
1209 } 1212 }
@@ -1214,10 +1217,20 @@ clear_qf_name:
1214 sbi->s_qf_names[qtype] = NULL; 1217 sbi->s_qf_names[qtype] = NULL;
1215 break; 1218 break;
1216 case Opt_jqfmt_vfsold: 1219 case Opt_jqfmt_vfsold:
1217 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1220 qfmt = QFMT_VFS_OLD;
1218 break; 1221 goto set_qf_format;
1219 case Opt_jqfmt_vfsv0: 1222 case Opt_jqfmt_vfsv0:
1220 sbi->s_jquota_fmt = QFMT_VFS_V0; 1223 qfmt = QFMT_VFS_V0;
1224set_qf_format:
1225 if ((sb_any_quota_enabled(sb) ||
1226 sb_any_quota_suspended(sb)) &&
1227 sbi->s_jquota_fmt != qfmt) {
1228 printk(KERN_ERR "EXT4-fs: Cannot change "
1229 "journaled quota options when "
1230 "quota turned on.\n");
1231 return 0;
1232 }
1233 sbi->s_jquota_fmt = qfmt;
1221 break; 1234 break;
1222 case Opt_quota: 1235 case Opt_quota:
1223 case Opt_usrquota: 1236 case Opt_usrquota:
@@ -1242,6 +1255,9 @@ clear_qf_name:
1242 case Opt_quota: 1255 case Opt_quota:
1243 case Opt_usrquota: 1256 case Opt_usrquota:
1244 case Opt_grpquota: 1257 case Opt_grpquota:
1258 printk(KERN_ERR
1259 "EXT4-fs: quota options not supported.\n");
1260 break;
1245 case Opt_usrjquota: 1261 case Opt_usrjquota:
1246 case Opt_grpjquota: 1262 case Opt_grpjquota:
1247 case Opt_offusrjquota: 1263 case Opt_offusrjquota:
@@ -1249,7 +1265,7 @@ clear_qf_name:
1249 case Opt_jqfmt_vfsold: 1265 case Opt_jqfmt_vfsold:
1250 case Opt_jqfmt_vfsv0: 1266 case Opt_jqfmt_vfsv0:
1251 printk(KERN_ERR 1267 printk(KERN_ERR
1252 "EXT4-fs: journalled quota options not " 1268 "EXT4-fs: journaled quota options not "
1253 "supported.\n"); 1269 "supported.\n");
1254 break; 1270 break;
1255 case Opt_noquota: 1271 case Opt_noquota:
@@ -1334,14 +1350,14 @@ clear_qf_name:
1334 } 1350 }
1335 1351
1336 if (!sbi->s_jquota_fmt) { 1352 if (!sbi->s_jquota_fmt) {
1337 printk(KERN_ERR "EXT4-fs: journalled quota format " 1353 printk(KERN_ERR "EXT4-fs: journaled quota format "
1338 "not specified.\n"); 1354 "not specified.\n");
1339 return 0; 1355 return 0;
1340 } 1356 }
1341 } else { 1357 } else {
1342 if (sbi->s_jquota_fmt) { 1358 if (sbi->s_jquota_fmt) {
1343 printk(KERN_ERR "EXT4-fs: journalled quota format " 1359 printk(KERN_ERR "EXT4-fs: journaled quota format "
1344 "specified with no journalling " 1360 "specified with no journaling "
1345 "enabled.\n"); 1361 "enabled.\n");
1346 return 0; 1362 return 0;
1347 } 1363 }
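
The parse_options() hunk above tightens the journaled-quota options: while quota is enabled or suspended, a quota file name or format change is refused unless it leaves the in-use settings untouched. A condensed, hypothetical model of those two guards (toy struct, not the real ext4 superblock info):

#include <stdio.h>

struct toy_sb_info {
	int quota_enabled;		/* sb_any_quota_enabled() */
	int quota_suspended;		/* sb_any_quota_suspended() */
	const char *qf_name;		/* s_qf_names[qtype] */
	int jquota_fmt;			/* s_jquota_fmt */
};

static int set_qf_name(struct toy_sb_info *sbi, const char *name)
{
	if ((sbi->quota_enabled || sbi->quota_suspended) && !sbi->qf_name) {
		printf("cannot change journaled quota options when quota is on\n");
		return 0;
	}
	sbi->qf_name = name;
	return 1;
}

static int set_qf_format(struct toy_sb_info *sbi, int fmt)
{
	if ((sbi->quota_enabled || sbi->quota_suspended) &&
	    sbi->jquota_fmt != fmt) {
		printf("cannot change journaled quota format when quota is on\n");
		return 0;
	}
	sbi->jquota_fmt = fmt;
	return 1;
}

int main(void)
{
	struct toy_sb_info sbi = { .quota_enabled = 1, .jquota_fmt = 2 };

	printf("set new name    -> %d\n", set_qf_name(&sbi, "aquota.user"));
	printf("keep format 2   -> %d\n", set_qf_format(&sbi, 2));
	printf("switch to fmt 1 -> %d\n", set_qf_format(&sbi, 1));
	return 0;
}
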
@@ -1388,11 +1404,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1388 * a plain journaled filesystem we can keep it set as 1404 * a plain journaled filesystem we can keep it set as
1389 * valid forever! :) 1405 * valid forever! :)
1390 */ 1406 */
1391 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1407 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1392#endif 1408#endif
1393 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1409 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1394 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1410 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1395 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1411 le16_add_cpu(&es->s_mnt_count, 1);
1396 es->s_mtime = cpu_to_le32(get_seconds()); 1412 es->s_mtime = cpu_to_le32(get_seconds());
1397 ext4_update_dynamic_rev(sb); 1413 ext4_update_dynamic_rev(sb);
1398 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1414 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1501,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1485 block_bitmap = ext4_block_bitmap(sb, gdp); 1501 block_bitmap = ext4_block_bitmap(sb, gdp);
1486 if (block_bitmap < first_block || block_bitmap > last_block) 1502 if (block_bitmap < first_block || block_bitmap > last_block)
1487 { 1503 {
1488 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1489 "Block bitmap for group %lu" 1505 "Block bitmap for group %lu not in group "
1490 " not in group (block %llu)!", 1506 "(block %llu)!", i, block_bitmap);
1491 i, block_bitmap);
1492 return 0; 1507 return 0;
1493 } 1508 }
1494 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1509 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1495 if (inode_bitmap < first_block || inode_bitmap > last_block) 1510 if (inode_bitmap < first_block || inode_bitmap > last_block)
1496 { 1511 {
1497 ext4_error (sb, "ext4_check_descriptors", 1512 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1498 "Inode bitmap for group %lu" 1513 "Inode bitmap for group %lu not in group "
1499 " not in group (block %llu)!", 1514 "(block %llu)!", i, inode_bitmap);
1500 i, inode_bitmap);
1501 return 0; 1515 return 0;
1502 } 1516 }
1503 inode_table = ext4_inode_table(sb, gdp); 1517 inode_table = ext4_inode_table(sb, gdp);
1504 if (inode_table < first_block || 1518 if (inode_table < first_block ||
1505 inode_table + sbi->s_itb_per_group - 1 > last_block) 1519 inode_table + sbi->s_itb_per_group - 1 > last_block)
1506 { 1520 {
1507 ext4_error (sb, "ext4_check_descriptors", 1521 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1508 "Inode table for group %lu" 1522 "Inode table for group %lu not in group "
1509 " not in group (block %llu)!", 1523 "(block %llu)!", i, inode_table);
1510 i, inode_table);
1511 return 0; 1524 return 0;
1512 } 1525 }
1513 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1526 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1514 ext4_error(sb, __FUNCTION__, 1527 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1515 "Checksum for group %lu failed (%u!=%u)\n", 1528 "Checksum for group %lu failed (%u!=%u)\n",
1516 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1529 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1517 gdp)), le16_to_cpu(gdp->bg_checksum)); 1530 gdp)), le16_to_cpu(gdp->bg_checksum));
1518 return 0; 1531 return 0;
1519 } 1532 }
1520 if (!flexbg_flag) 1533 if (!flexbg_flag)
@@ -1585,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1585 int ret = ext4_quota_on_mount(sb, i); 1598 int ret = ext4_quota_on_mount(sb, i);
1586 if (ret < 0) 1599 if (ret < 0)
1587 printk(KERN_ERR 1600 printk(KERN_ERR
1588 "EXT4-fs: Cannot turn on journalled " 1601 "EXT4-fs: Cannot turn on journaled "
1589 "quota: error %d\n", ret); 1602 "quota: error %d\n", ret);
1590 } 1603 }
1591 } 1604 }
@@ -1594,8 +1607,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1594 while (es->s_last_orphan) { 1607 while (es->s_last_orphan) {
1595 struct inode *inode; 1608 struct inode *inode;
1596 1609
1597 if (!(inode = 1610 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1598 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1611 if (IS_ERR(inode)) {
1599 es->s_last_orphan = 0; 1612 es->s_last_orphan = 0;
1600 break; 1613 break;
1601 } 1614 }
@@ -1605,7 +1618,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1605 if (inode->i_nlink) { 1618 if (inode->i_nlink) {
1606 printk(KERN_DEBUG 1619 printk(KERN_DEBUG
1607 "%s: truncating inode %lu to %Ld bytes\n", 1620 "%s: truncating inode %lu to %Ld bytes\n",
1608 __FUNCTION__, inode->i_ino, inode->i_size); 1621 __func__, inode->i_ino, inode->i_size);
1609 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1622 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1610 inode->i_ino, inode->i_size); 1623 inode->i_ino, inode->i_size);
1611 ext4_truncate(inode); 1624 ext4_truncate(inode);
@@ -1613,7 +1626,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1613 } else { 1626 } else {
1614 printk(KERN_DEBUG 1627 printk(KERN_DEBUG
1615 "%s: deleting unreferenced inode %lu\n", 1628 "%s: deleting unreferenced inode %lu\n",
1616 __FUNCTION__, inode->i_ino); 1629 __func__, inode->i_ino);
1617 jbd_debug(2, "deleting unreferenced inode %lu\n", 1630 jbd_debug(2, "deleting unreferenced inode %lu\n",
1618 inode->i_ino); 1631 inode->i_ino);
1619 nr_orphans++; 1632 nr_orphans++;
@@ -2699,9 +2712,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2699 char nbuf[16]; 2712 char nbuf[16];
2700 2713
2701 errstr = ext4_decode_error(sb, j_errno, nbuf); 2714 errstr = ext4_decode_error(sb, j_errno, nbuf);
2702 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2715 ext4_warning(sb, __func__, "Filesystem error recorded "
2703 "from previous mount: %s", errstr); 2716 "from previous mount: %s", errstr);
2704 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2717 ext4_warning(sb, __func__, "Marking fs in need of "
2705 "filesystem check."); 2718 "filesystem check.");
2706 2719
2707 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2720 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2841,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2828 } 2841 }
2829 2842
2830 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2843 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2831 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2844 ext4_abort(sb, __func__, "Abort forced by user");
2832 2845
2833 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2846 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2834 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2847 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3053,14 @@ static int ext4_dquot_drop(struct inode *inode)
3040 3053
3041 /* We may delete quota structure so we need to reserve enough blocks */ 3054 /* We may delete quota structure so we need to reserve enough blocks */
3042 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3055 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3043 if (IS_ERR(handle)) 3056 if (IS_ERR(handle)) {
3057 /*
3058 * We call dquot_drop() anyway to at least release references
3059 * to quota structures so that umount does not hang.
3060 */
3061 dquot_drop(inode);
3044 return PTR_ERR(handle); 3062 return PTR_ERR(handle);
3063 }
3045 ret = dquot_drop(inode); 3064 ret = dquot_drop(inode);
3046 err = ext4_journal_stop(handle); 3065 err = ext4_journal_stop(handle);
3047 if (!ret) 3066 if (!ret)
@@ -3104,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot)
3104 3123
3105static int ext4_mark_dquot_dirty(struct dquot *dquot) 3124static int ext4_mark_dquot_dirty(struct dquot *dquot)
3106{ 3125{
3107 /* Are we journalling quotas? */ 3126 /* Are we journaling quotas? */
3108 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3127 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3109 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3128 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3110 dquot_mark_dquot_dirty(dquot); 3129 dquot_mark_dquot_dirty(dquot);
@@ -3151,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3151 3170
3152 if (!test_opt(sb, QUOTA)) 3171 if (!test_opt(sb, QUOTA))
3153 return -EINVAL; 3172 return -EINVAL;
3154 /* Not journalling quota? */ 3173 /* When remounting, no checks are needed and in fact, path is NULL */
3155 if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 3174 if (remount)
3156 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
3157 return vfs_quota_on(sb, type, format_id, path, remount); 3175 return vfs_quota_on(sb, type, format_id, path, remount);
3176
3158 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3177 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
3159 if (err) 3178 if (err)
3160 return err; 3179 return err;
3180
3161 /* Quotafile not on the same filesystem? */ 3181 /* Quotafile not on the same filesystem? */
3162 if (nd.path.mnt->mnt_sb != sb) { 3182 if (nd.path.mnt->mnt_sb != sb) {
3163 path_put(&nd.path); 3183 path_put(&nd.path);
3164 return -EXDEV; 3184 return -EXDEV;
3165 } 3185 }
3166 /* Quotafile not of fs root? */ 3186 /* Journaling quota? */
3167 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3187 if (EXT4_SB(sb)->s_qf_names[type]) {
3168 printk(KERN_WARNING 3188 /* Quotafile not of fs root? */
3169 "EXT4-fs: Quota file not on filesystem root. " 3189 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3170 "Journalled quota will not work.\n"); 3190 printk(KERN_WARNING
3191 "EXT4-fs: Quota file not on filesystem root. "
3192 "Journaled quota will not work.\n");
3193 }
3194
3195 /*
3196 * When we journal data on quota file, we have to flush journal to see
3197 * all updates to the file when we bypass pagecache...
3198 */
3199 if (ext4_should_journal_data(nd.path.dentry->d_inode)) {
3200 /*
3201 * We don't need to lock updates but journal_flush() could
3202 * otherwise be livelocked...
3203 */
3204 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3205 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3206 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3207 }
3208
3171 path_put(&nd.path); 3209 path_put(&nd.path);
3172 return vfs_quota_on(sb, type, format_id, path, remount); 3210 return vfs_quota_on(sb, type, format_id, path, remount);
3173} 3211}
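Editor's illustration (not part of the patch): the ext4_setup_super hunk above replaces the open-coded read/modify/write of the little-endian mount counter with le16_add_cpu(). The following minimal user-space sketch shows why the two forms are equivalent; the demo_* names, the byte-swap stand-ins for cpu_to_le16()/le16_to_cpu(), and the main() driver are invented for the demo and are not kernel APIs.

/*
 * User-space approximation of the le16_add_cpu() pattern: read the
 * little-endian on-disk field, add in CPU byte order, store it back
 * as little-endian.  Self-consistent on any host.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t demo_swap16(uint16_t v)
{
	return (uint16_t)((v << 8) | (v >> 8));
}

/* Stand-ins for le16_to_cpu()/cpu_to_le16() on a big-endian host. */
static uint16_t demo_le16_to_cpu(uint16_t le) { return demo_swap16(le); }
static uint16_t demo_cpu_to_le16(uint16_t v)  { return demo_swap16(v); }

/* The helper the patch switches to, modelled in user space. */
static void demo_le16_add_cpu(uint16_t *var, uint16_t val)
{
	*var = demo_cpu_to_le16((uint16_t)(demo_le16_to_cpu(*var) + val));
}

int main(void)
{
	uint16_t mnt_count = demo_cpu_to_le16(41);	/* on-disk field */

	/* Equivalent to: es->s_mnt_count = cpu_to_le16(le16_to_cpu(...) + 1) */
	demo_le16_add_cpu(&mnt_count, 1);

	printf("mount count is now %u\n", (unsigned)demo_le16_to_cpu(mnt_count));
	return 0;
}

The helper keeps the endianness conversion in one place, which is the point of the conversion above.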
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..ff08633f398e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1011,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1011 i.value = NULL; 1009 i.value = NULL;
1012 error = ext4_xattr_block_set(handle, inode, &i, &bs); 1010 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1013 } else if (error == -ENOSPC) { 1011 } else if (error == -ENOSPC) {
1012 if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
1013 error = ext4_xattr_block_find(inode, &i, &bs);
1014 if (error)
1015 goto cleanup;
1016 }
1014 error = ext4_xattr_block_set(handle, inode, &i, &bs); 1017 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1015 if (error) 1018 if (error)
1016 goto cleanup; 1019 goto cleanup;
@@ -1166,7 +1169,7 @@ retry:
1166 if (!bh) 1169 if (!bh)
1167 goto cleanup; 1170 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1171 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1172 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1173 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1174 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1175 error = -EIO;
@@ -1341,14 +1344,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1344 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1345 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1346 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1347 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1348 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1349 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1350 goto cleanup;
1348 } 1351 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1352 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1353 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1354 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1355 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1356 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1357 goto cleanup;
@@ -1475,7 +1478,7 @@ again:
1475 } 1478 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1479 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1480 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1481 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1482 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1483 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1484 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d1..fda25479af26 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_panic(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __FUNCTION__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
248 goto out; 248 goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_panic(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __FUNCTION__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
259 goto out; 259 goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
289 __FUNCTION__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
292 return dclus; 292 return dclus;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 13ab763cc510..302e95c4af7e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
546 goto error; 546 goto error;
547 } else if (cluster == FAT_ENT_FREE) { 547 } else if (cluster == FAT_ENT_FREE) {
548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
549 __FUNCTION__); 549 __func__);
550 err = -EIO; 550 err = -EIO;
551 goto error; 551 goto error;
552 } 552 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index d604bb132422..27cc1164ec36 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
208 } else if (ret == FAT_ENT_FREE) { 208 } else if (ret == FAT_ENT_FREE) {
209 fat_fs_panic(sb, 209 fat_fs_panic(sb,
210 "%s: invalid cluster chain (i_pos %lld)", 210 "%s: invalid cluster chain (i_pos %lld)",
211 __FUNCTION__, MSDOS_I(inode)->i_pos); 211 __func__, MSDOS_I(inode)->i_pos);
212 ret = -EIO; 212 ret = -EIO;
213 } else if (ret > 0) { 213 } else if (ret > 0) {
214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); 214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5f522a55b596..4e0a3dd9d677 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1222 brelse(bh); 1222 brelse(bh);
1223 goto out_invalid; 1223 goto out_invalid;
1224 } 1224 }
1225 logical_sector_size = 1225 logical_sector_size = get_unaligned_le16(&b->sector_size);
1226 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1227 if (!is_power_of_2(logical_sector_size) 1226 if (!is_power_of_2(logical_sector_size)
1228 || (logical_sector_size < 512) 1227 || (logical_sector_size < 512)
1229 || (logical_sector_size > 4096)) { 1228 || (logical_sector_size > 4096)) {
@@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1322 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1321 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1323 1322
1324 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; 1323 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
1325 sbi->dir_entries = 1324 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1326 le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
1327 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1325 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1328 if (!silent) 1326 if (!silent)
1329 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1327 printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1335 rootdir_sectors = sbi->dir_entries 1333 rootdir_sectors = sbi->dir_entries
1336 * sizeof(struct msdos_dir_entry) / sb->s_blocksize; 1334 * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
1337 sbi->data_start = sbi->dir_start + rootdir_sectors; 1335 sbi->data_start = sbi->dir_start + rootdir_sectors;
1338 total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); 1336 total_sectors = get_unaligned_le16(&b->sectors);
1339 if (total_sectors == 0) 1337 if (total_sectors == 0)
1340 total_sectors = le32_to_cpu(b->total_sect); 1338 total_sectors = le32_to_cpu(b->total_sect);
1341 1339
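Editor's illustration (not part of the patch): the fs/fat/inode.c hunks above switch to get_unaligned_le16() for packed boot-sector fields. A minimal sketch of what such an accessor does, assuming a plain byte buffer; demo_get_unaligned_le16() and the sample data are invented for the demo, not the kernel implementation.

/*
 * Read a 16-bit little-endian value from an arbitrarily aligned buffer
 * by assembling it byte-by-byte instead of casting to a __le16 pointer.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t demo_get_unaligned_le16(const void *p)
{
	const uint8_t *b = p;

	/* Low byte first: little-endian, no alignment requirement. */
	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	/* Fragment of a packed on-disk structure: 512 stored at an odd offset. */
	uint8_t raw[] = { 0xeb, 0x00, 0x02, 0x00 };

	printf("sector size: %u\n", (unsigned)demo_get_unaligned_le16(&raw[1]));
	return 0;
}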
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac630ccde..bfd776509a72 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/fdtable.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/dnotify.h> 14#include <linux/dnotify.h>
14#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9ef..4c6f0ea12c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/fdtable.h>
15#include <linux/bitops.h> 16#include <linux/bitops.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
149 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
150 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
151 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
152 if (nr > sysctl_nr_open) 153 /*
153 nr = sysctl_nr_open; 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way.
157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */
161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
154 163
155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
156 if (!fdt) 165 if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
199 if (!new_fdt) 208 if (!new_fdt)
200 return -ENOMEM; 209 return -ENOMEM;
201 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */
214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt);
216 free_fdset(new_fdt);
217 kfree(new_fdt);
218 return -EMFILE;
219 }
220 /*
202 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
203 * we dropped the lock 222 * we dropped the lock
204 */ 223 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b872251..83084225b4c3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fdtable.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 2b46064f66b2..50ab5eecb99b 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
53/* vxfs_inode.c */ 56/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops;
54extern struct kmem_cache *vxfs_inode_cachep; 58extern struct kmem_cache *vxfs_inode_cachep;
55extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); 59extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
56extern struct inode * vxfs_get_fake_inode(struct super_block *, 60extern struct inode * vxfs_get_fake_inode(struct super_block *,
@@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations;
69extern int vxfs_read_olt(struct super_block *, u_long); 73extern int vxfs_read_olt(struct super_block *, u_long);
70 74
71/* vxfs_subr.c */ 75/* vxfs_subr.c */
76extern const struct address_space_operations vxfs_aops;
72extern struct page * vxfs_get_page(struct address_space *, u_long); 77extern struct page * vxfs_get_page(struct address_space *, u_long);
73extern void vxfs_put_page(struct page *); 78extern void vxfs_put_page(struct page *);
74extern struct buffer_head * vxfs_bread(struct inode *, int); 79extern struct buffer_head * vxfs_bread(struct inode *, int);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8a5959a61ba9..c36aeaf92e41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36 36
37#include "vxfs.h" 37#include "vxfs.h"
38#include "vxfs_extern.h"
38#include "vxfs_inode.h" 39#include "vxfs_inode.h"
39 40
40 41
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ad88d2364bc2..9f3f2ceb73f0 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,11 +41,6 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops;
46
47extern const struct inode_operations vxfs_immed_symlink_iops;
48
49struct kmem_cache *vxfs_inode_cachep; 44struct kmem_cache *vxfs_inode_cachep;
50 45
51 46
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06557679ca41..ae45f77765c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,45 @@
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include "internal.h" 26#include "internal.h"
27 27
28
29/**
30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */
41static int writeback_acquire(struct backing_dev_info *bdi)
42{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44}
45
46/**
47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure.
49 *
50 * Determine whether there is writeback in progress against a backing device.
51 */
52int writeback_in_progress(struct backing_dev_info *bdi)
53{
54 return test_bit(BDI_pdflush, &bdi->state);
55}
56
57/**
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{
63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state);
65}
66
28/** 67/**
29 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
30 * @inode: inode to mark 69 * @inode: inode to mark
@@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
747 786
748 return err; 787 return err;
749} 788}
750
751EXPORT_SYMBOL(generic_osync_inode); 789EXPORT_SYMBOL(generic_osync_inode);
752
753/**
754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure
756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on
758 * a single request queue. Exclusion at the request_queue level is obtained
759 * via a flag in the request_queue's backing_dev_info.state.
760 *
761 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
762 * unless they implement their own. Which is somewhat inefficient, as this
763 * may prevent concurrent writeback against multiple devices.
764 */
765int writeback_acquire(struct backing_dev_info *bdi)
766{
767 return !test_and_set_bit(BDI_pdflush, &bdi->state);
768}
769
770/**
771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure.
773 *
774 * Determine whether there is writeback in progress against a backing device.
775 */
776int writeback_in_progress(struct backing_dev_info *bdi)
777{
778 return test_bit(BDI_pdflush, &bdi->state);
779}
780
781/**
782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure
784 */
785void writeback_release(struct backing_dev_info *bdi)
786{
787 BUG_ON(!writeback_in_progress(bdi));
788 clear_bit(BDI_pdflush, &bdi->state);
789}
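Editor's illustration (not part of the patch): writeback_acquire()/writeback_release() above implement a per-device try-lock with a single flag bit so at most one flusher works a backing device at a time. A sketch of the same pattern using C11 atomics; the kernel uses test_and_set_bit()/clear_bit() on bdi->state instead, and the demo_* names are invented for the example.

/*
 * Try-lock on a single flag: test-and-set to acquire, clear to release.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_bdi {
	atomic_flag pdflush_busy;	/* stands in for the BDI_pdflush bit */
};

/* Returns true if exclusive writeback access was obtained. */
static bool demo_writeback_acquire(struct demo_bdi *bdi)
{
	return !atomic_flag_test_and_set(&bdi->pdflush_busy);
}

static void demo_writeback_release(struct demo_bdi *bdi)
{
	atomic_flag_clear(&bdi->pdflush_busy);
}

int main(void)
{
	struct demo_bdi bdi = { .pdflush_busy = ATOMIC_FLAG_INIT };

	printf("first acquire:  %d\n", demo_writeback_acquire(&bdi));	/* 1 */
	printf("second acquire: %d\n", demo_writeback_acquire(&bdi));	/* 0 */
	demo_writeback_release(&bdi);
	printf("after release:  %d\n", demo_writeback_acquire(&bdi));	/* 1 */
	return 0;
}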
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e07..4f3cab321415 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
117 117
118 parent = fuse_control_sb->s_root; 118 parent = fuse_control_sb->s_root;
119 inc_nlink(parent->d_inode); 119 inc_nlink(parent->d_inode);
120 sprintf(name, "%llu", (unsigned long long) fc->id); 120 sprintf(name, "%u", fc->dev);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, 121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations, 122 &simple_dir_inode_operations,
123 &simple_dir_operations); 123 &simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524e..87250b6a8682 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
47 return req; 47 return req;
48} 48}
49 49
50struct fuse_req *fuse_request_alloc_nofs(void)
51{
52 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
53 if (req)
54 fuse_request_init(req);
55 return req;
56}
57
50void fuse_request_free(struct fuse_req *req) 58void fuse_request_free(struct fuse_req *req)
51{ 59{
52 kmem_cache_free(fuse_req_cachep, req); 60 kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
291 299
292static void wait_answer_interruptible(struct fuse_conn *fc, 300static void wait_answer_interruptible(struct fuse_conn *fc,
293 struct fuse_req *req) 301 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock)
294{ 303{
295 if (signal_pending(current)) 304 if (signal_pending(current))
296 return; 305 return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
307 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 316 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
308} 317}
309 318
310/* Called with fc->lock held. Releases, and then reacquires it. */
311static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock)
312{ 321{
313 if (!fc->no_interrupt) { 322 if (!fc->no_interrupt) {
314 /* Any signal may interrupt this */ 323 /* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
430} 439}
431 440
432/* 441/*
442 * Called under fc->lock
443 *
444 * fc->connected must have been checked previously
445 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
447{
448 req->isreply = 1;
449 request_send_nowait_locked(fc, req);
450}
451
452/*
433 * Lock the request. Up to the next unlock_request() there mustn't be 453 * Lock the request. Up to the next unlock_request() there mustn't be
434 * anything that could cause a page-fault. If the request was already 454 * anything that could cause a page-fault. If the request was already
435 * aborted bail out. 455 * aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
968 * locked). 988 * locked).
969 */ 989 */
970static void end_io_requests(struct fuse_conn *fc) 990static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock)
971{ 992{
972 while (!list_empty(&fc->io)) { 993 while (!list_empty(&fc->io)) {
973 struct fuse_req *req = 994 struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a3..2060bf06b906 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
132 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
133} 133}
134 134
135static u64 fuse_get_attr_version(struct fuse_conn *fc) 135u64 fuse_get_attr_version(struct fuse_conn *fc)
136{ 136{
137 u64 curr_version; 137 u64 curr_version;
138 138
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1107} 1107}
1108 1108
1109/* 1109/*
1110 * Prevent concurrent writepages on inode
1111 *
1112 * This is done by adding a negative bias to the inode write counter
1113 * and waiting for all pending writes to finish.
1114 */
1115void fuse_set_nowrite(struct inode *inode)
1116{
1117 struct fuse_conn *fc = get_fuse_conn(inode);
1118 struct fuse_inode *fi = get_fuse_inode(inode);
1119
1120 BUG_ON(!mutex_is_locked(&inode->i_mutex));
1121
1122 spin_lock(&fc->lock);
1123 BUG_ON(fi->writectr < 0);
1124 fi->writectr += FUSE_NOWRITE;
1125 spin_unlock(&fc->lock);
1126 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1127}
1128
1129/*
1130 * Allow writepages on inode
1131 *
1132 * Remove the bias from the writecounter and send any queued
1133 * writepages.
1134 */
1135static void __fuse_release_nowrite(struct inode *inode)
1136{
1137 struct fuse_inode *fi = get_fuse_inode(inode);
1138
1139 BUG_ON(fi->writectr != FUSE_NOWRITE);
1140 fi->writectr = 0;
1141 fuse_flush_writepages(inode);
1142}
1143
1144void fuse_release_nowrite(struct inode *inode)
1145{
1146 struct fuse_conn *fc = get_fuse_conn(inode);
1147
1148 spin_lock(&fc->lock);
1149 __fuse_release_nowrite(inode);
1150 spin_unlock(&fc->lock);
1151}
1152
1153/*
1110 * Set attributes, and at the same time refresh them. 1154 * Set attributes, and at the same time refresh them.
1111 * 1155 *
1112 * Truncation is slightly complicated, because the 'truncate' request 1156 * Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1122 struct fuse_req *req; 1166 struct fuse_req *req;
1123 struct fuse_setattr_in inarg; 1167 struct fuse_setattr_in inarg;
1124 struct fuse_attr_out outarg; 1168 struct fuse_attr_out outarg;
1169 bool is_truncate = false;
1170 loff_t oldsize;
1125 int err; 1171 int err;
1126 1172
1127 if (!fuse_allow_task(fc, current)) 1173 if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1145 send_sig(SIGXFSZ, current, 0); 1191 send_sig(SIGXFSZ, current, 0);
1146 return -EFBIG; 1192 return -EFBIG;
1147 } 1193 }
1194 is_truncate = true;
1148 } 1195 }
1149 1196
1150 req = fuse_get_req(fc); 1197 req = fuse_get_req(fc);
1151 if (IS_ERR(req)) 1198 if (IS_ERR(req))
1152 return PTR_ERR(req); 1199 return PTR_ERR(req);
1153 1200
1201 if (is_truncate)
1202 fuse_set_nowrite(inode);
1203
1154 memset(&inarg, 0, sizeof(inarg)); 1204 memset(&inarg, 0, sizeof(inarg));
1155 memset(&outarg, 0, sizeof(outarg)); 1205 memset(&outarg, 0, sizeof(outarg));
1156 iattr_to_fattr(attr, &inarg); 1206 iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1181 if (err) { 1231 if (err) {
1182 if (err == -EINTR) 1232 if (err == -EINTR)
1183 fuse_invalidate_attr(inode); 1233 fuse_invalidate_attr(inode);
1184 return err; 1234 goto error;
1185 } 1235 }
1186 1236
1187 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1237 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1188 make_bad_inode(inode); 1238 make_bad_inode(inode);
1189 return -EIO; 1239 err = -EIO;
1240 goto error;
1241 }
1242
1243 spin_lock(&fc->lock);
1244 fuse_change_attributes_common(inode, &outarg.attr,
1245 attr_timeout(&outarg));
1246 oldsize = inode->i_size;
1247 i_size_write(inode, outarg.attr.size);
1248
1249 if (is_truncate) {
1250 /* NOTE: this may release/reacquire fc->lock */
1251 __fuse_release_nowrite(inode);
1252 }
1253 spin_unlock(&fc->lock);
1254
1255 /*
1256 * Only call invalidate_inode_pages2() after removing
1257 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1258 */
1259 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1260 if (outarg.attr.size < oldsize)
1261 fuse_truncate(inode->i_mapping, outarg.attr.size);
1262 invalidate_inode_pages2(inode->i_mapping);
1190 } 1263 }
1191 1264
1192 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1193 return 0; 1265 return 0;
1266
1267error:
1268 if (is_truncate)
1269 fuse_release_nowrite(inode);
1270
1271 return err;
1194} 1272}
1195 1273
1196static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1274static int fuse_setattr(struct dentry *entry, struct iattr *attr)
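Editor's illustration (not part of the patch): fuse_set_nowrite() above blocks further writepages by adding a large negative bias to the inode's write counter and waiting until the counter sinks back to exactly the bias, i.e. until every in-flight write has finished. Below is a simplified pthread model of that idea, not the FUSE code itself; the demo_* names, DEMO_NOWRITE, and the writer thread are invented for the example (real FUSE queues writepages rather than blocking the submitter).

/*
 * Negative-bias gate: in-flight writers keep the counter positive; a
 * "nowrite" phase adds a big negative bias and waits for the counter to
 * drain to the bias value before proceeding.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define DEMO_NOWRITE (-1000000)

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static long writectr;	/* > 0: writes in flight, < 0: writes blocked */

static void demo_write_begin(void)
{
	pthread_mutex_lock(&lock);
	while (writectr < 0)		/* a nowrite phase is active */
		pthread_cond_wait(&waitq, &lock);
	writectr++;
	pthread_mutex_unlock(&lock);
}

static void demo_write_end(void)
{
	pthread_mutex_lock(&lock);
	writectr--;
	pthread_cond_broadcast(&waitq);
	pthread_mutex_unlock(&lock);
}

static void demo_set_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr += DEMO_NOWRITE;
	while (writectr != DEMO_NOWRITE)	/* wait for in-flight writes */
		pthread_cond_wait(&waitq, &lock);
	pthread_mutex_unlock(&lock);
}

static void demo_release_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr -= DEMO_NOWRITE;
	pthread_cond_broadcast(&waitq);
	pthread_mutex_unlock(&lock);
}

static void *writer(void *arg)
{
	(void)arg;
	demo_write_begin();
	usleep(1000);			/* pretend to send a WRITE request */
	demo_write_end();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, writer, NULL);
	demo_set_nowrite();		/* drains the writer, blocks new ones */
	printf("all writes drained; truncate-like work would run here\n");
	demo_release_nowrite();
	pthread_join(t, NULL);
	return 0;
}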
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86d..8092f0d9fd1f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
210 return (u64) v0 + ((u64) v1 << 32); 210 return (u64) v0 + ((u64) v1 << 32);
211} 211}
212 212
213/*
214 * Check if page is under writeback
215 *
216 * This is currently done by walking the list of writepage requests
217 * for the inode, which can be pretty inefficient.
218 */
219static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
220{
221 struct fuse_conn *fc = get_fuse_conn(inode);
222 struct fuse_inode *fi = get_fuse_inode(inode);
223 struct fuse_req *req;
224 bool found = false;
225
226 spin_lock(&fc->lock);
227 list_for_each_entry(req, &fi->writepages, writepages_entry) {
228 pgoff_t curr_index;
229
230 BUG_ON(req->inode != inode);
231 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
232 if (curr_index == index) {
233 found = true;
234 break;
235 }
236 }
237 spin_unlock(&fc->lock);
238
239 return found;
240}
241
242/*
243 * Wait for page writeback to be completed.
244 *
245 * Since fuse doesn't rely on the VM writeback tracking, this has to
246 * use some other means.
247 */
248static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
249{
250 struct fuse_inode *fi = get_fuse_inode(inode);
251
252 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
253 return 0;
254}
255
213static int fuse_flush(struct file *file, fl_owner_t id) 256static int fuse_flush(struct file *file, fl_owner_t id)
214{ 257{
215 struct inode *inode = file->f_path.dentry->d_inode; 258 struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
245 return err; 288 return err;
246} 289}
247 290
291/*
292 * Wait for all pending writepages on the inode to finish.
293 *
294 * This is currently done by blocking further writes with FUSE_NOWRITE
295 * and waiting for all sent writes to complete.
296 *
297 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
298 * could conflict with truncation.
299 */
300static void fuse_sync_writes(struct inode *inode)
301{
302 fuse_set_nowrite(inode);
303 fuse_release_nowrite(inode);
304}
305
248int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, 306int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
249 int isdir) 307 int isdir)
250{ 308{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
261 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 319 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
262 return 0; 320 return 0;
263 321
322 /*
323 * Start writeback against all dirty pages of the inode, then
324 * wait for all outstanding writes, before sending the FSYNC
325 * request.
326 */
327 err = write_inode_now(inode, 0);
328 if (err)
329 return err;
330
331 fuse_sync_writes(inode);
332
264 req = fuse_get_req(fc); 333 req = fuse_get_req(fc);
265 if (IS_ERR(req)) 334 if (IS_ERR(req))
266 return PTR_ERR(req); 335 return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
294void fuse_read_fill(struct fuse_req *req, struct file *file, 363void fuse_read_fill(struct fuse_req *req, struct file *file,
295 struct inode *inode, loff_t pos, size_t count, int opcode) 364 struct inode *inode, loff_t pos, size_t count, int opcode)
296{ 365{
297 struct fuse_read_in *inarg = &req->misc.read_in; 366 struct fuse_read_in *inarg = &req->misc.read.in;
298 struct fuse_file *ff = file->private_data; 367 struct fuse_file *ff = file->private_data;
299 368
300 inarg->fh = ff->fh; 369 inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
320 389
321 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 390 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
322 if (owner != NULL) { 391 if (owner != NULL) {
323 struct fuse_read_in *inarg = &req->misc.read_in; 392 struct fuse_read_in *inarg = &req->misc.read.in;
324 393
325 inarg->read_flags |= FUSE_READ_LOCKOWNER; 394 inarg->read_flags |= FUSE_READ_LOCKOWNER;
326 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 395 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
329 return req->out.args[0].size; 398 return req->out.args[0].size;
330} 399}
331 400
401static void fuse_read_update_size(struct inode *inode, loff_t size,
402 u64 attr_ver)
403{
404 struct fuse_conn *fc = get_fuse_conn(inode);
405 struct fuse_inode *fi = get_fuse_inode(inode);
406
407 spin_lock(&fc->lock);
408 if (attr_ver == fi->attr_version && size < inode->i_size) {
409 fi->attr_version = ++fc->attr_version;
410 i_size_write(inode, size);
411 }
412 spin_unlock(&fc->lock);
413}
414
332static int fuse_readpage(struct file *file, struct page *page) 415static int fuse_readpage(struct file *file, struct page *page)
333{ 416{
334 struct inode *inode = page->mapping->host; 417 struct inode *inode = page->mapping->host;
335 struct fuse_conn *fc = get_fuse_conn(inode); 418 struct fuse_conn *fc = get_fuse_conn(inode);
336 struct fuse_req *req; 419 struct fuse_req *req;
420 size_t num_read;
421 loff_t pos = page_offset(page);
422 size_t count = PAGE_CACHE_SIZE;
423 u64 attr_ver;
337 int err; 424 int err;
338 425
339 err = -EIO; 426 err = -EIO;
340 if (is_bad_inode(inode)) 427 if (is_bad_inode(inode))
341 goto out; 428 goto out;
342 429
430 /*
431 * Page writeback can extend beyond the lifetime of the
432 * page-cache page, so make sure we read a properly synced
433 * page.
434 */
435 fuse_wait_on_page_writeback(inode, page->index);
436
343 req = fuse_get_req(fc); 437 req = fuse_get_req(fc);
344 err = PTR_ERR(req); 438 err = PTR_ERR(req);
345 if (IS_ERR(req)) 439 if (IS_ERR(req))
346 goto out; 440 goto out;
347 441
442 attr_ver = fuse_get_attr_version(fc);
443
348 req->out.page_zeroing = 1; 444 req->out.page_zeroing = 1;
349 req->num_pages = 1; 445 req->num_pages = 1;
350 req->pages[0] = page; 446 req->pages[0] = page;
351 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, 447 num_read = fuse_send_read(req, file, inode, pos, count, NULL);
352 NULL);
353 err = req->out.h.error; 448 err = req->out.h.error;
354 fuse_put_request(fc, req); 449 fuse_put_request(fc, req);
355 if (!err) 450
451 if (!err) {
452 /*
453 * Short read means EOF. If file size is larger, truncate it
454 */
455 if (num_read < count)
456 fuse_read_update_size(inode, pos + num_read, attr_ver);
457
356 SetPageUptodate(page); 458 SetPageUptodate(page);
459 }
460
357 fuse_invalidate_attr(inode); /* atime changed */ 461 fuse_invalidate_attr(inode); /* atime changed */
358 out: 462 out:
359 unlock_page(page); 463 unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
363static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 467static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
364{ 468{
365 int i; 469 int i;
470 size_t count = req->misc.read.in.size;
471 size_t num_read = req->out.args[0].size;
472 struct inode *inode = req->pages[0]->mapping->host;
473
474 /*
475 * Short read means EOF. If file size is larger, truncate it
476 */
477 if (!req->out.h.error && num_read < count) {
478 loff_t pos = page_offset(req->pages[0]) + num_read;
479 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
480 }
366 481
367 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ 482 fuse_invalidate_attr(inode); /* atime changed */
368 483
369 for (i = 0; i < req->num_pages; i++) { 484 for (i = 0; i < req->num_pages; i++) {
370 struct page *page = req->pages[i]; 485 struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
387 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 502 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
388 req->out.page_zeroing = 1; 503 req->out.page_zeroing = 1;
389 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 504 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
505 req->misc.read.attr_ver = fuse_get_attr_version(fc);
390 if (fc->async_read) { 506 if (fc->async_read) {
391 struct fuse_file *ff = file->private_data; 507 struct fuse_file *ff = file->private_data;
392 req->ff = fuse_file_get(ff); 508 req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
411 struct inode *inode = data->inode; 527 struct inode *inode = data->inode;
412 struct fuse_conn *fc = get_fuse_conn(inode); 528 struct fuse_conn *fc = get_fuse_conn(inode);
413 529
530 fuse_wait_on_page_writeback(inode, page->index);
531
414 if (req->num_pages && 532 if (req->num_pages &&
415 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 533 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
416 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 534 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
477} 595}
478 596
479static void fuse_write_fill(struct fuse_req *req, struct file *file, 597static void fuse_write_fill(struct fuse_req *req, struct file *file,
480 struct inode *inode, loff_t pos, size_t count, 598 struct fuse_file *ff, struct inode *inode,
481 int writepage) 599 loff_t pos, size_t count, int writepage)
482{ 600{
483 struct fuse_conn *fc = get_fuse_conn(inode); 601 struct fuse_conn *fc = get_fuse_conn(inode);
484 struct fuse_file *ff = file->private_data;
485 struct fuse_write_in *inarg = &req->misc.write.in; 602 struct fuse_write_in *inarg = &req->misc.write.in;
486 struct fuse_write_out *outarg = &req->misc.write.out; 603 struct fuse_write_out *outarg = &req->misc.write.out;
487 604
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
490 inarg->offset = pos; 607 inarg->offset = pos;
491 inarg->size = count; 608 inarg->size = count;
492 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; 609 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
493 inarg->flags = file->f_flags; 610 inarg->flags = file ? file->f_flags : 0;
494 req->in.h.opcode = FUSE_WRITE; 611 req->in.h.opcode = FUSE_WRITE;
495 req->in.h.nodeid = get_node_id(inode); 612 req->in.h.nodeid = get_node_id(inode);
496 req->in.argpages = 1; 613 req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
511 fl_owner_t owner) 628 fl_owner_t owner)
512{ 629{
513 struct fuse_conn *fc = get_fuse_conn(inode); 630 struct fuse_conn *fc = get_fuse_conn(inode);
514 fuse_write_fill(req, file, inode, pos, count, 0); 631 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
515 if (owner != NULL) { 632 if (owner != NULL) {
516 struct fuse_write_in *inarg = &req->misc.write.in; 633 struct fuse_write_in *inarg = &req->misc.write.in;
517 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 634 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
533 return 0; 650 return 0;
534} 651}
535 652
653static void fuse_write_update_size(struct inode *inode, loff_t pos)
654{
655 struct fuse_conn *fc = get_fuse_conn(inode);
656 struct fuse_inode *fi = get_fuse_inode(inode);
657
658 spin_lock(&fc->lock);
659 fi->attr_version = ++fc->attr_version;
660 if (pos > inode->i_size)
661 i_size_write(inode, pos);
662 spin_unlock(&fc->lock);
663}
664
536static int fuse_buffered_write(struct file *file, struct inode *inode, 665static int fuse_buffered_write(struct file *file, struct inode *inode,
537 loff_t pos, unsigned count, struct page *page) 666 loff_t pos, unsigned count, struct page *page)
538{ 667{
539 int err; 668 int err;
540 size_t nres; 669 size_t nres;
541 struct fuse_conn *fc = get_fuse_conn(inode); 670 struct fuse_conn *fc = get_fuse_conn(inode);
542 struct fuse_inode *fi = get_fuse_inode(inode);
543 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 671 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
544 struct fuse_req *req; 672 struct fuse_req *req;
545 673
546 if (is_bad_inode(inode)) 674 if (is_bad_inode(inode))
547 return -EIO; 675 return -EIO;
548 676
677 /*
678 * Make sure writepages on the same page are not mixed up with
679 * plain writes.
680 */
681 fuse_wait_on_page_writeback(inode, page->index);
682
549 req = fuse_get_req(fc); 683 req = fuse_get_req(fc);
550 if (IS_ERR(req)) 684 if (IS_ERR(req))
551 return PTR_ERR(req); 685 return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
560 err = -EIO; 694 err = -EIO;
561 if (!err) { 695 if (!err) {
562 pos += nres; 696 pos += nres;
563 spin_lock(&fc->lock); 697 fuse_write_update_size(inode, pos);
564 fi->attr_version = ++fc->attr_version;
565 if (pos > inode->i_size)
566 i_size_write(inode, pos);
567 spin_unlock(&fc->lock);
568
569 if (count == PAGE_CACHE_SIZE) 698 if (count == PAGE_CACHE_SIZE)
570 SetPageUptodate(page); 699 SetPageUptodate(page);
571 } 700 }
@@ -588,6 +717,200 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
588 return res; 717 return res;
589} 718}
590 719
720static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
721 struct inode *inode, loff_t pos,
722 size_t count)
723{
724 size_t res;
725 unsigned offset;
726 unsigned i;
727
728 for (i = 0; i < req->num_pages; i++)
729 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
730
731 res = fuse_send_write(req, file, inode, pos, count, NULL);
732
733 offset = req->page_offset;
734 count = res;
735 for (i = 0; i < req->num_pages; i++) {
736 struct page *page = req->pages[i];
737
738 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
739 SetPageUptodate(page);
740
741 if (count > PAGE_CACHE_SIZE - offset)
742 count -= PAGE_CACHE_SIZE - offset;
743 else
744 count = 0;
745 offset = 0;
746
747 unlock_page(page);
748 page_cache_release(page);
749 }
750
751 return res;
752}
753
754static ssize_t fuse_fill_write_pages(struct fuse_req *req,
755 struct address_space *mapping,
756 struct iov_iter *ii, loff_t pos)
757{
758 struct fuse_conn *fc = get_fuse_conn(mapping->host);
759 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
760 size_t count = 0;
761 int err;
762
763 req->page_offset = offset;
764
765 do {
766 size_t tmp;
767 struct page *page;
768 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
769 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
770 iov_iter_count(ii));
771
772 bytes = min_t(size_t, bytes, fc->max_write - count);
773
774 again:
775 err = -EFAULT;
776 if (iov_iter_fault_in_readable(ii, bytes))
777 break;
778
779 err = -ENOMEM;
780 page = __grab_cache_page(mapping, index);
781 if (!page)
782 break;
783
784 pagefault_disable();
785 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
786 pagefault_enable();
787 flush_dcache_page(page);
788
789 if (!tmp) {
790 unlock_page(page);
791 page_cache_release(page);
792 bytes = min(bytes, iov_iter_single_seg_count(ii));
793 goto again;
794 }
795
796 err = 0;
797 req->pages[req->num_pages] = page;
798 req->num_pages++;
799
800 iov_iter_advance(ii, tmp);
801 count += tmp;
802 pos += tmp;
803 offset += tmp;
804 if (offset == PAGE_CACHE_SIZE)
805 offset = 0;
806
807 if (!fc->big_writes)
808 break;
809 } while (iov_iter_count(ii) && count < fc->max_write &&
810 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
811
812 return count > 0 ? count : err;
813}
814
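[Editor's note] To make the loop bounds above concrete with illustrative values: with 4 KiB pages and an fc->max_write of 32768 negotiated through FUSE_BIG_WRITES, a page-aligned 128 KiB write is packed into four requests of 8 pages (32 KiB) each; the offset == 0 test ends a request at the first partially filled page, and without big_writes the loop breaks after a single page, so each request then carries at most PAGE_CACHE_SIZE bytes.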
815static ssize_t fuse_perform_write(struct file *file,
816 struct address_space *mapping,
817 struct iov_iter *ii, loff_t pos)
818{
819 struct inode *inode = mapping->host;
820 struct fuse_conn *fc = get_fuse_conn(inode);
821 int err = 0;
822 ssize_t res = 0;
823
824 if (is_bad_inode(inode))
825 return -EIO;
826
827 do {
828 struct fuse_req *req;
829 ssize_t count;
830
831 req = fuse_get_req(fc);
832 if (IS_ERR(req)) {
833 err = PTR_ERR(req);
834 break;
835 }
836
837 count = fuse_fill_write_pages(req, mapping, ii, pos);
838 if (count <= 0) {
839 err = count;
840 } else {
841 size_t num_written;
842
843 num_written = fuse_send_write_pages(req, file, inode,
844 pos, count);
845 err = req->out.h.error;
846 if (!err) {
847 res += num_written;
848 pos += num_written;
849
850 /* break out of the loop on short write */
851 if (num_written != count)
852 err = -EIO;
853 }
854 }
855 fuse_put_request(fc, req);
856 } while (!err && iov_iter_count(ii));
857
858 if (res > 0)
859 fuse_write_update_size(inode, pos);
860
861 fuse_invalidate_attr(inode);
862
863 return res > 0 ? res : err;
864}
865
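[Editor's note] A concrete case for the short-write branch above (numbers illustrative): if the filesystem acknowledges only 16384 bytes of a 32768-byte FUSE_WRITE, res still advances by the 16384 bytes that were committed, err is set to -EIO so no further requests are sent, and because res > 0 the caller is returned the partial byte count rather than the error.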
866static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
867 unsigned long nr_segs, loff_t pos)
868{
869 struct file *file = iocb->ki_filp;
870 struct address_space *mapping = file->f_mapping;
871 size_t count = 0;
872 ssize_t written = 0;
873 struct inode *inode = mapping->host;
874 ssize_t err;
875 struct iov_iter i;
876
877 WARN_ON(iocb->ki_pos != pos);
878
879 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
880 if (err)
881 return err;
882
883 mutex_lock(&inode->i_mutex);
884 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
885
886 /* We can write back this queue in page reclaim */
887 current->backing_dev_info = mapping->backing_dev_info;
888
889 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
890 if (err)
891 goto out;
892
893 if (count == 0)
894 goto out;
895
896 err = remove_suid(file->f_path.dentry);
897 if (err)
898 goto out;
899
900 file_update_time(file);
901
902 iov_iter_init(&i, iov, nr_segs, count, 0);
903 written = fuse_perform_write(file, mapping, &i, pos);
904 if (written >= 0)
905 iocb->ki_pos = pos + written;
906
907out:
908 current->backing_dev_info = NULL;
909 mutex_unlock(&inode->i_mutex);
910
911 return written ? written : err;
912}
913
591static void fuse_release_user_pages(struct fuse_req *req, int write) 914static void fuse_release_user_pages(struct fuse_req *req, int write)
592{ 915{
593 unsigned i; 916 unsigned i;
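[Editor's note] As a user-space illustration of the new path (not part of the patch; the mount point and file name are hypothetical), a single large write on a FUSE mount whose daemon negotiated FUSE_BIG_WRITES now reaches the filesystem as multi-page FUSE_WRITE requests built by fuse_perform_write():

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical file on a FUSE mount */
	int fd = open("/mnt/fuse/bigfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	size_t len = 128 * 1024;	/* well beyond one page */
	char *buf;
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	buf = malloc(len);
	if (!buf) {
		close(fd);
		return 1;
	}
	memset(buf, 'x', len);
	n = pwrite(fd, buf, len, 0);	/* packed into multi-page requests */
	printf("wrote %zd of %zu bytes\n", n, len);
	free(buf);
	close(fd);
	return n == (ssize_t)len ? 0 : 1;
}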
@@ -613,7 +936,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
613 936
614 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 937 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
615 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 938 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
616 npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); 939 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
617 down_read(&current->mm->mmap_sem); 940 down_read(&current->mm->mmap_sem);
618 npages = get_user_pages(current, current->mm, user_addr, npages, write, 941 npages = get_user_pages(current, current->mm, user_addr, npages, write,
619 0, req->pages, NULL); 942 0, req->pages, NULL);
@@ -645,14 +968,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
645 968
646 while (count) { 969 while (count) {
647 size_t nres; 970 size_t nres;
648 size_t nbytes = min(count, nmax); 971 size_t nbytes_limit = min(count, nmax);
649 int err = fuse_get_user_pages(req, buf, nbytes, !write); 972 size_t nbytes;
973 int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
650 if (err) { 974 if (err) {
651 res = err; 975 res = err;
652 break; 976 break;
653 } 977 }
654 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 978 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
655 nbytes = min(count, nbytes); 979 nbytes = min(nbytes_limit, nbytes);
656 if (write) 980 if (write)
657 nres = fuse_send_write(req, file, inode, pos, nbytes, 981 nres = fuse_send_write(req, file, inode, pos, nbytes,
658 current->files); 982 current->files);
@@ -683,12 +1007,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
683 } 1007 }
684 fuse_put_request(fc, req); 1008 fuse_put_request(fc, req);
685 if (res > 0) { 1009 if (res > 0) {
686 if (write) { 1010 if (write)
687 spin_lock(&fc->lock); 1011 fuse_write_update_size(inode, pos);
688 if (pos > inode->i_size)
689 i_size_write(inode, pos);
690 spin_unlock(&fc->lock);
691 }
692 *ppos = pos; 1012 *ppos = pos;
693 } 1013 }
694 fuse_invalidate_attr(inode); 1014 fuse_invalidate_attr(inode);
@@ -716,21 +1036,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
716 return res; 1036 return res;
717} 1037}
718 1038
719static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1039static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
720{ 1040{
721 if ((vma->vm_flags & VM_SHARED)) { 1041 __free_page(req->pages[0]);
722 if ((vma->vm_flags & VM_WRITE)) 1042 fuse_file_put(req->ff);
723 return -ENODEV; 1043 fuse_put_request(fc, req);
724 else 1044}
725 vma->vm_flags &= ~VM_MAYWRITE; 1045
1046static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1047{
1048 struct inode *inode = req->inode;
1049 struct fuse_inode *fi = get_fuse_inode(inode);
1050 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1051
1052 list_del(&req->writepages_entry);
1053 dec_bdi_stat(bdi, BDI_WRITEBACK);
1054 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1055 bdi_writeout_inc(bdi);
1056 wake_up(&fi->page_waitq);
1057}
1058
1059/* Called under fc->lock, may release and reacquire it */
1060static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1061{
1062 struct fuse_inode *fi = get_fuse_inode(req->inode);
1063 loff_t size = i_size_read(req->inode);
1064 struct fuse_write_in *inarg = &req->misc.write.in;
1065
1066 if (!fc->connected)
1067 goto out_free;
1068
1069 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1070 inarg->size = PAGE_CACHE_SIZE;
1071 } else if (inarg->offset < size) {
1072 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1073 } else {
1074 /* Got truncated off completely */
1075 goto out_free;
1076 }
1077
1078 req->in.args[1].size = inarg->size;
1079 fi->writectr++;
1080 request_send_background_locked(fc, req);
1081 return;
1082
1083 out_free:
1084 fuse_writepage_finish(fc, req);
1085 spin_unlock(&fc->lock);
1086 fuse_writepage_free(fc, req);
1087 spin_lock(&fc->lock);
1088}
1089
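[Editor's note] Putting numbers to the size clamping above (illustrative, 4096-byte pages): with i_size at 10000, a queued request for the page at offset 4096 goes out with size 4096, the page at offset 8192 goes out with size 10000 & 4095 = 1808 (the EOF tail), and a request for offset 12288 hits the "truncated off completely" branch and is freed without being sent.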
1090/*
1091 * If fi->writectr is positive (no truncate or fsync going on) send
1092 * all queued writepage requests.
1093 *
1094 * Called with fc->lock held
1095 */
1096void fuse_flush_writepages(struct inode *inode)
1097{
1098 struct fuse_conn *fc = get_fuse_conn(inode);
1099 struct fuse_inode *fi = get_fuse_inode(inode);
1100 struct fuse_req *req;
1101
1102 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1103 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1104 list_del_init(&req->list);
1105 fuse_send_writepage(fc, req);
1106 }
1107}
1108
1109static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1110{
1111 struct inode *inode = req->inode;
1112 struct fuse_inode *fi = get_fuse_inode(inode);
1113
1114 mapping_set_error(inode->i_mapping, req->out.h.error);
1115 spin_lock(&fc->lock);
1116 fi->writectr--;
1117 fuse_writepage_finish(fc, req);
1118 spin_unlock(&fc->lock);
1119 fuse_writepage_free(fc, req);
1120}
1121
1122static int fuse_writepage_locked(struct page *page)
1123{
1124 struct address_space *mapping = page->mapping;
1125 struct inode *inode = mapping->host;
1126 struct fuse_conn *fc = get_fuse_conn(inode);
1127 struct fuse_inode *fi = get_fuse_inode(inode);
1128 struct fuse_req *req;
1129 struct fuse_file *ff;
1130 struct page *tmp_page;
1131
1132 set_page_writeback(page);
1133
1134 req = fuse_request_alloc_nofs();
1135 if (!req)
1136 goto err;
1137
1138 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1139 if (!tmp_page)
1140 goto err_free;
1141
1142 spin_lock(&fc->lock);
1143 BUG_ON(list_empty(&fi->write_files));
1144 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1145 req->ff = fuse_file_get(ff);
1146 spin_unlock(&fc->lock);
1147
1148 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
1149
1150 copy_highpage(tmp_page, page);
1151 req->num_pages = 1;
1152 req->pages[0] = tmp_page;
1153 req->page_offset = 0;
1154 req->end = fuse_writepage_end;
1155 req->inode = inode;
1156
1157 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1158 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1159 end_page_writeback(page);
1160
1161 spin_lock(&fc->lock);
1162 list_add(&req->writepages_entry, &fi->writepages);
1163 list_add_tail(&req->list, &fi->queued_writes);
1164 fuse_flush_writepages(inode);
1165 spin_unlock(&fc->lock);
1166
1167 return 0;
1168
1169err_free:
1170 fuse_request_free(req);
1171err:
1172 end_page_writeback(page);
1173 return -ENOMEM;
1174}
1175
1176static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1177{
1178 int err;
1179
1180 err = fuse_writepage_locked(page);
1181 unlock_page(page);
1182
1183 return err;
1184}
1185
1186static int fuse_launder_page(struct page *page)
1187{
1188 int err = 0;
1189 if (clear_page_dirty_for_io(page)) {
1190 struct inode *inode = page->mapping->host;
1191 err = fuse_writepage_locked(page);
1192 if (!err)
1193 fuse_wait_on_page_writeback(inode, page->index);
726 } 1194 }
727 return generic_file_mmap(file, vma); 1195 return err;
728} 1196}
729 1197
730static int fuse_set_page_dirty(struct page *page) 1198/*
1199 * Write back dirty pages now, because there may not be any suitable
1200 * open files later
1201 */
1202static void fuse_vma_close(struct vm_area_struct *vma)
731{ 1203{
732 printk("fuse_set_page_dirty: should not happen\n"); 1204 filemap_write_and_wait(vma->vm_file->f_mapping);
733 dump_stack(); 1205}
1206
1207/*
1208 * Wait for writeback against this page to complete before allowing it
1209 * to be marked dirty again, and hence written back again, possibly
1210 * before the previous writepage completed.
1211 *
1212 * Block here, instead of in ->writepage(), so that the userspace fs
1213 * can only block processes actually operating on the filesystem.
1214 *
1215 * Otherwise unprivileged userspace fs would be able to block
1216 * unrelated:
1217 *
1218 * - page migration
1219 * - sync(2)
1220 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1221 */
1222static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1223{
1224 /*
1225 * Don't use page->mapping as it may become NULL from a
1226 * concurrent truncate.
1227 */
1228 struct inode *inode = vma->vm_file->f_mapping->host;
1229
1230 fuse_wait_on_page_writeback(inode, page->index);
1231 return 0;
1232}
1233
1234static struct vm_operations_struct fuse_file_vm_ops = {
1235 .close = fuse_vma_close,
1236 .fault = filemap_fault,
1237 .page_mkwrite = fuse_page_mkwrite,
1238};
1239
1240static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1241{
1242 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1243 struct inode *inode = file->f_dentry->d_inode;
1244 struct fuse_conn *fc = get_fuse_conn(inode);
1245 struct fuse_inode *fi = get_fuse_inode(inode);
1246 struct fuse_file *ff = file->private_data;
1247 /*
1248 * file may be written through mmap, so chain it onto the
1249 * inode's write_files list
1250 */
1251 spin_lock(&fc->lock);
1252 if (list_empty(&ff->write_entry))
1253 list_add(&ff->write_entry, &fi->write_files);
1254 spin_unlock(&fc->lock);
1255 }
1256 file_accessed(file);
1257 vma->vm_ops = &fuse_file_vm_ops;
734 return 0; 1258 return 0;
735} 1259}
736 1260
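[Editor's note] For completeness, a user-space view of what the mmap changes enable (illustrative, not part of the patch; the path is hypothetical): a MAP_SHARED writable mapping on a FUSE file used to fail with -ENODEV at mmap() time, and now dirties pages through ->page_mkwrite and flushes them via fuse_writepage():

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical, at least page-sized file on a FUSE mount */
	int fd = open("/mnt/fuse/mapped", O_RDWR);
	char *p;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}
	memcpy(p, "hello", 5);		/* faults; ->page_mkwrite waits on writeback */
	msync(p, 4096, MS_SYNC);	/* pushes the dirty page through ->writepage */
	munmap(p, 4096);
	close(fd);
	return 0;
}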
@@ -909,12 +1433,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
909 return err ? 0 : outarg.block; 1433 return err ? 0 : outarg.block;
910} 1434}
911 1435
1436static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1437{
1438 loff_t retval;
1439 struct inode *inode = file->f_path.dentry->d_inode;
1440
1441 mutex_lock(&inode->i_mutex);
1442 switch (origin) {
1443 case SEEK_END:
1444 offset += i_size_read(inode);
1445 break;
1446 case SEEK_CUR:
1447 offset += file->f_pos;
1448 }
1449 retval = -EINVAL;
1450 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1451 if (offset != file->f_pos) {
1452 file->f_pos = offset;
1453 file->f_version = 0;
1454 }
1455 retval = offset;
1456 }
1457 mutex_unlock(&inode->i_mutex);
1458 return retval;
1459}
1460
912static const struct file_operations fuse_file_operations = { 1461static const struct file_operations fuse_file_operations = {
913 .llseek = generic_file_llseek, 1462 .llseek = fuse_file_llseek,
914 .read = do_sync_read, 1463 .read = do_sync_read,
915 .aio_read = fuse_file_aio_read, 1464 .aio_read = fuse_file_aio_read,
916 .write = do_sync_write, 1465 .write = do_sync_write,
917 .aio_write = generic_file_aio_write, 1466 .aio_write = fuse_file_aio_write,
918 .mmap = fuse_file_mmap, 1467 .mmap = fuse_file_mmap,
919 .open = fuse_open, 1468 .open = fuse_open,
920 .flush = fuse_flush, 1469 .flush = fuse_flush,
@@ -926,7 +1475,7 @@ static const struct file_operations fuse_file_operations = {
926}; 1475};
927 1476
928static const struct file_operations fuse_direct_io_file_operations = { 1477static const struct file_operations fuse_direct_io_file_operations = {
929 .llseek = generic_file_llseek, 1478 .llseek = fuse_file_llseek,
930 .read = fuse_direct_read, 1479 .read = fuse_direct_read,
931 .write = fuse_direct_write, 1480 .write = fuse_direct_write,
932 .open = fuse_open, 1481 .open = fuse_open,
@@ -940,10 +1489,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
940 1489
941static const struct address_space_operations fuse_file_aops = { 1490static const struct address_space_operations fuse_file_aops = {
942 .readpage = fuse_readpage, 1491 .readpage = fuse_readpage,
1492 .writepage = fuse_writepage,
1493 .launder_page = fuse_launder_page,
943 .write_begin = fuse_write_begin, 1494 .write_begin = fuse_write_begin,
944 .write_end = fuse_write_end, 1495 .write_end = fuse_write_end,
945 .readpages = fuse_readpages, 1496 .readpages = fuse_readpages,
946 .set_page_dirty = fuse_set_page_dirty, 1497 .set_page_dirty = __set_page_dirty_nobuffers,
947 .bmap = fuse_bmap, 1498 .bmap = fuse_bmap,
948}; 1499};
949 1500
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38ea..bae948657c4f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/rwsem.h>
18 19
19/** Max number of pages that can be used in a single read request */ 20/** Max number of pages that can be used in a single read request */
20#define FUSE_MAX_PAGES_PER_REQ 32 21#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
25/** Congestion starts at 75% of maximum */ 26/** Congestion starts at 75% of maximum */
26#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) 27#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
27 28
29/** Bias for fi->writectr, meaning new writepages must not be sent */
30#define FUSE_NOWRITE INT_MIN
31
28/** It could be as large as PATH_MAX, but would that have any uses? */ 32/** It could be as large as PATH_MAX, but would that have any uses? */
29#define FUSE_NAME_MAX 1024 33#define FUSE_NAME_MAX 1024
30 34
@@ -73,6 +77,19 @@ struct fuse_inode {
73 77
74 /** Files usable in writepage. Protected by fc->lock */ 78 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files; 79 struct list_head write_files;
80
81 /** Writepages pending on truncate or fsync */
82 struct list_head queued_writes;
83
84 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
85 * means more writes are blocked */
86 int writectr;
87
88 /** Waitq for writepage completion */
89 wait_queue_head_t page_waitq;
90
91 /** List of writepage requests (pending or sent) */
92 struct list_head writepages;
76}; 93};
77 94
78/** FUSE specific file data */ 95/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
222 } release; 239 } release;
223 struct fuse_init_in init_in; 240 struct fuse_init_in init_in;
224 struct fuse_init_out init_out; 241 struct fuse_init_out init_out;
225 struct fuse_read_in read_in; 242 struct {
243 struct fuse_read_in in;
244 u64 attr_ver;
245 } read;
226 struct { 246 struct {
227 struct fuse_write_in in; 247 struct fuse_write_in in;
228 struct fuse_write_out out; 248 struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
242 /** File used in the request (or NULL) */ 262 /** File used in the request (or NULL) */
243 struct fuse_file *ff; 263 struct fuse_file *ff;
244 264
265 /** Inode used in the request or NULL */
266 struct inode *inode;
267
268 /** Link on fi->writepages */
269 struct list_head writepages_entry;
270
245 /** Request completion callback */ 271 /** Request completion callback */
246 void (*end)(struct fuse_conn *, struct fuse_req *); 272 void (*end)(struct fuse_conn *, struct fuse_req *);
247 273
@@ -378,6 +404,9 @@ struct fuse_conn {
378 /** Is bmap not implemented by fs? */ 404 /** Is bmap not implemented by fs? */
379 unsigned no_bmap : 1; 405 unsigned no_bmap : 1;
380 406
407 /** Do multi-page cached writes */
408 unsigned big_writes : 1;
409
381 /** The number of requests waiting for completion */ 410 /** The number of requests waiting for completion */
382 atomic_t num_waiting; 411 atomic_t num_waiting;
383 412
@@ -390,8 +419,8 @@ struct fuse_conn {
390 /** Entry on the fuse_conn_list */ 419 /** Entry on the fuse_conn_list */
391 struct list_head entry; 420 struct list_head entry;
392 421
393 /** Unique ID */ 422 /** Device ID from super block */
394 u64 id; 423 dev_t dev;
395 424
396 /** Dentries in the control filesystem */ 425 /** Dentries in the control filesystem */
397 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; 426 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +467,7 @@ extern const struct file_operations fuse_dev_operations;
438/** 467/**
439 * Get a filled in inode 468 * Get a filled in inode
440 */ 469 */
441struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 470struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
442 int generation, struct fuse_attr *attr, 471 int generation, struct fuse_attr *attr,
443 u64 attr_valid, u64 attr_version); 472 u64 attr_valid, u64 attr_version);
444 473
@@ -446,7 +475,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
446 * Send FORGET command 475 * Send FORGET command
447 */ 476 */
448void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 477void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
449 unsigned long nodeid, u64 nlookup); 478 u64 nodeid, u64 nlookup);
450 479
451/** 480/**
452 * Initialize READ or READDIR request 481 * Initialize READ or READDIR request
@@ -504,6 +533,11 @@ void fuse_init_symlink(struct inode *inode);
504void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 533void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
505 u64 attr_valid, u64 attr_version); 534 u64 attr_valid, u64 attr_version);
506 535
536void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
537 u64 attr_valid);
538
539void fuse_truncate(struct address_space *mapping, loff_t offset);
540
507/** 541/**
508 * Initialize the client device 542 * Initialize the client device
509 */ 543 */
@@ -522,6 +556,8 @@ void fuse_ctl_cleanup(void);
522 */ 556 */
523struct fuse_req *fuse_request_alloc(void); 557struct fuse_req *fuse_request_alloc(void);
524 558
559struct fuse_req *fuse_request_alloc_nofs(void);
560
525/** 561/**
526 * Free a request 562 * Free a request
527 */ 563 */
@@ -558,6 +594,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
558 */ 594 */
559void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 595void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
560 596
597void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
598
561/* Abort all requests */ 599/* Abort all requests */
562void fuse_abort_conn(struct fuse_conn *fc); 600void fuse_abort_conn(struct fuse_conn *fc);
563 601
@@ -600,3 +638,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
600 638
601int fuse_update_attributes(struct inode *inode, struct kstat *stat, 639int fuse_update_attributes(struct inode *inode, struct kstat *stat,
602 struct file *file, bool *refreshed); 640 struct file *file, bool *refreshed);
641
642void fuse_flush_writepages(struct inode *inode);
643
644void fuse_set_nowrite(struct inode *inode);
645void fuse_release_nowrite(struct inode *inode);
646
647u64 fuse_get_attr_version(struct fuse_conn *fc);
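[Editor's note] fuse_set_nowrite() and fuse_release_nowrite(), declared just above, are implemented in dir.c in the same patch and are not shown in this excerpt. A sketch of how the FUSE_NOWRITE bias is meant to work — pieced together from the writectr comments and fuse_flush_writepages() above, an illustration rather than the patch's exact code — looks like:

/* Block new writepages and wait for the in-flight ones to retire (sketch) */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;	/* drive the counter negative */
	spin_unlock(&fc->lock);
	/* fuse_writepage_end() decrements writectr and wakes page_waitq */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}

/* Undo the bias and send whatever queued up while writes were blocked (sketch) */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
	spin_unlock(&fc->lock);
}

While the bias is in place writectr stays negative, so fuse_flush_writepages() leaves queued requests alone; releasing it resets the counter and flushes them.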
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4df34da2284a..fb77e0962132 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
59 fi->nodeid = 0; 59 fi->nodeid = 0;
60 fi->nlookup = 0; 60 fi->nlookup = 0;
61 fi->attr_version = 0; 61 fi->attr_version = 0;
62 fi->writectr = 0;
62 INIT_LIST_HEAD(&fi->write_files); 63 INIT_LIST_HEAD(&fi->write_files);
64 INIT_LIST_HEAD(&fi->queued_writes);
65 INIT_LIST_HEAD(&fi->writepages);
66 init_waitqueue_head(&fi->page_waitq);
63 fi->forget_req = fuse_request_alloc(); 67 fi->forget_req = fuse_request_alloc();
64 if (!fi->forget_req) { 68 if (!fi->forget_req) {
65 kmem_cache_free(fuse_inode_cachep, inode); 69 kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
73{ 77{
74 struct fuse_inode *fi = get_fuse_inode(inode); 78 struct fuse_inode *fi = get_fuse_inode(inode);
75 BUG_ON(!list_empty(&fi->write_files)); 79 BUG_ON(!list_empty(&fi->write_files));
80 BUG_ON(!list_empty(&fi->queued_writes));
76 if (fi->forget_req) 81 if (fi->forget_req)
77 fuse_request_free(fi->forget_req); 82 fuse_request_free(fi->forget_req);
78 kmem_cache_free(fuse_inode_cachep, inode); 83 kmem_cache_free(fuse_inode_cachep, inode);
79} 84}
80 85
81void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 86void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
82 unsigned long nodeid, u64 nlookup) 87 u64 nodeid, u64 nlookup)
83{ 88{
84 struct fuse_forget_in *inarg = &req->misc.forget_in; 89 struct fuse_forget_in *inarg = &req->misc.forget_in;
85 inarg->nlookup = nlookup; 90 inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
109 return 0; 114 return 0;
110} 115}
111 116
112static void fuse_truncate(struct address_space *mapping, loff_t offset) 117void fuse_truncate(struct address_space *mapping, loff_t offset)
113{ 118{
114 /* See vmtruncate() */ 119 /* See vmtruncate() */
115 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 120 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 122 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 123}
119 124
120 125void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
121void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 126 u64 attr_valid)
122 u64 attr_valid, u64 attr_version)
123{ 127{
124 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
125 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
126 loff_t oldsize;
127 130
128 spin_lock(&fc->lock);
129 if (attr_version != 0 && fi->attr_version > attr_version) {
130 spin_unlock(&fc->lock);
131 return;
132 }
133 fi->attr_version = ++fc->attr_version; 131 fi->attr_version = ++fc->attr_version;
134 fi->i_time = attr_valid; 132 fi->i_time = attr_valid;
135 133
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
159 fi->orig_i_mode = inode->i_mode; 157 fi->orig_i_mode = inode->i_mode;
160 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 158 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
161 inode->i_mode &= ~S_ISVTX; 159 inode->i_mode &= ~S_ISVTX;
160}
161
162void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
163 u64 attr_valid, u64 attr_version)
164{
165 struct fuse_conn *fc = get_fuse_conn(inode);
166 struct fuse_inode *fi = get_fuse_inode(inode);
167 loff_t oldsize;
168
169 spin_lock(&fc->lock);
170 if (attr_version != 0 && fi->attr_version > attr_version) {
171 spin_unlock(&fc->lock);
172 return;
173 }
174
175 fuse_change_attributes_common(inode, attr, attr_valid);
162 176
163 oldsize = inode->i_size; 177 oldsize = inode->i_size;
164 i_size_write(inode, attr->size); 178 i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
193 207
194static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 208static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
195{ 209{
196 unsigned long nodeid = *(unsigned long *) _nodeidp; 210 u64 nodeid = *(u64 *) _nodeidp;
197 if (get_node_id(inode) == nodeid) 211 if (get_node_id(inode) == nodeid)
198 return 1; 212 return 1;
199 else 213 else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
202 216
203static int fuse_inode_set(struct inode *inode, void *_nodeidp) 217static int fuse_inode_set(struct inode *inode, void *_nodeidp)
204{ 218{
205 unsigned long nodeid = *(unsigned long *) _nodeidp; 219 u64 nodeid = *(u64 *) _nodeidp;
206 get_fuse_inode(inode)->nodeid = nodeid; 220 get_fuse_inode(inode)->nodeid = nodeid;
207 return 0; 221 return 0;
208} 222}
209 223
210struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 224struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
211 int generation, struct fuse_attr *attr, 225 int generation, struct fuse_attr *attr,
212 u64 attr_valid, u64 attr_version) 226 u64 attr_valid, u64 attr_version)
213{ 227{
@@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
447 return 0; 461 return 0;
448} 462}
449 463
450static struct fuse_conn *new_conn(void) 464static struct fuse_conn *new_conn(struct super_block *sb)
451{ 465{
452 struct fuse_conn *fc; 466 struct fuse_conn *fc;
453 int err; 467 int err;
@@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void)
468 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
469 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
470 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
485 /* fuse does its own writeback accounting */
486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
487 fc->dev = sb->s_dev;
471 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
472 if (err) { 489 if (err)
473 kfree(fc); 490 goto error_kfree;
474 fc = NULL; 491 err = bdi_register_dev(&fc->bdi, fc->dev);
475 goto out; 492 if (err)
476 } 493 goto error_bdi_destroy;
494 /*
495 * For a single fuse filesystem use max 1% of dirty +
496 * writeback threshold.
497 *
498 * This gives about 1M of write buffer for memory maps on a
499 * machine with 1G and 10% dirty_ratio, which should be more
500 * than enough.
501 *
502 * Privileged users can raise it by writing to
503 *
504 * /sys/class/bdi/<bdi>/max_ratio
505 */
506 bdi_set_max_ratio(&fc->bdi, 1);
477 fc->reqctr = 0; 507 fc->reqctr = 0;
478 fc->blocked = 1; 508 fc->blocked = 1;
479 fc->attr_version = 1; 509 fc->attr_version = 1;
480 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 510 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
481 } 511 }
482out:
483 return fc; 512 return fc;
513
514error_bdi_destroy:
515 bdi_destroy(&fc->bdi);
516error_kfree:
517 mutex_destroy(&fc->inst_mutex);
518 kfree(fc);
519 return NULL;
484} 520}
485 521
486void fuse_conn_put(struct fuse_conn *fc) 522void fuse_conn_put(struct fuse_conn *fc)
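[Editor's note] The comment's arithmetic, with illustrative numbers: 1 GiB of RAM and a 10% dirty_ratio allow roughly 100 MiB of dirty plus writeback pages system-wide, so bdi_set_max_ratio(&fc->bdi, 1) caps a single fuse connection at about 1% of that, roughly 1 MiB, until an administrator raises /sys/class/bdi/<bdi>/max_ratio.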
@@ -540,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
540 fc->no_lock = 1; 576 fc->no_lock = 1;
541 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 577 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
542 fc->atomic_o_trunc = 1; 578 fc->atomic_o_trunc = 1;
579 if (arg->flags & FUSE_BIG_WRITES)
580 fc->big_writes = 1;
543 } else { 581 } else {
544 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 582 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
545 fc->no_lock = 1; 583 fc->no_lock = 1;
@@ -548,6 +586,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
548 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 586 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
549 fc->minor = arg->minor; 587 fc->minor = arg->minor;
550 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 588 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
589 fc->max_write = max_t(unsigned, 4096, fc->max_write);
551 fc->conn_init = 1; 590 fc->conn_init = 1;
552 } 591 }
553 fuse_put_request(fc, req); 592 fuse_put_request(fc, req);
@@ -562,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
562 arg->major = FUSE_KERNEL_VERSION; 601 arg->major = FUSE_KERNEL_VERSION;
563 arg->minor = FUSE_KERNEL_MINOR_VERSION; 602 arg->minor = FUSE_KERNEL_MINOR_VERSION;
564 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 603 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
565 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC; 604 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
605 FUSE_BIG_WRITES;
566 req->in.h.opcode = FUSE_INIT; 606 req->in.h.opcode = FUSE_INIT;
567 req->in.numargs = 1; 607 req->in.numargs = 1;
568 req->in.args[0].size = sizeof(*arg); 608 req->in.args[0].size = sizeof(*arg);
@@ -578,12 +618,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
578 request_send_background(fc, req); 618 request_send_background(fc, req);
579} 619}
580 620
581static u64 conn_id(void)
582{
583 static u64 ctr = 1;
584 return ctr++;
585}
586
587static int fuse_fill_super(struct super_block *sb, void *data, int silent) 621static int fuse_fill_super(struct super_block *sb, void *data, int silent)
588{ 622{
589 struct fuse_conn *fc; 623 struct fuse_conn *fc;
@@ -621,14 +655,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
621 if (file->f_op != &fuse_dev_operations) 655 if (file->f_op != &fuse_dev_operations)
622 return -EINVAL; 656 return -EINVAL;
623 657
624 fc = new_conn(); 658 fc = new_conn(sb);
625 if (!fc) 659 if (!fc)
626 return -ENOMEM; 660 return -ENOMEM;
627 661
628 fc->flags = d.flags; 662 fc->flags = d.flags;
629 fc->user_id = d.user_id; 663 fc->user_id = d.user_id;
630 fc->group_id = d.group_id; 664 fc->group_id = d.group_id;
631 fc->max_read = d.max_read; 665 fc->max_read = max_t(unsigned, 4096, d.max_read);
632 666
633 /* Used by get_root_inode() */ 667 /* Used by get_root_inode() */
634 sb->s_fs_info = fc; 668 sb->s_fs_info = fc;
@@ -659,7 +693,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
659 if (file->private_data) 693 if (file->private_data)
660 goto err_unlock; 694 goto err_unlock;
661 695
662 fc->id = conn_id();
663 err = fuse_ctl_add_conn(fc); 696 err = fuse_ctl_add_conn(fc);
664 if (err) 697 if (err)
665 goto err_unlock; 698 goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049c..a4ff271df9ee 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
212{ 212{
213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); 213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
214 if (!gdlm_kset) { 214 if (!gdlm_kset) {
215 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 215 printk(KERN_WARNING "%s: can not create kset\n", __func__);
216 return -ENOMEM; 216 return -ENOMEM;
217 } 217 }
218 return 0; 218 return 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd80..7f48576289c9 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 41
42#define gfs2_assert_withdraw(sdp, assertion) \ 42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ 43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__)) 44 __func__, __FILE__, __LINE__))
45 45
46 46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 49
50#define gfs2_assert_warn(sdp, assertion) \ 50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ 51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__)) 52 __func__, __FILE__, __LINE__))
53 53
54 54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, 55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line); 56 const char *function, char *file, unsigned int line);
57 57
58#define gfs2_consist(sdp) \ 58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) 59gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
60 60
61 61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, 62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line); 63 const char *function, char *file, unsigned int line);
64 64
65#define gfs2_consist_inode(ip) \ 65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) 66gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
67 67
68 68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, 69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line); 70 const char *function, char *file, unsigned int line);
71 71
72#define gfs2_consist_rgrpd(rgd) \ 72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) 73gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
74 74
75 75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
91} 91}
92 92
93#define gfs2_meta_check(sdp, bh) \ 93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) 94gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
95 95
96 96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
118} 118}
119 119
120#define gfs2_metatype_check(sdp, bh, type) \ 120#define gfs2_metatype_check(sdp, bh, type) \
121gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) 121gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
122 122
123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, 123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
124 u16 format) 124 u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
134 char *file, unsigned int line); 134 char *file, unsigned int line);
135 135
136#define gfs2_io_error(sdp) \ 136#define gfs2_io_error(sdp) \
137gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); 137gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
138 138
139 139
140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
141 const char *function, char *file, unsigned int line); 141 const char *function, char *file, unsigned int line);
142 142
143#define gfs2_io_error_bh(sdp, bh) \ 143#define gfs2_io_error_bh(sdp, bh) \
144gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); 144gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc43021..f6621a785202 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
208 struct hfs_bnode *node, *next_node; 208 struct hfs_bnode *node, *next_node;
209 struct page **pagep; 209 struct page **pagep;
210 u32 nidx, idx; 210 u32 nidx, idx;
211 u16 off, len; 211 unsigned off;
212 u16 off16;
213 u16 len;
212 u8 *data, byte, m; 214 u8 *data, byte, m;
213 int i; 215 int i;
214 216
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
235 node = hfs_bnode_find(tree, nidx); 237 node = hfs_bnode_find(tree, nidx);
236 if (IS_ERR(node)) 238 if (IS_ERR(node))
237 return node; 239 return node;
238 len = hfs_brec_lenoff(node, 2, &off); 240 len = hfs_brec_lenoff(node, 2, &off16);
241 off = off16;
239 242
240 off += node->page_offset; 243 off += node->page_offset;
241 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 244 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
280 return next_node; 283 return next_node;
281 node = next_node; 284 node = next_node;
282 285
283 len = hfs_brec_lenoff(node, 0, &off); 286 len = hfs_brec_lenoff(node, 0, &off16);
287 off = off16;
284 off += node->page_offset; 288 off += node->page_offset;
285 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 289 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
286 data = kmap(*pagep); 290 data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7f..36ca2e1a4fa3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); 215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); 216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
217 mdb->drAtrb = attrib; 217 mdb->drAtrb = attrib;
218 mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); 218 be32_add_cpu(&mdb->drWrCnt, 1);
219 mdb->drLsMod = hfs_mtime(); 219 mdb->drLsMod = hfs_mtime();
220 220
221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32de44ed0021..8cf67974adf6 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
297 return 0; 297 return 0;
298 } 298 }
299 p = match_strdup(&args[0]); 299 p = match_strdup(&args[0]);
300 hsb->nls_disk = load_nls(p); 300 if (p)
301 hsb->nls_disk = load_nls(p);
301 if (!hsb->nls_disk) { 302 if (!hsb->nls_disk) {
302 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); 303 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
303 kfree(p); 304 kfree(p);
@@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
311 return 0; 312 return 0;
312 } 313 }
313 p = match_strdup(&args[0]); 314 p = match_strdup(&args[0]);
314 hsb->nls_io = load_nls(p); 315 if (p)
316 hsb->nls_io = load_nls(p);
315 if (!hsb->nls_io) { 317 if (!hsb->nls_io) {
316 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); 318 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
317 kfree(p); 319 kfree(p);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a42..e49fcee1e293 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
184 struct hfs_bnode *node, *next_node; 184 struct hfs_bnode *node, *next_node;
185 struct page **pagep; 185 struct page **pagep;
186 u32 nidx, idx; 186 u32 nidx, idx;
187 u16 off, len; 187 unsigned off;
188 u16 off16;
189 u16 len;
188 u8 *data, byte, m; 190 u8 *data, byte, m;
189 int i; 191 int i;
190 192
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
211 node = hfs_bnode_find(tree, nidx); 213 node = hfs_bnode_find(tree, nidx);
212 if (IS_ERR(node)) 214 if (IS_ERR(node))
213 return node; 215 return node;
214 len = hfs_brec_lenoff(node, 2, &off); 216 len = hfs_brec_lenoff(node, 2, &off16);
217 off = off16;
215 218
216 off += node->page_offset; 219 off += node->page_offset;
217 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 220 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
256 return next_node; 259 return next_node;
257 node = next_node; 260 node = next_node;
258 261
259 len = hfs_brec_lenoff(node, 0, &off); 262 len = hfs_brec_lenoff(node, 0, &off16);
263 off = off16;
260 off += node->page_offset; 264 off += node->page_offset;
261 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 265 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
262 data = kmap(*pagep); 266 data = kmap(*pagep);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d72d0a8b25aa..9e59537b43d5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
311int hfsplus_rename_cat(u32, struct inode *, struct qstr *, 311int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
312 struct inode *, struct qstr *); 312 struct inode *, struct qstr *);
313 313
314/* dir.c */
315extern const struct inode_operations hfsplus_dir_inode_operations;
316extern const struct file_operations hfsplus_dir_operations;
317
314/* extents.c */ 318/* extents.c */
315int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 319int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
316void hfsplus_ext_write_extent(struct inode *); 320void hfsplus_ext_write_extent(struct inode *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37744cf3706a..67e1c8b467c4 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -65,6 +65,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
65 BUG(); 65 BUG();
66 return 0; 66 return 0;
67 } 67 }
68 if (!tree)
69 return 0;
68 if (tree->node_size >= PAGE_CACHE_SIZE) { 70 if (tree->node_size >= PAGE_CACHE_SIZE) {
69 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); 71 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
70 spin_lock(&tree->hash_lock); 72 spin_lock(&tree->hash_lock);
@@ -278,9 +280,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
278 return 0; 280 return 0;
279} 281}
280 282
281extern const struct inode_operations hfsplus_dir_inode_operations;
282extern struct file_operations hfsplus_dir_operations;
283
284static const struct inode_operations hfsplus_file_inode_operations = { 283static const struct inode_operations hfsplus_file_inode_operations = {
285 .lookup = hfsplus_file_lookup, 284 .lookup = hfsplus_file_lookup,
286 .truncate = hfsplus_file_truncate, 285 .truncate = hfsplus_file_truncate,
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index dc64fac00831..9997cbf8beb5 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 if (p)
136 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 137 if (!sbi->nls) {
137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 138 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 139 kfree(p);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b0f9ad362d1d..ce97a54518d8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
360 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
363 } 363 }
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
423 */ 423 */
424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
425 vhdr->modify_date = hfsp_now2mt(); 425 vhdr->modify_date = hfsp_now2mt();
426 vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); 426 be32_add_cpu(&vhdr->write_count, 1);
427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 72cab78f0509..175d08eacc86 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
47 return 0; 47 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 49
50 extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); 50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 51 wd->embed_start = (extent >> 16) & 0xFFFF;
52 wd->embed_count = extent & 0xFFFF; 52 wd->embed_count = extent & 0xFFFF;
53 53
diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile
index 6890433f7595..8a1f50344368 100644
--- a/fs/hppfs/Makefile
+++ b/fs/hppfs/Makefile
@@ -1,9 +1,9 @@
1# 1#
2# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) 2# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6hppfs-objs := hppfs_kern.o 6hppfs-objs := hppfs.o
7 7
8obj-y = 8obj-y =
9obj-$(CONFIG_HPPFS) += hppfs.o 9obj-$(CONFIG_HPPFS) += $(hppfs-objs)
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs.c
index 8601d8ef3b55..65077aa90f0a 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs.c
@@ -33,7 +33,7 @@ struct hppfs_private {
33}; 33};
34 34
35struct hppfs_inode_info { 35struct hppfs_inode_info {
36 struct dentry *proc_dentry; 36 struct dentry *proc_dentry;
37 struct inode vfs_inode; 37 struct inode vfs_inode;
38}; 38};
39 39
@@ -52,7 +52,7 @@ static int is_pid(struct dentry *dentry)
52 int i; 52 int i;
53 53
54 sb = dentry->d_sb; 54 sb = dentry->d_sb;
55 if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) 55 if (dentry->d_parent != sb->s_root)
56 return 0; 56 return 0;
57 57
58 for (i = 0; i < dentry->d_name.len; i++) { 58 for (i = 0; i < dentry->d_name.len; i++) {
@@ -136,7 +136,7 @@ static int file_removed(struct dentry *dentry, const char *file)
136} 136}
137 137
138static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, 138static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
139 struct nameidata *nd) 139 struct nameidata *nd)
140{ 140{
141 struct dentry *proc_dentry, *new, *parent; 141 struct dentry *proc_dentry, *new, *parent;
142 struct inode *inode; 142 struct inode *inode;
@@ -254,6 +254,8 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
254 int err; 254 int err;
255 255
256 if (hppfs->contents != NULL) { 256 if (hppfs->contents != NULL) {
257 int rem;
258
257 if (*ppos >= hppfs->len) 259 if (*ppos >= hppfs->len)
258 return 0; 260 return 0;
259 261
@@ -267,8 +269,10 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
267 269
268 if (off + count > hppfs->len) 270 if (off + count > hppfs->len)
269 count = hppfs->len - off; 271 count = hppfs->len - off;
270 copy_to_user(buf, &data->contents[off], count); 272 rem = copy_to_user(buf, &data->contents[off], count);
271 *ppos += count; 273 *ppos += count - rem;
274 if (rem > 0)
275 return -EFAULT;
272 } else if (hppfs->host_fd != -1) { 276 } else if (hppfs->host_fd != -1) {
273 err = os_seek_file(hppfs->host_fd, *ppos); 277 err = os_seek_file(hppfs->host_fd, *ppos);
274 if (err) { 278 if (err) {
@@ -285,21 +289,15 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
285 return count; 289 return count;
286} 290}
287 291
288static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, 292static ssize_t hppfs_write(struct file *file, const char __user *buf,
289 loff_t *ppos) 293 size_t len, loff_t *ppos)
290{ 294{
291 struct hppfs_private *data = file->private_data; 295 struct hppfs_private *data = file->private_data;
292 struct file *proc_file = data->proc_file; 296 struct file *proc_file = data->proc_file;
293 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 297 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
294 int err;
295 298
296 write = proc_file->f_path.dentry->d_inode->i_fop->write; 299 write = proc_file->f_path.dentry->d_inode->i_fop->write;
297 300 return (*write)(proc_file, buf, len, ppos);
298 proc_file->f_pos = file->f_pos;
299 err = (*write)(proc_file, buf, len, &proc_file->f_pos);
300 file->f_pos = proc_file->f_pos;
301
302 return err;
303} 301}
304 302
305static int open_host_sock(char *host_file, int *filter_out) 303static int open_host_sock(char *host_file, int *filter_out)
@@ -357,7 +355,7 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter,
357 355
358 if (filter) { 356 if (filter) {
359 while ((n = read_proc(proc_file, data->contents, 357 while ((n = read_proc(proc_file, data->contents,
360 sizeof(data->contents), NULL, 0)) > 0) 358 sizeof(data->contents), NULL, 0)) > 0)
361 os_write_file(fd, data->contents, n); 359 os_write_file(fd, data->contents, n);
362 err = os_shutdown_socket(fd, 0, 1); 360 err = os_shutdown_socket(fd, 0, 1);
363 if (err) { 361 if (err) {
@@ -429,8 +427,8 @@ static int file_mode(int fmode)
429static int hppfs_open(struct inode *inode, struct file *file) 427static int hppfs_open(struct inode *inode, struct file *file)
430{ 428{
431 struct hppfs_private *data; 429 struct hppfs_private *data;
432 struct dentry *proc_dentry;
433 struct vfsmount *proc_mnt; 430 struct vfsmount *proc_mnt;
431 struct dentry *proc_dentry;
434 char *host_file; 432 char *host_file;
435 int err, fd, type, filter; 433 int err, fd, type, filter;
436 434
@@ -492,8 +490,8 @@ static int hppfs_open(struct inode *inode, struct file *file)
492static int hppfs_dir_open(struct inode *inode, struct file *file) 490static int hppfs_dir_open(struct inode *inode, struct file *file)
493{ 491{
494 struct hppfs_private *data; 492 struct hppfs_private *data;
495 struct dentry *proc_dentry;
496 struct vfsmount *proc_mnt; 493 struct vfsmount *proc_mnt;
494 struct dentry *proc_dentry;
497 int err; 495 int err;
498 496
499 err = -ENOMEM; 497 err = -ENOMEM;
@@ -620,6 +618,9 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
620 618
621void hppfs_delete_inode(struct inode *ino) 619void hppfs_delete_inode(struct inode *ino)
622{ 620{
621 dput(HPPFS_I(ino)->proc_dentry);
622 mntput(ino->i_sb->s_fs_info);
623
623 clear_inode(ino); 624 clear_inode(ino);
624} 625}
625 626
@@ -628,69 +629,46 @@ static void hppfs_destroy_inode(struct inode *inode)
628 kfree(HPPFS_I(inode)); 629 kfree(HPPFS_I(inode));
629} 630}
630 631
631static void hppfs_put_super(struct super_block *sb)
632{
633 mntput(sb->s_fs_info);
634}
635
636static const struct super_operations hppfs_sbops = { 632static const struct super_operations hppfs_sbops = {
637 .alloc_inode = hppfs_alloc_inode, 633 .alloc_inode = hppfs_alloc_inode,
638 .destroy_inode = hppfs_destroy_inode, 634 .destroy_inode = hppfs_destroy_inode,
639 .delete_inode = hppfs_delete_inode, 635 .delete_inode = hppfs_delete_inode,
640 .statfs = hppfs_statfs, 636 .statfs = hppfs_statfs,
641 .put_super = hppfs_put_super,
642}; 637};
643 638
644static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 639static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
645 int buflen) 640 int buflen)
646{ 641{
647 struct file *proc_file;
648 struct dentry *proc_dentry; 642 struct dentry *proc_dentry;
649 struct vfsmount *proc_mnt;
650 int ret;
651 643
652 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 644 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
653 proc_mnt = dentry->d_sb->s_fs_info; 645 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
654 646 buflen);
655 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
656 if (IS_ERR(proc_file))
657 return PTR_ERR(proc_file);
658
659 ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen);
660
661 fput(proc_file);
662
663 return ret;
664} 647}
665 648
666static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 649static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
667{ 650{
668 struct file *proc_file;
669 struct dentry *proc_dentry; 651 struct dentry *proc_dentry;
670 struct vfsmount *proc_mnt;
671 void *ret;
672 652
673 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 653 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
674 proc_mnt = dentry->d_sb->s_fs_info;
675
676 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
677 if (IS_ERR(proc_file))
678 return proc_file;
679
680 ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
681 654
682 fput(proc_file); 655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
656}
683 657
684 return ret; 658int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
659{
660 return generic_permission(inode, mask, NULL);
685} 661}
686 662
687static const struct inode_operations hppfs_dir_iops = { 663static const struct inode_operations hppfs_dir_iops = {
688 .lookup = hppfs_lookup, 664 .lookup = hppfs_lookup,
665 .permission = hppfs_permission,
689}; 666};
690 667
691static const struct inode_operations hppfs_link_iops = { 668static const struct inode_operations hppfs_link_iops = {
692 .readlink = hppfs_readlink, 669 .readlink = hppfs_readlink,
693 .follow_link = hppfs_follow_link, 670 .follow_link = hppfs_follow_link,
671 .permission = hppfs_permission,
694}; 672};
695 673
696static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
@@ -712,7 +690,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
712 inode->i_fop = &hppfs_file_fops; 690 inode->i_fop = &hppfs_file_fops;
713 } 691 }
714 692
715 HPPFS_I(inode)->proc_dentry = dentry; 693 HPPFS_I(inode)->proc_dentry = dget(dentry);
716 694
717 inode->i_uid = proc_ino->i_uid; 695 inode->i_uid = proc_ino->i_uid;
718 inode->i_gid = proc_ino->i_gid; 696 inode->i_gid = proc_ino->i_gid;
@@ -725,7 +703,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
725 inode->i_size = proc_ino->i_size; 703 inode->i_size = proc_ino->i_size;
726 inode->i_blocks = proc_ino->i_blocks; 704 inode->i_blocks = proc_ino->i_blocks;
727 705
728 return 0; 706 return inode;
729} 707}
730 708
731static int hppfs_fill_super(struct super_block *sb, void *d, int silent) 709static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
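The hppfs hunks above replace the dentry_open()/fput() round trip in readlink and follow_link with a direct delegation to the underlying procfs dentry, which is now pinned with dget() when the wrapper inode is created and released with dput() in hppfs_delete_inode(). A minimal userspace sketch of that pinning pattern, with purely illustrative names:

#include <stdio.h>

/* Minimal sketch (illustrative names): take a reference when the pointer
 * is cached, drop it when the holder goes away, so intermediate users can
 * dereference the cached pointer without re-acquiring anything. */
struct obj {
        int refcount;
};

static struct obj *obj_get(struct obj *o) { o->refcount++; return o; }
static void obj_put(struct obj *o)        { o->refcount--; }

struct wrapper {
        struct obj *target;             /* pinned for the wrapper's lifetime */
};

static void wrapper_init(struct wrapper *w, struct obj *o)
{
        w->target = obj_get(o);         /* cf. dget() in get_inode() */
}

static void wrapper_destroy(struct wrapper *w)
{
        obj_put(w->target);             /* cf. dput() in hppfs_delete_inode() */
}

int main(void)
{
        struct obj proc_dentry = { .refcount = 1 };
        struct wrapper inode;

        wrapper_init(&inode, &proc_dentry);
        wrapper_destroy(&inode);
        printf("refcount back to %d\n", proc_dentry.refcount);  /* 1 */
        return 0;
}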
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9783723e8ffe..aeabf80f81a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 49};
50 50
51int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
diff --git a/fs/inode.c b/fs/inode.c
index 27ee1af50d02..c36d9480335c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
495 struct inode * inode = NULL; 495 struct inode * inode = NULL;
496 496
497repeat: 497repeat:
498 hlist_for_each (node, head) { 498 hlist_for_each_entry(inode, node, head, i_hash) {
499 inode = hlist_entry(node, struct inode, i_hash);
500 if (inode->i_sb != sb) 499 if (inode->i_sb != sb)
501 continue; 500 continue;
502 if (!test(inode, data)) 501 if (!test(inode, data))
@@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
520 struct inode * inode = NULL; 519 struct inode * inode = NULL;
521 520
522repeat: 521repeat:
523 hlist_for_each (node, head) { 522 hlist_for_each_entry(inode, node, head, i_hash) {
524 inode = hlist_entry(node, struct inode, i_hash);
525 if (inode->i_ino != ino) 523 if (inode->i_ino != ino)
526 continue; 524 continue;
527 if (inode->i_sb != sb) 525 if (inode->i_sb != sb)
@@ -1151,13 +1149,8 @@ static inline void iput_final(struct inode *inode)
1151void iput(struct inode *inode) 1149void iput(struct inode *inode)
1152{ 1150{
1153 if (inode) { 1151 if (inode) {
1154 const struct super_operations *op = inode->i_sb->s_op;
1155
1156 BUG_ON(inode->i_state == I_CLEAR); 1152 BUG_ON(inode->i_state == I_CLEAR);
1157 1153
1158 if (op && op->put_inode)
1159 op->put_inode(inode);
1160
1161 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1154 if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
1162 iput_final(inode); 1155 iput_final(inode);
1163 } 1156 }
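The find_inode()/find_inode_fast() hunks swap the open-coded hlist_for_each() plus hlist_entry() pair for hlist_for_each_entry(), which folds the container_of conversion into the loop macro; the iput() hunk drops the call to the ->put_inode super_operations hook. A stripped-down userspace sketch of the two loop forms, assuming a simplified hlist and the 4-argument macro signature used here:

#include <stddef.h>
#include <stdio.h>

struct hlist_node { struct hlist_node *next; };
struct hlist_head { struct hlist_node *first; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* open-coded walk: fetch the node, then convert to the entry by hand */
#define hlist_for_each(pos, head) \
        for (pos = (head)->first; pos; pos = pos->next)

/* entry-based walk: the conversion happens inside the macro */
#define hlist_for_each_entry(tpos, pos, head, member)                     \
        for (pos = (head)->first;                                         \
             pos && (tpos = container_of(pos, typeof(*tpos), member), 1); \
             pos = pos->next)

struct item {
        int ino;
        struct hlist_node hash;
};

int main(void)
{
        struct item a = { .ino = 1 }, b = { .ino = 2 };
        struct hlist_head head = { .first = &a.hash };
        struct hlist_node *node;
        struct item *it;

        a.hash.next = &b.hash;
        b.hash.next = NULL;

        hlist_for_each_entry(it, node, &head, hash)
                printf("ino %d\n", it->ino);
        return 0;
}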
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 7b94a1e3c015..6676c06bb7c1 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void)
598 } 598 }
599 599
600 ih = inotify_init(&inotify_user_ops); 600 ih = inotify_init(&inotify_user_ops);
601 if (unlikely(IS_ERR(ih))) { 601 if (IS_ERR(ih)) {
602 ret = PTR_ERR(ih); 602 ret = PTR_ERR(ih);
603 goto out_free_dev; 603 goto out_free_dev;
604 } 604 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f32fbde2175e..7db32b3382d3 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -28,8 +28,8 @@
28 * 28 *
29 * Returns 0 on success, -errno on error. 29 * Returns 0 on success, -errno on error.
30 */ 30 */
31long vfs_ioctl(struct file *filp, unsigned int cmd, 31static long vfs_ioctl(struct file *filp, unsigned int cmd,
32 unsigned long arg) 32 unsigned long arg)
33{ 33{
34 int error = -ENOTTY; 34 int error = -ENOTTY;
35 35
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df1..2f0dc5a14633 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
145 } 145 }
146 de = tmpde; 146 de = tmpde;
147 } 147 }
148 /* Basic sanity check, whether name doesn't exceed dir entry */
149 if (de_len < de->name_len[0] +
150 sizeof(struct iso_directory_record)) {
151 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
152 " in block %lu of inode %lu\n", block,
153 inode->i_ino);
154 return -EIO;
155 }
148 156
149 if (first_de) { 157 if (first_de) {
150 isofs_normalize_block_and_offset(de, 158 isofs_normalize_block_and_offset(de,
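The new block in do_isofs_readdir() rejects records whose claimed name length does not fit inside the record length read from disc, returning -EIO instead of walking past the entry. A compact userspace sketch of the same arithmetic, using a trimmed-down record layout (the real struct iso_directory_record has more fields, so the sizeof differs):

#include <stdio.h>

/* Trimmed-down record: only what the check needs; sizes are illustrative. */
struct iso_directory_record {
        unsigned char length[1];
        unsigned char name_len[1];
        char name[];
};

/* Mirror of the added sanity check: the name must fit inside the record. */
static int record_is_sane(const struct iso_directory_record *de, int de_len)
{
        return de_len >= de->name_len[0] + (int)sizeof(struct iso_directory_record);
}

int main(void)
{
        unsigned char good[] = { 34, 4, 'b', 'o', 'o', 't' };
        unsigned char bad[]  = {  4, 200 };     /* claims a 200-byte name */

        printf("good: %d\n", record_is_sane((void *)good, good[0]));
        printf("bad:  %d\n", record_is_sane((void *)bad, bad[0]));
        return 0;
}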
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index d1bdf8adb351..ccbf72faf27a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -78,29 +78,29 @@ static inline int isonum_712(char *p)
78} 78}
79static inline unsigned int isonum_721(char *p) 79static inline unsigned int isonum_721(char *p)
80{ 80{
81 return le16_to_cpu(get_unaligned((__le16 *)p)); 81 return get_unaligned_le16(p);
82} 82}
83static inline unsigned int isonum_722(char *p) 83static inline unsigned int isonum_722(char *p)
84{ 84{
85 return be16_to_cpu(get_unaligned((__le16 *)p)); 85 return get_unaligned_be16(p);
86} 86}
87static inline unsigned int isonum_723(char *p) 87static inline unsigned int isonum_723(char *p)
88{ 88{
89 /* Ignore bigendian datum due to broken mastering programs */ 89 /* Ignore bigendian datum due to broken mastering programs */
90 return le16_to_cpu(get_unaligned((__le16 *)p)); 90 return get_unaligned_le16(p);
91} 91}
92static inline unsigned int isonum_731(char *p) 92static inline unsigned int isonum_731(char *p)
93{ 93{
94 return le32_to_cpu(get_unaligned((__le32 *)p)); 94 return get_unaligned_le32(p);
95} 95}
96static inline unsigned int isonum_732(char *p) 96static inline unsigned int isonum_732(char *p)
97{ 97{
98 return be32_to_cpu(get_unaligned((__le32 *)p)); 98 return get_unaligned_be32(p);
99} 99}
100static inline unsigned int isonum_733(char *p) 100static inline unsigned int isonum_733(char *p)
101{ 101{
102 /* Ignore bigendian datum due to broken mastering programs */ 102 /* Ignore bigendian datum due to broken mastering programs */
103 return le32_to_cpu(get_unaligned((__le32 *)p)); 103 return get_unaligned_le32(p);
104} 104}
105extern int iso_date(char *, int); 105extern int iso_date(char *, int);
106 106
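The isonum_7xx helpers above are rewritten in terms of get_unaligned_le16/be16/le32/be32, which combine the unaligned load and the byte-order conversion in a single call. A userspace approximation of the 16-bit pair, assuming plain byte shifts rather than the kernel's arch-optimised accessors:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel helpers: read a possibly unaligned
 * little-endian / big-endian 16-bit value from a byte pointer. */
static inline unsigned int get_unaligned_le16(const void *p)
{
        const uint8_t *b = p;
        return b[0] | (b[1] << 8);
}

static inline unsigned int get_unaligned_be16(const void *p)
{
        const uint8_t *b = p;
        return (b[0] << 8) | b[1];
}

int main(void)
{
        /* 0x1234 stored little-endian at an odd (unaligned) offset */
        uint8_t buf[3] = { 0x00, 0x34, 0x12 };

        printf("le16 = 0x%04x\n", get_unaligned_le16(buf + 1)); /* 0x1234 */
        printf("be16 = 0x%04x\n", get_unaligned_be16(buf + 1)); /* 0x3412 */
        return 0;
}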
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29a..8299889a835e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
111 111
112 dlen = de->name_len[0]; 112 dlen = de->name_len[0];
113 dpnt = de->name; 113 dpnt = de->name;
114 /* Basic sanity check, whether name doesn't exceed dir entry */
115 if (de_len < dlen + sizeof(struct iso_directory_record)) {
116 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
117 " in block %lu of inode %lu\n", block,
118 dir->i_ino);
119 return 0;
120 }
114 121
115 if (sbi->s_rock && 122 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 123 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index cd931ef1f000..5a8ca61498ca 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -470,7 +470,9 @@ void journal_commit_transaction(journal_t *journal)
470 * transaction! Now comes the tricky part: we need to write out 470 * transaction! Now comes the tricky part: we need to write out
471 * metadata. Loop over the transaction's entire buffer list: 471 * metadata. Loop over the transaction's entire buffer list:
472 */ 472 */
473 spin_lock(&journal->j_state_lock);
473 commit_transaction->t_state = T_COMMIT; 474 commit_transaction->t_state = T_COMMIT;
475 spin_unlock(&journal->j_state_lock);
474 476
475 J_ASSERT(commit_transaction->t_nr_buffers <= 477 J_ASSERT(commit_transaction->t_nr_buffers <=
476 commit_transaction->t_outstanding_credits); 478 commit_transaction->t_outstanding_credits);
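Both the jbd and jbd2 commit paths now take j_state_lock around the T_COMMIT transition, so code that samples t_state under that lock sees the change atomically with respect to the journal's other state fields. A pthread-based sketch of the pattern, with illustrative names:

#include <pthread.h>
#include <stdio.h>

enum t_state { T_RUNNING, T_LOCKED, T_FLUSH, T_COMMIT, T_FINISHED };

struct transaction {
        enum t_state t_state;
};

struct journal {
        pthread_spinlock_t j_state_lock;
        struct transaction *j_committing_transaction;
};

/* State transitions happen only under j_state_lock ... */
static void set_commit_state(struct journal *journal, struct transaction *t)
{
        pthread_spin_lock(&journal->j_state_lock);
        t->t_state = T_COMMIT;
        pthread_spin_unlock(&journal->j_state_lock);
}

/* ... so readers holding the same lock never see a half-updated view. */
static enum t_state read_state(struct journal *journal, struct transaction *t)
{
        enum t_state s;

        pthread_spin_lock(&journal->j_state_lock);
        s = t->t_state;
        pthread_spin_unlock(&journal->j_state_lock);
        return s;
}

int main(void)
{
        struct transaction t = { .t_state = T_FLUSH };
        struct journal j = { .j_committing_transaction = &t };

        pthread_spin_init(&j.j_state_lock, PTHREAD_PROCESS_PRIVATE);
        set_commit_state(&j, &t);
        printf("state = %d\n", read_state(&j, &t));     /* T_COMMIT == 3 */
        pthread_spin_destroy(&j.j_state_lock);
        return 0;
}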
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..4d99685fdce4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -576,7 +560,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
576 * transaction! Now comes the tricky part: we need to write out 560 * transaction! Now comes the tricky part: we need to write out
577 * metadata. Loop over the transaction's entire buffer list: 561 * metadata. Loop over the transaction's entire buffer list:
578 */ 562 */
563 spin_lock(&journal->j_state_lock);
579 commit_transaction->t_state = T_COMMIT; 564 commit_transaction->t_state = T_COMMIT;
565 spin_unlock(&journal->j_state_lock);
580 566
581 stats.u.run.rs_logging = jiffies; 567 stats.u.run.rs_logging = jiffies;
582 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 568 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
@@ -584,6 +570,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 570 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 571 stats.u.run.rs_blocks_logged = 0;
586 572
573 J_ASSERT(commit_transaction->t_nr_buffers <=
574 commit_transaction->t_outstanding_credits);
575
587 descriptor = NULL; 576 descriptor = NULL;
588 bufs = 0; 577 bufs = 0;
589 while (commit_transaction->t_buffers) { 578 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 954cff001df6..2e24567c4a79 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -901,22 +901,13 @@ static void jbd2_stats_proc_init(journal_t *journal)
901{ 901{
902 char name[BDEVNAME_SIZE]; 902 char name[BDEVNAME_SIZE];
903 903
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 904 bdevname(journal->j_dev, name);
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); 905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) { 906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p; 907 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
908 p = create_proc_entry("history", S_IRUGO, 908 &jbd2_seq_history_fops, journal);
909 journal->j_proc_entry); 909 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
910 if (p) { 910 &jbd2_seq_info_fops, journal);
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 } 911 }
921} 912}
922 913
@@ -924,7 +915,7 @@ static void jbd2_stats_proc_exit(journal_t *journal)
924{ 915{
925 char name[BDEVNAME_SIZE]; 916 char name[BDEVNAME_SIZE];
926 917
927 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 918 bdevname(journal->j_dev, name);
928 remove_proc_entry("info", journal->j_proc_entry); 919 remove_proc_entry("info", journal->j_proc_entry);
929 remove_proc_entry("history", journal->j_proc_entry); 920 remove_proc_entry("history", journal->j_proc_entry);
930 remove_proc_entry(name, proc_jbd2_stats); 921 remove_proc_entry(name, proc_jbd2_stats);
@@ -1006,13 +997,14 @@ fail:
1006 */ 997 */
1007 998
1008/** 999/**
1009 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1010 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1011 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1012 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1013 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1014 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1015 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1016 * 1008 *
1017 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1018 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1036 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1037 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1038 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1039 __FUNCTION__); 1031 __func__);
1040 kfree(journal); 1032 kfree(journal);
1041 journal = NULL; 1033 journal = NULL;
1042 goto out; 1034 goto out;
@@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1093 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1094 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1095 __FUNCTION__); 1087 __func__);
1096 kfree(journal); 1088 kfree(journal);
1097 return NULL; 1089 return NULL;
1098 } 1090 }
@@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1101 /* If that failed, give up */ 1093 /* If that failed, give up */
1102 if (err) { 1094 if (err) {
1103 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1104 __FUNCTION__); 1096 __func__);
1105 kfree(journal); 1097 kfree(journal);
1106 return NULL; 1098 return NULL;
1107 } 1099 }
@@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1187 */ 1179 */
1188 printk(KERN_EMERG 1180 printk(KERN_EMERG
1189 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1190 __FUNCTION__); 1182 __func__);
1191 BUG(); 1183 BUG();
1192 } 1184 }
1193 1185
@@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1985 1977
1986static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1987{ 1979{
1988 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1989 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1990 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1991} 1984}
1992 1985
1993/* 1986/*
@@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
2006 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2002 __func__);
2010 last_warning = jiffies; 2003 last_warning = jiffies;
2011 } 2004 }
2012 while (!ret) { 2005 while (!ret) {
@@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2143 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2144 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2145 "b_frozen_data\n", 2138 "b_frozen_data\n",
2146 __FUNCTION__); 2139 __func__);
2147 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2148 } 2141 }
2149 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2150 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2151 "b_committed_data\n", 2144 "b_committed_data\n",
2152 __FUNCTION__); 2145 __func__);
2153 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2154 } 2147 }
2155 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2314,10 +2307,12 @@ static int __init journal_init(void)
2314 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2315 2308
2316 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2317 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2318 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2319 jbd2_create_debugfs_entry(); 2315 }
2320 jbd2_create_jbd_stats_proc_entry();
2321 return ret; 2316 return ret;
2322} 2317}
2323 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
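The revoke.c rewrite pulls the duplicated setup into jbd2_journal_init_revoke_table() and the teardown into jbd2_journal_destroy_revoke_table(), so both j_revoke_table slots share one code path. A userspace sketch of that helper pair, assuming the same power-of-two hash size and shift computation (the per-bucket list heads are reduced to plain pointers):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative reduction of jbd2_revoke_table_s. */
struct revoke_table {
        int hash_size;
        int hash_shift;
        void **hash_table;
};

static struct revoke_table *revoke_table_init(int hash_size)
{
        struct revoke_table *table;
        int shift = 0;
        int tmp = hash_size;

        if (hash_size <= 0 || (hash_size & (hash_size - 1)))
                return NULL;            /* must be a power of two */

        table = malloc(sizeof(*table));
        if (!table)
                return NULL;

        while ((tmp >>= 1) != 0)        /* shift = log2(hash_size) */
                shift++;

        table->hash_size = hash_size;
        table->hash_shift = shift;
        table->hash_table = calloc(hash_size, sizeof(void *));
        if (!table->hash_table) {
                free(table);
                return NULL;
        }
        return table;
}

static void revoke_table_destroy(struct revoke_table *table)
{
        free(table->hash_table);
        free(table);
}

int main(void)
{
        struct revoke_table *t = revoke_table_init(256);

        if (t) {
                printf("hash_size=%d hash_shift=%d\n", t->hash_size, t->hash_shift);
                revoke_table_destroy(t);
        }
        return 0;
}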
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of whether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
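The b_modified changes make the forget path return a buffer credit only when this transaction actually dirtied the buffer: the flag is cleared the first time a transaction touches the buffer, so was_modified tells forget whether a credit was ever charged. A toy model of that accounting, assuming the credit is charged when the buffer is first dirtied (jbd2_journal_dirty_metadata() in the real code); structures and names are illustrative:

#include <stdio.h>

struct buffer {
        int b_modified;         /* dirtied by the current transaction? */
};

struct handle {
        int h_buffer_credits;
};

/* First touch by a transaction clears the flag (cf. do_get_write_access). */
static void get_write_access(struct buffer *b)
{
        b->b_modified = 0;
}

/* Dirtying the buffer charges one credit, once per transaction. */
static void dirty_metadata(struct handle *h, struct buffer *b)
{
        if (!b->b_modified) {
                b->b_modified = 1;
                h->h_buffer_credits--;
        }
}

/* Forget only refunds the credit if this transaction charged one. */
static void forget(struct handle *h, struct buffer *b)
{
        int was_modified = b->b_modified;

        if (was_modified)
                h->h_buffer_credits++;
}

int main(void)
{
        struct handle h = { .h_buffer_credits = 4 };
        struct buffer b;

        get_write_access(&b);
        forget(&h, &b);                 /* never dirtied: no refund */
        printf("credits %d\n", h.h_buffer_credits);     /* 4 */

        get_write_access(&b);
        dirty_metadata(&h, &b);
        forget(&h, &b);                 /* dirtied: credit comes back */
        printf("credits %d\n", h.h_buffer_credits);     /* 4 */
        return 0;
}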
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index d58f845ccb85..c5e1450d79f9 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
46 46
47 47
48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
49 struct jffs2_inode_cache *ic) 49 struct jffs2_inode_cache *ic)
50{ 50{
51 struct jffs2_full_dirent *fd; 51 struct jffs2_full_dirent *fd;
52 52
@@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
68 continue; 68 continue;
69 } 69 }
70 70
71 if (child_ic->nlink++ && fd->type == DT_DIR) { 71 if (fd->type == DT_DIR) {
72 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", 72 if (child_ic->pino_nlink) {
73 fd->name, fd->ino, ic->ino); 73 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
74 /* TODO: What do we do about it? */ 74 fd->name, fd->ino, ic->ino);
75 } 75 /* TODO: What do we do about it? */
76 } else {
77 child_ic->pino_nlink = ic->ino;
78 }
79 } else
80 child_ic->pino_nlink++;
81
76 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); 82 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
77 /* Can't free scan_dents so far. We might need them in pass 2 */ 83 /* Can't free scan_dents so far. We might need them in pass 2 */
78 } 84 }
@@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
125 dbg_fsbuild("pass 2 starting\n"); 131 dbg_fsbuild("pass 2 starting\n");
126 132
127 for_each_inode(i, c, ic) { 133 for_each_inode(i, c, ic) {
128 if (ic->nlink) 134 if (ic->pino_nlink)
129 continue; 135 continue;
130 136
131 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); 137 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
@@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
232 /* Reduce nlink of the child. If it's now zero, stick it on the 238 /* Reduce nlink of the child. If it's now zero, stick it on the
233 dead_fds list to be cleaned up later. Else just free the fd */ 239 dead_fds list to be cleaned up later. Else just free the fd */
234 240
235 child_ic->nlink--; 241 if (fd->type == DT_DIR)
242 child_ic->pino_nlink = 0;
243 else
244 child_ic->pino_nlink--;
236 245
237 if (!child_ic->nlink) { 246 if (!child_ic->pino_nlink) {
238 dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", 247 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
239 fd->ino, fd->name); 248 fd->ino, fd->name);
240 fd->next = *dead_fds; 249 fd->next = *dead_fds;
241 *dead_fds = fd; 250 *dead_fds = fd;
242 } else { 251 } else {
243 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", 252 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
244 fd->ino, fd->name, child_ic->nlink); 253 fd->ino, fd->name, child_ic->pino_nlink);
245 jffs2_free_full_dirent(fd); 254 jffs2_free_full_dirent(fd);
246 } 255 }
247 } 256 }
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 9645275023e6..a113ecc3bafe 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -82,28 +82,28 @@
82 do { \ 82 do { \
83 printk(JFFS2_ERR_MSG_PREFIX \ 83 printk(JFFS2_ERR_MSG_PREFIX \
84 " (%d) %s: " fmt, task_pid_nr(current), \ 84 " (%d) %s: " fmt, task_pid_nr(current), \
85 __FUNCTION__ , ##__VA_ARGS__); \ 85 __func__ , ##__VA_ARGS__); \
86 } while(0) 86 } while(0)
87 87
88#define JFFS2_WARNING(fmt, ...) \ 88#define JFFS2_WARNING(fmt, ...) \
89 do { \ 89 do { \
90 printk(JFFS2_WARN_MSG_PREFIX \ 90 printk(JFFS2_WARN_MSG_PREFIX \
91 " (%d) %s: " fmt, task_pid_nr(current), \ 91 " (%d) %s: " fmt, task_pid_nr(current), \
92 __FUNCTION__ , ##__VA_ARGS__); \ 92 __func__ , ##__VA_ARGS__); \
93 } while(0) 93 } while(0)
94 94
95#define JFFS2_NOTICE(fmt, ...) \ 95#define JFFS2_NOTICE(fmt, ...) \
96 do { \ 96 do { \
97 printk(JFFS2_NOTICE_MSG_PREFIX \ 97 printk(JFFS2_NOTICE_MSG_PREFIX \
98 " (%d) %s: " fmt, task_pid_nr(current), \ 98 " (%d) %s: " fmt, task_pid_nr(current), \
99 __FUNCTION__ , ##__VA_ARGS__); \ 99 __func__ , ##__VA_ARGS__); \
100 } while(0) 100 } while(0)
101 101
102#define JFFS2_DEBUG(fmt, ...) \ 102#define JFFS2_DEBUG(fmt, ...) \
103 do { \ 103 do { \
104 printk(JFFS2_DBG_MSG_PREFIX \ 104 printk(JFFS2_DBG_MSG_PREFIX \
105 " (%d) %s: " fmt, task_pid_nr(current), \ 105 " (%d) %s: " fmt, task_pid_nr(current), \
106 __FUNCTION__ , ##__VA_ARGS__); \ 106 __func__ , ##__VA_ARGS__); \
107 } while(0) 107 } while(0)
108 108
109/* 109/*
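These macros switch from the GCC-specific __FUNCTION__ to the standard C99 __func__ predefined identifier; the expansion is otherwise unchanged. A standalone example in the same macro style:

#include <stdio.h>

/* __func__ expands to the enclosing function's name, just as the old
 * __FUNCTION__ spelling did. */
#define JFFS2_DEBUG(fmt, ...) \
        do { \
                printf("[JFFS2 DBG] %s: " fmt, __func__, ##__VA_ARGS__); \
        } while (0)

int main(void)
{
        JFFS2_DEBUG("mounted, erase size %u\n", 65536u);
        return 0;
}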
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c63e7a96af0d..c0c141f6fde1 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
208 f = JFFS2_INODE_INFO(inode); 208 f = JFFS2_INODE_INFO(inode);
209 dir_f = JFFS2_INODE_INFO(dir_i); 209 dir_f = JFFS2_INODE_INFO(dir_i);
210 210
211 /* jffs2_do_create() will want to lock it, _after_ reserving
212 space and taking c-alloc_sem. If we keep it locked here,
213 lockdep gets unhappy (although it's a false positive;
214 nothing else will be looking at this inode yet so there's
215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem);
217
211 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri,
212 dentry->d_name.name, dentry->d_name.len); 219 dentry->d_name.name, dentry->d_name.len);
213 if (ret) 220 if (ret)
@@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
219 d_instantiate(dentry, inode); 226 d_instantiate(dentry, inode);
220 227
221 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 228 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
222 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 229 inode->i_ino, inode->i_mode, inode->i_nlink,
230 f->inocache->pino_nlink, inode->i_mapping->nrpages));
223 return 0; 231 return 0;
224 232
225 fail: 233 fail:
@@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
243 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 251 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
244 dentry->d_name.len, dead_f, now); 252 dentry->d_name.len, dead_f, now);
245 if (dead_f->inocache) 253 if (dead_f->inocache)
246 dentry->d_inode->i_nlink = dead_f->inocache->nlink; 254 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
247 if (!ret) 255 if (!ret)
248 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 256 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
249 return ret; 257 return ret;
@@ -276,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
276 284
277 if (!ret) { 285 if (!ret) {
278 mutex_lock(&f->sem); 286 mutex_lock(&f->sem);
279 old_dentry->d_inode->i_nlink = ++f->inocache->nlink; 287 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
280 mutex_unlock(&f->sem); 288 mutex_unlock(&f->sem);
281 d_instantiate(dentry, old_dentry->d_inode); 289 d_instantiate(dentry, old_dentry->d_inode);
282 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 290 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
493 501
494 inode->i_op = &jffs2_dir_inode_operations; 502 inode->i_op = &jffs2_dir_inode_operations;
495 inode->i_fop = &jffs2_dir_operations; 503 inode->i_fop = &jffs2_dir_operations;
496 /* Directories get nlink 2 at start */
497 inode->i_nlink = 2;
498 504
499 f = JFFS2_INODE_INFO(inode); 505 f = JFFS2_INODE_INFO(inode);
500 506
507 /* Directories get nlink 2 at start */
508 inode->i_nlink = 2;
509 /* but ic->pino_nlink is the parent ino# */
510 f->inocache->pino_nlink = dir_i->i_ino;
511
501 ri->data_crc = cpu_to_je32(0); 512 ri->data_crc = cpu_to_je32(0);
502 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 513 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
503 514
@@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
594 605
595static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) 606static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
596{ 607{
608 struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
609 struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
597 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); 610 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
598 struct jffs2_full_dirent *fd; 611 struct jffs2_full_dirent *fd;
599 int ret; 612 int ret;
613 uint32_t now = get_seconds();
600 614
601 for (fd = f->dents ; fd; fd = fd->next) { 615 for (fd = f->dents ; fd; fd = fd->next) {
602 if (fd->ino) 616 if (fd->ino)
603 return -ENOTEMPTY; 617 return -ENOTEMPTY;
604 } 618 }
605 ret = jffs2_unlink(dir_i, dentry); 619
606 if (!ret) 620 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
621 dentry->d_name.len, f, now);
622 if (!ret) {
623 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
624 clear_nlink(dentry->d_inode);
607 drop_nlink(dir_i); 625 drop_nlink(dir_i);
626 }
608 return ret; 627 return ret;
609} 628}
610 629
@@ -817,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
817 inode which didn't exist. */ 836 inode which didn't exist. */
818 if (victim_f->inocache) { 837 if (victim_f->inocache) {
819 mutex_lock(&victim_f->sem); 838 mutex_lock(&victim_f->sem);
820 victim_f->inocache->nlink--; 839 if (S_ISDIR(new_dentry->d_inode->i_mode))
840 victim_f->inocache->pino_nlink = 0;
841 else
842 victim_f->inocache->pino_nlink--;
821 mutex_unlock(&victim_f->sem); 843 mutex_unlock(&victim_f->sem);
822 } 844 }
823 } 845 }
@@ -838,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
838 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); 860 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
839 mutex_lock(&f->sem); 861 mutex_lock(&f->sem);
840 inc_nlink(old_dentry->d_inode); 862 inc_nlink(old_dentry->d_inode);
841 if (f->inocache) 863 if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode))
842 f->inocache->nlink++; 864 f->inocache->pino_nlink++;
843 mutex_unlock(&f->sem); 865 mutex_unlock(&f->sem);
844 866
845 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 867 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 25a640e566d3..dddb2a6c9e2c 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
294 break; 294 break;
295#endif 295#endif
296 default: 296 default:
297 if (ic->nodes == (void *)ic && ic->nlink == 0) 297 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
298 jffs2_del_ino_cache(c, ic); 298 jffs2_del_ino_cache(c, ic);
299 } 299 }
300} 300}
@@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
332 if (c->mtd->point) { 332 if (c->mtd->point) {
333 unsigned long *wordebuf; 333 unsigned long *wordebuf;
334 334
335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); 335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
336 &retlen, &ebuf, NULL);
336 if (ret) { 337 if (ret) {
337 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 338 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
338 goto do_flash_read; 339 goto do_flash_read;
@@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
340 if (retlen < c->sector_size) { 341 if (retlen < c->sector_size) {
341 /* Don't muck about if it won't let us point to the whole erase sector */ 342 /* Don't muck about if it won't let us point to the whole erase sector */
342 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 343 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
343 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); 344 c->mtd->unpoint(c->mtd, jeb->offset, retlen);
344 goto do_flash_read; 345 goto do_flash_read;
345 } 346 }
346 wordebuf = ebuf-sizeof(*wordebuf); 347 wordebuf = ebuf-sizeof(*wordebuf);
@@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
349 if (*++wordebuf != ~0) 350 if (*++wordebuf != ~0)
350 break; 351 break;
351 } while(--retlen); 352 } while(--retlen);
352 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); 353 c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
353 if (retlen) { 354 if (retlen) {
354 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 355 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
355 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 356 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3eb1c84b0a33..086c43830221 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
275 275
276 inode->i_nlink = f->inocache->nlink; 276 inode->i_nlink = f->inocache->pino_nlink;
277 277
278 inode->i_blocks = (inode->i_size + 511) >> 9; 278 inode->i_blocks = (inode->i_size + 511) >> 9;
279 279
@@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
286 case S_IFDIR: 286 case S_IFDIR:
287 { 287 {
288 struct jffs2_full_dirent *fd; 288 struct jffs2_full_dirent *fd;
289 inode->i_nlink = 2; /* parent and '.' */
289 290
290 for (fd=f->dents; fd; fd = fd->next) { 291 for (fd=f->dents; fd; fd = fd->next) {
291 if (fd->type == DT_DIR && fd->ino) 292 if (fd->type == DT_DIR && fd->ino)
292 inc_nlink(inode); 293 inc_nlink(inode);
293 } 294 }
294 /* and '..' */
295 inc_nlink(inode);
296 /* Root dir gets i_nlink 3 for some reason */ 295 /* Root dir gets i_nlink 3 for some reason */
297 if (inode->i_ino == 1) 296 if (inode->i_ino == 1)
298 inc_nlink(inode); 297 inc_nlink(inode);
@@ -586,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c,
586} 585}
587 586
588struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 587struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
589 int inum, int nlink) 588 int inum, int unlinked)
590{ 589{
591 struct inode *inode; 590 struct inode *inode;
592 struct jffs2_inode_cache *ic; 591 struct jffs2_inode_cache *ic;
593 if (!nlink) { 592
593 if (unlinked) {
594 /* The inode has zero nlink but its nodes weren't yet marked 594 /* The inode has zero nlink but its nodes weren't yet marked
595 obsolete. This has to be because we're still waiting for 595 obsolete. This has to be because we're still waiting for
596 the final (close() and) iput() to happen. 596 the final (close() and) iput() to happen.
@@ -638,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
638 return ERR_CAST(inode); 638 return ERR_CAST(inode);
639 } 639 }
640 if (is_bad_inode(inode)) { 640 if (is_bad_inode(inode)) {
641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", 641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n",
642 inum, nlink); 642 inum, unlinked);
643 /* NB. This will happen again. We need to do something appropriate here. */ 643 /* NB. This will happen again. We need to do something appropriate here. */
644 iput(inode); 644 iput(inode);
645 return ERR_PTR(-EIO); 645 return ERR_PTR(-EIO);
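The jffs2_iget() hunk sets a directory's link count to 2 up front ('.' plus the entry in its parent) and then adds one per child directory for their '..' entries, instead of bumping the count at the end. A small sketch of that counting rule, independent of JFFS2's structures:

#include <stdio.h>

struct dirent_info { int is_dir; };

/* nlink of a directory: 2 (".", parent entry) + one per subdirectory. */
static int dir_nlink(const struct dirent_info *children, int n)
{
        int nlink = 2;
        int i;

        for (i = 0; i < n; i++)
                if (children[i].is_dir)
                        nlink++;
        return nlink;
}

int main(void)
{
        struct dirent_info c[] = { { 1 }, { 0 }, { 1 } };

        printf("nlink = %d\n", dir_nlink(c, 3));        /* 4 */
        return 0;
}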
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index bad005664e30..090c556ffed2 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
161 continue; 161 continue;
162 } 162 }
163 163
164 if (!ic->nlink) { 164 if (!ic->pino_nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166 ic->ino)); 166 ic->ino));
167 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic); 168 jffs2_xattr_delete_inode(c, ic);
@@ -398,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
398 it's vaguely possible. */ 398 it's vaguely possible. */
399 399
400 inum = ic->ino; 400 inum = ic->ino;
401 nlink = ic->nlink; 401 nlink = ic->pino_nlink;
402 spin_unlock(&c->inocache_lock); 402 spin_unlock(&c->inocache_lock);
403 403
404 f = jffs2_gc_fetch_inode(c, inum, nlink); 404 f = jffs2_gc_fetch_inode(c, inum, !nlink);
405 if (IS_ERR(f)) { 405 if (IS_ERR(f)) {
406 ret = PTR_ERR(f); 406 ret = PTR_ERR(f);
407 goto release_sem; 407 goto release_sem;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 8219df6eb6d8..1750445556c3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -177,7 +177,10 @@ struct jffs2_inode_cache {
177#ifdef CONFIG_JFFS2_FS_XATTR 177#ifdef CONFIG_JFFS2_FS_XATTR
178 struct jffs2_xattr_ref *xref; 178 struct jffs2_xattr_ref *xref;
179#endif 179#endif
180 int nlink; 180 uint32_t pino_nlink; /* Directories store parent inode
181 here; other inodes store nlink.
182 Zero always means that it's
183 completely unlinked. */
181}; 184};
182 185
183/* Inode states for 'state' above. We need the 'GC' state to prevent 186/* Inode states for 'state' above. We need the 'GC' state to prevent
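The nodelist.h change carries the key idea of the JFFS2 series: the old int nlink becomes pino_nlink, which holds the parent inode number for directories and the link count for everything else, with zero always meaning completely unlinked. A sketch of how such a dual-purpose field might be read, where the is_dir flag stands in for mode information kept elsewhere in the real code:

#include <stdio.h>
#include <stdint.h>

struct inode_cache {
        uint32_t ino;
        uint32_t pino_nlink;    /* parent ino for dirs, nlink otherwise */
        int is_dir;
};

static int cache_is_unlinked(const struct inode_cache *ic)
{
        return ic->pino_nlink == 0;     /* zero always means unlinked */
}

static uint32_t cache_parent_ino(const struct inode_cache *ic)
{
        return ic->is_dir ? ic->pino_nlink : 0; /* only meaningful for dirs */
}

int main(void)
{
        struct inode_cache dir  = { .ino = 42, .pino_nlink = 1, .is_dir = 1 };
        struct inode_cache file = { .ino = 43, .pino_nlink = 2, .is_dir = 0 };

        printf("dir parent ino %u, unlinked=%d\n",
               cache_parent_ino(&dir), cache_is_unlinked(&dir));
        printf("file nlink %u, unlinked=%d\n",
               file.pino_nlink, cache_is_unlinked(&file));
        return 0;
}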
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 9df8f3ef20df..a9bf9603c1ba 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
709 break; 709 break;
710#endif 710#endif
711 default: 711 default:
712 if (ic->nodes == (void *)ic && ic->nlink == 0) 712 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
713 jffs2_del_ino_cache(c, ic); 713 jffs2_del_ino_cache(c, ic);
714 break; 714 break;
715 } 715 }
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 1b10d2594092..2cc866cf134f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
187void jffs2_gc_release_inode(struct jffs2_sb_info *c, 187void jffs2_gc_release_inode(struct jffs2_sb_info *c,
188 struct jffs2_inode_info *f); 188 struct jffs2_inode_info *f);
189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
190 int inum, int nlink); 190 int inum, int unlinked);
191 191
192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, 192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
193 struct jffs2_inode_info *f, 193 struct jffs2_inode_info *f,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4cb4d76de07f..6ca08ad887c0 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), 63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
64 * adding and jffs2_flash_read_end() interface. */ 64 * adding and jffs2_flash_read_end() interface. */
65 if (c->mtd->point) { 65 if (c->mtd->point) {
66 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 66 err = c->mtd->point(c->mtd, ofs, len, &retlen,
67 (void **)&buffer, NULL);
67 if (!err && retlen < len) { 68 if (!err && retlen < len) {
68 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
69 c->mtd->unpoint(c->mtd, buffer, ofs, retlen); 70 c->mtd->unpoint(c->mtd, ofs, retlen);
70 } else if (err) 71 } else if (err)
71 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
72 else 73 else
@@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
100 kfree(buffer); 101 kfree(buffer);
101#ifndef __ECOS 102#ifndef __ECOS
102 else 103 else
103 c->mtd->unpoint(c->mtd, buffer, ofs, len); 104 c->mtd->unpoint(c->mtd, ofs, len);
104#endif 105#endif
105 106
106 if (crc != tn->data_crc) { 107 if (crc != tn->data_crc) {
@@ -136,7 +137,7 @@ free_out:
136 kfree(buffer); 137 kfree(buffer);
137#ifndef __ECOS 138#ifndef __ECOS
138 else 139 else
139 c->mtd->unpoint(c->mtd, buffer, ofs, len); 140 c->mtd->unpoint(c->mtd, ofs, len);
140#endif 141#endif
141 return err; 142 return err;
142} 143}
@@ -1123,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1123 size_t retlen; 1124 size_t retlen;
1124 int ret; 1125 int ret;
1125 1126
1126 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1127 dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino,
1128 f->inocache->pino_nlink);
1127 1129
1128 memset(&rii, 0, sizeof(rii)); 1130 memset(&rii, 0, sizeof(rii));
1129 1131
@@ -1358,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
1358 } 1360 }
1359 dbg_readinode("creating inocache for root inode\n"); 1361 dbg_readinode("creating inocache for root inode\n");
1360 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); 1362 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
1361 f->inocache->ino = f->inocache->nlink = 1; 1363 f->inocache->ino = f->inocache->pino_nlink = 1;
1362 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 1364 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
1363 f->inocache->state = INO_STATE_READING; 1365 f->inocache->state = INO_STATE_READING;
1364 jffs2_add_ino_cache(c, f->inocache); 1366 jffs2_add_ino_cache(c, f->inocache);
@@ -1401,7 +1403,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1401 jffs2_clear_acl(f); 1403 jffs2_clear_acl(f);
1402 jffs2_xattr_delete_inode(c, f->inocache); 1404 jffs2_xattr_delete_inode(c, f->inocache);
1403 mutex_lock(&f->sem); 1405 mutex_lock(&f->sem);
1404 deleted = f->inocache && !f->inocache->nlink; 1406 deleted = f->inocache && !f->inocache->pino_nlink;
1405 1407
1406 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) 1408 if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
1407 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); 1409 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 272872d27fd5..1d437de1e9a8 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
97 size_t pointlen; 97 size_t pointlen;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); 100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
101 (void **)&flashbuf, NULL);
101 if (!ret && pointlen < c->mtd->size) { 102 if (!ret && pointlen < c->mtd->size) {
102 /* Don't muck about if it won't let us point to the whole flash */ 103 /* Don't muck about if it won't let us point to the whole flash */
103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
104 c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); 105 c->mtd->unpoint(c->mtd, 0, pointlen);
105 flashbuf = NULL; 106 flashbuf = NULL;
106 } 107 }
107 if (ret) 108 if (ret)
@@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
267 kfree(flashbuf); 268 kfree(flashbuf);
268#ifndef __ECOS 269#ifndef __ECOS
269 else 270 else
270 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 271 c->mtd->unpoint(c->mtd, 0, c->mtd->size);
271#endif 272#endif
272 if (s) 273 if (s)
273 kfree(s); 274 kfree(s);
@@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
940 ic->nodes = (void *)ic; 941 ic->nodes = (void *)ic;
941 jffs2_add_ino_cache(c, ic); 942 jffs2_add_ino_cache(c, ic);
942 if (ino == 1) 943 if (ino == 1)
943 ic->nlink = 1; 944 ic->pino_nlink = 1;
944 return ic; 945 return ic;
945} 946}
946 947
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f3353df178e7..7da69eae49e4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep;
31 31
32static struct inode *jffs2_alloc_inode(struct super_block *sb) 32static struct inode *jffs2_alloc_inode(struct super_block *sb)
33{ 33{
34 struct jffs2_inode_info *ei; 34 struct jffs2_inode_info *f;
35 ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); 35
36 if (!ei) 36 f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
37 if (!f)
37 return NULL; 38 return NULL;
38 return &ei->vfs_inode; 39 return &f->vfs_inode;
39} 40}
40 41
41static void jffs2_destroy_inode(struct inode *inode) 42static void jffs2_destroy_inode(struct inode *inode)
@@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode)
45 46
46static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
47{ 48{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 49 struct jffs2_inode_info *f = foo;
49 50
50 mutex_init(&ei->sem); 51 mutex_init(&f->sem);
51 inode_init_once(&ei->vfs_inode); 52 inode_init_once(&f->vfs_inode);
52} 53}
53 54
54static int jffs2_sync_fs(struct super_block *sb, int wait) 55static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 8de52b607678..0e78b00035e4 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
494 /* If it's an in-core inode, then we have to adjust any 494 /* If it's an in-core inode, then we have to adjust any
495 full_dirent or full_dnode structure to point to the 495 full_dirent or full_dnode structure to point to the
496 new version instead of the old */ 496 new version instead of the old */
497 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); 497 f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink);
498 if (IS_ERR(f)) { 498 if (IS_ERR(f)) {
499 /* Should never happen; it _must_ be present */ 499 /* Should never happen; it _must_ be present */
500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", 500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 665fce9797d3..ca29440e9435 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -19,7 +19,8 @@
19#include "compr.h" 19#include "compr.h"
20 20
21 21
22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) 22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
23 uint32_t mode, struct jffs2_raw_inode *ri)
23{ 24{
24 struct jffs2_inode_cache *ic; 25 struct jffs2_inode_cache *ic;
25 26
@@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
31 memset(ic, 0, sizeof(*ic)); 32 memset(ic, 0, sizeof(*ic));
32 33
33 f->inocache = ic; 34 f->inocache = ic;
34 f->inocache->nlink = 1; 35 f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */
35 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 36 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
36 f->inocache->state = INO_STATE_PRESENT; 37 f->inocache->state = INO_STATE_PRESENT;
37 38
@@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
438 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, 439 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
439 JFFS2_SUMMARY_INODE_SIZE); 440 JFFS2_SUMMARY_INODE_SIZE);
440 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 441 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
441 if (ret) { 442 if (ret)
442 mutex_unlock(&f->sem);
443 return ret; 443 return ret;
444 } 444
445 mutex_lock(&f->sem);
445 446
446 ri->data_crc = cpu_to_je32(0); 447 ri->data_crc = cpu_to_je32(0);
447 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 448 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
635 jffs2_mark_node_obsolete(c, fd->raw); 636 jffs2_mark_node_obsolete(c, fd->raw);
636 jffs2_free_full_dirent(fd); 637 jffs2_free_full_dirent(fd);
637 } 638 }
638 } 639 dead_f->inocache->pino_nlink = 0;
639 640 } else
640 dead_f->inocache->nlink--; 641 dead_f->inocache->pino_nlink--;
641 /* NB: Caller must set inode nlink if appropriate */ 642 /* NB: Caller must set inode nlink if appropriate */
642 mutex_unlock(&dead_f->sem); 643 mutex_unlock(&dead_f->sem);
643 } 644 }
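
Editor's note: the JFFS2 hunks above rename the inode cache's nlink field to pino_nlink, and the unlink path now zeroes it for a dead directory but decrements it otherwise. This appears to make one 32-bit field do double duty: for directories it records the parent inode number (a directory has exactly one parent), for everything else it is the ordinary link count, and "deleted" means the field is zero in both cases. The sketch below is a minimal userspace illustration of that dual-use idea, not JFFS2 code; the struct and function names are made up.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical, simplified stand-in for struct jffs2_inode_cache:
 * one 32-bit field is read as "parent ino" for directories and as
 * "link count" for non-directories. */
struct ino_cache {
    uint32_t ino;
    uint32_t pino_nlink;    /* parent ino (dirs) or nlink (non-dirs) */
    bool     is_dir;
};

/* An inode is considered deleted when the field drops to zero:
 * a file with no remaining links, or a directory with no parent. */
static bool is_deleted(const struct ino_cache *ic)
{
    return ic->pino_nlink == 0;
}

int main(void)
{
    struct ino_cache file = { .ino = 42, .pino_nlink = 2, .is_dir = false };
    struct ino_cache dir  = { .ino = 43, .pino_nlink = 1, .is_dir = true };

    file.pino_nlink--;      /* unlink one of two hard links */
    dir.pino_nlink = 0;     /* rmdir: the directory loses its parent */

    printf("file deleted: %d, dir deleted: %d\n",
           is_deleted(&file), is_deleted(&dir));
    return 0;
}
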
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index e48665984cb3..082e844ab2db 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
83{ 83{
84 /* must be called under down_write(xattr_sem) */ 84 /* must be called under down_write(xattr_sem) */
85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); 85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
86 if (xd->xname) { 86 if (xd->xname) {
87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); 87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
88 kfree(xd->xname); 88 kfree(xd->xname);
@@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
595 if (!ic || ic->nlink > 0) 595 if (!ic || ic->pino_nlink > 0)
596 return; 596 return;
597 597
598 down_write(&c->xattr_sem); 598 down_write(&c->xattr_sem);
@@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
829 ref->xd and ref->ic are not valid yet. */ 829 ref->xd and ref->ic are not valid yet. */
830 xd = jffs2_find_xattr_datum(c, ref->xid); 830 xd = jffs2_find_xattr_datum(c, ref->xid);
831 ic = jffs2_get_ino_cache(c, ref->ino); 831 ic = jffs2_get_ino_cache(c, ref->ino);
832 if (!xd || !ic || !ic->nlink) { 832 if (!xd || !ic || !ic->pino_nlink) {
833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", 833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
834 ref->ino, ref->xid, ref->xseqno); 834 ref->ino, ref->xid, ref->xseqno);
835 ref->xseqno |= XREF_DELETE_MARKER; 835 ref->xseqno |= XREF_DELETE_MARKER;
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1253 if (rc) { 1253 if (rc) {
1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1255 __FUNCTION__, rc, totlen); 1255 __func__, rc, totlen);
1256 rc = rc ? rc : -EBADFD; 1256 rc = rc ? rc : -EBADFD;
1257 goto out; 1257 goto out;
1258 } 1258 }
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 887f5759e536..bf6ab19b86ee 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -89,7 +89,7 @@ void jfs_proc_init(void)
89{ 89{
90 int i; 90 int i;
91 91
92 if (!(base = proc_mkdir("jfs", proc_root_fs))) 92 if (!(base = proc_mkdir("fs/jfs", NULL)))
93 return; 93 return;
94 base->owner = THIS_MODULE; 94 base->owner = THIS_MODULE;
95 95
@@ -109,7 +109,7 @@ void jfs_proc_clean(void)
109 if (base) { 109 if (base) {
110 for (i = 0; i < NPROCENT; i++) 110 for (i = 0; i < NPROCENT; i++)
111 remove_proc_entry(Entries[i].name, base); 111 remove_proc_entry(Entries[i].name, base);
112 remove_proc_entry("jfs", proc_root_fs); 112 remove_proc_entry("fs/jfs", NULL);
113 } 113 }
114} 114}
115 115
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 40b16f23e49a..5df517b81f3f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -573,7 +573,7 @@ again:
573 /* Ensure the resulting lock will get added to granted list */ 573 /* Ensure the resulting lock will get added to granted list */
574 fl->fl_flags |= FL_SLEEP; 574 fl->fl_flags |= FL_SLEEP;
575 if (do_vfs_lock(fl) < 0) 575 if (do_vfs_lock(fl) < 0)
576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
577 up_read(&host->h_rwsem); 577 up_read(&host->h_rwsem);
578 fl->fl_flags = fl_flags; 578 fl->fl_flags = fl_flags;
579 status = 0; 579 status = 0;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4d81553d2948..81aca859bfde 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
752 return; 752 return;
753 default: 753 default:
754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
755 -error, __FUNCTION__); 755 -error, __func__);
756 nlmsvc_insert_block(block, 10 * HZ); 756 nlmsvc_insert_block(block, 10 * HZ);
757 nlmsvc_release_block(block); 757 nlmsvc_release_block(block);
758 return; 758 return;
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a7ec50..11dbf08651b7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
116 116
117#include <linux/capability.h> 117#include <linux/capability.h>
118#include <linux/file.h> 118#include <linux/file.h>
119#include <linux/fdtable.h>
119#include <linux/fs.h> 120#include <linux/fs.h>
120#include <linux/init.h> 121#include <linux/init.h>
121#include <linux/module.h> 122#include <linux/module.h>
@@ -772,7 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
772 * give it the opportunity to lock the file. 773 * give it the opportunity to lock the file.
773 */ 774 */
774 if (found) 775 if (found)
775 cond_resched(); 776 cond_resched_bkl();
776 777
777find_conflict: 778find_conflict:
778 for_each_lock(inode, before) { 779 for_each_lock(inode, before) {
@@ -1752,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1752 struct file_lock *file_lock = locks_alloc_lock(); 1753 struct file_lock *file_lock = locks_alloc_lock();
1753 struct flock flock; 1754 struct flock flock;
1754 struct inode *inode; 1755 struct inode *inode;
1756 struct file *f;
1755 int error; 1757 int error;
1756 1758
1757 if (file_lock == NULL) 1759 if (file_lock == NULL)
@@ -1824,7 +1826,15 @@ again:
1824 * Attempt to detect a close/fcntl race and recover by 1826 * Attempt to detect a close/fcntl race and recover by
1825 * releasing the lock that was just acquired. 1827 * releasing the lock that was just acquired.
1826 */ 1828 */
1827 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { 1829 /*
1830 * we need that spin_lock here - it prevents reordering between
1831 * update of inode->i_flock and check for it done in close().
1832 * rcu_read_lock() wouldn't do.
1833 */
1834 spin_lock(&current->files->file_lock);
1835 f = fcheck(fd);
1836 spin_unlock(&current->files->file_lock);
1837 if (!error && f != filp && flock.l_type != F_UNLCK) {
1828 flock.l_type = F_UNLCK; 1838 flock.l_type = F_UNLCK;
1829 goto again; 1839 goto again;
1830 } 1840 }
@@ -1880,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1880 struct file_lock *file_lock = locks_alloc_lock(); 1890 struct file_lock *file_lock = locks_alloc_lock();
1881 struct flock64 flock; 1891 struct flock64 flock;
1882 struct inode *inode; 1892 struct inode *inode;
1893 struct file *f;
1883 int error; 1894 int error;
1884 1895
1885 if (file_lock == NULL) 1896 if (file_lock == NULL)
@@ -1952,7 +1963,10 @@ again:
1952 * Attempt to detect a close/fcntl race and recover by 1963 * Attempt to detect a close/fcntl race and recover by
1953 * releasing the lock that was just acquired. 1964 * releasing the lock that was just acquired.
1954 */ 1965 */
1955 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { 1966 spin_lock(&current->files->file_lock);
1967 f = fcheck(fd);
1968 spin_unlock(&current->files->file_lock);
1969 if (!error && f != filp && flock.l_type != F_UNLCK) {
1956 flock.l_type = F_UNLCK; 1970 flock.l_type = F_UNLCK;
1957 goto again; 1971 goto again;
1958 } 1972 }
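
Editor's note: the fcntl_setlk()/fcntl_setlk64() hunks above re-check, under current->files->file_lock, that the descriptor still refers to the same struct file after the lock has been taken, because POSIX record locks are dropped as soon as the owning process closes any descriptor for the file. The following is a hedged userspace illustration of the lock semantics involved; it uses only the standard fcntl(2) interface, and the path name is arbitrary.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Take a whole-file write lock with F_SETLK and release it again.
 * POSIX record locks are per-process and vanish when the process closes
 * *any* descriptor for the file - the close/fcntl race the hunk above
 * detects and recovers from. */
int main(void)
{
    int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0600);
    struct flock fl;

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&fl, 0, sizeof(fl));
    fl.l_type = F_WRLCK;    /* exclusive lock */
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;           /* 0 means "to end of file" */

    if (fcntl(fd, F_SETLK, &fl) < 0) {
        perror("F_SETLK");
        close(fd);
        return 1;
    }

    fl.l_type = F_UNLCK;    /* explicit unlock... */
    fcntl(fd, F_SETLK, &fl);
    close(fd);              /* ...though close() would drop it too */
    return 0;
}
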
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 2d4358c59f68..05ff4f1d7026 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
609 if (corrupt < 0) { 609 if (corrupt < 0) {
610 fat_fs_panic(new_dir->i_sb, 610 fat_fs_panic(new_dir->i_sb,
611 "%s: Filesystem corrupted (i_pos %lld)", 611 "%s: Filesystem corrupted (i_pos %lld)",
612 __FUNCTION__, sinfo.i_pos); 612 __func__, sinfo.i_pos);
613 } 613 }
614 goto out; 614 goto out;
615} 615}
diff --git a/fs/namei.c b/fs/namei.c
index e179f71bfcb0..32fd9655485b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h>
33#include <asm/namei.h> 34#include <asm/namei.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
281 if (retval) 282 if (retval)
282 return retval; 283 return retval;
283 284
285 retval = devcgroup_inode_permission(inode, mask);
286 if (retval)
287 return retval;
288
284 return security_inode_permission(inode, mask, nd); 289 return security_inode_permission(inode, mask, nd);
285} 290}
286 291
@@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2028 if (!dir->i_op || !dir->i_op->mknod) 2033 if (!dir->i_op || !dir->i_op->mknod)
2029 return -EPERM; 2034 return -EPERM;
2030 2035
2036 error = devcgroup_inode_mknod(mode, dev);
2037 if (error)
2038 return error;
2039
2031 error = security_inode_mknod(dir, dentry, mode, dev); 2040 error = security_inode_mknod(dir, dentry, mode, dev);
2032 if (error) 2041 if (error)
2033 return error; 2042 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index fe376805cf5f..4fc302c2a0e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd)
1176#endif 1176#endif
1177} 1177}
1178 1178
1179static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
1180{
1181 while (1) {
1182 if (d == dentry)
1183 return 1;
1184 if (d == NULL || d == d->d_parent)
1185 return 0;
1186 d = d->d_parent;
1187 }
1188}
1189
1190struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1179struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1191 int flag) 1180 int flag)
1192{ 1181{
@@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1203 1192
1204 p = mnt; 1193 p = mnt;
1205 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1194 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1206 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 1195 if (!is_subdir(r->mnt_mountpoint, dentry))
1207 continue; 1196 continue;
1208 1197
1209 for (s = r; s; s = next_mnt(s, r)) { 1198 for (s = r; s; s = next_mnt(s, r)) {
@@ -2340,10 +2329,10 @@ void __init mnt_init(void)
2340 err = sysfs_init(); 2329 err = sysfs_init();
2341 if (err) 2330 if (err)
2342 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2343 __FUNCTION__, err); 2332 __func__, err);
2344 fs_kobj = kobject_create_and_add("fs", NULL); 2333 fs_kobj = kobject_create_and_add("fs", NULL);
2345 if (!fs_kobj) 2334 if (!fs_kobj)
2346 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); 2335 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2347 init_rootfs(); 2336 init_rootfs();
2348 init_mount_tree(); 2337 init_mount_tree();
2349} 2338}
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index df6d60bdfcd3..97645f112114 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction
102} 102}
103 103
104static inline char * 104static inline char *
105 ncp_reply_data(struct ncp_server *server, int offset) 105ncp_reply_data(struct ncp_server *server, int offset)
106{ 106{
107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]); 107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
108} 108}
109 109
110static inline __u8 BVAL(void* data) 110static inline u8 BVAL(void *data)
111{ 111{
112 return get_unaligned((__u8*)data); 112 return *(u8 *)data;
113} 113}
114 114
115static __u8 115static u8 ncp_reply_byte(struct ncp_server *server, int offset)
116 ncp_reply_byte(struct ncp_server *server, int offset)
117{ 116{
118 return get_unaligned((__u8 *) ncp_reply_data(server, offset)); 117 return *(u8 *)ncp_reply_data(server, offset);
119} 118}
120 119
121static inline __u16 WVAL_LH(void* data) 120static inline u16 WVAL_LH(void *data)
122{ 121{
123 return le16_to_cpu(get_unaligned((__le16*)data)); 122 return get_unaligned_le16(data);
124} 123}
125 124
126static __u16 125static u16
127 ncp_reply_le16(struct ncp_server *server, int offset) 126ncp_reply_le16(struct ncp_server *server, int offset)
128{ 127{
129 return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); 128 return get_unaligned_le16(ncp_reply_data(server, offset));
130} 129}
131 130
132static __u16 131static u16
133 ncp_reply_be16(struct ncp_server *server, int offset) 132ncp_reply_be16(struct ncp_server *server, int offset)
134{ 133{
135 return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); 134 return get_unaligned_be16(ncp_reply_data(server, offset));
136} 135}
137 136
138static inline __u32 DVAL_LH(void* data) 137static inline u32 DVAL_LH(void *data)
139{ 138{
140 return le32_to_cpu(get_unaligned((__le32*)data)); 139 return get_unaligned_le32(data);
141} 140}
142 141
143static __le32 142static __le32
144 ncp_reply_dword(struct ncp_server *server, int offset) 143ncp_reply_dword(struct ncp_server *server, int offset)
145{ 144{
146 return get_unaligned((__le32 *) ncp_reply_data(server, offset)); 145 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147} 146}
148 147
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { 148static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
@@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1006 result = ncp_request2(server, 72, bounce, bufsize); 1005 result = ncp_request2(server, 72, bounce, bufsize);
1007 ncp_unlock_server(server); 1006 ncp_unlock_server(server);
1008 if (!result) { 1007 if (!result) {
1009 int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + 1008 int len = get_unaligned_be16((char *)bounce +
1010 sizeof(struct ncp_reply_header)))); 1009 sizeof(struct ncp_reply_header));
1011 result = -EIO; 1010 result = -EIO;
1012 if (len <= to_read) { 1011 if (len <= to_read) {
1013 char* source; 1012 char* source;
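
Editor's note: the ncplib_kernel.c hunks replace open-coded le16_to_cpu(get_unaligned(...)) and be16_to_cpu(get_unaligned(...)) sequences with the combined get_unaligned_le16()/get_unaligned_be16() helpers, which read a possibly misaligned value and fix the byte order in one step. Below is a hedged userspace sketch of the same idea using memcpy; the helper names are invented and this is not the kernel implementation.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable unaligned little-endian reads, roughly what the kernel's
 * get_unaligned_le16()/get_unaligned_le32() helpers provide.  memcpy()
 * avoids any unaligned-access trap; the shifts fix the byte order
 * regardless of host endianness. */
static uint16_t rd_le16(const void *p)
{
    uint8_t b[2];

    memcpy(b, p, 2);
    return (uint16_t)(b[0] | (b[1] << 8));
}

static uint32_t rd_le32(const void *p)
{
    uint8_t b[4];

    memcpy(b, p, 4);
    return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
           ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
}

int main(void)
{
    /* A misaligned reply buffer: one pad byte, then a little-endian
     * 16-bit value 0x1234 and a 32-bit value 0xdeadbeef. */
    const uint8_t reply[] = { 0x00, 0x34, 0x12, 0xef, 0xbe, 0xad, 0xde };

    printf("le16 = 0x%04x, le32 = 0x%08x\n",
           rd_le16(reply + 1), rd_le32(reply + 3));
    return 0;
}
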
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f2f3b284e6dd..89ac5bb0401c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = {
1321 .read = seq_read, 1321 .read = seq_read,
1322 .llseek = seq_lseek, 1322 .llseek = seq_lseek,
1323 .release = seq_release, 1323 .release = seq_release,
1324 .owner = THIS_MODULE,
1324}; 1325};
1325 1326
1326static int nfs_volume_list_open(struct inode *inode, struct file *file); 1327static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = {
1341 .read = seq_read, 1342 .read = seq_read,
1342 .llseek = seq_lseek, 1343 .llseek = seq_lseek,
1343 .release = seq_release, 1344 .release = seq_release,
1345 .owner = THIS_MODULE,
1344}; 1346};
1345 1347
1346/* 1348/*
@@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void)
1500{ 1502{
1501 struct proc_dir_entry *p; 1503 struct proc_dir_entry *p;
1502 1504
1503 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); 1505 proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
1504 if (!proc_fs_nfs) 1506 if (!proc_fs_nfs)
1505 goto error_0; 1507 goto error_0;
1506 1508
1507 proc_fs_nfs->owner = THIS_MODULE; 1509 proc_fs_nfs->owner = THIS_MODULE;
1508 1510
1509 /* a file of servers with which we're dealing */ 1511 /* a file of servers with which we're dealing */
1510 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); 1512 p = proc_create("servers", S_IFREG|S_IRUGO,
1513 proc_fs_nfs, &nfs_server_list_fops);
1511 if (!p) 1514 if (!p)
1512 goto error_1; 1515 goto error_1;
1513 1516
1514 p->proc_fops = &nfs_server_list_fops;
1515 p->owner = THIS_MODULE;
1516
1517 /* a file of volumes that we have mounted */ 1517 /* a file of volumes that we have mounted */
1518 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); 1518 p = proc_create("volumes", S_IFREG|S_IRUGO,
1519 proc_fs_nfs, &nfs_volume_list_fops);
1519 if (!p) 1520 if (!p)
1520 goto error_2; 1521 goto error_2;
1521
1522 p->proc_fops = &nfs_volume_list_fops;
1523 p->owner = THIS_MODULE;
1524 return 0; 1522 return 0;
1525 1523
1526error_2: 1524error_2:
1527 remove_proc_entry("servers", proc_fs_nfs); 1525 remove_proc_entry("servers", proc_fs_nfs);
1528error_1: 1526error_1:
1529 remove_proc_entry("nfsfs", proc_root_fs); 1527 remove_proc_entry("fs/nfsfs", NULL);
1530error_0: 1528error_0:
1531 return -ENOMEM; 1529 return -ENOMEM;
1532} 1530}
@@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void)
1538{ 1536{
1539 remove_proc_entry("volumes", proc_fs_nfs); 1537 remove_proc_entry("volumes", proc_fs_nfs);
1540 remove_proc_entry("servers", proc_fs_nfs); 1538 remove_proc_entry("servers", proc_fs_nfs);
1541 remove_proc_entry("nfsfs", proc_root_fs); 1539 remove_proc_entry("fs/nfsfs", NULL);
1542} 1540}
1543 1541
1544#endif /* CONFIG_PROC_FS */ 1542#endif /* CONFIG_PROC_FS */
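
Editor's note: nfs_fs_proc_init() above (like the fs/nfsd/nfsctl.c and fs/jfs/jfs_debug.c hunks) drops the old create_proc_entry()-then-assign-proc_fops pattern in favour of proc_create(), which registers the entry with its file_operations already attached, and passes paths like "fs/nfsfs" with a NULL parent now that name translation always runs. The following is a minimal module-style sketch of that pattern as it looked in this kernel generation; the entry name and functions are hypothetical, and the API shown is the 2.6.25/2.6.26-era proc_create() that still takes a struct file_operations.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Minimal proc_create() + seq_file sketch; "demo" is a made-up entry. */
static int demo_show(struct seq_file *m, void *v)
{
    seq_printf(m, "hello from /proc/demo\n");
    return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
    return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
    .owner   = THIS_MODULE,
    .open    = demo_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init demo_init(void)
{
    /* One call registers the entry with its fops in place, instead of
     * create_proc_entry() followed by p->proc_fops = ...; p->owner = ... */
    if (!proc_create("demo", S_IFREG | S_IRUGO, NULL, &demo_fops))
        return -ENOMEM;
    return 0;
}

static void __exit demo_exit(void)
{
    remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
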
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa220dc74609..7226a506f3ca 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
1575 return nfs_compare_mount_options(sb, server, mntflags); 1575 return nfs_compare_mount_options(sb, server, mntflags);
1576} 1576}
1577 1577
1578static int nfs_bdi_register(struct nfs_server *server)
1579{
1580 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
1581}
1582
1578static int nfs_get_sb(struct file_system_type *fs_type, 1583static int nfs_get_sb(struct file_system_type *fs_type,
1579 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1584 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1580{ 1585{
@@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1617 if (s->s_fs_info != server) { 1622 if (s->s_fs_info != server) {
1618 nfs_free_server(server); 1623 nfs_free_server(server);
1619 server = NULL; 1624 server = NULL;
1625 } else {
1626 error = nfs_bdi_register(server);
1627 if (error)
1628 goto error_splat_super;
1620 } 1629 }
1621 1630
1622 if (!s->s_root) { 1631 if (!s->s_root) {
@@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
1664{ 1673{
1665 struct nfs_server *server = NFS_SB(s); 1674 struct nfs_server *server = NFS_SB(s);
1666 1675
1676 bdi_unregister(&server->backing_dev_info);
1667 kill_anon_super(s); 1677 kill_anon_super(s);
1668 nfs_free_server(server); 1678 nfs_free_server(server);
1669} 1679}
@@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1708 if (s->s_fs_info != server) { 1718 if (s->s_fs_info != server) {
1709 nfs_free_server(server); 1719 nfs_free_server(server);
1710 server = NULL; 1720 server = NULL;
1721 } else {
1722 error = nfs_bdi_register(server);
1723 if (error)
1724 goto error_splat_super;
1711 } 1725 }
1712 1726
1713 if (!s->s_root) { 1727 if (!s->s_root) {
@@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1984 if (s->s_fs_info != server) { 1998 if (s->s_fs_info != server) {
1985 nfs_free_server(server); 1999 nfs_free_server(server);
1986 server = NULL; 2000 server = NULL;
2001 } else {
2002 error = nfs_bdi_register(server);
2003 if (error)
2004 goto error_splat_super;
1987 } 2005 }
1988 2006
1989 if (!s->s_root) { 2007 if (!s->s_root) {
@@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2070 if (s->s_fs_info != server) { 2088 if (s->s_fs_info != server) {
2071 nfs_free_server(server); 2089 nfs_free_server(server);
2072 server = NULL; 2090 server = NULL;
2091 } else {
2092 error = nfs_bdi_register(server);
2093 if (error)
2094 goto error_splat_super;
2073 } 2095 }
2074 2096
2075 if (!s->s_root) { 2097 if (!s->s_root) {
@@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2149 if (s->s_fs_info != server) { 2171 if (s->s_fs_info != server) {
2150 nfs_free_server(server); 2172 nfs_free_server(server);
2151 server = NULL; 2173 server = NULL;
2174 } else {
2175 error = nfs_bdi_register(server);
2176 if (error)
2177 goto error_splat_super;
2152 } 2178 }
2153 2179
2154 if (!s->s_root) { 2180 if (!s->s_root) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 562abf3380d0..0b3ffa9840c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
104} while (0) 104} while (0)
105#define RESERVE_SPACE(nbytes) do { \ 105#define RESERVE_SPACE(nbytes) do { \
106 p = xdr_reserve_space(xdr, nbytes); \ 106 p = xdr_reserve_space(xdr, nbytes); \
107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ 107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
108 BUG_ON(!p); \ 108 BUG_ON(!p); \
109} while (0) 109} while (0)
110 110
@@ -134,7 +134,7 @@ xdr_error: \
134 p = xdr_inline_decode(xdr, nbytes); \ 134 p = xdr_inline_decode(xdr, nbytes); \
135 if (!p) { \ 135 if (!p) { \
136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ 136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
137 __FUNCTION__, __LINE__); \ 137 __func__, __LINE__); \
138 return -EIO; \ 138 return -EIO; \
139 } \ 139 } \
140} while (0) 140} while (0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 42f3820ee8f5..5ac00c4fee91 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -169,6 +169,7 @@ static const struct file_operations exports_operations = {
169 .read = seq_read, 169 .read = seq_read,
170 .llseek = seq_lseek, 170 .llseek = seq_lseek,
171 .release = seq_release, 171 .release = seq_release,
172 .owner = THIS_MODULE,
172}; 173};
173 174
174/*----------------------------------------------------------------------------*/ 175/*----------------------------------------------------------------------------*/
@@ -801,10 +802,9 @@ static int create_proc_exports_entry(void)
801 entry = proc_mkdir("fs/nfs", NULL); 802 entry = proc_mkdir("fs/nfs", NULL);
802 if (!entry) 803 if (!entry)
803 return -ENOMEM; 804 return -ENOMEM;
804 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 805 entry = proc_create("exports", 0, entry, &exports_operations);
805 if (!entry) 806 if (!entry)
806 return -ENOMEM; 807 return -ENOMEM;
807 entry->proc_fops = &exports_operations;
808 return 0; 808 return 0;
809} 809}
810#else /* CONFIG_PROC_FS */ 810#else /* CONFIG_PROC_FS */
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d127..5e6724c1afd1 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
45extern void __ntfs_debug (const char *file, int line, const char *function, 45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 47#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) 48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 49
50extern void ntfs_debug_dump_runlist(const runlist_element *rl); 50extern void ntfs_debug_dump_runlist(const runlist_element *rl);
51 51
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
58 58
59extern void __ntfs_warning(const char *function, const struct super_block *sb, 59extern void __ntfs_warning(const char *function, const struct super_block *sb,
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
61#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) 61#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
62 62
63extern void __ntfs_error(const char *function, const struct super_block *sb, 63extern void __ntfs_error(const char *function, const struct super_block *sb,
64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
65#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) 65#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
66 66
67#endif /* _LINUX_NTFS_DEBUG_H */ 67#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2ad5c8b104b9..790defb847e7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1191 if (size) { 1191 if (size) {
1192 page = ntfs_map_page(mftbmp_mapping, 1192 page = ntfs_map_page(mftbmp_mapping,
1193 ofs >> PAGE_CACHE_SHIFT); 1193 ofs >> PAGE_CACHE_SHIFT);
1194 if (unlikely(IS_ERR(page))) { 1194 if (IS_ERR(page)) {
1195 ntfs_error(vol->sb, "Failed to read mft " 1195 ntfs_error(vol->sb, "Failed to read mft "
1196 "bitmap, aborting."); 1196 "bitmap, aborting.");
1197 return PTR_ERR(page); 1197 return PTR_ERR(page);
@@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2118 } 2118 }
2119 /* Read, map, and pin the page containing the mft record. */ 2119 /* Read, map, and pin the page containing the mft record. */
2120 page = ntfs_map_page(mft_vi->i_mapping, index); 2120 page = ntfs_map_page(mft_vi->i_mapping, index);
2121 if (unlikely(IS_ERR(page))) { 2121 if (IS_ERR(page)) {
2122 ntfs_error(vol->sb, "Failed to map page containing mft record " 2122 ntfs_error(vol->sb, "Failed to map page containing mft record "
2123 "to format 0x%llx.", (long long)mft_no); 2123 "to format 0x%llx.", (long long)mft_no);
2124 return PTR_ERR(page); 2124 return PTR_ERR(page);
@@ -2519,7 +2519,7 @@ mft_rec_already_initialized:
2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2520 /* Read, map, and pin the page containing the mft record. */ 2520 /* Read, map, and pin the page containing the mft record. */
2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index); 2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index);
2522 if (unlikely(IS_ERR(page))) { 2522 if (IS_ERR(page)) {
2523 ntfs_error(vol->sb, "Failed to map page containing allocated " 2523 ntfs_error(vol->sb, "Failed to map page containing allocated "
2524 "mft record 0x%llx.", (long long)bit); 2524 "mft record 0x%llx.", (long long)bit);
2525 err = PTR_ERR(page); 2525 err = PTR_ERR(page);
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 98429fd68499..bc702dab5d1f 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -65,7 +65,7 @@ int o2cb_sys_init(void)
65{ 65{
66 int ret; 66 int ret;
67 67
68 o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); 68 o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
69 if (!o2cb_kset) 69 if (!o2cb_kset)
70 return -ENOMEM; 70 return -ENOMEM;
71 71
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5f6d858770a2..1b81dcba175d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -44,7 +44,8 @@
44#define MLOG_MASK_PREFIX ML_DLM 44#define MLOG_MASK_PREFIX ML_DLM
45#include "cluster/masklog.h" 45#include "cluster/masklog.h"
46 46
47int stringify_lockname(const char *lockname, int locklen, char *buf, int len); 47static int stringify_lockname(const char *lockname, int locklen, char *buf,
48 int len);
48 49
49void dlm_print_one_lock_resource(struct dlm_lock_resource *res) 50void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
50{ 51{
@@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname);
251 * 252 *
252 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. 253 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
253 */ 254 */
254int stringify_lockname(const char *lockname, int locklen, char *buf, int len) 255static int stringify_lockname(const char *lockname, int locklen, char *buf,
256 int len)
255{ 257{
256 int out = 0; 258 int out = 0;
257 __be64 inode_blkno_be; 259 __be64 inode_blkno_be;
@@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref)
368 kfree(dc); 370 kfree(dc);
369} 371}
370 372
371void dlm_debug_put(struct dlm_debug_ctxt *dc) 373static void dlm_debug_put(struct dlm_debug_ctxt *dc)
372{ 374{
373 if (dc) 375 if (dc)
374 kref_put(&dc->debug_refcnt, dlm_debug_free); 376 kref_put(&dc->debug_refcnt, dlm_debug_free);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524c..e48aba698b77 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
327 327
328static struct backing_dev_info dlmfs_backing_dev_info = { 328static struct backing_dev_info dlmfs_backing_dev_info = {
329 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
330 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
331}; 331};
332 332
333static struct inode *dlmfs_get_root_inode(struct super_block *sb) 333static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9154c82d3258..57e0d30cde98 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1048 mlog_entry("(0x%p, '%.*s')\n", dentry, 1048 mlog_entry("(0x%p, '%.*s')\n", dentry,
1049 dentry->d_name.len, dentry->d_name.name); 1049 dentry->d_name.len, dentry->d_name.name);
1050 1050
1051 /* ensuring we don't even attempt to truncate a symlink */
1052 if (S_ISLNK(inode->i_mode))
1053 attr->ia_valid &= ~ATTR_SIZE;
1054
1051 if (attr->ia_valid & ATTR_MODE) 1055 if (attr->ia_valid & ATTR_MODE)
1052 mlog(0, "mode change: %d\n", attr->ia_mode); 1056 mlog(0, "mode change: %d\n", attr->ia_mode);
1053 if (attr->ia_valid & ATTR_UID) 1057 if (attr->ia_valid & ATTR_UID)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ce0dc147602a..be774bdc8b36 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
260 bh = osb->local_alloc_bh; 260 bh = osb->local_alloc_bh;
261 alloc = (struct ocfs2_dinode *) bh->b_data; 261 alloc = (struct ocfs2_dinode *) bh->b_data;
262 262
263 alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); 263 alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
264 if (!alloc_copy) { 264 if (!alloc_copy) {
265 status = -ENOMEM; 265 status = -ENOMEM;
266 goto out_commit; 266 goto out_commit;
@@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
931 * local alloc shutdown won't try to double free main bitmap 931 * local alloc shutdown won't try to double free main bitmap
932 * bits. Make a copy so the sync function knows which bits to 932 * bits. Make a copy so the sync function knows which bits to
933 * free. */ 933 * free. */
934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); 934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
935 if (!alloc_copy) { 935 if (!alloc_copy) {
936 status = -ENOMEM; 936 status = -ENOMEM;
937 mlog_errno(status); 937 mlog_errno(status);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index ac1d74c63bf5..bbd1667aa7d3 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node)
385 return 0; 385 return 0;
386} 386}
387 387
388struct ocfs2_stack_operations o2cb_stack_ops = { 388static struct ocfs2_stack_operations o2cb_stack_ops = {
389 .connect = o2cb_cluster_connect, 389 .connect = o2cb_cluster_connect,
390 .disconnect = o2cb_cluster_disconnect, 390 .disconnect = o2cb_cluster_disconnect,
391 .hangup = o2cb_cluster_hangup, 391 .hangup = o2cb_cluster_hangup,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 7428663f9cbb..b503772cd0ec 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = {
635 .owner = THIS_MODULE, 635 .owner = THIS_MODULE,
636}; 636};
637 637
638struct miscdevice ocfs2_control_device = { 638static struct miscdevice ocfs2_control_device = {
639 .minor = MISC_DYNAMIC_MINOR, 639 .minor = MISC_DYNAMIC_MINOR,
640 .name = "ocfs2_control", 640 .name = "ocfs2_control",
641 .fops = &ocfs2_control_fops, 641 .fops = &ocfs2_control_fops,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 7134007ba22f..ba9dbb51d25b 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
167 .readlink = page_readlink, 167 .readlink = page_readlink,
168 .follow_link = ocfs2_follow_link, 168 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 169 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr,
170}; 171};
171const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
172 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
173 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
174 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr,
175}; 177};
diff --git a/fs/open.c b/fs/open.c
index 7af1f05d5978..a1450086e92f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h>
10#include <linux/quotaops.h> 11#include <linux/quotaops.h>
11#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
12#include <linux/module.h> 13#include <linux/module.h>
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e3473..0fdda2e8a4cc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
41#ifndef CONFIG_LDM_DEBUG 41#ifndef CONFIG_LDM_DEBUG
42#define ldm_debug(...) do {} while (0) 42#define ldm_debug(...) do {} while (0)
43#else 43#else
44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) 44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
45#endif 45#endif
46 46
47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) 47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) 48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) 49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
50 50
51__attribute__ ((format (printf, 3, 4))) 51__attribute__ ((format (printf, 3, 4)))
52static void _ldm_printk (const char *level, const char *function, 52static void _ldm_printk (const char *level, const char *function,
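
Editor's note: many hunks in this merge (ldm.c here, plus lockd, msdos, namespace.c, nfsd and the NTFS debug macros) simply replace the GCC-specific __FUNCTION__ with the standard C99 __func__ predefined identifier; both expand to the name of the enclosing function. A small standalone illustration, not taken from the kernel, is below.

#include <stdio.h>

/* __func__ is the C99 predefined identifier for the enclosing function's
 * name; __FUNCTION__ was the older GCC spelling these patches retire.
 * A debug macro just forwards it to the formatter. */
#define demo_debug(fmt, ...) \
    fprintf(stderr, "%s: " fmt "\n", __func__, ##__VA_ARGS__)

static void probe_device(int id)
{
    demo_debug("probing device %d", id);
}

int main(void)
{
    probe_device(3);    /* prints "probe_device: probing device 3" */
    return 0;
}
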
diff --git a/fs/pipe.c b/fs/pipe.c
index f73492b6817e..ec228bc9f882 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -17,6 +17,7 @@
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/audit.h> 19#include <linux/audit.h>
20#include <linux/syscalls.h>
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/ioctls.h> 23#include <asm/ioctls.h>
@@ -1076,6 +1077,26 @@ int do_pipe(int *fd)
1076} 1077}
1077 1078
1078/* 1079/*
1080 * sys_pipe() is the normal C calling standard for creating
1081 * a pipe. It's not the way Unix traditionally does this, though.
1082 */
1083asmlinkage long __weak sys_pipe(int __user *fildes)
1084{
1085 int fd[2];
1086 int error;
1087
1088 error = do_pipe(fd);
1089 if (!error) {
1090 if (copy_to_user(fildes, fd, sizeof(fd))) {
1091 sys_close(fd[0]);
1092 sys_close(fd[1]);
1093 error = -EFAULT;
1094 }
1095 }
1096 return error;
1097}
1098
1099/*
1079 * pipefs should _never_ be mounted by userland - too much of security hassle, 1100 * pipefs should _never_ be mounted by userland - too much of security hassle,
1080 * no real gain from having the whole whorehouse mounted. So we don't need 1101 * no real gain from having the whole whorehouse mounted. So we don't need
1081 * any operations on the root directory. However, we need a non-trivial 1102 * any operations on the root directory. However, we need a non-trivial
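
Editor's note: the new __weak sys_pipe() in fs/pipe.c gives architectures that do not define their own entry point a generic one: do_pipe() fills two descriptors and copy_to_user() hands them back, closing both if the copy fails. From userspace this is simply pipe(2); a trivial, self-contained example follows.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* What the generic sys_pipe() above services from the kernel side:
 * pipe() returns two descriptors, fd[0] for reading, fd[1] for writing. */
int main(void)
{
    int fd[2];
    char buf[16];
    ssize_t n;

    if (pipe(fd) < 0) {
        perror("pipe");
        return 1;
    }

    write(fd[1], "ping", 4);
    n = read(fd[0], buf, sizeof(buf));
    printf("read %zd bytes: %.*s\n", n, (int)n, buf);

    close(fd[0]);
    close(fd[1]);
    return 0;
}
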
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe8..9e3b8c33c24b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
73#include <linux/signal.h> 73#include <linux/signal.h>
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/file.h> 75#include <linux/file.h>
76#include <linux/fdtable.h>
76#include <linux/times.h> 77#include <linux/times.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
@@ -297,6 +298,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
297 render_cap_t(m, "CapInh:\t", &p->cap_inheritable); 298 render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
298 render_cap_t(m, "CapPrm:\t", &p->cap_permitted); 299 render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
299 render_cap_t(m, "CapEff:\t", &p->cap_effective); 300 render_cap_t(m, "CapEff:\t", &p->cap_effective);
301 render_cap_t(m, "CapBnd:\t", &p->cap_bset);
300} 302}
301 303
302static inline void task_context_switch_counts(struct seq_file *m, 304static inline void task_context_switch_counts(struct seq_file *m,
@@ -425,12 +427,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
425 cutime = cstime = utime = stime = cputime_zero; 427 cutime = cstime = utime = stime = cputime_zero;
426 cgtime = gtime = cputime_zero; 428 cgtime = gtime = cputime_zero;
427 429
428 rcu_read_lock();
429 if (lock_task_sighand(task, &flags)) { 430 if (lock_task_sighand(task, &flags)) {
430 struct signal_struct *sig = task->signal; 431 struct signal_struct *sig = task->signal;
431 432
432 if (sig->tty) { 433 if (sig->tty) {
433 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); 434 struct pid *pgrp = tty_get_pgrp(sig->tty);
435 tty_pgrp = pid_nr_ns(pgrp, ns);
436 put_pid(pgrp);
434 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 437 tty_nr = new_encode_dev(tty_devnum(sig->tty));
435 } 438 }
436 439
@@ -469,7 +472,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
469 472
470 unlock_task_sighand(task, &flags); 473 unlock_task_sighand(task, &flags);
471 } 474 }
472 rcu_read_unlock();
473 475
474 if (!whole || num_threads < 2) 476 if (!whole || num_threads < 2)
475 wchan = get_wchan(task); 477 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c5e412a00b17..808cbdc193d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/capability.h> 57#include <linux/capability.h>
58#include <linux/file.h> 58#include <linux/file.h>
59#include <linux/fdtable.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
61#include <linux/namei.h> 62#include <linux/namei.h>
@@ -195,12 +196,32 @@ static int proc_root_link(struct inode *inode, struct path *path)
195 return result; 196 return result;
196} 197}
197 198
198#define MAY_PTRACE(task) \ 199/*
199 (task == current || \ 200 * Return zero if current may access user memory in @task, -error if not.
200 (task->parent == current && \ 201 */
201 (task->ptrace & PT_PTRACED) && \ 202static int check_mem_permission(struct task_struct *task)
202 (task_is_stopped_or_traced(task)) && \ 203{
203 security_ptrace(current,task) == 0)) 204 /*
205 * A task can always look at itself, in case it chooses
206 * to use system calls instead of load instructions.
207 */
208 if (task == current)
209 return 0;
210
211 /*
212 * If current is actively ptrace'ing, and would also be
213 * permitted to freshly attach with ptrace now, permit it.
214 */
215 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
216 task_is_stopped_or_traced(task) &&
217 ptrace_may_attach(task))
218 return 0;
219
220 /*
221 * Noone else is allowed.
222 */
223 return -EPERM;
224}
204 225
205struct mm_struct *mm_for_maps(struct task_struct *task) 226struct mm_struct *mm_for_maps(struct task_struct *task)
206{ 227{
@@ -722,7 +743,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
722 if (!task) 743 if (!task)
723 goto out_no_task; 744 goto out_no_task;
724 745
725 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 746 if (check_mem_permission(task))
726 goto out; 747 goto out;
727 748
728 ret = -ENOMEM; 749 ret = -ENOMEM;
@@ -748,7 +769,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
748 769
749 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 770 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
750 retval = access_process_vm(task, src, page, this_len, 0); 771 retval = access_process_vm(task, src, page, this_len, 0);
751 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 772 if (!retval || check_mem_permission(task)) {
752 if (!ret) 773 if (!ret)
753 ret = -EIO; 774 ret = -EIO;
754 break; 775 break;
@@ -792,7 +813,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
792 if (!task) 813 if (!task)
793 goto out_no_task; 814 goto out_no_task;
794 815
795 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 816 if (check_mem_permission(task))
796 goto out; 817 goto out;
797 818
798 copied = -ENOMEM; 819 copied = -ENOMEM;
@@ -1181,6 +1202,81 @@ static const struct file_operations proc_pid_sched_operations = {
1181 1202
1182#endif 1203#endif
1183 1204
1205/*
1206 * We added or removed a vma mapping the executable. The vmas are only mapped
1207 * during exec and are not mapped with the mmap system call.
1208 * Callers must hold down_write() on the mm's mmap_sem for these
1209 */
1210void added_exe_file_vma(struct mm_struct *mm)
1211{
1212 mm->num_exe_file_vmas++;
1213}
1214
1215void removed_exe_file_vma(struct mm_struct *mm)
1216{
1217 mm->num_exe_file_vmas--;
1218 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1219 fput(mm->exe_file);
1220 mm->exe_file = NULL;
1221 }
1222
1223}
1224
1225void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1226{
1227 if (new_exe_file)
1228 get_file(new_exe_file);
1229 if (mm->exe_file)
1230 fput(mm->exe_file);
1231 mm->exe_file = new_exe_file;
1232 mm->num_exe_file_vmas = 0;
1233}
1234
1235struct file *get_mm_exe_file(struct mm_struct *mm)
1236{
1237 struct file *exe_file;
1238
1239 /* We need mmap_sem to protect against races with removal of
1240 * VM_EXECUTABLE vmas */
1241 down_read(&mm->mmap_sem);
1242 exe_file = mm->exe_file;
1243 if (exe_file)
1244 get_file(exe_file);
1245 up_read(&mm->mmap_sem);
1246 return exe_file;
1247}
1248
1249void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1250{
1251 /* It's safe to write the exe_file pointer without exe_file_lock because
1252 * this is called during fork when the task is not yet in /proc */
1253 newmm->exe_file = get_mm_exe_file(oldmm);
1254}
1255
1256static int proc_exe_link(struct inode *inode, struct path *exe_path)
1257{
1258 struct task_struct *task;
1259 struct mm_struct *mm;
1260 struct file *exe_file;
1261
1262 task = get_proc_task(inode);
1263 if (!task)
1264 return -ENOENT;
1265 mm = get_task_mm(task);
1266 put_task_struct(task);
1267 if (!mm)
1268 return -ENOENT;
1269 exe_file = get_mm_exe_file(mm);
1270 mmput(mm);
1271 if (exe_file) {
1272 *exe_path = exe_file->f_path;
1273 path_get(&exe_file->f_path);
1274 fput(exe_file);
1275 return 0;
1276 } else
1277 return -ENOENT;
1278}
1279
1184static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1280static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1185{ 1281{
1186 struct inode *inode = dentry->d_inode; 1282 struct inode *inode = dentry->d_inode;
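
Editor's note: the fs/proc/base.c hunks above do two things: check_mem_permission() replaces the MAY_PTRACE() macro so /proc/<pid>/mem is only readable by the task itself or by an active, ptrace-capable tracer, and the mm->exe_file helpers let proc_exe_link() resolve /proc/<pid>/exe from the mm instead of scanning executable vmas. The visible userspace interface is the exe symlink; a hedged example of reading it is below (nothing here is kernel code).

#include <stdio.h>
#include <unistd.h>

/* /proc/self/exe is the symlink that proc_exe_link() above resolves,
 * now backed by mm->exe_file rather than a walk of the task's vmas. */
int main(void)
{
    char path[4096];
    ssize_t n;

    n = readlink("/proc/self/exe", path, sizeof(path) - 1);
    if (n < 0) {
        perror("readlink");
        return 1;
    }
    path[n] = '\0';
    printf("running image: %s\n", path);
    return 0;
}
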
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a36ad3c75cf4..43e54e86cefd 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
70 70
71 start = NULL; 71 start = NULL;
72 if (dp->get_info) { 72 if (dp->read_proc) {
73 /* Handle old net routines */
74 n = dp->get_info(page, &start, *ppos, count);
75 if (n < count)
76 eof = 1;
77 } else if (dp->read_proc) {
78 /* 73 /*
79 * How to be a proc read function 74 * How to be a proc read function
80 * ------------------------------ 75 * ------------------------------
@@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name,
277 int len; 272 int len;
278 int rtn = 0; 273 int rtn = 0;
279 274
275 de = *ret;
276 if (!de)
277 de = &proc_root;
278
280 spin_lock(&proc_subdir_lock); 279 spin_lock(&proc_subdir_lock);
281 de = &proc_root;
282 while (1) { 280 while (1) {
283 next = strchr(cp, '/'); 281 next = strchr(cp, '/');
284 if (!next) 282 if (!next)
@@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
385 383
386 lock_kernel(); 384 lock_kernel();
387 spin_lock(&proc_subdir_lock); 385 spin_lock(&proc_subdir_lock);
388 if (de) { 386 for (de = de->subdir; de ; de = de->next) {
389 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len)
390 if (de->namelen != dentry->d_name.len) 388 continue;
391 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino;
393 unsigned int ino;
394 391
395 ino = de->low_ino; 392 ino = de->low_ino;
396 de_get(de); 393 de_get(de);
397 spin_unlock(&proc_subdir_lock); 394 spin_unlock(&proc_subdir_lock);
398 error = -EINVAL; 395 error = -EINVAL;
399 inode = proc_get_inode(dir->i_sb, ino, de); 396 inode = proc_get_inode(dir->i_sb, ino, de);
400 goto out_unlock; 397 goto out_unlock;
401 }
402 } 398 }
403 } 399 }
404 spin_unlock(&proc_subdir_lock); 400 spin_unlock(&proc_subdir_lock);
@@ -410,7 +406,8 @@ out_unlock:
410 d_add(dentry, inode); 406 d_add(dentry, inode);
411 return NULL; 407 return NULL;
412 } 408 }
413 de_put(de); 409 if (de)
410 de_put(de);
414 return ERR_PTR(error); 411 return ERR_PTR(error);
415} 412}
416 413
@@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
440 lock_kernel(); 437 lock_kernel();
441 438
442 ino = inode->i_ino; 439 ino = inode->i_ino;
443 if (!de) {
444 ret = -EINVAL;
445 goto out;
446 }
447 i = filp->f_pos; 440 i = filp->f_pos;
448 switch (i) { 441 switch (i) {
449 case 0: 442 case 0:
@@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
582 /* make sure name is valid */ 575 /* make sure name is valid */
583 if (!name || !strlen(name)) goto out; 576 if (!name || !strlen(name)) goto out;
584 577
585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 if (xlate_proc_name(name, parent, &fn) != 0)
586 goto out; 579 goto out;
587 580
588 /* At this point there must not be any '/' characters beyond *fn */ 581 /* At this point there must not be any '/' characters beyond *fn */
@@ -648,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
648 return ent; 641 return ent;
649} 642}
650 643
644struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
645 struct proc_dir_entry *parent)
646{
647 struct proc_dir_entry *ent;
648
649 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
650 if (ent) {
651 ent->data = net;
652 if (proc_register(parent, ent) < 0) {
653 kfree(ent);
654 ent = NULL;
655 }
656 }
657 return ent;
658}
659EXPORT_SYMBOL_GPL(proc_net_mkdir);
660
651struct proc_dir_entry *proc_mkdir(const char *name, 661struct proc_dir_entry *proc_mkdir(const char *name,
652 struct proc_dir_entry *parent) 662 struct proc_dir_entry *parent)
653{ 663{
@@ -682,9 +692,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
682 return ent; 692 return ent;
683} 693}
684 694
685struct proc_dir_entry *proc_create(const char *name, mode_t mode, 695struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
686 struct proc_dir_entry *parent, 696 struct proc_dir_entry *parent,
687 const struct file_operations *proc_fops) 697 const struct file_operations *proc_fops,
698 void *data)
688{ 699{
689 struct proc_dir_entry *pde; 700 struct proc_dir_entry *pde;
690 nlink_t nlink; 701 nlink_t nlink;
@@ -705,6 +716,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode,
705 if (!pde) 716 if (!pde)
706 goto out; 717 goto out;
707 pde->proc_fops = proc_fops; 718 pde->proc_fops = proc_fops;
719 pde->data = data;
708 if (proc_register(parent, pde) < 0) 720 if (proc_register(parent, pde) < 0)
709 goto out_free; 721 goto out_free;
710 return pde; 722 return pde;
@@ -734,55 +746,58 @@ void free_proc_entry(struct proc_dir_entry *de)
734void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 746void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
735{ 747{
736 struct proc_dir_entry **p; 748 struct proc_dir_entry **p;
737 struct proc_dir_entry *de; 749 struct proc_dir_entry *de = NULL;
738 const char *fn = name; 750 const char *fn = name;
739 int len; 751 int len;
740 752
741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 753 if (xlate_proc_name(name, &parent, &fn) != 0)
742 goto out; 754 return;
743 len = strlen(fn); 755 len = strlen(fn);
744 756
745 spin_lock(&proc_subdir_lock); 757 spin_lock(&proc_subdir_lock);
746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 758 for (p = &parent->subdir; *p; p=&(*p)->next ) {
747 if (!proc_match(len, fn, *p)) 759 if (proc_match(len, fn, *p)) {
748 continue; 760 de = *p;
749 de = *p; 761 *p = de->next;
750 *p = de->next; 762 de->next = NULL;
751 de->next = NULL; 763 break;
752 764 }
753 spin_lock(&de->pde_unload_lock); 765 }
754 /* 766 spin_unlock(&proc_subdir_lock);
755 * Stop accepting new callers into module. If you're 767 if (!de)
756 * dynamically allocating ->proc_fops, save a pointer somewhere. 768 return;
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
765
766 spin_unlock(&de->pde_unload_lock);
767 spin_unlock(&proc_subdir_lock);
768 769
769 wait_for_completion(de->pde_unload_completion); 770 spin_lock(&de->pde_unload_lock);
771 /*
772 * Stop accepting new callers into module. If you're
773 * dynamically allocating ->proc_fops, save a pointer somewhere.
774 */
775 de->proc_fops = NULL;
776 /* Wait until all existing callers into module are done. */
777 if (de->pde_users > 0) {
778 DECLARE_COMPLETION_ONSTACK(c);
779
780 if (!de->pde_unload_completion)
781 de->pde_unload_completion = &c;
770 782
771 spin_lock(&proc_subdir_lock);
772 goto continue_removing;
773 }
774 spin_unlock(&de->pde_unload_lock); 783 spin_unlock(&de->pde_unload_lock);
775 784
785 wait_for_completion(de->pde_unload_completion);
786
787 goto continue_removing;
788 }
789 spin_unlock(&de->pde_unload_lock);
790
776continue_removing: 791continue_removing:
777 if (S_ISDIR(de->mode)) 792 if (S_ISDIR(de->mode))
778 parent->nlink--; 793 parent->nlink--;
779 de->nlink = 0; 794 de->nlink = 0;
780 WARN_ON(de->subdir); 795 if (de->subdir) {
781 if (atomic_dec_and_test(&de->count)) 796 printk(KERN_WARNING "%s: removing non-empty directory "
782 free_proc_entry(de); 797 "'%s/%s', leaking at least '%s'\n", __func__,
783 break; 798 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
784 } 800 }
785 spin_unlock(&proc_subdir_lock); 801 if (atomic_dec_and_test(&de->count))
786out: 802 free_proc_entry(de);
787 return;
788} 803}
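
The new proc_create_data() above stores the caller's pointer in ->data before the entry is registered, so consumers no longer have to poke the field in after create_proc_entry(). What follows is a minimal sketch of how a module might use it against a 2.6.26-era tree; the names example_* and the "example" path are invented for illustration, and the stored pointer is assumed to be read back through the usual PDE() accessor in the open routine.

/* Sketch only: hypothetical module built against a 2.6.26-era tree. */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	/* m->private is the pointer handed to single_open() below. */
	seq_printf(m, "%s\n", (char *)m->private);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is what was passed to proc_create_data(). */
	return single_open(file, example_show, PDE(inode)->data);
}

static const struct file_operations example_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = example_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static char example_payload[] = "hello from proc_create_data";

static int __init example_init(void)
{
	if (!proc_create_data("example", 0444, NULL,
			      &example_proc_fops, example_payload))
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	remove_proc_entry("example", NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
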
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 82b3a1b5a70b..6f4e8dc97da1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,8 +25,7 @@
25 25
26struct proc_dir_entry *de_get(struct proc_dir_entry *de) 26struct proc_dir_entry *de_get(struct proc_dir_entry *de)
27{ 27{
28 if (de) 28 atomic_inc(&de->count);
29 atomic_inc(&de->count);
30 return de; 29 return de;
31} 30}
32 31
@@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de)
35 */ 34 */
36void de_put(struct proc_dir_entry *de) 35void de_put(struct proc_dir_entry *de)
37{ 36{
38 if (de) { 37 lock_kernel();
39 lock_kernel(); 38 if (!atomic_read(&de->count)) {
40 if (!atomic_read(&de->count)) { 39 printk("de_put: entry %s already free!\n", de->name);
41 printk("de_put: entry %s already free!\n", de->name);
42 unlock_kernel();
43 return;
44 }
45
46 if (atomic_dec_and_test(&de->count))
47 free_proc_entry(de);
48 unlock_kernel(); 40 unlock_kernel();
41 return;
49 } 42 }
43
44 if (atomic_dec_and_test(&de->count))
45 free_proc_entry(de);
46 unlock_kernel();
50} 47}
51 48
52/* 49/*
@@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
392{ 389{
393 struct inode * inode; 390 struct inode * inode;
394 391
395 if (de != NULL && !try_module_get(de->owner)) 392 if (!try_module_get(de->owner))
396 goto out_mod; 393 goto out_mod;
397 394
398 inode = iget_locked(sb, ino); 395 inode = iget_locked(sb, ino);
@@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
402 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 399 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
403 PROC_I(inode)->fd = 0; 400 PROC_I(inode)->fd = 0;
404 PROC_I(inode)->pde = de; 401 PROC_I(inode)->pde = de;
405 if (de) { 402
406 if (de->mode) { 403 if (de->mode) {
407 inode->i_mode = de->mode; 404 inode->i_mode = de->mode;
408 inode->i_uid = de->uid; 405 inode->i_uid = de->uid;
409 inode->i_gid = de->gid; 406 inode->i_gid = de->gid;
410 } 407 }
411 if (de->size) 408 if (de->size)
412 inode->i_size = de->size; 409 inode->i_size = de->size;
413 if (de->nlink) 410 if (de->nlink)
414 inode->i_nlink = de->nlink; 411 inode->i_nlink = de->nlink;
415 if (de->proc_iops) 412 if (de->proc_iops)
416 inode->i_op = de->proc_iops; 413 inode->i_op = de->proc_iops;
417 if (de->proc_fops) { 414 if (de->proc_fops) {
418 if (S_ISREG(inode->i_mode)) { 415 if (S_ISREG(inode->i_mode)) {
419#ifdef CONFIG_COMPAT 416#ifdef CONFIG_COMPAT
420 if (!de->proc_fops->compat_ioctl) 417 if (!de->proc_fops->compat_ioctl)
421 inode->i_fop = 418 inode->i_fop =
422 &proc_reg_file_ops_no_compat; 419 &proc_reg_file_ops_no_compat;
423 else 420 else
424#endif 421#endif
425 inode->i_fop = &proc_reg_file_ops; 422 inode->i_fop = &proc_reg_file_ops;
426 } else { 423 } else {
427 inode->i_fop = de->proc_fops; 424 inode->i_fop = de->proc_fops;
428 }
429 } 425 }
430 } 426 }
431 unlock_new_inode(inode); 427 unlock_new_inode(inode);
@@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
433 return inode; 429 return inode;
434 430
435out_ino: 431out_ino:
436 if (de != NULL) 432 module_put(de->owner);
437 module_put(de->owner);
438out_mod: 433out_mod:
439 return NULL; 434 return NULL;
440} 435}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index bc72f5c8c47d..28cbca805905 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern struct proc_dir_entry proc_root;
14#ifdef CONFIG_PROC_SYSCTL 15#ifdef CONFIG_PROC_SYSCTL
15extern int proc_sys_init(void); 16extern int proc_sys_init(void);
16#else 17#else
@@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 47
47extern int maps_protect; 48extern int maps_protect;
48 49
49extern void create_seq_entry(char *name, mode_t mode,
50 const struct file_operations *f);
51extern int proc_exe_link(struct inode *, struct path *);
52extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 51 struct pid *pid, struct task_struct *task);
54extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 941e95114b5a..79ecd281d2cb 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 137
138static int __init proc_nommu_init(void) 138static int __init proc_nommu_init(void)
139{ 139{
140 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); 140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
141 return 0; 141 return 0;
142} 142}
143 143
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 441a32f0e5f2..74a323d2b850 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
179 "PageTables: %8lu kB\n" 179 "PageTables: %8lu kB\n"
180 "NFS_Unstable: %8lu kB\n" 180 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 181 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n"
182 "CommitLimit: %8lu kB\n" 183 "CommitLimit: %8lu kB\n"
183 "Committed_AS: %8lu kB\n" 184 "Committed_AS: %8lu kB\n"
184 "VmallocTotal: %8lu kB\n" 185 "VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
210 K(global_page_state(NR_PAGETABLE)), 211 K(global_page_state(NR_PAGETABLE)),
211 K(global_page_state(NR_UNSTABLE_NFS)), 212 K(global_page_state(NR_UNSTABLE_NFS)),
212 K(global_page_state(NR_BOUNCE)), 213 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)),
213 K(allowed), 215 K(allowed),
214 K(committed), 216 K(committed),
215 (unsigned long)VMALLOC_TOTAL >> 10, 217 (unsigned long)VMALLOC_TOTAL >> 10,
@@ -826,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = {
826 828
827struct proc_dir_entry *proc_root_kcore; 829struct proc_dir_entry *proc_root_kcore;
828 830
829void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
830{
831 struct proc_dir_entry *entry;
832 entry = create_proc_entry(name, mode, NULL);
833 if (entry)
834 entry->proc_fops = f;
835}
836
837void __init proc_misc_init(void) 831void __init proc_misc_init(void)
838{ 832{
839 static struct { 833 static struct {
@@ -862,66 +856,52 @@ void __init proc_misc_init(void)
862 856
863 /* And now for trickier ones */ 857 /* And now for trickier ones */
864#ifdef CONFIG_PRINTK 858#ifdef CONFIG_PRINTK
865 { 859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
866 struct proc_dir_entry *entry;
867 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
868 if (entry)
869 entry->proc_fops = &proc_kmsg_operations;
870 }
871#endif 860#endif
872 create_seq_entry("locks", 0, &proc_locks_operations); 861 proc_create("locks", 0, NULL, &proc_locks_operations);
873 create_seq_entry("devices", 0, &proc_devinfo_operations); 862 proc_create("devices", 0, NULL, &proc_devinfo_operations);
874 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
875#ifdef CONFIG_BLOCK 864#ifdef CONFIG_BLOCK
876 create_seq_entry("partitions", 0, &proc_partitions_operations); 865 proc_create("partitions", 0, NULL, &proc_partitions_operations);
877#endif 866#endif
878 create_seq_entry("stat", 0, &proc_stat_operations); 867 proc_create("stat", 0, NULL, &proc_stat_operations);
879 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 868 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
880#ifdef CONFIG_SLABINFO 869#ifdef CONFIG_SLABINFO
881 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 870 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
882#ifdef CONFIG_DEBUG_SLAB_LEAK 871#ifdef CONFIG_DEBUG_SLAB_LEAK
883 create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); 872 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
884#endif 873#endif
885#endif 874#endif
886#ifdef CONFIG_MMU 875#ifdef CONFIG_MMU
887 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); 876 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
888#endif 877#endif
889 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 878 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
890 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); 879 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
891 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 880 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
892 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 881 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
893#ifdef CONFIG_BLOCK 882#ifdef CONFIG_BLOCK
894 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 883 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
895#endif 884#endif
896#ifdef CONFIG_MODULES 885#ifdef CONFIG_MODULES
897 create_seq_entry("modules", 0, &proc_modules_operations); 886 proc_create("modules", 0, NULL, &proc_modules_operations);
898#endif 887#endif
899#ifdef CONFIG_SCHEDSTATS 888#ifdef CONFIG_SCHEDSTATS
900 create_seq_entry("schedstat", 0, &proc_schedstat_operations); 889 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
901#endif 890#endif
902#ifdef CONFIG_PROC_KCORE 891#ifdef CONFIG_PROC_KCORE
903 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); 892 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
904 if (proc_root_kcore) { 893 if (proc_root_kcore)
905 proc_root_kcore->proc_fops = &proc_kcore_operations;
906 proc_root_kcore->size = 894 proc_root_kcore->size =
907 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 895 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
908 }
909#endif 896#endif
910#ifdef CONFIG_PROC_PAGE_MONITOR 897#ifdef CONFIG_PROC_PAGE_MONITOR
911 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); 898 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
912 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); 899 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
913#endif 900#endif
914#ifdef CONFIG_PROC_VMCORE 901#ifdef CONFIG_PROC_VMCORE
915 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 902 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
916 if (proc_vmcore)
917 proc_vmcore->proc_fops = &proc_vmcore_operations;
918#endif 903#endif
919#ifdef CONFIG_MAGIC_SYSRQ 904#ifdef CONFIG_MAGIC_SYSRQ
920 { 905 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
921 struct proc_dir_entry *entry;
922 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
923 if (entry)
924 entry->proc_fops = &proc_sysrq_trigger_operations;
925 }
926#endif 906#endif
927} 907}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 13cd7835d0df..83f357b30d71 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
159} 159}
160EXPORT_SYMBOL_GPL(get_proc_net); 160EXPORT_SYMBOL_GPL(get_proc_net);
161 161
162struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
163 struct proc_dir_entry *parent)
164{
165 struct proc_dir_entry *pde;
166 pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
167 if (pde != NULL)
168 pde->data = net;
169 return pde;
170}
171EXPORT_SYMBOL_GPL(proc_net_mkdir);
172
173static __net_init int proc_net_ns_init(struct net *net) 162static __net_init int proc_net_ns_init(struct net *net)
174{ 163{
175 struct proc_dir_entry *netd, *net_statd; 164 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b6d1c2..5acc001d49f6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -165,8 +165,8 @@ out:
165 return err; 165 return err;
166} 166}
167 167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf, 168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos) 169 size_t count, loff_t *ppos, int write)
170{ 170{
171 struct dentry *dentry = filp->f_dentry; 171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head; 172 struct ctl_table_header *head;
@@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf,
190 * and won't be until we finish. 190 * and won't be until we finish.
191 */ 191 */
192 error = -EPERM; 192 error = -EPERM;
193 if (sysctl_perm(table, MAY_READ)) 193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 194 goto out;
195 195
196 /* careful: calling conventions are nasty here */ 196 /* careful: calling conventions are nasty here */
197 res = count; 197 res = count;
198 error = table->proc_handler(table, 0, filp, buf, &res, ppos); 198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
199 if (!error) 199 if (!error)
200 error = res; 200 error = res;
201out: 201out:
@@ -204,44 +204,16 @@ out:
204 return error; 204 return error;
205} 205}
206 206
207static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
209{ 209{
210 struct dentry *dentry = filp->f_dentry; 210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211 struct ctl_table_header *head; 211}
212 struct ctl_table *table;
213 ssize_t error;
214 size_t res;
215
216 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
217 /* Has the sysctl entry disappeared on us? */
218 error = -ENOENT;
219 if (!table)
220 goto out;
221
222 /* Has the sysctl entry been replaced by a directory? */
223 error = -EISDIR;
224 if (!table->proc_handler)
225 goto out;
226
227 /*
228 * At this point we know that the sysctl was not unregistered
229 * and won't be until we finish.
230 */
231 error = -EPERM;
232 if (sysctl_perm(table, MAY_WRITE))
233 goto out;
234
235 /* careful: calling conventions are nasty here */
236 res = count;
237 error = table->proc_handler(table, 1, filp, (char __user *)buf,
238 &res, ppos);
239 if (!error)
240 error = res;
241out:
242 sysctl_head_finish(head);
243 212
244 return error; 213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
245} 217}
246 218
247 219
@@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
416 goto out; 388 goto out;
417 389
418 /* Use the permissions on the sysctl table entry */ 390 /* Use the permissions on the sysctl table entry */
419 error = sysctl_perm(table, mask); 391 error = sysctl_perm(head->root, table, mask);
420out: 392out:
421 sysctl_head_finish(head); 393 sysctl_head_finish(head);
422 return error; 394 return error;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 49816e00b51a..21f490f5d65c 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -5,7 +5,7 @@
5 */ 5 */
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8#include <linux/module.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/time.h> 11#include <linux/time.h>
@@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139/* 139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140 * This is the handler for /proc/tty/ldiscs
141 */
142static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
143 int count, int *eof, void *data)
144{ 140{
145 int i; 141 return (*pos < NR_LDISCS) ? pos : NULL;
146 int len = 0; 142}
147 off_t begin = 0; 143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
148 struct tty_ldisc *ld; 157 struct tty_ldisc *ld;
149 158
150 for (i=0; i < NR_LDISCS; i++) { 159 ld = tty_ldisc_get(i);
151 ld = tty_ldisc_get(i); 160 if (ld == NULL)
152 if (ld == NULL)
153 continue;
154 len += sprintf(page+len, "%-10s %2d\n",
155 ld->name ? ld->name : "???", i);
156 tty_ldisc_put(i);
157 if (len+begin > off+count)
158 break;
159 if (len+begin < off) {
160 begin += len;
161 len = 0;
162 }
163 }
164 if (i >= NR_LDISCS)
165 *eof = 1;
166 if (off >= len+begin)
167 return 0; 161 return 0;
168 *start = page + (off-begin); 162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
169 return ((count < begin+len-off) ? count : begin+len-off); 163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
170} 177}
171 178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
172/* 187/*
173 * This function is called by tty_register_driver() to handle 188 * This function is called by tty_register_driver() to handle
174 * registering the driver's /proc handler into /proc/tty/driver/<foo> 189 * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
177{ 192{
178 struct proc_dir_entry *ent; 193 struct proc_dir_entry *ent;
179 194
180 if ((!driver->read_proc && !driver->write_proc) || 195 if (!driver->ops->read_proc || !driver->driver_name ||
181 !driver->driver_name ||
182 driver->proc_entry) 196 driver->proc_entry)
183 return; 197 return;
184 198
185 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 199 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
186 if (!ent) 200 if (!ent)
187 return; 201 return;
188 ent->read_proc = driver->read_proc; 202 ent->read_proc = driver->ops->read_proc;
189 ent->write_proc = driver->write_proc;
190 ent->owner = driver->owner; 203 ent->owner = driver->owner;
191 ent->data = driver; 204 ent->data = driver;
192 205
@@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
214 */ 227 */
215void __init proc_tty_init(void) 228void __init proc_tty_init(void)
216{ 229{
217 struct proc_dir_entry *entry;
218 if (!proc_mkdir("tty", NULL)) 230 if (!proc_mkdir("tty", NULL))
219 return; 231 return;
220 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 232 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
@@ -224,10 +236,7 @@ void __init proc_tty_init(void)
224 * password lengths and inter-keystroke timings during password 236 * password lengths and inter-keystroke timings during password
225 * entry. 237 * entry.
226 */ 238 */
227 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); 239 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
228 240 proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
229 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); 241 proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
230 entry = create_proc_entry("tty/drivers", 0, NULL);
231 if (entry)
232 entry->proc_fops = &proc_tty_drivers_operations;
233} 242}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57fc9ef..95117538a4f6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -22,8 +22,6 @@
22 22
23#include "internal.h" 23#include "internal.h"
24 24
25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
26
27static int proc_test_super(struct super_block *sb, void *data) 25static int proc_test_super(struct super_block *sb, void *data)
28{ 26{
29 return sb->s_fs_info == data; 27 return sb->s_fs_info == data;
@@ -126,8 +124,8 @@ void __init proc_root_init(void)
126#ifdef CONFIG_SYSVIPC 124#ifdef CONFIG_SYSVIPC
127 proc_mkdir("sysvipc", NULL); 125 proc_mkdir("sysvipc", NULL);
128#endif 126#endif
129 proc_root_fs = proc_mkdir("fs", NULL); 127 proc_mkdir("fs", NULL);
130 proc_root_driver = proc_mkdir("driver", NULL); 128 proc_mkdir("driver", NULL);
131 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 129 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
132#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) 130#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
133 /* just give it a mountpoint */ 131 /* just give it a mountpoint */
@@ -137,7 +135,7 @@ void __init proc_root_init(void)
137#ifdef CONFIG_PROC_DEVICETREE 135#ifdef CONFIG_PROC_DEVICETREE
138 proc_device_tree_init(); 136 proc_device_tree_init();
139#endif 137#endif
140 proc_bus = proc_mkdir("bus", NULL); 138 proc_mkdir("bus", NULL);
141 proc_sys_init(); 139 proc_sys_init();
142} 140}
143 141
@@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns)
232EXPORT_SYMBOL(proc_symlink); 230EXPORT_SYMBOL(proc_symlink);
233EXPORT_SYMBOL(proc_mkdir); 231EXPORT_SYMBOL(proc_mkdir);
234EXPORT_SYMBOL(create_proc_entry); 232EXPORT_SYMBOL(create_proc_entry);
235EXPORT_SYMBOL(proc_create); 233EXPORT_SYMBOL(proc_create_data);
236EXPORT_SYMBOL(remove_proc_entry); 234EXPORT_SYMBOL(remove_proc_entry);
237EXPORT_SYMBOL(proc_root);
238EXPORT_SYMBOL(proc_root_fs);
239EXPORT_SYMBOL(proc_bus);
240EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7415eeb7cc3a..88717c0f941b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,11 +5,9 @@
5#include <linux/highmem.h> 5#include <linux/highmem.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/ptrace.h>
9#include <linux/mempolicy.h> 8#include <linux/mempolicy.h>
10#include <linux/swap.h> 9#include <linux/swap.h>
11#include <linux/swapops.h> 10#include <linux/swapops.h>
12#include <linux/seq_file.h>
13 11
14#include <asm/elf.h> 12#include <asm/elf.h>
15#include <asm/uaccess.h> 13#include <asm/uaccess.h>
@@ -75,40 +73,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
75 return mm->total_vm; 73 return mm->total_vm;
76} 74}
77 75
78int proc_exe_link(struct inode *inode, struct path *path)
79{
80 struct vm_area_struct * vma;
81 int result = -ENOENT;
82 struct task_struct *task = get_proc_task(inode);
83 struct mm_struct * mm = NULL;
84
85 if (task) {
86 mm = get_task_mm(task);
87 put_task_struct(task);
88 }
89 if (!mm)
90 goto out;
91 down_read(&mm->mmap_sem);
92
93 vma = mm->mmap;
94 while (vma) {
95 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
96 break;
97 vma = vma->vm_next;
98 }
99
100 if (vma) {
101 *path = vma->vm_file->f_path;
102 path_get(&vma->vm_file->f_path);
103 result = 0;
104 }
105
106 up_read(&mm->mmap_sem);
107 mmput(mm);
108out:
109 return result;
110}
111
112static void pad_len_spaces(struct seq_file *m, int len) 76static void pad_len_spaces(struct seq_file *m, int len)
113{ 77{
114 len = 25 + sizeof(void*) * 6 - len; 78 len = 25 + sizeof(void*) * 6 - len;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8011528518bd..4b4f9cc2f186 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
1 1
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/fdtable.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
5#include <linux/ptrace.h> 6#include <linux/ptrace.h>
6#include <linux/seq_file.h> 7#include <linux/seq_file.h>
@@ -103,40 +104,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
103 return size; 104 return size;
104} 105}
105 106
106int proc_exe_link(struct inode *inode, struct path *path)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = get_proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *path = vma->vm_file->f_path;
130 path_get(&vma->vm_file->f_path);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/* 107/*
141 * display mapping lines for a particular process's /proc/pid/maps 108 * display mapping lines for a particular process's /proc/pid/maps
142 */ 109 */
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 23b647f25d08..234ada903633 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); 306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
307 goto out_buf; 307 goto out_buf;
308 } 308 }
309 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); 309 le16_add_cpu(&dh->dqdh_entries, 1);
310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); 310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
311 /* Find free structure in block */ 311 /* Find free structure in block */
312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); 312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
448 goto out_buf; 448 goto out_buf;
449 } 449 }
450 dh = (struct v2_disk_dqdbheader *)buf; 450 dh = (struct v2_disk_dqdbheader *)buf;
451 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); 451 le16_add_cpu(&dh->dqdh_entries, -1);
452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ 452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || 453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { 454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
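
The two quota hunks are pure conversions to the le16_add_cpu() helper, which folds the cpu_to_le16(le16_to_cpu(x) + d) round trip into a single call. Below is a small stand-alone demonstration of the equivalence with the byte order handled explicitly so it runs anywhere; le16_add_cpu_demo is an invented name that only mimics the kernel helper's semantics.

/* Stand-alone demo of what le16_add_cpu(&x, d) means on disk. */
#include <stdint.h>
#include <stdio.h>

/* A little-endian 16-bit value kept as raw bytes, as it sits on disk. */
static uint16_t le16_load(const uint8_t b[2]) { return (uint16_t)(b[0] | (b[1] << 8)); }
static void le16_store(uint8_t b[2], uint16_t v) { b[0] = v & 0xff; b[1] = v >> 8; }

/* Equivalent of le16_add_cpu(): convert to CPU order, add, convert back. */
static void le16_add_cpu_demo(uint8_t b[2], int d)
{
	le16_store(b, (uint16_t)(le16_load(b) + d));
}

int main(void)
{
	uint8_t dqdh_entries[2] = { 0xff, 0x00 };	/* 255 stored little-endian */

	le16_add_cpu_demo(dqdh_entries, 1);
	printf("bytes: %02x %02x  value: %u\n",
	       dqdh_entries[0], dqdh_entries[1], le16_load(dqdh_entries));
	return 0;
}
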
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b41a514b0976..9590b9024300 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,6 +26,9 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/ramfs.h>
30
31#include "internal.h"
29 32
30const struct address_space_operations ramfs_aops = { 33const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 34 .readpage = simple_readpage,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711d..b13123424e49 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
44 44
45static struct backing_dev_info ramfs_backing_dev_info = { 45static struct backing_dev_info ramfs_backing_dev_info = {
46 .ra_pages = 0, /* No readahead */ 46 .ra_pages = 0, /* No readahead */
47 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | 47 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
50}; 50};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index af7cc074a476..6b330639b51d 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,4 @@
11 11
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations;
15extern const struct inode_operations ramfs_file_inode_operations; 14extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da86042b3e03..e396b2fa4743 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
2574 2574
2575 result = 0; 2575 result = 0;
2576 2576
2577 if (journal->j_dev_file != NULL) { 2577 if (journal->j_dev_bd != NULL) {
2578 result = filp_close(journal->j_dev_file, NULL); 2578 if (journal->j_dev_bd->bd_dev != super->s_dev)
2579 journal->j_dev_file = NULL; 2579 bd_release(journal->j_dev_bd);
2580 journal->j_dev_bd = NULL;
2581 } else if (journal->j_dev_bd != NULL) {
2582 result = blkdev_put(journal->j_dev_bd); 2580 result = blkdev_put(journal->j_dev_bd);
2583 journal->j_dev_bd = NULL; 2581 journal->j_dev_bd = NULL;
2584 } 2582 }
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
2603 result = 0; 2601 result = 0;
2604 2602
2605 journal->j_dev_bd = NULL; 2603 journal->j_dev_bd = NULL;
2606 journal->j_dev_file = NULL;
2607 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2604 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2608 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2605 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2609 2606
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
2620 "cannot init journal device '%s': %i", 2617 "cannot init journal device '%s': %i",
2621 __bdevname(jdev, b), result); 2618 __bdevname(jdev, b), result);
2622 return result; 2619 return result;
2623 } else if (jdev != super->s_dev) 2620 } else if (jdev != super->s_dev) {
2621 result = bd_claim(journal->j_dev_bd, journal);
2622 if (result) {
2623 blkdev_put(journal->j_dev_bd);
2624 return result;
2625 }
2626
2624 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2627 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2628 }
2629
2625 return 0; 2630 return 0;
2626 } 2631 }
2627 2632
2628 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2633 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
2629 if (!IS_ERR(journal->j_dev_file)) { 2634 if (IS_ERR(journal->j_dev_bd)) {
2630 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2635 result = PTR_ERR(journal->j_dev_bd);
2631 if (!S_ISBLK(jdev_inode->i_mode)) { 2636 journal->j_dev_bd = NULL;
2632 reiserfs_warning(super, "journal_init_dev: '%s' is "
2633 "not a block device", jdev_name);
2634 result = -ENOTBLK;
2635 release_journal_dev(super, journal);
2636 } else {
2637 /* ok */
2638 journal->j_dev_bd = I_BDEV(jdev_inode);
2639 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640 reiserfs_info(super,
2641 "journal_init_dev: journal device: %s\n",
2642 bdevname(journal->j_dev_bd, b));
2643 }
2644 } else {
2645 result = PTR_ERR(journal->j_dev_file);
2646 journal->j_dev_file = NULL;
2647 reiserfs_warning(super, 2637 reiserfs_warning(super,
2648 "journal_init_dev: Cannot open '%s': %i", 2638 "journal_init_dev: Cannot open '%s': %i",
2649 jdev_name, result); 2639 jdev_name, result);
2640 return result;
2650 } 2641 }
2651 return result; 2642
2643 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2644 reiserfs_info(super,
2645 "journal_init_dev: journal device: %s\n",
2646 bdevname(journal->j_dev_bd, b));
2647 return 0;
2652} 2648}
2653 2649
2654/** 2650/**
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 8f86c52b30d8..b9dbeeca7049 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = {
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
469 .release = seq_release, 469 .release = seq_release,
470 .owner = THIS_MODULE,
470}; 471};
471 472
472static struct proc_dir_entry *proc_info_root = NULL; 473static struct proc_dir_entry *proc_info_root = NULL;
@@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs";
475static void add_file(struct super_block *sb, char *name, 476static void add_file(struct super_block *sb, char *name,
476 int (*func) (struct seq_file *, struct super_block *)) 477 int (*func) (struct seq_file *, struct super_block *))
477{ 478{
478 struct proc_dir_entry *de; 479 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
479 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 480 &r_file_operations, func);
480 if (de) {
481 de->data = func;
482 de->proc_fops = &r_file_operations;
483 }
484} 481}
485 482
486int reiserfs_proc_info_init(struct super_block *sb) 483int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e05..8dda969614a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fdtable.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
298#define MAX_SELECT_SECONDS \ 299#define MAX_SELECT_SECONDS \
299 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
300 301
301static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
302 fd_set __user *exp, s64 *timeout) 303 fd_set __user *exp, s64 *timeout)
303{ 304{
304 fd_set_bits fds; 305 fd_set_bits fds;
@@ -425,7 +426,7 @@ sticky:
425 return ret; 426 return ret;
426} 427}
427 428
428#ifdef TIF_RESTORE_SIGMASK 429#ifdef HAVE_SET_RESTORE_SIGMASK
429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 430asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
430 fd_set __user *exp, struct timespec __user *tsp, 431 fd_set __user *exp, struct timespec __user *tsp,
431 const sigset_t __user *sigmask, size_t sigsetsize) 432 const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +499,7 @@ sticky:
498 if (sigmask) { 499 if (sigmask) {
499 memcpy(&current->saved_sigmask, &sigsaved, 500 memcpy(&current->saved_sigmask, &sigsaved,
500 sizeof(sigsaved)); 501 sizeof(sigsaved));
501 set_thread_flag(TIF_RESTORE_SIGMASK); 502 set_restore_sigmask();
502 } 503 }
503 } else if (sigmask) 504 } else if (sigmask)
504 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 505 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
528 529
529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 530 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
530} 531}
531#endif /* TIF_RESTORE_SIGMASK */ 532#endif /* HAVE_SET_RESTORE_SIGMASK */
532 533
533struct poll_list { 534struct poll_list {
534 struct poll_list *next; 535 struct poll_list *next;
@@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
759 return ret; 760 return ret;
760} 761}
761 762
762#ifdef TIF_RESTORE_SIGMASK 763#ifdef HAVE_SET_RESTORE_SIGMASK
763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 764asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
764 struct timespec __user *tsp, const sigset_t __user *sigmask, 765 struct timespec __user *tsp, const sigset_t __user *sigmask,
765 size_t sigsetsize) 766 size_t sigsetsize)
@@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
805 if (sigmask) { 806 if (sigmask) {
806 memcpy(&current->saved_sigmask, &sigsaved, 807 memcpy(&current->saved_sigmask, &sigsaved,
807 sizeof(sigsaved)); 808 sizeof(sigsaved));
808 set_thread_flag(TIF_RESTORE_SIGMASK); 809 set_restore_sigmask();
809 } 810 }
810 ret = -ERESTARTNOHAND; 811 ret = -ERESTARTNOHAND;
811 } else if (sigmask) 812 } else if (sigmask)
@@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
839 840
840 return ret; 841 return ret;
841} 842}
842#endif /* TIF_RESTORE_SIGMASK */ 843#endif /* HAVE_SET_RESTORE_SIGMASK */
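
The sigmask juggling these hunks adjust exists so that pselect/ppoll can swap a signal mask in atomically for the duration of the call and have the old mask restored on the way back to user space; set_restore_sigmask() simply replaces the open-coded TIF_RESTORE_SIGMASK flag setting. From user space that machinery is invisible. A short example of the ppoll() side, which is what ultimately exercises this path (the descriptor and signal choice are arbitrary):

/* Block SIGINT except while ppoll() is sleeping, so the signal can
 * only be delivered at a well-defined point. */
#define _GNU_SOURCE
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	sigset_t blocked, during_poll;
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
	struct timespec timeout = { .tv_sec = 5, .tv_nsec = 0 };

	sigemptyset(&blocked);
	sigaddset(&blocked, SIGINT);
	sigprocmask(SIG_BLOCK, &blocked, NULL);

	/* Empty mask applied only for the duration of the call; the
	 * kernel restores the old mask before returning to user space. */
	sigemptyset(&during_poll);

	int ready = ppoll(&pfd, 1, &timeout, &during_poll);
	if (ready < 0)
		perror("ppoll");
	else
		printf("%d descriptor(s) ready\n", ready);
	return 0;
}
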
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db35933..619725644c75 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
209{ 209{
210 int error;
211 sigset_t sigmask; 210 sigset_t sigmask;
212 struct signalfd_ctx *ctx; 211 struct signalfd_ctx *ctx;
213 struct file *file;
214 struct inode *inode;
215 212
216 if (sizemask != sizeof(sigset_t) || 213 if (sizemask != sizeof(sigset_t) ||
217 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 214 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
230 * When we call this, the initialization must be complete, since 227 * When we call this, the initialization must be complete, since
231 * anon_inode_getfd() will install the fd. 228 * anon_inode_getfd() will install the fd.
232 */ 229 */
233 error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", 230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
234 &signalfd_fops, ctx); 231 if (ufd < 0)
235 if (error) 232 kfree(ctx);
236 goto err_fdalloc;
237 } else { 233 } else {
238 file = fget(ufd); 234 struct file *file = fget(ufd);
239 if (!file) 235 if (!file)
240 return -EBADF; 236 return -EBADF;
241 ctx = file->private_data; 237 ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
252 } 248 }
253 249
254 return ufd; 250 return ufd;
255
256err_fdalloc:
257 kfree(ctx);
258 return error;
259} 251}
260
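
Both this hunk and the timerfd one further down switch to the reworked anon_inode_getfd(), which now installs the file itself and returns the descriptor (or a negative errno) instead of filling in three out-parameters. A condensed sketch of the new calling convention follows; struct example_ctx, example_fops and the "[examplefd]" name are all invented for illustration, and only the error-path kfree() is left to the caller.

/* Sketch of the 2.6.26-era anon_inode_getfd() convention. */
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>

struct example_ctx {
	int whatever;
};

static const struct file_operations example_fops = {
	.owner = THIS_MODULE,
	/* real .read/.poll/.release hooks assumed elsewhere */
};

static long example_create_fd(void)
{
	struct example_ctx *ctx;
	int ufd;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * On success the descriptor is already installed and the file
	 * owns ctx via file->private_data; on failure nothing was
	 * installed, so the caller still owns (and must free) ctx.
	 */
	ufd = anon_inode_getfd("[examplefd]", &example_fops, ctx);
	if (ufd < 0)
		kfree(ctx);

	return ufd;
}
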
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b92694..fc4b1a5dd755 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
11 * these are normally enabled. 11 * these are normally enabled.
12 */ 12 */
13#ifdef SMBFS_PARANOIA 13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) 14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else 15#else
16# define PARANOIA(f, a...) do { ; } while(0) 16# define PARANOIA(f, a...) do { ; } while(0)
17#endif 17#endif
18 18
19/* lots of debug messages */ 19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE 20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else 22#else
23# define VERBOSE(f, a...) do { ; } while(0) 23# define VERBOSE(f, a...) do { ; } while(0)
24#endif 24#endif
@@ -28,7 +28,7 @@
28 * too common name. 28 * too common name.
29 */ 29 */
30#ifdef SMBFS_DEBUG 30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else 32#else
33#define DEBUG1(f, a...) do { ; } while(0) 33#define DEBUG1(f, a...) do { ; } while(0)
34#endif 34#endif
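
These are mechanical replacements of the GCC-specific __FUNCTION__ spelling with the standard C99 __func__; both yield the enclosing function's name. A two-line illustration, with the DEBUG_EX macro name made up for the example:

#include <stdio.h>

/* __func__ is defined by C99 inside every function body, so it can be
 * pasted into debug macros exactly as __FUNCTION__ was. */
#define DEBUG_EX(fmt, ...) printf("%s: " fmt, __func__, ##__VA_ARGS__)

int main(void)
{
	DEBUG_EX("answer=%d\n", 42);	/* prints "main: answer=42" */
	return 0;
}
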
diff --git a/fs/splice.c b/fs/splice.c
index eeb1a86a7014..78150038b584 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
811{ 811{
812 struct address_space *mapping = out->f_mapping; 812 struct address_space *mapping = out->f_mapping;
813 struct inode *inode = mapping->host; 813 struct inode *inode = mapping->host;
814 int killsuid, killpriv; 814 struct splice_desc sd = {
815 .total_len = len,
816 .flags = flags,
817 .pos = *ppos,
818 .u.file = out,
819 };
815 ssize_t ret; 820 ssize_t ret;
816 int err = 0;
817
818 killpriv = security_inode_need_killpriv(out->f_path.dentry);
819 killsuid = should_remove_suid(out->f_path.dentry);
820 if (unlikely(killsuid || killpriv)) {
821 mutex_lock(&inode->i_mutex);
822 if (killpriv)
823 err = security_inode_killpriv(out->f_path.dentry);
824 if (!err && killsuid)
825 err = __remove_suid(out->f_path.dentry, killsuid);
826 mutex_unlock(&inode->i_mutex);
827 if (err)
828 return err;
829 }
830 821
831 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 822 inode_double_lock(inode, pipe->inode);
823 ret = remove_suid(out->f_path.dentry);
824 if (likely(!ret))
825 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
826 inode_double_unlock(inode, pipe->inode);
832 if (ret > 0) { 827 if (ret > 0) {
833 unsigned long nr_pages; 828 unsigned long nr_pages;
834 829
@@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
840 * sync it. 835 * sync it.
841 */ 836 */
842 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 837 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
838 int err;
839
843 mutex_lock(&inode->i_mutex); 840 mutex_lock(&inode->i_mutex);
844 err = generic_osync_inode(inode, mapping, 841 err = generic_osync_inode(inode, mapping,
845 OSYNC_METADATA|OSYNC_DATA); 842 OSYNC_METADATA|OSYNC_DATA);
diff --git a/fs/super.c b/fs/super.c
index a5a4aca7e22f..453877c5697b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s)
117 * Drop a superblock's refcount. Returns non-zero if the superblock was 117 * Drop a superblock's refcount. Returns non-zero if the superblock was
118 * destroyed. The caller must hold sb_lock. 118 * destroyed. The caller must hold sb_lock.
119 */ 119 */
120int __put_super(struct super_block *sb) 120static int __put_super(struct super_block *sb)
121{ 121{
122 int ret = 0; 122 int ret = 0;
123 123
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005ea7639..228e17b5e9ee 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
64 /* sync the superblock to buffers */ 64 /* sync the superblock to buffers */
65 sb = inode->i_sb; 65 sb = inode->i_sb;
66 lock_super(sb); 66 lock_super(sb);
67 if (sb->s_op->write_super) 67 if (sb->s_dirt && sb->s_op->write_super)
68 sb->s_op->write_super(sb); 68 sb->s_op->write_super(sb);
69 unlock_super(sb); 69 unlock_super(sb);
70 70
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index dbdfabbfd609..e7735f643cd1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
135 goto out; 135 goto out;
136 } 136 }
137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
138 __FUNCTION__, count, *ppos, buffer->page); 138 __func__, count, *ppos, buffer->page);
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 140 buffer->count);
141out: 141out:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94e..eb53c632f856 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 34};
35 35
36static const struct inode_operations sysfs_inode_operations ={ 36static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
59 if (error) 59 if (error)
60 return error; 60 return error;
61 61
62 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
63
62 error = inode_setattr(inode, iattr); 64 error = inode_setattr(inode, iattr);
63 if (error) 65 if (error)
64 return error; 66 return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..14f0023984d7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
62 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
63 if (!root) { 63 if (!root) {
64 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 64 pr_debug("%s: could not get root dentry!\n",__func__);
65 iput(inode); 65 iput(inode);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05cd..38ebe3f85b3d 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
217 if (sbi->s_bytesex == BYTESEX_PDP) 217 if (sbi->s_bytesex == BYTESEX_PDP)
218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); 218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
219 else if (sbi->s_bytesex == BYTESEX_LE) 219 else if (sbi->s_bytesex == BYTESEX_LE)
220 *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); 220 le32_add_cpu((__le32 *)n, d);
221 else 221 else
222 *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); 222 be32_add_cpu((__be32 *)n, d);
223 return *n; 223 return *n;
224} 224}
225 225
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) 242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
243{ 243{
244 if (sbi->s_bytesex != BYTESEX_BE) 244 if (sbi->s_bytesex != BYTESEX_BE)
245 *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 245 le16_add_cpu((__le16 *)n, d);
246 else 246 else
247 *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 247 be16_add_cpu((__be16 *)n, d);
248 return *n; 248 return *n;
249} 249}
250 250
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 10c80b59ec4b..d87d354ec424 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/anon_inodes.h> 21#include <linux/anon_inodes.h>
22#include <linux/timerfd.h> 22#include <linux/timerfd.h>
23#include <linux/syscalls.h>
23 24
24struct timerfd_ctx { 25struct timerfd_ctx {
25 struct hrtimer tmr; 26 struct hrtimer tmr;
@@ -180,10 +181,8 @@ static struct file *timerfd_fget(int fd)
180 181
181asmlinkage long sys_timerfd_create(int clockid, int flags) 182asmlinkage long sys_timerfd_create(int clockid, int flags)
182{ 183{
183 int error, ufd; 184 int ufd;
184 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
185 struct file *file;
186 struct inode *inode;
187 186
188 if (flags) 187 if (flags)
189 return -EINVAL; 188 return -EINVAL;
@@ -199,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
199 ctx->clockid = clockid; 198 ctx->clockid = clockid;
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 200
202 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", 201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
203 &timerfd_fops, ctx); 202 if (ufd < 0)
204 if (error) {
205 kfree(ctx); 203 kfree(ctx);
206 return error;
207 }
208 204
209 return ufd; 205 return ufd;
210} 206}
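
The timerfd conversion mirrors the signalfd one above and should be invisible from user space. For reference, a minimal consumer of the interface this file implements (glibc wrappers for timerfd_create/timerfd_settime assumed):

/* Wait once for a 1-second timerfd expiration. */
#include <sys/timerfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },	/* first expiry */
		.it_interval = { 0 },				/* one-shot */
	};
	uint64_t expirations;
	int fd;

	fd = timerfd_create(CLOCK_MONOTONIC, 0);
	if (fd < 0) {
		perror("timerfd_create");
		return 1;
	}
	if (timerfd_settime(fd, 0, &its, NULL) < 0) {
		perror("timerfd_settime");
		return 1;
	}
	/* read() blocks until the timer fires, then returns the count. */
	if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
		printf("timer expired %llu time(s)\n",
		       (unsigned long long)expirations);
	close(fd);
	return 0;
}
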
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2b34c8ca6c83..d3231947db19 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,7 @@
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/crc-itu-t.h> 34#include <linux/crc-itu-t.h>
35#include <linux/exportfs.h>
35 36
36static inline int udf_match(int len1, const char *name1, int len2, 37static inline int udf_match(int len1, const char *name1, int len2,
37 const char *name2) 38 const char *name2)
@@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
158 sector_t offset; 159 sector_t offset;
159 struct extent_position epos = {}; 160 struct extent_position epos = {};
160 struct udf_inode_info *dinfo = UDF_I(dir); 161 struct udf_inode_info *dinfo = UDF_I(dir);
162 int isdotdot = dentry->d_name.len == 2 &&
163 dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.';
161 164
162 size = udf_ext0_offset(dir) + dir->i_size; 165 size = udf_ext0_offset(dir) + dir->i_size;
163 f_pos = udf_ext0_offset(dir); 166 f_pos = udf_ext0_offset(dir);
@@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
225 continue; 228 continue;
226 } 229 }
227 230
231 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
232 isdotdot) {
233 brelse(epos.bh);
234 return fi;
235 }
236
228 if (!lfi) 237 if (!lfi)
229 continue; 238 continue;
230 239
@@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
286 } 295 }
287 } 296 }
288 unlock_kernel(); 297 unlock_kernel();
289 d_add(dentry, inode);
290 298
291 return NULL; 299 return d_splice_alias(inode, dentry);
292} 300}
293 301
294static struct fileIdentDesc *udf_add_entry(struct inode *dir, 302static struct fileIdentDesc *udf_add_entry(struct inode *dir,
@@ -307,7 +315,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
307 uint16_t liu; 315 uint16_t liu;
308 int block; 316 int block;
309 kernel_lb_addr eloc; 317 kernel_lb_addr eloc;
310 uint32_t elen; 318 uint32_t elen = 0;
311 sector_t offset; 319 sector_t offset;
312 struct extent_position epos = {}; 320 struct extent_position epos = {};
313 struct udf_inode_info *dinfo; 321 struct udf_inode_info *dinfo;
@@ -398,7 +406,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
398 } 406 }
399 407
400add: 408add:
401 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 409 /* Is there any extent whose size we need to round up? */
410 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
402 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); 411 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
403 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 412 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
404 epos.offset -= sizeof(short_ad); 413 epos.offset -= sizeof(short_ad);
@@ -1232,6 +1241,134 @@ end_rename:
1232 return retval; 1241 return retval;
1233} 1242}
1234 1243
1244static struct dentry *udf_get_parent(struct dentry *child)
1245{
1246 struct dentry *parent;
1247 struct inode *inode = NULL;
1248 struct dentry dotdot;
1249 struct fileIdentDesc cfi;
1250 struct udf_fileident_bh fibh;
1251
1252 dotdot.d_name.name = "..";
1253 dotdot.d_name.len = 2;
1254
1255 lock_kernel();
1256 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1257 goto out_unlock;
1258
1259 if (fibh.sbh != fibh.ebh)
1260 brelse(fibh.ebh);
1261 brelse(fibh.sbh);
1262
1263 inode = udf_iget(child->d_inode->i_sb,
1264 lelb_to_cpu(cfi.icb.extLocation));
1265 if (!inode)
1266 goto out_unlock;
1267 unlock_kernel();
1268
1269 parent = d_alloc_anon(inode);
1270 if (!parent) {
1271 iput(inode);
1272 parent = ERR_PTR(-ENOMEM);
1273 }
1274
1275 return parent;
1276out_unlock:
1277 unlock_kernel();
1278 return ERR_PTR(-EACCES);
1279}
1280
1281
1282static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1283 u16 partref, __u32 generation)
1284{
1285 struct inode *inode;
1286 struct dentry *result;
1287 kernel_lb_addr loc;
1288
1289 if (block == 0)
1290 return ERR_PTR(-ESTALE);
1291
1292 loc.logicalBlockNum = block;
1293 loc.partitionReferenceNum = partref;
1294 inode = udf_iget(sb, loc);
1295
1296 if (inode == NULL)
1297 return ERR_PTR(-ENOMEM);
1298
1299 if (generation && inode->i_generation != generation) {
1300 iput(inode);
1301 return ERR_PTR(-ESTALE);
1302 }
1303 result = d_alloc_anon(inode);
1304 if (!result) {
1305 iput(inode);
1306 return ERR_PTR(-ENOMEM);
1307 }
1308 return result;
1309}
1310
1311static struct dentry *udf_fh_to_dentry(struct super_block *sb,
1312 struct fid *fid, int fh_len, int fh_type)
1313{
1314 if ((fh_len != 3 && fh_len != 5) ||
1315 (fh_type != FILEID_UDF_WITH_PARENT &&
1316 fh_type != FILEID_UDF_WITHOUT_PARENT))
1317 return NULL;
1318
1319 return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref,
1320 fid->udf.generation);
1321}
1322
1323static struct dentry *udf_fh_to_parent(struct super_block *sb,
1324 struct fid *fid, int fh_len, int fh_type)
1325{
1326 if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
1327 return NULL;
1328
1329 return udf_nfs_get_inode(sb, fid->udf.parent_block,
1330 fid->udf.parent_partref,
1331 fid->udf.parent_generation);
1332}
1333static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1334 int connectable)
1335{
1336 int len = *lenp;
1337 struct inode *inode = de->d_inode;
1338 kernel_lb_addr location = UDF_I(inode)->i_location;
1339 struct fid *fid = (struct fid *)fh;
1340 int type = FILEID_UDF_WITHOUT_PARENT;
1341
1342 if (len < 3 || (connectable && len < 5))
1343 return 255;
1344
1345 *lenp = 3;
1346 fid->udf.block = location.logicalBlockNum;
1347 fid->udf.partref = location.partitionReferenceNum;
1348 fid->udf.generation = inode->i_generation;
1349
1350 if (connectable && !S_ISDIR(inode->i_mode)) {
1351 spin_lock(&de->d_lock);
1352 inode = de->d_parent->d_inode;
1353 location = UDF_I(inode)->i_location;
1354 fid->udf.parent_block = location.logicalBlockNum;
1355 fid->udf.parent_partref = location.partitionReferenceNum;
1356 fid->udf.parent_generation = inode->i_generation;
1357 spin_unlock(&de->d_lock);
1358 *lenp = 5;
1359 type = FILEID_UDF_WITH_PARENT;
1360 }
1361
1362 return type;
1363}
1364
1365const struct export_operations udf_export_ops = {
1366 .encode_fh = udf_encode_fh,
1367 .fh_to_dentry = udf_fh_to_dentry,
1368 .fh_to_parent = udf_fh_to_parent,
1369 .get_parent = udf_get_parent,
1370};
1371
1235const struct inode_operations udf_dir_inode_operations = { 1372const struct inode_operations udf_dir_inode_operations = {
1236 .lookup = udf_lookup, 1373 .lookup = udf_lookup,
1237 .create = udf_create, 1374 .create = udf_create,
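
Two related things happen in the namei changes: export_operations are added for NFS file handles, and udf_lookup() now returns d_splice_alias() so that dentries obtained through the new fh_to_dentry/get_parent callbacks can be reconnected to the tree. A compressed sketch of the lookup idiom for a hypothetical filesystem is shown below; myfs_iget() is a placeholder for the filesystem's own inode lookup.

/* Sketch: the d_splice_alias() lookup pattern used above. */
static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	/* myfs_iget() is hypothetical; it returns NULL when the name
	 * does not exist in 'dir'. */
	struct inode *inode = myfs_iget(dir->i_sb, dir, &dentry->d_name);

	/*
	 * d_splice_alias() attaches the inode to this dentry or, if an
	 * anonymous (NFS file-handle) dentry already exists for it,
	 * splices that one in and returns it.  A NULL inode just makes
	 * a negative dentry, so it also replaces the old
	 * d_add(dentry, inode); return NULL; pair.
	 */
	return d_splice_alias(inode, dentry);
}
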
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 63610f026ae1..96dfd207c3d6 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -27,8 +27,8 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29 29
30inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
31 uint16_t partition, uint32_t offset) 31 uint16_t partition, uint32_t offset)
32{ 32{
33 struct udf_sb_info *sbi = UDF_SB(sb); 33 struct udf_sb_info *sbi = UDF_SB(sb);
34 struct udf_part_map *map; 34 struct udf_part_map *map;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..7a5f69be6ac2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
243 udf_error(sb, __FUNCTION__, 243 udf_error(sb, __func__,
244 "Unable to allocate space for %d partition maps", 244 "Unable to allocate space for %d partition maps",
245 count); 245 count);
246 sbi->s_partitions = 0; 246 sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
 	bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
 
 	if (bitmap == NULL) {
-		udf_error(sb, __FUNCTION__,
+		udf_error(sb, __func__,
 			  "Unable to allocate space for bitmap "
 			  "and %d buffer_head pointers", nr_groups);
 		return NULL;
@@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
 	/* Fill in the rest of the superblock */
 	sb->s_op = &udf_sb_ops;
+	sb->s_export_op = &udf_export_ops;
 	sb->dq_op = NULL;
 	sb->s_dirt = 0;
 	sb->s_magic = UDF_SUPER_MAGIC;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f3f45d029277..8fa9c2d70911 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -73,6 +73,7 @@ struct task_struct;
 struct buffer_head;
 struct super_block;
 
+extern const struct export_operations udf_export_ops;
 extern const struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern const struct inode_operations udf_file_inode_operations;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 244a1aaa940e..11c035168ea6 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (struct inode *, int);
 
 /* inode.c */
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
-extern void ufs_put_inode (struct inode *);
 extern int ufs_write_inode (struct inode *, int);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c9..af059d5cb485 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
 
 #endif
 
+static bool nsec_special(long nsec)
+{
+	return nsec == UTIME_OMIT || nsec == UTIME_NOW;
+}
+
 static bool nsec_valid(long nsec)
 {
-	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+	if (nsec_special(nsec))
 		return true;
 
 	return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 			newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
 			newattrs.ia_valid |= ATTR_MTIME_SET;
 		}
-	} else {
+	}
+
+	/*
+	 * If times is NULL or both times are either UTIME_OMIT or
+	 * UTIME_NOW, then need to check permissions, because
+	 * inode_change_ok() won't do it.
+	 */
+	if (!times || (nsec_special(times[0].tv_nsec) &&
+		       nsec_special(times[1].tv_nsec))) {
 		error = -EACCES;
 		if (IS_IMMUTABLE(inode))
 			goto mnt_drop_write_and_out;
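Illustration (not part of this diff): the timestamp combinations recognized by the new nsec_special() helper are exactly the ones that now take the explicit permission check in do_utimes() rather than relying on inode_change_ok() alone. A minimal userspace sketch using utimensat(2); "testfile" is just a placeholder path:

#define _GNU_SOURCE
#include <fcntl.h>	/* AT_FDCWD */
#include <stdio.h>
#include <sys/stat.h>	/* utimensat, UTIME_NOW, UTIME_OMIT */

int main(void)
{
	struct timespec both_now[2] = {
		{ .tv_nsec = UTIME_NOW },	/* atime := current time */
		{ .tv_nsec = UTIME_NOW },	/* mtime := current time */
	};
	struct timespec only_mtime[2] = {
		{ .tv_nsec = UTIME_OMIT },	/* leave atime untouched */
		{ .tv_nsec = UTIME_NOW },	/* mtime := current time */
	};

	/* Equivalent to passing a NULL times array: plain "touch" semantics. */
	if (utimensat(AT_FDCWD, "testfile", both_now, 0) == -1)
		perror("utimensat(UTIME_NOW, UTIME_NOW)");

	/* Both nsec values are still "special", so the same check applies. */
	if (utimensat(AT_FDCWD, "testfile", only_mtime, 0) == -1)
		perror("utimensat(UTIME_OMIT, UTIME_NOW)");

	return 0;
}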
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 5b66162d0747..a3522727ea5b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -986,7 +986,7 @@ error_inode:
 	if (corrupt < 0) {
 		fat_fs_panic(new_dir->i_sb,
 			     "%s: Filesystem corrupted (i_pos %lld)",
-			     __FUNCTION__, sinfo.i_pos);
+			     __func__, sinfo.i_pos);
 	}
 	goto out;
 }
diff --git a/fs/xattr.c b/fs/xattr.c
index 89a942f07e1b..4706a8b1f495 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 }
 
 int
-vfs_setxattr(struct dentry *dentry, char *name, void *value,
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	     size_t size, int flags)
 {
 	struct inode *inode = dentry->d_inode;
@@ -131,7 +131,7 @@ out_noalloc:
 EXPORT_SYMBOL_GPL(xattr_getsecurity);
 
 ssize_t
-vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
+vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
@@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
 EXPORT_SYMBOL_GPL(vfs_listxattr);
 
 int
-vfs_removexattr(struct dentry *dentry, char *name)
+vfs_removexattr(struct dentry *dentry, const char *name)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
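Illustration (not part of this diff): the remaining hunks propagate the same const-qualification down to the syscall entry points. The userspace wrappers in <sys/xattr.h> have always taken const name/value pointers, which is what the kernel-side prototypes are being aligned with. A minimal sketch; the path and attribute name are placeholders:

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "testfile";		/* placeholder path */
	const char *name = "user.comment";	/* "user." namespace attribute */
	const char value[] = "hello";
	char buf[64];
	ssize_t len;

	/* setxattr(const char *path, const char *name, const void *value, size, flags) */
	if (setxattr(path, name, value, sizeof(value) - 1, 0) == -1)
		perror("setxattr");

	/* getxattr(const char *path, const char *name, void *value, size_t size) */
	len = getxattr(path, name, buf, sizeof(buf));
	if (len == -1)
		perror("getxattr");
	else
		printf("%s = %.*s\n", name, (int)len, buf);

	/* removexattr(const char *path, const char *name) */
	if (removexattr(path, name) == -1)
		perror("removexattr");

	return 0;
}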
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
  * Extended attribute SET operations
  */
 static long
-setxattr(struct dentry *d, char __user *name, void __user *value,
+setxattr(struct dentry *d, const char __user *name, const void __user *value,
 	 size_t size, int flags)
 {
 	int error;
@@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_setxattr(char __user *path, char __user *name, void __user *value,
-	     size_t size, int flags)
+sys_setxattr(const char __user *path, const char __user *name,
+	     const void __user *value, size_t size, int flags)
 {
 	struct nameidata nd;
 	int error;
@@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_lsetxattr(char __user *path, char __user *name, void __user *value,
-	      size_t size, int flags)
+sys_lsetxattr(const char __user *path, const char __user *name,
+	      const void __user *value, size_t size, int flags)
 {
 	struct nameidata nd;
 	int error;
@@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_fsetxattr(int fd, char __user *name, void __user *value,
+sys_fsetxattr(int fd, const char __user *name, const void __user *value,
 	      size_t size, int flags)
 {
 	struct file *f;
@@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
  * Extended attribute GET operations
  */
 static ssize_t
-getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
+getxattr(struct dentry *d, const char __user *name, void __user *value,
+	 size_t size)
 {
 	ssize_t error;
 	void *kvalue = NULL;
@@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
 }
 
 asmlinkage ssize_t
-sys_getxattr(char __user *path, char __user *name, void __user *value,
-	     size_t size)
+sys_getxattr(const char __user *path, const char __user *name,
+	     void __user *value, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage ssize_t
-sys_lgetxattr(char __user *path, char __user *name, void __user *value,
+sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
 	      size_t size)
 {
 	struct nameidata nd;
@@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage ssize_t
-sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size)
+sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size)
 {
 	struct file *f;
 	ssize_t error = -EBADF;
@@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 }
 
 asmlinkage ssize_t
-sys_listxattr(char __user *path, char __user *list, size_t size)
+sys_listxattr(const char __user *path, char __user *list, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
 }
 
 asmlinkage ssize_t
-sys_llistxattr(char __user *path, char __user *list, size_t size)
+sys_llistxattr(const char __user *path, char __user *list, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
  * Extended attribute REMOVE operations
  */
 static long
-removexattr(struct dentry *d, char __user *name)
+removexattr(struct dentry *d, const char __user *name)
 {
 	int error;
 	char kname[XATTR_NAME_MAX + 1];
@@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name)
 }
 
 asmlinkage long
-sys_removexattr(char __user *path, char __user *name)
+sys_removexattr(const char __user *path, const char __user *name)
 {
 	struct nameidata nd;
 	int error;
@@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name)
 }
 
 asmlinkage long
-sys_lremovexattr(char __user *path, char __user *name)
+sys_lremovexattr(const char __user *path, const char __user *name)
 {
 	struct nameidata nd;
 	int error;
@@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name)
 }
 
 asmlinkage long
-sys_fremovexattr(int fd, char __user *name)
+sys_fremovexattr(int fd, const char __user *name)
 {
 	struct file *f;
 	struct dentry *dentry;