aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-10 03:10:44 -0500
committerIngo Molnar <mingo@elte.hu>2008-11-10 03:10:44 -0500
commita5a64498c194c82ecad3a2d67cff6231cda8d3dd (patch)
tree723d5d81419f9960b8d30ed9a2ece8a58d6c4328 /fs
parentbb93d802ae5c1949977cc6da247b218240677f11 (diff)
parentf7160c7573615ec82c691e294cf80d920b5d588d (diff)
Merge commit 'v2.6.28-rc4' into timers/rtc
Conflicts: drivers/rtc/rtc-cmos.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c4
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/9p/vfs_dir.c60
-rw-r--r--fs/9p/vfs_file.c93
-rw-r--r--fs/9p/vfs_inode.c39
-rw-r--r--fs/9p/vfs_super.c6
-rw-r--r--fs/Kconfig614
-rw-r--r--fs/Kconfig.binfmt22
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/attr.c10
-rw-r--r--fs/autofs4/dev-ioctl.c5
-rw-r--r--fs/autofs4/expire.c19
-rw-r--r--fs/bfs/dir.c1
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/binfmt_elf_fdpic.c19
-rw-r--r--fs/block_dev.c148
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/char_dev.c21
-rw-r--r--fs/cifs/CHANGES13
-rw-r--r--fs/cifs/Kconfig142
-rw-r--r--fs/cifs/README19
-rw-r--r--fs/cifs/cifsfs.c52
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c144
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/inode.c213
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/transport.c48
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/compat.c210
-rw-r--r--fs/configfs/symlink.c16
-rw-r--r--fs/dcache.c184
-rw-r--r--fs/dquot.c10
-rw-r--r--fs/ecryptfs/crypto.c15
-rw-r--r--fs/ecryptfs/main.c23
-rw-r--r--fs/efs/namei.c29
-rw-r--r--fs/eventpoll.c11
-rw-r--r--fs/exec.c7
-rw-r--r--fs/exportfs/expfs.c20
-rw-r--r--fs/ext2/Kconfig55
-rw-r--r--fs/ext2/dir.c14
-rw-r--r--fs/ext2/ext2.h4
-rw-r--r--fs/ext2/namei.c30
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/Kconfig67
-rw-r--r--fs/ext3/balloc.c3
-rw-r--r--fs/ext3/dir.c32
-rw-r--r--fs/ext3/inode.c7
-rw-r--r--fs/ext3/ioctl.c12
-rw-r--r--fs/ext3/namei.c84
-rw-r--r--fs/ext3/resize.c3
-rw-r--r--fs/ext3/super.c84
-rw-r--r--fs/ext4/Kconfig79
-rw-r--r--fs/ext4/balloc.c77
-rw-r--r--fs/ext4/dir.c20
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/mballoc.c1
-rw-r--r--fs/ext4/namei.c12
-rw-r--r--fs/ext4/super.c63
-rw-r--r--fs/fat/Makefile6
-rw-r--r--fs/fat/cache.c25
-rw-r--r--fs/fat/dir.c21
-rw-r--r--fs/fat/fat.h329
-rw-r--r--fs/fat/fatent.c24
-rw-r--r--fs/fat/file.c49
-rw-r--r--fs/fat/inode.c185
-rw-r--r--fs/fat/misc.c155
-rw-r--r--fs/fat/namei_msdos.c (renamed from fs/msdos/namei.c)42
-rw-r--r--fs/fat/namei_vfat.c (renamed from fs/vfat/namei.c)161
-rw-r--r--fs/fifo.c6
-rw-r--r--fs/file_table.c8
-rw-r--r--fs/filesystems.c39
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/fuse/fuse_i.h5
-rw-r--r--fs/fuse/inode.c26
-rw-r--r--fs/gfs2/ops_export.c33
-rw-r--r--fs/gfs2/ops_inode.c2
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfsplus/extents.c3
-rw-r--r--fs/hfsplus/inode.c15
-rw-r--r--fs/hostfs/hostfs_kern.c5
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c29
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/inotify_user.c3
-rw-r--r--fs/isofs/export.c33
-rw-r--r--fs/jbd/Kconfig30
-rw-r--r--fs/jbd/checkpoint.c89
-rw-r--r--fs/jbd/commit.c10
-rw-r--r--fs/jbd/journal.c28
-rw-r--r--fs/jbd/recovery.c7
-rw-r--r--fs/jbd/transaction.c17
-rw-r--r--fs/jbd2/Kconfig33
-rw-r--r--fs/jbd2/checkpoint.c32
-rw-r--r--fs/jbd2/commit.c8
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/Kconfig188
-rw-r--r--fs/jffs2/background.c10
-rw-r--r--fs/jffs2/compr.c4
-rw-r--r--fs/jffs2/compr_lzo.c15
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/erase.c4
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jffs2/nodemgmt.c6
-rw-r--r--fs/jffs2/super.c48
-rw-r--r--fs/jffs2/wbuf.c5
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/jfs/namei.c16
-rw-r--r--fs/libfs.c28
-rw-r--r--fs/lockd/svc4proc.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/locks.c25
-rw-r--r--fs/msdos/Makefile7
-rw-r--r--fs/namei.c146
-rw-r--r--fs/namespace.c133
-rw-r--r--fs/nfs/callback.c19
-rw-r--r--fs/nfs/dir.c11
-rw-r--r--fs/nfs/getroot.c14
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfsd/export.c144
-rw-r--r--fs/nfsd/nfs4recover.c50
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfssvc.c4
-rw-r--r--fs/nfsd/vfs.c130
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/namei.c22
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/export.c30
-rw-r--r--fs/ocfs2/file.c3
-rw-r--r--fs/omfs/dir.c1
-rw-r--r--fs/open.c2
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/proc/Makefile13
-rw-r--r--fs/proc/array.c10
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/cmdline.c29
-rw-r--r--fs/proc/cpuinfo.c24
-rw-r--r--fs/proc/devices.c70
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/proc/inode.c3
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/interrupts.c53
-rw-r--r--fs/proc/kcore.c14
-rw-r--r--fs/proc/kmsg.c12
-rw-r--r--fs/proc/loadavg.c51
-rw-r--r--fs/proc/meminfo.c168
-rw-r--r--fs/proc/page.c147
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/proc_misc.c914
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/proc/root.c8
-rw-r--r--fs/proc/stat.c153
-rw-r--r--fs/proc/task_mmu.c25
-rw-r--r--fs/proc/uptime.c45
-rw-r--r--fs/proc/version.c34
-rw-r--r--fs/proc/vmcore.c11
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c58
-rw-r--r--fs/readdir.c22
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/reiserfs/inode.c13
-rw-r--r--fs/reiserfs/journal.c11
-rw-r--r--fs/reiserfs/namei.c11
-rw-r--r--fs/reiserfs/super.c18
-rw-r--r--fs/select.c397
-rw-r--r--fs/seq_file.c29
-rw-r--r--fs/splice.c4
-rw-r--r--fs/super.c29
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/timerfd.c8
-rw-r--r--fs/ubifs/budget.c26
-rw-r--r--fs/ubifs/compress.c2
-rw-r--r--fs/ubifs/debug.c79
-rw-r--r--fs/ubifs/debug.h6
-rw-r--r--fs/ubifs/file.c260
-rw-r--r--fs/ubifs/find.c4
-rw-r--r--fs/ubifs/gc.c90
-rw-r--r--fs/ubifs/io.c12
-rw-r--r--fs/ubifs/key.h22
-rw-r--r--fs/ubifs/lprops.c34
-rw-r--r--fs/ubifs/lpt.c48
-rw-r--r--fs/ubifs/lpt_commit.c187
-rw-r--r--fs/ubifs/misc.h27
-rw-r--r--fs/ubifs/scan.c2
-rw-r--r--fs/ubifs/super.c109
-rw-r--r--fs/ubifs/tnc.c345
-rw-r--r--fs/ubifs/tnc_misc.c4
-rw-r--r--fs/ubifs/ubifs-media.h1
-rw-r--r--fs/ubifs/ubifs.h85
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/udf/namei.c43
-rw-r--r--fs/ufs/dir.c1
-rw-r--r--fs/vfat/Makefile7
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c32
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c128
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
214 files changed, 5642 insertions, 3922 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c061c3f18e7c..24eb01087b6d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -30,8 +30,8 @@
30#include <linux/parser.h> 30#include <linux/parser.h>
31#include <linux/idr.h> 31#include <linux/idr.h>
32#include <net/9p/9p.h> 32#include <net/9p/9p.h>
33#include <net/9p/transport.h>
34#include <net/9p/client.h> 33#include <net/9p/client.h>
34#include <net/9p/transport.h>
35#include "v9fs.h" 35#include "v9fs.h"
36#include "v9fs_vfs.h" 36#include "v9fs_vfs.h"
37 37
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
234 if (!v9ses->clnt->dotu) 234 if (!v9ses->clnt->dotu)
235 v9ses->flags &= ~V9FS_EXTENDED; 235 v9ses->flags &= ~V9FS_EXTENDED;
236 236
237 v9ses->maxdata = v9ses->clnt->msize; 237 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
238 238
239 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 239 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
240 if (!v9fs_extended(v9ses) && 240 if (!v9fs_extended(v9ses) &&
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 57997fa14e69..c295ba786edd 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -46,9 +46,11 @@ extern struct dentry_operations v9fs_cached_dentry_operations;
46 46
47struct inode *v9fs_get_inode(struct super_block *sb, int mode); 47struct inode *v9fs_get_inode(struct super_block *sb, int mode);
48ino_t v9fs_qid2ino(struct p9_qid *qid); 48ino_t v9fs_qid2ino(struct p9_qid *qid);
49void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *); 49void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); 52void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
54int v9fs_uflags2omode(int uflags, int extended); 54int v9fs_uflags2omode(int uflags, int extended);
55
56ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97d3aed57983..6fcb1e7095cf 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,7 +38,6 @@
38 38
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "fid.h"
42 41
43/** 42/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 43 * v9fs_vfs_readpage - read an entire page in from 9P
@@ -53,14 +52,12 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
53 int retval; 52 int retval;
54 loff_t offset; 53 loff_t offset;
55 char *buffer; 54 char *buffer;
56 struct p9_fid *fid;
57 55
58 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 56 P9_DPRINTK(P9_DEBUG_VFS, "\n");
59 fid = filp->private_data;
60 buffer = kmap(page); 57 buffer = kmap(page);
61 offset = page_offset(page); 58 offset = page_offset(page);
62 59
63 retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE); 60 retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE);
64 if (retval < 0) 61 if (retval < 0)
65 goto done; 62 goto done;
66 63
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e298fe194093..873cd31baa47 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -45,7 +45,7 @@
45 * 45 *
46 */ 46 */
47 47
48static inline int dt_type(struct p9_stat *mistat) 48static inline int dt_type(struct p9_wstat *mistat)
49{ 49{
50 unsigned long perm = mistat->mode; 50 unsigned long perm = mistat->mode;
51 int rettype = DT_REG; 51 int rettype = DT_REG;
@@ -69,32 +69,58 @@ static inline int dt_type(struct p9_stat *mistat)
69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) 69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
70{ 70{
71 int over; 71 int over;
72 struct p9_wstat st;
73 int err;
72 struct p9_fid *fid; 74 struct p9_fid *fid;
73 struct v9fs_session_info *v9ses; 75 int buflen;
74 struct inode *inode; 76 char *statbuf;
75 struct p9_stat *st; 77 int n, i = 0;
76 78
77 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 79 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
78 inode = filp->f_path.dentry->d_inode;
79 v9ses = v9fs_inode2v9ses(inode);
80 fid = filp->private_data; 80 fid = filp->private_data;
81 while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
82 if (IS_ERR(st))
83 return PTR_ERR(st);
84 81
85 over = filldir(dirent, st->name.str, st->name.len, filp->f_pos, 82 buflen = fid->clnt->msize - P9_IOHDRSZ;
86 v9fs_qid2ino(&st->qid), dt_type(st)); 83 statbuf = kmalloc(buflen, GFP_KERNEL);
84 if (!statbuf)
85 return -ENOMEM;
87 86
88 if (over) 87 while (1) {
88 err = v9fs_file_readn(filp, statbuf, NULL, buflen,
89 fid->rdir_fpos);
90 if (err <= 0)
89 break; 91 break;
90 92
91 filp->f_pos += st->size; 93 n = err;
92 kfree(st); 94 while (i < n) {
93 st = NULL; 95 err = p9stat_read(statbuf + i, buflen-i, &st,
96 fid->clnt->dotu);
97 if (err) {
98 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
99 err = -EIO;
100 p9stat_free(&st);
101 goto free_and_exit;
102 }
103
104 i += st.size+2;
105 fid->rdir_fpos += st.size+2;
106
107 over = filldir(dirent, st.name, strlen(st.name),
108 filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
109
110 filp->f_pos += st.size+2;
111
112 p9stat_free(&st);
113
114 if (over) {
115 err = 0;
116 goto free_and_exit;
117 }
118 }
94 } 119 }
95 120
96 kfree(st); 121free_and_exit:
97 return 0; 122 kfree(statbuf);
123 return err;
98} 124}
99 125
100 126
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 52944d2249a4..68bf2af6c389 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
120} 120}
121 121
122/** 122/**
123 * v9fs_file_read - read from a file 123 * v9fs_file_readn - read from a file
124 * @filp: file pointer to read 124 * @filp: file pointer to read
125 * @data: data buffer to read data into 125 * @data: data buffer to read data into
126 * @udata: user data buffer to read data into
126 * @count: size of buffer 127 * @count: size of buffer
127 * @offset: offset at which to read data 128 * @offset: offset at which to read data
128 * 129 *
129 */ 130 */
131
132ssize_t
133v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
134 u64 offset)
135{
136 int n, total;
137 struct p9_fid *fid = filp->private_data;
138
139 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
140 (long long unsigned) offset, count);
141
142 n = 0;
143 total = 0;
144 do {
145 n = p9_client_read(fid, data, udata, offset, count);
146 if (n <= 0)
147 break;
148
149 if (data)
150 data += n;
151 if (udata)
152 udata += n;
153
154 offset += n;
155 count -= n;
156 total += n;
157 } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
158
159 if (n < 0)
160 total = n;
161
162 return total;
163}
164
165/**
166 * v9fs_file_read - read from a file
167 * @filp: file pointer to read
168 * @udata: user data buffer to read data into
169 * @count: size of buffer
170 * @offset: offset at which to read data
171 *
172 */
173
130static ssize_t 174static ssize_t
131v9fs_file_read(struct file *filp, char __user * data, size_t count, 175v9fs_file_read(struct file *filp, char __user *udata, size_t count,
132 loff_t * offset) 176 loff_t * offset)
133{ 177{
134 int ret; 178 int ret;
135 struct p9_fid *fid; 179 struct p9_fid *fid;
136 180
137 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 181 P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
138 fid = filp->private_data; 182 fid = filp->private_data;
139 ret = p9_client_uread(fid, data, *offset, count); 183
184 if (count > (fid->clnt->msize - P9_IOHDRSZ))
185 ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
186 else
187 ret = p9_client_read(fid, NULL, udata, *offset, count);
188
140 if (ret > 0) 189 if (ret > 0)
141 *offset += ret; 190 *offset += ret;
142 191
@@ -156,19 +205,38 @@ static ssize_t
156v9fs_file_write(struct file *filp, const char __user * data, 205v9fs_file_write(struct file *filp, const char __user * data,
157 size_t count, loff_t * offset) 206 size_t count, loff_t * offset)
158{ 207{
159 int ret; 208 int n, rsize, total = 0;
160 struct p9_fid *fid; 209 struct p9_fid *fid;
210 struct p9_client *clnt;
161 struct inode *inode = filp->f_path.dentry->d_inode; 211 struct inode *inode = filp->f_path.dentry->d_inode;
212 int origin = *offset;
162 213
163 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 214 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
164 (int)count, (int)*offset); 215 (int)count, (int)*offset);
165 216
166 fid = filp->private_data; 217 fid = filp->private_data;
167 ret = p9_client_uwrite(fid, data, *offset, count); 218 clnt = fid->clnt;
168 if (ret > 0) { 219
169 invalidate_inode_pages2_range(inode->i_mapping, *offset, 220 rsize = fid->iounit;
170 *offset+ret); 221 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
171 *offset += ret; 222 rsize = clnt->msize - P9_IOHDRSZ;
223
224 do {
225 if (count < rsize)
226 rsize = count;
227
228 n = p9_client_write(fid, NULL, data+total, *offset+total,
229 rsize);
230 if (n <= 0)
231 break;
232 count -= n;
233 total += n;
234 } while (count > 0);
235
236 if (total > 0) {
237 invalidate_inode_pages2_range(inode->i_mapping, origin,
238 origin+total);
239 *offset += total;
172 } 240 }
173 241
174 if (*offset > inode->i_size) { 242 if (*offset > inode->i_size) {
@@ -176,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data,
176 inode->i_blocks = (inode->i_size + 512 - 1) >> 9; 244 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
177 } 245 }
178 246
179 return ret; 247 if (n < 0)
248 return n;
249
250 return total;
180} 251}
181 252
182static const struct file_operations v9fs_cached_file_operations = { 253static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e83aa5ebe861..8314d3f43b71 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
334{ 334{
335 int err, umode; 335 int err, umode;
336 struct inode *ret; 336 struct inode *ret;
337 struct p9_stat *st; 337 struct p9_wstat *st;
338 338
339 ret = NULL; 339 ret = NULL;
340 st = p9_client_stat(fid); 340 st = p9_client_stat(fid);
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
417 struct p9_fid *dfid, *ofid, *fid; 417 struct p9_fid *dfid, *ofid, *fid;
418 struct inode *inode; 418 struct inode *inode;
419 419
420 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
421
420 err = 0; 422 err = 0;
421 ofid = NULL; 423 ofid = NULL;
422 fid = NULL; 424 fid = NULL;
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
424 dfid = v9fs_fid_clone(dentry->d_parent); 426 dfid = v9fs_fid_clone(dentry->d_parent);
425 if (IS_ERR(dfid)) { 427 if (IS_ERR(dfid)) {
426 err = PTR_ERR(dfid); 428 err = PTR_ERR(dfid);
429 P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
427 dfid = NULL; 430 dfid = NULL;
428 goto error; 431 goto error;
429 } 432 }
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
432 ofid = p9_client_walk(dfid, 0, NULL, 1); 435 ofid = p9_client_walk(dfid, 0, NULL, 1);
433 if (IS_ERR(ofid)) { 436 if (IS_ERR(ofid)) {
434 err = PTR_ERR(ofid); 437 err = PTR_ERR(ofid);
438 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
435 ofid = NULL; 439 ofid = NULL;
436 goto error; 440 goto error;
437 } 441 }
438 442
439 err = p9_client_fcreate(ofid, name, perm, mode, extension); 443 err = p9_client_fcreate(ofid, name, perm, mode, extension);
440 if (err < 0) 444 if (err < 0) {
445 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
441 goto error; 446 goto error;
447 }
442 448
443 /* now walk from the parent so we can get unopened fid */ 449 /* now walk from the parent so we can get unopened fid */
444 fid = p9_client_walk(dfid, 1, &name, 0); 450 fid = p9_client_walk(dfid, 1, &name, 0);
445 if (IS_ERR(fid)) { 451 if (IS_ERR(fid)) {
446 err = PTR_ERR(fid); 452 err = PTR_ERR(fid);
453 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
447 fid = NULL; 454 fid = NULL;
448 goto error; 455 goto error;
449 } else 456 } else
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
453 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 460 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
454 if (IS_ERR(inode)) { 461 if (IS_ERR(inode)) {
455 err = PTR_ERR(inode); 462 err = PTR_ERR(inode);
463 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
456 goto error; 464 goto error;
457 } 465 }
458 466
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
734 int err; 742 int err;
735 struct v9fs_session_info *v9ses; 743 struct v9fs_session_info *v9ses;
736 struct p9_fid *fid; 744 struct p9_fid *fid;
737 struct p9_stat *st; 745 struct p9_wstat *st;
738 746
739 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 747 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
740 err = -EPERM; 748 err = -EPERM;
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
815 */ 823 */
816 824
817void 825void
818v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, 826v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
819 struct super_block *sb) 827 struct super_block *sb)
820{ 828{
821 int n;
822 char ext[32]; 829 char ext[32];
823 struct v9fs_session_info *v9ses = sb->s_fs_info; 830 struct v9fs_session_info *v9ses = sb->s_fs_info;
824 831
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
842 int major = -1; 849 int major = -1;
843 int minor = -1; 850 int minor = -1;
844 851
845 n = stat->extension.len; 852 strncpy(ext, stat->extension, sizeof(ext));
846 if (n > sizeof(ext)-1)
847 n = sizeof(ext)-1;
848 memmove(ext, stat->extension.str, n);
849 ext[n] = 0;
850 sscanf(ext, "%c %u %u", &type, &major, &minor); 853 sscanf(ext, "%c %u %u", &type, &major, &minor);
851 switch (type) { 854 switch (type) {
852 case 'c': 855 case 'c':
@@ -857,10 +860,11 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
857 break; 860 break;
858 default: 861 default:
859 P9_DPRINTK(P9_DEBUG_ERROR, 862 P9_DPRINTK(P9_DEBUG_ERROR,
860 "Unknown special type %c (%.*s)\n", type, 863 "Unknown special type %c %s\n", type,
861 stat->extension.len, stat->extension.str); 864 stat->extension);
862 }; 865 };
863 inode->i_rdev = MKDEV(major, minor); 866 inode->i_rdev = MKDEV(major, minor);
867 init_special_inode(inode, inode->i_mode, inode->i_rdev);
864 } else 868 } else
865 inode->i_rdev = 0; 869 inode->i_rdev = 0;
866 870
@@ -904,7 +908,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
904 908
905 struct v9fs_session_info *v9ses; 909 struct v9fs_session_info *v9ses;
906 struct p9_fid *fid; 910 struct p9_fid *fid;
907 struct p9_stat *st; 911 struct p9_wstat *st;
908 912
909 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); 913 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
910 retval = -EPERM; 914 retval = -EPERM;
@@ -926,15 +930,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
926 } 930 }
927 931
928 /* copy extension buffer into buffer */ 932 /* copy extension buffer into buffer */
929 if (st->extension.len < buflen) 933 strncpy(buffer, st->extension, buflen);
930 buflen = st->extension.len + 1;
931
932 memmove(buffer, st->extension.str, buflen - 1);
933 buffer[buflen-1] = 0;
934 934
935 P9_DPRINTK(P9_DEBUG_VFS, 935 P9_DPRINTK(P9_DEBUG_VFS,
936 "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len, 936 "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
937 st->extension.str, buffer);
938 937
939 retval = buflen; 938 retval = buflen;
940 939
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf59c3960494..d6cb1a0ca724 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
111 struct inode *inode = NULL; 111 struct inode *inode = NULL;
112 struct dentry *root = NULL; 112 struct dentry *root = NULL;
113 struct v9fs_session_info *v9ses = NULL; 113 struct v9fs_session_info *v9ses = NULL;
114 struct p9_stat *st = NULL; 114 struct p9_wstat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 115 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current->fsuid; 116 uid_t uid = current->fsuid;
117 gid_t gid = current->fsgid; 117 gid_t gid = current->fsgid;
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
161 161
162 sb->s_root = root; 162 sb->s_root = root;
163 root->d_inode->i_ino = v9fs_qid2ino(&st->qid); 163 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
164
164 v9fs_stat2inode(st, root->d_inode, sb); 165 v9fs_stat2inode(st, root->d_inode, sb);
166
165 v9fs_fid_add(root, fid); 167 v9fs_fid_add(root, fid);
168 p9stat_free(st);
166 kfree(st); 169 kfree(st);
167 170
171P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
168 return simple_set_mnt(mnt, sb); 172 return simple_set_mnt(mnt, sb);
169 173
170release_sb: 174release_sb:
diff --git a/fs/Kconfig b/fs/Kconfig
index d0a1174fb516..522469a7eca3 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,61 +6,9 @@ menu "File systems"
6 6
7if BLOCK 7if BLOCK
8 8
9config EXT2_FS 9source "fs/ext2/Kconfig"
10 tristate "Second extended fs support" 10source "fs/ext3/Kconfig"
11 help 11source "fs/ext4/Kconfig"
12 Ext2 is a standard Linux file system for hard disks.
13
14 To compile this file system support as a module, choose M here: the
15 module will be called ext2.
16
17 If unsure, say Y.
18
19config EXT2_FS_XATTR
20 bool "Ext2 extended attributes"
21 depends on EXT2_FS
22 help
23 Extended attributes are name:value pairs associated with inodes by
24 the kernel or by users (see the attr(5) manual page, or visit
25 <http://acl.bestbits.at/> for details).
26
27 If unsure, say N.
28
29config EXT2_FS_POSIX_ACL
30 bool "Ext2 POSIX Access Control Lists"
31 depends on EXT2_FS_XATTR
32 select FS_POSIX_ACL
33 help
34 Posix Access Control Lists (ACLs) support permissions for users and
35 groups beyond the owner/group/world scheme.
36
37 To learn more about Access Control Lists, visit the Posix ACLs for
38 Linux website <http://acl.bestbits.at/>.
39
40 If you don't know what Access Control Lists are, say N
41
42config EXT2_FS_SECURITY
43 bool "Ext2 Security Labels"
44 depends on EXT2_FS_XATTR
45 help
46 Security labels support alternative access control models
47 implemented by security modules like SELinux. This option
48 enables an extended attribute handler for file security
49 labels in the ext2 filesystem.
50
51 If you are not using a security module that requires using
52 extended attributes for file security labels, say N.
53
54config EXT2_FS_XIP
55 bool "Ext2 execute in place support"
56 depends on EXT2_FS && MMU
57 help
58 Execute in place can be used on memory-backed block devices. If you
59 enable this option, you can select to mount block devices which are
60 capable of this feature without using the page cache.
61
62 If you do not use a block device that is capable of using this,
63 or if unsure, say N.
64 12
65config FS_XIP 13config FS_XIP
66# execute in place 14# execute in place
@@ -68,225 +16,16 @@ config FS_XIP
68 depends on EXT2_FS_XIP 16 depends on EXT2_FS_XIP
69 default y 17 default y
70 18
71config EXT3_FS 19source "fs/jbd/Kconfig"
72 tristate "Ext3 journalling file system support" 20source "fs/jbd2/Kconfig"
73 select JBD
74 help
75 This is the journalling version of the Second extended file system
76 (often called ext3), the de facto standard Linux file system
77 (method to organize files on a storage device) for hard disks.
78
79 The journalling code included in this driver means you do not have
80 to run e2fsck (file system checker) on your file systems after a
81 crash. The journal keeps track of any changes that were being made
82 at the time the system crashed, and can ensure that your file system
83 is consistent without the need for a lengthy check.
84
85 Other than adding the journal to the file system, the on-disk format
86 of ext3 is identical to ext2. It is possible to freely switch
87 between using the ext3 driver and the ext2 driver, as long as the
88 file system has been cleanly unmounted, or e2fsck is run on the file
89 system.
90
91 To add a journal on an existing ext2 file system or change the
92 behavior of ext3 file systems, you can use the tune2fs utility ("man
93 tune2fs"). To modify attributes of files and directories on ext3
94 file systems, use chattr ("man chattr"). You need to be using
95 e2fsprogs version 1.20 or later in order to create ext3 journals
96 (available at <http://sourceforge.net/projects/e2fsprogs/>).
97
98 To compile this file system support as a module, choose M here: the
99 module will be called ext3.
100
101config EXT3_FS_XATTR
102 bool "Ext3 extended attributes"
103 depends on EXT3_FS
104 default y
105 help
106 Extended attributes are name:value pairs associated with inodes by
107 the kernel or by users (see the attr(5) manual page, or visit
108 <http://acl.bestbits.at/> for details).
109
110 If unsure, say N.
111
112 You need this for POSIX ACL support on ext3.
113
114config EXT3_FS_POSIX_ACL
115 bool "Ext3 POSIX Access Control Lists"
116 depends on EXT3_FS_XATTR
117 select FS_POSIX_ACL
118 help
119 Posix Access Control Lists (ACLs) support permissions for users and
120 groups beyond the owner/group/world scheme.
121
122 To learn more about Access Control Lists, visit the Posix ACLs for
123 Linux website <http://acl.bestbits.at/>.
124
125 If you don't know what Access Control Lists are, say N
126
127config EXT3_FS_SECURITY
128 bool "Ext3 Security Labels"
129 depends on EXT3_FS_XATTR
130 help
131 Security labels support alternative access control models
132 implemented by security modules like SELinux. This option
133 enables an extended attribute handler for file security
134 labels in the ext3 filesystem.
135
136 If you are not using a security module that requires using
137 extended attributes for file security labels, say N.
138
139config EXT4_FS
140 tristate "The Extended 4 (ext4) filesystem"
141 select JBD2
142 select CRC16
143 help
144 This is the next generation of the ext3 filesystem.
145
146 Unlike the change from ext2 filesystem to ext3 filesystem,
147 the on-disk format of ext4 is not forwards compatible with
148 ext3; it is based on extent maps and it supports 48-bit
149 physical block numbers. The ext4 filesystem also supports delayed
150 allocation, persistent preallocation, high resolution time stamps,
151 and a number of other features to improve performance and speed
152 up fsck time. For more information, please see the web pages at
153 http://ext4.wiki.kernel.org.
154
155 The ext4 filesystem will support mounting an ext3
156 filesystem; while there will be some performance gains from
157 the delayed allocation and inode table readahead, the best
158 performance gains will require enabling ext4 features in the
159 filesystem, or formating a new filesystem as an ext4
160 filesystem initially.
161
162 To compile this file system support as a module, choose M here. The
163 module will be called ext4.
164
165 If unsure, say N.
166
167config EXT4DEV_COMPAT
168 bool "Enable ext4dev compatibility"
169 depends on EXT4_FS
170 help
171 Starting with 2.6.28, the name of the ext4 filesystem was
172 renamed from ext4dev to ext4. Unfortunately there are some
173 legacy userspace programs (such as klibc's fstype) have
174 "ext4dev" hardcoded.
175
176 To enable backwards compatibility so that systems that are
177 still expecting to mount ext4 filesystems using ext4dev,
178 chose Y here. This feature will go away by 2.6.31, so
179 please arrange to get your userspace programs fixed!
180
181config EXT4_FS_XATTR
182 bool "Ext4 extended attributes"
183 depends on EXT4_FS
184 default y
185 help
186 Extended attributes are name:value pairs associated with inodes by
187 the kernel or by users (see the attr(5) manual page, or visit
188 <http://acl.bestbits.at/> for details).
189
190 If unsure, say N.
191
192 You need this for POSIX ACL support on ext4.
193
194config EXT4_FS_POSIX_ACL
195 bool "Ext4 POSIX Access Control Lists"
196 depends on EXT4_FS_XATTR
197 select FS_POSIX_ACL
198 help
199 POSIX Access Control Lists (ACLs) support permissions for users and
200 groups beyond the owner/group/world scheme.
201
202 To learn more about Access Control Lists, visit the POSIX ACLs for
203 Linux website <http://acl.bestbits.at/>.
204
205 If you don't know what Access Control Lists are, say N
206
207config EXT4_FS_SECURITY
208 bool "Ext4 Security Labels"
209 depends on EXT4_FS_XATTR
210 help
211 Security labels support alternative access control models
212 implemented by security modules like SELinux. This option
213 enables an extended attribute handler for file security
214 labels in the ext4 filesystem.
215
216 If you are not using a security module that requires using
217 extended attributes for file security labels, say N.
218
219config JBD
220 tristate
221 help
222 This is a generic journalling layer for block devices. It is
223 currently used by the ext3 file system, but it could also be
224 used to add journal support to other file systems or block
225 devices such as RAID or LVM.
226
227 If you are using the ext3 file system, you need to say Y here.
228 If you are not using ext3 then you will probably want to say N.
229
230 To compile this device as a module, choose M here: the module will be
231 called jbd. If you are compiling ext3 into the kernel, you
232 cannot compile this code as a module.
233
234config JBD_DEBUG
235 bool "JBD (ext3) debugging support"
236 depends on JBD && DEBUG_FS
237 help
238 If you are using the ext3 journaled file system (or potentially any
239 other file system/device using JBD), this option allows you to
240 enable debugging output while the system is running, in order to
241 help track down any problems you are having. By default the
242 debugging output will be turned off.
243
244 If you select Y here, then you will be able to turn on debugging
245 with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
246 number between 1 and 5, the higher the number, the more debugging
247 output is generated. To turn debugging off again, do
248 "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
249
250config JBD2
251 tristate
252 select CRC32
253 help
254 This is a generic journaling layer for block devices that support
255 both 32-bit and 64-bit block numbers. It is currently used by
256 the ext4 and OCFS2 filesystems, but it could also be used to add
257 journal support to other file systems or block devices such
258 as RAID or LVM.
259
260 If you are using ext4 or OCFS2, you need to say Y here.
261 If you are not using ext4 or OCFS2 then you will
262 probably want to say N.
263
264 To compile this device as a module, choose M here. The module will be
265 called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
266 you cannot compile this code as a module.
267
268config JBD2_DEBUG
269 bool "JBD2 (ext4) debugging support"
270 depends on JBD2 && DEBUG_FS
271 help
272 If you are using the ext4 journaled file system (or
273 potentially any other filesystem/device using JBD2), this option
274 allows you to enable debugging output while the system is running,
275 in order to help track down any problems you are having.
276 By default, the debugging output will be turned off.
277
278 If you select Y here, then you will be able to turn on debugging
279 with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
280 number between 1 and 5. The higher the number, the more debugging
281 output is generated. To turn debugging off again, do
282 "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
283 21
284config FS_MBCACHE 22config FS_MBCACHE
285# Meta block cache for Extended Attributes (ext2/ext3/ext4) 23# Meta block cache for Extended Attributes (ext2/ext3/ext4)
286 tristate 24 tristate
287 depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR 25 default y if EXT2_FS=y && EXT2_FS_XATTR
288 default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y 26 default y if EXT3_FS=y && EXT3_FS_XATTR
289 default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m 27 default y if EXT4_FS=y && EXT4_FS_XATTR
28 default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
290 29
291config REISERFS_FS 30config REISERFS_FS
292 tristate "Reiserfs support" 31 tristate "Reiserfs support"
@@ -665,7 +404,7 @@ config AUTOFS4_FS
665 N here. 404 N here.
666 405
667config FUSE_FS 406config FUSE_FS
668 tristate "Filesystem in Userspace support" 407 tristate "FUSE (Filesystem in Userspace) support"
669 help 408 help
670 With FUSE it is possible to implement a fully functional filesystem 409 With FUSE it is possible to implement a fully functional filesystem
671 in a userspace program. 410 in a userspace program.
@@ -1168,195 +907,7 @@ config EFS_FS
1168 To compile the EFS file system support as a module, choose M here: the 907 To compile the EFS file system support as a module, choose M here: the
1169 module will be called efs. 908 module will be called efs.
1170 909
1171config JFFS2_FS 910source "fs/jffs2/Kconfig"
1172 tristate "Journalling Flash File System v2 (JFFS2) support"
1173 select CRC32
1174 depends on MTD
1175 help
1176 JFFS2 is the second generation of the Journalling Flash File System
1177 for use on diskless embedded devices. It provides improved wear
1178 levelling, compression and support for hard links. You cannot use
1179 this on normal block devices, only on 'MTD' devices.
1180
1181 Further information on the design and implementation of JFFS2 is
1182 available at <http://sources.redhat.com/jffs2/>.
1183
1184config JFFS2_FS_DEBUG
1185 int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
1186 depends on JFFS2_FS
1187 default "0"
1188 help
1189 This controls the amount of debugging messages produced by the JFFS2
1190 code. Set it to zero for use in production systems. For evaluation,
1191 testing and debugging, it's advisable to set it to one. This will
1192 enable a few assertions and will print debugging messages at the
1193 KERN_DEBUG loglevel, where they won't normally be visible. Level 2
1194 is unlikely to be useful - it enables extra debugging in certain
1195 areas which at one point needed debugging, but when the bugs were
1196 located and fixed, the detailed messages were relegated to level 2.
1197
1198 If reporting bugs, please try to have available a full dump of the
1199 messages at debug level 1 while the misbehaviour was occurring.
1200
1201config JFFS2_FS_WRITEBUFFER
1202 bool "JFFS2 write-buffering support"
1203 depends on JFFS2_FS
1204 default y
1205 help
1206 This enables the write-buffering support in JFFS2.
1207
1208 This functionality is required to support JFFS2 on the following
1209 types of flash devices:
1210 - NAND flash
1211 - NOR flash with transparent ECC
1212 - DataFlash
1213
1214config JFFS2_FS_WBUF_VERIFY
1215 bool "Verify JFFS2 write-buffer reads"
1216 depends on JFFS2_FS_WRITEBUFFER
1217 default n
1218 help
1219 This causes JFFS2 to read back every page written through the
1220 write-buffer, and check for errors.
1221
1222config JFFS2_SUMMARY
1223 bool "JFFS2 summary support (EXPERIMENTAL)"
1224 depends on JFFS2_FS && EXPERIMENTAL
1225 default n
1226 help
1227 This feature makes it possible to use summary information
1228 for faster filesystem mount.
1229
1230 The summary information can be inserted into a filesystem image
1231 by the utility 'sumtool'.
1232
1233 If unsure, say 'N'.
1234
1235config JFFS2_FS_XATTR
1236 bool "JFFS2 XATTR support (EXPERIMENTAL)"
1237 depends on JFFS2_FS && EXPERIMENTAL
1238 default n
1239 help
1240 Extended attributes are name:value pairs associated with inodes by
1241 the kernel or by users (see the attr(5) manual page, or visit
1242 <http://acl.bestbits.at/> for details).
1243
1244 If unsure, say N.
1245
1246config JFFS2_FS_POSIX_ACL
1247 bool "JFFS2 POSIX Access Control Lists"
1248 depends on JFFS2_FS_XATTR
1249 default y
1250 select FS_POSIX_ACL
1251 help
1252 Posix Access Control Lists (ACLs) support permissions for users and
1253 groups beyond the owner/group/world scheme.
1254
1255 To learn more about Access Control Lists, visit the Posix ACLs for
1256 Linux website <http://acl.bestbits.at/>.
1257
1258 If you don't know what Access Control Lists are, say N
1259
1260config JFFS2_FS_SECURITY
1261 bool "JFFS2 Security Labels"
1262 depends on JFFS2_FS_XATTR
1263 default y
1264 help
1265 Security labels support alternative access control models
1266 implemented by security modules like SELinux. This option
1267 enables an extended attribute handler for file security
1268 labels in the jffs2 filesystem.
1269
1270 If you are not using a security module that requires using
1271 extended attributes for file security labels, say N.
1272
1273config JFFS2_COMPRESSION_OPTIONS
1274 bool "Advanced compression options for JFFS2"
1275 depends on JFFS2_FS
1276 default n
1277 help
1278 Enabling this option allows you to explicitly choose which
1279 compression modules, if any, are enabled in JFFS2. Removing
1280 compressors can mean you cannot read existing file systems,
1281 and enabling experimental compressors can mean that you
1282 write a file system which cannot be read by a standard kernel.
1283
1284 If unsure, you should _definitely_ say 'N'.
1285
1286config JFFS2_ZLIB
1287 bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
1288 select ZLIB_INFLATE
1289 select ZLIB_DEFLATE
1290 depends on JFFS2_FS
1291 default y
1292 help
1293 Zlib is designed to be a free, general-purpose, legally unencumbered,
1294 lossless data-compression library for use on virtually any computer
1295 hardware and operating system. See <http://www.gzip.org/zlib/> for
1296 further information.
1297
1298 Say 'Y' if unsure.
1299
1300config JFFS2_LZO
1301 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
1302 select LZO_COMPRESS
1303 select LZO_DECOMPRESS
1304 depends on JFFS2_FS
1305 default n
1306 help
1307 minilzo-based compression. Generally works better than Zlib.
1308
1309 This feature was added in July, 2007. Say 'N' if you need
1310 compatibility with older bootloaders or kernels.
1311
1312config JFFS2_RTIME
1313 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
1314 depends on JFFS2_FS
1315 default y
1316 help
1317 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
1318
1319config JFFS2_RUBIN
1320 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
1321 depends on JFFS2_FS
1322 default n
1323 help
1324 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
1325
1326choice
1327 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
1328 default JFFS2_CMODE_PRIORITY
1329 depends on JFFS2_FS
1330 help
1331 You can set here the default compression mode of JFFS2 from
1332 the available compression modes. Don't touch if unsure.
1333
1334config JFFS2_CMODE_NONE
1335 bool "no compression"
1336 help
1337 Uses no compression.
1338
1339config JFFS2_CMODE_PRIORITY
1340 bool "priority"
1341 help
1342 Tries the compressors in a predefined order and chooses the first
1343 successful one.
1344
1345config JFFS2_CMODE_SIZE
1346 bool "size (EXPERIMENTAL)"
1347 help
1348 Tries all compressors and chooses the one which has the smallest
1349 result.
1350
1351config JFFS2_CMODE_FAVOURLZO
1352 bool "Favour LZO"
1353 help
1354 Tries all compressors and chooses the one which has the smallest
1355 result but gives some preference to LZO (which has faster
1356 decompression) at the expense of size.
1357
1358endchoice
1359
1360# UBIFS File system configuration 911# UBIFS File system configuration
1361source "fs/ubifs/Kconfig" 912source "fs/ubifs/Kconfig"
1362 913
@@ -1913,148 +1464,7 @@ config SMB_NLS_REMOTE
1913 1464
1914 smbmount from samba 2.2.0 or later supports this. 1465 smbmount from samba 2.2.0 or later supports this.
1915 1466
1916config CIFS 1467source "fs/cifs/Kconfig"
1917 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
1918 depends on INET
1919 select NLS
1920 help
1921 This is the client VFS module for the Common Internet File System
1922 (CIFS) protocol which is the successor to the Server Message Block
1923 (SMB) protocol, the native file sharing mechanism for most early
1924 PC operating systems. The CIFS protocol is fully supported by
1925 file servers such as Windows 2000 (including Windows 2003, NT 4
1926 and Windows XP) as well by Samba (which provides excellent CIFS
1927 server support for Linux and many other operating systems). Limited
1928 support for OS/2 and Windows ME and similar servers is provided as
1929 well.
1930
1931 The cifs module provides an advanced network file system
1932 client for mounting to CIFS compliant servers. It includes
1933 support for DFS (hierarchical name space), secure per-user
1934 session establishment via Kerberos or NTLM or NTLMv2,
1935 safe distributed caching (oplock), optional packet
1936 signing, Unicode and other internationalization improvements.
1937 If you need to mount to Samba or Windows from this machine, say Y.
1938
1939config CIFS_STATS
1940 bool "CIFS statistics"
1941 depends on CIFS
1942 help
1943 Enabling this option will cause statistics for each server share
1944 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
1945
1946config CIFS_STATS2
1947 bool "Extended statistics"
1948 depends on CIFS_STATS
1949 help
1950 Enabling this option will allow more detailed statistics on SMB
1951 request timing to be displayed in /proc/fs/cifs/DebugData and also
1952 allow optional logging of slow responses to dmesg (depending on the
1953 value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
1954 These additional statistics may have a minor effect on performance
1955 and memory utilization.
1956
1957 Unless you are a developer or are doing network performance analysis
1958 or tuning, say N.
1959
1960config CIFS_WEAK_PW_HASH
1961 bool "Support legacy servers which use weaker LANMAN security"
1962 depends on CIFS
1963 help
1964 Modern CIFS servers including Samba and most Windows versions
1965 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
1966 security mechanisms. These hash the password more securely
1967 than the mechanisms used in the older LANMAN version of the
1968 SMB protocol but LANMAN based authentication is needed to
1969 establish sessions with some old SMB servers.
1970
1971 Enabling this option allows the cifs module to mount to older
1972 LANMAN based servers such as OS/2 and Windows 95, but such
1973 mounts may be less secure than mounts using NTLM or more recent
1974 security mechanisms if you are on a public network. Unless you
1975 have a need to access old SMB servers (and are on a private
1976 network) you probably want to say N. Even if this support
1977 is enabled in the kernel build, LANMAN authentication will not be
1978 used automatically. At runtime LANMAN mounts are disabled but
1979 can be set to required (or optional) either in
1980 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
1981 option on the mount command. This support is disabled by
1982 default in order to reduce the possibility of a downgrade
1983 attack.
1984
1985 If unsure, say N.
1986
1987config CIFS_UPCALL
1988 bool "Kerberos/SPNEGO advanced session setup"
1989 depends on CIFS && KEYS
1990 help
1991 Enables an upcall mechanism for CIFS which accesses
1992 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
1993 Kerberos tickets which are needed to mount to certain secure servers
1994 (for which more secure Kerberos authentication is required). If
1995 unsure, say N.
1996
1997config CIFS_XATTR
1998 bool "CIFS extended attributes"
1999 depends on CIFS
2000 help
2001 Extended attributes are name:value pairs associated with inodes by
2002 the kernel or by users (see the attr(5) manual page, or visit
2003 <http://acl.bestbits.at/> for details). CIFS maps the name of
2004 extended attributes beginning with the user namespace prefix
2005 to SMB/CIFS EAs. EAs are stored on Windows servers without the
2006 user namespace prefix, but their names are seen by Linux cifs clients
2007 prefaced by the user namespace prefix. The system namespace
2008 (used by some filesystems to store ACLs) is not supported at
2009 this time.
2010
2011 If unsure, say N.
2012
2013config CIFS_POSIX
2014 bool "CIFS POSIX Extensions"
2015 depends on CIFS_XATTR
2016 help
2017 Enabling this option will cause the cifs client to attempt to
2018 negotiate a newer dialect with servers, such as Samba 3.0.5
2019 or later, that optionally can handle more POSIX like (rather
2020 than Windows like) file behavior. It also enables
2021 support for POSIX ACLs (getfacl and setfacl) to servers
2022 (such as Samba 3.10 and later) which can negotiate
2023 CIFS POSIX ACL support. If unsure, say N.
2024
2025config CIFS_DEBUG2
2026 bool "Enable additional CIFS debugging routines"
2027 depends on CIFS
2028 help
2029 Enabling this option adds a few more debugging routines
2030 to the cifs code which slightly increases the size of
2031 the cifs module and can cause additional logging of debug
2032 messages in some error paths, slowing performance. This
2033 option can be turned off unless you are debugging
2034 cifs problems. If unsure, say N.
2035
2036config CIFS_EXPERIMENTAL
2037 bool "CIFS Experimental Features (EXPERIMENTAL)"
2038 depends on CIFS && EXPERIMENTAL
2039 help
2040 Enables cifs features under testing. These features are
2041 experimental and currently include DFS support and directory
2042 change notification ie fcntl(F_DNOTIFY), as well as the upcall
2043 mechanism which will be used for Kerberos session negotiation
2044 and uid remapping. Some of these features also may depend on
2045 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
2046 (which is disabled by default). See the file fs/cifs/README
2047 for more details. If unsure, say N.
2048
2049config CIFS_DFS_UPCALL
2050 bool "DFS feature support (EXPERIMENTAL)"
2051 depends on CIFS_EXPERIMENTAL
2052 depends on KEYS
2053 help
2054 Enables an upcall mechanism for CIFS which contacts userspace
2055 helper utilities to provide server name resolution (host names to
2056 IP addresses) which is needed for implicit mounts of DFS junction
2057 points. If unsure, say N.
2058 1468
2059config NCP_FS 1469config NCP_FS
2060 tristate "NCP file system support (to mount NetWare volumes)" 1470 tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 801db1341811..ce9fb3fbfae4 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
40 40
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config CORE_DUMP_DEFAULT_ELF_HEADERS
44 bool "Write ELF core dumps with partial segments"
45 default n
46 depends on BINFMT_ELF
47 help
48 ELF core dump files describe each memory mapping of the crashed
49 process, and can contain or omit the memory contents of each one.
50 The contents of an unmodified text mapping are omitted by default.
51
52 For an unmodified text mapping of an ELF object, including just
53 the first page of the file in a core dump makes it possible to
54 identify the build ID bits in the file, without paying the i/o
55 cost and disk space to dump all the text. However, versions of
56 GDB before 6.7 are confused by ELF core dump files in this format.
57
58 The core dump behavior can be controlled per process using
59 the /proc/PID/coredump_filter pseudo-file; this setting is
60 inherited. See Documentation/filesystems/proc.txt for details.
61
62 This config option changes the default setting of coredump_filter
63 seen at boot time. If unsure, say N.
64
43config BINFMT_FLAT 65config BINFMT_FLAT
44 bool "Kernel support for flat binaries" 66 bool "Kernel support for flat binaries"
45 depends on !MMU && (!FRV || BROKEN) 67 depends on !MMU && (!FRV || BROKEN)
diff --git a/fs/Makefile b/fs/Makefile
index 2168c902d5ca..d9f8afe6f0c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
81obj-$(CONFIG_CODA_FS) += coda/ 81obj-$(CONFIG_CODA_FS) += coda/
82obj-$(CONFIG_MINIX_FS) += minix/ 82obj-$(CONFIG_MINIX_FS) += minix/
83obj-$(CONFIG_FAT_FS) += fat/ 83obj-$(CONFIG_FAT_FS) += fat/
84obj-$(CONFIG_MSDOS_FS) += msdos/
85obj-$(CONFIG_VFAT_FS) += vfat/
86obj-$(CONFIG_BFS_FS) += bfs/ 84obj-$(CONFIG_BFS_FS) += bfs/
87obj-$(CONFIG_ISO9660_FS) += isofs/ 85obj-$(CONFIG_ISO9660_FS) += isofs/
88obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ 86obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index dfda03d4397d..99cf390641f7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -45,6 +45,7 @@ const struct file_operations afs_dir_file_operations = {
45 .release = afs_release, 45 .release = afs_release,
46 .readdir = afs_readdir, 46 .readdir = afs_readdir,
47 .lock = afs_lock, 47 .lock = afs_lock,
48 .llseek = generic_file_llseek,
48}; 49};
49 50
50const struct inode_operations afs_dir_inode_operations = { 51const struct inode_operations afs_dir_inode_operations = {
diff --git a/fs/attr.c b/fs/attr.c
index 26c71ba1eed4..7a83819f6ba2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -159,17 +159,17 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
159 if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) 159 if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
160 return 0; 160 return 0;
161 161
162 error = security_inode_setattr(dentry, attr);
163 if (error)
164 return error;
165
162 if (ia_valid & ATTR_SIZE) 166 if (ia_valid & ATTR_SIZE)
163 down_write(&dentry->d_inode->i_alloc_sem); 167 down_write(&dentry->d_inode->i_alloc_sem);
164 168
165 if (inode->i_op && inode->i_op->setattr) { 169 if (inode->i_op && inode->i_op->setattr) {
166 error = security_inode_setattr(dentry, attr); 170 error = inode->i_op->setattr(dentry, attr);
167 if (!error)
168 error = inode->i_op->setattr(dentry, attr);
169 } else { 171 } else {
170 error = inode_change_ok(inode, attr); 172 error = inode_change_ok(inode, attr);
171 if (!error)
172 error = security_inode_setattr(dentry, attr);
173 if (!error) { 173 if (!error) {
174 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 174 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
175 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) 175 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 625abf5422e2..33bf8cbfd051 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
128 */ 128 */
129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) 129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
130{ 130{
131 int err = -EINVAL; 131 int err;
132 132
133 if (check_dev_ioctl_version(cmd, param)) { 133 err = check_dev_ioctl_version(cmd, param);
134 if (err) {
134 AUTOFS_WARN("invalid device control module version " 135 AUTOFS_WARN("invalid device control module version "
135 "supplied for cmd(0x%08x)", cmd); 136 "supplied for cmd(0x%08x)", cmd);
136 goto out; 137 goto out;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cde2f8e8935a..4b6fb3f628c0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
56 mntget(mnt); 56 mntget(mnt);
57 dget(dentry); 57 dget(dentry);
58 58
59 if (!autofs4_follow_mount(&mnt, &dentry)) 59 if (!follow_down(&mnt, &dentry))
60 goto done; 60 goto done;
61 61
62 /* This is an autofs submount, we can't expire it */ 62 if (is_autofs4_dentry(dentry)) {
63 if (is_autofs4_dentry(dentry)) 63 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
64 goto done; 64
65 /* This is an autofs submount, we can't expire it */
66 if (sbi->type == AUTOFS_TYPE_INDIRECT)
67 goto done;
68
69 /*
70 * Otherwise it's an offset mount and we need to check
71 * if we can umount its mount, if there is one.
72 */
73 if (!d_mountpoint(dentry))
74 goto done;
75 }
65 76
66 /* Update the expiry counter if fs is busy */ 77 /* Update the expiry counter if fs is busy */
67 if (!may_umount_tree(mnt)) { 78 if (!may_umount_tree(mnt)) {
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index ed8feb052df9..daae463068e4 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -80,6 +80,7 @@ const struct file_operations bfs_dir_operations = {
80 .read = generic_read_dir, 80 .read = generic_read_dir,
81 .readdir = bfs_readdir, 81 .readdir = bfs_readdir,
82 .fsync = file_fsync, 82 .fsync = file_fsync,
83 .llseek = generic_file_llseek,
83}; 84};
84 85
85extern void dump_imap(const char *, struct super_block *); 86extern void dump_imap(const char *, struct super_block *);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c76afa26edf7..8fcfa398d350 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
1156static unsigned long vma_dump_size(struct vm_area_struct *vma, 1156static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157 unsigned long mm_flags) 1157 unsigned long mm_flags)
1158{ 1158{
1159#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1160
1159 /* The vma can be set up to tell us the answer directly. */ 1161 /* The vma can be set up to tell us the answer directly. */
1160 if (vma->vm_flags & VM_ALWAYSDUMP) 1162 if (vma->vm_flags & VM_ALWAYSDUMP)
1161 goto whole; 1163 goto whole;
1162 1164
1165 /* Hugetlb memory check */
1166 if (vma->vm_flags & VM_HUGETLB) {
1167 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1168 goto whole;
1169 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1170 goto whole;
1171 }
1172
1163 /* Do not dump I/O mapped devices or special mappings */ 1173 /* Do not dump I/O mapped devices or special mappings */
1164 if (vma->vm_flags & (VM_IO | VM_RESERVED)) 1174 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1165 return 0; 1175 return 0;
1166 1176
1167#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1168
1169 /* By default, dump shared memory if mapped from an anonymous file. */ 1177 /* By default, dump shared memory if mapped from an anonymous file. */
1170 if (vma->vm_flags & VM_SHARED) { 1178 if (vma->vm_flags & VM_SHARED) {
1171 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? 1179 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
@@ -1333,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1333 prstatus->pr_pgrp = task_pgrp_vnr(p); 1341 prstatus->pr_pgrp = task_pgrp_vnr(p);
1334 prstatus->pr_sid = task_session_vnr(p); 1342 prstatus->pr_sid = task_session_vnr(p);
1335 if (thread_group_leader(p)) { 1343 if (thread_group_leader(p)) {
1344 struct task_cputime cputime;
1345
1336 /* 1346 /*
1337 * This is the record for the group leader. Add in the 1347 * This is the record for the group leader. It shows the
1338 * cumulative times of previous dead threads. This total 1348 * group-wide total, not its individual thread total.
1339 * won't include the time of each live thread whose state
1340 * is included in the core dump. The final total reported
1341 * to our parent process when it calls wait4 will include
1342 * those sums as well as the little bit more time it takes
1343 * this and each other thread to finish dying after the
1344 * core dump synchronization phase.
1345 */ 1349 */
1346 cputime_to_timeval(cputime_add(p->utime, p->signal->utime), 1350 thread_group_cputime(p, &cputime);
1347 &prstatus->pr_utime); 1351 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1348 cputime_to_timeval(cputime_add(p->stime, p->signal->stime), 1352 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1349 &prstatus->pr_stime);
1350 } else { 1353 } else {
1351 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1354 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1352 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1355 cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0e8367c54624..5b5424cb3391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1390 prstatus->pr_pgrp = task_pgrp_vnr(p); 1390 prstatus->pr_pgrp = task_pgrp_vnr(p);
1391 prstatus->pr_sid = task_session_vnr(p); 1391 prstatus->pr_sid = task_session_vnr(p);
1392 if (thread_group_leader(p)) { 1392 if (thread_group_leader(p)) {
1393 struct task_cputime cputime;
1394
1393 /* 1395 /*
1394 * This is the record for the group leader. Add in the 1396 * This is the record for the group leader. It shows the
1395 * cumulative times of previous dead threads. This total 1397 * group-wide total, not its individual thread total.
1396 * won't include the time of each live thread whose state
1397 * is included in the core dump. The final total reported
1398 * to our parent process when it calls wait4 will include
1399 * those sums as well as the little bit more time it takes
1400 * this and each other thread to finish dying after the
1401 * core dump synchronization phase.
1402 */ 1398 */
1403 cputime_to_timeval(cputime_add(p->utime, p->signal->utime), 1399 thread_group_cputime(p, &cputime);
1404 &prstatus->pr_utime); 1400 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1405 cputime_to_timeval(cputime_add(p->stime, p->signal->stime), 1401 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1406 &prstatus->pr_stime);
1407 } else { 1402 } else {
1408 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1403 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1409 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1404 cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 218408eed1bb..db831efbdbbd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -840,13 +840,12 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk);
840 * to be used for internal purposes. If you ever need it - reconsider 840 * to be used for internal purposes. If you ever need it - reconsider
841 * your API. 841 * your API.
842 */ 842 */
843struct block_device *open_by_devnum(dev_t dev, unsigned mode) 843struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
844{ 844{
845 struct block_device *bdev = bdget(dev); 845 struct block_device *bdev = bdget(dev);
846 int err = -ENOMEM; 846 int err = -ENOMEM;
847 int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
848 if (bdev) 847 if (bdev)
849 err = blkdev_get(bdev, mode, flags); 848 err = blkdev_get(bdev, mode);
850 return err ? ERR_PTR(err) : bdev; 849 return err ? ERR_PTR(err) : bdev;
851} 850}
852 851
@@ -975,9 +974,7 @@ void bd_set_size(struct block_device *bdev, loff_t size)
975} 974}
976EXPORT_SYMBOL(bd_set_size); 975EXPORT_SYMBOL(bd_set_size);
977 976
978static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, 977static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
979 int for_part);
980static int __blkdev_put(struct block_device *bdev, int for_part);
981 978
982/* 979/*
983 * bd_mutex locking: 980 * bd_mutex locking:
@@ -986,17 +983,16 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
986 * mutex_lock_nested(whole->bd_mutex, 1) 983 * mutex_lock_nested(whole->bd_mutex, 1)
987 */ 984 */
988 985
989static int do_open(struct block_device *bdev, struct file *file, int for_part) 986static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
990{ 987{
991 struct gendisk *disk; 988 struct gendisk *disk;
992 struct hd_struct *part = NULL;
993 int ret; 989 int ret;
994 int partno; 990 int partno;
995 int perm = 0; 991 int perm = 0;
996 992
997 if (file->f_mode & FMODE_READ) 993 if (mode & FMODE_READ)
998 perm |= MAY_READ; 994 perm |= MAY_READ;
999 if (file->f_mode & FMODE_WRITE) 995 if (mode & FMODE_WRITE)
1000 perm |= MAY_WRITE; 996 perm |= MAY_WRITE;
1001 /* 997 /*
1002 * hooks: /n/, see "layering violations". 998 * hooks: /n/, see "layering violations".
@@ -1007,27 +1003,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1007 return ret; 1003 return ret;
1008 } 1004 }
1009 1005
1010 ret = -ENXIO;
1011 file->f_mapping = bdev->bd_inode->i_mapping;
1012
1013 lock_kernel(); 1006 lock_kernel();
1014 1007
1008 ret = -ENXIO;
1015 disk = get_gendisk(bdev->bd_dev, &partno); 1009 disk = get_gendisk(bdev->bd_dev, &partno);
1016 if (!disk) 1010 if (!disk)
1017 goto out_unlock_kernel; 1011 goto out_unlock_kernel;
1018 part = disk_get_part(disk, partno);
1019 if (!part)
1020 goto out_unlock_kernel;
1021 1012
1022 mutex_lock_nested(&bdev->bd_mutex, for_part); 1013 mutex_lock_nested(&bdev->bd_mutex, for_part);
1023 if (!bdev->bd_openers) { 1014 if (!bdev->bd_openers) {
1024 bdev->bd_disk = disk; 1015 bdev->bd_disk = disk;
1025 bdev->bd_part = part;
1026 bdev->bd_contains = bdev; 1016 bdev->bd_contains = bdev;
1027 if (!partno) { 1017 if (!partno) {
1028 struct backing_dev_info *bdi; 1018 struct backing_dev_info *bdi;
1019
1020 ret = -ENXIO;
1021 bdev->bd_part = disk_get_part(disk, partno);
1022 if (!bdev->bd_part)
1023 goto out_clear;
1024
1029 if (disk->fops->open) { 1025 if (disk->fops->open) {
1030 ret = disk->fops->open(bdev->bd_inode, file); 1026 ret = disk->fops->open(bdev, mode);
1031 if (ret) 1027 if (ret)
1032 goto out_clear; 1028 goto out_clear;
1033 } 1029 }
@@ -1047,28 +1043,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1047 if (!whole) 1043 if (!whole)
1048 goto out_clear; 1044 goto out_clear;
1049 BUG_ON(for_part); 1045 BUG_ON(for_part);
1050 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); 1046 ret = __blkdev_get(whole, mode, 1);
1051 if (ret) 1047 if (ret)
1052 goto out_clear; 1048 goto out_clear;
1053 bdev->bd_contains = whole; 1049 bdev->bd_contains = whole;
1054 bdev->bd_inode->i_data.backing_dev_info = 1050 bdev->bd_inode->i_data.backing_dev_info =
1055 whole->bd_inode->i_data.backing_dev_info; 1051 whole->bd_inode->i_data.backing_dev_info;
1052 bdev->bd_part = disk_get_part(disk, partno);
1056 if (!(disk->flags & GENHD_FL_UP) || 1053 if (!(disk->flags & GENHD_FL_UP) ||
1057 !part || !part->nr_sects) { 1054 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1058 ret = -ENXIO; 1055 ret = -ENXIO;
1059 goto out_clear; 1056 goto out_clear;
1060 } 1057 }
1061 bd_set_size(bdev, (loff_t)part->nr_sects << 9); 1058 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1062 } 1059 }
1063 } else { 1060 } else {
1064 disk_put_part(part);
1065 put_disk(disk); 1061 put_disk(disk);
1066 module_put(disk->fops->owner); 1062 module_put(disk->fops->owner);
1067 part = NULL;
1068 disk = NULL; 1063 disk = NULL;
1069 if (bdev->bd_contains == bdev) { 1064 if (bdev->bd_contains == bdev) {
1070 if (bdev->bd_disk->fops->open) { 1065 if (bdev->bd_disk->fops->open) {
1071 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); 1066 ret = bdev->bd_disk->fops->open(bdev, mode);
1072 if (ret) 1067 if (ret)
1073 goto out_unlock_bdev; 1068 goto out_unlock_bdev;
1074 } 1069 }
@@ -1084,18 +1079,18 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1084 return 0; 1079 return 0;
1085 1080
1086 out_clear: 1081 out_clear:
1082 disk_put_part(bdev->bd_part);
1087 bdev->bd_disk = NULL; 1083 bdev->bd_disk = NULL;
1088 bdev->bd_part = NULL; 1084 bdev->bd_part = NULL;
1089 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1085 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1090 if (bdev != bdev->bd_contains) 1086 if (bdev != bdev->bd_contains)
1091 __blkdev_put(bdev->bd_contains, 1); 1087 __blkdev_put(bdev->bd_contains, mode, 1);
1092 bdev->bd_contains = NULL; 1088 bdev->bd_contains = NULL;
1093 out_unlock_bdev: 1089 out_unlock_bdev:
1094 mutex_unlock(&bdev->bd_mutex); 1090 mutex_unlock(&bdev->bd_mutex);
1095 out_unlock_kernel: 1091 out_unlock_kernel:
1096 unlock_kernel(); 1092 unlock_kernel();
1097 1093
1098 disk_put_part(part);
1099 if (disk) 1094 if (disk)
1100 module_put(disk->fops->owner); 1095 module_put(disk->fops->owner);
1101 put_disk(disk); 1096 put_disk(disk);
@@ -1104,28 +1099,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1104 return ret; 1099 return ret;
1105} 1100}
1106 1101
1107static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, 1102int blkdev_get(struct block_device *bdev, fmode_t mode)
1108 int for_part)
1109{ 1103{
1110 /* 1104 return __blkdev_get(bdev, mode, 0);
1111 * This crockload is due to bad choice of ->open() type.
1112 * It will go away.
1113 * For now, block device ->open() routine must _not_
1114 * examine anything in 'inode' argument except ->i_rdev.
1115 */
1116 struct file fake_file = {};
1117 struct dentry fake_dentry = {};
1118 fake_file.f_mode = mode;
1119 fake_file.f_flags = flags;
1120 fake_file.f_path.dentry = &fake_dentry;
1121 fake_dentry.d_inode = bdev->bd_inode;
1122
1123 return do_open(bdev, &fake_file, for_part);
1124}
1125
1126int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
1127{
1128 return __blkdev_get(bdev, mode, flags, 0);
1129} 1105}
1130EXPORT_SYMBOL(blkdev_get); 1106EXPORT_SYMBOL(blkdev_get);
1131 1107
@@ -1142,28 +1118,36 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1142 */ 1118 */
1143 filp->f_flags |= O_LARGEFILE; 1119 filp->f_flags |= O_LARGEFILE;
1144 1120
1121 if (filp->f_flags & O_NDELAY)
1122 filp->f_mode |= FMODE_NDELAY;
1123 if (filp->f_flags & O_EXCL)
1124 filp->f_mode |= FMODE_EXCL;
1125 if ((filp->f_flags & O_ACCMODE) == 3)
1126 filp->f_mode |= FMODE_WRITE_IOCTL;
1127
1145 bdev = bd_acquire(inode); 1128 bdev = bd_acquire(inode);
1146 if (bdev == NULL) 1129 if (bdev == NULL)
1147 return -ENOMEM; 1130 return -ENOMEM;
1148 1131
1149 res = do_open(bdev, filp, 0); 1132 filp->f_mapping = bdev->bd_inode->i_mapping;
1133
1134 res = blkdev_get(bdev, filp->f_mode);
1150 if (res) 1135 if (res)
1151 return res; 1136 return res;
1152 1137
1153 if (!(filp->f_flags & O_EXCL) ) 1138 if (!(filp->f_mode & FMODE_EXCL))
1154 return 0; 1139 return 0;
1155 1140
1156 if (!(res = bd_claim(bdev, filp))) 1141 if (!(res = bd_claim(bdev, filp)))
1157 return 0; 1142 return 0;
1158 1143
1159 blkdev_put(bdev); 1144 blkdev_put(bdev, filp->f_mode);
1160 return res; 1145 return res;
1161} 1146}
1162 1147
1163static int __blkdev_put(struct block_device *bdev, int for_part) 1148static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1164{ 1149{
1165 int ret = 0; 1150 int ret = 0;
1166 struct inode *bd_inode = bdev->bd_inode;
1167 struct gendisk *disk = bdev->bd_disk; 1151 struct gendisk *disk = bdev->bd_disk;
1168 struct block_device *victim = NULL; 1152 struct block_device *victim = NULL;
1169 1153
@@ -1178,7 +1162,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1178 } 1162 }
1179 if (bdev->bd_contains == bdev) { 1163 if (bdev->bd_contains == bdev) {
1180 if (disk->fops->release) 1164 if (disk->fops->release)
1181 ret = disk->fops->release(bd_inode, NULL); 1165 ret = disk->fops->release(disk, mode);
1182 } 1166 }
1183 if (!bdev->bd_openers) { 1167 if (!bdev->bd_openers) {
1184 struct module *owner = disk->fops->owner; 1168 struct module *owner = disk->fops->owner;
@@ -1197,13 +1181,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1197 mutex_unlock(&bdev->bd_mutex); 1181 mutex_unlock(&bdev->bd_mutex);
1198 bdput(bdev); 1182 bdput(bdev);
1199 if (victim) 1183 if (victim)
1200 __blkdev_put(victim, 1); 1184 __blkdev_put(victim, mode, 1);
1201 return ret; 1185 return ret;
1202} 1186}
1203 1187
1204int blkdev_put(struct block_device *bdev) 1188int blkdev_put(struct block_device *bdev, fmode_t mode)
1205{ 1189{
1206 return __blkdev_put(bdev, 0); 1190 return __blkdev_put(bdev, mode, 0);
1207} 1191}
1208EXPORT_SYMBOL(blkdev_put); 1192EXPORT_SYMBOL(blkdev_put);
1209 1193
@@ -1212,12 +1196,16 @@ static int blkdev_close(struct inode * inode, struct file * filp)
1212 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1196 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1213 if (bdev->bd_holder == filp) 1197 if (bdev->bd_holder == filp)
1214 bd_release(bdev); 1198 bd_release(bdev);
1215 return blkdev_put(bdev); 1199 return blkdev_put(bdev, filp->f_mode);
1216} 1200}
1217 1201
1218static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1202static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1219{ 1203{
1220 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 1204 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1205 fmode_t mode = file->f_mode;
1206 if (file->f_flags & O_NDELAY)
1207 mode |= FMODE_NDELAY_NOW;
1208 return blkdev_ioctl(bdev, mode, cmd, arg);
1221} 1209}
1222 1210
1223static const struct address_space_operations def_blk_aops = { 1211static const struct address_space_operations def_blk_aops = {
@@ -1253,7 +1241,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1253 int res; 1241 int res;
1254 mm_segment_t old_fs = get_fs(); 1242 mm_segment_t old_fs = get_fs();
1255 set_fs(KERNEL_DS); 1243 set_fs(KERNEL_DS);
1256 res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); 1244 res = blkdev_ioctl(bdev, 0, cmd, arg);
1257 set_fs(old_fs); 1245 set_fs(old_fs);
1258 return res; 1246 return res;
1259} 1247}
@@ -1268,33 +1256,33 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1268 * namespace if possible and return it. Return ERR_PTR(error) 1256 * namespace if possible and return it. Return ERR_PTR(error)
1269 * otherwise. 1257 * otherwise.
1270 */ 1258 */
1271struct block_device *lookup_bdev(const char *path) 1259struct block_device *lookup_bdev(const char *pathname)
1272{ 1260{
1273 struct block_device *bdev; 1261 struct block_device *bdev;
1274 struct inode *inode; 1262 struct inode *inode;
1275 struct nameidata nd; 1263 struct path path;
1276 int error; 1264 int error;
1277 1265
1278 if (!path || !*path) 1266 if (!pathname || !*pathname)
1279 return ERR_PTR(-EINVAL); 1267 return ERR_PTR(-EINVAL);
1280 1268
1281 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1269 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1282 if (error) 1270 if (error)
1283 return ERR_PTR(error); 1271 return ERR_PTR(error);
1284 1272
1285 inode = nd.path.dentry->d_inode; 1273 inode = path.dentry->d_inode;
1286 error = -ENOTBLK; 1274 error = -ENOTBLK;
1287 if (!S_ISBLK(inode->i_mode)) 1275 if (!S_ISBLK(inode->i_mode))
1288 goto fail; 1276 goto fail;
1289 error = -EACCES; 1277 error = -EACCES;
1290 if (nd.path.mnt->mnt_flags & MNT_NODEV) 1278 if (path.mnt->mnt_flags & MNT_NODEV)
1291 goto fail; 1279 goto fail;
1292 error = -ENOMEM; 1280 error = -ENOMEM;
1293 bdev = bd_acquire(inode); 1281 bdev = bd_acquire(inode);
1294 if (!bdev) 1282 if (!bdev)
1295 goto fail; 1283 goto fail;
1296out: 1284out:
1297 path_put(&nd.path); 1285 path_put(&path);
1298 return bdev; 1286 return bdev;
1299fail: 1287fail:
1300 bdev = ERR_PTR(error); 1288 bdev = ERR_PTR(error);
@@ -1303,32 +1291,29 @@ fail:
1303EXPORT_SYMBOL(lookup_bdev); 1291EXPORT_SYMBOL(lookup_bdev);
1304 1292
1305/** 1293/**
1306 * open_bdev_excl - open a block device by name and set it up for use 1294 * open_bdev_exclusive - open a block device by name and set it up for use
1307 * 1295 *
1308 * @path: special file representing the block device 1296 * @path: special file representing the block device
1309 * @flags: %MS_RDONLY for opening read-only 1297 * @mode: FMODE_... combination to pass be used
1310 * @holder: owner for exclusion 1298 * @holder: owner for exclusion
1311 * 1299 *
1312 * Open the blockdevice described by the special file at @path, claim it 1300 * Open the blockdevice described by the special file at @path, claim it
1313 * for the @holder. 1301 * for the @holder.
1314 */ 1302 */
1315struct block_device *open_bdev_excl(const char *path, int flags, void *holder) 1303struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1316{ 1304{
1317 struct block_device *bdev; 1305 struct block_device *bdev;
1318 mode_t mode = FMODE_READ;
1319 int error = 0; 1306 int error = 0;
1320 1307
1321 bdev = lookup_bdev(path); 1308 bdev = lookup_bdev(path);
1322 if (IS_ERR(bdev)) 1309 if (IS_ERR(bdev))
1323 return bdev; 1310 return bdev;
1324 1311
1325 if (!(flags & MS_RDONLY)) 1312 error = blkdev_get(bdev, mode);
1326 mode |= FMODE_WRITE;
1327 error = blkdev_get(bdev, mode, 0);
1328 if (error) 1313 if (error)
1329 return ERR_PTR(error); 1314 return ERR_PTR(error);
1330 error = -EACCES; 1315 error = -EACCES;
1331 if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) 1316 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1332 goto blkdev_put; 1317 goto blkdev_put;
1333 error = bd_claim(bdev, holder); 1318 error = bd_claim(bdev, holder);
1334 if (error) 1319 if (error)
@@ -1337,26 +1322,27 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
1337 return bdev; 1322 return bdev;
1338 1323
1339blkdev_put: 1324blkdev_put:
1340 blkdev_put(bdev); 1325 blkdev_put(bdev, mode);
1341 return ERR_PTR(error); 1326 return ERR_PTR(error);
1342} 1327}
1343 1328
1344EXPORT_SYMBOL(open_bdev_excl); 1329EXPORT_SYMBOL(open_bdev_exclusive);
1345 1330
1346/** 1331/**
1347 * close_bdev_excl - release a blockdevice openen by open_bdev_excl() 1332 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1348 * 1333 *
1349 * @bdev: blockdevice to close 1334 * @bdev: blockdevice to close
1335 * @mode: mode, must match that used to open.
1350 * 1336 *
1351 * This is the counterpart to open_bdev_excl(). 1337 * This is the counterpart to open_bdev_exclusive().
1352 */ 1338 */
1353void close_bdev_excl(struct block_device *bdev) 1339void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1354{ 1340{
1355 bd_release(bdev); 1341 bd_release(bdev);
1356 blkdev_put(bdev); 1342 blkdev_put(bdev, mode);
1357} 1343}
1358 1344
1359EXPORT_SYMBOL(close_bdev_excl); 1345EXPORT_SYMBOL(close_bdev_exclusive);
1360 1346
1361int __invalidate_device(struct block_device *bdev) 1347int __invalidate_device(struct block_device *bdev)
1362{ 1348{
diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3b..6569fda5cfed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
76 76
77void unlock_buffer(struct buffer_head *bh) 77void unlock_buffer(struct buffer_head *bh)
78{ 78{
79 smp_mb__before_clear_bit(); 79 clear_bit_unlock(BH_Lock, &bh->b_state);
80 clear_buffer_locked(bh);
81 smp_mb__after_clear_bit(); 80 smp_mb__after_clear_bit();
82 wake_up_bit(&bh->b_state, BH_Lock); 81 wake_up_bit(&bh->b_state, BH_Lock);
83} 82}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 262fa10e213d..700697a72618 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -386,15 +386,22 @@ static int chrdev_open(struct inode *inode, struct file *filp)
386 cdev_put(new); 386 cdev_put(new);
387 if (ret) 387 if (ret)
388 return ret; 388 return ret;
389
390 ret = -ENXIO;
389 filp->f_op = fops_get(p->ops); 391 filp->f_op = fops_get(p->ops);
390 if (!filp->f_op) { 392 if (!filp->f_op)
391 cdev_put(p); 393 goto out_cdev_put;
392 return -ENXIO; 394
393 } 395 if (filp->f_op->open) {
394 if (filp->f_op->open)
395 ret = filp->f_op->open(inode,filp); 396 ret = filp->f_op->open(inode,filp);
396 if (ret) 397 if (ret)
397 cdev_put(p); 398 goto out_cdev_put;
399 }
400
401 return 0;
402
403 out_cdev_put:
404 cdev_put(p);
398 return ret; 405 return ret;
399} 406}
400 407
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 06e521a945c3..8855331b2fba 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,15 @@
1Version 1.55
2------------
3Various fixes to make delete of open files behavior more predictable
4(when delete of an open file fails we mark the file as "delete-on-close"
5in a way that more servers accept, but only if we can first rename the
6file to a temporary name). Add experimental support for more safely
7handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
8sends, and also let tcp autotune the socket send and receive buffers.
9This reduces the number of EAGAIN errors returned by TCP/IP in
10high stress workloads (and the number of retries on socket writes
11when sending large SMBWriteX requests).
12
1Version 1.54 13Version 1.54
2------------ 14------------
3Fix premature write failure on congested networks (we would give up 15Fix premature write failure on congested networks (we would give up
@@ -13,6 +25,7 @@ on dns_upcall (resolving DFS referralls). Fix plain text password
13authentication (requires setting SecurityFlags to 0x30030 to enable 25authentication (requires setting SecurityFlags to 0x30030 to enable
14lanman and plain text though). Fix writes to be at correct offset when 26lanman and plain text though). Fix writes to be at correct offset when
15file is open with O_APPEND and file is on a directio (forcediretio) mount. 27file is open with O_APPEND and file is on a directio (forcediretio) mount.
28Fix bug in rewinding readdir directory searches. Add nodfs mount option.
16 29
17Version 1.53 30Version 1.53
18------------ 31------------
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 000000000000..341a98965bd0
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
1config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET
4 select NLS
5 help
6 This is the client VFS module for the Common Internet File System
7 (CIFS) protocol which is the successor to the Server Message Block
8 (SMB) protocol, the native file sharing mechanism for most early
9 PC operating systems. The CIFS protocol is fully supported by
10 file servers such as Windows 2000 (including Windows 2003, NT 4
11 and Windows XP) as well by Samba (which provides excellent CIFS
12 server support for Linux and many other operating systems). Limited
13 support for OS/2 and Windows ME and similar servers is provided as
14 well.
15
16 The cifs module provides an advanced network file system
17 client for mounting to CIFS compliant servers. It includes
18 support for DFS (hierarchical name space), secure per-user
19 session establishment via Kerberos or NTLM or NTLMv2,
20 safe distributed caching (oplock), optional packet
21 signing, Unicode and other internationalization improvements.
22 If you need to mount to Samba or Windows from this machine, say Y.
23
24config CIFS_STATS
25 bool "CIFS statistics"
26 depends on CIFS
27 help
28 Enabling this option will cause statistics for each server share
29 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
30
31config CIFS_STATS2
32 bool "Extended statistics"
33 depends on CIFS_STATS
34 help
35 Enabling this option will allow more detailed statistics on SMB
36 request timing to be displayed in /proc/fs/cifs/DebugData and also
37 allow optional logging of slow responses to dmesg (depending on the
38 value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
39 These additional statistics may have a minor effect on performance
40 and memory utilization.
41
42 Unless you are a developer or are doing network performance analysis
43 or tuning, say N.
44
45config CIFS_WEAK_PW_HASH
46 bool "Support legacy servers which use weaker LANMAN security"
47 depends on CIFS
48 help
49 Modern CIFS servers including Samba and most Windows versions
50 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
51 security mechanisms. These hash the password more securely
52 than the mechanisms used in the older LANMAN version of the
53 SMB protocol but LANMAN based authentication is needed to
54 establish sessions with some old SMB servers.
55
56 Enabling this option allows the cifs module to mount to older
57 LANMAN based servers such as OS/2 and Windows 95, but such
58 mounts may be less secure than mounts using NTLM or more recent
59 security mechanisms if you are on a public network. Unless you
60 have a need to access old SMB servers (and are on a private
61 network) you probably want to say N. Even if this support
62 is enabled in the kernel build, LANMAN authentication will not be
63 used automatically. At runtime LANMAN mounts are disabled but
64 can be set to required (or optional) either in
65 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
66 option on the mount command. This support is disabled by
67 default in order to reduce the possibility of a downgrade
68 attack.
69
70 If unsure, say N.
71
72config CIFS_UPCALL
73 bool "Kerberos/SPNEGO advanced session setup"
74 depends on CIFS && KEYS
75 help
76 Enables an upcall mechanism for CIFS which accesses
77 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
78 Kerberos tickets which are needed to mount to certain secure servers
79 (for which more secure Kerberos authentication is required). If
80 unsure, say N.
81
82config CIFS_XATTR
83 bool "CIFS extended attributes"
84 depends on CIFS
85 help
86 Extended attributes are name:value pairs associated with inodes by
87 the kernel or by users (see the attr(5) manual page, or visit
88 <http://acl.bestbits.at/> for details). CIFS maps the name of
89 extended attributes beginning with the user namespace prefix
90 to SMB/CIFS EAs. EAs are stored on Windows servers without the
91 user namespace prefix, but their names are seen by Linux cifs clients
92 prefaced by the user namespace prefix. The system namespace
93 (used by some filesystems to store ACLs) is not supported at
94 this time.
95
96 If unsure, say N.
97
98config CIFS_POSIX
99 bool "CIFS POSIX Extensions"
100 depends on CIFS_XATTR
101 help
102 Enabling this option will cause the cifs client to attempt to
103 negotiate a newer dialect with servers, such as Samba 3.0.5
104 or later, that optionally can handle more POSIX like (rather
105 than Windows like) file behavior. It also enables
106 support for POSIX ACLs (getfacl and setfacl) to servers
107 (such as Samba 3.10 and later) which can negotiate
108 CIFS POSIX ACL support. If unsure, say N.
109
110config CIFS_DEBUG2
111 bool "Enable additional CIFS debugging routines"
112 depends on CIFS
113 help
114 Enabling this option adds a few more debugging routines
115 to the cifs code which slightly increases the size of
116 the cifs module and can cause additional logging of debug
117 messages in some error paths, slowing performance. This
118 option can be turned off unless you are debugging
119 cifs problems. If unsure, say N.
120
121config CIFS_EXPERIMENTAL
122 bool "CIFS Experimental Features (EXPERIMENTAL)"
123 depends on CIFS && EXPERIMENTAL
124 help
125 Enables cifs features under testing. These features are
126 experimental and currently include DFS support and directory
127 change notification ie fcntl(F_DNOTIFY), as well as the upcall
128 mechanism which will be used for Kerberos session negotiation
129 and uid remapping. Some of these features also may depend on
130 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
131 (which is disabled by default). See the file fs/cifs/README
132 for more details. If unsure, say N.
133
134config CIFS_DFS_UPCALL
135 bool "DFS feature support (EXPERIMENTAL)"
136 depends on CIFS_EXPERIMENTAL
137 depends on KEYS
138 help
139 Enables an upcall mechanism for CIFS which contacts userspace
140 helper utilities to provide server name resolution (host names to
141 IP addresses) which is needed for implicit mounts of DFS junction
142 points. If unsure, say N.
diff --git a/fs/cifs/README b/fs/cifs/README
index bd2343d4c6a6..a439dc1739b3 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -463,6 +463,9 @@ A partial list of the supported mount options follows:
463 with cifs style mandatory byte range locks (and most 463 with cifs style mandatory byte range locks (and most
464 cifs servers do not yet support requesting advisory 464 cifs servers do not yet support requesting advisory
465 byte range locks). 465 byte range locks).
466 nodfs Disable DFS (global name space support) even if the
467 server claims to support it. This can help work around
468 a problem with parsing of DFS paths with Samba 3.0.24 server.
466 remount remount the share (often used to change from ro to rw mounts 469 remount remount the share (often used to change from ro to rw mounts
467 or vice versa) 470 or vice versa)
468 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for 471 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
@@ -488,6 +491,19 @@ A partial list of the supported mount options follows:
488 Note that this differs from the sign mount option in that it 491 Note that this differs from the sign mount option in that it
489 causes encryption of data sent over this mounted share but other 492 causes encryption of data sent over this mounted share but other
490 shares mounted to the same server are unaffected. 493 shares mounted to the same server are unaffected.
494 locallease This option is rarely needed. Fcntl F_SETLEASE is
495 used by some applications such as Samba and NFSv4 server to
496 check to see whether a file is cacheable. CIFS has no way
497 to explicitly request a lease, but can check whether a file
498 is cacheable (oplocked). Unfortunately, even if a file
499 is not oplocked, it could still be cacheable (ie cifs client
500 could grant fcntl leases if no other local processes are using
501 the file) for cases for example such as when the server does not
502 support oplocks and the user is sure that the only updates to
503 the file will be from this client. Specifying this mount option
504 will allow the cifs client to check for leases (only) locally
505 for files which are not oplocked instead of denying leases
506 in that case. (EXPERIMENTAL)
491 sec Security mode. Allowed values are: 507 sec Security mode. Allowed values are:
492 none attempt to connection as a null user (no name) 508 none attempt to connection as a null user (no name)
493 krb5 Use Kerberos version 5 authentication 509 krb5 Use Kerberos version 5 authentication
@@ -638,6 +654,9 @@ requires enabling CONFIG_CIFS_EXPERIMENTAL
638 cifsacl support needed to retrieve approximated mode bits based on 654 cifsacl support needed to retrieve approximated mode bits based on
639 the contents on the CIFS ACL. 655 the contents on the CIFS ACL.
640 656
657 lease support: cifs will check the oplock state before calling into
658 the vfs to see if we can grant a lease on a file.
659
641 DNOTIFY fcntl: needed for support of directory change 660 DNOTIFY fcntl: needed for support of directory change
642 notification and perhaps later for file leases) 661 notification and perhaps later for file leases)
643 662
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 25ecbd5b0404..ac5915d61dca 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask)
275 275
276 cifs_sb = CIFS_SB(inode->i_sb); 276 cifs_sb = CIFS_SB(inode->i_sb);
277 277
278 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 278 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
279 return 0; 279 if ((mask & MAY_EXEC) && !execute_ok(inode))
280 else /* file mode might have been restricted at mount time 280 return -EACCES;
281 else
282 return 0;
283 } else /* file mode might have been restricted at mount time
281 on the client (above and beyond ACL on servers) for 284 on the client (above and beyond ACL on servers) for
282 servers which do not support setting and viewing mode bits, 285 servers which do not support setting and viewing mode bits,
283 so allowing client to check permissions is useful */ 286 so allowing client to check permissions is useful */
@@ -309,6 +312,7 @@ cifs_alloc_inode(struct super_block *sb)
309 file data or metadata */ 312 file data or metadata */
310 cifs_inode->clientCanCacheRead = false; 313 cifs_inode->clientCanCacheRead = false;
311 cifs_inode->clientCanCacheAll = false; 314 cifs_inode->clientCanCacheAll = false;
315 cifs_inode->delete_pending = false;
312 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 316 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
313 317
314 /* Can not set i_flags here - they get immediately overwritten 318 /* Can not set i_flags here - they get immediately overwritten
@@ -617,6 +621,37 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
617 return generic_file_llseek_unlocked(file, offset, origin); 621 return generic_file_llseek_unlocked(file, offset, origin);
618} 622}
619 623
624#ifdef CONFIG_CIFS_EXPERIMENTAL
625static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
626{
627 /* note that this is called by vfs setlease with the BKL held
628 although I doubt that BKL is needed here in cifs */
629 struct inode *inode = file->f_path.dentry->d_inode;
630
631 if (!(S_ISREG(inode->i_mode)))
632 return -EINVAL;
633
634 /* check if file is oplocked */
635 if (((arg == F_RDLCK) &&
636 (CIFS_I(inode)->clientCanCacheRead)) ||
637 ((arg == F_WRLCK) &&
638 (CIFS_I(inode)->clientCanCacheAll)))
639 return generic_setlease(file, arg, lease);
640 else if (CIFS_SB(inode->i_sb)->tcon->local_lease &&
641 !CIFS_I(inode)->clientCanCacheRead)
642 /* If the server claims to support oplock on this
643 file, then we still need to check oplock even
644 if the local_lease mount option is set, but there
645 are servers which do not support oplock for which
646 this mount option may be useful if the user
647 knows that the file won't be changed on the server
648 by anyone else */
649 return generic_setlease(file, arg, lease);
650 else
651 return -EAGAIN;
652}
653#endif
654
620struct file_system_type cifs_fs_type = { 655struct file_system_type cifs_fs_type = {
621 .owner = THIS_MODULE, 656 .owner = THIS_MODULE,
622 .name = "cifs", 657 .name = "cifs",
@@ -695,6 +730,7 @@ const struct file_operations cifs_file_ops = {
695 730
696#ifdef CONFIG_CIFS_EXPERIMENTAL 731#ifdef CONFIG_CIFS_EXPERIMENTAL
697 .dir_notify = cifs_dir_notify, 732 .dir_notify = cifs_dir_notify,
733 .setlease = cifs_setlease,
698#endif /* CONFIG_CIFS_EXPERIMENTAL */ 734#endif /* CONFIG_CIFS_EXPERIMENTAL */
699}; 735};
700 736
@@ -715,6 +751,7 @@ const struct file_operations cifs_file_direct_ops = {
715 .llseek = cifs_llseek, 751 .llseek = cifs_llseek,
716#ifdef CONFIG_CIFS_EXPERIMENTAL 752#ifdef CONFIG_CIFS_EXPERIMENTAL
717 .dir_notify = cifs_dir_notify, 753 .dir_notify = cifs_dir_notify,
754 .setlease = cifs_setlease,
718#endif /* CONFIG_CIFS_EXPERIMENTAL */ 755#endif /* CONFIG_CIFS_EXPERIMENTAL */
719}; 756};
720const struct file_operations cifs_file_nobrl_ops = { 757const struct file_operations cifs_file_nobrl_ops = {
@@ -735,6 +772,7 @@ const struct file_operations cifs_file_nobrl_ops = {
735 772
736#ifdef CONFIG_CIFS_EXPERIMENTAL 773#ifdef CONFIG_CIFS_EXPERIMENTAL
737 .dir_notify = cifs_dir_notify, 774 .dir_notify = cifs_dir_notify,
775 .setlease = cifs_setlease,
738#endif /* CONFIG_CIFS_EXPERIMENTAL */ 776#endif /* CONFIG_CIFS_EXPERIMENTAL */
739}; 777};
740 778
@@ -754,6 +792,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
754 .llseek = cifs_llseek, 792 .llseek = cifs_llseek,
755#ifdef CONFIG_CIFS_EXPERIMENTAL 793#ifdef CONFIG_CIFS_EXPERIMENTAL
756 .dir_notify = cifs_dir_notify, 794 .dir_notify = cifs_dir_notify,
795 .setlease = cifs_setlease,
757#endif /* CONFIG_CIFS_EXPERIMENTAL */ 796#endif /* CONFIG_CIFS_EXPERIMENTAL */
758}; 797};
759 798
@@ -765,6 +804,7 @@ const struct file_operations cifs_dir_ops = {
765 .dir_notify = cifs_dir_notify, 804 .dir_notify = cifs_dir_notify,
766#endif /* CONFIG_CIFS_EXPERIMENTAL */ 805#endif /* CONFIG_CIFS_EXPERIMENTAL */
767 .unlocked_ioctl = cifs_ioctl, 806 .unlocked_ioctl = cifs_ioctl,
807 .llseek = generic_file_llseek,
768}; 808};
769 809
770static void 810static void
@@ -945,6 +985,12 @@ static int cifs_oplock_thread(void *dummyarg)
945 the call */ 985 the call */
946 /* mutex_lock(&inode->i_mutex);*/ 986 /* mutex_lock(&inode->i_mutex);*/
947 if (S_ISREG(inode->i_mode)) { 987 if (S_ISREG(inode->i_mode)) {
988#ifdef CONFIG_CIFS_EXPERIMENTAL
989 if (CIFS_I(inode)->clientCanCacheAll == 0)
990 break_lease(inode, FMODE_READ);
991 else if (CIFS_I(inode)->clientCanCacheRead == 0)
992 break_lease(inode, FMODE_WRITE);
993#endif
948 rc = filemap_fdatawrite(inode->i_mapping); 994 rc = filemap_fdatawrite(inode->i_mapping);
949 if (CIFS_I(inode)->clientCanCacheRead == 0) { 995 if (CIFS_I(inode)->clientCanCacheRead == 0) {
950 waitrc = filemap_fdatawait( 996 waitrc = filemap_fdatawait(
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f7b4a5cd837b..074de0b5064d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 101extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 102#endif /* EXPERIMENTAL */
103 103
104#define CIFS_VERSION "1.54" 104#define CIFS_VERSION "1.55"
105#endif /* _CIFSFS_H */ 105#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0d22479d99b7..1cb1189f24e0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -141,6 +141,8 @@ struct TCP_Server_Info {
141 char versionMajor; 141 char versionMajor;
142 char versionMinor; 142 char versionMinor;
143 bool svlocal:1; /* local server or remote */ 143 bool svlocal:1; /* local server or remote */
144 bool noblocksnd; /* use blocking sendmsg */
145 bool noautotune; /* do not autotune send buf sizes */
144 atomic_t socketUseCount; /* number of open cifs sessions on socket */ 146 atomic_t socketUseCount; /* number of open cifs sessions on socket */
145 atomic_t inFlight; /* number of requests on the wire to server */ 147 atomic_t inFlight; /* number of requests on the wire to server */
146#ifdef CONFIG_CIFS_STATS2 148#ifdef CONFIG_CIFS_STATS2
@@ -285,6 +287,7 @@ struct cifsTconInfo {
285 bool seal:1; /* transport encryption for this mounted share */ 287 bool seal:1; /* transport encryption for this mounted share */
286 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol 288 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
287 for this mount even if server would support */ 289 for this mount even if server would support */
290 bool local_lease:1; /* check leases (only) on local system not remote */
288 /* BB add field for back pointer to sb struct(s)? */ 291 /* BB add field for back pointer to sb struct(s)? */
289}; 292};
290 293
@@ -353,6 +356,7 @@ struct cifsInodeInfo {
353 bool clientCanCacheRead:1; /* read oplock */ 356 bool clientCanCacheRead:1; /* read oplock */
354 bool clientCanCacheAll:1; /* read and writebehind oplock */ 357 bool clientCanCacheAll:1; /* read and writebehind oplock */
355 bool oplockPending:1; 358 bool oplockPending:1;
359 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
356 struct inode vfs_inode; 360 struct inode vfs_inode;
357}; 361};
358 362
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 0cff7fe986e8..6f21ecb85ce5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,7 +36,7 @@ extern void cifs_buf_release(void *);
36extern struct smb_hdr *cifs_small_buf_get(void); 36extern struct smb_hdr *cifs_small_buf_get(void);
37extern void cifs_small_buf_release(void *); 37extern void cifs_small_buf_release(void *);
38extern int smb_send(struct socket *, struct smb_hdr *, 38extern int smb_send(struct socket *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *); 39 unsigned int /* length */ , struct sockaddr *, bool);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6f4ffe15d68d..d5eac48fc415 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1309,6 +1309,7 @@ OldOpenRetry:
1309 cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); 1309 cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile));
1310 pfile_info->EndOfFile = pfile_info->AllocationSize; 1310 pfile_info->EndOfFile = pfile_info->AllocationSize;
1311 pfile_info->NumberOfLinks = cpu_to_le32(1); 1311 pfile_info->NumberOfLinks = cpu_to_le32(1);
1312 pfile_info->DeletePending = 0;
1312 } 1313 }
1313 } 1314 }
1314 1315
@@ -1410,6 +1411,7 @@ openRetry:
1410 pfile_info->AllocationSize = pSMBr->AllocationSize; 1411 pfile_info->AllocationSize = pSMBr->AllocationSize;
1411 pfile_info->EndOfFile = pSMBr->EndOfFile; 1412 pfile_info->EndOfFile = pSMBr->EndOfFile;
1412 pfile_info->NumberOfLinks = cpu_to_le32(1); 1413 pfile_info->NumberOfLinks = cpu_to_le32(1);
1414 pfile_info->DeletePending = 0;
1413 } 1415 }
1414 } 1416 }
1415 1417
@@ -1534,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1534 __u32 bytes_sent; 1536 __u32 bytes_sent;
1535 __u16 byte_count; 1537 __u16 byte_count;
1536 1538
1537 /* cFYI(1,("write at %lld %d bytes",offset,count));*/ 1539 /* cFYI(1, ("write at %lld %d bytes", offset, count));*/
1538 if (tcon->ses == NULL) 1540 if (tcon->ses == NULL)
1539 return -ECONNABORTED; 1541 return -ECONNABORTED;
1540 1542
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4c13bcdb92a5..e9f9248cb3fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -90,6 +90,10 @@ struct smb_vol {
90 bool nocase:1; /* request case insensitive filenames */ 90 bool nocase:1; /* request case insensitive filenames */
91 bool nobrl:1; /* disable sending byte range locks to srv */ 91 bool nobrl:1; /* disable sending byte range locks to srv */
92 bool seal:1; /* request transport encryption on share */ 92 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */
95 bool noblocksnd:1;
96 bool noautotune:1;
93 unsigned int rsize; 97 unsigned int rsize;
94 unsigned int wsize; 98 unsigned int wsize;
95 unsigned int sockopt; 99 unsigned int sockopt;
@@ -100,9 +104,11 @@ struct smb_vol {
100static int ipv4_connect(struct sockaddr_in *psin_server, 104static int ipv4_connect(struct sockaddr_in *psin_server,
101 struct socket **csocket, 105 struct socket **csocket,
102 char *netb_name, 106 char *netb_name,
103 char *server_netb_name); 107 char *server_netb_name,
108 bool noblocksnd,
109 bool nosndbuf); /* ipv6 never set sndbuf size */
104static int ipv6_connect(struct sockaddr_in6 *psin_server, 110static int ipv6_connect(struct sockaddr_in6 *psin_server,
105 struct socket **csocket); 111 struct socket **csocket, bool noblocksnd);
106 112
107 113
108 /* 114 /*
@@ -124,7 +130,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
124 struct mid_q_entry *mid_entry; 130 struct mid_q_entry *mid_entry;
125 131
126 spin_lock(&GlobalMid_Lock); 132 spin_lock(&GlobalMid_Lock);
127 if (kthread_should_stop()) { 133 if (server->tcpStatus == CifsExiting) {
128 /* the demux thread will exit normally 134 /* the demux thread will exit normally
129 next time through the loop */ 135 next time through the loop */
130 spin_unlock(&GlobalMid_Lock); 136 spin_unlock(&GlobalMid_Lock);
@@ -184,16 +190,18 @@ cifs_reconnect(struct TCP_Server_Info *server)
184 spin_unlock(&GlobalMid_Lock); 190 spin_unlock(&GlobalMid_Lock);
185 up(&server->tcpSem); 191 up(&server->tcpSem);
186 192
187 while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { 193 while ((server->tcpStatus != CifsExiting) &&
194 (server->tcpStatus != CifsGood)) {
188 try_to_freeze(); 195 try_to_freeze();
189 if (server->protocolType == IPV6) { 196 if (server->protocolType == IPV6) {
190 rc = ipv6_connect(&server->addr.sockAddr6, 197 rc = ipv6_connect(&server->addr.sockAddr6,
191 &server->ssocket); 198 &server->ssocket, server->noautotune);
192 } else { 199 } else {
193 rc = ipv4_connect(&server->addr.sockAddr, 200 rc = ipv4_connect(&server->addr.sockAddr,
194 &server->ssocket, 201 &server->ssocket,
195 server->workstation_RFC1001_name, 202 server->workstation_RFC1001_name,
196 server->server_RFC1001_name); 203 server->server_RFC1001_name,
204 server->noblocksnd, server->noautotune);
197 } 205 }
198 if (rc) { 206 if (rc) {
199 cFYI(1, ("reconnect error %d", rc)); 207 cFYI(1, ("reconnect error %d", rc));
@@ -201,7 +209,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
201 } else { 209 } else {
202 atomic_inc(&tcpSesReconnectCount); 210 atomic_inc(&tcpSesReconnectCount);
203 spin_lock(&GlobalMid_Lock); 211 spin_lock(&GlobalMid_Lock);
204 if (!kthread_should_stop()) 212 if (server->tcpStatus != CifsExiting)
205 server->tcpStatus = CifsGood; 213 server->tcpStatus = CifsGood;
206 server->sequence_number = 0; 214 server->sequence_number = 0;
207 spin_unlock(&GlobalMid_Lock); 215 spin_unlock(&GlobalMid_Lock);
@@ -356,7 +364,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
356 GFP_KERNEL); 364 GFP_KERNEL);
357 365
358 set_freezable(); 366 set_freezable();
359 while (!kthread_should_stop()) { 367 while (server->tcpStatus != CifsExiting) {
360 if (try_to_freeze()) 368 if (try_to_freeze())
361 continue; 369 continue;
362 if (bigbuf == NULL) { 370 if (bigbuf == NULL) {
@@ -397,7 +405,7 @@ incomplete_rcv:
397 kernel_recvmsg(csocket, &smb_msg, 405 kernel_recvmsg(csocket, &smb_msg,
398 &iov, 1, pdu_length, 0 /* BB other flags? */); 406 &iov, 1, pdu_length, 0 /* BB other flags? */);
399 407
400 if (kthread_should_stop()) { 408 if (server->tcpStatus == CifsExiting) {
401 break; 409 break;
402 } else if (server->tcpStatus == CifsNeedReconnect) { 410 } else if (server->tcpStatus == CifsNeedReconnect) {
403 cFYI(1, ("Reconnect after server stopped responding")); 411 cFYI(1, ("Reconnect after server stopped responding"));
@@ -522,7 +530,7 @@ incomplete_rcv:
522 total_read += length) { 530 total_read += length) {
523 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 531 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
524 pdu_length - total_read, 0); 532 pdu_length - total_read, 0);
525 if (kthread_should_stop() || 533 if ((server->tcpStatus == CifsExiting) ||
526 (length == -EINTR)) { 534 (length == -EINTR)) {
527 /* then will exit */ 535 /* then will exit */
528 reconnect = 2; 536 reconnect = 2;
@@ -651,14 +659,6 @@ multi_t2_fnd:
651 spin_unlock(&GlobalMid_Lock); 659 spin_unlock(&GlobalMid_Lock);
652 wake_up_all(&server->response_q); 660 wake_up_all(&server->response_q);
653 661
654 /* don't exit until kthread_stop is called */
655 set_current_state(TASK_UNINTERRUPTIBLE);
656 while (!kthread_should_stop()) {
657 schedule();
658 set_current_state(TASK_UNINTERRUPTIBLE);
659 }
660 set_current_state(TASK_RUNNING);
661
662 /* check if we have blocked requests that need to free */ 662 /* check if we have blocked requests that need to free */
663 /* Note that cifs_max_pending is normally 50, but 663 /* Note that cifs_max_pending is normally 50, but
664 can be set at module install time to as little as two */ 664 can be set at module install time to as little as two */
@@ -755,6 +755,7 @@ multi_t2_fnd:
755 write_unlock(&GlobalSMBSeslock); 755 write_unlock(&GlobalSMBSeslock);
756 756
757 kfree(server->hostname); 757 kfree(server->hostname);
758 task_to_wake = xchg(&server->tsk, NULL);
758 kfree(server); 759 kfree(server);
759 760
760 length = atomic_dec_return(&tcpSesAllocCount); 761 length = atomic_dec_return(&tcpSesAllocCount);
@@ -762,6 +763,16 @@ multi_t2_fnd:
762 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 763 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
763 GFP_KERNEL); 764 GFP_KERNEL);
764 765
766 /* if server->tsk was NULL then wait for a signal before exiting */
767 if (!task_to_wake) {
768 set_current_state(TASK_INTERRUPTIBLE);
769 while (!signal_pending(current)) {
770 schedule();
771 set_current_state(TASK_INTERRUPTIBLE);
772 }
773 set_current_state(TASK_RUNNING);
774 }
775
765 return 0; 776 return 0;
766} 777}
767 778
@@ -1186,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1186 /* ignore */ 1197 /* ignore */
1187 } else if (strnicmp(data, "rw", 2) == 0) { 1198 } else if (strnicmp(data, "rw", 2) == 0) {
1188 vol->rw = true; 1199 vol->rw = true;
1200 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1201 vol->noblocksnd = 1;
1202 } else if (strnicmp(data, "noautotune", 10) == 0) {
1203 vol->noautotune = 1;
1189 } else if ((strnicmp(data, "suid", 4) == 0) || 1204 } else if ((strnicmp(data, "suid", 4) == 0) ||
1190 (strnicmp(data, "nosuid", 6) == 0) || 1205 (strnicmp(data, "nosuid", 6) == 0) ||
1191 (strnicmp(data, "exec", 4) == 0) || 1206 (strnicmp(data, "exec", 4) == 0) ||
@@ -1218,6 +1233,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1218 vol->sfu_emul = 1; 1233 vol->sfu_emul = 1;
1219 } else if (strnicmp(data, "nosfu", 5) == 0) { 1234 } else if (strnicmp(data, "nosfu", 5) == 0) {
1220 vol->sfu_emul = 0; 1235 vol->sfu_emul = 0;
1236 } else if (strnicmp(data, "nodfs", 5) == 0) {
1237 vol->nodfs = 1;
1221 } else if (strnicmp(data, "posixpaths", 10) == 0) { 1238 } else if (strnicmp(data, "posixpaths", 10) == 0) {
1222 vol->posix_paths = 1; 1239 vol->posix_paths = 1;
1223 } else if (strnicmp(data, "noposixpaths", 12) == 0) { 1240 } else if (strnicmp(data, "noposixpaths", 12) == 0) {
@@ -1268,6 +1285,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1268 vol->no_psx_acl = 0; 1285 vol->no_psx_acl = 0;
1269 } else if (strnicmp(data, "noacl", 5) == 0) { 1286 } else if (strnicmp(data, "noacl", 5) == 0) {
1270 vol->no_psx_acl = 1; 1287 vol->no_psx_acl = 1;
1288#ifdef CONFIG_CIFS_EXPERIMENTAL
1289 } else if (strnicmp(data, "locallease", 6) == 0) {
1290 vol->local_lease = 1;
1291#endif
1271 } else if (strnicmp(data, "sign", 4) == 0) { 1292 } else if (strnicmp(data, "sign", 4) == 0) {
1272 vol->secFlg |= CIFSSEC_MUST_SIGN; 1293 vol->secFlg |= CIFSSEC_MUST_SIGN;
1273 } else if (strnicmp(data, "seal", 4) == 0) { 1294 } else if (strnicmp(data, "seal", 4) == 0) {
@@ -1506,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
1506 1527
1507static int 1528static int
1508ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 1529ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1509 char *netbios_name, char *target_name) 1530 char *netbios_name, char *target_name,
1531 bool noblocksnd, bool noautotune)
1510{ 1532{
1511 int rc = 0; 1533 int rc = 0;
1512 int connected = 0; 1534 int connected = 0;
@@ -1578,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1578 (*csocket)->sk->sk_sndbuf, 1600 (*csocket)->sk->sk_sndbuf,
1579 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); 1601 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
1580 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1602 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1603 if (!noblocksnd)
1604 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1605
1581 /* make the bufsizes depend on wsize/rsize and max requests */ 1606 /* make the bufsizes depend on wsize/rsize and max requests */
1582 if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) 1607 if (noautotune) {
1583 (*csocket)->sk->sk_sndbuf = 200 * 1024; 1608 if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
1584 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) 1609 (*csocket)->sk->sk_sndbuf = 200 * 1024;
1585 (*csocket)->sk->sk_rcvbuf = 140 * 1024; 1610 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
1611 (*csocket)->sk->sk_rcvbuf = 140 * 1024;
1612 }
1586 1613
1587 /* send RFC1001 sessinit */ 1614 /* send RFC1001 sessinit */
1588 if (psin_server->sin_port == htons(RFC1001_PORT)) { 1615 if (psin_server->sin_port == htons(RFC1001_PORT)) {
@@ -1619,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1619 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1646 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1620 smb_buf->smb_buf_length = 0x81000044; 1647 smb_buf->smb_buf_length = 0x81000044;
1621 rc = smb_send(*csocket, smb_buf, 0x44, 1648 rc = smb_send(*csocket, smb_buf, 0x44,
1622 (struct sockaddr *)psin_server); 1649 (struct sockaddr *)psin_server, noblocksnd);
1623 kfree(ses_init_buf); 1650 kfree(ses_init_buf);
1624 msleep(1); /* RFC1001 layer in at least one server 1651 msleep(1); /* RFC1001 layer in at least one server
1625 requires very short break before negprot 1652 requires very short break before negprot
@@ -1639,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1639} 1666}
1640 1667
1641static int 1668static int
1642ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) 1669ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket,
1670 bool noblocksnd)
1643{ 1671{
1644 int rc = 0; 1672 int rc = 0;
1645 int connected = 0; 1673 int connected = 0;
@@ -1708,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1708 the default. sock_setsockopt not used because it expects 1736 the default. sock_setsockopt not used because it expects
1709 user space buffer */ 1737 user space buffer */
1710 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1738 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1739 if (!noblocksnd)
1740 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1741
1711 1742
1712 return rc; 1743 return rc;
1713} 1744}
@@ -1845,6 +1876,16 @@ convert_delimiter(char *path, char delim)
1845 } 1876 }
1846} 1877}
1847 1878
1879static void
1880kill_cifsd(struct TCP_Server_Info *server)
1881{
1882 struct task_struct *task;
1883
1884 task = xchg(&server->tsk, NULL);
1885 if (task)
1886 force_sig(SIGKILL, task);
1887}
1888
1848int 1889int
1849cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 1890cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1850 char *mount_data, const char *devname) 1891 char *mount_data, const char *devname)
@@ -1961,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1961 cFYI(1, ("attempting ipv6 connect")); 2002 cFYI(1, ("attempting ipv6 connect"));
1962 /* BB should we allow ipv6 on port 139? */ 2003 /* BB should we allow ipv6 on port 139? */
1963 /* other OS never observed in Wild doing 139 with v6 */ 2004 /* other OS never observed in Wild doing 139 with v6 */
1964 rc = ipv6_connect(&sin_server6, &csocket); 2005 rc = ipv6_connect(&sin_server6, &csocket,
2006 volume_info.noblocksnd);
1965 } else 2007 } else
1966 rc = ipv4_connect(&sin_server, &csocket, 2008 rc = ipv4_connect(&sin_server, &csocket,
1967 volume_info.source_rfc1001_name, 2009 volume_info.source_rfc1001_name,
1968 volume_info.target_rfc1001_name); 2010 volume_info.target_rfc1001_name,
2011 volume_info.noblocksnd,
2012 volume_info.noautotune);
1969 if (rc < 0) { 2013 if (rc < 0) {
1970 cERROR(1, ("Error connecting to IPv4 socket. " 2014 cERROR(1, ("Error connecting to IPv4 socket. "
1971 "Aborting operation")); 2015 "Aborting operation"));
@@ -1980,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1980 sock_release(csocket); 2024 sock_release(csocket);
1981 goto out; 2025 goto out;
1982 } else { 2026 } else {
2027 srvTcp->noblocksnd = volume_info.noblocksnd;
2028 srvTcp->noautotune = volume_info.noautotune;
1983 memcpy(&srvTcp->addr.sockAddr, &sin_server, 2029 memcpy(&srvTcp->addr.sockAddr, &sin_server,
1984 sizeof(struct sockaddr_in)); 2030 sizeof(struct sockaddr_in));
1985 atomic_set(&srvTcp->inFlight, 0); 2031 atomic_set(&srvTcp->inFlight, 0);
@@ -2166,6 +2212,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2166 for the retry flag is used */ 2212 for the retry flag is used */
2167 tcon->retry = volume_info.retry; 2213 tcon->retry = volume_info.retry;
2168 tcon->nocase = volume_info.nocase; 2214 tcon->nocase = volume_info.nocase;
2215 tcon->local_lease = volume_info.local_lease;
2169 if (tcon->seal != volume_info.seal) 2216 if (tcon->seal != volume_info.seal)
2170 cERROR(1, ("transport encryption setting " 2217 cERROR(1, ("transport encryption setting "
2171 "conflicts with existing tid")); 2218 "conflicts with existing tid"));
@@ -2197,6 +2244,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2197 volume_info.UNC, 2244 volume_info.UNC,
2198 tcon, cifs_sb->local_nls); 2245 tcon, cifs_sb->local_nls);
2199 cFYI(1, ("CIFS Tcon rc = %d", rc)); 2246 cFYI(1, ("CIFS Tcon rc = %d", rc));
2247 if (volume_info.nodfs) {
2248 tcon->Flags &=
2249 ~SMB_SHARE_IS_IN_DFS;
2250 cFYI(1, ("DFS disabled (%d)",
2251 tcon->Flags));
2252 }
2200 } 2253 }
2201 if (!rc) { 2254 if (!rc) {
2202 atomic_inc(&pSesInfo->inUse); 2255 atomic_inc(&pSesInfo->inUse);
@@ -2225,14 +2278,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2225 spin_lock(&GlobalMid_Lock); 2278 spin_lock(&GlobalMid_Lock);
2226 srvTcp->tcpStatus = CifsExiting; 2279 srvTcp->tcpStatus = CifsExiting;
2227 spin_unlock(&GlobalMid_Lock); 2280 spin_unlock(&GlobalMid_Lock);
2228 if (srvTcp->tsk) { 2281 kill_cifsd(srvTcp);
2229 /* If we could verify that kthread_stop would
2230 always wake up processes blocked in
2231 tcp in recv_mesg then we could remove the
2232 send_sig call */
2233 force_sig(SIGKILL, srvTcp->tsk);
2234 kthread_stop(srvTcp->tsk);
2235 }
2236 } 2282 }
2237 /* If find_unc succeeded then rc == 0 so we can not end */ 2283 /* If find_unc succeeded then rc == 0 so we can not end */
2238 if (tcon) /* up accidently freeing someone elses tcon struct */ 2284 if (tcon) /* up accidently freeing someone elses tcon struct */
@@ -2245,19 +2291,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2245 temp_rc = CIFSSMBLogoff(xid, pSesInfo); 2291 temp_rc = CIFSSMBLogoff(xid, pSesInfo);
2246 /* if the socketUseCount is now zero */ 2292 /* if the socketUseCount is now zero */
2247 if ((temp_rc == -ESHUTDOWN) && 2293 if ((temp_rc == -ESHUTDOWN) &&
2248 (pSesInfo->server) && 2294 (pSesInfo->server))
2249 (pSesInfo->server->tsk)) { 2295 kill_cifsd(pSesInfo->server);
2250 force_sig(SIGKILL,
2251 pSesInfo->server->tsk);
2252 kthread_stop(pSesInfo->server->tsk);
2253 }
2254 } else { 2296 } else {
2255 cFYI(1, ("No session or bad tcon")); 2297 cFYI(1, ("No session or bad tcon"));
2256 if ((pSesInfo->server) && 2298 if (pSesInfo->server) {
2257 (pSesInfo->server->tsk)) { 2299 spin_lock(&GlobalMid_Lock);
2258 force_sig(SIGKILL, 2300 srvTcp->tcpStatus = CifsExiting;
2259 pSesInfo->server->tsk); 2301 spin_unlock(&GlobalMid_Lock);
2260 kthread_stop(pSesInfo->server->tsk); 2302 kill_cifsd(pSesInfo->server);
2261 } 2303 }
2262 } 2304 }
2263 sesInfoFree(pSesInfo); 2305 sesInfoFree(pSesInfo);
@@ -3544,7 +3586,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3544 int rc = 0; 3586 int rc = 0;
3545 int xid; 3587 int xid;
3546 struct cifsSesInfo *ses = NULL; 3588 struct cifsSesInfo *ses = NULL;
3547 struct task_struct *cifsd_task;
3548 char *tmp; 3589 char *tmp;
3549 3590
3550 xid = GetXid(); 3591 xid = GetXid();
@@ -3560,7 +3601,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3560 tconInfoFree(cifs_sb->tcon); 3601 tconInfoFree(cifs_sb->tcon);
3561 if ((ses) && (ses->server)) { 3602 if ((ses) && (ses->server)) {
3562 /* save off task so we do not refer to ses later */ 3603 /* save off task so we do not refer to ses later */
3563 cifsd_task = ses->server->tsk;
3564 cFYI(1, ("About to do SMBLogoff ")); 3604 cFYI(1, ("About to do SMBLogoff "));
3565 rc = CIFSSMBLogoff(xid, ses); 3605 rc = CIFSSMBLogoff(xid, ses);
3566 if (rc == -EBUSY) { 3606 if (rc == -EBUSY) {
@@ -3568,10 +3608,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3568 return 0; 3608 return 0;
3569 } else if (rc == -ESHUTDOWN) { 3609 } else if (rc == -ESHUTDOWN) {
3570 cFYI(1, ("Waking up socket by sending signal")); 3610 cFYI(1, ("Waking up socket by sending signal"));
3571 if (cifsd_task) { 3611 if (ses->server)
3572 force_sig(SIGKILL, cifsd_task); 3612 kill_cifsd(ses->server);
3573 kthread_stop(cifsd_task);
3574 }
3575 rc = 0; 3613 rc = 0;
3576 } /* else - we have an smb session 3614 } /* else - we have an smb session
3577 left on this socket do not kill cifsd */ 3615 left on this socket do not kill cifsd */
@@ -3701,7 +3739,9 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3701 cERROR(1, ("Send error in SessSetup = %d", rc)); 3739 cERROR(1, ("Send error in SessSetup = %d", rc));
3702 } else { 3740 } else {
3703 cFYI(1, ("CIFS Session Established successfully")); 3741 cFYI(1, ("CIFS Session Established successfully"));
3742 spin_lock(&GlobalMid_Lock);
3704 pSesInfo->status = CifsGood; 3743 pSesInfo->status = CifsGood;
3744 spin_unlock(&GlobalMid_Lock);
3705 } 3745 }
3706 3746
3707ss_err_exit: 3747ss_err_exit:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a0605125..ead1a3bb0256 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1791 SetPageUptodate(page); 1791 SetPageUptodate(page);
1792 unlock_page(page); 1792 unlock_page(page);
1793 if (!pagevec_add(plru_pvec, page)) 1793 if (!pagevec_add(plru_pvec, page))
1794 __pagevec_lru_add(plru_pvec); 1794 __pagevec_lru_add_file(plru_pvec);
1795 data += PAGE_CACHE_SIZE; 1795 data += PAGE_CACHE_SIZE;
1796 } 1796 }
1797 return; 1797 return;
@@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1824 pTcon = cifs_sb->tcon; 1824 pTcon = cifs_sb->tcon;
1825 1825
1826 pagevec_init(&lru_pvec, 0); 1826 pagevec_init(&lru_pvec, 0);
1827 cFYI(DBG2, ("rpages: num pages %d", num_pages)); 1827 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1828 for (i = 0; i < num_pages; ) { 1828 for (i = 0; i < num_pages; ) {
1829 unsigned contig_pages; 1829 unsigned contig_pages;
1830 struct page *tmp_page; 1830 struct page *tmp_page;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1925 bytes_read = 0; 1925 bytes_read = 0;
1926 } 1926 }
1927 1927
1928 pagevec_lru_add(&lru_pvec); 1928 pagevec_lru_add_file(&lru_pvec);
1929 1929
1930/* need to free smb_read_data buf before exit */ 1930/* need to free smb_read_data buf before exit */
1931 if (smb_read_data) { 1931 if (smb_read_data) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8c833345fc9..ff8c68de4a92 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -506,6 +506,7 @@ int cifs_get_inode_info(struct inode **pinode,
506 inode = *pinode; 506 inode = *pinode;
507 cifsInfo = CIFS_I(inode); 507 cifsInfo = CIFS_I(inode);
508 cifsInfo->cifsAttrs = attr; 508 cifsInfo->cifsAttrs = attr;
509 cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
509 cFYI(1, ("Old time %ld", cifsInfo->time)); 510 cFYI(1, ("Old time %ld", cifsInfo->time));
510 cifsInfo->time = jiffies; 511 cifsInfo->time = jiffies;
511 cFYI(1, ("New time %ld", cifsInfo->time)); 512 cFYI(1, ("New time %ld", cifsInfo->time));
@@ -772,63 +773,106 @@ out:
772 * anything else. 773 * anything else.
773 */ 774 */
774static int 775static int
775cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) 776cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid)
776{ 777{
777 int oplock = 0; 778 int oplock = 0;
778 int rc; 779 int rc;
779 __u16 netfid; 780 __u16 netfid;
781 struct inode *inode = dentry->d_inode;
780 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 782 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
781 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 783 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
782 struct cifsTconInfo *tcon = cifs_sb->tcon; 784 struct cifsTconInfo *tcon = cifs_sb->tcon;
783 __u32 dosattr; 785 __u32 dosattr, origattr;
784 FILE_BASIC_INFO *info_buf; 786 FILE_BASIC_INFO *info_buf = NULL;
785 787
786 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, 788 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
787 DELETE|FILE_WRITE_ATTRIBUTES, 789 DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
788 CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE,
789 &netfid, &oplock, NULL, cifs_sb->local_nls, 790 &netfid, &oplock, NULL, cifs_sb->local_nls,
790 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 791 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
791 if (rc != 0) 792 if (rc != 0)
792 goto out; 793 goto out;
793 794
794 /* set ATTR_HIDDEN and clear ATTR_READONLY */ 795 origattr = cifsInode->cifsAttrs;
795 cifsInode = CIFS_I(inode); 796 if (origattr == 0)
796 dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; 797 origattr |= ATTR_NORMAL;
798
799 dosattr = origattr & ~ATTR_READONLY;
797 if (dosattr == 0) 800 if (dosattr == 0)
798 dosattr |= ATTR_NORMAL; 801 dosattr |= ATTR_NORMAL;
799 dosattr |= ATTR_HIDDEN; 802 dosattr |= ATTR_HIDDEN;
800 803
801 info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); 804 /* set ATTR_HIDDEN and clear ATTR_READONLY, but only if needed */
802 if (info_buf == NULL) { 805 if (dosattr != origattr) {
803 rc = -ENOMEM; 806 info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL);
804 goto out_close; 807 if (info_buf == NULL) {
808 rc = -ENOMEM;
809 goto out_close;
810 }
811 info_buf->Attributes = cpu_to_le32(dosattr);
812 rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid,
813 current->tgid);
814 /* although we would like to mark the file hidden
815 if that fails we will still try to rename it */
816 if (rc != 0)
817 cifsInode->cifsAttrs = dosattr;
818 else
819 dosattr = origattr; /* since not able to change them */
805 } 820 }
806 info_buf->Attributes = cpu_to_le32(dosattr);
807 rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid);
808 kfree(info_buf);
809 if (rc != 0)
810 goto out_close;
811 cifsInode->cifsAttrs = dosattr;
812 821
813 /* silly-rename the file */ 822 /* rename the file */
814 CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, 823 rc = CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls,
815 cifs_sb->mnt_cifs_flags & 824 cifs_sb->mnt_cifs_flags &
816 CIFS_MOUNT_MAP_SPECIAL_CHR); 825 CIFS_MOUNT_MAP_SPECIAL_CHR);
826 if (rc != 0) {
827 rc = -ETXTBSY;
828 goto undo_setattr;
829 }
817 830
818 /* set DELETE_ON_CLOSE */ 831 /* try to set DELETE_ON_CLOSE */
819 rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); 832 if (!cifsInode->delete_pending) {
820 833 rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid,
821 /* 834 current->tgid);
822 * some samba versions return -ENOENT when we try to set the file 835 /*
823 * disposition here. Likely a samba bug, but work around it for now 836 * some samba versions return -ENOENT when we try to set the
824 */ 837 * file disposition here. Likely a samba bug, but work around
825 if (rc == -ENOENT) 838 * it for now. This means that some cifsXXX files may hang
826 rc = 0; 839 * around after they shouldn't.
840 *
841 * BB: remove this hack after more servers have the fix
842 */
843 if (rc == -ENOENT)
844 rc = 0;
845 else if (rc != 0) {
846 rc = -ETXTBSY;
847 goto undo_rename;
848 }
849 cifsInode->delete_pending = true;
850 }
827 851
828out_close: 852out_close:
829 CIFSSMBClose(xid, tcon, netfid); 853 CIFSSMBClose(xid, tcon, netfid);
830out: 854out:
855 kfree(info_buf);
831 return rc; 856 return rc;
857
858 /*
859 * reset everything back to the original state. Don't bother
860 * dealing with errors here since we can't do anything about
861 * them anyway.
862 */
863undo_rename:
864 CIFSSMBRenameOpenFile(xid, tcon, netfid, dentry->d_name.name,
865 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
866 CIFS_MOUNT_MAP_SPECIAL_CHR);
867undo_setattr:
868 if (dosattr != origattr) {
869 info_buf->Attributes = cpu_to_le32(origattr);
870 if (!CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid,
871 current->tgid))
872 cifsInode->cifsAttrs = origattr;
873 }
874
875 goto out_close;
832} 876}
833 877
834int cifs_unlink(struct inode *dir, struct dentry *dentry) 878int cifs_unlink(struct inode *dir, struct dentry *dentry)
@@ -878,7 +922,7 @@ psx_del_no_retry:
878 } else if (rc == -ENOENT) { 922 } else if (rc == -ENOENT) {
879 d_drop(dentry); 923 d_drop(dentry);
880 } else if (rc == -ETXTBSY) { 924 } else if (rc == -ETXTBSY) {
881 rc = cifs_rename_pending_delete(full_path, inode, xid); 925 rc = cifs_rename_pending_delete(full_path, dentry, xid);
882 if (rc == 0) 926 if (rc == 0)
883 drop_nlink(inode); 927 drop_nlink(inode);
884 } else if (rc == -EACCES && dosattr == 0) { 928 } else if (rc == -EACCES && dosattr == 0) {
@@ -1241,22 +1285,21 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
1241 return rc; 1285 return rc;
1242} 1286}
1243 1287
1244int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, 1288int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1245 struct inode *target_inode, struct dentry *target_direntry) 1289 struct inode *target_dir, struct dentry *target_dentry)
1246{ 1290{
1247 char *fromName = NULL; 1291 char *fromName = NULL;
1248 char *toName = NULL; 1292 char *toName = NULL;
1249 struct cifs_sb_info *cifs_sb_source; 1293 struct cifs_sb_info *cifs_sb_source;
1250 struct cifs_sb_info *cifs_sb_target; 1294 struct cifs_sb_info *cifs_sb_target;
1251 struct cifsTconInfo *pTcon; 1295 struct cifsTconInfo *tcon;
1252 FILE_UNIX_BASIC_INFO *info_buf_source = NULL; 1296 FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
1253 FILE_UNIX_BASIC_INFO *info_buf_target; 1297 FILE_UNIX_BASIC_INFO *info_buf_target;
1254 int xid; 1298 int xid, rc, tmprc;
1255 int rc;
1256 1299
1257 cifs_sb_target = CIFS_SB(target_inode->i_sb); 1300 cifs_sb_target = CIFS_SB(target_dir->i_sb);
1258 cifs_sb_source = CIFS_SB(source_inode->i_sb); 1301 cifs_sb_source = CIFS_SB(source_dir->i_sb);
1259 pTcon = cifs_sb_source->tcon; 1302 tcon = cifs_sb_source->tcon;
1260 1303
1261 xid = GetXid(); 1304 xid = GetXid();
1262 1305
@@ -1264,7 +1307,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1264 * BB: this might be allowed if same server, but different share. 1307 * BB: this might be allowed if same server, but different share.
1265 * Consider adding support for this 1308 * Consider adding support for this
1266 */ 1309 */
1267 if (pTcon != cifs_sb_target->tcon) { 1310 if (tcon != cifs_sb_target->tcon) {
1268 rc = -EXDEV; 1311 rc = -EXDEV;
1269 goto cifs_rename_exit; 1312 goto cifs_rename_exit;
1270 } 1313 }
@@ -1273,65 +1316,67 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1273 * we already have the rename sem so we do not need to 1316 * we already have the rename sem so we do not need to
1274 * grab it again here to protect the path integrity 1317 * grab it again here to protect the path integrity
1275 */ 1318 */
1276 fromName = build_path_from_dentry(source_direntry); 1319 fromName = build_path_from_dentry(source_dentry);
1277 if (fromName == NULL) { 1320 if (fromName == NULL) {
1278 rc = -ENOMEM; 1321 rc = -ENOMEM;
1279 goto cifs_rename_exit; 1322 goto cifs_rename_exit;
1280 } 1323 }
1281 1324
1282 toName = build_path_from_dentry(target_direntry); 1325 toName = build_path_from_dentry(target_dentry);
1283 if (toName == NULL) { 1326 if (toName == NULL) {
1284 rc = -ENOMEM; 1327 rc = -ENOMEM;
1285 goto cifs_rename_exit; 1328 goto cifs_rename_exit;
1286 } 1329 }
1287 1330
1288 rc = cifs_do_rename(xid, source_direntry, fromName, 1331 rc = cifs_do_rename(xid, source_dentry, fromName,
1289 target_direntry, toName); 1332 target_dentry, toName);
1290 1333
1291 if (rc == -EEXIST) { 1334 if (rc == -EEXIST && tcon->unix_ext) {
1292 if (pTcon->unix_ext) { 1335 /*
1293 /* 1336 * Are src and dst hardlinks of same inode? We can
1294 * Are src and dst hardlinks of same inode? We can 1337 * only tell with unix extensions enabled
1295 * only tell with unix extensions enabled 1338 */
1296 */ 1339 info_buf_source =
1297 info_buf_source = 1340 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO),
1298 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), 1341 GFP_KERNEL);
1299 GFP_KERNEL); 1342 if (info_buf_source == NULL) {
1300 if (info_buf_source == NULL) 1343 rc = -ENOMEM;
1301 goto unlink_target; 1344 goto cifs_rename_exit;
1302 1345 }
1303 info_buf_target = info_buf_source + 1; 1346
1304 rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, 1347 info_buf_target = info_buf_source + 1;
1305 info_buf_source, 1348 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
1306 cifs_sb_source->local_nls, 1349 info_buf_source,
1307 cifs_sb_source->mnt_cifs_flags & 1350 cifs_sb_source->local_nls,
1308 CIFS_MOUNT_MAP_SPECIAL_CHR); 1351 cifs_sb_source->mnt_cifs_flags &
1309 if (rc != 0) 1352 CIFS_MOUNT_MAP_SPECIAL_CHR);
1310 goto unlink_target; 1353 if (tmprc != 0)
1311 1354 goto unlink_target;
1312 rc = CIFSSMBUnixQPathInfo(xid, pTcon, 1355
1313 toName, info_buf_target, 1356 tmprc = CIFSSMBUnixQPathInfo(xid, tcon,
1314 cifs_sb_target->local_nls, 1357 toName, info_buf_target,
1315 /* remap based on source sb */ 1358 cifs_sb_target->local_nls,
1316 cifs_sb_source->mnt_cifs_flags & 1359 /* remap based on source sb */
1317 CIFS_MOUNT_MAP_SPECIAL_CHR); 1360 cifs_sb_source->mnt_cifs_flags &
1361 CIFS_MOUNT_MAP_SPECIAL_CHR);
1318 1362
1319 if (rc == 0 && (info_buf_source->UniqueId == 1363 if (tmprc == 0 && (info_buf_source->UniqueId ==
1320 info_buf_target->UniqueId)) 1364 info_buf_target->UniqueId)) {
1321 /* same file, POSIX says that this is a noop */ 1365 /* same file, POSIX says that this is a noop */
1322 goto cifs_rename_exit; 1366 rc = 0;
1323 } /* else ... BB we could add the same check for Windows by 1367 goto cifs_rename_exit;
1368 }
1369 } /* else ... BB we could add the same check for Windows by
1324 checking the UniqueId via FILE_INTERNAL_INFO */ 1370 checking the UniqueId via FILE_INTERNAL_INFO */
1371
1325unlink_target: 1372unlink_target:
1326 /* 1373 if ((rc == -EACCES) || (rc == -EEXIST)) {
1327 * we either can not tell the files are hardlinked (as with 1374 tmprc = cifs_unlink(target_dir, target_dentry);
1328 * Windows servers) or files are not hardlinked. Delete the 1375 if (tmprc)
1329 * target manually before renaming to follow POSIX rather than 1376 goto cifs_rename_exit;
1330 * Windows semantics 1377
1331 */ 1378 rc = cifs_do_rename(xid, source_dentry, fromName,
1332 cifs_unlink(target_inode, target_direntry); 1379 target_dentry, toName);
1333 rc = cifs_do_rename(xid, source_direntry, fromName,
1334 target_direntry, toName);
1335 } 1380 }
1336 1381
1337cifs_rename_exit: 1382cifs_rename_exit:
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 765adf12d54f..58d57299f2a0 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -762,14 +762,15 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
762 rc)); 762 rc));
763 return rc; 763 return rc;
764 } 764 }
765 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
765 } 766 }
766 767
767 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 768 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
768 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 769 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
769 cFYI(1, ("calling findnext2")); 770 cFYI(1, ("calling findnext2"));
770 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
771 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 771 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
772 &cifsFile->srch_inf); 772 &cifsFile->srch_inf);
773 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
773 if (rc) 774 if (rc)
774 return -ENOENT; 775 return -ENOENT;
775 } 776 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bf0e6d8e382a..ff8243a8fe3e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
161 161
162int 162int
163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, 163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
164 unsigned int smb_buf_length, struct sockaddr *sin) 164 unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd)
165{ 165{
166 int rc = 0; 166 int rc = 0;
167 int i = 0; 167 int i = 0;
@@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
178 smb_msg.msg_namelen = sizeof(struct sockaddr); 178 smb_msg.msg_namelen = sizeof(struct sockaddr);
179 smb_msg.msg_control = NULL; 179 smb_msg.msg_control = NULL;
180 smb_msg.msg_controllen = 0; 180 smb_msg.msg_controllen = 0;
181 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 181 if (noblocksnd)
182 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
183 else
184 smb_msg.msg_flags = MSG_NOSIGNAL;
182 185
183 /* smb header is converted in header_assemble. bcc and rest of SMB word 186 /* smb header is converted in header_assemble. bcc and rest of SMB word
184 area, and byte area if necessary, is converted to littleendian in 187 area, and byte area if necessary, is converted to littleendian in
@@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
229} 232}
230 233
231static int 234static int
232smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, 235smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
233 struct sockaddr *sin) 236 struct sockaddr *sin, bool noblocksnd)
234{ 237{
235 int rc = 0; 238 int rc = 0;
236 int i = 0; 239 int i = 0;
@@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
240 unsigned int total_len; 243 unsigned int total_len;
241 int first_vec = 0; 244 int first_vec = 0;
242 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 245 unsigned int smb_buf_length = smb_buffer->smb_buf_length;
246 struct socket *ssocket = server->ssocket;
243 247
244 if (ssocket == NULL) 248 if (ssocket == NULL)
245 return -ENOTSOCK; /* BB eventually add reconnect code here */ 249 return -ENOTSOCK; /* BB eventually add reconnect code here */
@@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
248 smb_msg.msg_namelen = sizeof(struct sockaddr); 252 smb_msg.msg_namelen = sizeof(struct sockaddr);
249 smb_msg.msg_control = NULL; 253 smb_msg.msg_control = NULL;
250 smb_msg.msg_controllen = 0; 254 smb_msg.msg_controllen = 0;
251 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 255 if (noblocksnd)
256 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
257 else
258 smb_msg.msg_flags = MSG_NOSIGNAL;
252 259
253 /* smb header is converted in header_assemble. bcc and rest of SMB word 260 /* smb header is converted in header_assemble. bcc and rest of SMB word
254 area, and byte area if necessary, is converted to littleendian in 261 area, and byte area if necessary, is converted to littleendian in
@@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
283 if (rc < 0) 290 if (rc < 0)
284 break; 291 break;
285 292
286 if (rc >= total_len) { 293 if (rc == total_len) {
287 WARN_ON(rc > total_len); 294 total_len = 0;
295 break;
296 } else if (rc > total_len) {
297 cERROR(1, ("sent %d requested %d", rc, total_len));
288 break; 298 break;
289 } 299 }
290 if (rc == 0) { 300 if (rc == 0) {
@@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
312 i = 0; /* in case we get ENOSPC on the next send */ 322 i = 0; /* in case we get ENOSPC on the next send */
313 } 323 }
314 324
325 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
326 cFYI(1, ("partial send (%d remaining), terminating session",
327 total_len));
328 /* If we have only sent part of an SMB then the next SMB
329 could be taken as the remainder of this one. We need
330 to kill the socket so the server throws away the partial
331 SMB */
332 server->tcpStatus = CifsNeedReconnect;
333 }
334
315 if (rc < 0) { 335 if (rc < 0) {
316 cERROR(1, ("Error %d sending data on socket to server", rc)); 336 cERROR(1, ("Error %d sending data on socket to server", rc));
317 } else 337 } else
@@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
518#ifdef CONFIG_CIFS_STATS2 538#ifdef CONFIG_CIFS_STATS2
519 atomic_inc(&ses->server->inSend); 539 atomic_inc(&ses->server->inSend);
520#endif 540#endif
521 rc = smb_send2(ses->server->ssocket, iov, n_vec, 541 rc = smb_send2(ses->server, iov, n_vec,
522 (struct sockaddr *) &(ses->server->addr.sockAddr)); 542 (struct sockaddr *) &(ses->server->addr.sockAddr),
543 ses->server->noblocksnd);
523#ifdef CONFIG_CIFS_STATS2 544#ifdef CONFIG_CIFS_STATS2
524 atomic_dec(&ses->server->inSend); 545 atomic_dec(&ses->server->inSend);
525 midQ->when_sent = jiffies; 546 midQ->when_sent = jiffies;
@@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
711 atomic_inc(&ses->server->inSend); 732 atomic_inc(&ses->server->inSend);
712#endif 733#endif
713 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 734 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
714 (struct sockaddr *) &(ses->server->addr.sockAddr)); 735 (struct sockaddr *) &(ses->server->addr.sockAddr),
736 ses->server->noblocksnd);
715#ifdef CONFIG_CIFS_STATS2 737#ifdef CONFIG_CIFS_STATS2
716 atomic_dec(&ses->server->inSend); 738 atomic_dec(&ses->server->inSend);
717 midQ->when_sent = jiffies; 739 midQ->when_sent = jiffies;
@@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
851 return rc; 873 return rc;
852 } 874 }
853 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 875 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
854 (struct sockaddr *) &(ses->server->addr.sockAddr)); 876 (struct sockaddr *) &(ses->server->addr.sockAddr),
877 ses->server->noblocksnd);
855 up(&ses->server->tcpSem); 878 up(&ses->server->tcpSem);
856 return rc; 879 return rc;
857} 880}
@@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
941 atomic_inc(&ses->server->inSend); 964 atomic_inc(&ses->server->inSend);
942#endif 965#endif
943 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 966 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
944 (struct sockaddr *) &(ses->server->addr.sockAddr)); 967 (struct sockaddr *) &(ses->server->addr.sockAddr),
968 ses->server->noblocksnd);
945#ifdef CONFIG_CIFS_STATS2 969#ifdef CONFIG_CIFS_STATS2
946 atomic_dec(&ses->server->inSend); 970 atomic_dec(&ses->server->inSend);
947 midQ->when_sent = jiffies; 971 midQ->when_sent = jiffies;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index c5916228243c..75b1fa90b2cb 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask)
146 if (!mask) 146 if (!mask)
147 return 0; 147 return 0;
148 148
149 if ((mask & MAY_EXEC) && !execute_ok(inode))
150 return -EACCES;
151
149 lock_kernel(); 152 lock_kernel();
150 153
151 if (coda_cache_check(inode, mask)) 154 if (coda_cache_check(inode, mask))
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c51365422aa8..773f2ce9aa06 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = {
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask) 44static int coda_ioctl_permission(struct inode *inode, int mask)
45{ 45{
46 return 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 47}
48 48
49static int coda_pioctl(struct inode * inode, struct file * filp, 49static int coda_pioctl(struct inode * inode, struct file * filp,
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index cfd29da714d1..0376ac66c44a 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -2,7 +2,7 @@
2 * An implementation of a loadable kernel mode driver providing 2 * An implementation of a loadable kernel mode driver providing
3 * multiple kernel/user space bidirectional communications links. 3 * multiple kernel/user space bidirectional communications links.
4 * 4 *
5 * Author: Alan Cox <alan@redhat.com> 5 * Author: Alan Cox <alan@lxorguk.ukuu.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/compat.c b/fs/compat.c
index 5f9ec449c799..e5f49f538502 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -869,7 +869,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
869 buf.dirent = dirent; 869 buf.dirent = dirent;
870 870
871 error = vfs_readdir(file, compat_fillonedir, &buf); 871 error = vfs_readdir(file, compat_fillonedir, &buf);
872 if (error >= 0) 872 if (buf.result)
873 error = buf.result; 873 error = buf.result;
874 874
875 fput(file); 875 fput(file);
@@ -956,9 +956,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
956 buf.error = 0; 956 buf.error = 0;
957 957
958 error = vfs_readdir(file, compat_filldir, &buf); 958 error = vfs_readdir(file, compat_filldir, &buf);
959 if (error < 0) 959 if (error >= 0)
960 goto out_putf; 960 error = buf.error;
961 error = buf.error;
962 lastdirent = buf.previous; 961 lastdirent = buf.previous;
963 if (lastdirent) { 962 if (lastdirent) {
964 if (put_user(file->f_pos, &lastdirent->d_off)) 963 if (put_user(file->f_pos, &lastdirent->d_off))
@@ -966,8 +965,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
966 else 965 else
967 error = count - buf.count; 966 error = count - buf.count;
968 } 967 }
969
970out_putf:
971 fput(file); 968 fput(file);
972out: 969out:
973 return error; 970 return error;
@@ -1047,19 +1044,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1047 buf.error = 0; 1044 buf.error = 0;
1048 1045
1049 error = vfs_readdir(file, compat_filldir64, &buf); 1046 error = vfs_readdir(file, compat_filldir64, &buf);
1050 if (error < 0) 1047 if (error >= 0)
1051 goto out_putf; 1048 error = buf.error;
1052 error = buf.error;
1053 lastdirent = buf.previous; 1049 lastdirent = buf.previous;
1054 if (lastdirent) { 1050 if (lastdirent) {
1055 typeof(lastdirent->d_off) d_off = file->f_pos; 1051 typeof(lastdirent->d_off) d_off = file->f_pos;
1056 error = -EFAULT;
1057 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1052 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1058 goto out_putf; 1053 error = -EFAULT;
1059 error = count - buf.count; 1054 else
1055 error = count - buf.count;
1060 } 1056 }
1061
1062out_putf:
1063 fput(file); 1057 fput(file);
1064out: 1058out:
1065 return error; 1059 return error;
@@ -1475,6 +1469,57 @@ out_ret:
1475 1469
1476#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1477 1471
1472static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1473 int timeval, int ret)
1474{
1475 struct timespec ts;
1476
1477 if (!p)
1478 return ret;
1479
1480 if (current->personality & STICKY_TIMEOUTS)
1481 goto sticky;
1482
1483 /* No update for zero timeout */
1484 if (!end_time->tv_sec && !end_time->tv_nsec)
1485 return ret;
1486
1487 ktime_get_ts(&ts);
1488 ts = timespec_sub(*end_time, ts);
1489 if (ts.tv_sec < 0)
1490 ts.tv_sec = ts.tv_nsec = 0;
1491
1492 if (timeval) {
1493 struct compat_timeval rtv;
1494
1495 rtv.tv_sec = ts.tv_sec;
1496 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1497
1498 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1499 return ret;
1500 } else {
1501 struct compat_timespec rts;
1502
1503 rts.tv_sec = ts.tv_sec;
1504 rts.tv_nsec = ts.tv_nsec;
1505
1506 if (!copy_to_user(p, &rts, sizeof(rts)))
1507 return ret;
1508 }
1509 /*
1510 * If an application puts its timeval in read-only memory, we
1511 * don't want the Linux-specific update to the timeval to
1512 * cause a fault after the select has completed
1513 * successfully. However, because we're not updating the
1514 * timeval, we can't restart the system call.
1515 */
1516
1517sticky:
1518 if (ret == -ERESTARTNOHAND)
1519 ret = -EINTR;
1520 return ret;
1521}
1522
1478/* 1523/*
1479 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1524 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1480 * 64-bit unsigned longs. 1525 * 64-bit unsigned longs.
@@ -1556,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1556 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1601 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1557 1602
1558int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1603int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1559 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) 1604 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1605 struct timespec *end_time)
1560{ 1606{
1561 fd_set_bits fds; 1607 fd_set_bits fds;
1562 void *bits; 1608 void *bits;
@@ -1603,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1603 zero_fd_set(n, fds.res_out); 1649 zero_fd_set(n, fds.res_out);
1604 zero_fd_set(n, fds.res_ex); 1650 zero_fd_set(n, fds.res_ex);
1605 1651
1606 ret = do_select(n, &fds, timeout); 1652 ret = do_select(n, &fds, end_time);
1607 1653
1608 if (ret < 0) 1654 if (ret < 0)
1609 goto out; 1655 goto out;
@@ -1629,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1629 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1630 struct compat_timeval __user *tvp) 1676 struct compat_timeval __user *tvp)
1631{ 1677{
1632 s64 timeout = -1; 1678 struct timespec end_time, *to = NULL;
1633 struct compat_timeval tv; 1679 struct compat_timeval tv;
1634 int ret; 1680 int ret;
1635 1681
@@ -1637,43 +1683,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1637 if (copy_from_user(&tv, tvp, sizeof(tv))) 1683 if (copy_from_user(&tv, tvp, sizeof(tv)))
1638 return -EFAULT; 1684 return -EFAULT;
1639 1685
1640 if (tv.tv_sec < 0 || tv.tv_usec < 0) 1686 to = &end_time;
1687 if (poll_select_set_timeout(to,
1688 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
1689 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
1641 return -EINVAL; 1690 return -EINVAL;
1642
1643 /* Cast to u64 to make GCC stop complaining */
1644 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1645 timeout = -1; /* infinite */
1646 else {
1647 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1648 timeout += tv.tv_sec * HZ;
1649 }
1650 } 1691 }
1651 1692
1652 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1693 ret = compat_core_sys_select(n, inp, outp, exp, to);
1653 1694 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1654 if (tvp) {
1655 struct compat_timeval rtv;
1656
1657 if (current->personality & STICKY_TIMEOUTS)
1658 goto sticky;
1659 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1660 rtv.tv_sec = timeout;
1661 if (compat_timeval_compare(&rtv, &tv) >= 0)
1662 rtv = tv;
1663 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1664sticky:
1665 /*
1666 * If an application puts its timeval in read-only
1667 * memory, we don't want the Linux-specific update to
1668 * the timeval to cause a fault after the select has
1669 * completed successfully. However, because we're not
1670 * updating the timeval, we can't restart the system
1671 * call.
1672 */
1673 if (ret == -ERESTARTNOHAND)
1674 ret = -EINTR;
1675 }
1676 }
1677 1695
1678 return ret; 1696 return ret;
1679} 1697}
@@ -1686,15 +1704,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1686{ 1704{
1687 compat_sigset_t ss32; 1705 compat_sigset_t ss32;
1688 sigset_t ksigmask, sigsaved; 1706 sigset_t ksigmask, sigsaved;
1689 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1690 struct compat_timespec ts; 1707 struct compat_timespec ts;
1708 struct timespec end_time, *to = NULL;
1691 int ret; 1709 int ret;
1692 1710
1693 if (tsp) { 1711 if (tsp) {
1694 if (copy_from_user(&ts, tsp, sizeof(ts))) 1712 if (copy_from_user(&ts, tsp, sizeof(ts)))
1695 return -EFAULT; 1713 return -EFAULT;
1696 1714
1697 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 1715 to = &end_time;
1716 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1698 return -EINVAL; 1717 return -EINVAL;
1699 } 1718 }
1700 1719
@@ -1709,51 +1728,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1709 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1728 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1710 } 1729 }
1711 1730
1712 do { 1731 ret = compat_core_sys_select(n, inp, outp, exp, to);
1713 if (tsp) { 1732 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1714 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1715 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1716 timeout += ts.tv_sec * (unsigned long)HZ;
1717 ts.tv_sec = 0;
1718 ts.tv_nsec = 0;
1719 } else {
1720 ts.tv_sec -= MAX_SELECT_SECONDS;
1721 timeout = MAX_SELECT_SECONDS * HZ;
1722 }
1723 }
1724
1725 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1726
1727 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1728
1729 if (tsp) {
1730 struct compat_timespec rts;
1731
1732 if (current->personality & STICKY_TIMEOUTS)
1733 goto sticky;
1734
1735 rts.tv_sec = timeout / HZ;
1736 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1737 if (rts.tv_nsec >= NSEC_PER_SEC) {
1738 rts.tv_sec++;
1739 rts.tv_nsec -= NSEC_PER_SEC;
1740 }
1741 if (compat_timespec_compare(&rts, &ts) >= 0)
1742 rts = ts;
1743 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1744sticky:
1745 /*
1746 * If an application puts its timeval in read-only
1747 * memory, we don't want the Linux-specific update to
1748 * the timeval to cause a fault after the select has
1749 * completed successfully. However, because we're not
1750 * updating the timeval, we can't restart the system
1751 * call.
1752 */
1753 if (ret == -ERESTARTNOHAND)
1754 ret = -EINTR;
1755 }
1756 }
1757 1733
1758 if (ret == -ERESTARTNOHAND) { 1734 if (ret == -ERESTARTNOHAND) {
1759 /* 1735 /*
@@ -1798,18 +1774,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1798 compat_sigset_t ss32; 1774 compat_sigset_t ss32;
1799 sigset_t ksigmask, sigsaved; 1775 sigset_t ksigmask, sigsaved;
1800 struct compat_timespec ts; 1776 struct compat_timespec ts;
1801 s64 timeout = -1; 1777 struct timespec end_time, *to = NULL;
1802 int ret; 1778 int ret;
1803 1779
1804 if (tsp) { 1780 if (tsp) {
1805 if (copy_from_user(&ts, tsp, sizeof(ts))) 1781 if (copy_from_user(&ts, tsp, sizeof(ts)))
1806 return -EFAULT; 1782 return -EFAULT;
1807 1783
1808 /* We assume that ts.tv_sec is always lower than 1784 to = &end_time;
1809 the number of seconds that can be expressed in 1785 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1810 an s64. Otherwise the compiler bitches at us */ 1786 return -EINVAL;
1811 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1812 timeout += ts.tv_sec * HZ;
1813 } 1787 }
1814 1788
1815 if (sigmask) { 1789 if (sigmask) {
@@ -1823,7 +1797,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1823 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1797 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1824 } 1798 }
1825 1799
1826 ret = do_sys_poll(ufds, nfds, &timeout); 1800 ret = do_sys_poll(ufds, nfds, to);
1827 1801
1828 /* We can restart this syscall, usually */ 1802 /* We can restart this syscall, usually */
1829 if (ret == -EINTR) { 1803 if (ret == -EINTR) {
@@ -1841,31 +1815,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1841 } else if (sigmask) 1815 } else if (sigmask)
1842 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1816 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1843 1817
1844 if (tsp && timeout >= 0) { 1818 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1845 struct compat_timespec rts;
1846
1847 if (current->personality & STICKY_TIMEOUTS)
1848 goto sticky;
1849 /* Yes, we know it's actually an s64, but it's also positive. */
1850 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1851 1000;
1852 rts.tv_sec = timeout;
1853 if (compat_timespec_compare(&rts, &ts) >= 0)
1854 rts = ts;
1855 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1856sticky:
1857 /*
1858 * If an application puts its timeval in read-only
1859 * memory, we don't want the Linux-specific update to
1860 * the timeval to cause a fault after the select has
1861 * completed successfully. However, because we're not
1862 * updating the timeval, we can't restart the system
1863 * call.
1864 */
1865 if (ret == -ERESTARTNOHAND && timeout >= 0)
1866 ret = -EINTR;
1867 }
1868 }
1869 1819
1870 return ret; 1820 return ret;
1871} 1821}
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index bf74973b0492..932a92b31483 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -108,18 +108,18 @@ out:
108} 108}
109 109
110 110
111static int get_target(const char *symname, struct nameidata *nd, 111static int get_target(const char *symname, struct path *path,
112 struct config_item **target) 112 struct config_item **target)
113{ 113{
114 int ret; 114 int ret;
115 115
116 ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); 116 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
117 if (!ret) { 117 if (!ret) {
118 if (nd->path.dentry->d_sb == configfs_sb) { 118 if (path->dentry->d_sb == configfs_sb) {
119 *target = configfs_get_config_item(nd->path.dentry); 119 *target = configfs_get_config_item(path->dentry);
120 if (!*target) { 120 if (!*target) {
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(&nd->path); 122 path_put(path);
123 } 123 }
124 } else 124 } else
125 ret = -EPERM; 125 ret = -EPERM;
@@ -132,7 +132,7 @@ static int get_target(const char *symname, struct nameidata *nd,
132int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 132int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
133{ 133{
134 int ret; 134 int ret;
135 struct nameidata nd; 135 struct path path;
136 struct configfs_dirent *sd; 136 struct configfs_dirent *sd;
137 struct config_item *parent_item; 137 struct config_item *parent_item;
138 struct config_item *target_item; 138 struct config_item *target_item;
@@ -159,7 +159,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
159 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
160 goto out_put; 160 goto out_put;
161 161
162 ret = get_target(symname, &nd, &target_item); 162 ret = get_target(symname, &path, &target_item);
163 if (ret) 163 if (ret)
164 goto out_put; 164 goto out_put;
165 165
@@ -174,7 +174,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
174 } 174 }
175 175
176 config_item_put(target_item); 176 config_item_put(target_item);
177 path_put(&nd.path); 177 path_put(&path);
178 178
179out_put: 179out_put:
180 config_item_put(parent_item); 180 config_item_put(parent_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index e7a1a99b7464..a1d86c7f3e66 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -69,6 +69,7 @@ struct dentry_stat_t dentry_stat = {
69 69
70static void __d_free(struct dentry *dentry) 70static void __d_free(struct dentry *dentry)
71{ 71{
72 WARN_ON(!list_empty(&dentry->d_alias));
72 if (dname_external(dentry)) 73 if (dname_external(dentry))
73 kfree(dentry->d_name.name); 74 kfree(dentry->d_name.name);
74 kmem_cache_free(dentry_cache, dentry); 75 kmem_cache_free(dentry_cache, dentry);
@@ -174,9 +175,12 @@ static struct dentry *d_kill(struct dentry *dentry)
174 dentry_stat.nr_dentry--; /* For d_free, below */ 175 dentry_stat.nr_dentry--; /* For d_free, below */
175 /*drops the locks, at that point nobody can reach this dentry */ 176 /*drops the locks, at that point nobody can reach this dentry */
176 dentry_iput(dentry); 177 dentry_iput(dentry);
177 parent = dentry->d_parent; 178 if (IS_ROOT(dentry))
179 parent = NULL;
180 else
181 parent = dentry->d_parent;
178 d_free(dentry); 182 d_free(dentry);
179 return dentry == parent ? NULL : parent; 183 return parent;
180} 184}
181 185
182/* 186/*
@@ -666,11 +670,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
666 BUG(); 670 BUG();
667 } 671 }
668 672
669 parent = dentry->d_parent; 673 if (IS_ROOT(dentry))
670 if (parent == dentry)
671 parent = NULL; 674 parent = NULL;
672 else 675 else {
676 parent = dentry->d_parent;
673 atomic_dec(&parent->d_count); 677 atomic_dec(&parent->d_count);
678 }
674 679
675 list_del(&dentry->d_u.d_child); 680 list_del(&dentry->d_u.d_child);
676 detached++; 681 detached++;
@@ -977,6 +982,15 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
977 return d_alloc(parent, &q); 982 return d_alloc(parent, &q);
978} 983}
979 984
985/* the caller must hold dcache_lock */
986static void __d_instantiate(struct dentry *dentry, struct inode *inode)
987{
988 if (inode)
989 list_add(&dentry->d_alias, &inode->i_dentry);
990 dentry->d_inode = inode;
991 fsnotify_d_instantiate(dentry, inode);
992}
993
980/** 994/**
981 * d_instantiate - fill in inode information for a dentry 995 * d_instantiate - fill in inode information for a dentry
982 * @entry: dentry to complete 996 * @entry: dentry to complete
@@ -996,10 +1010,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
996{ 1010{
997 BUG_ON(!list_empty(&entry->d_alias)); 1011 BUG_ON(!list_empty(&entry->d_alias));
998 spin_lock(&dcache_lock); 1012 spin_lock(&dcache_lock);
999 if (inode) 1013 __d_instantiate(entry, inode);
1000 list_add(&entry->d_alias, &inode->i_dentry);
1001 entry->d_inode = inode;
1002 fsnotify_d_instantiate(entry, inode);
1003 spin_unlock(&dcache_lock); 1014 spin_unlock(&dcache_lock);
1004 security_d_instantiate(entry, inode); 1015 security_d_instantiate(entry, inode);
1005} 1016}
@@ -1029,7 +1040,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1029 unsigned int hash = entry->d_name.hash; 1040 unsigned int hash = entry->d_name.hash;
1030 1041
1031 if (!inode) { 1042 if (!inode) {
1032 entry->d_inode = NULL; 1043 __d_instantiate(entry, NULL);
1033 return NULL; 1044 return NULL;
1034 } 1045 }
1035 1046
@@ -1048,9 +1059,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1048 return alias; 1059 return alias;
1049 } 1060 }
1050 1061
1051 list_add(&entry->d_alias, &inode->i_dentry); 1062 __d_instantiate(entry, inode);
1052 entry->d_inode = inode;
1053 fsnotify_d_instantiate(entry, inode);
1054 return NULL; 1063 return NULL;
1055} 1064}
1056 1065
@@ -1111,69 +1120,71 @@ static inline struct hlist_head *d_hash(struct dentry *parent,
1111} 1120}
1112 1121
1113/** 1122/**
1114 * d_alloc_anon - allocate an anonymous dentry 1123 * d_obtain_alias - find or allocate a dentry for a given inode
1115 * @inode: inode to allocate the dentry for 1124 * @inode: inode to allocate the dentry for
1116 * 1125 *
1117 * This is similar to d_alloc_root. It is used by filesystems when 1126 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1118 * creating a dentry for a given inode, often in the process of 1127 * similar open by handle operations. The returned dentry may be anonymous,
1119 * mapping a filehandle to a dentry. The returned dentry may be 1128 * or may have a full name (if the inode was already in the cache).
1120 * anonymous, or may have a full name (if the inode was already
1121 * in the cache). The file system may need to make further
1122 * efforts to connect this dentry into the dcache properly.
1123 * 1129 *
1124 * When called on a directory inode, we must ensure that 1130 * When called on a directory inode, we must ensure that the inode only ever
1125 * the inode only ever has one dentry. If a dentry is 1131 * has one dentry. If a dentry is found, that is returned instead of
1126 * found, that is returned instead of allocating a new one. 1132 * allocating a new one.
1127 * 1133 *
1128 * On successful return, the reference to the inode has been transferred 1134 * On successful return, the reference to the inode has been transferred
1129 * to the dentry. If %NULL is returned (indicating kmalloc failure), 1135 * to the dentry. In case of an error the reference on the inode is released.
1130 * the reference on the inode has not been released. 1136 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1137 * be passed in and will be the error will be propagate to the return value,
1138 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1131 */ 1139 */
1132 1140struct dentry *d_obtain_alias(struct inode *inode)
1133struct dentry * d_alloc_anon(struct inode *inode)
1134{ 1141{
1135 static const struct qstr anonstring = { .name = "" }; 1142 static const struct qstr anonstring = { .name = "" };
1136 struct dentry *tmp; 1143 struct dentry *tmp;
1137 struct dentry *res; 1144 struct dentry *res;
1138 1145
1139 if ((res = d_find_alias(inode))) { 1146 if (!inode)
1140 iput(inode); 1147 return ERR_PTR(-ESTALE);
1141 return res; 1148 if (IS_ERR(inode))
1142 } 1149 return ERR_CAST(inode);
1143 1150
1144 tmp = d_alloc(NULL, &anonstring); 1151 res = d_find_alias(inode);
1145 if (!tmp) 1152 if (res)
1146 return NULL; 1153 goto out_iput;
1147 1154
1155 tmp = d_alloc(NULL, &anonstring);
1156 if (!tmp) {
1157 res = ERR_PTR(-ENOMEM);
1158 goto out_iput;
1159 }
1148 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1160 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1149 1161
1150 spin_lock(&dcache_lock); 1162 spin_lock(&dcache_lock);
1151 res = __d_find_alias(inode, 0); 1163 res = __d_find_alias(inode, 0);
1152 if (!res) { 1164 if (res) {
1153 /* attach a disconnected dentry */ 1165 spin_unlock(&dcache_lock);
1154 res = tmp; 1166 dput(tmp);
1155 tmp = NULL; 1167 goto out_iput;
1156 spin_lock(&res->d_lock);
1157 res->d_sb = inode->i_sb;
1158 res->d_parent = res;
1159 res->d_inode = inode;
1160 res->d_flags |= DCACHE_DISCONNECTED;
1161 res->d_flags &= ~DCACHE_UNHASHED;
1162 list_add(&res->d_alias, &inode->i_dentry);
1163 hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
1164 spin_unlock(&res->d_lock);
1165
1166 inode = NULL; /* don't drop reference */
1167 } 1168 }
1169
1170 /* attach a disconnected dentry */
1171 spin_lock(&tmp->d_lock);
1172 tmp->d_sb = inode->i_sb;
1173 tmp->d_inode = inode;
1174 tmp->d_flags |= DCACHE_DISCONNECTED;
1175 tmp->d_flags &= ~DCACHE_UNHASHED;
1176 list_add(&tmp->d_alias, &inode->i_dentry);
1177 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
1178 spin_unlock(&tmp->d_lock);
1179
1168 spin_unlock(&dcache_lock); 1180 spin_unlock(&dcache_lock);
1181 return tmp;
1169 1182
1170 if (inode) 1183 out_iput:
1171 iput(inode); 1184 iput(inode);
1172 if (tmp)
1173 dput(tmp);
1174 return res; 1185 return res;
1175} 1186}
1176 1187EXPORT_SYMBOL_GPL(d_obtain_alias);
1177 1188
1178/** 1189/**
1179 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1190 * d_splice_alias - splice a disconnected dentry into the tree if one exists
@@ -1200,17 +1211,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1200 new = __d_find_alias(inode, 1); 1211 new = __d_find_alias(inode, 1);
1201 if (new) { 1212 if (new) {
1202 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1213 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1203 fsnotify_d_instantiate(new, inode);
1204 spin_unlock(&dcache_lock); 1214 spin_unlock(&dcache_lock);
1205 security_d_instantiate(new, inode); 1215 security_d_instantiate(new, inode);
1206 d_rehash(dentry); 1216 d_rehash(dentry);
1207 d_move(new, dentry); 1217 d_move(new, dentry);
1208 iput(inode); 1218 iput(inode);
1209 } else { 1219 } else {
1210 /* d_instantiate takes dcache_lock, so we do it by hand */ 1220 /* already taking dcache_lock, so d_add() by hand */
1211 list_add(&dentry->d_alias, &inode->i_dentry); 1221 __d_instantiate(dentry, inode);
1212 dentry->d_inode = inode;
1213 fsnotify_d_instantiate(dentry, inode);
1214 spin_unlock(&dcache_lock); 1222 spin_unlock(&dcache_lock);
1215 security_d_instantiate(dentry, inode); 1223 security_d_instantiate(dentry, inode);
1216 d_rehash(dentry); 1224 d_rehash(dentry);
@@ -1293,8 +1301,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1293 * d_instantiate() by hand because it takes dcache_lock which 1301 * d_instantiate() by hand because it takes dcache_lock which
1294 * we already hold. 1302 * we already hold.
1295 */ 1303 */
1296 list_add(&found->d_alias, &inode->i_dentry); 1304 __d_instantiate(found, inode);
1297 found->d_inode = inode;
1298 spin_unlock(&dcache_lock); 1305 spin_unlock(&dcache_lock);
1299 security_d_instantiate(found, inode); 1306 security_d_instantiate(found, inode);
1300 return found; 1307 return found;
@@ -1456,8 +1463,6 @@ out:
1456 * d_validate - verify dentry provided from insecure source 1463 * d_validate - verify dentry provided from insecure source
1457 * @dentry: The dentry alleged to be valid child of @dparent 1464 * @dentry: The dentry alleged to be valid child of @dparent
1458 * @dparent: The parent dentry (known to be valid) 1465 * @dparent: The parent dentry (known to be valid)
1459 * @hash: Hash of the dentry
1460 * @len: Length of the name
1461 * 1466 *
1462 * An insecure source has sent us a dentry, here we verify it and dget() it. 1467 * An insecure source has sent us a dentry, here we verify it and dget() it.
1463 * This is used by ncpfs in its readdir implementation. 1468 * This is used by ncpfs in its readdir implementation.
@@ -1714,18 +1719,23 @@ void d_move(struct dentry * dentry, struct dentry * target)
1714 spin_unlock(&dcache_lock); 1719 spin_unlock(&dcache_lock);
1715} 1720}
1716 1721
1717/* 1722/**
1718 * Helper that returns 1 if p1 is a parent of p2, else 0 1723 * d_ancestor - search for an ancestor
1724 * @p1: ancestor dentry
1725 * @p2: child dentry
1726 *
1727 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
1728 * an ancestor of p2, else NULL.
1719 */ 1729 */
1720static int d_isparent(struct dentry *p1, struct dentry *p2) 1730struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1721{ 1731{
1722 struct dentry *p; 1732 struct dentry *p;
1723 1733
1724 for (p = p2; p->d_parent != p; p = p->d_parent) { 1734 for (p = p2; !IS_ROOT(p); p = p->d_parent) {
1725 if (p->d_parent == p1) 1735 if (p->d_parent == p1)
1726 return 1; 1736 return p;
1727 } 1737 }
1728 return 0; 1738 return NULL;
1729} 1739}
1730 1740
1731/* 1741/*
@@ -1749,7 +1759,7 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1749 1759
1750 /* Check for loops */ 1760 /* Check for loops */
1751 ret = ERR_PTR(-ELOOP); 1761 ret = ERR_PTR(-ELOOP);
1752 if (d_isparent(alias, dentry)) 1762 if (d_ancestor(alias, dentry))
1753 goto out_err; 1763 goto out_err;
1754 1764
1755 /* See lock_rename() */ 1765 /* See lock_rename() */
@@ -1822,7 +1832,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1822 1832
1823 if (!inode) { 1833 if (!inode) {
1824 actual = dentry; 1834 actual = dentry;
1825 dentry->d_inode = NULL; 1835 __d_instantiate(dentry, NULL);
1826 goto found_lock; 1836 goto found_lock;
1827 } 1837 }
1828 1838
@@ -2149,32 +2159,27 @@ out:
2149 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 2159 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2150 */ 2160 */
2151 2161
2152int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) 2162int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2153{ 2163{
2154 int result; 2164 int result;
2155 struct dentry * saved = new_dentry;
2156 unsigned long seq; 2165 unsigned long seq;
2157 2166
2158 /* need rcu_readlock to protect against the d_parent trashing due to 2167 /* FIXME: This is old behavior, needed? Please check callers. */
2159 * d_move 2168 if (new_dentry == old_dentry)
2169 return 1;
2170
2171 /*
2172 * Need rcu_readlock to protect against the d_parent trashing
2173 * due to d_move
2160 */ 2174 */
2161 rcu_read_lock(); 2175 rcu_read_lock();
2162 do { 2176 do {
2163 /* for restarting inner loop in case of seq retry */ 2177 /* for restarting inner loop in case of seq retry */
2164 new_dentry = saved;
2165 result = 0;
2166 seq = read_seqbegin(&rename_lock); 2178 seq = read_seqbegin(&rename_lock);
2167 for (;;) { 2179 if (d_ancestor(old_dentry, new_dentry))
2168 if (new_dentry != old_dentry) {
2169 struct dentry * parent = new_dentry->d_parent;
2170 if (parent == new_dentry)
2171 break;
2172 new_dentry = parent;
2173 continue;
2174 }
2175 result = 1; 2180 result = 1;
2176 break; 2181 else
2177 } 2182 result = 0;
2178 } while (read_seqretry(&rename_lock, seq)); 2183 } while (read_seqretry(&rename_lock, seq));
2179 rcu_read_unlock(); 2184 rcu_read_unlock();
2180 2185
@@ -2344,7 +2349,6 @@ void __init vfs_caches_init(unsigned long mempages)
2344} 2349}
2345 2350
2346EXPORT_SYMBOL(d_alloc); 2351EXPORT_SYMBOL(d_alloc);
2347EXPORT_SYMBOL(d_alloc_anon);
2348EXPORT_SYMBOL(d_alloc_root); 2352EXPORT_SYMBOL(d_alloc_root);
2349EXPORT_SYMBOL(d_delete); 2353EXPORT_SYMBOL(d_delete);
2350EXPORT_SYMBOL(d_find_alias); 2354EXPORT_SYMBOL(d_find_alias);
diff --git a/fs/dquot.c b/fs/dquot.c
index da30a27f2242..5e95261005b2 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1805,19 +1805,19 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1805} 1805}
1806 1806
1807/* Actual function called from quotactl() */ 1807/* Actual function called from quotactl() */
1808int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1808int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
1809 int remount) 1809 int remount)
1810{ 1810{
1811 struct nameidata nd; 1811 struct path path;
1812 int error; 1812 int error;
1813 1813
1814 if (remount) 1814 if (remount)
1815 return vfs_quota_on_remount(sb, type); 1815 return vfs_quota_on_remount(sb, type);
1816 1816
1817 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1817 error = kern_path(name, LOOKUP_FOLLOW, &path);
1818 if (!error) { 1818 if (!error) {
1819 error = vfs_quota_on_path(sb, type, format_id, &nd.path); 1819 error = vfs_quota_on_path(sb, type, format_id, &path);
1820 path_put(&nd.path); 1820 path_put(&path);
1821 } 1821 }
1822 return error; 1822 return error;
1823} 1823}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 06db79d05c12..6046239465a1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1251,6 +1251,7 @@ struct kmem_cache *ecryptfs_header_cache_2;
1251/** 1251/**
1252 * ecryptfs_write_headers_virt 1252 * ecryptfs_write_headers_virt
1253 * @page_virt: The virtual address to write the headers to 1253 * @page_virt: The virtual address to write the headers to
1254 * @max: The size of memory allocated at page_virt
1254 * @size: Set to the number of bytes written by this function 1255 * @size: Set to the number of bytes written by this function
1255 * @crypt_stat: The cryptographic context 1256 * @crypt_stat: The cryptographic context
1256 * @ecryptfs_dentry: The eCryptfs dentry 1257 * @ecryptfs_dentry: The eCryptfs dentry
@@ -1278,7 +1279,8 @@ struct kmem_cache *ecryptfs_header_cache_2;
1278 * 1279 *
1279 * Returns zero on success 1280 * Returns zero on success
1280 */ 1281 */
1281static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, 1282static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1283 size_t *size,
1282 struct ecryptfs_crypt_stat *crypt_stat, 1284 struct ecryptfs_crypt_stat *crypt_stat,
1283 struct dentry *ecryptfs_dentry) 1285 struct dentry *ecryptfs_dentry)
1284{ 1286{
@@ -1296,7 +1298,7 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
1296 offset += written; 1298 offset += written;
1297 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, 1299 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
1298 ecryptfs_dentry, &written, 1300 ecryptfs_dentry, &written,
1299 PAGE_CACHE_SIZE - offset); 1301 max - offset);
1300 if (rc) 1302 if (rc)
1301 ecryptfs_printk(KERN_WARNING, "Error generating key packet " 1303 ecryptfs_printk(KERN_WARNING, "Error generating key packet "
1302 "set; rc = [%d]\n", rc); 1304 "set; rc = [%d]\n", rc);
@@ -1368,14 +1370,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1368 goto out; 1370 goto out;
1369 } 1371 }
1370 /* Released in this function */ 1372 /* Released in this function */
1371 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1373 virt = (char *)get_zeroed_page(GFP_KERNEL);
1372 if (!virt) { 1374 if (!virt) {
1373 printk(KERN_ERR "%s: Out of memory\n", __func__); 1375 printk(KERN_ERR "%s: Out of memory\n", __func__);
1374 rc = -ENOMEM; 1376 rc = -ENOMEM;
1375 goto out; 1377 goto out;
1376 } 1378 }
1377 rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, 1379 rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size,
1378 ecryptfs_dentry); 1380 crypt_stat, ecryptfs_dentry);
1379 if (unlikely(rc)) { 1381 if (unlikely(rc)) {
1380 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1381 __func__, rc); 1383 __func__, rc);
@@ -1393,8 +1395,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1393 goto out_free; 1395 goto out_free;
1394 } 1396 }
1395out_free: 1397out_free:
1396 memset(virt, 0, crypt_stat->num_header_bytes_at_front); 1398 free_page((unsigned long)virt);
1397 kfree(virt);
1398out: 1399out:
1399 return rc; 1400 return rc;
1400} 1401}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 046e027a4cb1..64d2ba980df4 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -471,31 +471,26 @@ out:
471 */ 471 */
472static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) 472static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
473{ 473{
474 struct path path;
474 int rc; 475 int rc;
475 struct nameidata nd;
476 struct dentry *lower_root;
477 struct vfsmount *lower_mnt;
478 476
479 memset(&nd, 0, sizeof(struct nameidata)); 477 rc = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
480 rc = path_lookup(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
481 if (rc) { 478 if (rc) {
482 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); 479 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
483 goto out; 480 goto out;
484 } 481 }
485 lower_root = nd.path.dentry; 482 ecryptfs_set_superblock_lower(sb, path.dentry->d_sb);
486 lower_mnt = nd.path.mnt; 483 sb->s_maxbytes = path.dentry->d_sb->s_maxbytes;
487 ecryptfs_set_superblock_lower(sb, lower_root->d_sb); 484 sb->s_blocksize = path.dentry->d_sb->s_blocksize;
488 sb->s_maxbytes = lower_root->d_sb->s_maxbytes; 485 ecryptfs_set_dentry_lower(sb->s_root, path.dentry);
489 sb->s_blocksize = lower_root->d_sb->s_blocksize; 486 ecryptfs_set_dentry_lower_mnt(sb->s_root, path.mnt);
490 ecryptfs_set_dentry_lower(sb->s_root, lower_root); 487 rc = ecryptfs_interpose(path.dentry, sb->s_root, sb, 0);
491 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
492 rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0);
493 if (rc) 488 if (rc)
494 goto out_free; 489 goto out_free;
495 rc = 0; 490 rc = 0;
496 goto out; 491 goto out;
497out_free: 492out_free:
498 path_put(&nd.path); 493 path_put(&path);
499out: 494out:
500 return rc; 495 return rc;
501} 496}
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 291abb11e20e..c3fb5f9c4a44 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -112,35 +112,14 @@ struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
112 112
113struct dentry *efs_get_parent(struct dentry *child) 113struct dentry *efs_get_parent(struct dentry *child)
114{ 114{
115 struct dentry *parent; 115 struct dentry *parent = ERR_PTR(-ENOENT);
116 struct inode *inode;
117 efs_ino_t ino; 116 efs_ino_t ino;
118 long error;
119 117
120 lock_kernel(); 118 lock_kernel();
121
122 error = -ENOENT;
123 ino = efs_find_entry(child->d_inode, "..", 2); 119 ino = efs_find_entry(child->d_inode, "..", 2);
124 if (!ino) 120 if (ino)
125 goto fail; 121 parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino));
126
127 inode = efs_iget(child->d_inode->i_sb, ino);
128 if (IS_ERR(inode)) {
129 error = PTR_ERR(inode);
130 goto fail;
131 }
132
133 error = -ENOMEM;
134 parent = d_alloc_anon(inode);
135 if (!parent)
136 goto fail_iput;
137
138 unlock_kernel(); 122 unlock_kernel();
139 return parent;
140 123
141 fail_iput: 124 return parent;
142 iput(inode);
143 fail:
144 unlock_kernel();
145 return ERR_PTR(error);
146} 125}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 99368bda0261..aec5c13f6341 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -930,8 +930,15 @@ errxit:
930 * inside the main ready-list here. 930 * inside the main ready-list here.
931 */ 931 */
932 for (nepi = ep->ovflist; (epi = nepi) != NULL; 932 for (nepi = ep->ovflist; (epi = nepi) != NULL;
933 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) 933 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
934 list_add_tail(&epi->rdllink, &ep->rdllist); 934 /*
935 * If the above loop quit with errors, the epoll item might still
936 * be linked to "txlist", and the list_splice() done below will
937 * take care of those cases.
938 */
939 if (!ep_is_linked(&epi->rdllink))
940 list_add_tail(&epi->rdllink, &ep->rdllist);
941 }
935 /* 942 /*
936 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after 943 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
937 * releasing the lock, events will be queued in the normal way inside 944 * releasing the lock, events will be queued in the normal way inside
diff --git a/fs/exec.c b/fs/exec.c
index a41e7902ed0b..4e834f16d9da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
1386 * name into corename, which must have space for at least 1386 * name into corename, which must have space for at least
1387 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1387 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1388 */ 1388 */
1389static int format_corename(char *corename, int nr_threads, long signr) 1389static int format_corename(char *corename, long signr)
1390{ 1390{
1391 const char *pat_ptr = core_pattern; 1391 const char *pat_ptr = core_pattern;
1392 int ispipe = (*pat_ptr == '|'); 1392 int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
1493 * If core_pattern does not include a %p (as is the default) 1493 * If core_pattern does not include a %p (as is the default)
1494 * and core_uses_pid is set, then .%pid will be appended to 1494 * and core_uses_pid is set, then .%pid will be appended to
1495 * the filename. Do not do this for piped commands. */ 1495 * the filename. Do not do this for piped commands. */
1496 if (!ispipe && !pid_in_pattern 1496 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1497 && (core_uses_pid || nr_threads)) {
1498 rc = snprintf(out_ptr, out_end - out_ptr, 1497 rc = snprintf(out_ptr, out_end - out_ptr,
1499 ".%d", task_tgid_vnr(current)); 1498 ".%d", task_tgid_vnr(current));
1500 if (rc > out_end - out_ptr) 1499 if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1757 * uses lock_kernel() 1756 * uses lock_kernel()
1758 */ 1757 */
1759 lock_kernel(); 1758 lock_kernel();
1760 ispipe = format_corename(corename, retval, signr); 1759 ispipe = format_corename(corename, signr);
1761 unlock_kernel(); 1760 unlock_kernel();
1762 /* 1761 /*
1763 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1762 * Don't bother to check the RLIMIT_CORE value if core_pattern points
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index cc91227d3bb8..80246bad1b7f 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -94,9 +94,8 @@ find_disconnected_root(struct dentry *dentry)
94 * It may already be, as the flag isn't always updated when connection happens. 94 * It may already be, as the flag isn't always updated when connection happens.
95 */ 95 */
96static int 96static int
97reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) 97reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
98{ 98{
99 char nbuf[NAME_MAX+1];
100 int noprogress = 0; 99 int noprogress = 0;
101 int err = -ESTALE; 100 int err = -ESTALE;
102 101
@@ -281,13 +280,14 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
281 int old_seq = buffer.sequence; 280 int old_seq = buffer.sequence;
282 281
283 error = vfs_readdir(file, filldir_one, &buffer); 282 error = vfs_readdir(file, filldir_one, &buffer);
283 if (buffer.found) {
284 error = 0;
285 break;
286 }
284 287
285 if (error < 0) 288 if (error < 0)
286 break; 289 break;
287 290
288 error = 0;
289 if (buffer.found)
290 break;
291 error = -ENOENT; 291 error = -ENOENT;
292 if (old_seq == buffer.sequence) 292 if (old_seq == buffer.sequence)
293 break; 293 break;
@@ -360,14 +360,13 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
360{ 360{
361 const struct export_operations *nop = mnt->mnt_sb->s_export_op; 361 const struct export_operations *nop = mnt->mnt_sb->s_export_op;
362 struct dentry *result, *alias; 362 struct dentry *result, *alias;
363 char nbuf[NAME_MAX+1];
363 int err; 364 int err;
364 365
365 /* 366 /*
366 * Try to get any dentry for the given file handle from the filesystem. 367 * Try to get any dentry for the given file handle from the filesystem.
367 */ 368 */
368 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 369 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
369 if (!result)
370 result = ERR_PTR(-ESTALE);
371 if (IS_ERR(result)) 370 if (IS_ERR(result))
372 return result; 371 return result;
373 372
@@ -381,7 +380,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
381 * filesystem root. 380 * filesystem root.
382 */ 381 */
383 if (result->d_flags & DCACHE_DISCONNECTED) { 382 if (result->d_flags & DCACHE_DISCONNECTED) {
384 err = reconnect_path(mnt, result); 383 err = reconnect_path(mnt, result, nbuf);
385 if (err) 384 if (err)
386 goto err_result; 385 goto err_result;
387 } 386 }
@@ -397,7 +396,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
397 * It's not a directory. Life is a little more complicated. 396 * It's not a directory. Life is a little more complicated.
398 */ 397 */
399 struct dentry *target_dir, *nresult; 398 struct dentry *target_dir, *nresult;
400 char nbuf[NAME_MAX+1];
401 399
402 /* 400 /*
403 * See if either the dentry we just got from the filesystem 401 * See if either the dentry we just got from the filesystem
@@ -422,8 +420,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
422 420
423 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, 421 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
424 fh_len, fileid_type); 422 fh_len, fileid_type);
425 if (!target_dir)
426 goto err_result;
427 err = PTR_ERR(target_dir); 423 err = PTR_ERR(target_dir);
428 if (IS_ERR(target_dir)) 424 if (IS_ERR(target_dir))
429 goto err_result; 425 goto err_result;
@@ -433,7 +429,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
433 * connected to the filesystem root. The VFS really doesn't 429 * connected to the filesystem root. The VFS really doesn't
434 * like disconnected directories.. 430 * like disconnected directories..
435 */ 431 */
436 err = reconnect_path(mnt, target_dir); 432 err = reconnect_path(mnt, target_dir, nbuf);
437 if (err) { 433 if (err) {
438 dput(target_dir); 434 dput(target_dir);
439 goto err_result; 435 goto err_result;
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
new file mode 100644
index 000000000000..14a6780fd034
--- /dev/null
+++ b/fs/ext2/Kconfig
@@ -0,0 +1,55 @@
1config EXT2_FS
2 tristate "Second extended fs support"
3 help
4 Ext2 is a standard Linux file system for hard disks.
5
6 To compile this file system support as a module, choose M here: the
7 module will be called ext2.
8
9 If unsure, say Y.
10
11config EXT2_FS_XATTR
12 bool "Ext2 extended attributes"
13 depends on EXT2_FS
14 help
15 Extended attributes are name:value pairs associated with inodes by
16 the kernel or by users (see the attr(5) manual page, or visit
17 <http://acl.bestbits.at/> for details).
18
19 If unsure, say N.
20
21config EXT2_FS_POSIX_ACL
22 bool "Ext2 POSIX Access Control Lists"
23 depends on EXT2_FS_XATTR
24 select FS_POSIX_ACL
25 help
26 Posix Access Control Lists (ACLs) support permissions for users and
27 groups beyond the owner/group/world scheme.
28
29 To learn more about Access Control Lists, visit the Posix ACLs for
30 Linux website <http://acl.bestbits.at/>.
31
32 If you don't know what Access Control Lists are, say N
33
34config EXT2_FS_SECURITY
35 bool "Ext2 Security Labels"
36 depends on EXT2_FS_XATTR
37 help
38 Security labels support alternative access control models
39 implemented by security modules like SELinux. This option
40 enables an extended attribute handler for file security
41 labels in the ext2 filesystem.
42
43 If you are not using a security module that requires using
44 extended attributes for file security labels, say N.
45
46config EXT2_FS_XIP
47 bool "Ext2 execute in place support"
48 depends on EXT2_FS && MMU
49 help
50 Execute in place can be used on memory-backed block devices. If you
51 enable this option, you can select to mount block devices which are
52 capable of this feature without using the page cache.
53
54 If you do not use a block device that is capable of using this,
55 or if unsure, say N.
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 11a49ce84392..9a0fc400f91c 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -354,11 +354,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
354 * (as a parameter - res_dir). Page is returned mapped and unlocked. 354 * (as a parameter - res_dir). Page is returned mapped and unlocked.
355 * Entry is guaranteed to be valid. 355 * Entry is guaranteed to be valid.
356 */ 356 */
357struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, 357struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
358 struct dentry *dentry, struct page ** res_page) 358 struct qstr *child, struct page ** res_page)
359{ 359{
360 const char *name = dentry->d_name.name; 360 const char *name = child->name;
361 int namelen = dentry->d_name.len; 361 int namelen = child->len;
362 unsigned reclen = EXT2_DIR_REC_LEN(namelen); 362 unsigned reclen = EXT2_DIR_REC_LEN(namelen);
363 unsigned long start, n; 363 unsigned long start, n;
364 unsigned long npages = dir_pages(dir); 364 unsigned long npages = dir_pages(dir);
@@ -431,13 +431,13 @@ struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
431 return de; 431 return de;
432} 432}
433 433
434ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) 434ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
435{ 435{
436 ino_t res = 0; 436 ino_t res = 0;
437 struct ext2_dir_entry_2 * de; 437 struct ext2_dir_entry_2 *de;
438 struct page *page; 438 struct page *page;
439 439
440 de = ext2_find_entry (dir, dentry, &page); 440 de = ext2_find_entry (dir, child, &page);
441 if (de) { 441 if (de) {
442 res = le32_to_cpu(de->inode); 442 res = le32_to_cpu(de->inode);
443 ext2_put_page(page); 443 ext2_put_page(page);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index bae998c1e44e..3203042b36ef 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -105,9 +105,9 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind
105 105
106/* dir.c */ 106/* dir.c */
107extern int ext2_add_link (struct dentry *, struct inode *); 107extern int ext2_add_link (struct dentry *, struct inode *);
108extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); 108extern ino_t ext2_inode_by_name(struct inode *, struct qstr *);
109extern int ext2_make_empty(struct inode *, struct inode *); 109extern int ext2_make_empty(struct inode *, struct inode *);
110extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); 110extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **);
111extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); 111extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
112extern int ext2_empty_dir (struct inode *); 112extern int ext2_empty_dir (struct inode *);
113extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); 113extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 80c97fd8c571..2a747252ec12 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -60,7 +60,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
60 if (dentry->d_name.len > EXT2_NAME_LEN) 60 if (dentry->d_name.len > EXT2_NAME_LEN)
61 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
62 62
63 ino = ext2_inode_by_name(dir, dentry); 63 ino = ext2_inode_by_name(dir, &dentry->d_name);
64 inode = NULL; 64 inode = NULL;
65 if (ino) { 65 if (ino) {
66 inode = ext2_iget(dir->i_sb, ino); 66 inode = ext2_iget(dir->i_sb, ino);
@@ -72,27 +72,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
72 72
73struct dentry *ext2_get_parent(struct dentry *child) 73struct dentry *ext2_get_parent(struct dentry *child)
74{ 74{
75 unsigned long ino; 75 struct qstr dotdot = {.name = "..", .len = 2};
76 struct dentry *parent; 76 unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
77 struct inode *inode;
78 struct dentry dotdot;
79
80 dotdot.d_name.name = "..";
81 dotdot.d_name.len = 2;
82
83 ino = ext2_inode_by_name(child->d_inode, &dotdot);
84 if (!ino) 77 if (!ino)
85 return ERR_PTR(-ENOENT); 78 return ERR_PTR(-ENOENT);
86 inode = ext2_iget(child->d_inode->i_sb, ino); 79 return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino));
87
88 if (IS_ERR(inode))
89 return ERR_CAST(inode);
90 parent = d_alloc_anon(inode);
91 if (!parent) {
92 iput(inode);
93 parent = ERR_PTR(-ENOMEM);
94 }
95 return parent;
96} 80}
97 81
98/* 82/*
@@ -257,7 +241,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
257 struct page * page; 241 struct page * page;
258 int err = -ENOENT; 242 int err = -ENOENT;
259 243
260 de = ext2_find_entry (dir, dentry, &page); 244 de = ext2_find_entry (dir, &dentry->d_name, &page);
261 if (!de) 245 if (!de)
262 goto out; 246 goto out;
263 247
@@ -299,7 +283,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
299 struct ext2_dir_entry_2 * old_de; 283 struct ext2_dir_entry_2 * old_de;
300 int err = -ENOENT; 284 int err = -ENOENT;
301 285
302 old_de = ext2_find_entry (old_dir, old_dentry, &old_page); 286 old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
303 if (!old_de) 287 if (!old_de)
304 goto out; 288 goto out;
305 289
@@ -319,7 +303,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
319 goto out_dir; 303 goto out_dir;
320 304
321 err = -ENOENT; 305 err = -ENOENT;
322 new_de = ext2_find_entry (new_dir, new_dentry, &new_page); 306 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
323 if (!new_de) 307 if (!new_de)
324 goto out_dir; 308 goto out_dir;
325 inode_inc_link_count(old_inode); 309 inode_inc_link_count(old_inode);
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 4fb94c20041b..b72b85884223 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -11,6 +11,7 @@
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/ext2_fs_sb.h> 12#include <linux/ext2_fs_sb.h>
13#include <linux/ext2_fs.h> 13#include <linux/ext2_fs.h>
14#include <linux/blkdev.h>
14#include "ext2.h" 15#include "ext2.h"
15#include "xip.h" 16#include "xip.h"
16 17
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
new file mode 100644
index 000000000000..8e0cfe44b0fc
--- /dev/null
+++ b/fs/ext3/Kconfig
@@ -0,0 +1,67 @@
1config EXT3_FS
2 tristate "Ext3 journalling file system support"
3 select JBD
4 help
5 This is the journalling version of the Second extended file system
6 (often called ext3), the de facto standard Linux file system
7 (method to organize files on a storage device) for hard disks.
8
9 The journalling code included in this driver means you do not have
10 to run e2fsck (file system checker) on your file systems after a
11 crash. The journal keeps track of any changes that were being made
12 at the time the system crashed, and can ensure that your file system
13 is consistent without the need for a lengthy check.
14
15 Other than adding the journal to the file system, the on-disk format
16 of ext3 is identical to ext2. It is possible to freely switch
17 between using the ext3 driver and the ext2 driver, as long as the
18 file system has been cleanly unmounted, or e2fsck is run on the file
19 system.
20
21 To add a journal on an existing ext2 file system or change the
22 behavior of ext3 file systems, you can use the tune2fs utility ("man
23 tune2fs"). To modify attributes of files and directories on ext3
24 file systems, use chattr ("man chattr"). You need to be using
25 e2fsprogs version 1.20 or later in order to create ext3 journals
26 (available at <http://sourceforge.net/projects/e2fsprogs/>).
27
28 To compile this file system support as a module, choose M here: the
29 module will be called ext3.
30
31config EXT3_FS_XATTR
32 bool "Ext3 extended attributes"
33 depends on EXT3_FS
34 default y
35 help
36 Extended attributes are name:value pairs associated with inodes by
37 the kernel or by users (see the attr(5) manual page, or visit
38 <http://acl.bestbits.at/> for details).
39
40 If unsure, say N.
41
42 You need this for POSIX ACL support on ext3.
43
44config EXT3_FS_POSIX_ACL
45 bool "Ext3 POSIX Access Control Lists"
46 depends on EXT3_FS_XATTR
47 select FS_POSIX_ACL
48 help
49 Posix Access Control Lists (ACLs) support permissions for users and
50 groups beyond the owner/group/world scheme.
51
52 To learn more about Access Control Lists, visit the Posix ACLs for
53 Linux website <http://acl.bestbits.at/>.
54
55 If you don't know what Access Control Lists are, say N
56
57config EXT3_FS_SECURITY
58 bool "Ext3 Security Labels"
59 depends on EXT3_FS_XATTR
60 help
61 Security labels support alternative access control models
62 implemented by security modules like SELinux. This option
63 enables an extended attribute handler for file security
64 labels in the ext3 filesystem.
65
66 If you are not using a security module that requires using
67 extended attributes for file security labels, say N.
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6eb..f5b57a2ca35a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
1547 * turn off reservation for this allocation 1547 * turn off reservation for this allocation
1548 */ 1548 */
1549 if (my_rsv && (free_blocks < windowsz) 1549 if (my_rsv && (free_blocks < windowsz)
1550 && (free_blocks > 0)
1550 && (rsv_is_empty(&my_rsv->rsv_window))) 1551 && (rsv_is_empty(&my_rsv->rsv_window)))
1551 my_rsv = NULL; 1552 my_rsv = NULL;
1552 1553
@@ -1585,7 +1586,7 @@ retry_alloc:
1585 * free blocks is less than half of the reservation 1586 * free blocks is less than half of the reservation
1586 * window size. 1587 * window size.
1587 */ 1588 */
1588 if (free_blocks <= (windowsz/2)) 1589 if (my_rsv && (free_blocks <= (windowsz/2)))
1589 continue; 1590 continue;
1590 1591
1591 brelse(bitmap_bh); 1592 brelse(bitmap_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78ed..5853f4440af4 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
102 int err; 102 int err;
103 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
104 int ret = 0; 104 int ret = 0;
105 int dir_has_error = 0;
105 106
106 sb = inode->i_sb; 107 sb = inode->i_sb;
107 108
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
148 * of recovering data when there's a bad sector 149 * of recovering data when there's a bad sector
149 */ 150 */
150 if (!bh) { 151 if (!bh) {
151 ext3_error (sb, "ext3_readdir", 152 if (!dir_has_error) {
152 "directory #%lu contains a hole at offset %lu", 153 ext3_error(sb, __func__, "directory #%lu "
153 inode->i_ino, (unsigned long)filp->f_pos); 154 "contains a hole at offset %lld",
155 inode->i_ino, filp->f_pos);
156 dir_has_error = 1;
157 }
154 /* corrupt size? Maybe no more blocks to read */ 158 /* corrupt size? Maybe no more blocks to read */
155 if (filp->f_pos > inode->i_blocks << 9) 159 if (filp->f_pos > inode->i_blocks << 9)
156 break; 160 break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
410 get_dtype(sb, fname->file_type)); 414 get_dtype(sb, fname->file_type));
411 if (error) { 415 if (error) {
412 filp->f_pos = curr_pos; 416 filp->f_pos = curr_pos;
413 info->extra_fname = fname->next; 417 info->extra_fname = fname;
414 return error; 418 return error;
415 } 419 }
416 fname = fname->next; 420 fname = fname->next;
@@ -449,11 +453,12 @@ static int ext3_dx_readdir(struct file * filp,
449 * If there are any leftover names on the hash collision 453 * If there are any leftover names on the hash collision
450 * chain, return them first. 454 * chain, return them first.
451 */ 455 */
452 if (info->extra_fname && 456 if (info->extra_fname) {
453 call_filldir(filp, dirent, filldir, info->extra_fname)) 457 if (call_filldir(filp, dirent, filldir, info->extra_fname))
454 goto finished; 458 goto finished;
455 459 info->extra_fname = NULL;
456 if (!info->curr_node) 460 goto next_node;
461 } else if (!info->curr_node)
457 info->curr_node = rb_first(&info->root); 462 info->curr_node = rb_first(&info->root);
458 463
459 while (1) { 464 while (1) {
@@ -484,9 +489,14 @@ static int ext3_dx_readdir(struct file * filp,
484 info->curr_minor_hash = fname->minor_hash; 489 info->curr_minor_hash = fname->minor_hash;
485 if (call_filldir(filp, dirent, filldir, fname)) 490 if (call_filldir(filp, dirent, filldir, fname))
486 break; 491 break;
487 492 next_node:
488 info->curr_node = rb_next(info->curr_node); 493 info->curr_node = rb_next(info->curr_node);
489 if (!info->curr_node) { 494 if (info->curr_node) {
495 fname = rb_entry(info->curr_node, struct fname,
496 rb_hash);
497 info->curr_hash = fname->hash;
498 info->curr_minor_hash = fname->minor_hash;
499 } else {
490 if (info->next_hash == ~0) { 500 if (info->next_hash == ~0) {
491 filp->f_pos = EXT3_HTREE_EOF; 501 filp->f_pos = EXT3_HTREE_EOF;
492 break; 502 break;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148e..f8424ad89971 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
1186 ext3_journal_stop(handle); 1186 ext3_journal_stop(handle);
1187 unlock_page(page); 1187 unlock_page(page);
1188 page_cache_release(page); 1188 page_cache_release(page);
1189 /*
1190 * block_write_begin may have instantiated a few blocks
1191 * outside i_size. Trim these off again. Don't need
1192 * i_size_read because we hold i_mutex.
1193 */
1194 if (pos + len > inode->i_size)
1195 vmtruncate(inode, inode->i_size);
1189 } 1196 }
1190 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1197 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1191 goto retry; 1198 goto retry;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 0d0c70151642..b7394d05ee8e 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -239,7 +239,7 @@ setrsvsz_out:
239 case EXT3_IOC_GROUP_EXTEND: { 239 case EXT3_IOC_GROUP_EXTEND: {
240 ext3_fsblk_t n_blocks_count; 240 ext3_fsblk_t n_blocks_count;
241 struct super_block *sb = inode->i_sb; 241 struct super_block *sb = inode->i_sb;
242 int err; 242 int err, err2;
243 243
244 if (!capable(CAP_SYS_RESOURCE)) 244 if (!capable(CAP_SYS_RESOURCE))
245 return -EPERM; 245 return -EPERM;
@@ -254,8 +254,10 @@ setrsvsz_out:
254 } 254 }
255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
256 journal_lock_updates(EXT3_SB(sb)->s_journal); 256 journal_lock_updates(EXT3_SB(sb)->s_journal);
257 journal_flush(EXT3_SB(sb)->s_journal); 257 err2 = journal_flush(EXT3_SB(sb)->s_journal);
258 journal_unlock_updates(EXT3_SB(sb)->s_journal); 258 journal_unlock_updates(EXT3_SB(sb)->s_journal);
259 if (err == 0)
260 err = err2;
259group_extend_out: 261group_extend_out:
260 mnt_drop_write(filp->f_path.mnt); 262 mnt_drop_write(filp->f_path.mnt);
261 return err; 263 return err;
@@ -263,7 +265,7 @@ group_extend_out:
263 case EXT3_IOC_GROUP_ADD: { 265 case EXT3_IOC_GROUP_ADD: {
264 struct ext3_new_group_data input; 266 struct ext3_new_group_data input;
265 struct super_block *sb = inode->i_sb; 267 struct super_block *sb = inode->i_sb;
266 int err; 268 int err, err2;
267 269
268 if (!capable(CAP_SYS_RESOURCE)) 270 if (!capable(CAP_SYS_RESOURCE))
269 return -EPERM; 271 return -EPERM;
@@ -280,8 +282,10 @@ group_extend_out:
280 282
281 err = ext3_group_add(sb, &input); 283 err = ext3_group_add(sb, &input);
282 journal_lock_updates(EXT3_SB(sb)->s_journal); 284 journal_lock_updates(EXT3_SB(sb)->s_journal);
283 journal_flush(EXT3_SB(sb)->s_journal); 285 err2 = journal_flush(EXT3_SB(sb)->s_journal);
284 journal_unlock_updates(EXT3_SB(sb)->s_journal); 286 journal_unlock_updates(EXT3_SB(sb)->s_journal);
287 if (err == 0)
288 err = err2;
285group_add_out: 289group_add_out:
286 mnt_drop_write(filp->f_path.mnt); 290 mnt_drop_write(filp->f_path.mnt);
287 return err; 291 return err;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index de13e919cd81..3e5edc92aa0b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -159,7 +159,7 @@ static void dx_set_count (struct dx_entry *entries, unsigned value);
159static void dx_set_limit (struct dx_entry *entries, unsigned value); 159static void dx_set_limit (struct dx_entry *entries, unsigned value);
160static unsigned dx_root_limit (struct inode *dir, unsigned infosize); 160static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
161static unsigned dx_node_limit (struct inode *dir); 161static unsigned dx_node_limit (struct inode *dir);
162static struct dx_frame *dx_probe(struct dentry *dentry, 162static struct dx_frame *dx_probe(struct qstr *entry,
163 struct inode *dir, 163 struct inode *dir,
164 struct dx_hash_info *hinfo, 164 struct dx_hash_info *hinfo,
165 struct dx_frame *frame, 165 struct dx_frame *frame,
@@ -176,8 +176,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
176 struct dx_frame *frame, 176 struct dx_frame *frame,
177 struct dx_frame *frames, 177 struct dx_frame *frames,
178 __u32 *start_hash); 178 __u32 *start_hash);
179static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, 179static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
180 struct ext3_dir_entry_2 **res_dir, int *err); 180 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
181 int *err);
181static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, 182static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
182 struct inode *inode); 183 struct inode *inode);
183 184
@@ -342,7 +343,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
342 * back to userspace. 343 * back to userspace.
343 */ 344 */
344static struct dx_frame * 345static struct dx_frame *
345dx_probe(struct dentry *dentry, struct inode *dir, 346dx_probe(struct qstr *entry, struct inode *dir,
346 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 347 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
347{ 348{
348 unsigned count, indirect; 349 unsigned count, indirect;
@@ -353,8 +354,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
353 u32 hash; 354 u32 hash;
354 355
355 frame->bh = NULL; 356 frame->bh = NULL;
356 if (dentry)
357 dir = dentry->d_parent->d_inode;
358 if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) 357 if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
359 goto fail; 358 goto fail;
360 root = (struct dx_root *) bh->b_data; 359 root = (struct dx_root *) bh->b_data;
@@ -370,8 +369,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
370 } 369 }
371 hinfo->hash_version = root->info.hash_version; 370 hinfo->hash_version = root->info.hash_version;
372 hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; 371 hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
373 if (dentry) 372 if (entry)
374 ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); 373 ext3fs_dirhash(entry->name, entry->len, hinfo);
375 hash = hinfo->hash; 374 hash = hinfo->hash;
376 375
377 if (root->info.unused_flags & 1) { 376 if (root->info.unused_flags & 1) {
@@ -803,15 +802,15 @@ static inline int ext3_match (int len, const char * const name,
803 */ 802 */
804static inline int search_dirblock(struct buffer_head * bh, 803static inline int search_dirblock(struct buffer_head * bh,
805 struct inode *dir, 804 struct inode *dir,
806 struct dentry *dentry, 805 struct qstr *child,
807 unsigned long offset, 806 unsigned long offset,
808 struct ext3_dir_entry_2 ** res_dir) 807 struct ext3_dir_entry_2 ** res_dir)
809{ 808{
810 struct ext3_dir_entry_2 * de; 809 struct ext3_dir_entry_2 * de;
811 char * dlimit; 810 char * dlimit;
812 int de_len; 811 int de_len;
813 const char *name = dentry->d_name.name; 812 const char *name = child->name;
814 int namelen = dentry->d_name.len; 813 int namelen = child->len;
815 814
816 de = (struct ext3_dir_entry_2 *) bh->b_data; 815 de = (struct ext3_dir_entry_2 *) bh->b_data;
817 dlimit = bh->b_data + dir->i_sb->s_blocksize; 816 dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -850,8 +849,9 @@ static inline int search_dirblock(struct buffer_head * bh,
850 * The returned buffer_head has ->b_count elevated. The caller is expected 849 * The returned buffer_head has ->b_count elevated. The caller is expected
851 * to brelse() it when appropriate. 850 * to brelse() it when appropriate.
852 */ 851 */
853static struct buffer_head * ext3_find_entry (struct dentry *dentry, 852static struct buffer_head *ext3_find_entry(struct inode *dir,
854 struct ext3_dir_entry_2 ** res_dir) 853 struct qstr *entry,
854 struct ext3_dir_entry_2 **res_dir)
855{ 855{
856 struct super_block * sb; 856 struct super_block * sb;
857 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 857 struct buffer_head * bh_use[NAMEI_RA_SIZE];
@@ -863,16 +863,15 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
863 buffer */ 863 buffer */
864 int num = 0; 864 int num = 0;
865 int nblocks, i, err; 865 int nblocks, i, err;
866 struct inode *dir = dentry->d_parent->d_inode;
867 int namelen; 866 int namelen;
868 867
869 *res_dir = NULL; 868 *res_dir = NULL;
870 sb = dir->i_sb; 869 sb = dir->i_sb;
871 namelen = dentry->d_name.len; 870 namelen = entry->len;
872 if (namelen > EXT3_NAME_LEN) 871 if (namelen > EXT3_NAME_LEN)
873 return NULL; 872 return NULL;
874 if (is_dx(dir)) { 873 if (is_dx(dir)) {
875 bh = ext3_dx_find_entry(dentry, res_dir, &err); 874 bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
876 /* 875 /*
877 * On success, or if the error was file not found, 876 * On success, or if the error was file not found,
878 * return. Otherwise, fall back to doing a search the 877 * return. Otherwise, fall back to doing a search the
@@ -923,7 +922,7 @@ restart:
923 brelse(bh); 922 brelse(bh);
924 goto next; 923 goto next;
925 } 924 }
926 i = search_dirblock(bh, dir, dentry, 925 i = search_dirblock(bh, dir, entry,
927 block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); 926 block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
928 if (i == 1) { 927 if (i == 1) {
929 EXT3_I(dir)->i_dir_start_lookup = block; 928 EXT3_I(dir)->i_dir_start_lookup = block;
@@ -957,8 +956,9 @@ cleanup_and_exit:
957 return ret; 956 return ret;
958} 957}
959 958
960static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, 959static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
961 struct ext3_dir_entry_2 **res_dir, int *err) 960 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
961 int *err)
962{ 962{
963 struct super_block * sb; 963 struct super_block * sb;
964 struct dx_hash_info hinfo; 964 struct dx_hash_info hinfo;
@@ -968,14 +968,13 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
968 struct buffer_head *bh; 968 struct buffer_head *bh;
969 unsigned long block; 969 unsigned long block;
970 int retval; 970 int retval;
971 int namelen = dentry->d_name.len; 971 int namelen = entry->len;
972 const u8 *name = dentry->d_name.name; 972 const u8 *name = entry->name;
973 struct inode *dir = dentry->d_parent->d_inode;
974 973
975 sb = dir->i_sb; 974 sb = dir->i_sb;
976 /* NFS may look up ".." - look at dx_root directory block */ 975 /* NFS may look up ".." - look at dx_root directory block */
977 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ 976 if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
978 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 977 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
979 return NULL; 978 return NULL;
980 } else { 979 } else {
981 frame = frames; 980 frame = frames;
@@ -1036,7 +1035,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1036 if (dentry->d_name.len > EXT3_NAME_LEN) 1035 if (dentry->d_name.len > EXT3_NAME_LEN)
1037 return ERR_PTR(-ENAMETOOLONG); 1036 return ERR_PTR(-ENAMETOOLONG);
1038 1037
1039 bh = ext3_find_entry(dentry, &de); 1038 bh = ext3_find_entry(dir, &dentry->d_name, &de);
1040 inode = NULL; 1039 inode = NULL;
1041 if (bh) { 1040 if (bh) {
1042 unsigned long ino = le32_to_cpu(de->inode); 1041 unsigned long ino = le32_to_cpu(de->inode);
@@ -1057,18 +1056,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1057struct dentry *ext3_get_parent(struct dentry *child) 1056struct dentry *ext3_get_parent(struct dentry *child)
1058{ 1057{
1059 unsigned long ino; 1058 unsigned long ino;
1060 struct dentry *parent; 1059 struct qstr dotdot = {.name = "..", .len = 2};
1061 struct inode *inode;
1062 struct dentry dotdot;
1063 struct ext3_dir_entry_2 * de; 1060 struct ext3_dir_entry_2 * de;
1064 struct buffer_head *bh; 1061 struct buffer_head *bh;
1065 1062
1066 dotdot.d_name.name = ".."; 1063 bh = ext3_find_entry(child->d_inode, &dotdot, &de);
1067 dotdot.d_name.len = 2;
1068 dotdot.d_parent = child; /* confusing, isn't it! */
1069
1070 bh = ext3_find_entry(&dotdot, &de);
1071 inode = NULL;
1072 if (!bh) 1064 if (!bh)
1073 return ERR_PTR(-ENOENT); 1065 return ERR_PTR(-ENOENT);
1074 ino = le32_to_cpu(de->inode); 1066 ino = le32_to_cpu(de->inode);
@@ -1080,16 +1072,7 @@ struct dentry *ext3_get_parent(struct dentry *child)
1080 return ERR_PTR(-EIO); 1072 return ERR_PTR(-EIO);
1081 } 1073 }
1082 1074
1083 inode = ext3_iget(child->d_inode->i_sb, ino); 1075 return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino));
1084 if (IS_ERR(inode))
1085 return ERR_CAST(inode);
1086
1087 parent = d_alloc_anon(inode);
1088 if (!parent) {
1089 iput(inode);
1090 parent = ERR_PTR(-ENOMEM);
1091 }
1092 return parent;
1093} 1076}
1094 1077
1095#define S_SHIFT 12 1078#define S_SHIFT 12
@@ -1503,7 +1486,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1503 struct ext3_dir_entry_2 *de; 1486 struct ext3_dir_entry_2 *de;
1504 int err; 1487 int err;
1505 1488
1506 frame = dx_probe(dentry, NULL, &hinfo, frames, &err); 1489 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1507 if (!frame) 1490 if (!frame)
1508 return err; 1491 return err;
1509 entries = frame->entries; 1492 entries = frame->entries;
@@ -2056,7 +2039,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
2056 return PTR_ERR(handle); 2039 return PTR_ERR(handle);
2057 2040
2058 retval = -ENOENT; 2041 retval = -ENOENT;
2059 bh = ext3_find_entry (dentry, &de); 2042 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2060 if (!bh) 2043 if (!bh)
2061 goto end_rmdir; 2044 goto end_rmdir;
2062 2045
@@ -2118,7 +2101,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2118 handle->h_sync = 1; 2101 handle->h_sync = 1;
2119 2102
2120 retval = -ENOENT; 2103 retval = -ENOENT;
2121 bh = ext3_find_entry (dentry, &de); 2104 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2122 if (!bh) 2105 if (!bh)
2123 goto end_unlink; 2106 goto end_unlink;
2124 2107
@@ -2276,7 +2259,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2276 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2259 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2277 handle->h_sync = 1; 2260 handle->h_sync = 1;
2278 2261
2279 old_bh = ext3_find_entry (old_dentry, &old_de); 2262 old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de);
2280 /* 2263 /*
2281 * Check for inode number is _not_ due to possible IO errors. 2264 * Check for inode number is _not_ due to possible IO errors.
2282 * We might rmdir the source, keep it as pwd of some process 2265 * We might rmdir the source, keep it as pwd of some process
@@ -2289,7 +2272,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2289 goto end_rename; 2272 goto end_rename;
2290 2273
2291 new_inode = new_dentry->d_inode; 2274 new_inode = new_dentry->d_inode;
2292 new_bh = ext3_find_entry (new_dentry, &new_de); 2275 new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de);
2293 if (new_bh) { 2276 if (new_bh) {
2294 if (!new_inode) { 2277 if (!new_inode) {
2295 brelse (new_bh); 2278 brelse (new_bh);
@@ -2355,7 +2338,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2355 struct buffer_head *old_bh2; 2338 struct buffer_head *old_bh2;
2356 struct ext3_dir_entry_2 *old_de2; 2339 struct ext3_dir_entry_2 *old_de2;
2357 2340
2358 old_bh2 = ext3_find_entry(old_dentry, &old_de2); 2341 old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name,
2342 &old_de2);
2359 if (old_bh2) { 2343 if (old_bh2) {
2360 retval = ext3_delete_entry(handle, old_dir, 2344 retval = ext3_delete_entry(handle, old_dir,
2361 old_de2, old_bh2); 2345 old_de2, old_bh2);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e94..78fdf3836370 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
790 790
791 if (reserved_gdb || gdb_off == 0) { 791 if (reserved_gdb || gdb_off == 0) {
792 if (!EXT3_HAS_COMPAT_FEATURE(sb, 792 if (!EXT3_HAS_COMPAT_FEATURE(sb,
793 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 793 EXT3_FEATURE_COMPAT_RESIZE_INODE)
794 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
794 ext3_warning(sb, __func__, 795 ext3_warning(sb, __func__,
795 "No reserved GDT blocks, can't resize"); 796 "No reserved GDT blocks, can't resize");
796 return -EPERM; 797 return -EPERM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c556..5dec6d1356c4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -281,7 +281,8 @@ void ext3_abort (struct super_block * sb, const char * function,
281 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 281 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
282 sb->s_flags |= MS_RDONLY; 282 sb->s_flags |= MS_RDONLY;
283 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 283 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
284 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 284 if (EXT3_SB(sb)->s_journal)
285 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
285} 286}
286 287
287void ext3_warning (struct super_block * sb, const char * function, 288void ext3_warning (struct super_block * sb, const char * function,
@@ -347,7 +348,7 @@ fail:
347static int ext3_blkdev_put(struct block_device *bdev) 348static int ext3_blkdev_put(struct block_device *bdev)
348{ 349{
349 bd_release(bdev); 350 bd_release(bdev);
350 return blkdev_put(bdev); 351 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
351} 352}
352 353
353static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 354static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@ -390,10 +391,14 @@ static void ext3_put_super (struct super_block * sb)
390{ 391{
391 struct ext3_sb_info *sbi = EXT3_SB(sb); 392 struct ext3_sb_info *sbi = EXT3_SB(sb);
392 struct ext3_super_block *es = sbi->s_es; 393 struct ext3_super_block *es = sbi->s_es;
393 int i; 394 int i, err;
394 395
395 ext3_xattr_put_super(sb); 396 ext3_xattr_put_super(sb);
396 journal_destroy(sbi->s_journal); 397 err = journal_destroy(sbi->s_journal);
398 sbi->s_journal = NULL;
399 if (err < 0)
400 ext3_abort(sb, __func__, "Couldn't clean up the journal");
401
397 if (!(sb->s_flags & MS_RDONLY)) { 402 if (!(sb->s_flags & MS_RDONLY)) {
398 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 403 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
399 es->s_state = cpu_to_le16(sbi->s_mount_state); 404 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -625,6 +630,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
625 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) 630 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
626 seq_puts(seq, ",data=writeback"); 631 seq_puts(seq, ",data=writeback");
627 632
633 if (test_opt(sb, DATA_ERR_ABORT))
634 seq_puts(seq, ",data_err=abort");
635
628 ext3_show_quota_options(seq, sb); 636 ext3_show_quota_options(seq, sb);
629 637
630 return 0; 638 return 0;
@@ -754,6 +762,7 @@ enum {
754 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 762 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
755 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 763 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
756 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 764 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
765 Opt_data_err_abort, Opt_data_err_ignore,
757 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 766 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
758 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 767 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
759 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 768 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +805,8 @@ static const match_table_t tokens = {
796 {Opt_data_journal, "data=journal"}, 805 {Opt_data_journal, "data=journal"},
797 {Opt_data_ordered, "data=ordered"}, 806 {Opt_data_ordered, "data=ordered"},
798 {Opt_data_writeback, "data=writeback"}, 807 {Opt_data_writeback, "data=writeback"},
808 {Opt_data_err_abort, "data_err=abort"},
809 {Opt_data_err_ignore, "data_err=ignore"},
799 {Opt_offusrjquota, "usrjquota="}, 810 {Opt_offusrjquota, "usrjquota="},
800 {Opt_usrjquota, "usrjquota=%s"}, 811 {Opt_usrjquota, "usrjquota=%s"},
801 {Opt_offgrpjquota, "grpjquota="}, 812 {Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1022,12 @@ static int parse_options (char *options, struct super_block *sb,
1011 sbi->s_mount_opt |= data_opt; 1022 sbi->s_mount_opt |= data_opt;
1012 } 1023 }
1013 break; 1024 break;
1025 case Opt_data_err_abort:
1026 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1027 break;
1028 case Opt_data_err_ignore:
1029 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1030 break;
1014#ifdef CONFIG_QUOTA 1031#ifdef CONFIG_QUOTA
1015 case Opt_usrjquota: 1032 case Opt_usrjquota:
1016 qtype = USRQUOTA; 1033 qtype = USRQUOTA;
@@ -1986,6 +2003,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
1986 journal->j_flags |= JFS_BARRIER; 2003 journal->j_flags |= JFS_BARRIER;
1987 else 2004 else
1988 journal->j_flags &= ~JFS_BARRIER; 2005 journal->j_flags &= ~JFS_BARRIER;
2006 if (test_opt(sb, DATA_ERR_ABORT))
2007 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2008 else
2009 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
1989 spin_unlock(&journal->j_state_lock); 2010 spin_unlock(&journal->j_state_lock);
1990} 2011}
1991 2012
@@ -2050,7 +2071,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2050 if (bd_claim(bdev, sb)) { 2071 if (bd_claim(bdev, sb)) {
2051 printk(KERN_ERR 2072 printk(KERN_ERR
2052 "EXT3: failed to claim external journal device.\n"); 2073 "EXT3: failed to claim external journal device.\n");
2053 blkdev_put(bdev); 2074 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2054 return NULL; 2075 return NULL;
2055 } 2076 }
2056 2077
@@ -2280,7 +2301,9 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2280 journal_t *journal = EXT3_SB(sb)->s_journal; 2301 journal_t *journal = EXT3_SB(sb)->s_journal;
2281 2302
2282 journal_lock_updates(journal); 2303 journal_lock_updates(journal);
2283 journal_flush(journal); 2304 if (journal_flush(journal) < 0)
2305 goto out;
2306
2284 lock_super(sb); 2307 lock_super(sb);
2285 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2308 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2286 sb->s_flags & MS_RDONLY) { 2309 sb->s_flags & MS_RDONLY) {
@@ -2289,6 +2312,8 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2289 ext3_commit_super(sb, es, 1); 2312 ext3_commit_super(sb, es, 1);
2290 } 2313 }
2291 unlock_super(sb); 2314 unlock_super(sb);
2315
2316out:
2292 journal_unlock_updates(journal); 2317 journal_unlock_updates(journal);
2293} 2318}
2294 2319
@@ -2365,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb)
2365 2390
2366static int ext3_sync_fs(struct super_block *sb, int wait) 2391static int ext3_sync_fs(struct super_block *sb, int wait)
2367{ 2392{
2368 tid_t target;
2369
2370 sb->s_dirt = 0; 2393 sb->s_dirt = 0;
2371 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2394 if (wait)
2372 if (wait) 2395 ext3_force_commit(sb);
2373 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2396 else
2374 } 2397 journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
2398
2375 return 0; 2399 return 0;
2376} 2400}
2377 2401
@@ -2388,7 +2412,13 @@ static void ext3_write_super_lockfs(struct super_block *sb)
2388 2412
2389 /* Now we set up the journal barrier. */ 2413 /* Now we set up the journal barrier. */
2390 journal_lock_updates(journal); 2414 journal_lock_updates(journal);
2391 journal_flush(journal); 2415
2416 /*
2417 * We don't want to clear needs_recovery flag when we failed
2418 * to flush the journal.
2419 */
2420 if (journal_flush(journal) < 0)
2421 return;
2392 2422
2393 /* Journal blocked and flushed, clear needs_recovery flag. */ 2423 /* Journal blocked and flushed, clear needs_recovery flag. */
2394 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2424 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
@@ -2767,30 +2797,30 @@ static int ext3_quota_on_mount(struct super_block *sb, int type)
2767 * Standard function to be called on quota_on 2797 * Standard function to be called on quota_on
2768 */ 2798 */
2769static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2799static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2770 char *path, int remount) 2800 char *name, int remount)
2771{ 2801{
2772 int err; 2802 int err;
2773 struct nameidata nd; 2803 struct path path;
2774 2804
2775 if (!test_opt(sb, QUOTA)) 2805 if (!test_opt(sb, QUOTA))
2776 return -EINVAL; 2806 return -EINVAL;
2777 /* When remounting, no checks are needed and in fact, path is NULL */ 2807 /* When remounting, no checks are needed and in fact, name is NULL */
2778 if (remount) 2808 if (remount)
2779 return vfs_quota_on(sb, type, format_id, path, remount); 2809 return vfs_quota_on(sb, type, format_id, name, remount);
2780 2810
2781 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2811 err = kern_path(name, LOOKUP_FOLLOW, &path);
2782 if (err) 2812 if (err)
2783 return err; 2813 return err;
2784 2814
2785 /* Quotafile not on the same filesystem? */ 2815 /* Quotafile not on the same filesystem? */
2786 if (nd.path.mnt->mnt_sb != sb) { 2816 if (path.mnt->mnt_sb != sb) {
2787 path_put(&nd.path); 2817 path_put(&path);
2788 return -EXDEV; 2818 return -EXDEV;
2789 } 2819 }
2790 /* Journaling quota? */ 2820 /* Journaling quota? */
2791 if (EXT3_SB(sb)->s_qf_names[type]) { 2821 if (EXT3_SB(sb)->s_qf_names[type]) {
2792 /* Quotafile not of fs root? */ 2822 /* Quotafile not of fs root? */
2793 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2823 if (path.dentry->d_parent != sb->s_root)
2794 printk(KERN_WARNING 2824 printk(KERN_WARNING
2795 "EXT3-fs: Quota file not on filesystem root. " 2825 "EXT3-fs: Quota file not on filesystem root. "
2796 "Journaled quota will not work.\n"); 2826 "Journaled quota will not work.\n");
@@ -2800,18 +2830,22 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2800 * When we journal data on quota file, we have to flush journal to see 2830 * When we journal data on quota file, we have to flush journal to see
2801 * all updates to the file when we bypass pagecache... 2831 * all updates to the file when we bypass pagecache...
2802 */ 2832 */
2803 if (ext3_should_journal_data(nd.path.dentry->d_inode)) { 2833 if (ext3_should_journal_data(path.dentry->d_inode)) {
2804 /* 2834 /*
2805 * We don't need to lock updates but journal_flush() could 2835 * We don't need to lock updates but journal_flush() could
2806 * otherwise be livelocked... 2836 * otherwise be livelocked...
2807 */ 2837 */
2808 journal_lock_updates(EXT3_SB(sb)->s_journal); 2838 journal_lock_updates(EXT3_SB(sb)->s_journal);
2809 journal_flush(EXT3_SB(sb)->s_journal); 2839 err = journal_flush(EXT3_SB(sb)->s_journal);
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2840 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2841 if (err) {
2842 path_put(&path);
2843 return err;
2844 }
2811 } 2845 }
2812 2846
2813 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 2847 err = vfs_quota_on_path(sb, type, format_id, &path);
2814 path_put(&nd.path); 2848 path_put(&path);
2815 return err; 2849 return err;
2816} 2850}
2817 2851
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 000000000000..7505482a08fa
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
1config EXT4_FS
2 tristate "The Extended 4 (ext4) filesystem"
3 select JBD2
4 select CRC16
5 help
6 This is the next generation of the ext3 filesystem.
7
8 Unlike the change from ext2 filesystem to ext3 filesystem,
9 the on-disk format of ext4 is not forwards compatible with
10 ext3; it is based on extent maps and it supports 48-bit
11 physical block numbers. The ext4 filesystem also supports delayed
12 allocation, persistent preallocation, high resolution time stamps,
13 and a number of other features to improve performance and speed
14 up fsck time. For more information, please see the web pages at
15 http://ext4.wiki.kernel.org.
16
17 The ext4 filesystem will support mounting an ext3
18 filesystem; while there will be some performance gains from
19 the delayed allocation and inode table readahead, the best
20 performance gains will require enabling ext4 features in the
21 filesystem, or formating a new filesystem as an ext4
22 filesystem initially.
23
24 To compile this file system support as a module, choose M here. The
25 module will be called ext4.
26
27 If unsure, say N.
28
29config EXT4DEV_COMPAT
30 bool "Enable ext4dev compatibility"
31 depends on EXT4_FS
32 help
33 Starting with 2.6.28, the name of the ext4 filesystem was
34 renamed from ext4dev to ext4. Unfortunately there are some
35 legacy userspace programs (such as klibc's fstype) have
36 "ext4dev" hardcoded.
37
38 To enable backwards compatibility so that systems that are
39 still expecting to mount ext4 filesystems using ext4dev,
40 chose Y here. This feature will go away by 2.6.31, so
41 please arrange to get your userspace programs fixed!
42
43config EXT4_FS_XATTR
44 bool "Ext4 extended attributes"
45 depends on EXT4_FS
46 default y
47 help
48 Extended attributes are name:value pairs associated with inodes by
49 the kernel or by users (see the attr(5) manual page, or visit
50 <http://acl.bestbits.at/> for details).
51
52 If unsure, say N.
53
54 You need this for POSIX ACL support on ext4.
55
56config EXT4_FS_POSIX_ACL
57 bool "Ext4 POSIX Access Control Lists"
58 depends on EXT4_FS_XATTR
59 select FS_POSIX_ACL
60 help
61 POSIX Access Control Lists (ACLs) support permissions for users and
62 groups beyond the owner/group/world scheme.
63
64 To learn more about Access Control Lists, visit the POSIX ACLs for
65 Linux website <http://acl.bestbits.at/>.
66
67 If you don't know what Access Control Lists are, say N
68
69config EXT4_FS_SECURITY
70 bool "Ext4 Security Labels"
71 depends on EXT4_FS_XATTR
72 help
73 Security labels support alternative access control models
74 implemented by security modules like SELinux. This option
75 enables an extended attribute handler for file security
76 labels in the ext4 filesystem.
77
78 If you are not using a security module that requires using
79 extended attributes for file security labels, say N.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b9821be709bd..d2003cdc36aa 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -589,21 +589,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
589 return; 589 return;
590} 590}
591 591
592int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 592/**
593 s64 nblocks) 593 * ext4_has_free_blocks()
594 * @sbi: in-core super block structure.
595 * @nblocks: number of needed blocks
596 *
597 * Check if filesystem has nblocks free & available for allocation.
598 * On success return 1, return 0 on failure.
599 */
600int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
594{ 601{
595 s64 free_blocks, dirty_blocks; 602 s64 free_blocks, dirty_blocks, root_blocks;
596 s64 root_blocks = 0;
597 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 603 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
598 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 604 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
599 605
600 free_blocks = percpu_counter_read_positive(fbc); 606 free_blocks = percpu_counter_read_positive(fbc);
601 dirty_blocks = percpu_counter_read_positive(dbc); 607 dirty_blocks = percpu_counter_read_positive(dbc);
602 608 root_blocks = ext4_r_blocks_count(sbi->s_es);
603 if (!capable(CAP_SYS_RESOURCE) &&
604 sbi->s_resuid != current->fsuid &&
605 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
606 root_blocks = ext4_r_blocks_count(sbi->s_es);
607 609
608 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
609 EXT4_FREEBLOCKS_WATERMARK) { 611 EXT4_FREEBLOCKS_WATERMARK) {
@@ -616,57 +618,32 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
616 } 618 }
617 } 619 }
618 /* Check whether we have space after 620 /* Check whether we have space after
619 * accounting for current dirty blocks 621 * accounting for current dirty blocks & root reserved blocks.
620 */ 622 */
621 if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) 623 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
622 /* we don't have free space */ 624 return 1;
623 return -ENOSPC; 625
626 /* Hm, nope. Are (enough) root reserved blocks available? */
627 if (sbi->s_resuid == current->fsuid ||
628 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
629 capable(CAP_SYS_RESOURCE)) {
630 if (free_blocks >= (nblocks + dirty_blocks))
631 return 1;
632 }
624 633
625 /* Add the blocks to nblocks */
626 percpu_counter_add(dbc, nblocks);
627 return 0; 634 return 0;
628} 635}
629 636
630/** 637int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
631 * ext4_has_free_blocks()
632 * @sbi: in-core super block structure.
633 * @nblocks: number of neeed blocks
634 *
635 * Check if filesystem has free blocks available for allocation.
636 * Return the number of blocks avaible for allocation for this request
637 * On success, return nblocks
638 */
639ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
640 s64 nblocks) 638 s64 nblocks)
641{ 639{
642 s64 free_blocks, dirty_blocks; 640 if (ext4_has_free_blocks(sbi, nblocks)) {
643 s64 root_blocks = 0; 641 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
644 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
645 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
646
647 free_blocks = percpu_counter_read_positive(fbc);
648 dirty_blocks = percpu_counter_read_positive(dbc);
649
650 if (!capable(CAP_SYS_RESOURCE) &&
651 sbi->s_resuid != current->fsuid &&
652 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
653 root_blocks = ext4_r_blocks_count(sbi->s_es);
654
655 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
656 EXT4_FREEBLOCKS_WATERMARK) {
657 free_blocks = percpu_counter_sum(fbc);
658 dirty_blocks = percpu_counter_sum(dbc);
659 }
660 if (free_blocks <= (root_blocks + dirty_blocks))
661 /* we don't have free space */
662 return 0; 642 return 0;
663 643 } else
664 if (free_blocks - (root_blocks + dirty_blocks) < nblocks) 644 return -ENOSPC;
665 return free_blocks - (root_blocks + dirty_blocks);
666 return nblocks;
667} 645}
668 646
669
670/** 647/**
671 * ext4_should_retry_alloc() 648 * ext4_should_retry_alloc()
672 * @sb: super block 649 * @sb: super block
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ca6a2b7632d..fed5b610df5a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -459,17 +459,8 @@ static int ext4_dx_readdir(struct file *filp,
459 if (info->extra_fname) { 459 if (info->extra_fname) {
460 if (call_filldir(filp, dirent, filldir, info->extra_fname)) 460 if (call_filldir(filp, dirent, filldir, info->extra_fname))
461 goto finished; 461 goto finished;
462
463 info->extra_fname = NULL; 462 info->extra_fname = NULL;
464 info->curr_node = rb_next(info->curr_node); 463 goto next_node;
465 if (!info->curr_node) {
466 if (info->next_hash == ~0) {
467 filp->f_pos = EXT4_HTREE_EOF;
468 goto finished;
469 }
470 info->curr_hash = info->next_hash;
471 info->curr_minor_hash = 0;
472 }
473 } else if (!info->curr_node) 464 } else if (!info->curr_node)
474 info->curr_node = rb_first(&info->root); 465 info->curr_node = rb_first(&info->root);
475 466
@@ -501,9 +492,14 @@ static int ext4_dx_readdir(struct file *filp,
501 info->curr_minor_hash = fname->minor_hash; 492 info->curr_minor_hash = fname->minor_hash;
502 if (call_filldir(filp, dirent, filldir, fname)) 493 if (call_filldir(filp, dirent, filldir, fname))
503 break; 494 break;
504 495 next_node:
505 info->curr_node = rb_next(info->curr_node); 496 info->curr_node = rb_next(info->curr_node);
506 if (!info->curr_node) { 497 if (info->curr_node) {
498 fname = rb_entry(info->curr_node, struct fname,
499 rb_hash);
500 info->curr_hash = fname->hash;
501 info->curr_minor_hash = fname->minor_hash;
502 } else {
507 if (info->next_hash == ~0) { 503 if (info->next_hash == ~0) {
508 filp->f_pos = EXT4_HTREE_EOF; 504 filp->f_pos = EXT4_HTREE_EOF;
509 break; 505 break;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4880cc3e6727..b0537c827024 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1003,8 +1003,7 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
1003 ext4_lblk_t iblock, ext4_fsblk_t goal, 1003 ext4_lblk_t iblock, ext4_fsblk_t goal,
1004 unsigned long *count, int *errp); 1004 unsigned long *count, int *errp);
1005extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1005extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1006extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 1006extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1007 s64 nblocks);
1008extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1007extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1009 ext4_fsblk_t block, unsigned long count, int metadata); 1008 ext4_fsblk_t block, unsigned long count, int metadata);
1010extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 1009extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fe34d74cfb19..2a117e286e54 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -718,6 +718,8 @@ got:
718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
719 free = ext4_free_blocks_after_init(sb, group, gdp); 719 free = ext4_free_blocks_after_init(sb, group, gdp);
720 gdp->bg_free_blocks_count = cpu_to_le16(free); 720 gdp->bg_free_blocks_count = cpu_to_le16(free);
721 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
722 gdp);
721 } 723 }
722 spin_unlock(sb_bgl_lock(sbi, group)); 724 spin_unlock(sb_bgl_lock(sbi, group));
723 725
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dbf6953845b..be21a5ae33cb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page,
2329 unlock_page(page); 2329 unlock_page(page);
2330 return 0; 2330 return 0;
2331 } 2331 }
2332 /* now mark the buffer_heads as dirty and uptodate */
2333 block_commit_write(page, 0, PAGE_CACHE_SIZE);
2332 } 2334 }
2333 2335
2334 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2336 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -4580,9 +4582,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4580static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 4582static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4581{ 4583{
4582 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 4584 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4583 return ext4_indirect_trans_blocks(inode, nrblocks, 0); 4585 return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
4584 return ext4_ext_index_trans_blocks(inode, nrblocks, 0); 4586 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
4585} 4587}
4588
4586/* 4589/*
4587 * Account for index blocks, block groups bitmaps and block group 4590 * Account for index blocks, block groups bitmaps and block group
4588 * descriptor blocks if modify datablocks and index blocks 4591 * descriptor blocks if modify datablocks and index blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dfe17a134052..444ad998f72e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4441 else if (block >= (entry->start_blk + entry->count)) 4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right; 4442 n = &(*n)->rb_right;
4443 else { 4443 else {
4444 ext4_unlock_group(sb, group);
4444 ext4_error(sb, __func__, 4445 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n", 4446 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count); 4447 block, entry->start_blk, entry->count);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92db9e945147..63adcb792988 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1061,7 +1061,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1061struct dentry *ext4_get_parent(struct dentry *child) 1061struct dentry *ext4_get_parent(struct dentry *child)
1062{ 1062{
1063 unsigned long ino; 1063 unsigned long ino;
1064 struct dentry *parent;
1065 struct inode *inode; 1064 struct inode *inode;
1066 static const struct qstr dotdot = { 1065 static const struct qstr dotdot = {
1067 .name = "..", 1066 .name = "..",
@@ -1083,16 +1082,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1083 return ERR_PTR(-EIO); 1082 return ERR_PTR(-EIO);
1084 } 1083 }
1085 1084
1086 inode = ext4_iget(child->d_inode->i_sb, ino); 1085 return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1087 if (IS_ERR(inode))
1088 return ERR_CAST(inode);
1089
1090 parent = d_alloc_anon(inode);
1091 if (!parent) {
1092 iput(inode);
1093 parent = ERR_PTR(-ENOMEM);
1094 }
1095 return parent;
1096} 1086}
1097 1087
1098#define S_SHIFT 12 1088#define S_SHIFT 12
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9b2b2bc4ec17..e4a241c65dbe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -333,7 +333,8 @@ void ext4_abort(struct super_block *sb, const char *function,
333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
334 sb->s_flags |= MS_RDONLY; 334 sb->s_flags |= MS_RDONLY;
335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
336 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 336 if (EXT4_SB(sb)->s_journal)
337 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
337} 338}
338 339
339void ext4_warning(struct super_block *sb, const char *function, 340void ext4_warning(struct super_block *sb, const char *function,
@@ -399,7 +400,7 @@ fail:
399static int ext4_blkdev_put(struct block_device *bdev) 400static int ext4_blkdev_put(struct block_device *bdev)
400{ 401{
401 bd_release(bdev); 402 bd_release(bdev);
402 return blkdev_put(bdev); 403 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
403} 404}
404 405
405static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 406static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -442,14 +443,16 @@ static void ext4_put_super(struct super_block *sb)
442{ 443{
443 struct ext4_sb_info *sbi = EXT4_SB(sb); 444 struct ext4_sb_info *sbi = EXT4_SB(sb);
444 struct ext4_super_block *es = sbi->s_es; 445 struct ext4_super_block *es = sbi->s_es;
445 int i; 446 int i, err;
446 447
447 ext4_mb_release(sb); 448 ext4_mb_release(sb);
448 ext4_ext_release(sb); 449 ext4_ext_release(sb);
449 ext4_xattr_put_super(sb); 450 ext4_xattr_put_super(sb);
450 if (jbd2_journal_destroy(sbi->s_journal) < 0) 451 err = jbd2_journal_destroy(sbi->s_journal);
451 ext4_abort(sb, __func__, "Couldn't clean up the journal");
452 sbi->s_journal = NULL; 452 sbi->s_journal = NULL;
453 if (err < 0)
454 ext4_abort(sb, __func__, "Couldn't clean up the journal");
455
453 if (!(sb->s_flags & MS_RDONLY)) { 456 if (!(sb->s_flags & MS_RDONLY)) {
454 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 457 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
455 es->s_state = cpu_to_le16(sbi->s_mount_state); 458 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -1455,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
1455 1458
1456 /* We allocate both existing and potentially added groups */ 1459 /* We allocate both existing and potentially added groups */
1457 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1458 ((sbi->s_es->s_reserved_gdt_blocks +1 ) << 1461 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1459 EXT4_DESC_PER_BLOCK_BITS(sb))) / 1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1460 groups_per_flex;
1461 sbi->s_flex_groups = kzalloc(flex_group_count * 1463 sbi->s_flex_groups = kzalloc(flex_group_count *
1462 sizeof(struct flex_groups), GFP_KERNEL); 1464 sizeof(struct flex_groups), GFP_KERNEL);
1463 if (sbi->s_flex_groups == NULL) { 1465 if (sbi->s_flex_groups == NULL) {
@@ -2553,7 +2555,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2553 if (bd_claim(bdev, sb)) { 2555 if (bd_claim(bdev, sb)) {
2554 printk(KERN_ERR 2556 printk(KERN_ERR
2555 "EXT4: failed to claim external journal device.\n"); 2557 "EXT4: failed to claim external journal device.\n");
2556 blkdev_put(bdev); 2558 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2557 return NULL; 2559 return NULL;
2558 } 2560 }
2559 2561
@@ -2882,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb)
2882/* 2884/*
2883 * Ext4 always journals updates to the superblock itself, so we don't 2885 * Ext4 always journals updates to the superblock itself, so we don't
2884 * have to propagate any other updates to the superblock on disk at this 2886 * have to propagate any other updates to the superblock on disk at this
2885 * point. Just start an async writeback to get the buffers on their way 2887 * point. (We can probably nuke this function altogether, and remove
2886 * to the disk. 2888 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
2887 *
2888 * This implicitly triggers the writebehind on sync().
2889 */ 2889 */
2890
2891static void ext4_write_super(struct super_block *sb) 2890static void ext4_write_super(struct super_block *sb)
2892{ 2891{
2893 if (mutex_trylock(&sb->s_lock) != 0) 2892 if (mutex_trylock(&sb->s_lock) != 0)
@@ -2897,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb)
2897 2896
2898static int ext4_sync_fs(struct super_block *sb, int wait) 2897static int ext4_sync_fs(struct super_block *sb, int wait)
2899{ 2898{
2900 tid_t target; 2899 int ret = 0;
2901 2900
2902 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 2901 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2903 sb->s_dirt = 0; 2902 sb->s_dirt = 0;
2904 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2903 if (wait)
2905 if (wait) 2904 ret = ext4_force_commit(sb);
2906 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2905 else
2907 } 2906 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
2908 return 0; 2907 return ret;
2909} 2908}
2910 2909
2911/* 2910/*
@@ -3328,30 +3327,30 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
3328 * Standard function to be called on quota_on 3327 * Standard function to be called on quota_on
3329 */ 3328 */
3330static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3329static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 char *path, int remount) 3330 char *name, int remount)
3332{ 3331{
3333 int err; 3332 int err;
3334 struct nameidata nd; 3333 struct path path;
3335 3334
3336 if (!test_opt(sb, QUOTA)) 3335 if (!test_opt(sb, QUOTA))
3337 return -EINVAL; 3336 return -EINVAL;
3338 /* When remounting, no checks are needed and in fact, path is NULL */ 3337 /* When remounting, no checks are needed and in fact, name is NULL */
3339 if (remount) 3338 if (remount)
3340 return vfs_quota_on(sb, type, format_id, path, remount); 3339 return vfs_quota_on(sb, type, format_id, name, remount);
3341 3340
3342 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3341 err = kern_path(name, LOOKUP_FOLLOW, &path);
3343 if (err) 3342 if (err)
3344 return err; 3343 return err;
3345 3344
3346 /* Quotafile not on the same filesystem? */ 3345 /* Quotafile not on the same filesystem? */
3347 if (nd.path.mnt->mnt_sb != sb) { 3346 if (path.mnt->mnt_sb != sb) {
3348 path_put(&nd.path); 3347 path_put(&path);
3349 return -EXDEV; 3348 return -EXDEV;
3350 } 3349 }
3351 /* Journaling quota? */ 3350 /* Journaling quota? */
3352 if (EXT4_SB(sb)->s_qf_names[type]) { 3351 if (EXT4_SB(sb)->s_qf_names[type]) {
3353 /* Quotafile not in fs root? */ 3352 /* Quotafile not in fs root? */
3354 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3353 if (path.dentry->d_parent != sb->s_root)
3355 printk(KERN_WARNING 3354 printk(KERN_WARNING
3356 "EXT4-fs: Quota file not on filesystem root. " 3355 "EXT4-fs: Quota file not on filesystem root. "
3357 "Journaled quota will not work.\n"); 3356 "Journaled quota will not work.\n");
@@ -3361,7 +3360,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3361 * When we journal data on quota file, we have to flush journal to see 3360 * When we journal data on quota file, we have to flush journal to see
3362 * all updates to the file when we bypass pagecache... 3361 * all updates to the file when we bypass pagecache...
3363 */ 3362 */
3364 if (ext4_should_journal_data(nd.path.dentry->d_inode)) { 3363 if (ext4_should_journal_data(path.dentry->d_inode)) {
3365 /* 3364 /*
3366 * We don't need to lock updates but journal_flush() could 3365 * We don't need to lock updates but journal_flush() could
3367 * otherwise be livelocked... 3366 * otherwise be livelocked...
@@ -3370,13 +3369,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3370 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3369 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3371 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3370 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3372 if (err) { 3371 if (err) {
3373 path_put(&nd.path); 3372 path_put(&path);
3374 return err; 3373 return err;
3375 } 3374 }
3376 } 3375 }
3377 3376
3378 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 3377 err = vfs_quota_on_path(sb, type, format_id, &path);
3379 path_put(&nd.path); 3378 path_put(&path);
3380 return err; 3379 return err;
3381} 3380}
3382 3381
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index bfb5f06cf2c8..e06190322c1c 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -3,5 +3,9 @@
3# 3#
4 4
5obj-$(CONFIG_FAT_FS) += fat.o 5obj-$(CONFIG_FAT_FS) += fat.o
6obj-$(CONFIG_VFAT_FS) += vfat.o
7obj-$(CONFIG_MSDOS_FS) += msdos.o
6 8
7fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o 9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o
10vfat-y := namei_vfat.o
11msdos-y := namei_msdos.o
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3222f51c41cf..b42602298087 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -9,8 +9,8 @@
9 */ 9 */
10 10
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/msdos_fs.h>
13#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include "fat.h"
14 14
15/* this must be > 0. */ 15/* this must be > 0. */
16#define FAT_MAX_CACHE 8 16#define FAT_MAX_CACHE 8
@@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
293} 293}
294 294
295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, 295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
296 unsigned long *mapped_blocks) 296 unsigned long *mapped_blocks, int create)
297{ 297{
298 struct super_block *sb = inode->i_sb; 298 struct super_block *sb = inode->i_sb;
299 struct msdos_sb_info *sbi = MSDOS_SB(sb); 299 struct msdos_sb_info *sbi = MSDOS_SB(sb);
300 const unsigned long blocksize = sb->s_blocksize;
301 const unsigned char blocksize_bits = sb->s_blocksize_bits;
300 sector_t last_block; 302 sector_t last_block;
301 int cluster, offset; 303 int cluster, offset;
302 304
@@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
309 } 311 }
310 return 0; 312 return 0;
311 } 313 }
312 last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) 314
313 >> sb->s_blocksize_bits; 315 last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
314 if (sector >= last_block) 316 if (sector >= last_block) {
315 return 0; 317 if (!create)
318 return 0;
319
320 /*
321 * ->mmu_private can access on only allocation path.
322 * (caller must hold ->i_mutex)
323 */
324 last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
325 >> blocksize_bits;
326 if (sector >= last_block)
327 return 0;
328 }
316 329
317 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); 330 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
318 offset = sector & (sbi->sec_per_clus - 1); 331 offset = sector & (sbi->sec_per_clus - 1);
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index cd4a0162e10d..67e058357098 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,11 +16,11 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h>
20#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
21#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
22#include <linux/compat.h> 21#include <linux/compat.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include "fat.h"
24 24
25static inline loff_t fat_make_i_pos(struct super_block *sb, 25static inline loff_t fat_make_i_pos(struct super_block *sb,
26 struct buffer_head *bh, 26 struct buffer_head *bh,
@@ -77,7 +77,7 @@ next:
77 77
78 *bh = NULL; 78 *bh = NULL;
79 iblock = *pos >> sb->s_blocksize_bits; 79 iblock = *pos >> sb->s_blocksize_bits;
80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks); 80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0);
81 if (err || !phys) 81 if (err || !phys)
82 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
83 83
@@ -86,7 +86,7 @@ next:
86 *bh = sb_bread(sb, phys); 86 *bh = sb_bread(sb, phys);
87 if (*bh == NULL) { 87 if (*bh == NULL) {
88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
89 (unsigned long long)phys); 89 (llu)phys);
90 /* skip this block */ 90 /* skip this block */
91 *pos = (iblock + 1) << sb->s_blocksize_bits; 91 *pos = (iblock + 1) << sb->s_blocksize_bits;
92 goto next; 92 goto next;
@@ -373,9 +373,10 @@ parse_record:
373 if (de->attr == ATTR_EXT) { 373 if (de->attr == ATTR_EXT) {
374 int status = fat_parse_long(inode, &cpos, &bh, &de, 374 int status = fat_parse_long(inode, &cpos, &bh, &de,
375 &unicode, &nr_slots); 375 &unicode, &nr_slots);
376 if (status < 0) 376 if (status < 0) {
377 return status; 377 err = status;
378 else if (status == PARSE_INVALID) 378 goto end_of_dir;
379 } else if (status == PARSE_INVALID)
379 continue; 380 continue;
380 else if (status == PARSE_NOT_LONGNAME) 381 else if (status == PARSE_NOT_LONGNAME)
381 goto parse_record; 382 goto parse_record;
@@ -832,6 +833,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
832#endif /* CONFIG_COMPAT */ 833#endif /* CONFIG_COMPAT */
833 834
834const struct file_operations fat_dir_operations = { 835const struct file_operations fat_dir_operations = {
836 .llseek = generic_file_llseek,
835 .read = generic_read_dir, 837 .read = generic_read_dir,
836 .readdir = fat_readdir, 838 .readdir = fat_readdir,
837 .ioctl = fat_dir_ioctl, 839 .ioctl = fat_dir_ioctl,
@@ -839,6 +841,7 @@ const struct file_operations fat_dir_operations = {
839 .compat_ioctl = fat_compat_dir_ioctl, 841 .compat_ioctl = fat_compat_dir_ioctl,
840#endif 842#endif
841 .fsync = file_fsync, 843 .fsync = file_fsync,
844 .llseek = generic_file_llseek,
842}; 845};
843 846
844static int fat_get_short_entry(struct inode *dir, loff_t *pos, 847static int fat_get_short_entry(struct inode *dir, loff_t *pos,
@@ -1088,6 +1091,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1088 struct msdos_dir_entry *de; 1091 struct msdos_dir_entry *de;
1089 sector_t blknr; 1092 sector_t blknr;
1090 __le16 date, time; 1093 __le16 date, time;
1094 u8 time_cs;
1091 int err, cluster; 1095 int err, cluster;
1092 1096
1093 err = fat_alloc_clusters(dir, &cluster, 1); 1097 err = fat_alloc_clusters(dir, &cluster, 1);
@@ -1101,7 +1105,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1101 goto error_free; 1105 goto error_free;
1102 } 1106 }
1103 1107
1104 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 1108 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
1105 1109
1106 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1110 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1107 /* filling the new directory slots ("." and ".." entries) */ 1111 /* filling the new directory slots ("." and ".." entries) */
@@ -1111,13 +1115,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1111 de[0].lcase = de[1].lcase = 0; 1115 de[0].lcase = de[1].lcase = 0;
1112 de[0].time = de[1].time = time; 1116 de[0].time = de[1].time = time;
1113 de[0].date = de[1].date = date; 1117 de[0].date = de[1].date = date;
1114 de[0].ctime_cs = de[1].ctime_cs = 0;
1115 if (sbi->options.isvfat) { 1118 if (sbi->options.isvfat) {
1116 /* extra timestamps */ 1119 /* extra timestamps */
1117 de[0].ctime = de[1].ctime = time; 1120 de[0].ctime = de[1].ctime = time;
1121 de[0].ctime_cs = de[1].ctime_cs = time_cs;
1118 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; 1122 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date;
1119 } else { 1123 } else {
1120 de[0].ctime = de[1].ctime = 0; 1124 de[0].ctime = de[1].ctime = 0;
1125 de[0].ctime_cs = de[1].ctime_cs = 0;
1121 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; 1126 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0;
1122 } 1127 }
1123 de[0].start = cpu_to_le16(cluster); 1128 de[0].start = cpu_to_le16(cluster);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
new file mode 100644
index 000000000000..ea440d65819c
--- /dev/null
+++ b/fs/fat/fat.h
@@ -0,0 +1,329 @@
1#ifndef _FAT_H
2#define _FAT_H
3
4#include <linux/buffer_head.h>
5#include <linux/string.h>
6#include <linux/nls.h>
7#include <linux/fs.h>
8#include <linux/mutex.h>
9#include <linux/msdos_fs.h>
10
11/*
12 * vfat shortname flags
13 */
14#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
15#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
16#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
17#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
18#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
19
20struct fat_mount_options {
21 uid_t fs_uid;
22 gid_t fs_gid;
23 unsigned short fs_fmask;
24 unsigned short fs_dmask;
25 unsigned short codepage; /* Codepage for shortname conversions */
26 char *iocharset; /* Charset used for filename input/display */
27 unsigned short shortname; /* flags for shortname display/create rule */
28 unsigned char name_check; /* r = relaxed, n = normal, s = strict */
29 unsigned short allow_utime;/* permission for setting the [am]time */
30 unsigned quiet:1, /* set = fake successful chmods and chowns */
31 showexec:1, /* set = only set x bit for com/exe/bat */
32 sys_immutable:1, /* set = system files are immutable */
33 dotsOK:1, /* set = hidden and system files are named '.filename' */
34 isvfat:1, /* 0=no vfat long filename support, 1=vfat support */
35 utf8:1, /* Use of UTF-8 character set (Default) */
36 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
37 numtail:1, /* Does first alias have a numeric '~1' type tail? */
38 flush:1, /* write things quickly */
39 nocase:1, /* Does this need case conversion? 0=need case conversion*/
40 usefree:1, /* Use free_clusters for FAT32 */
41 tz_utc:1, /* Filesystem timestamps are in UTC */
42 rodir:1; /* allow ATTR_RO for directory */
43};
44
45#define FAT_HASH_BITS 8
46#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS)
47
48/*
49 * MS-DOS file system in-core superblock data
50 */
51struct msdos_sb_info {
52 unsigned short sec_per_clus; /* sectors/cluster */
53 unsigned short cluster_bits; /* log2(cluster_size) */
54 unsigned int cluster_size; /* cluster size */
55 unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */
56 unsigned short fat_start;
57 unsigned long fat_length; /* FAT start & length (sec.) */
58 unsigned long dir_start;
59 unsigned short dir_entries; /* root dir start & entries */
60 unsigned long data_start; /* first data sector */
61 unsigned long max_cluster; /* maximum cluster number */
62 unsigned long root_cluster; /* first cluster of the root directory */
63 unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
64 struct mutex fat_lock;
65 unsigned int prev_free; /* previously allocated cluster number */
66 unsigned int free_clusters; /* -1 if undefined */
67 unsigned int free_clus_valid; /* is free_clusters valid? */
68 struct fat_mount_options options;
69 struct nls_table *nls_disk; /* Codepage used on disk */
70 struct nls_table *nls_io; /* Charset used for input and display */
71 const void *dir_ops; /* Opaque; default directory operations */
72 int dir_per_block; /* dir entries per block */
73 int dir_per_block_bits; /* log2(dir_per_block) */
74
75 int fatent_shift;
76 struct fatent_operations *fatent_ops;
77
78 spinlock_t inode_hash_lock;
79 struct hlist_head inode_hashtable[FAT_HASH_SIZE];
80};
81
82#define FAT_CACHE_VALID 0 /* special case for valid cache */
83
84/*
85 * MS-DOS file system inode data in memory
86 */
87struct msdos_inode_info {
88 spinlock_t cache_lru_lock;
89 struct list_head cache_lru;
90 int nr_caches;
91 /* for avoiding the race between fat_free() and fat_get_cluster() */
92 unsigned int cache_valid_id;
93
94 /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
95 loff_t mmu_private; /* physically allocated size */
96
97 int i_start; /* first cluster or 0 */
98 int i_logstart; /* logical first cluster */
99 int i_attrs; /* unused attribute bits */
100 loff_t i_pos; /* on-disk position of directory entry or 0 */
101 struct hlist_node i_fat_hash; /* hash by i_location */
102 struct inode vfs_inode;
103};
104
105struct fat_slot_info {
106 loff_t i_pos; /* on-disk position of directory entry */
107 loff_t slot_off; /* offset for slot or de start */
108 int nr_slots; /* number of slots + 1(de) in filename */
109 struct msdos_dir_entry *de;
110 struct buffer_head *bh;
111};
112
113static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
114{
115 return sb->s_fs_info;
116}
117
118static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
119{
120 return container_of(inode, struct msdos_inode_info, vfs_inode);
121}
122
123/*
124 * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
125 * save ATTR_RO instead of ->i_mode.
126 *
127 * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only
128 * bit, it's just used as flag for app.
129 */
130static inline int fat_mode_can_hold_ro(struct inode *inode)
131{
132 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
133 mode_t mask;
134
135 if (S_ISDIR(inode->i_mode)) {
136 if (!sbi->options.rodir)
137 return 0;
138 mask = ~sbi->options.fs_dmask;
139 } else
140 mask = ~sbi->options.fs_fmask;
141
142 if (!(mask & S_IWUGO))
143 return 0;
144 return 1;
145}
146
147/* Convert attribute bits and a mask to the UNIX mode. */
148static inline mode_t fat_make_mode(struct msdos_sb_info *sbi,
149 u8 attrs, mode_t mode)
150{
151 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
152 mode &= ~S_IWUGO;
153
154 if (attrs & ATTR_DIR)
155 return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
156 else
157 return (mode & ~sbi->options.fs_fmask) | S_IFREG;
158}
159
160/* Return the FAT attribute byte for this inode */
161static inline u8 fat_make_attrs(struct inode *inode)
162{
163 u8 attrs = MSDOS_I(inode)->i_attrs;
164 if (S_ISDIR(inode->i_mode))
165 attrs |= ATTR_DIR;
166 if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
167 attrs |= ATTR_RO;
168 return attrs;
169}
170
171static inline void fat_save_attrs(struct inode *inode, u8 attrs)
172{
173 if (fat_mode_can_hold_ro(inode))
174 MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED;
175 else
176 MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO);
177}
178
179static inline unsigned char fat_checksum(const __u8 *name)
180{
181 unsigned char s = name[0];
182 s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2];
183 s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4];
184 s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6];
185 s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8];
186 s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10];
187 return s;
188}
189
190static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus)
191{
192 return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus
193 + sbi->data_start;
194}
195
196static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
197{
198#ifdef __BIG_ENDIAN
199 while (len--) {
200 *dst++ = src[0] | (src[1] << 8);
201 src += 2;
202 }
203#else
204 memcpy(dst, src, len * 2);
205#endif
206}
207
208static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
209{
210#ifdef __BIG_ENDIAN
211 while (len--) {
212 dst[0] = *src & 0x00FF;
213 dst[1] = (*src & 0xFF00) >> 8;
214 dst += 2;
215 src++;
216 }
217#else
218 memcpy(dst, src, len * 2);
219#endif
220}
221
222/* fat/cache.c */
223extern void fat_cache_inval_inode(struct inode *inode);
224extern int fat_get_cluster(struct inode *inode, int cluster,
225 int *fclus, int *dclus);
226extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
227 unsigned long *mapped_blocks, int create);
228
229/* fat/dir.c */
230extern const struct file_operations fat_dir_operations;
231extern int fat_search_long(struct inode *inode, const unsigned char *name,
232 int name_len, struct fat_slot_info *sinfo);
233extern int fat_dir_empty(struct inode *dir);
234extern int fat_subdirs(struct inode *dir);
235extern int fat_scan(struct inode *dir, const unsigned char *name,
236 struct fat_slot_info *sinfo);
237extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
238 struct msdos_dir_entry **de, loff_t *i_pos);
239extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
240extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
241 struct fat_slot_info *sinfo);
242extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
243
244/* fat/fatent.c */
245struct fat_entry {
246 int entry;
247 union {
248 u8 *ent12_p[2];
249 __le16 *ent16_p;
250 __le32 *ent32_p;
251 } u;
252 int nr_bhs;
253 struct buffer_head *bhs[2];
254};
255
256static inline void fatent_init(struct fat_entry *fatent)
257{
258 fatent->nr_bhs = 0;
259 fatent->entry = 0;
260 fatent->u.ent32_p = NULL;
261 fatent->bhs[0] = fatent->bhs[1] = NULL;
262}
263
264static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
265{
266 fatent->entry = entry;
267 fatent->u.ent32_p = NULL;
268}
269
270static inline void fatent_brelse(struct fat_entry *fatent)
271{
272 int i;
273 fatent->u.ent32_p = NULL;
274 for (i = 0; i < fatent->nr_bhs; i++)
275 brelse(fatent->bhs[i]);
276 fatent->nr_bhs = 0;
277 fatent->bhs[0] = fatent->bhs[1] = NULL;
278}
279
280extern void fat_ent_access_init(struct super_block *sb);
281extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
282 int entry);
283extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent,
284 int new, int wait);
285extern int fat_alloc_clusters(struct inode *inode, int *cluster,
286 int nr_cluster);
287extern int fat_free_clusters(struct inode *inode, int cluster);
288extern int fat_count_free_clusters(struct super_block *sb);
289
290/* fat/file.c */
291extern int fat_generic_ioctl(struct inode *inode, struct file *filp,
292 unsigned int cmd, unsigned long arg);
293extern const struct file_operations fat_file_operations;
294extern const struct inode_operations fat_file_inode_operations;
295extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
296extern void fat_truncate(struct inode *inode);
297extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
298 struct kstat *stat);
299
300/* fat/inode.c */
301extern void fat_attach(struct inode *inode, loff_t i_pos);
302extern void fat_detach(struct inode *inode);
303extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
304extern struct inode *fat_build_inode(struct super_block *sb,
305 struct msdos_dir_entry *de, loff_t i_pos);
306extern int fat_sync_inode(struct inode *inode);
307extern int fat_fill_super(struct super_block *sb, void *data, int silent,
308 const struct inode_operations *fs_dir_inode_ops, int isvfat);
309
310extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
311 struct inode *i2);
312/* fat/misc.c */
313extern void fat_fs_panic(struct super_block *s, const char *fmt, ...)
314 __attribute__ ((format (printf, 2, 3))) __cold;
315extern void fat_clusters_flush(struct super_block *sb);
316extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
317extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
318 __le16 __time, __le16 __date, u8 time_cs);
319extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
320 __le16 *time, __le16 *date, u8 *time_cs);
321extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
322
323int fat_cache_init(void);
324void fat_cache_destroy(void);
325
326/* helper for printk */
327typedef unsigned long long llu;
328
329#endif /* !_FAT_H */
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index fb98b3d847ed..da6eea47872f 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include "fat.h"
10 11
11struct fatent_operations { 12struct fatent_operations {
12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); 13 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -92,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
92err_brelse: 93err_brelse:
93 brelse(bhs[0]); 94 brelse(bhs[0]);
94err: 95err:
95 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 96 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);
96 (unsigned long long)blocknr);
97 return -EIO; 97 return -EIO;
98} 98}
99 99
@@ -106,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
106 fatent->bhs[0] = sb_bread(sb, blocknr); 106 fatent->bhs[0] = sb_bread(sb, blocknr);
107 if (!fatent->bhs[0]) { 107 if (!fatent->bhs[0]) {
108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
109 (unsigned long long)blocknr); 109 (llu)blocknr);
110 return -EIO; 110 return -EIO;
111 } 111 }
112 fatent->nr_bhs = 1; 112 fatent->nr_bhs = 1;
@@ -316,10 +316,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
316 /* Is this fatent's blocks including this entry? */ 316 /* Is this fatent's blocks including this entry? */
317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) 317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr)
318 return 0; 318 return 0;
319 /* Does this entry need the next block? */ 319 if (sbi->fat_bits == 12) {
320 if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { 320 if ((offset + 1) < sb->s_blocksize) {
321 if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) 321 /* This entry is on bhs[0]. */
322 return 0; 322 if (fatent->nr_bhs == 2) {
323 brelse(bhs[1]);
324 fatent->nr_bhs = 1;
325 }
326 } else {
327 /* This entry needs the next block. */
328 if (fatent->nr_bhs != 2)
329 return 0;
330 if (bhs[1]->b_blocknr != (blocknr + 1))
331 return 0;
332 }
323 } 333 }
324 ops->ent_set_ptr(fatent, offset); 334 ops->ent_set_ptr(fatent, offset);
325 return 1; 335 return 1;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index ddde37025ca6..f06a4e525ece 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,13 +10,13 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/msdos_fs.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
15#include <linux/writeback.h> 14#include <linux/writeback.h>
16#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 16#include <linux/blkdev.h>
18#include <linux/fsnotify.h> 17#include <linux/fsnotify.h>
19#include <linux/security.h> 18#include <linux/security.h>
19#include "fat.h"
20 20
21int fat_generic_ioctl(struct inode *inode, struct file *filp, 21int fat_generic_ioctl(struct inode *inode, struct file *filp,
22 unsigned int cmd, unsigned long arg) 22 unsigned int cmd, unsigned long arg)
@@ -29,10 +29,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
29 { 29 {
30 u32 attr; 30 u32 attr;
31 31
32 if (inode->i_ino == MSDOS_ROOT_INO) 32 mutex_lock(&inode->i_mutex);
33 attr = ATTR_DIR; 33 attr = fat_make_attrs(inode);
34 else 34 mutex_unlock(&inode->i_mutex);
35 attr = fat_attr(inode);
36 35
37 return put_user(attr, user_attr); 36 return put_user(attr, user_attr);
38 } 37 }
@@ -62,20 +61,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
62 /* Merge in ATTR_VOLUME and ATTR_DIR */ 61 /* Merge in ATTR_VOLUME and ATTR_DIR */
63 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | 62 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
64 (is_dir ? ATTR_DIR : 0); 63 (is_dir ? ATTR_DIR : 0);
65 oldattr = fat_attr(inode); 64 oldattr = fat_make_attrs(inode);
66 65
67 /* Equivalent to a chmod() */ 66 /* Equivalent to a chmod() */
68 ia.ia_valid = ATTR_MODE | ATTR_CTIME; 67 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
69 ia.ia_ctime = current_fs_time(inode->i_sb); 68 ia.ia_ctime = current_fs_time(inode->i_sb);
70 if (is_dir) { 69 if (is_dir)
71 ia.ia_mode = MSDOS_MKMODE(attr, 70 ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
72 S_IRWXUGO & ~sbi->options.fs_dmask) 71 else {
73 | S_IFDIR; 72 ia.ia_mode = fat_make_mode(sbi, attr,
74 } else { 73 S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
75 ia.ia_mode = MSDOS_MKMODE(attr,
76 (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO))
77 & ~sbi->options.fs_fmask)
78 | S_IFREG;
79 } 74 }
80 75
81 /* The root directory has no attributes */ 76 /* The root directory has no attributes */
@@ -115,7 +110,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
115 inode->i_flags &= S_IMMUTABLE; 110 inode->i_flags &= S_IMMUTABLE;
116 } 111 }
117 112
118 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 113 fat_save_attrs(inode, attr);
119 mark_inode_dirty(inode); 114 mark_inode_dirty(inode);
120up: 115up:
121 mnt_drop_write(filp->f_path.mnt); 116 mnt_drop_write(filp->f_path.mnt);
@@ -274,7 +269,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
274 269
275 /* 270 /*
276 * Note, the basic check is already done by a caller of 271 * Note, the basic check is already done by a caller of
277 * (attr->ia_mode & ~MSDOS_VALID_MODE) 272 * (attr->ia_mode & ~FAT_VALID_MODE)
278 */ 273 */
279 274
280 if (S_ISREG(inode->i_mode)) 275 if (S_ISREG(inode->i_mode))
@@ -287,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
287 /* 282 /*
288 * Of the r and x bits, all (subject to umask) must be present. Of the 283 * Of the r and x bits, all (subject to umask) must be present. Of the
289 * w bits, either all (subject to umask) or none must be present. 284 * w bits, either all (subject to umask) or none must be present.
285 *
286 * If fat_mode_can_hold_ro(inode) is false, can't change w bits.
290 */ 287 */
291 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) 288 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
292 return -EPERM; 289 return -EPERM;
293 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) 290 if (fat_mode_can_hold_ro(inode)) {
294 return -EPERM; 291 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
292 return -EPERM;
293 } else {
294 if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
295 return -EPERM;
296 }
295 297
296 *mode_ptr &= S_IFMT | perm; 298 *mode_ptr &= S_IFMT | perm;
297 299
@@ -314,13 +316,15 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
314} 316}
315 317
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) 318#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
319/* valid file mode bits */
320#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
317 321
318int fat_setattr(struct dentry *dentry, struct iattr *attr) 322int fat_setattr(struct dentry *dentry, struct iattr *attr)
319{ 323{
320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 324 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
321 struct inode *inode = dentry->d_inode; 325 struct inode *inode = dentry->d_inode;
322 int error = 0;
323 unsigned int ia_valid; 326 unsigned int ia_valid;
327 int error;
324 328
325 /* 329 /*
326 * Expand the file. Since inode_setattr() updates ->i_size 330 * Expand the file. Since inode_setattr() updates ->i_size
@@ -356,7 +360,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
356 ((attr->ia_valid & ATTR_GID) && 360 ((attr->ia_valid & ATTR_GID) &&
357 (attr->ia_gid != sbi->options.fs_gid)) || 361 (attr->ia_gid != sbi->options.fs_gid)) ||
358 ((attr->ia_valid & ATTR_MODE) && 362 ((attr->ia_valid & ATTR_MODE) &&
359 (attr->ia_mode & ~MSDOS_VALID_MODE))) 363 (attr->ia_mode & ~FAT_VALID_MODE)))
360 error = -EPERM; 364 error = -EPERM;
361 365
362 if (error) { 366 if (error) {
@@ -374,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
374 attr->ia_valid &= ~ATTR_MODE; 378 attr->ia_valid &= ~ATTR_MODE;
375 } 379 }
376 380
377 error = inode_setattr(inode, attr); 381 if (attr->ia_valid)
382 error = inode_setattr(inode, attr);
378out: 383out:
379 return error; 384 return error;
380} 385}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d12cdf2a0406..bdd8fb7be2ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -16,7 +16,6 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/msdos_fs.h>
20#include <linux/pagemap.h> 19#include <linux/pagemap.h>
21#include <linux/mpage.h> 20#include <linux/mpage.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
@@ -27,7 +26,9 @@
27#include <linux/uio.h> 26#include <linux/uio.h>
28#include <linux/writeback.h> 27#include <linux/writeback.h>
29#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/hash.h>
30#include <asm/unaligned.h> 30#include <asm/unaligned.h>
31#include "fat.h"
31 32
32#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 33#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
33/* if user don't select VFAT, this is undefined. */ 34/* if user don't select VFAT, this is undefined. */
@@ -63,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
63 sector_t phys; 64 sector_t phys;
64 int err, offset; 65 int err, offset;
65 66
66 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 67 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
67 if (err) 68 if (err)
68 return err; 69 return err;
69 if (phys) { 70 if (phys) {
@@ -93,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
93 *max_blocks = min(mapped_blocks, *max_blocks); 94 *max_blocks = min(mapped_blocks, *max_blocks);
94 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; 95 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
95 96
96 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 97 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
97 if (err) 98 if (err)
98 return err; 99 return err;
99 100
@@ -175,7 +176,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
175 176
176 if (rw == WRITE) { 177 if (rw == WRITE) {
177 /* 178 /*
178 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), 179 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
179 * so we need to update the ->mmu_private to block boundary. 180 * so we need to update the ->mmu_private to block boundary.
180 * 181 *
181 * But we must fill the remaining area or hole by nul for 182 * But we must fill the remaining area or hole by nul for
@@ -198,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
198 199
199static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 200static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
200{ 201{
201 return generic_block_bmap(mapping, block, fat_get_block); 202 sector_t blocknr;
203
204 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
205 mutex_lock(&mapping->host->i_mutex);
206 blocknr = generic_block_bmap(mapping, block, fat_get_block);
207 mutex_unlock(&mapping->host->i_mutex);
208
209 return blocknr;
202} 210}
203 211
204static const struct address_space_operations fat_aops = { 212static const struct address_space_operations fat_aops = {
@@ -247,25 +255,21 @@ static void fat_hash_init(struct super_block *sb)
247 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); 255 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
248} 256}
249 257
250static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) 258static inline unsigned long fat_hash(loff_t i_pos)
251{ 259{
252 unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; 260 return hash_32(i_pos, FAT_HASH_BITS);
253 tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
254 return tmp & FAT_HASH_MASK;
255} 261}
256 262
257void fat_attach(struct inode *inode, loff_t i_pos) 263void fat_attach(struct inode *inode, loff_t i_pos)
258{ 264{
259 struct super_block *sb = inode->i_sb; 265 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
260 struct msdos_sb_info *sbi = MSDOS_SB(sb); 266 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
261 267
262 spin_lock(&sbi->inode_hash_lock); 268 spin_lock(&sbi->inode_hash_lock);
263 MSDOS_I(inode)->i_pos = i_pos; 269 MSDOS_I(inode)->i_pos = i_pos;
264 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, 270 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
265 sbi->inode_hashtable + fat_hash(sb, i_pos));
266 spin_unlock(&sbi->inode_hash_lock); 271 spin_unlock(&sbi->inode_hash_lock);
267} 272}
268
269EXPORT_SYMBOL_GPL(fat_attach); 273EXPORT_SYMBOL_GPL(fat_attach);
270 274
271void fat_detach(struct inode *inode) 275void fat_detach(struct inode *inode)
@@ -276,13 +280,12 @@ void fat_detach(struct inode *inode)
276 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 280 hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
277 spin_unlock(&sbi->inode_hash_lock); 281 spin_unlock(&sbi->inode_hash_lock);
278} 282}
279
280EXPORT_SYMBOL_GPL(fat_detach); 283EXPORT_SYMBOL_GPL(fat_detach);
281 284
282struct inode *fat_iget(struct super_block *sb, loff_t i_pos) 285struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
283{ 286{
284 struct msdos_sb_info *sbi = MSDOS_SB(sb); 287 struct msdos_sb_info *sbi = MSDOS_SB(sb);
285 struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); 288 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
286 struct hlist_node *_p; 289 struct hlist_node *_p;
287 struct msdos_inode_info *i; 290 struct msdos_inode_info *i;
288 struct inode *inode = NULL; 291 struct inode *inode = NULL;
@@ -341,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
341 344
342 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { 345 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
343 inode->i_generation &= ~1; 346 inode->i_generation &= ~1;
344 inode->i_mode = MSDOS_MKMODE(de->attr, 347 inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
345 S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
346 inode->i_op = sbi->dir_ops; 348 inode->i_op = sbi->dir_ops;
347 inode->i_fop = &fat_dir_operations; 349 inode->i_fop = &fat_dir_operations;
348 350
@@ -359,10 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
359 inode->i_nlink = fat_subdirs(inode); 361 inode->i_nlink = fat_subdirs(inode);
360 } else { /* not a directory */ 362 } else { /* not a directory */
361 inode->i_generation |= 1; 363 inode->i_generation |= 1;
362 inode->i_mode = MSDOS_MKMODE(de->attr, 364 inode->i_mode = fat_make_mode(sbi, de->attr,
363 ((sbi->options.showexec && !is_exec(de->name + 8)) 365 ((sbi->options.showexec && !is_exec(de->name + 8))
364 ? S_IRUGO|S_IWUGO : S_IRWXUGO) 366 ? S_IRUGO|S_IWUGO : S_IRWXUGO));
365 & ~sbi->options.fs_fmask) | S_IFREG;
366 MSDOS_I(inode)->i_start = le16_to_cpu(de->start); 367 MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
367 if (sbi->fat_bits == 32) 368 if (sbi->fat_bits == 32)
368 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); 369 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
@@ -378,25 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
378 if (sbi->options.sys_immutable) 379 if (sbi->options.sys_immutable)
379 inode->i_flags |= S_IMMUTABLE; 380 inode->i_flags |= S_IMMUTABLE;
380 } 381 }
381 MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; 382 fat_save_attrs(inode, de->attr);
383
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 384 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 385 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 386
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), 387 fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
386 sbi->options.tz_utc);
387 inode->i_mtime.tv_nsec = 0;
388 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
389 int secs = de->ctime_cs / 100; 389 fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
390 int csecs = de->ctime_cs % 100; 390 de->cdate, de->ctime_cs);
391 inode->i_ctime.tv_sec = 391 fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
392 date_dos2unix(le16_to_cpu(de->ctime),
393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
395 inode->i_ctime.tv_nsec = csecs * 10000000;
396 inode->i_atime.tv_sec =
397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
399 inode->i_atime.tv_nsec = 0;
400 } else 392 } else
401 inode->i_ctime = inode->i_atime = inode->i_mtime; 393 inode->i_ctime = inode->i_atime = inode->i_mtime;
402 394
@@ -443,13 +435,8 @@ static void fat_delete_inode(struct inode *inode)
443 435
444static void fat_clear_inode(struct inode *inode) 436static void fat_clear_inode(struct inode *inode)
445{ 437{
446 struct super_block *sb = inode->i_sb;
447 struct msdos_sb_info *sbi = MSDOS_SB(sb);
448
449 spin_lock(&sbi->inode_hash_lock);
450 fat_cache_inval_inode(inode); 438 fat_cache_inval_inode(inode);
451 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 439 fat_detach(inode);
452 spin_unlock(&sbi->inode_hash_lock);
453} 440}
454 441
455static void fat_write_super(struct super_block *sb) 442static void fat_write_super(struct super_block *sb)
@@ -555,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
555 return 0; 542 return 0;
556} 543}
557 544
545static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
546 struct inode *inode)
547{
548 loff_t i_pos;
549#if BITS_PER_LONG == 32
550 spin_lock(&sbi->inode_hash_lock);
551#endif
552 i_pos = MSDOS_I(inode)->i_pos;
553#if BITS_PER_LONG == 32
554 spin_unlock(&sbi->inode_hash_lock);
555#endif
556 return i_pos;
557}
558
558static int fat_write_inode(struct inode *inode, int wait) 559static int fat_write_inode(struct inode *inode, int wait)
559{ 560{
560 struct super_block *sb = inode->i_sb; 561 struct super_block *sb = inode->i_sb;
@@ -564,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait)
564 loff_t i_pos; 565 loff_t i_pos;
565 int err; 566 int err;
566 567
568 if (inode->i_ino == MSDOS_ROOT_INO)
569 return 0;
570
567retry: 571retry:
568 i_pos = MSDOS_I(inode)->i_pos; 572 i_pos = fat_i_pos_read(sbi, inode);
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 573 if (!i_pos)
570 return 0; 574 return 0;
571 575
572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 576 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
@@ -588,19 +592,17 @@ retry:
588 raw_entry->size = 0; 592 raw_entry->size = 0;
589 else 593 else
590 raw_entry->size = cpu_to_le32(inode->i_size); 594 raw_entry->size = cpu_to_le32(inode->i_size);
591 raw_entry->attr = fat_attr(inode); 595 raw_entry->attr = fat_make_attrs(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 596 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 597 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, 598 fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
595 &raw_entry->date, sbi->options.tz_utc); 599 &raw_entry->date, NULL);
596 if (sbi->options.isvfat) { 600 if (sbi->options.isvfat) {
597 __le16 atime; 601 __le16 atime;
598 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, 602 fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
599 &raw_entry->cdate, sbi->options.tz_utc); 603 &raw_entry->cdate, &raw_entry->ctime_cs);
600 fat_date_unix2dos(inode->i_atime.tv_sec, &atime, 604 fat_time_unix2fat(sbi, &inode->i_atime, &atime,
601 &raw_entry->adate, sbi->options.tz_utc); 605 &raw_entry->adate, NULL);
602 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
603 inode->i_ctime.tv_nsec / 10000000;
604 } 606 }
605 spin_unlock(&sbi->inode_hash_lock); 607 spin_unlock(&sbi->inode_hash_lock);
606 mark_buffer_dirty(bh); 608 mark_buffer_dirty(bh);
@@ -681,33 +683,24 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
681 inode = NULL; 683 inode = NULL;
682 } 684 }
683 } 685 }
684 if (!inode) {
685 /* For now, do nothing
686 * What we could do is:
687 * follow the file starting at fh[4], and record
688 * the ".." entry, and the name of the fh[2] entry.
689 * The follow the ".." file finding the next step up.
690 * This way we build a path to the root of
691 * the tree. If this works, we lookup the path and so
692 * get this inode into the cache.
693 * Finally try the fat_iget lookup again
694 * If that fails, then weare totally out of luck
695 * But all that is for another day
696 */
697 }
698 if (!inode)
699 return ERR_PTR(-ESTALE);
700
701 686
702 /* now to find a dentry. 687 /*
703 * If possible, get a well-connected one 688 * For now, do nothing if the inode is not found.
689 *
690 * What we could do is:
691 *
692 * - follow the file starting at fh[4], and record the ".." entry,
693 * and the name of the fh[2] entry.
694 * - then follow the ".." file finding the next step up.
695 *
696 * This way we build a path to the root of the tree. If this works, we
697 * lookup the path and so get this inode into the cache. Finally try
698 * the fat_iget lookup again. If that fails, then we are totally out
699 * of luck. But all that is for another day
704 */ 700 */
705 result = d_alloc_anon(inode); 701 result = d_obtain_alias(inode);
706 if (result == NULL) { 702 if (!IS_ERR(result))
707 iput(inode); 703 result->d_op = sb->s_root->d_op;
708 return ERR_PTR(-ENOMEM);
709 }
710 result->d_op = sb->s_root->d_op;
711 return result; 704 return result;
712} 705}
713 706
@@ -754,15 +747,8 @@ static struct dentry *fat_get_parent(struct dentry *child)
754 } 747 }
755 inode = fat_build_inode(sb, de, i_pos); 748 inode = fat_build_inode(sb, de, i_pos);
756 brelse(bh); 749 brelse(bh);
757 if (IS_ERR(inode)) { 750
758 parent = ERR_CAST(inode); 751 parent = d_obtain_alias(inode);
759 goto out;
760 }
761 parent = d_alloc_anon(inode);
762 if (!parent) {
763 iput(inode);
764 parent = ERR_PTR(-ENOMEM);
765 }
766out: 752out:
767 unlock_super(sb); 753 unlock_super(sb);
768 754
@@ -835,8 +821,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
835 seq_puts(m, ",uni_xlate"); 821 seq_puts(m, ",uni_xlate");
836 if (!opts->numtail) 822 if (!opts->numtail)
837 seq_puts(m, ",nonumtail"); 823 seq_puts(m, ",nonumtail");
824 if (opts->rodir)
825 seq_puts(m, ",rodir");
838 } 826 }
839 if (sbi->options.flush) 827 if (opts->flush)
840 seq_puts(m, ",flush"); 828 seq_puts(m, ",flush");
841 if (opts->tz_utc) 829 if (opts->tz_utc)
842 seq_puts(m, ",tz=UTC"); 830 seq_puts(m, ",tz=UTC");
@@ -852,7 +840,7 @@ enum {
852 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 840 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
853 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 841 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
854 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 842 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
855 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, 843 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
856}; 844};
857 845
858static const match_table_t fat_tokens = { 846static const match_table_t fat_tokens = {
@@ -924,6 +912,7 @@ static const match_table_t vfat_tokens = {
924 {Opt_nonumtail_yes, "nonumtail=yes"}, 912 {Opt_nonumtail_yes, "nonumtail=yes"},
925 {Opt_nonumtail_yes, "nonumtail=true"}, 913 {Opt_nonumtail_yes, "nonumtail=true"},
926 {Opt_nonumtail_yes, "nonumtail"}, 914 {Opt_nonumtail_yes, "nonumtail"},
915 {Opt_rodir, "rodir"},
927 {Opt_err, NULL} 916 {Opt_err, NULL}
928}; 917};
929 918
@@ -943,10 +932,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
943 opts->allow_utime = -1; 932 opts->allow_utime = -1;
944 opts->codepage = fat_default_codepage; 933 opts->codepage = fat_default_codepage;
945 opts->iocharset = fat_default_iocharset; 934 opts->iocharset = fat_default_iocharset;
946 if (is_vfat) 935 if (is_vfat) {
947 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; 936 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
948 else 937 opts->rodir = 0;
938 } else {
949 opts->shortname = 0; 939 opts->shortname = 0;
940 opts->rodir = 1;
941 }
950 opts->name_check = 'n'; 942 opts->name_check = 'n';
951 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; 943 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
952 opts->utf8 = opts->unicode_xlate = 0; 944 opts->utf8 = opts->unicode_xlate = 0;
@@ -1097,6 +1089,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1097 case Opt_nonumtail_yes: /* empty or 1 or yes or true */ 1089 case Opt_nonumtail_yes: /* empty or 1 or yes or true */
1098 opts->numtail = 0; /* negated option */ 1090 opts->numtail = 0; /* negated option */
1099 break; 1091 break;
1092 case Opt_rodir:
1093 opts->rodir = 1;
1094 break;
1100 1095
1101 /* obsolete mount options */ 1096 /* obsolete mount options */
1102 case Opt_obsolate: 1097 case Opt_obsolate:
@@ -1142,7 +1137,7 @@ static int fat_read_root(struct inode *inode)
1142 inode->i_gid = sbi->options.fs_gid; 1137 inode->i_gid = sbi->options.fs_gid;
1143 inode->i_version++; 1138 inode->i_version++;
1144 inode->i_generation = 0; 1139 inode->i_generation = 0;
1145 inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; 1140 inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
1146 inode->i_op = sbi->dir_ops; 1141 inode->i_op = sbi->dir_ops;
1147 inode->i_fop = &fat_dir_operations; 1142 inode->i_fop = &fat_dir_operations;
1148 if (sbi->fat_bits == 32) { 1143 if (sbi->fat_bits == 32) {
@@ -1159,7 +1154,7 @@ static int fat_read_root(struct inode *inode)
1159 MSDOS_I(inode)->i_logstart = 0; 1154 MSDOS_I(inode)->i_logstart = 0;
1160 MSDOS_I(inode)->mmu_private = inode->i_size; 1155 MSDOS_I(inode)->mmu_private = inode->i_size;
1161 1156
1162 MSDOS_I(inode)->i_attrs = ATTR_NONE; 1157 fat_save_attrs(inode, ATTR_DIR);
1163 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1158 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1164 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1159 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1165 inode->i_nlink = fat_subdirs(inode)+2; 1160 inode->i_nlink = fat_subdirs(inode)+2;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 79fb98ad36d4..ac39ebcc1496 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -8,8 +8,8 @@
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/msdos_fs.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include "fat.h"
13 13
14/* 14/*
15 * fat_fs_panic reports a severe file system problem and sets the file system 15 * fat_fs_panic reports a severe file system problem and sets the file system
@@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
124 mark_inode_dirty(inode); 124 mark_inode_dirty(inode);
125 } 125 }
126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { 126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
127 fat_fs_panic(sb, "clusters badly computed (%d != %lu)", 127 fat_fs_panic(sb, "clusters badly computed (%d != %llu)",
128 new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); 128 new_fclus,
129 (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
129 fat_cache_inval_inode(inode); 130 fat_cache_inval_inode(inode);
130 } 131 }
131 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); 132 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9);
@@ -135,65 +136,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
135 136
136extern struct timezone sys_tz; 137extern struct timezone sys_tz;
137 138
139/*
140 * The epoch of FAT timestamp is 1980.
141 * : bits : value
142 * date: 0 - 4: day (1 - 31)
143 * date: 5 - 8: month (1 - 12)
144 * date: 9 - 15: year (0 - 127) from 1980
145 * time: 0 - 4: sec (0 - 29) 2sec counts
146 * time: 5 - 10: min (0 - 59)
147 * time: 11 - 15: hour (0 - 23)
148 */
149#define SECS_PER_MIN 60
150#define SECS_PER_HOUR (60 * 60)
151#define SECS_PER_DAY (SECS_PER_HOUR * 24)
152#define UNIX_SECS_1980 315532800L
153#if BITS_PER_LONG == 64
154#define UNIX_SECS_2108 4354819200L
155#endif
156/* days between 1.1.70 and 1.1.80 (2 leap days) */
157#define DAYS_DELTA (365 * 10 + 2)
158/* 120 (2100 - 1980) isn't leap year */
159#define YEAR_2100 120
160#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
161
138/* Linear day numbers of the respective 1sts in non-leap years. */ 162/* Linear day numbers of the respective 1sts in non-leap years. */
139static int day_n[] = { 163static time_t days_in_year[] = {
140 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ 164 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
141 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 165 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
142}; 166};
143 167
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 168/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) 169void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
170 __le16 __time, __le16 __date, u8 time_cs)
146{ 171{
147 int month, year, secs; 172 u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
173 time_t second, day, leap_day, month, year;
148 174
149 /* 175 year = date >> 9;
150 * first subtract and mask after that... Otherwise, if 176 month = max(1, (date >> 5) & 0xf);
151 * date == 0, bad things happen 177 day = max(1, date & 0x1f) - 1;
152 */ 178
153 month = ((date >> 5) - 1) & 15; 179 leap_day = (year + 3) / 4;
154 year = date >> 9; 180 if (year > YEAR_2100) /* 2100 isn't leap year */
155 secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* 181 leap_day--;
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 182 if (IS_LEAP_YEAR(year) && month > 2)
157 month < 2 ? 1 : 0)+3653); 183 leap_day++;
158 /* days since 1.1.70 plus 80's leap day */ 184
159 if (!tz_utc) 185 second = (time & 0x1f) << 1;
160 secs += sys_tz.tz_minuteswest*60; 186 second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
161 return secs; 187 second += (time >> 11) * SECS_PER_HOUR;
188 second += (year * 365 + leap_day
189 + days_in_year[month] + day
190 + DAYS_DELTA) * SECS_PER_DAY;
191
192 if (!sbi->options.tz_utc)
193 second += sys_tz.tz_minuteswest * SECS_PER_MIN;
194
195 if (time_cs) {
196 ts->tv_sec = second + (time_cs / 100);
197 ts->tv_nsec = (time_cs % 100) * 10000000;
198 } else {
199 ts->tv_sec = second;
200 ts->tv_nsec = 0;
201 }
162} 202}
163 203
164/* Convert linear UNIX date to a MS-DOS time/date pair. */ 204/* Convert linear UNIX date to a FAT time/date pair. */
165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) 205void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
206 __le16 *time, __le16 *date, u8 *time_cs)
166{ 207{
167 int day, year, nl_day, month; 208 time_t second = ts->tv_sec;
209 time_t day, leap_day, month, year;
168 210
169 if (!tz_utc) 211 if (!sbi->options.tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60; 212 second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
171 213
172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 214 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
173 if (unix_date < 315532800) 215 if (second < UNIX_SECS_1980) {
174 unix_date = 315532800; 216 *time = 0;
175 217 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
176 *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ 218 if (time_cs)
177 (((unix_date/3600) % 24) << 11)); 219 *time_cs = 0;
178 day = unix_date/86400-3652; 220 return;
179 year = day/365; 221 }
180 if ((year+3)/4+365*year > day) 222#if BITS_PER_LONG == 64
223 if (second >= UNIX_SECS_2108) {
224 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
225 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
226 if (time_cs)
227 *time_cs = 199;
228 return;
229 }
230#endif
231
232 day = second / SECS_PER_DAY - DAYS_DELTA;
233 year = day / 365;
234 leap_day = (year + 3) / 4;
235 if (year > YEAR_2100) /* 2100 isn't leap year */
236 leap_day--;
237 if (year * 365 + leap_day > day)
181 year--; 238 year--;
182 day -= (year+3)/4+365*year; 239 leap_day = (year + 3) / 4;
183 if (day == 59 && !(year & 3)) { 240 if (year > YEAR_2100) /* 2100 isn't leap year */
184 nl_day = day; 241 leap_day--;
242 day -= year * 365 + leap_day;
243
244 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
185 month = 2; 245 month = 2;
186 } else { 246 } else {
187 nl_day = (year & 3) || day <= 59 ? day : day-1; 247 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
188 for (month = 0; month < 12; month++) { 248 day--;
189 if (day_n[month] > nl_day) 249 for (month = 1; month < 12; month++) {
250 if (days_in_year[month + 1] > day)
190 break; 251 break;
191 } 252 }
192 } 253 }
193 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); 254 day -= days_in_year[month];
194}
195 255
196EXPORT_SYMBOL_GPL(fat_date_unix2dos); 256 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11
257 | ((second / SECS_PER_MIN) % 60) << 5
258 | (second % SECS_PER_MIN) >> 1);
259 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
260 if (time_cs)
261 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
262}
263EXPORT_SYMBOL_GPL(fat_time_unix2fat);
197 264
198int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 265int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
199{ 266{
diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c
index e844b9809d27..7ba03a4acbe0 100644
--- a/fs/msdos/namei.c
+++ b/fs/fat/namei_msdos.c
@@ -9,8 +9,8 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include "fat.h"
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
@@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
203{ 203{
204 struct super_block *sb = dir->i_sb; 204 struct super_block *sb = dir->i_sb;
205 struct fat_slot_info sinfo; 205 struct fat_slot_info sinfo;
206 struct inode *inode = NULL; 206 struct inode *inode;
207 int res; 207 int err;
208
209 dentry->d_op = &msdos_dentry_operations;
210 208
211 lock_super(sb); 209 lock_super(sb);
212 res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 210
213 if (res == -ENOENT) 211 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
214 goto add; 212 if (err) {
215 if (res < 0) 213 if (err == -ENOENT) {
216 goto out; 214 inode = NULL;
215 goto out;
216 }
217 goto error;
218 }
219
217 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 220 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
218 brelse(sinfo.bh); 221 brelse(sinfo.bh);
219 if (IS_ERR(inode)) { 222 if (IS_ERR(inode)) {
220 res = PTR_ERR(inode); 223 err = PTR_ERR(inode);
221 goto out; 224 goto error;
222 } 225 }
223add: 226out:
224 res = 0; 227 unlock_super(sb);
228 dentry->d_op = &msdos_dentry_operations;
225 dentry = d_splice_alias(inode, dentry); 229 dentry = d_splice_alias(inode, dentry);
226 if (dentry) 230 if (dentry)
227 dentry->d_op = &msdos_dentry_operations; 231 dentry->d_op = &msdos_dentry_operations;
228out: 232 return dentry;
233
234error:
229 unlock_super(sb); 235 unlock_super(sb);
230 if (!res) 236 return ERR_PTR(err);
231 return dentry;
232 return ERR_PTR(res);
233} 237}
234 238
235/***** Creates a directory entry (name is already formatted). */ 239/***** Creates a directory entry (name is already formatted). */
@@ -247,7 +251,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
247 if (is_hid) 251 if (is_hid)
248 de.attr |= ATTR_HIDDEN; 252 de.attr |= ATTR_HIDDEN;
249 de.lcase = 0; 253 de.lcase = 0;
250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 254 fat_time_unix2fat(sbi, ts, &time, &date, NULL);
251 de.cdate = de.adate = 0; 255 de.cdate = de.adate = 0;
252 de.ctime = 0; 256 de.ctime = 0;
253 de.ctime_cs = 0; 257 de.ctime_cs = 0;
diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c
index 155c10b4adbd..bf326d4356a3 100644
--- a/fs/vfat/namei.c
+++ b/fs/fat/namei_vfat.c
@@ -16,36 +16,75 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19
20#include <linux/jiffies.h> 19#include <linux/jiffies.h>
21#include <linux/msdos_fs.h>
22#include <linux/ctype.h> 20#include <linux/ctype.h>
23#include <linux/slab.h> 21#include <linux/slab.h>
24#include <linux/smp_lock.h> 22#include <linux/smp_lock.h>
25#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
26#include <linux/namei.h> 24#include <linux/namei.h>
25#include "fat.h"
27 26
28static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 27/*
28 * If new entry was created in the parent, it could create the 8.3
29 * alias (the shortname of logname). So, the parent may have the
30 * negative-dentry which matches the created 8.3 alias.
31 *
32 * If it happened, the negative dentry isn't actually negative
33 * anymore. So, drop it.
34 */
35static int vfat_revalidate_shortname(struct dentry *dentry)
29{ 36{
30 int ret = 1; 37 int ret = 1;
31 38 spin_lock(&dentry->d_lock);
32 if (!dentry->d_inode && 39 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
33 nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE))
34 /*
35 * negative dentry is dropped, in order to make sure
36 * to use the name which a user desires if this is
37 * create path.
38 */
39 ret = 0; 40 ret = 0;
40 else { 41 spin_unlock(&dentry->d_lock);
41 spin_lock(&dentry->d_lock);
42 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
43 ret = 0;
44 spin_unlock(&dentry->d_lock);
45 }
46 return ret; 42 return ret;
47} 43}
48 44
45static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
46{
47 /* This is not negative dentry. Always valid. */
48 if (dentry->d_inode)
49 return 1;
50 return vfat_revalidate_shortname(dentry);
51}
52
53static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
54{
55 /*
56 * This is not negative dentry. Always valid.
57 *
58 * Note, rename() to existing directory entry will have ->d_inode,
59 * and will use existing name which isn't specified name by user.
60 *
61 * We may be able to drop this positive dentry here. But dropping
62 * positive dentry isn't good idea. So it's unsupported like
63 * rename("filename", "FILENAME") for now.
64 */
65 if (dentry->d_inode)
66 return 1;
67
68 /*
69 * This may be nfsd (or something), anyway, we can't see the
70 * intent of this. So, since this can be for creation, drop it.
71 */
72 if (!nd)
73 return 0;
74
75 /*
76 * Drop the negative dentry, in order to make sure to use the
77 * case sensitive name which is specified by user if this is
78 * for creation.
79 */
80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
81 if (nd->flags & LOOKUP_CREATE)
82 return 0;
83 }
84
85 return vfat_revalidate_shortname(dentry);
86}
87
49/* returns the length of a struct qstr, ignoring trailing dots */ 88/* returns the length of a struct qstr, ignoring trailing dots */
50static unsigned int vfat_striptail_len(struct qstr *qstr) 89static unsigned int vfat_striptail_len(struct qstr *qstr)
51{ 90{
@@ -127,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
127 return 1; 166 return 1;
128} 167}
129 168
130static struct dentry_operations vfat_dentry_ops[4] = { 169static struct dentry_operations vfat_ci_dentry_ops = {
131 { 170 .d_revalidate = vfat_revalidate_ci,
132 .d_hash = vfat_hashi, 171 .d_hash = vfat_hashi,
133 .d_compare = vfat_cmpi, 172 .d_compare = vfat_cmpi,
134 }, 173};
135 { 174
136 .d_revalidate = vfat_revalidate, 175static struct dentry_operations vfat_dentry_ops = {
137 .d_hash = vfat_hashi, 176 .d_revalidate = vfat_revalidate,
138 .d_compare = vfat_cmpi, 177 .d_hash = vfat_hash,
139 }, 178 .d_compare = vfat_cmp,
140 {
141 .d_hash = vfat_hash,
142 .d_compare = vfat_cmp,
143 },
144 {
145 .d_revalidate = vfat_revalidate,
146 .d_hash = vfat_hash,
147 .d_compare = vfat_cmp,
148 }
149}; 179};
150 180
151/* Characters that are undesirable in an MS-DOS file name */ 181/* Characters that are undesirable in an MS-DOS file name */
@@ -569,6 +599,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
569 unsigned char msdos_name[MSDOS_NAME]; 599 unsigned char msdos_name[MSDOS_NAME];
570 wchar_t *uname; 600 wchar_t *uname;
571 __le16 time, date; 601 __le16 time, date;
602 u8 time_cs;
572 int err, ulen, usize, i; 603 int err, ulen, usize, i;
573 loff_t offset; 604 loff_t offset;
574 605
@@ -621,10 +652,10 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 652 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 653 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 654 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 655 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
625 de->time = de->ctime = time; 656 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 657 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 658 de->ctime_cs = time_cs;
628 de->start = cpu_to_le16(cluster); 659 de->start = cpu_to_le16(cluster);
629 de->starthi = cpu_to_le16(cluster >> 16); 660 de->starthi = cpu_to_le16(cluster >> 16);
630 de->size = 0; 661 de->size = 0;
@@ -683,46 +714,58 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
683{ 714{
684 struct super_block *sb = dir->i_sb; 715 struct super_block *sb = dir->i_sb;
685 struct fat_slot_info sinfo; 716 struct fat_slot_info sinfo;
686 struct inode *inode = NULL; 717 struct inode *inode;
687 struct dentry *alias; 718 struct dentry *alias;
688 int err, table; 719 int err;
689 720
690 lock_super(sb); 721 lock_super(sb);
691 table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0;
692 dentry->d_op = &vfat_dentry_ops[table];
693 722
694 err = vfat_find(dir, &dentry->d_name, &sinfo); 723 err = vfat_find(dir, &dentry->d_name, &sinfo);
695 if (err) { 724 if (err) {
696 table++; 725 if (err == -ENOENT) {
726 inode = NULL;
727 goto out;
728 }
697 goto error; 729 goto error;
698 } 730 }
731
699 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 732 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
700 brelse(sinfo.bh); 733 brelse(sinfo.bh);
701 if (IS_ERR(inode)) { 734 if (IS_ERR(inode)) {
702 unlock_super(sb); 735 err = PTR_ERR(inode);
703 return ERR_CAST(inode); 736 goto error;
704 } 737 }
705 alias = d_find_alias(inode);
706 if (alias) {
707 if (d_invalidate(alias) == 0)
708 dput(alias);
709 else {
710 iput(inode);
711 unlock_super(sb);
712 return alias;
713 }
714 738
739 alias = d_find_alias(inode);
740 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
741 /*
742 * This inode has non DCACHE_DISCONNECTED dentry. This
743 * means, the user did ->lookup() by an another name
744 * (longname vs 8.3 alias of it) in past.
745 *
746 * Switch to new one for reason of locality if possible.
747 */
748 BUG_ON(d_unhashed(alias));
749 if (!S_ISDIR(inode->i_mode))
750 d_move(alias, dentry);
751 iput(inode);
752 unlock_super(sb);
753 return alias;
715 } 754 }
716error: 755out:
717 unlock_super(sb); 756 unlock_super(sb);
718 dentry->d_op = &vfat_dentry_ops[table]; 757 dentry->d_op = sb->s_root->d_op;
719 dentry->d_time = dentry->d_parent->d_inode->i_version; 758 dentry->d_time = dentry->d_parent->d_inode->i_version;
720 dentry = d_splice_alias(inode, dentry); 759 dentry = d_splice_alias(inode, dentry);
721 if (dentry) { 760 if (dentry) {
722 dentry->d_op = &vfat_dentry_ops[table]; 761 dentry->d_op = sb->s_root->d_op;
723 dentry->d_time = dentry->d_parent->d_inode->i_version; 762 dentry->d_time = dentry->d_parent->d_inode->i_version;
724 } 763 }
725 return dentry; 764 return dentry;
765
766error:
767 unlock_super(sb);
768 return ERR_PTR(err);
726} 769}
727 770
728static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, 771static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -1014,9 +1057,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1014 return res; 1057 return res;
1015 1058
1016 if (MSDOS_SB(sb)->options.name_check != 's') 1059 if (MSDOS_SB(sb)->options.name_check != 's')
1017 sb->s_root->d_op = &vfat_dentry_ops[0]; 1060 sb->s_root->d_op = &vfat_ci_dentry_ops;
1018 else 1061 else
1019 sb->s_root->d_op = &vfat_dentry_ops[2]; 1062 sb->s_root->d_op = &vfat_dentry_ops;
1020 1063
1021 return 0; 1064 return 0;
1022} 1065}
diff --git a/fs/fifo.c b/fs/fifo.c
index 987bf9411495..f8f97b8b6d44 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
51 filp->f_mode &= (FMODE_READ | FMODE_WRITE); 51 filp->f_mode &= (FMODE_READ | FMODE_WRITE);
52 52
53 switch (filp->f_mode) { 53 switch (filp->f_mode) {
54 case 1: 54 case FMODE_READ:
55 /* 55 /*
56 * O_RDONLY 56 * O_RDONLY
57 * POSIX.1 says that O_NONBLOCK means return with the FIFO 57 * POSIX.1 says that O_NONBLOCK means return with the FIFO
@@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
76 } 76 }
77 break; 77 break;
78 78
79 case 2: 79 case FMODE_WRITE:
80 /* 80 /*
81 * O_WRONLY 81 * O_WRONLY
82 * POSIX.1 says that O_NONBLOCK means return -1 with 82 * POSIX.1 says that O_NONBLOCK means return -1 with
@@ -98,7 +98,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
98 } 98 }
99 break; 99 break;
100 100
101 case 3: 101 case FMODE_READ | FMODE_WRITE:
102 /* 102 /*
103 * O_RDWR 103 * O_RDWR
104 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. 104 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
diff --git a/fs/file_table.c b/fs/file_table.c
index f45a4493f9e7..5ad0eca6eea2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp);
161 * code should be moved into this function. 161 * code should be moved into this function.
162 */ 162 */
163struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 163struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
164 mode_t mode, const struct file_operations *fop) 164 fmode_t mode, const struct file_operations *fop)
165{ 165{
166 struct file *file; 166 struct file *file;
167 struct path; 167 struct path;
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file);
193 * of this should be moving to alloc_file(). 193 * of this should be moving to alloc_file().
194 */ 194 */
195int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, 195int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
196 mode_t mode, const struct file_operations *fop) 196 fmode_t mode, const struct file_operations *fop)
197{ 197{
198 int error = 0; 198 int error = 0;
199 file->f_path.dentry = dentry; 199 file->f_path.dentry = dentry;
@@ -269,6 +269,10 @@ void __fput(struct file *file)
269 eventpoll_release(file); 269 eventpoll_release(file);
270 locks_remove_flock(file); 270 locks_remove_flock(file);
271 271
272 if (unlikely(file->f_flags & FASYNC)) {
273 if (file->f_op && file->f_op->fasync)
274 file->f_op->fasync(-1, file, 0);
275 }
272 if (file->f_op && file->f_op->release) 276 if (file->f_op && file->f_op->release)
273 file->f_op->release(inode, file); 277 file->f_op->release(inode, file);
274 security_file_free(file); 278 security_file_free(file);
diff --git a/fs/filesystems.c b/fs/filesystems.c
index f37f87262837..d0e20ced62dd 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -8,6 +8,8 @@
8 8
9#include <linux/syscalls.h> 9#include <linux/syscalls.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/proc_fs.h>
12#include <linux/seq_file.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
12#include <linux/kmod.h> 14#include <linux/kmod.h>
13#include <linux/init.h> 15#include <linux/init.h>
@@ -214,6 +216,43 @@ int get_filesystem_list(char * buf)
214 return len; 216 return len;
215} 217}
216 218
219#ifdef CONFIG_PROC_FS
220static int filesystems_proc_show(struct seq_file *m, void *v)
221{
222 struct file_system_type * tmp;
223
224 read_lock(&file_systems_lock);
225 tmp = file_systems;
226 while (tmp) {
227 seq_printf(m, "%s\t%s\n",
228 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
229 tmp->name);
230 tmp = tmp->next;
231 }
232 read_unlock(&file_systems_lock);
233 return 0;
234}
235
236static int filesystems_proc_open(struct inode *inode, struct file *file)
237{
238 return single_open(file, filesystems_proc_show, NULL);
239}
240
241static const struct file_operations filesystems_proc_fops = {
242 .open = filesystems_proc_open,
243 .read = seq_read,
244 .llseek = seq_lseek,
245 .release = single_release,
246};
247
248static int __init proc_filesystems_init(void)
249{
250 proc_create("filesystems", 0, NULL, &filesystems_proc_fops);
251 return 0;
252}
253module_init(proc_filesystems_init);
254#endif
255
217struct file_system_type *get_fs_type(const char *name) 256struct file_system_type *get_fs_type(const char *name)
218{ 257{
219 struct file_system_type *fs; 258 struct file_system_type *fs;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 87250b6a8682..b72361479be2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1056,7 +1056,6 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
1056 end_requests(fc, &fc->pending); 1056 end_requests(fc, &fc->pending);
1057 end_requests(fc, &fc->processing); 1057 end_requests(fc, &fc->processing);
1058 spin_unlock(&fc->lock); 1058 spin_unlock(&fc->lock);
1059 fasync_helper(-1, file, 0, &fc->fasync);
1060 fuse_conn_put(fc); 1059 fuse_conn_put(fc);
1061 } 1060 }
1062 1061
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..34930a964b82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
101 file->f_op = &fuse_direct_io_file_operations; 101 file->f_op = &fuse_direct_io_file_operations;
102 if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) 102 if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
103 invalidate_inode_pages2(inode->i_mapping); 103 invalidate_inode_pages2(inode->i_mapping);
104 if (outarg->open_flags & FOPEN_NONSEEKABLE)
105 nonseekable_open(inode, file);
104 ff->fh = outarg->fh; 106 ff->fh = outarg->fh;
105 file->private_data = fuse_file_get(ff); 107 file->private_data = fuse_file_get(ff);
106} 108}
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1448 mutex_lock(&inode->i_mutex); 1450 mutex_lock(&inode->i_mutex);
1449 switch (origin) { 1451 switch (origin) {
1450 case SEEK_END: 1452 case SEEK_END:
1453 retval = fuse_update_attributes(inode, NULL, file, NULL);
1454 if (retval)
1455 return retval;
1451 offset += i_size_read(inode); 1456 offset += i_size_read(inode);
1452 break; 1457 break;
1453 case SEEK_CUR: 1458 case SEEK_CUR:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd1..35accfdd747f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
6 See the file COPYING. 6 See the file COPYING.
7*/ 7*/
8 8
9#ifndef _FS_FUSE_I_H
10#define _FS_FUSE_I_H
11
9#include <linux/fuse.h> 12#include <linux/fuse.h>
10#include <linux/fs.h> 13#include <linux/fs.h>
11#include <linux/mount.h> 14#include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
655void fuse_release_nowrite(struct inode *inode); 658void fuse_release_nowrite(struct inode *inode);
656 659
657u64 fuse_get_attr_version(struct fuse_conn *fc); 660u64 fuse_get_attr_version(struct fuse_conn *fc);
661
662#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacff..2e99f34b4435 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -596,12 +596,8 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
596 if (inode->i_generation != handle->generation) 596 if (inode->i_generation != handle->generation)
597 goto out_iput; 597 goto out_iput;
598 598
599 entry = d_alloc_anon(inode); 599 entry = d_obtain_alias(inode);
600 err = -ENOMEM; 600 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations; 601 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry); 602 fuse_invalidate_entry_cache(entry);
607 } 603 }
@@ -696,17 +692,14 @@ static struct dentry *fuse_get_parent(struct dentry *child)
696 name.name = ".."; 692 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 693 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode); 694 &name, &outarg, &inode);
699 if (err && err != -ENOENT) 695 if (err) {
696 if (err == -ENOENT)
697 return ERR_PTR(-ESTALE);
700 return ERR_PTR(err); 698 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 } 699 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) { 700
701 parent = d_obtain_alias(inode);
702 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations; 703 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent); 704 fuse_invalidate_entry_cache(parent);
712 } 705 }
@@ -865,7 +858,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
865 if (is_bdev) { 858 if (is_bdev) {
866 fc->destroy_req = fuse_request_alloc(); 859 fc->destroy_req = fuse_request_alloc();
867 if (!fc->destroy_req) 860 if (!fc->destroy_req)
868 goto err_put_root; 861 goto err_free_init_req;
869 } 862 }
870 863
871 mutex_lock(&fuse_mutex); 864 mutex_lock(&fuse_mutex);
@@ -895,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
895 888
896 err_unlock: 889 err_unlock:
897 mutex_unlock(&fuse_mutex); 890 mutex_unlock(&fuse_mutex);
891 err_free_init_req:
898 fuse_request_free(init_req); 892 fuse_request_free(init_req);
899 err_put_root: 893 err_put_root:
900 dput(root_dentry); 894 dput(root_dentry);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 9cda8536530c..bbb8c36403a9 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -130,28 +130,17 @@ static int gfs2_get_name(struct dentry *parent, char *name,
130static struct dentry *gfs2_get_parent(struct dentry *child) 130static struct dentry *gfs2_get_parent(struct dentry *child)
131{ 131{
132 struct qstr dotdot; 132 struct qstr dotdot;
133 struct inode *inode;
134 struct dentry *dentry; 133 struct dentry *dentry;
135 134
136 gfs2_str2qstr(&dotdot, "..");
137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
138
139 if (!inode)
140 return ERR_PTR(-ENOENT);
141 /* 135 /*
142 * In case of an error, @inode carries the error value, and we 136 * XXX(hch): it would be a good idea to keep this around as a
143 * have to return that as a(n invalid) pointer to dentry. 137 * static variable.
144 */ 138 */
145 if (IS_ERR(inode)) 139 gfs2_str2qstr(&dotdot, "..");
146 return ERR_CAST(inode);
147
148 dentry = d_alloc_anon(inode);
149 if (!dentry) {
150 iput(inode);
151 return ERR_PTR(-ENOMEM);
152 }
153 140
154 dentry->d_op = &gfs2_dops; 141 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1));
142 if (!IS_ERR(dentry))
143 dentry->d_op = &gfs2_dops;
155 return dentry; 144 return dentry;
156} 145}
157 146
@@ -233,13 +222,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
233 gfs2_glock_dq_uninit(&i_gh); 222 gfs2_glock_dq_uninit(&i_gh);
234 223
235out_inode: 224out_inode:
236 dentry = d_alloc_anon(inode); 225 dentry = d_obtain_alias(inode);
237 if (!dentry) { 226 if (!IS_ERR(dentry))
238 iput(inode); 227 dentry->d_op = &gfs2_dops;
239 return ERR_PTR(-ENOMEM);
240 }
241
242 dentry->d_op = &gfs2_dops;
243 return dentry; 228 return dentry;
244 229
245fail_rgd: 230fail_rgd:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 534e1e2c65ca..d232991b9046 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
69 mark_inode_dirty(inode); 69 mark_inode_dirty(inode);
70 break; 70 break;
71 } else if (PTR_ERR(inode) != -EEXIST || 71 } else if (PTR_ERR(inode) != -EEXIST ||
72 (nd && (nd->intent.open.flags & O_EXCL))) { 72 (nd && nd->flags & LOOKUP_EXCL)) {
73 gfs2_holder_uninit(ghs); 73 gfs2_holder_uninit(ghs);
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 7e19835efa2e..c69b7ac75bf7 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode)
511 } 511 }
512} 512}
513 513
514static int hfs_permission(struct inode *inode, int mask)
515{
516 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
517 return 0;
518 return generic_permission(inode, mask, NULL);
519}
520
521static int hfs_file_open(struct inode *inode, struct file *file) 514static int hfs_file_open(struct inode *inode, struct file *file)
522{ 515{
523 if (HFS_IS_RSRC(inode)) 516 if (HFS_IS_RSRC(inode))
@@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = {
616 .lookup = hfs_file_lookup, 609 .lookup = hfs_file_lookup,
617 .truncate = hfs_file_truncate, 610 .truncate = hfs_file_truncate,
618 .setattr = hfs_inode_setattr, 611 .setattr = hfs_inode_setattr,
619 .permission = hfs_permission,
620 .setxattr = hfs_setxattr, 612 .setxattr = hfs_setxattr,
621 .getxattr = hfs_getxattr, 613 .getxattr = hfs_getxattr,
622 .listxattr = hfs_listxattr, 614 .listxattr = hfs_listxattr,
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227ff..0022eec63cda 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 if (inode->i_ino == HFSPLUS_EXT_CNID)
203 return -EIO;
204
202 mutex_lock(&HFSPLUS_I(inode).extents_lock); 205 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 206 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 207 if (!res) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b67..b207f0e6fc22 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -238,22 +238,12 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); 238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
239} 239}
240 240
241static int hfsplus_permission(struct inode *inode, int mask)
242{
243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
244 * open_exec has the same test, so it's still not executable, if a x bit
245 * is set fall back to standard permission check.
246 */
247 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
248 return 0;
249 return generic_permission(inode, mask, NULL);
250}
251
252
253static int hfsplus_file_open(struct inode *inode, struct file *file) 241static int hfsplus_file_open(struct inode *inode, struct file *file)
254{ 242{
255 if (HFSPLUS_IS_RSRC(inode)) 243 if (HFSPLUS_IS_RSRC(inode))
256 inode = HFSPLUS_I(inode).rsrc_inode; 244 inode = HFSPLUS_I(inode).rsrc_inode;
245 if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
246 return -EOVERFLOW;
257 atomic_inc(&HFSPLUS_I(inode).opencnt); 247 atomic_inc(&HFSPLUS_I(inode).opencnt);
258 return 0; 248 return 0;
259} 249}
@@ -279,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
279static const struct inode_operations hfsplus_file_inode_operations = { 269static const struct inode_operations hfsplus_file_inode_operations = {
280 .lookup = hfsplus_file_lookup, 270 .lookup = hfsplus_file_lookup,
281 .truncate = hfsplus_file_truncate, 271 .truncate = hfsplus_file_truncate,
282 .permission = hfsplus_permission,
283 .setxattr = hfsplus_setxattr, 272 .setxattr = hfsplus_setxattr,
284 .getxattr = hfsplus_getxattr, 273 .getxattr = hfsplus_getxattr,
285 .listxattr = hfsplus_listxattr, 274 .listxattr = hfsplus_listxattr,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d6ecabf4d231..7f34f4385de0 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -20,7 +20,7 @@
20struct hostfs_inode_info { 20struct hostfs_inode_info {
21 char *host_filename; 21 char *host_filename;
22 int fd; 22 int fd;
23 int mode; 23 fmode_t mode;
24 struct inode vfs_inode; 24 struct inode vfs_inode;
25}; 25};
26 26
@@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
373int hostfs_file_open(struct inode *ino, struct file *file) 373int hostfs_file_open(struct inode *ino, struct file *file)
374{ 374{
375 char *name; 375 char *name;
376 int mode = 0, r = 0, w = 0, fd; 376 fmode_t mode = 0;
377 int r = 0, w = 0, fd;
377 378
378 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 379 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
379 if ((mode & HOSTFS_I(ino)->mode) == mode) 380 if ((mode & HOSTFS_I(ino)->mode) == mode)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index be8be5040e07..64ab52259204 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -143,5 +143,5 @@ const struct file_operations hpfs_file_ops =
143const struct inode_operations hpfs_file_iops = 143const struct inode_operations hpfs_file_iops =
144{ 144{
145 .truncate = hpfs_truncate, 145 .truncate = hpfs_truncate,
146 .setattr = hpfs_notify_change, 146 .setattr = hpfs_setattr,
147}; 147};
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 42ff60ccf2a9..c2ea31bae313 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -275,7 +275,7 @@ void hpfs_init_inode(struct inode *);
275void hpfs_read_inode(struct inode *); 275void hpfs_read_inode(struct inode *);
276void hpfs_write_inode(struct inode *); 276void hpfs_write_inode(struct inode *);
277void hpfs_write_inode_nolock(struct inode *); 277void hpfs_write_inode_nolock(struct inode *);
278int hpfs_notify_change(struct dentry *, struct iattr *); 278int hpfs_setattr(struct dentry *, struct iattr *);
279void hpfs_write_if_changed(struct inode *); 279void hpfs_write_if_changed(struct inode *);
280void hpfs_delete_inode(struct inode *); 280void hpfs_delete_inode(struct inode *);
281 281
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 85d3e1d9ac00..39a1bfbea312 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -260,19 +260,28 @@ void hpfs_write_inode_nolock(struct inode *i)
260 brelse(bh); 260 brelse(bh);
261} 261}
262 262
263int hpfs_notify_change(struct dentry *dentry, struct iattr *attr) 263int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
264{ 264{
265 struct inode *inode = dentry->d_inode; 265 struct inode *inode = dentry->d_inode;
266 int error=0; 266 int error = -EINVAL;
267
267 lock_kernel(); 268 lock_kernel();
268 if ( ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) || 269 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
269 (hpfs_sb(inode->i_sb)->sb_root == inode->i_ino) ) { 270 goto out_unlock;
270 error = -EINVAL; 271 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
271 } else if ((error = inode_change_ok(inode, attr))) { 272 goto out_unlock;
272 } else if ((error = inode_setattr(inode, attr))) { 273
273 } else { 274 error = inode_change_ok(inode, attr);
274 hpfs_write_inode(inode); 275 if (error)
275 } 276 goto out_unlock;
277
278 error = inode_setattr(inode, attr);
279 if (error)
280 goto out_unlock;
281
282 hpfs_write_inode(inode);
283
284 out_unlock:
276 unlock_kernel(); 285 unlock_kernel();
277 return error; 286 return error;
278} 287}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d9c59a775449..10783f3d265a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -669,5 +669,5 @@ const struct inode_operations hpfs_dir_iops =
669 .rmdir = hpfs_rmdir, 669 .rmdir = hpfs_rmdir,
670 .mknod = hpfs_mknod, 670 .mknod = hpfs_mknod,
671 .rename = hpfs_rename, 671 .rename = hpfs_rename,
672 .setattr = hpfs_notify_change, 672 .setattr = hpfs_setattr,
673}; 673};
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index d85c7d931cdf..d367e9b92862 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -537,9 +537,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
537 inotify_dev_event_dequeue(dev); 537 inotify_dev_event_dequeue(dev);
538 mutex_unlock(&dev->ev_mutex); 538 mutex_unlock(&dev->ev_mutex);
539 539
540 if (file->f_flags & FASYNC)
541 inotify_fasync(-1, file, 0);
542
543 /* free this device: the put matching the get in inotify_init() */ 540 /* free this device: the put matching the get in inotify_init() */
544 put_inotify_dev(dev); 541 put_inotify_dev(dev);
545 542
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index bb219138331a..e81a30593ba9 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -22,7 +22,7 @@ isofs_export_iget(struct super_block *sb,
22 __u32 generation) 22 __u32 generation)
23{ 23{
24 struct inode *inode; 24 struct inode *inode;
25 struct dentry *result; 25
26 if (block == 0) 26 if (block == 0)
27 return ERR_PTR(-ESTALE); 27 return ERR_PTR(-ESTALE);
28 inode = isofs_iget(sb, block, offset); 28 inode = isofs_iget(sb, block, offset);
@@ -32,12 +32,7 @@ isofs_export_iget(struct super_block *sb,
32 iput(inode); 32 iput(inode);
33 return ERR_PTR(-ESTALE); 33 return ERR_PTR(-ESTALE);
34 } 34 }
35 result = d_alloc_anon(inode); 35 return d_obtain_alias(inode);
36 if (!result) {
37 iput(inode);
38 return ERR_PTR(-ENOMEM);
39 }
40 return result;
41} 36}
42 37
43/* This function is surprisingly simple. The trick is understanding 38/* This function is surprisingly simple. The trick is understanding
@@ -51,7 +46,6 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
51 unsigned long parent_offset = 0; 46 unsigned long parent_offset = 0;
52 struct inode *child_inode = child->d_inode; 47 struct inode *child_inode = child->d_inode;
53 struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); 48 struct iso_inode_info *e_child_inode = ISOFS_I(child_inode);
54 struct inode *parent_inode = NULL;
55 struct iso_directory_record *de = NULL; 49 struct iso_directory_record *de = NULL;
56 struct buffer_head * bh = NULL; 50 struct buffer_head * bh = NULL;
57 struct dentry *rv = NULL; 51 struct dentry *rv = NULL;
@@ -104,28 +98,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
104 /* Normalize */ 98 /* Normalize */
105 isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); 99 isofs_normalize_block_and_offset(de, &parent_block, &parent_offset);
106 100
107 /* Get the inode. */ 101 rv = d_obtain_alias(isofs_iget(child_inode->i_sb, parent_block,
108 parent_inode = isofs_iget(child_inode->i_sb, 102 parent_offset));
109 parent_block,
110 parent_offset);
111 if (IS_ERR(parent_inode)) {
112 rv = ERR_CAST(parent_inode);
113 if (rv != ERR_PTR(-ENOMEM))
114 rv = ERR_PTR(-EACCES);
115 goto out;
116 }
117
118 /* Allocate the dentry. */
119 rv = d_alloc_anon(parent_inode);
120 if (rv == NULL) {
121 rv = ERR_PTR(-ENOMEM);
122 goto out;
123 }
124
125 out: 103 out:
126 if (bh) { 104 if (bh)
127 brelse(bh); 105 brelse(bh);
128 }
129 return rv; 106 return rv;
130} 107}
131 108
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 000000000000..4e28beeed157
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
1config JBD
2 tristate
3 help
4 This is a generic journalling layer for block devices. It is
5 currently used by the ext3 file system, but it could also be
6 used to add journal support to other file systems or block
7 devices such as RAID or LVM.
8
9 If you are using the ext3 file system, you need to say Y here.
10 If you are not using ext3 then you will probably want to say N.
11
12 To compile this device as a module, choose M here: the module will be
13 called jbd. If you are compiling ext3 into the kernel, you
14 cannot compile this code as a module.
15
16config JBD_DEBUG
17 bool "JBD (ext3) debugging support"
18 depends on JBD && DEBUG_FS
19 help
20 If you are using the ext3 journaled file system (or potentially any
21 other file system/device using JBD), this option allows you to
22 enable debugging output while the system is running, in order to
23 help track down any problems you are having. By default the
24 debugging output will be turned off.
25
26 If you select Y here, then you will be able to turn on debugging
27 with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
28 number between 1 and 5, the higher the number, the more debugging
29 output is generated. To turn debugging off again, do
30 "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb88..61f32f3868cd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
93 int ret = 0; 93 int ret = 0;
94 struct buffer_head *bh = jh2bh(jh); 94 struct buffer_head *bh = jh2bh(jh);
95 95
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
97 JBUFFER_TRACE(jh, "remove from checkpoint list"); 98 JBUFFER_TRACE(jh, "remove from checkpoint list");
98 ret = __journal_remove_checkpoint(jh) + 1; 99 ret = __journal_remove_checkpoint(jh) + 1;
99 jbd_unlock_bh_state(bh); 100 jbd_unlock_bh_state(bh);
@@ -114,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
114 */ 115 */
115void __log_wait_for_space(journal_t *journal) 116void __log_wait_for_space(journal_t *journal)
116{ 117{
117 int nblocks; 118 int nblocks, space_left;
118 assert_spin_locked(&journal->j_state_lock); 119 assert_spin_locked(&journal->j_state_lock);
119 120
120 nblocks = jbd_space_needed(journal); 121 nblocks = jbd_space_needed(journal);
@@ -126,14 +127,46 @@ void __log_wait_for_space(journal_t *journal)
126 127
127 /* 128 /*
128 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
129 * were waiting for the checkpoint lock 130 * were waiting for the checkpoint lock. If there are no
131 * transactions ready to be checkpointed, try to recover
132 * journal space by calling cleanup_journal_tail(), and if
133 * that doesn't work, by waiting for the currently committing
134 * transaction to complete. If there is absolutely no way
135 * to make progress, this is either a BUG or corrupted
136 * filesystem, so abort the journal and leave a stack
137 * trace for forensic evidence.
130 */ 138 */
131 spin_lock(&journal->j_state_lock); 139 spin_lock(&journal->j_state_lock);
140 spin_lock(&journal->j_list_lock);
132 nblocks = jbd_space_needed(journal); 141 nblocks = jbd_space_needed(journal);
133 if (__log_space_left(journal) < nblocks) { 142 space_left = __log_space_left(journal);
143 if (space_left < nblocks) {
144 int chkpt = journal->j_checkpoint_transactions != NULL;
145 tid_t tid = 0;
146
147 if (journal->j_committing_transaction)
148 tid = journal->j_committing_transaction->t_tid;
149 spin_unlock(&journal->j_list_lock);
134 spin_unlock(&journal->j_state_lock); 150 spin_unlock(&journal->j_state_lock);
135 log_do_checkpoint(journal); 151 if (chkpt) {
152 log_do_checkpoint(journal);
153 } else if (cleanup_journal_tail(journal) == 0) {
154 /* We were able to recover space; yay! */
155 ;
156 } else if (tid) {
157 log_wait_commit(journal, tid);
158 } else {
159 printk(KERN_ERR "%s: needed %d blocks and "
160 "only had %d space available\n",
161 __func__, nblocks, space_left);
162 printk(KERN_ERR "%s: no way to get more "
163 "journal space\n", __func__);
164 WARN_ON(1);
165 journal_abort(journal, 0);
166 }
136 spin_lock(&journal->j_state_lock); 167 spin_lock(&journal->j_state_lock);
168 } else {
169 spin_unlock(&journal->j_list_lock);
137 } 170 }
138 mutex_unlock(&journal->j_checkpoint_mutex); 171 mutex_unlock(&journal->j_checkpoint_mutex);
139 } 172 }
@@ -160,21 +193,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
160 * buffers. Note that we take the buffers in the opposite ordering 193 * buffers. Note that we take the buffers in the opposite ordering
161 * from the one in which they were submitted for IO. 194 * from the one in which they were submitted for IO.
162 * 195 *
196 * Return 0 on success, and return <0 if some buffers have failed
197 * to be written out.
198 *
163 * Called with j_list_lock held. 199 * Called with j_list_lock held.
164 */ 200 */
165static void __wait_cp_io(journal_t *journal, transaction_t *transaction) 201static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
166{ 202{
167 struct journal_head *jh; 203 struct journal_head *jh;
168 struct buffer_head *bh; 204 struct buffer_head *bh;
169 tid_t this_tid; 205 tid_t this_tid;
170 int released = 0; 206 int released = 0;
207 int ret = 0;
171 208
172 this_tid = transaction->t_tid; 209 this_tid = transaction->t_tid;
173restart: 210restart:
174 /* Did somebody clean up the transaction in the meanwhile? */ 211 /* Did somebody clean up the transaction in the meanwhile? */
175 if (journal->j_checkpoint_transactions != transaction || 212 if (journal->j_checkpoint_transactions != transaction ||
176 transaction->t_tid != this_tid) 213 transaction->t_tid != this_tid)
177 return; 214 return ret;
178 while (!released && transaction->t_checkpoint_io_list) { 215 while (!released && transaction->t_checkpoint_io_list) {
179 jh = transaction->t_checkpoint_io_list; 216 jh = transaction->t_checkpoint_io_list;
180 bh = jh2bh(jh); 217 bh = jh2bh(jh);
@@ -194,6 +231,9 @@ restart:
194 spin_lock(&journal->j_list_lock); 231 spin_lock(&journal->j_list_lock);
195 goto restart; 232 goto restart;
196 } 233 }
234 if (unlikely(buffer_write_io_error(bh)))
235 ret = -EIO;
236
197 /* 237 /*
198 * Now in whatever state the buffer currently is, we know that 238 * Now in whatever state the buffer currently is, we know that
199 * it has been written out and so we can drop it from the list 239 * it has been written out and so we can drop it from the list
@@ -203,6 +243,8 @@ restart:
203 journal_remove_journal_head(bh); 243 journal_remove_journal_head(bh);
204 __brelse(bh); 244 __brelse(bh);
205 } 245 }
246
247 return ret;
206} 248}
207 249
208#define NR_BATCH 64 250#define NR_BATCH 64
@@ -226,7 +268,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
226 * Try to flush one buffer from the checkpoint list to disk. 268 * Try to flush one buffer from the checkpoint list to disk.
227 * 269 *
228 * Return 1 if something happened which requires us to abort the current 270 * Return 1 if something happened which requires us to abort the current
229 * scan of the checkpoint list. 271 * scan of the checkpoint list. Return <0 if the buffer has failed to
272 * be written out.
230 * 273 *
231 * Called with j_list_lock held and drops it if 1 is returned 274 * Called with j_list_lock held and drops it if 1 is returned
232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 275 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +299,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
256 log_wait_commit(journal, tid); 299 log_wait_commit(journal, tid);
257 ret = 1; 300 ret = 1;
258 } else if (!buffer_dirty(bh)) { 301 } else if (!buffer_dirty(bh)) {
302 ret = 1;
303 if (unlikely(buffer_write_io_error(bh)))
304 ret = -EIO;
259 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 305 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
260 BUFFER_TRACE(bh, "remove from checkpoint"); 306 BUFFER_TRACE(bh, "remove from checkpoint");
261 __journal_remove_checkpoint(jh); 307 __journal_remove_checkpoint(jh);
@@ -263,7 +309,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
263 jbd_unlock_bh_state(bh); 309 jbd_unlock_bh_state(bh);
264 journal_remove_journal_head(bh); 310 journal_remove_journal_head(bh);
265 __brelse(bh); 311 __brelse(bh);
266 ret = 1;
267 } else { 312 } else {
268 /* 313 /*
269 * Important: we are about to write the buffer, and 314 * Important: we are about to write the buffer, and
@@ -295,6 +340,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
295 * to disk. We submit larger chunks of data at once. 340 * to disk. We submit larger chunks of data at once.
296 * 341 *
297 * The journal should be locked before calling this function. 342 * The journal should be locked before calling this function.
343 * Called with j_checkpoint_mutex held.
298 */ 344 */
299int log_do_checkpoint(journal_t *journal) 345int log_do_checkpoint(journal_t *journal)
300{ 346{
@@ -318,6 +364,7 @@ int log_do_checkpoint(journal_t *journal)
318 * OK, we need to start writing disk blocks. Take one transaction 364 * OK, we need to start writing disk blocks. Take one transaction
319 * and write it. 365 * and write it.
320 */ 366 */
367 result = 0;
321 spin_lock(&journal->j_list_lock); 368 spin_lock(&journal->j_list_lock);
322 if (!journal->j_checkpoint_transactions) 369 if (!journal->j_checkpoint_transactions)
323 goto out; 370 goto out;
@@ -334,7 +381,7 @@ restart:
334 int batch_count = 0; 381 int batch_count = 0;
335 struct buffer_head *bhs[NR_BATCH]; 382 struct buffer_head *bhs[NR_BATCH];
336 struct journal_head *jh; 383 struct journal_head *jh;
337 int retry = 0; 384 int retry = 0, err;
338 385
339 while (!retry && transaction->t_checkpoint_list) { 386 while (!retry && transaction->t_checkpoint_list) {
340 struct buffer_head *bh; 387 struct buffer_head *bh;
@@ -347,6 +394,8 @@ restart:
347 break; 394 break;
348 } 395 }
349 retry = __process_buffer(journal, jh, bhs,&batch_count); 396 retry = __process_buffer(journal, jh, bhs,&batch_count);
397 if (retry < 0 && !result)
398 result = retry;
350 if (!retry && (need_resched() || 399 if (!retry && (need_resched() ||
351 spin_needbreak(&journal->j_list_lock))) { 400 spin_needbreak(&journal->j_list_lock))) {
352 spin_unlock(&journal->j_list_lock); 401 spin_unlock(&journal->j_list_lock);
@@ -371,14 +420,18 @@ restart:
371 * Now we have cleaned up the first transaction's checkpoint 420 * Now we have cleaned up the first transaction's checkpoint
372 * list. Let's clean up the second one 421 * list. Let's clean up the second one
373 */ 422 */
374 __wait_cp_io(journal, transaction); 423 err = __wait_cp_io(journal, transaction);
424 if (!result)
425 result = err;
375 } 426 }
376out: 427out:
377 spin_unlock(&journal->j_list_lock); 428 spin_unlock(&journal->j_list_lock);
378 result = cleanup_journal_tail(journal);
379 if (result < 0) 429 if (result < 0)
380 return result; 430 journal_abort(journal, result);
381 return 0; 431 else
432 result = cleanup_journal_tail(journal);
433
434 return (result < 0) ? result : 0;
382} 435}
383 436
384/* 437/*
@@ -394,8 +447,9 @@ out:
394 * This is the only part of the journaling code which really needs to be 447 * This is the only part of the journaling code which really needs to be
395 * aware of transaction aborts. Checkpointing involves writing to the 448 * aware of transaction aborts. Checkpointing involves writing to the
396 * main filesystem area rather than to the journal, so it can proceed 449 * main filesystem area rather than to the journal, so it can proceed
397 * even in abort state, but we must not update the journal superblock if 450 * even in abort state, but we must not update the super block if
398 * we have an abort error outstanding. 451 * checkpointing may have failed. Otherwise, we would lose some metadata
452 * buffers which should be written-back to the filesystem.
399 */ 453 */
400 454
401int cleanup_journal_tail(journal_t *journal) 455int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +458,9 @@ int cleanup_journal_tail(journal_t *journal)
404 tid_t first_tid; 458 tid_t first_tid;
405 unsigned long blocknr, freed; 459 unsigned long blocknr, freed;
406 460
461 if (is_journal_aborted(journal))
462 return 1;
463
407 /* OK, work out the oldest transaction remaining in the log, and 464 /* OK, work out the oldest transaction remaining in the log, and
408 * the log block it starts at. 465 * the log block it starts at.
409 * 466 *
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e751..25719d902c51 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
482 printk(KERN_WARNING 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data " 483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b)); 484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
486 journal_abort(journal, err);
485 err = 0; 487 err = 0;
486 } 488 }
487 489
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
518 jh = commit_transaction->t_buffers; 520 jh = commit_transaction->t_buffers;
519 521
520 /* If we're in abort mode, we just un-journal the buffer and 522 /* If we're in abort mode, we just un-journal the buffer and
521 release it for background writing. */ 523 release it. */
522 524
523 if (is_journal_aborted(journal)) { 525 if (is_journal_aborted(journal)) {
526 clear_buffer_jbddirty(jh2bh(jh));
524 JBUFFER_TRACE(jh, "journal is aborting: refile"); 527 JBUFFER_TRACE(jh, "journal is aborting: refile");
525 journal_refile_buffer(journal, jh); 528 journal_refile_buffer(journal, jh);
526 /* If that was the last one, we need to clean up 529 /* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
762 /* AKPM: bforget here */ 765 /* AKPM: bforget here */
763 } 766 }
764 767
768 if (err)
769 journal_abort(journal, err);
770
765 jbd_debug(3, "JBD: commit phase 6\n"); 771 jbd_debug(3, "JBD: commit phase 6\n");
766 772
767 if (journal_write_commit_record(journal, commit_transaction)) 773 if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
852 if (buffer_jbddirty(bh)) { 858 if (buffer_jbddirty(bh)) {
853 JBUFFER_TRACE(jh, "add to new checkpointing trans"); 859 JBUFFER_TRACE(jh, "add to new checkpointing trans");
854 __journal_insert_checkpoint(jh, commit_transaction); 860 __journal_insert_checkpoint(jh, commit_transaction);
861 if (is_journal_aborted(journal))
862 clear_buffer_jbddirty(bh);
855 JBUFFER_TRACE(jh, "refile for checkpoint writeback"); 863 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
856 __journal_refile_buffer(jh); 864 __journal_refile_buffer(jh);
857 jbd_unlock_bh_state(bh); 865 jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index aa7143a8349b..9e4fa52d7dc8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1121,9 +1121,12 @@ recovery_error:
1121 * 1121 *
1122 * Release a journal_t structure once it is no longer in use by the 1122 * Release a journal_t structure once it is no longer in use by the
1123 * journaled object. 1123 * journaled object.
1124 * Return <0 if we couldn't clean up the journal.
1124 */ 1125 */
1125void journal_destroy(journal_t *journal) 1126int journal_destroy(journal_t *journal)
1126{ 1127{
1128 int err = 0;
1129
1127 /* Wait for the commit thread to wake up and die. */ 1130 /* Wait for the commit thread to wake up and die. */
1128 journal_kill_thread(journal); 1131 journal_kill_thread(journal);
1129 1132
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal)
1146 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1149 J_ASSERT(journal->j_checkpoint_transactions == NULL);
1147 spin_unlock(&journal->j_list_lock); 1150 spin_unlock(&journal->j_list_lock);
1148 1151
1149 /* We can now mark the journal as empty. */
1150 journal->j_tail = 0;
1151 journal->j_tail_sequence = ++journal->j_transaction_sequence;
1152 if (journal->j_sb_buffer) { 1152 if (journal->j_sb_buffer) {
1153 journal_update_superblock(journal, 1); 1153 if (!is_journal_aborted(journal)) {
1154 /* We can now mark the journal as empty. */
1155 journal->j_tail = 0;
1156 journal->j_tail_sequence =
1157 ++journal->j_transaction_sequence;
1158 journal_update_superblock(journal, 1);
1159 } else {
1160 err = -EIO;
1161 }
1154 brelse(journal->j_sb_buffer); 1162 brelse(journal->j_sb_buffer);
1155 } 1163 }
1156 1164
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal)
1160 journal_destroy_revoke(journal); 1168 journal_destroy_revoke(journal);
1161 kfree(journal->j_wbuf); 1169 kfree(journal->j_wbuf);
1162 kfree(journal); 1170 kfree(journal);
1171
1172 return err;
1163} 1173}
1164 1174
1165 1175
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal)
1359 spin_lock(&journal->j_list_lock); 1369 spin_lock(&journal->j_list_lock);
1360 while (!err && journal->j_checkpoint_transactions != NULL) { 1370 while (!err && journal->j_checkpoint_transactions != NULL) {
1361 spin_unlock(&journal->j_list_lock); 1371 spin_unlock(&journal->j_list_lock);
1372 mutex_lock(&journal->j_checkpoint_mutex);
1362 err = log_do_checkpoint(journal); 1373 err = log_do_checkpoint(journal);
1374 mutex_unlock(&journal->j_checkpoint_mutex);
1363 spin_lock(&journal->j_list_lock); 1375 spin_lock(&journal->j_list_lock);
1364 } 1376 }
1365 spin_unlock(&journal->j_list_lock); 1377 spin_unlock(&journal->j_list_lock);
1378
1379 if (is_journal_aborted(journal))
1380 return -EIO;
1381
1366 cleanup_journal_tail(journal); 1382 cleanup_journal_tail(journal);
1367 1383
1368 /* Finally, mark the journal as really needing no recovery. 1384 /* Finally, mark the journal as really needing no recovery.
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal)
1384 J_ASSERT(journal->j_head == journal->j_tail); 1400 J_ASSERT(journal->j_head == journal->j_tail);
1385 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1401 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1386 spin_unlock(&journal->j_state_lock); 1402 spin_unlock(&journal->j_state_lock);
1387 return err; 1403 return 0;
1388} 1404}
1389 1405
1390/** 1406/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed064..db5e982c5ddf 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do { \
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
226 int err; 226 int err, err2;
227 journal_superblock_t * sb; 227 journal_superblock_t * sb;
228 228
229 struct recovery_info info; 229 struct recovery_info info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
261 journal->j_transaction_sequence = ++info.end_transaction; 261 journal->j_transaction_sequence = ++info.end_transaction;
262 262
263 journal_clear_revoke(journal); 263 journal_clear_revoke(journal);
264 sync_blockdev(journal->j_fs_dev); 264 err2 = sync_blockdev(journal->j_fs_dev);
265 if (!err)
266 err = err2;
267
265 return err; 268 return err;
266} 269}
267 270
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a446..60d4c32c8808 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -860,7 +860,6 @@ out:
860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences 860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
861 * @handle: transaction 861 * @handle: transaction
862 * @bh: buffer to undo 862 * @bh: buffer to undo
863 * @credits: store the number of taken credits here (if not NULL)
864 * 863 *
865 * Sometimes there is a need to distinguish between metadata which has 864 * Sometimes there is a need to distinguish between metadata which has
866 * been committed to disk and that which has not. The ext3fs code uses 865 * been committed to disk and that which has not. The ext3fs code uses
@@ -954,9 +953,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
954 journal_t *journal = handle->h_transaction->t_journal; 953 journal_t *journal = handle->h_transaction->t_journal;
955 int need_brelse = 0; 954 int need_brelse = 0;
956 struct journal_head *jh; 955 struct journal_head *jh;
956 int ret = 0;
957 957
958 if (is_handle_aborted(handle)) 958 if (is_handle_aborted(handle))
959 return 0; 959 return ret;
960 960
961 jh = journal_add_journal_head(bh); 961 jh = journal_add_journal_head(bh);
962 JBUFFER_TRACE(jh, "entry"); 962 JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1067,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1067 time if it is redirtied */ 1067 time if it is redirtied */
1068 } 1068 }
1069 1069
1070 /* journal_clean_data_list() may have got there first */ 1070 /*
1071 * We cannot remove the buffer with io error from the
1072 * committing transaction, because otherwise it would
1073 * miss the error and the commit would not abort.
1074 */
1075 if (unlikely(!buffer_uptodate(bh))) {
1076 ret = -EIO;
1077 goto no_journal;
1078 }
1079
1071 if (jh->b_transaction != NULL) { 1080 if (jh->b_transaction != NULL) {
1072 JBUFFER_TRACE(jh, "unfile from commit"); 1081 JBUFFER_TRACE(jh, "unfile from commit");
1073 __journal_temp_unlink_buffer(jh); 1082 __journal_temp_unlink_buffer(jh);
@@ -1108,7 +1117,7 @@ no_journal:
1108 } 1117 }
1109 JBUFFER_TRACE(jh, "exit"); 1118 JBUFFER_TRACE(jh, "exit");
1110 journal_put_journal_head(jh); 1119 journal_put_journal_head(jh);
1111 return 0; 1120 return ret;
1112} 1121}
1113 1122
1114/** 1123/**
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
new file mode 100644
index 000000000000..f32f346f4b0a
--- /dev/null
+++ b/fs/jbd2/Kconfig
@@ -0,0 +1,33 @@
1config JBD2
2 tristate
3 select CRC32
4 help
5 This is a generic journaling layer for block devices that support
6 both 32-bit and 64-bit block numbers. It is currently used by
7 the ext4 and OCFS2 filesystems, but it could also be used to add
8 journal support to other file systems or block devices such
9 as RAID or LVM.
10
11 If you are using ext4 or OCFS2, you need to say Y here.
12 If you are not using ext4 or OCFS2 then you will
13 probably want to say N.
14
15 To compile this device as a module, choose M here. The module will be
16 called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
17 you cannot compile this code as a module.
18
19config JBD2_DEBUG
20 bool "JBD2 (ext4) debugging support"
21 depends on JBD2 && DEBUG_FS
22 help
23 If you are using the ext4 journaled file system (or
24 potentially any other filesystem/device using JBD2), this option
25 allows you to enable debugging output while the system is running,
26 in order to help track down any problems you are having.
27 By default, the debugging output will be turned off.
28
29 If you select Y here, then you will be able to turn on debugging
30 with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
31 number between 1 and 5. The higher the number, the more debugging
32 output is generated. To turn debugging off again, do
33 "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9203c3332f17..9497718fe920 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
116 */ 116 */
117void __jbd2_log_wait_for_space(journal_t *journal) 117void __jbd2_log_wait_for_space(journal_t *journal)
118{ 118{
119 int nblocks; 119 int nblocks, space_left;
120 assert_spin_locked(&journal->j_state_lock); 120 assert_spin_locked(&journal->j_state_lock);
121 121
122 nblocks = jbd_space_needed(journal); 122 nblocks = jbd_space_needed(journal);
@@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal)
129 /* 129 /*
130 * Test again, another process may have checkpointed while we 130 * Test again, another process may have checkpointed while we
131 * were waiting for the checkpoint lock. If there are no 131 * were waiting for the checkpoint lock. If there are no
132 * outstanding transactions there is nothing to checkpoint and 132 * transactions ready to be checkpointed, try to recover
133 * we can't make progress. Abort the journal in this case. 133 * journal space by calling cleanup_journal_tail(), and if
134 * that doesn't work, by waiting for the currently committing
135 * transaction to complete. If there is absolutely no way
136 * to make progress, this is either a BUG or corrupted
137 * filesystem, so abort the journal and leave a stack
138 * trace for forensic evidence.
134 */ 139 */
135 spin_lock(&journal->j_state_lock); 140 spin_lock(&journal->j_state_lock);
136 spin_lock(&journal->j_list_lock); 141 spin_lock(&journal->j_list_lock);
137 nblocks = jbd_space_needed(journal); 142 nblocks = jbd_space_needed(journal);
138 if (__jbd2_log_space_left(journal) < nblocks) { 143 space_left = __jbd2_log_space_left(journal);
144 if (space_left < nblocks) {
139 int chkpt = journal->j_checkpoint_transactions != NULL; 145 int chkpt = journal->j_checkpoint_transactions != NULL;
146 tid_t tid = 0;
140 147
148 if (journal->j_committing_transaction)
149 tid = journal->j_committing_transaction->t_tid;
141 spin_unlock(&journal->j_list_lock); 150 spin_unlock(&journal->j_list_lock);
142 spin_unlock(&journal->j_state_lock); 151 spin_unlock(&journal->j_state_lock);
143 if (chkpt) { 152 if (chkpt) {
144 jbd2_log_do_checkpoint(journal); 153 jbd2_log_do_checkpoint(journal);
154 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
155 /* We were able to recover space; yay! */
156 ;
157 } else if (tid) {
158 jbd2_log_wait_commit(journal, tid);
145 } else { 159 } else {
146 printk(KERN_ERR "%s: no transactions\n", 160 printk(KERN_ERR "%s: needed %d blocks and "
147 __func__); 161 "only had %d space available\n",
162 __func__, nblocks, space_left);
163 printk(KERN_ERR "%s: no way to get more "
164 "journal space in %s\n", __func__,
165 journal->j_devname);
166 WARN_ON(1);
148 jbd2_journal_abort(journal, 0); 167 jbd2_journal_abort(journal, 0);
149 } 168 }
150
151 spin_lock(&journal->j_state_lock); 169 spin_lock(&journal->j_state_lock);
152 } else { 170 } else {
153 spin_unlock(&journal->j_list_lock); 171 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8b119e16aa36..ebc667bc54a8 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -974,6 +974,9 @@ restart_loop:
974 journal->j_committing_transaction = NULL; 974 journal->j_committing_transaction = NULL;
975 spin_unlock(&journal->j_state_lock); 975 spin_unlock(&journal->j_state_lock);
976 976
977 if (journal->j_commit_callback)
978 journal->j_commit_callback(journal, commit_transaction);
979
977 if (commit_transaction->t_checkpoint_list == NULL && 980 if (commit_transaction->t_checkpoint_list == NULL &&
978 commit_transaction->t_checkpoint_io_list == NULL) { 981 commit_transaction->t_checkpoint_io_list == NULL) {
979 __jbd2_journal_drop_transaction(journal, commit_transaction); 982 __jbd2_journal_drop_transaction(journal, commit_transaction);
@@ -995,11 +998,8 @@ restart_loop:
995 } 998 }
996 spin_unlock(&journal->j_list_lock); 999 spin_unlock(&journal->j_list_lock);
997 1000
998 if (journal->j_commit_callback)
999 journal->j_commit_callback(journal, commit_transaction);
1000
1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
1002 journal->j_devname, commit_transaction->t_tid, 1002 journal->j_devname, journal->j_commit_sequence,
1003 journal->j_tail_sequence); 1003 journal->j_tail_sequence);
1004 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1004 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1005 journal->j_commit_sequence, journal->j_tail_sequence); 1005 journal->j_commit_sequence, journal->j_tail_sequence);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 783de118de92..e70d657a19f8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1089,6 +1089,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1089 if (!journal->j_wbuf) { 1089 if (!journal->j_wbuf) {
1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1091 __func__); 1091 __func__);
1092 jbd2_stats_proc_exit(journal);
1092 kfree(journal); 1093 kfree(journal);
1093 return NULL; 1094 return NULL;
1094 } 1095 }
@@ -1098,6 +1099,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1098 if (err) { 1099 if (err) {
1099 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1100 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1100 __func__); 1101 __func__);
1102 jbd2_stats_proc_exit(journal);
1101 kfree(journal); 1103 kfree(journal);
1102 return NULL; 1104 return NULL;
1103 } 1105 }
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 000000000000..6ae169cd8faa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
1config JFFS2_FS
2 tristate "Journalling Flash File System v2 (JFFS2) support"
3 select CRC32
4 depends on MTD
5 help
6 JFFS2 is the second generation of the Journalling Flash File System
7 for use on diskless embedded devices. It provides improved wear
8 levelling, compression and support for hard links. You cannot use
9 this on normal block devices, only on 'MTD' devices.
10
11 Further information on the design and implementation of JFFS2 is
12 available at <http://sources.redhat.com/jffs2/>.
13
14config JFFS2_FS_DEBUG
15 int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
16 depends on JFFS2_FS
17 default "0"
18 help
19 This controls the amount of debugging messages produced by the JFFS2
20 code. Set it to zero for use in production systems. For evaluation,
21 testing and debugging, it's advisable to set it to one. This will
22 enable a few assertions and will print debugging messages at the
23 KERN_DEBUG loglevel, where they won't normally be visible. Level 2
24 is unlikely to be useful - it enables extra debugging in certain
25 areas which at one point needed debugging, but when the bugs were
26 located and fixed, the detailed messages were relegated to level 2.
27
28 If reporting bugs, please try to have available a full dump of the
29 messages at debug level 1 while the misbehaviour was occurring.
30
31config JFFS2_FS_WRITEBUFFER
32 bool "JFFS2 write-buffering support"
33 depends on JFFS2_FS
34 default y
35 help
36 This enables the write-buffering support in JFFS2.
37
38 This functionality is required to support JFFS2 on the following
39 types of flash devices:
40 - NAND flash
41 - NOR flash with transparent ECC
42 - DataFlash
43
44config JFFS2_FS_WBUF_VERIFY
45 bool "Verify JFFS2 write-buffer reads"
46 depends on JFFS2_FS_WRITEBUFFER
47 default n
48 help
49 This causes JFFS2 to read back every page written through the
50 write-buffer, and check for errors.
51
52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)"
54 depends on JFFS2_FS && EXPERIMENTAL
55 default n
56 help
57 This feature makes it possible to use summary information
58 for faster filesystem mount.
59
60 The summary information can be inserted into a filesystem image
61 by the utility 'sumtool'.
62
63 If unsure, say 'N'.
64
65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)"
67 depends on JFFS2_FS && EXPERIMENTAL
68 default n
69 help
70 Extended attributes are name:value pairs associated with inodes by
71 the kernel or by users (see the attr(5) manual page, or visit
72 <http://acl.bestbits.at/> for details).
73
74 If unsure, say N.
75
76config JFFS2_FS_POSIX_ACL
77 bool "JFFS2 POSIX Access Control Lists"
78 depends on JFFS2_FS_XATTR
79 default y
80 select FS_POSIX_ACL
81 help
82 Posix Access Control Lists (ACLs) support permissions for users and
83 groups beyond the owner/group/world scheme.
84
85 To learn more about Access Control Lists, visit the Posix ACLs for
86 Linux website <http://acl.bestbits.at/>.
87
88 If you don't know what Access Control Lists are, say N
89
90config JFFS2_FS_SECURITY
91 bool "JFFS2 Security Labels"
92 depends on JFFS2_FS_XATTR
93 default y
94 help
95 Security labels support alternative access control models
96 implemented by security modules like SELinux. This option
97 enables an extended attribute handler for file security
98 labels in the jffs2 filesystem.
99
100 If you are not using a security module that requires using
101 extended attributes for file security labels, say N.
102
103config JFFS2_COMPRESSION_OPTIONS
104 bool "Advanced compression options for JFFS2"
105 depends on JFFS2_FS
106 default n
107 help
108 Enabling this option allows you to explicitly choose which
109 compression modules, if any, are enabled in JFFS2. Removing
110 compressors can mean you cannot read existing file systems,
111 and enabling experimental compressors can mean that you
112 write a file system which cannot be read by a standard kernel.
113
114 If unsure, you should _definitely_ say 'N'.
115
116config JFFS2_ZLIB
117 bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
118 select ZLIB_INFLATE
119 select ZLIB_DEFLATE
120 depends on JFFS2_FS
121 default y
122 help
123 Zlib is designed to be a free, general-purpose, legally unencumbered,
124 lossless data-compression library for use on virtually any computer
125 hardware and operating system. See <http://www.gzip.org/zlib/> for
126 further information.
127
128 Say 'Y' if unsure.
129
130config JFFS2_LZO
131 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
132 select LZO_COMPRESS
133 select LZO_DECOMPRESS
134 depends on JFFS2_FS
135 default n
136 help
137 minilzo-based compression. Generally works better than Zlib.
138
139 This feature was added in July, 2007. Say 'N' if you need
140 compatibility with older bootloaders or kernels.
141
142config JFFS2_RTIME
143 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
144 depends on JFFS2_FS
145 default y
146 help
147 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
148
149config JFFS2_RUBIN
150 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
151 depends on JFFS2_FS
152 default n
153 help
154 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
155
156choice
157 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
158 default JFFS2_CMODE_PRIORITY
159 depends on JFFS2_FS
160 help
161 You can set here the default compression mode of JFFS2 from
162 the available compression modes. Don't touch if unsure.
163
164config JFFS2_CMODE_NONE
165 bool "no compression"
166 help
167 Uses no compression.
168
169config JFFS2_CMODE_PRIORITY
170 bool "priority"
171 help
172 Tries the compressors in a predefined order and chooses the first
173 successful one.
174
175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)"
177 help
178 Tries all compressors and chooses the one which has the smallest
179 result.
180
181config JFFS2_CMODE_FAVOURLZO
182 bool "Favour LZO"
183 help
184 Tries all compressors and chooses the one which has the smallest
185 result but gives some preference to LZO (which has faster
186 decompression) at the expense of size.
187
188endchoice
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 8adebd3e43c6..3cceef4ad2b7 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c)
85 for (;;) { 85 for (;;) {
86 allow_signal(SIGHUP); 86 allow_signal(SIGHUP);
87 again: 87 again:
88 spin_lock(&c->erase_completion_lock);
88 if (!jffs2_thread_should_wake(c)) { 89 if (!jffs2_thread_should_wake(c)) {
89 set_current_state (TASK_INTERRUPTIBLE); 90 set_current_state (TASK_INTERRUPTIBLE);
91 spin_unlock(&c->erase_completion_lock);
90 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); 92 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n"));
91 /* Yes, there's a race here; we checked jffs2_thread_should_wake()
92 before setting current->state to TASK_INTERRUPTIBLE. But it doesn't
93 matter - We don't care if we miss a wakeup, because the GC thread
94 is only an optimisation anyway. */
95 schedule(); 93 schedule();
96 } 94 } else
95 spin_unlock(&c->erase_completion_lock);
96
97 97
98 /* This thread is purely an optimisation. But if it runs when 98 /* This thread is purely an optimisation. But if it runs when
99 other things could be running, it actually makes things a 99 other things could be running, it actually makes things a
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b37..f25e70c1b51c 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
53} 53}
54 54
55/* jffs2_compress: 55/* jffs2_compress:
56 * @data: Pointer to uncompressed data 56 * @data_in: Pointer to uncompressed data
57 * @cdata: Pointer to returned pointer to buffer for compressed data 57 * @cpage_out: Pointer to returned pointer to buffer for compressed data
58 * @datalen: On entry, holds the amount of data available for compression. 58 * @datalen: On entry, holds the amount of data available for compression.
59 * On exit, expected to hold the amount of data actually compressed. 59 * On exit, expected to hold the amount of data actually compressed.
60 * @cdatalen: On entry, holds the amount of space available for compressed 60 * @cdatalen: On entry, holds the amount of space available for compressed
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index 47b045797e42..90cb60d09787 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -19,7 +19,7 @@
19 19
20static void *lzo_mem; 20static void *lzo_mem;
21static void *lzo_compress_buf; 21static void *lzo_compress_buf;
22static DEFINE_MUTEX(deflate_mutex); 22static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */
23 23
24static void free_workspace(void) 24static void free_workspace(void)
25{ 25{
@@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
49 49
50 mutex_lock(&deflate_mutex); 50 mutex_lock(&deflate_mutex);
51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); 51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem);
52 mutex_unlock(&deflate_mutex);
53
54 if (ret != LZO_E_OK) 52 if (ret != LZO_E_OK)
55 return -1; 53 goto fail;
56 54
57 if (compress_size > *dstlen) 55 if (compress_size > *dstlen)
58 return -1; 56 goto fail;
59 57
60 memcpy(cpage_out, lzo_compress_buf, compress_size); 58 memcpy(cpage_out, lzo_compress_buf, compress_size);
61 *dstlen = compress_size; 59 mutex_unlock(&deflate_mutex);
62 60
61 *dstlen = compress_size;
63 return 0; 62 return 0;
63
64 fail:
65 mutex_unlock(&deflate_mutex);
66 return -1;
64} 67}
65 68
66static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, 69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef55254..6f60cc910f4c 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -39,7 +39,8 @@ const struct file_operations jffs2_dir_operations =
39 .read = generic_read_dir, 39 .read = generic_read_dir,
40 .readdir = jffs2_readdir, 40 .readdir = jffs2_readdir,
41 .unlocked_ioctl=jffs2_ioctl, 41 .unlocked_ioctl=jffs2_ioctl,
42 .fsync = jffs2_fsync 42 .fsync = jffs2_fsync,
43 .llseek = generic_file_llseek,
43}; 44};
44 45
45 46
@@ -108,9 +109,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
108 } 109 }
109 } 110 }
110 111
111 d_add(target, inode); 112 return d_splice_alias(inode, target);
112
113 return NULL;
114} 113}
115 114
116/***********************************************************************/ 115/***********************************************************************/
@@ -311,7 +310,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
311 /* FIXME: If you care. We'd need to use frags for the target 310 /* FIXME: If you care. We'd need to use frags for the target
312 if it grows much more than this */ 311 if it grows much more than this */
313 if (targetlen > 254) 312 if (targetlen > 254)
314 return -EINVAL; 313 return -ENAMETOOLONG;
315 314
316 ri = jffs2_alloc_raw_inode(); 315 ri = jffs2_alloc_raw_inode();
317 316
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2c..259461b910af 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
68 instr->len = c->sector_size; 68 instr->len = c->sector_size;
69 instr->callback = jffs2_erase_callback; 69 instr->callback = jffs2_erase_callback;
70 instr->priv = (unsigned long)(&instr[1]); 70 instr->priv = (unsigned long)(&instr[1]);
71 instr->fail_addr = 0xffffffff; 71 instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
72 72
73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb; 73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
74 ((struct erase_priv_struct *)instr->priv)->c = c; 74 ((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
175{ 175{
176 /* For NAND, if the failure did not occur at the device level for a 176 /* For NAND, if the failure did not occur at the device level for a
177 specific physical page, don't bother updating the bad block table. */ 177 specific physical page, don't bother updating the bad block table. */
178 if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { 178 if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
179 /* We had a device-level failure to erase. Let's see if we've 179 /* We had a device-level failure to erase. Let's see if we've
180 failed too many times. */ 180 failed too many times. */
181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c43830221..249305d65d5b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
207 buf->f_files = 0; 207 buf->f_files = 0;
208 buf->f_ffree = 0; 208 buf->f_ffree = 0;
209 buf->f_namelen = JFFS2_MAX_NAME_LEN; 209 buf->f_namelen = JFFS2_MAX_NAME_LEN;
210 buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
211 buf->f_fsid.val[1] = c->mtd->index;
210 212
211 spin_lock(&c->erase_completion_lock); 213 spin_lock(&c->erase_completion_lock);
212 avail = c->dirty_size + c->free_size; 214 avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
440 442
441 memset(ri, 0, sizeof(*ri)); 443 memset(ri, 0, sizeof(*ri));
442 /* Set OS-specific defaults for new inodes */ 444 /* Set OS-specific defaults for new inodes */
443 ri->uid = cpu_to_je16(current->fsuid); 445 ri->uid = cpu_to_je16(current_fsuid());
444 446
445 if (dir_i->i_mode & S_ISGID) { 447 if (dir_i->i_mode & S_ISGID) {
446 ri->gid = cpu_to_je16(dir_i->i_gid); 448 ri->gid = cpu_to_je16(dir_i->i_gid);
447 if (S_ISDIR(mode)) 449 if (S_ISDIR(mode))
448 mode |= S_ISGID; 450 mode |= S_ISGID;
449 } else { 451 } else {
450 ri->gid = cpu_to_je16(current->fsgid); 452 ri->gid = cpu_to_je16(current_fsgid());
451 } 453 }
452 454
453 /* POSIX ACLs have to be processed now, at least partly. 455 /* POSIX ACLs have to be processed now, at least partly.
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1ba..21a052915aa9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,12 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
261 261
262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */ 262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */
263 263
264#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
265 /* adjust write buffer offset, else we get a non contiguous write bug */
266 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
267 c->wbuf_ofs = 0xffffffff;
268#endif
269
264 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); 270 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
265 271
266 return 0; 272 return 0;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index efd401257ed9..4c4e18c54a51 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -22,6 +22,7 @@
22#include <linux/mtd/super.h> 22#include <linux/mtd/super.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/exportfs.h>
25#include "compr.h" 26#include "compr.h"
26#include "nodelist.h" 27#include "nodelist.h"
27 28
@@ -62,6 +63,52 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
62 return 0; 63 return 0;
63} 64}
64 65
66static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino,
67 uint32_t generation)
68{
69 /* We don't care about i_generation. We'll destroy the flash
70 before we start re-using inode numbers anyway. And even
71 if that wasn't true, we'd have other problems...*/
72 return jffs2_iget(sb, ino);
73}
74
75static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
76 int fh_len, int fh_type)
77{
78 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
79 jffs2_nfs_get_inode);
80}
81
82static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid,
83 int fh_len, int fh_type)
84{
85 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
86 jffs2_nfs_get_inode);
87}
88
89static struct dentry *jffs2_get_parent(struct dentry *child)
90{
91 struct jffs2_inode_info *f;
92 uint32_t pino;
93
94 BUG_ON(!S_ISDIR(child->d_inode->i_mode));
95
96 f = JFFS2_INODE_INFO(child->d_inode);
97
98 pino = f->inocache->pino_nlink;
99
100 JFFS2_DEBUG("Parent of directory ino #%u is #%u\n",
101 f->inocache->ino, pino);
102
103 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
104}
105
106static struct export_operations jffs2_export_ops = {
107 .get_parent = jffs2_get_parent,
108 .fh_to_dentry = jffs2_fh_to_dentry,
109 .fh_to_parent = jffs2_fh_to_parent,
110};
111
65static const struct super_operations jffs2_super_operations = 112static const struct super_operations jffs2_super_operations =
66{ 113{
67 .alloc_inode = jffs2_alloc_inode, 114 .alloc_inode = jffs2_alloc_inode,
@@ -104,6 +151,7 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
104 spin_lock_init(&c->inocache_lock); 151 spin_lock_init(&c->inocache_lock);
105 152
106 sb->s_op = &jffs2_super_operations; 153 sb->s_op = &jffs2_super_operations;
154 sb->s_export_op = &jffs2_export_ops;
107 sb->s_flags = sb->s_flags | MS_NOATIME; 155 sb->s_flags = sb->s_flags | MS_NOATIME;
108 sb->s_xattr = jffs2_xattr_handlers; 156 sb->s_xattr = jffs2_xattr_handlers;
109#ifdef CONFIG_JFFS2_FS_POSIX_ACL 157#ifdef CONFIG_JFFS2_FS_POSIX_ACL
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e4..d9a721e6db70 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
679 679
680 memset(c->wbuf,0xff,c->wbuf_pagesize); 680 memset(c->wbuf,0xff,c->wbuf_pagesize);
681 /* adjust write buffer offset, else we get a non contiguous write bug */ 681 /* adjust write buffer offset, else we get a non contiguous write bug */
682 if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) 682 c->wbuf_ofs += c->wbuf_pagesize;
683 c->wbuf_ofs += c->wbuf_pagesize;
684 else
685 c->wbuf_ofs = 0xffffffff;
686 c->wbuf_len = 0; 683 c->wbuf_len = 0;
687 return 0; 684 return 0;
688} 685}
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cd2ec2988b59..335c4de6552d 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1168,7 +1168,7 @@ journal_found:
1168 bd_release(bdev); 1168 bd_release(bdev);
1169 1169
1170 close: /* close external log device */ 1170 close: /* close external log device */
1171 blkdev_put(bdev); 1171 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1172 1172
1173 free: /* free log descriptor */ 1173 free: /* free log descriptor */
1174 mutex_unlock(&jfs_log_mutex); 1174 mutex_unlock(&jfs_log_mutex);
@@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb)
1514 rc = lmLogShutdown(log); 1514 rc = lmLogShutdown(log);
1515 1515
1516 bd_release(bdev); 1516 bd_release(bdev);
1517 blkdev_put(bdev); 1517 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1518 1518
1519 kfree(log); 1519 kfree(log);
1520 1520
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 2aba82386810..cc3cedffbfa1 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1511,25 +1511,12 @@ struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
1511 1511
1512struct dentry *jfs_get_parent(struct dentry *dentry) 1512struct dentry *jfs_get_parent(struct dentry *dentry)
1513{ 1513{
1514 struct super_block *sb = dentry->d_inode->i_sb;
1515 struct dentry *parent = ERR_PTR(-ENOENT);
1516 struct inode *inode;
1517 unsigned long parent_ino; 1514 unsigned long parent_ino;
1518 1515
1519 parent_ino = 1516 parent_ino =
1520 le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); 1517 le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot);
1521 inode = jfs_iget(sb, parent_ino);
1522 if (IS_ERR(inode)) {
1523 parent = ERR_CAST(inode);
1524 } else {
1525 parent = d_alloc_anon(inode);
1526 if (!parent) {
1527 parent = ERR_PTR(-ENOMEM);
1528 iput(inode);
1529 }
1530 }
1531 1518
1532 return parent; 1519 return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino));
1533} 1520}
1534 1521
1535const struct inode_operations jfs_dir_inode_operations = { 1522const struct inode_operations jfs_dir_inode_operations = {
@@ -1560,6 +1547,7 @@ const struct file_operations jfs_dir_operations = {
1560#ifdef CONFIG_COMPAT 1547#ifdef CONFIG_COMPAT
1561 .compat_ioctl = jfs_compat_ioctl, 1548 .compat_ioctl = jfs_compat_ioctl,
1562#endif 1549#endif
1550 .llseek = generic_file_llseek,
1563}; 1551};
1564 1552
1565static int jfs_ci_hash(struct dentry *dir, struct qstr *this) 1553static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
diff --git a/fs/libfs.c b/fs/libfs.c
index 1add676a19df..e960a8321902 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -732,28 +732,6 @@ out:
732 return ret; 732 return ret;
733} 733}
734 734
735/*
736 * This is what d_alloc_anon should have been. Once the exportfs
737 * argument transition has been finished I will update d_alloc_anon
738 * to this prototype and this wrapper will go away. --hch
739 */
740static struct dentry *exportfs_d_alloc(struct inode *inode)
741{
742 struct dentry *dentry;
743
744 if (!inode)
745 return NULL;
746 if (IS_ERR(inode))
747 return ERR_PTR(PTR_ERR(inode));
748
749 dentry = d_alloc_anon(inode);
750 if (!dentry) {
751 iput(inode);
752 dentry = ERR_PTR(-ENOMEM);
753 }
754 return dentry;
755}
756
757/** 735/**
758 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 736 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
759 * @sb: filesystem to do the file handle conversion on 737 * @sb: filesystem to do the file handle conversion on
@@ -782,7 +760,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
782 break; 760 break;
783 } 761 }
784 762
785 return exportfs_d_alloc(inode); 763 return d_obtain_alias(inode);
786} 764}
787EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 765EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
788 766
@@ -815,7 +793,7 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
815 break; 793 break;
816 } 794 }
817 795
818 return exportfs_d_alloc(inode); 796 return d_obtain_alias(inode);
819} 797}
820EXPORT_SYMBOL_GPL(generic_fh_to_parent); 798EXPORT_SYMBOL_GPL(generic_fh_to_parent);
821 799
@@ -836,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr);
836EXPORT_SYMBOL(simple_link); 814EXPORT_SYMBOL(simple_link);
837EXPORT_SYMBOL(simple_lookup); 815EXPORT_SYMBOL(simple_lookup);
838EXPORT_SYMBOL(simple_pin_fs); 816EXPORT_SYMBOL(simple_pin_fs);
839EXPORT_SYMBOL(simple_prepare_write); 817EXPORT_UNUSED_SYMBOL(simple_prepare_write);
840EXPORT_SYMBOL(simple_readpage); 818EXPORT_SYMBOL(simple_readpage);
841EXPORT_SYMBOL(simple_release_fs); 819EXPORT_SYMBOL(simple_release_fs);
842EXPORT_SYMBOL(simple_rename); 820EXPORT_SYMBOL(simple_rename);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 014f6ce48172..4dfdcbc6bf68 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -434,6 +434,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
434 * reclaim all locks we hold on this server. 434 * reclaim all locks we hold on this server.
435 */ 435 */
436 memset(&saddr, 0, sizeof(saddr)); 436 memset(&saddr, 0, sizeof(saddr));
437 saddr.sin_family = AF_INET;
437 saddr.sin_addr.s_addr = argp->addr; 438 saddr.sin_addr.s_addr = argp->addr;
438 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); 439 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
439 440
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 548b0bb2b84d..3ca89e2a9381 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -466,6 +466,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
466 * reclaim all locks we hold on this server. 466 * reclaim all locks we hold on this server.
467 */ 467 */
468 memset(&saddr, 0, sizeof(saddr)); 468 memset(&saddr, 0, sizeof(saddr));
469 saddr.sin_family = AF_INET;
469 saddr.sin_addr.s_addr = argp->addr; 470 saddr.sin_addr.s_addr = argp->addr;
470 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); 471 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
471 472
diff --git a/fs/locks.c b/fs/locks.c
index 5eb259e3cd38..09062e3ff104 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1580 cmd &= ~LOCK_NB; 1580 cmd &= ~LOCK_NB;
1581 unlock = (cmd == LOCK_UN); 1581 unlock = (cmd == LOCK_UN);
1582 1582
1583 if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) 1583 if (!unlock && !(cmd & LOCK_MAND) &&
1584 !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
1584 goto out_putf; 1585 goto out_putf;
1585 1586
1586 error = flock_make_lock(filp, &lock, cmd); 1587 error = flock_make_lock(filp, &lock, cmd);
@@ -2078,6 +2079,7 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2078EXPORT_SYMBOL_GPL(vfs_cancel_lock); 2079EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2079 2080
2080#ifdef CONFIG_PROC_FS 2081#ifdef CONFIG_PROC_FS
2082#include <linux/proc_fs.h>
2081#include <linux/seq_file.h> 2083#include <linux/seq_file.h>
2082 2084
2083static void lock_get_status(struct seq_file *f, struct file_lock *fl, 2085static void lock_get_status(struct seq_file *f, struct file_lock *fl,
@@ -2183,12 +2185,31 @@ static void locks_stop(struct seq_file *f, void *v)
2183 unlock_kernel(); 2185 unlock_kernel();
2184} 2186}
2185 2187
2186struct seq_operations locks_seq_operations = { 2188static const struct seq_operations locks_seq_operations = {
2187 .start = locks_start, 2189 .start = locks_start,
2188 .next = locks_next, 2190 .next = locks_next,
2189 .stop = locks_stop, 2191 .stop = locks_stop,
2190 .show = locks_show, 2192 .show = locks_show,
2191}; 2193};
2194
2195static int locks_open(struct inode *inode, struct file *filp)
2196{
2197 return seq_open(filp, &locks_seq_operations);
2198}
2199
2200static const struct file_operations proc_locks_operations = {
2201 .open = locks_open,
2202 .read = seq_read,
2203 .llseek = seq_lseek,
2204 .release = seq_release,
2205};
2206
2207static int __init proc_locks_init(void)
2208{
2209 proc_create("locks", 0, NULL, &proc_locks_operations);
2210 return 0;
2211}
2212module_init(proc_locks_init);
2192#endif 2213#endif
2193 2214
2194/** 2215/**
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile
deleted file mode 100644
index ea67646fcb95..000000000000
--- a/fs/msdos/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the Linux msdos filesystem routines.
3#
4
5obj-$(CONFIG_MSDOS_FS) += msdos.o
6
7msdos-y := namei.o
diff --git a/fs/namei.c b/fs/namei.c
index 4ea63ed5e791..09ce58e49e72 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask,
212 * Read/write DACs are always overridable. 212 * Read/write DACs are always overridable.
213 * Executable DACs are overridable if at least one exec bit is set. 213 * Executable DACs are overridable if at least one exec bit is set.
214 */ 214 */
215 if (!(mask & MAY_EXEC) || 215 if (!(mask & MAY_EXEC) || execute_ok(inode))
216 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
217 if (capable(CAP_DAC_OVERRIDE)) 216 if (capable(CAP_DAC_OVERRIDE))
218 return 0; 217 return 0;
219 218
@@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask)
249 } 248 }
250 249
251 /* Ordinary permission routines do not understand MAY_APPEND. */ 250 /* Ordinary permission routines do not understand MAY_APPEND. */
252 if (inode->i_op && inode->i_op->permission) { 251 if (inode->i_op && inode->i_op->permission)
253 retval = inode->i_op->permission(inode, mask); 252 retval = inode->i_op->permission(inode, mask);
254 if (!retval) { 253 else
255 /*
256 * Exec permission on a regular file is denied if none
257 * of the execute bits are set.
258 *
259 * This check should be done by the ->permission()
260 * method.
261 */
262 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
263 !(inode->i_mode & S_IXUGO))
264 return -EACCES;
265 }
266 } else {
267 retval = generic_permission(inode, mask, NULL); 254 retval = generic_permission(inode, mask, NULL);
268 } 255
269 if (retval) 256 if (retval)
270 return retval; 257 return retval;
271 258
@@ -1106,6 +1093,15 @@ int path_lookup(const char *name, unsigned int flags,
1106 return do_path_lookup(AT_FDCWD, name, flags, nd); 1093 return do_path_lookup(AT_FDCWD, name, flags, nd);
1107} 1094}
1108 1095
1096int kern_path(const char *name, unsigned int flags, struct path *path)
1097{
1098 struct nameidata nd;
1099 int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
1100 if (!res)
1101 *path = nd.path;
1102 return res;
1103}
1104
1109/** 1105/**
1110 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1106 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
1111 * @dentry: pointer to dentry of the base directory 1107 * @dentry: pointer to dentry of the base directory
@@ -1138,9 +1134,16 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1138 1134
1139} 1135}
1140 1136
1141static int __path_lookup_intent_open(int dfd, const char *name, 1137/**
1142 unsigned int lookup_flags, struct nameidata *nd, 1138 * path_lookup_open - lookup a file path with open intent
1143 int open_flags, int create_mode) 1139 * @dfd: the directory to use as base, or AT_FDCWD
1140 * @name: pointer to file name
1141 * @lookup_flags: lookup intent flags
1142 * @nd: pointer to nameidata
1143 * @open_flags: open intent flags
1144 */
1145int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1146 struct nameidata *nd, int open_flags)
1144{ 1147{
1145 struct file *filp = get_empty_filp(); 1148 struct file *filp = get_empty_filp();
1146 int err; 1149 int err;
@@ -1149,7 +1152,7 @@ static int __path_lookup_intent_open(int dfd, const char *name,
1149 return -ENFILE; 1152 return -ENFILE;
1150 nd->intent.open.file = filp; 1153 nd->intent.open.file = filp;
1151 nd->intent.open.flags = open_flags; 1154 nd->intent.open.flags = open_flags;
1152 nd->intent.open.create_mode = create_mode; 1155 nd->intent.open.create_mode = 0;
1153 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); 1156 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1154 if (IS_ERR(nd->intent.open.file)) { 1157 if (IS_ERR(nd->intent.open.file)) {
1155 if (err == 0) { 1158 if (err == 0) {
@@ -1161,38 +1164,6 @@ static int __path_lookup_intent_open(int dfd, const char *name,
1161 return err; 1164 return err;
1162} 1165}
1163 1166
1164/**
1165 * path_lookup_open - lookup a file path with open intent
1166 * @dfd: the directory to use as base, or AT_FDCWD
1167 * @name: pointer to file name
1168 * @lookup_flags: lookup intent flags
1169 * @nd: pointer to nameidata
1170 * @open_flags: open intent flags
1171 */
1172int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1173 struct nameidata *nd, int open_flags)
1174{
1175 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
1176 open_flags, 0);
1177}
1178
1179/**
1180 * path_lookup_create - lookup a file path with open + create intent
1181 * @dfd: the directory to use as base, or AT_FDCWD
1182 * @name: pointer to file name
1183 * @lookup_flags: lookup intent flags
1184 * @nd: pointer to nameidata
1185 * @open_flags: open intent flags
1186 * @create_mode: create intent flags
1187 */
1188static int path_lookup_create(int dfd, const char *name,
1189 unsigned int lookup_flags, struct nameidata *nd,
1190 int open_flags, int create_mode)
1191{
1192 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
1193 nd, open_flags, create_mode);
1194}
1195
1196static struct dentry *__lookup_hash(struct qstr *name, 1167static struct dentry *__lookup_hash(struct qstr *name,
1197 struct dentry *base, struct nameidata *nd) 1168 struct dentry *base, struct nameidata *nd)
1198{ 1169{
@@ -1470,20 +1441,18 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1470 1441
1471 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1442 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1472 1443
1473 for (p = p1; p->d_parent != p; p = p->d_parent) { 1444 p = d_ancestor(p2, p1);
1474 if (p->d_parent == p2) { 1445 if (p) {
1475 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); 1446 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
1476 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); 1447 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
1477 return p; 1448 return p;
1478 }
1479 } 1449 }
1480 1450
1481 for (p = p2; p->d_parent != p; p = p->d_parent) { 1451 p = d_ancestor(p1, p2);
1482 if (p->d_parent == p1) { 1452 if (p) {
1483 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1453 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1484 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1454 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
1485 return p; 1455 return p;
1486 }
1487 } 1456 }
1488 1457
1489 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1458 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
@@ -1702,8 +1671,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1702 /* 1671 /*
1703 * Create - we need to know the parent. 1672 * Create - we need to know the parent.
1704 */ 1673 */
1705 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, 1674 error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
1706 &nd, flag, mode);
1707 if (error) 1675 if (error)
1708 return ERR_PTR(error); 1676 return ERR_PTR(error);
1709 1677
@@ -1714,10 +1682,20 @@ struct file *do_filp_open(int dfd, const char *pathname,
1714 */ 1682 */
1715 error = -EISDIR; 1683 error = -EISDIR;
1716 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1684 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
1717 goto exit; 1685 goto exit_parent;
1718 1686
1687 error = -ENFILE;
1688 filp = get_empty_filp();
1689 if (filp == NULL)
1690 goto exit_parent;
1691 nd.intent.open.file = filp;
1692 nd.intent.open.flags = flag;
1693 nd.intent.open.create_mode = mode;
1719 dir = nd.path.dentry; 1694 dir = nd.path.dentry;
1720 nd.flags &= ~LOOKUP_PARENT; 1695 nd.flags &= ~LOOKUP_PARENT;
1696 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1697 if (flag & O_EXCL)
1698 nd.flags |= LOOKUP_EXCL;
1721 mutex_lock(&dir->d_inode->i_mutex); 1699 mutex_lock(&dir->d_inode->i_mutex);
1722 path.dentry = lookup_hash(&nd); 1700 path.dentry = lookup_hash(&nd);
1723 path.mnt = nd.path.mnt; 1701 path.mnt = nd.path.mnt;
@@ -1822,6 +1800,7 @@ exit_dput:
1822exit: 1800exit:
1823 if (!IS_ERR(nd.intent.open.file)) 1801 if (!IS_ERR(nd.intent.open.file))
1824 release_open_intent(&nd); 1802 release_open_intent(&nd);
1803exit_parent:
1825 path_put(&nd.path); 1804 path_put(&nd.path);
1826 return ERR_PTR(error); 1805 return ERR_PTR(error);
1827 1806
@@ -1914,7 +1893,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1914 if (nd->last_type != LAST_NORM) 1893 if (nd->last_type != LAST_NORM)
1915 goto fail; 1894 goto fail;
1916 nd->flags &= ~LOOKUP_PARENT; 1895 nd->flags &= ~LOOKUP_PARENT;
1917 nd->flags |= LOOKUP_CREATE; 1896 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL;
1918 nd->intent.open.flags = O_EXCL; 1897 nd->intent.open.flags = O_EXCL;
1919 1898
1920 /* 1899 /*
@@ -2178,16 +2157,19 @@ static long do_rmdir(int dfd, const char __user *pathname)
2178 return error; 2157 return error;
2179 2158
2180 switch(nd.last_type) { 2159 switch(nd.last_type) {
2181 case LAST_DOTDOT: 2160 case LAST_DOTDOT:
2182 error = -ENOTEMPTY; 2161 error = -ENOTEMPTY;
2183 goto exit1; 2162 goto exit1;
2184 case LAST_DOT: 2163 case LAST_DOT:
2185 error = -EINVAL; 2164 error = -EINVAL;
2186 goto exit1; 2165 goto exit1;
2187 case LAST_ROOT: 2166 case LAST_ROOT:
2188 error = -EBUSY; 2167 error = -EBUSY;
2189 goto exit1; 2168 goto exit1;
2190 } 2169 }
2170
2171 nd.flags &= ~LOOKUP_PARENT;
2172
2191 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2173 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2192 dentry = lookup_hash(&nd); 2174 dentry = lookup_hash(&nd);
2193 error = PTR_ERR(dentry); 2175 error = PTR_ERR(dentry);
@@ -2265,6 +2247,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2265 error = -EISDIR; 2247 error = -EISDIR;
2266 if (nd.last_type != LAST_NORM) 2248 if (nd.last_type != LAST_NORM)
2267 goto exit1; 2249 goto exit1;
2250
2251 nd.flags &= ~LOOKUP_PARENT;
2252
2268 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2253 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2269 dentry = lookup_hash(&nd); 2254 dentry = lookup_hash(&nd);
2270 error = PTR_ERR(dentry); 2255 error = PTR_ERR(dentry);
@@ -2654,6 +2639,10 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2654 if (newnd.last_type != LAST_NORM) 2639 if (newnd.last_type != LAST_NORM)
2655 goto exit2; 2640 goto exit2;
2656 2641
2642 oldnd.flags &= ~LOOKUP_PARENT;
2643 newnd.flags &= ~LOOKUP_PARENT;
2644 newnd.flags |= LOOKUP_RENAME_TARGET;
2645
2657 trap = lock_rename(new_dir, old_dir); 2646 trap = lock_rename(new_dir, old_dir);
2658 2647
2659 old_dentry = lookup_hash(&oldnd); 2648 old_dentry = lookup_hash(&oldnd);
@@ -2855,6 +2844,7 @@ EXPORT_SYMBOL(__page_symlink);
2855EXPORT_SYMBOL(page_symlink); 2844EXPORT_SYMBOL(page_symlink);
2856EXPORT_SYMBOL(page_symlink_inode_operations); 2845EXPORT_SYMBOL(page_symlink_inode_operations);
2857EXPORT_SYMBOL(path_lookup); 2846EXPORT_SYMBOL(path_lookup);
2847EXPORT_SYMBOL(kern_path);
2858EXPORT_SYMBOL(vfs_path_lookup); 2848EXPORT_SYMBOL(vfs_path_lookup);
2859EXPORT_SYMBOL(inode_permission); 2849EXPORT_SYMBOL(inode_permission);
2860EXPORT_SYMBOL(vfs_permission); 2850EXPORT_SYMBOL(vfs_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 6e283c93b50d..cce46702d33c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1167,19 +1167,19 @@ asmlinkage long sys_oldumount(char __user * name)
1167 1167
1168#endif 1168#endif
1169 1169
1170static int mount_is_safe(struct nameidata *nd) 1170static int mount_is_safe(struct path *path)
1171{ 1171{
1172 if (capable(CAP_SYS_ADMIN)) 1172 if (capable(CAP_SYS_ADMIN))
1173 return 0; 1173 return 0;
1174 return -EPERM; 1174 return -EPERM;
1175#ifdef notyet 1175#ifdef notyet
1176 if (S_ISLNK(nd->path.dentry->d_inode->i_mode)) 1176 if (S_ISLNK(path->dentry->d_inode->i_mode))
1177 return -EPERM; 1177 return -EPERM;
1178 if (nd->path.dentry->d_inode->i_mode & S_ISVTX) { 1178 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1179 if (current->uid != nd->path.dentry->d_inode->i_uid) 1179 if (current->uid != path->dentry->d_inode->i_uid)
1180 return -EPERM; 1180 return -EPERM;
1181 } 1181 }
1182 if (vfs_permission(nd, MAY_WRITE)) 1182 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1183 return -EPERM; 1183 return -EPERM;
1184 return 0; 1184 return 0;
1185#endif 1185#endif
@@ -1425,11 +1425,10 @@ out_unlock:
1425 1425
1426/* 1426/*
1427 * recursively change the type of the mountpoint. 1427 * recursively change the type of the mountpoint.
1428 * noinline this do_mount helper to save do_mount stack space.
1429 */ 1428 */
1430static noinline int do_change_type(struct nameidata *nd, int flag) 1429static int do_change_type(struct path *path, int flag)
1431{ 1430{
1432 struct vfsmount *m, *mnt = nd->path.mnt; 1431 struct vfsmount *m, *mnt = path->mnt;
1433 int recurse = flag & MS_REC; 1432 int recurse = flag & MS_REC;
1434 int type = flag & ~MS_REC; 1433 int type = flag & ~MS_REC;
1435 int err = 0; 1434 int err = 0;
@@ -1437,7 +1436,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1437 if (!capable(CAP_SYS_ADMIN)) 1436 if (!capable(CAP_SYS_ADMIN))
1438 return -EPERM; 1437 return -EPERM;
1439 1438
1440 if (nd->path.dentry != nd->path.mnt->mnt_root) 1439 if (path->dentry != path->mnt->mnt_root)
1441 return -EINVAL; 1440 return -EINVAL;
1442 1441
1443 down_write(&namespace_sem); 1442 down_write(&namespace_sem);
@@ -1459,40 +1458,39 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1459 1458
1460/* 1459/*
1461 * do loopback mount. 1460 * do loopback mount.
1462 * noinline this do_mount helper to save do_mount stack space.
1463 */ 1461 */
1464static noinline int do_loopback(struct nameidata *nd, char *old_name, 1462static int do_loopback(struct path *path, char *old_name,
1465 int recurse) 1463 int recurse)
1466{ 1464{
1467 struct nameidata old_nd; 1465 struct path old_path;
1468 struct vfsmount *mnt = NULL; 1466 struct vfsmount *mnt = NULL;
1469 int err = mount_is_safe(nd); 1467 int err = mount_is_safe(path);
1470 if (err) 1468 if (err)
1471 return err; 1469 return err;
1472 if (!old_name || !*old_name) 1470 if (!old_name || !*old_name)
1473 return -EINVAL; 1471 return -EINVAL;
1474 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); 1472 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1475 if (err) 1473 if (err)
1476 return err; 1474 return err;
1477 1475
1478 down_write(&namespace_sem); 1476 down_write(&namespace_sem);
1479 err = -EINVAL; 1477 err = -EINVAL;
1480 if (IS_MNT_UNBINDABLE(old_nd.path.mnt)) 1478 if (IS_MNT_UNBINDABLE(old_path.mnt))
1481 goto out; 1479 goto out;
1482 1480
1483 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) 1481 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1484 goto out; 1482 goto out;
1485 1483
1486 err = -ENOMEM; 1484 err = -ENOMEM;
1487 if (recurse) 1485 if (recurse)
1488 mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0); 1486 mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
1489 else 1487 else
1490 mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0); 1488 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
1491 1489
1492 if (!mnt) 1490 if (!mnt)
1493 goto out; 1491 goto out;
1494 1492
1495 err = graft_tree(mnt, &nd->path); 1493 err = graft_tree(mnt, path);
1496 if (err) { 1494 if (err) {
1497 LIST_HEAD(umount_list); 1495 LIST_HEAD(umount_list);
1498 spin_lock(&vfsmount_lock); 1496 spin_lock(&vfsmount_lock);
@@ -1503,7 +1501,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name,
1503 1501
1504out: 1502out:
1505 up_write(&namespace_sem); 1503 up_write(&namespace_sem);
1506 path_put(&old_nd.path); 1504 path_put(&old_path);
1507 return err; 1505 return err;
1508} 1506}
1509 1507
@@ -1528,33 +1526,37 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1528 * change filesystem flags. dir should be a physical root of filesystem. 1526 * change filesystem flags. dir should be a physical root of filesystem.
1529 * If you've mounted a non-root directory somewhere and want to do remount 1527 * If you've mounted a non-root directory somewhere and want to do remount
1530 * on it - tough luck. 1528 * on it - tough luck.
1531 * noinline this do_mount helper to save do_mount stack space.
1532 */ 1529 */
1533static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, 1530static int do_remount(struct path *path, int flags, int mnt_flags,
1534 void *data) 1531 void *data)
1535{ 1532{
1536 int err; 1533 int err;
1537 struct super_block *sb = nd->path.mnt->mnt_sb; 1534 struct super_block *sb = path->mnt->mnt_sb;
1538 1535
1539 if (!capable(CAP_SYS_ADMIN)) 1536 if (!capable(CAP_SYS_ADMIN))
1540 return -EPERM; 1537 return -EPERM;
1541 1538
1542 if (!check_mnt(nd->path.mnt)) 1539 if (!check_mnt(path->mnt))
1543 return -EINVAL; 1540 return -EINVAL;
1544 1541
1545 if (nd->path.dentry != nd->path.mnt->mnt_root) 1542 if (path->dentry != path->mnt->mnt_root)
1546 return -EINVAL; 1543 return -EINVAL;
1547 1544
1548 down_write(&sb->s_umount); 1545 down_write(&sb->s_umount);
1549 if (flags & MS_BIND) 1546 if (flags & MS_BIND)
1550 err = change_mount_flags(nd->path.mnt, flags); 1547 err = change_mount_flags(path->mnt, flags);
1551 else 1548 else
1552 err = do_remount_sb(sb, flags, data, 0); 1549 err = do_remount_sb(sb, flags, data, 0);
1553 if (!err) 1550 if (!err)
1554 nd->path.mnt->mnt_flags = mnt_flags; 1551 path->mnt->mnt_flags = mnt_flags;
1555 up_write(&sb->s_umount); 1552 up_write(&sb->s_umount);
1556 if (!err) 1553 if (!err) {
1557 security_sb_post_remount(nd->path.mnt, flags, data); 1554 security_sb_post_remount(path->mnt, flags, data);
1555
1556 spin_lock(&vfsmount_lock);
1557 touch_mnt_namespace(path->mnt->mnt_ns);
1558 spin_unlock(&vfsmount_lock);
1559 }
1558 return err; 1560 return err;
1559} 1561}
1560 1562
@@ -1568,90 +1570,85 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1568 return 0; 1570 return 0;
1569} 1571}
1570 1572
1571/* 1573static int do_move_mount(struct path *path, char *old_name)
1572 * noinline this do_mount helper to save do_mount stack space.
1573 */
1574static noinline int do_move_mount(struct nameidata *nd, char *old_name)
1575{ 1574{
1576 struct nameidata old_nd; 1575 struct path old_path, parent_path;
1577 struct path parent_path;
1578 struct vfsmount *p; 1576 struct vfsmount *p;
1579 int err = 0; 1577 int err = 0;
1580 if (!capable(CAP_SYS_ADMIN)) 1578 if (!capable(CAP_SYS_ADMIN))
1581 return -EPERM; 1579 return -EPERM;
1582 if (!old_name || !*old_name) 1580 if (!old_name || !*old_name)
1583 return -EINVAL; 1581 return -EINVAL;
1584 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); 1582 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1585 if (err) 1583 if (err)
1586 return err; 1584 return err;
1587 1585
1588 down_write(&namespace_sem); 1586 down_write(&namespace_sem);
1589 while (d_mountpoint(nd->path.dentry) && 1587 while (d_mountpoint(path->dentry) &&
1590 follow_down(&nd->path.mnt, &nd->path.dentry)) 1588 follow_down(&path->mnt, &path->dentry))
1591 ; 1589 ;
1592 err = -EINVAL; 1590 err = -EINVAL;
1593 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) 1591 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1594 goto out; 1592 goto out;
1595 1593
1596 err = -ENOENT; 1594 err = -ENOENT;
1597 mutex_lock(&nd->path.dentry->d_inode->i_mutex); 1595 mutex_lock(&path->dentry->d_inode->i_mutex);
1598 if (IS_DEADDIR(nd->path.dentry->d_inode)) 1596 if (IS_DEADDIR(path->dentry->d_inode))
1599 goto out1; 1597 goto out1;
1600 1598
1601 if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry)) 1599 if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
1602 goto out1; 1600 goto out1;
1603 1601
1604 err = -EINVAL; 1602 err = -EINVAL;
1605 if (old_nd.path.dentry != old_nd.path.mnt->mnt_root) 1603 if (old_path.dentry != old_path.mnt->mnt_root)
1606 goto out1; 1604 goto out1;
1607 1605
1608 if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent) 1606 if (old_path.mnt == old_path.mnt->mnt_parent)
1609 goto out1; 1607 goto out1;
1610 1608
1611 if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != 1609 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1612 S_ISDIR(old_nd.path.dentry->d_inode->i_mode)) 1610 S_ISDIR(old_path.dentry->d_inode->i_mode))
1613 goto out1; 1611 goto out1;
1614 /* 1612 /*
1615 * Don't move a mount residing in a shared parent. 1613 * Don't move a mount residing in a shared parent.
1616 */ 1614 */
1617 if (old_nd.path.mnt->mnt_parent && 1615 if (old_path.mnt->mnt_parent &&
1618 IS_MNT_SHARED(old_nd.path.mnt->mnt_parent)) 1616 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1619 goto out1; 1617 goto out1;
1620 /* 1618 /*
1621 * Don't move a mount tree containing unbindable mounts to a destination 1619 * Don't move a mount tree containing unbindable mounts to a destination
1622 * mount which is shared. 1620 * mount which is shared.
1623 */ 1621 */
1624 if (IS_MNT_SHARED(nd->path.mnt) && 1622 if (IS_MNT_SHARED(path->mnt) &&
1625 tree_contains_unbindable(old_nd.path.mnt)) 1623 tree_contains_unbindable(old_path.mnt))
1626 goto out1; 1624 goto out1;
1627 err = -ELOOP; 1625 err = -ELOOP;
1628 for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent) 1626 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
1629 if (p == old_nd.path.mnt) 1627 if (p == old_path.mnt)
1630 goto out1; 1628 goto out1;
1631 1629
1632 err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); 1630 err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
1633 if (err) 1631 if (err)
1634 goto out1; 1632 goto out1;
1635 1633
1636 /* if the mount is moved, it should no longer be expire 1634 /* if the mount is moved, it should no longer be expire
1637 * automatically */ 1635 * automatically */
1638 list_del_init(&old_nd.path.mnt->mnt_expire); 1636 list_del_init(&old_path.mnt->mnt_expire);
1639out1: 1637out1:
1640 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 1638 mutex_unlock(&path->dentry->d_inode->i_mutex);
1641out: 1639out:
1642 up_write(&namespace_sem); 1640 up_write(&namespace_sem);
1643 if (!err) 1641 if (!err)
1644 path_put(&parent_path); 1642 path_put(&parent_path);
1645 path_put(&old_nd.path); 1643 path_put(&old_path);
1646 return err; 1644 return err;
1647} 1645}
1648 1646
1649/* 1647/*
1650 * create a new mount for userspace and request it to be added into the 1648 * create a new mount for userspace and request it to be added into the
1651 * namespace's tree 1649 * namespace's tree
1652 * noinline this do_mount helper to save do_mount stack space.
1653 */ 1650 */
1654static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, 1651static int do_new_mount(struct path *path, char *type, int flags,
1655 int mnt_flags, char *name, void *data) 1652 int mnt_flags, char *name, void *data)
1656{ 1653{
1657 struct vfsmount *mnt; 1654 struct vfsmount *mnt;
@@ -1667,7 +1664,7 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1667 if (IS_ERR(mnt)) 1664 if (IS_ERR(mnt))
1668 return PTR_ERR(mnt); 1665 return PTR_ERR(mnt);
1669 1666
1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL); 1667 return do_add_mount(mnt, path, mnt_flags, NULL);
1671} 1668}
1672 1669
1673/* 1670/*
@@ -1902,7 +1899,7 @@ int copy_mount_options(const void __user * data, unsigned long *where)
1902long do_mount(char *dev_name, char *dir_name, char *type_page, 1899long do_mount(char *dev_name, char *dir_name, char *type_page,
1903 unsigned long flags, void *data_page) 1900 unsigned long flags, void *data_page)
1904{ 1901{
1905 struct nameidata nd; 1902 struct path path;
1906 int retval = 0; 1903 int retval = 0;
1907 int mnt_flags = 0; 1904 int mnt_flags = 0;
1908 1905
@@ -1940,29 +1937,29 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1940 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1937 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
1941 1938
1942 /* ... and get the mountpoint */ 1939 /* ... and get the mountpoint */
1943 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1940 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1944 if (retval) 1941 if (retval)
1945 return retval; 1942 return retval;
1946 1943
1947 retval = security_sb_mount(dev_name, &nd.path, 1944 retval = security_sb_mount(dev_name, &path,
1948 type_page, flags, data_page); 1945 type_page, flags, data_page);
1949 if (retval) 1946 if (retval)
1950 goto dput_out; 1947 goto dput_out;
1951 1948
1952 if (flags & MS_REMOUNT) 1949 if (flags & MS_REMOUNT)
1953 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1950 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1954 data_page); 1951 data_page);
1955 else if (flags & MS_BIND) 1952 else if (flags & MS_BIND)
1956 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1953 retval = do_loopback(&path, dev_name, flags & MS_REC);
1957 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1954 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1958 retval = do_change_type(&nd, flags); 1955 retval = do_change_type(&path, flags);
1959 else if (flags & MS_MOVE) 1956 else if (flags & MS_MOVE)
1960 retval = do_move_mount(&nd, dev_name); 1957 retval = do_move_mount(&path, dev_name);
1961 else 1958 else
1962 retval = do_new_mount(&nd, type_page, flags, mnt_flags, 1959 retval = do_new_mount(&path, type_page, flags, mnt_flags,
1963 dev_name, data_page); 1960 dev_name, data_page);
1964dput_out: 1961dput_out:
1965 path_put(&nd.path); 1962 path_put(&path);
1966 return retval; 1963 return retval;
1967} 1964}
1968 1965
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6a09760c5960..c2e9cfd9e5a4 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport;
40static const int nfs_set_port_min = 0; 40static const int nfs_set_port_min = 0;
41static const int nfs_set_port_max = 65535; 41static const int nfs_set_port_max = 65535;
42 42
43/*
44 * If the kernel has IPv6 support available, always listen for
45 * both AF_INET and AF_INET6 requests.
46 */
47#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
48static const sa_family_t nfs_callback_family = AF_INET6;
49#else
50static const sa_family_t nfs_callback_family = AF_INET;
51#endif
52
43static int param_set_port(const char *val, struct kernel_param *kp) 53static int param_set_port(const char *val, struct kernel_param *kp)
44{ 54{
45 char *endp; 55 char *endp;
@@ -106,7 +116,7 @@ int nfs_callback_up(void)
106 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) 116 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
107 goto out; 117 goto out;
108 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, 118 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
109 AF_INET, NULL); 119 nfs_callback_family, NULL);
110 ret = -ENOMEM; 120 ret = -ENOMEM;
111 if (!serv) 121 if (!serv)
112 goto out_err; 122 goto out_err;
@@ -116,7 +126,8 @@ int nfs_callback_up(void)
116 if (ret <= 0) 126 if (ret <= 0)
117 goto out_err; 127 goto out_err;
118 nfs_callback_tcpport = ret; 128 nfs_callback_tcpport = ret;
119 dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); 129 dprintk("NFS: Callback listener port = %u (af %u)\n",
130 nfs_callback_tcpport, nfs_callback_family);
120 131
121 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 132 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
122 if (IS_ERR(nfs_callback_info.rqst)) { 133 if (IS_ERR(nfs_callback_info.rqst)) {
@@ -149,8 +160,8 @@ out:
149 mutex_unlock(&nfs_callback_mutex); 160 mutex_unlock(&nfs_callback_mutex);
150 return ret; 161 return ret;
151out_err: 162out_err:
152 dprintk("Couldn't create callback socket or server thread; err = %d\n", 163 dprintk("NFS: Couldn't create callback socket or server thread; "
153 ret); 164 "err = %d\n", ret);
154 nfs_callback_info.users--; 165 nfs_callback_info.users--;
155 goto out; 166 goto out;
156} 167}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecbc..3e64b98f3a93 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -707,9 +707,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
707{ 707{
708 if (NFS_PROTO(dir)->version == 2) 708 if (NFS_PROTO(dir)->version == 2)
709 return 0; 709 return 0;
710 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) 710 return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL);
711 return 0;
712 return (nd->intent.open.flags & O_EXCL) != 0;
713} 711}
714 712
715/* 713/*
@@ -1009,7 +1007,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1009 1007
1010 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash 1008 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1011 * the dentry. */ 1009 * the dentry. */
1012 if (nd->intent.open.flags & O_EXCL) { 1010 if (nd->flags & LOOKUP_EXCL) {
1013 d_instantiate(dentry, NULL); 1011 d_instantiate(dentry, NULL);
1014 goto out; 1012 goto out;
1015 } 1013 }
@@ -1517,7 +1515,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, 1515 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1518 GFP_KERNEL)) { 1516 GFP_KERNEL)) {
1519 pagevec_add(&lru_pvec, page); 1517 pagevec_add(&lru_pvec, page);
1520 pagevec_lru_add(&lru_pvec); 1518 pagevec_lru_add_file(&lru_pvec);
1521 SetPageUptodate(page); 1519 SetPageUptodate(page);
1522 unlock_page(page); 1520 unlock_page(page);
1523 } else 1521 } else
@@ -1959,6 +1957,9 @@ force_lookup:
1959 } else 1957 } else
1960 res = PTR_ERR(cred); 1958 res = PTR_ERR(cred);
1961out: 1959out:
1960 if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
1961 res = -EACCES;
1962
1962 dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", 1963 dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
1963 inode->i_sb->s_id, inode->i_ino, mask, res); 1964 inode->i_sb->s_id, inode->i_ino, mask, res);
1964 return res; 1965 return res;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index fae97196daad..b7c9b2df1f29 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -107,11 +107,10 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
107 * if the dentry tree reaches them; however if the dentry already 107 * if the dentry tree reaches them; however if the dentry already
108 * exists, we'll pick it up at this point and use it as the root 108 * exists, we'll pick it up at this point and use it as the root
109 */ 109 */
110 mntroot = d_alloc_anon(inode); 110 mntroot = d_obtain_alias(inode);
111 if (!mntroot) { 111 if (IS_ERR(mntroot)) {
112 iput(inode);
113 dprintk("nfs_get_root: get root dentry failed\n"); 112 dprintk("nfs_get_root: get root dentry failed\n");
114 return ERR_PTR(-ENOMEM); 113 return mntroot;
115 } 114 }
116 115
117 security_d_instantiate(mntroot, inode); 116 security_d_instantiate(mntroot, inode);
@@ -277,11 +276,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
277 * if the dentry tree reaches them; however if the dentry already 276 * if the dentry tree reaches them; however if the dentry already
278 * exists, we'll pick it up at this point and use it as the root 277 * exists, we'll pick it up at this point and use it as the root
279 */ 278 */
280 mntroot = d_alloc_anon(inode); 279 mntroot = d_obtain_alias(inode);
281 if (!mntroot) { 280 if (IS_ERR(mntroot)) {
282 iput(inode);
283 dprintk("nfs_get_root: get root dentry failed\n"); 281 dprintk("nfs_get_root: get root dentry failed\n");
284 return ERR_PTR(-ENOMEM); 282 return mntroot;
285 } 283 }
286 284
287 security_d_instantiate(mntroot, inode); 285 security_d_instantiate(mntroot, inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b9195c02a863..d22eb383e1cf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * nfs inode and superblock handling functions 6 * nfs inode and superblock handling functions
7 * 7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some 8 * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs. 9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 * 10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. 11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
@@ -908,21 +908,16 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
908 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); 908 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
909} 909}
910 910
911static unsigned long nfs_attr_generation_counter; 911static atomic_long_t nfs_attr_generation_counter;
912 912
913static unsigned long nfs_read_attr_generation_counter(void) 913static unsigned long nfs_read_attr_generation_counter(void)
914{ 914{
915 smp_rmb(); 915 return atomic_long_read(&nfs_attr_generation_counter);
916 return nfs_attr_generation_counter;
917} 916}
918 917
919unsigned long nfs_inc_attr_generation_counter(void) 918unsigned long nfs_inc_attr_generation_counter(void)
920{ 919{
921 unsigned long ret; 920 return atomic_long_inc_return(&nfs_attr_generation_counter);
922 smp_rmb();
923 ret = ++nfs_attr_generation_counter;
924 smp_wmb();
925 return ret;
926} 921}
927 922
928void nfs_fattr_init(struct nfs_fattr *fattr) 923void nfs_fattr_init(struct nfs_fattr *fattr)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c910413eaeca..83e700a2b0c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1659 struct nfs_open_context *ctx; 1659 struct nfs_open_context *ctx;
1660 1660
1661 ctx = nfs_file_open_context(sattr->ia_file); 1661 ctx = nfs_file_open_context(sattr->ia_file);
1662 cred = ctx->cred; 1662 if (ctx) {
1663 state = ctx->state; 1663 cred = ctx->cred;
1664 state = ctx->state;
1665 }
1664 } 1666 }
1665 1667
1666 status = nfs4_do_setattr(inode, cred, fattr, sattr, state); 1668 status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8b28b95c9e44..f48db679a1c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * nfs superblock handling functions 6 * nfs superblock handling functions
7 * 7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some 8 * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs. 9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 * 10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. 11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
@@ -2459,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2459 compare_super = NULL; 2459 compare_super = NULL;
2460 2460
2461 /* Get a superblock - note that we may end up sharing one that already exists */ 2461 /* Get a superblock - note that we may end up sharing one that already exists */
2462 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2462 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2463 if (IS_ERR(s)) { 2463 if (IS_ERR(s)) {
2464 error = PTR_ERR(s); 2464 error = PTR_ERR(s);
2465 goto out_err_nosb; 2465 goto out_err_nosb;
@@ -2544,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2544 compare_super = NULL; 2544 compare_super = NULL;
2545 2545
2546 /* Get a superblock - note that we may end up sharing one that already exists */ 2546 /* Get a superblock - note that we may end up sharing one that already exists */
2547 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2547 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2548 if (IS_ERR(s)) { 2548 if (IS_ERR(s)) {
2549 error = PTR_ERR(s); 2549 error = PTR_ERR(s);
2550 goto out_err_nosb; 2550 goto out_err_nosb;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9dc036f18356..5839b229cd0e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -99,7 +99,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
99 int fsidtype; 99 int fsidtype;
100 char *ep; 100 char *ep;
101 struct svc_expkey key; 101 struct svc_expkey key;
102 struct svc_expkey *ek; 102 struct svc_expkey *ek = NULL;
103 103
104 if (mesg[mlen-1] != '\n') 104 if (mesg[mlen-1] != '\n')
105 return -EINVAL; 105 return -EINVAL;
@@ -107,7 +107,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
107 107
108 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 108 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
109 err = -ENOMEM; 109 err = -ENOMEM;
110 if (!buf) goto out; 110 if (!buf)
111 goto out;
111 112
112 err = -EINVAL; 113 err = -EINVAL;
113 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 114 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
@@ -151,34 +152,32 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
151 152
152 /* now we want a pathname, or empty meaning NEGATIVE */ 153 /* now we want a pathname, or empty meaning NEGATIVE */
153 err = -EINVAL; 154 err = -EINVAL;
154 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) 155 len = qword_get(&mesg, buf, PAGE_SIZE);
156 if (len < 0)
155 goto out; 157 goto out;
156 dprintk("Path seems to be <%s>\n", buf); 158 dprintk("Path seems to be <%s>\n", buf);
157 err = 0; 159 err = 0;
158 if (len == 0) { 160 if (len == 0) {
159 set_bit(CACHE_NEGATIVE, &key.h.flags); 161 set_bit(CACHE_NEGATIVE, &key.h.flags);
160 ek = svc_expkey_update(&key, ek); 162 ek = svc_expkey_update(&key, ek);
161 if (ek) 163 if (!ek)
162 cache_put(&ek->h, &svc_expkey_cache); 164 err = -ENOMEM;
163 else err = -ENOMEM;
164 } else { 165 } else {
165 struct nameidata nd; 166 err = kern_path(buf, 0, &key.ek_path);
166 err = path_lookup(buf, 0, &nd);
167 if (err) 167 if (err)
168 goto out; 168 goto out;
169 169
170 dprintk("Found the path %s\n", buf); 170 dprintk("Found the path %s\n", buf);
171 key.ek_path = nd.path;
172 171
173 ek = svc_expkey_update(&key, ek); 172 ek = svc_expkey_update(&key, ek);
174 if (ek) 173 if (!ek)
175 cache_put(&ek->h, &svc_expkey_cache);
176 else
177 err = -ENOMEM; 174 err = -ENOMEM;
178 path_put(&nd.path); 175 path_put(&key.ek_path);
179 } 176 }
180 cache_flush(); 177 cache_flush();
181 out: 178 out:
179 if (ek)
180 cache_put(&ek->h, &svc_expkey_cache);
182 if (dom) 181 if (dom)
183 auth_domain_put(dom); 182 auth_domain_put(dom);
184 kfree(buf); 183 kfree(buf);
@@ -500,35 +499,22 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
500 int len; 499 int len;
501 int err; 500 int err;
502 struct auth_domain *dom = NULL; 501 struct auth_domain *dom = NULL;
503 struct nameidata nd; 502 struct svc_export exp = {}, *expp;
504 struct svc_export exp, *expp;
505 int an_int; 503 int an_int;
506 504
507 nd.path.dentry = NULL;
508 exp.ex_pathname = NULL;
509
510 /* fs locations */
511 exp.ex_fslocs.locations = NULL;
512 exp.ex_fslocs.locations_count = 0;
513 exp.ex_fslocs.migrated = 0;
514
515 exp.ex_uuid = NULL;
516
517 /* secinfo */
518 exp.ex_nflavors = 0;
519
520 if (mesg[mlen-1] != '\n') 505 if (mesg[mlen-1] != '\n')
521 return -EINVAL; 506 return -EINVAL;
522 mesg[mlen-1] = 0; 507 mesg[mlen-1] = 0;
523 508
524 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 509 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
525 err = -ENOMEM; 510 if (!buf)
526 if (!buf) goto out; 511 return -ENOMEM;
527 512
528 /* client */ 513 /* client */
529 len = qword_get(&mesg, buf, PAGE_SIZE);
530 err = -EINVAL; 514 err = -EINVAL;
531 if (len <= 0) goto out; 515 len = qword_get(&mesg, buf, PAGE_SIZE);
516 if (len <= 0)
517 goto out;
532 518
533 err = -ENOENT; 519 err = -ENOENT;
534 dom = auth_domain_find(buf); 520 dom = auth_domain_find(buf);
@@ -537,25 +523,25 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
537 523
538 /* path */ 524 /* path */
539 err = -EINVAL; 525 err = -EINVAL;
540 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 526 if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
541 goto out; 527 goto out1;
542 err = path_lookup(buf, 0, &nd); 528
543 if (err) goto out_no_path; 529 err = kern_path(buf, 0, &exp.ex_path);
530 if (err)
531 goto out1;
544 532
545 exp.h.flags = 0;
546 exp.ex_client = dom; 533 exp.ex_client = dom;
547 exp.ex_path.mnt = nd.path.mnt; 534
548 exp.ex_path.dentry = nd.path.dentry;
549 exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
550 err = -ENOMEM; 535 err = -ENOMEM;
536 exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
551 if (!exp.ex_pathname) 537 if (!exp.ex_pathname)
552 goto out; 538 goto out2;
553 539
554 /* expiry */ 540 /* expiry */
555 err = -EINVAL; 541 err = -EINVAL;
556 exp.h.expiry_time = get_expiry(&mesg); 542 exp.h.expiry_time = get_expiry(&mesg);
557 if (exp.h.expiry_time == 0) 543 if (exp.h.expiry_time == 0)
558 goto out; 544 goto out3;
559 545
560 /* flags */ 546 /* flags */
561 err = get_int(&mesg, &an_int); 547 err = get_int(&mesg, &an_int);
@@ -563,22 +549,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
563 err = 0; 549 err = 0;
564 set_bit(CACHE_NEGATIVE, &exp.h.flags); 550 set_bit(CACHE_NEGATIVE, &exp.h.flags);
565 } else { 551 } else {
566 if (err || an_int < 0) goto out; 552 if (err || an_int < 0)
553 goto out3;
567 exp.ex_flags= an_int; 554 exp.ex_flags= an_int;
568 555
569 /* anon uid */ 556 /* anon uid */
570 err = get_int(&mesg, &an_int); 557 err = get_int(&mesg, &an_int);
571 if (err) goto out; 558 if (err)
559 goto out3;
572 exp.ex_anon_uid= an_int; 560 exp.ex_anon_uid= an_int;
573 561
574 /* anon gid */ 562 /* anon gid */
575 err = get_int(&mesg, &an_int); 563 err = get_int(&mesg, &an_int);
576 if (err) goto out; 564 if (err)
565 goto out3;
577 exp.ex_anon_gid= an_int; 566 exp.ex_anon_gid= an_int;
578 567
579 /* fsid */ 568 /* fsid */
580 err = get_int(&mesg, &an_int); 569 err = get_int(&mesg, &an_int);
581 if (err) goto out; 570 if (err)
571 goto out3;
582 exp.ex_fsid = an_int; 572 exp.ex_fsid = an_int;
583 573
584 while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { 574 while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
@@ -604,12 +594,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
604 */ 594 */
605 break; 595 break;
606 if (err) 596 if (err)
607 goto out; 597 goto out4;
608 } 598 }
609 599
610 err = check_export(nd.path.dentry->d_inode, exp.ex_flags, 600 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags,
611 exp.ex_uuid); 601 exp.ex_uuid);
612 if (err) goto out; 602 if (err)
603 goto out4;
613 } 604 }
614 605
615 expp = svc_export_lookup(&exp); 606 expp = svc_export_lookup(&exp);
@@ -622,15 +613,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
622 err = -ENOMEM; 613 err = -ENOMEM;
623 else 614 else
624 exp_put(expp); 615 exp_put(expp);
625 out: 616out4:
626 nfsd4_fslocs_free(&exp.ex_fslocs); 617 nfsd4_fslocs_free(&exp.ex_fslocs);
627 kfree(exp.ex_uuid); 618 kfree(exp.ex_uuid);
619out3:
628 kfree(exp.ex_pathname); 620 kfree(exp.ex_pathname);
629 if (nd.path.dentry) 621out2:
630 path_put(&nd.path); 622 path_put(&exp.ex_path);
631 out_no_path: 623out1:
632 if (dom) 624 auth_domain_put(dom);
633 auth_domain_put(dom); 625out:
634 kfree(buf); 626 kfree(buf);
635 return err; 627 return err;
636} 628}
@@ -998,7 +990,7 @@ exp_export(struct nfsctl_export *nxp)
998 struct svc_export *exp = NULL; 990 struct svc_export *exp = NULL;
999 struct svc_export new; 991 struct svc_export new;
1000 struct svc_expkey *fsid_key = NULL; 992 struct svc_expkey *fsid_key = NULL;
1001 struct nameidata nd; 993 struct path path;
1002 int err; 994 int err;
1003 995
1004 /* Consistency check */ 996 /* Consistency check */
@@ -1021,12 +1013,12 @@ exp_export(struct nfsctl_export *nxp)
1021 1013
1022 1014
1023 /* Look up the dentry */ 1015 /* Look up the dentry */
1024 err = path_lookup(nxp->ex_path, 0, &nd); 1016 err = kern_path(nxp->ex_path, 0, &path);
1025 if (err) 1017 if (err)
1026 goto out_put_clp; 1018 goto out_put_clp;
1027 err = -EINVAL; 1019 err = -EINVAL;
1028 1020
1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); 1021 exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
1030 1022
1031 memset(&new, 0, sizeof(new)); 1023 memset(&new, 0, sizeof(new));
1032 1024
@@ -1034,8 +1026,8 @@ exp_export(struct nfsctl_export *nxp)
1034 if ((nxp->ex_flags & NFSEXP_FSID) && 1026 if ((nxp->ex_flags & NFSEXP_FSID) &&
1035 (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && 1027 (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) &&
1036 fsid_key->ek_path.mnt && 1028 fsid_key->ek_path.mnt &&
1037 (fsid_key->ek_path.mnt != nd.path.mnt || 1029 (fsid_key->ek_path.mnt != path.mnt ||
1038 fsid_key->ek_path.dentry != nd.path.dentry)) 1030 fsid_key->ek_path.dentry != path.dentry))
1039 goto finish; 1031 goto finish;
1040 1032
1041 if (!IS_ERR(exp)) { 1033 if (!IS_ERR(exp)) {
@@ -1051,7 +1043,7 @@ exp_export(struct nfsctl_export *nxp)
1051 goto finish; 1043 goto finish;
1052 } 1044 }
1053 1045
1054 err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); 1046 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL);
1055 if (err) goto finish; 1047 if (err) goto finish;
1056 1048
1057 err = -ENOMEM; 1049 err = -ENOMEM;
@@ -1064,7 +1056,7 @@ exp_export(struct nfsctl_export *nxp)
1064 if (!new.ex_pathname) 1056 if (!new.ex_pathname)
1065 goto finish; 1057 goto finish;
1066 new.ex_client = clp; 1058 new.ex_client = clp;
1067 new.ex_path = nd.path; 1059 new.ex_path = path;
1068 new.ex_flags = nxp->ex_flags; 1060 new.ex_flags = nxp->ex_flags;
1069 new.ex_anon_uid = nxp->ex_anon_uid; 1061 new.ex_anon_uid = nxp->ex_anon_uid;
1070 new.ex_anon_gid = nxp->ex_anon_gid; 1062 new.ex_anon_gid = nxp->ex_anon_gid;
@@ -1090,7 +1082,7 @@ finish:
1090 exp_put(exp); 1082 exp_put(exp);
1091 if (fsid_key && !IS_ERR(fsid_key)) 1083 if (fsid_key && !IS_ERR(fsid_key))
1092 cache_put(&fsid_key->h, &svc_expkey_cache); 1084 cache_put(&fsid_key->h, &svc_expkey_cache);
1093 path_put(&nd.path); 1085 path_put(&path);
1094out_put_clp: 1086out_put_clp:
1095 auth_domain_put(clp); 1087 auth_domain_put(clp);
1096out_unlock: 1088out_unlock:
@@ -1121,7 +1113,7 @@ exp_unexport(struct nfsctl_export *nxp)
1121{ 1113{
1122 struct auth_domain *dom; 1114 struct auth_domain *dom;
1123 svc_export *exp; 1115 svc_export *exp;
1124 struct nameidata nd; 1116 struct path path;
1125 int err; 1117 int err;
1126 1118
1127 /* Consistency check */ 1119 /* Consistency check */
@@ -1138,13 +1130,13 @@ exp_unexport(struct nfsctl_export *nxp)
1138 goto out_unlock; 1130 goto out_unlock;
1139 } 1131 }
1140 1132
1141 err = path_lookup(nxp->ex_path, 0, &nd); 1133 err = kern_path(nxp->ex_path, 0, &path);
1142 if (err) 1134 if (err)
1143 goto out_domain; 1135 goto out_domain;
1144 1136
1145 err = -EINVAL; 1137 err = -EINVAL;
1146 exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); 1138 exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
1147 path_put(&nd.path); 1139 path_put(&path);
1148 if (IS_ERR(exp)) 1140 if (IS_ERR(exp))
1149 goto out_domain; 1141 goto out_domain;
1150 1142
@@ -1166,26 +1158,26 @@ out_unlock:
1166 * since its harder to fool a kernel module than a user space program. 1158 * since its harder to fool a kernel module than a user space program.
1167 */ 1159 */
1168int 1160int
1169exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) 1161exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
1170{ 1162{
1171 struct svc_export *exp; 1163 struct svc_export *exp;
1172 struct nameidata nd; 1164 struct path path;
1173 struct inode *inode; 1165 struct inode *inode;
1174 struct svc_fh fh; 1166 struct svc_fh fh;
1175 int err; 1167 int err;
1176 1168
1177 err = -EPERM; 1169 err = -EPERM;
1178 /* NB: we probably ought to check that it's NUL-terminated */ 1170 /* NB: we probably ought to check that it's NUL-terminated */
1179 if (path_lookup(path, 0, &nd)) { 1171 if (kern_path(name, 0, &path)) {
1180 printk("nfsd: exp_rootfh path not found %s", path); 1172 printk("nfsd: exp_rootfh path not found %s", name);
1181 return err; 1173 return err;
1182 } 1174 }
1183 inode = nd.path.dentry->d_inode; 1175 inode = path.dentry->d_inode;
1184 1176
1185 dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", 1177 dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
1186 path, nd.path.dentry, clp->name, 1178 name, path.dentry, clp->name,
1187 inode->i_sb->s_id, inode->i_ino); 1179 inode->i_sb->s_id, inode->i_ino);
1188 exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); 1180 exp = exp_parent(clp, path.mnt, path.dentry, NULL);
1189 if (IS_ERR(exp)) { 1181 if (IS_ERR(exp)) {
1190 err = PTR_ERR(exp); 1182 err = PTR_ERR(exp);
1191 goto out; 1183 goto out;
@@ -1195,7 +1187,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
1195 * fh must be initialized before calling fh_compose 1187 * fh must be initialized before calling fh_compose
1196 */ 1188 */
1197 fh_init(&fh, maxsize); 1189 fh_init(&fh, maxsize);
1198 if (fh_compose(&fh, exp, nd.path.dentry, NULL)) 1190 if (fh_compose(&fh, exp, path.dentry, NULL))
1199 err = -EINVAL; 1191 err = -EINVAL;
1200 else 1192 else
1201 err = 0; 1193 err = 0;
@@ -1203,7 +1195,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
1203 fh_put(&fh); 1195 fh_put(&fh);
1204 exp_put(exp); 1196 exp_put(exp);
1205out: 1197out:
1206 path_put(&nd.path); 1198 path_put(&path);
1207 return err; 1199 return err;
1208} 1200}
1209 1201
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 145b3c877a27..bb93946ace22 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -51,7 +51,7 @@
51#define NFSDDBG_FACILITY NFSDDBG_PROC 51#define NFSDDBG_FACILITY NFSDDBG_PROC
52 52
53/* Globals */ 53/* Globals */
54static struct nameidata rec_dir; 54static struct path rec_dir;
55static int rec_dir_init = 0; 55static int rec_dir_init = 0;
56 56
57static void 57static void
@@ -121,9 +121,9 @@ out_no_tfm:
121static void 121static void
122nfsd4_sync_rec_dir(void) 122nfsd4_sync_rec_dir(void)
123{ 123{
124 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 124 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
125 nfsd_sync_dir(rec_dir.path.dentry); 125 nfsd_sync_dir(rec_dir.dentry);
126 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 126 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
127} 127}
128 128
129int 129int
@@ -143,9 +143,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
143 nfs4_save_user(&uid, &gid); 143 nfs4_save_user(&uid, &gid);
144 144
145 /* lock the parent */ 145 /* lock the parent */
146 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 146 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
147 147
148 dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); 148 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
149 if (IS_ERR(dentry)) { 149 if (IS_ERR(dentry)) {
150 status = PTR_ERR(dentry); 150 status = PTR_ERR(dentry);
151 goto out_unlock; 151 goto out_unlock;
@@ -155,15 +155,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
156 goto out_put; 156 goto out_put;
157 } 157 }
158 status = mnt_want_write(rec_dir.path.mnt); 158 status = mnt_want_write(rec_dir.mnt);
159 if (status) 159 if (status)
160 goto out_put; 160 goto out_put;
161 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); 161 status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
162 mnt_drop_write(rec_dir.path.mnt); 162 mnt_drop_write(rec_dir.mnt);
163out_put: 163out_put:
164 dput(dentry); 164 dput(dentry);
165out_unlock: 165out_unlock:
166 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 166 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
167 if (status == 0) { 167 if (status == 0) {
168 clp->cl_firststate = 1; 168 clp->cl_firststate = 1;
169 nfsd4_sync_rec_dir(); 169 nfsd4_sync_rec_dir();
@@ -226,7 +226,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
226 226
227 nfs4_save_user(&uid, &gid); 227 nfs4_save_user(&uid, &gid);
228 228
229 filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); 229 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
230 status = PTR_ERR(filp); 230 status = PTR_ERR(filp);
231 if (IS_ERR(filp)) 231 if (IS_ERR(filp))
232 goto out; 232 goto out;
@@ -291,9 +291,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
291 291
292 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); 292 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
293 293
294 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 294 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
295 dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); 295 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
296 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 296 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
297 if (IS_ERR(dentry)) { 297 if (IS_ERR(dentry)) {
298 status = PTR_ERR(dentry); 298 status = PTR_ERR(dentry);
299 return status; 299 return status;
@@ -302,7 +302,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
302 if (!dentry->d_inode) 302 if (!dentry->d_inode)
303 goto out; 303 goto out;
304 304
305 status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); 305 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
306out: 306out:
307 dput(dentry); 307 dput(dentry);
308 return status; 308 return status;
@@ -318,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
318 if (!rec_dir_init || !clp->cl_firststate) 318 if (!rec_dir_init || !clp->cl_firststate)
319 return; 319 return;
320 320
321 status = mnt_want_write(rec_dir.path.mnt); 321 status = mnt_want_write(rec_dir.mnt);
322 if (status) 322 if (status)
323 goto out; 323 goto out;
324 clp->cl_firststate = 0; 324 clp->cl_firststate = 0;
@@ -327,7 +327,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
327 nfs4_reset_user(uid, gid); 327 nfs4_reset_user(uid, gid);
328 if (status == 0) 328 if (status == 0)
329 nfsd4_sync_rec_dir(); 329 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.path.mnt); 330 mnt_drop_write(rec_dir.mnt);
331out: 331out:
332 if (status) 332 if (status)
333 printk("NFSD: Failed to remove expired client state directory" 333 printk("NFSD: Failed to remove expired client state directory"
@@ -357,17 +357,17 @@ nfsd4_recdir_purge_old(void) {
357 357
358 if (!rec_dir_init) 358 if (!rec_dir_init)
359 return; 359 return;
360 status = mnt_want_write(rec_dir.path.mnt); 360 status = mnt_want_write(rec_dir.mnt);
361 if (status) 361 if (status)
362 goto out; 362 goto out;
363 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); 363 status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
364 if (status == 0) 364 if (status == 0)
365 nfsd4_sync_rec_dir(); 365 nfsd4_sync_rec_dir();
366 mnt_drop_write(rec_dir.path.mnt); 366 mnt_drop_write(rec_dir.mnt);
367out: 367out:
368 if (status) 368 if (status)
369 printk("nfsd4: failed to purge old clients from recovery" 369 printk("nfsd4: failed to purge old clients from recovery"
370 " directory %s\n", rec_dir.path.dentry->d_name.name); 370 " directory %s\n", rec_dir.dentry->d_name.name);
371} 371}
372 372
373static int 373static int
@@ -387,10 +387,10 @@ int
387nfsd4_recdir_load(void) { 387nfsd4_recdir_load(void) {
388 int status; 388 int status;
389 389
390 status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); 390 status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
391 if (status) 391 if (status)
392 printk("nfsd4: failed loading clients from recovery" 392 printk("nfsd4: failed loading clients from recovery"
393 " directory %s\n", rec_dir.path.dentry->d_name.name); 393 " directory %s\n", rec_dir.dentry->d_name.name);
394 return status; 394 return status;
395} 395}
396 396
@@ -412,7 +412,7 @@ nfsd4_init_recdir(char *rec_dirname)
412 412
413 nfs4_save_user(&uid, &gid); 413 nfs4_save_user(&uid, &gid);
414 414
415 status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 415 status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
416 &rec_dir); 416 &rec_dir);
417 if (status) 417 if (status)
418 printk("NFSD: unable to find recovery directory %s\n", 418 printk("NFSD: unable to find recovery directory %s\n",
@@ -429,5 +429,5 @@ nfsd4_shutdown_recdir(void)
429 if (!rec_dir_init) 429 if (!rec_dir_init)
430 return; 430 return;
431 rec_dir_init = 0; 431 rec_dir_init = 0;
432 path_put(&rec_dir.path); 432 path_put(&rec_dir);
433} 433}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0cc7ff5d5ab5..b0bebc552a11 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3284,17 +3284,17 @@ int
3284nfs4_reset_recoverydir(char *recdir) 3284nfs4_reset_recoverydir(char *recdir)
3285{ 3285{
3286 int status; 3286 int status;
3287 struct nameidata nd; 3287 struct path path;
3288 3288
3289 status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); 3289 status = kern_path(recdir, LOOKUP_FOLLOW, &path);
3290 if (status) 3290 if (status)
3291 return status; 3291 return status;
3292 status = -ENOTDIR; 3292 status = -ENOTDIR;
3293 if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { 3293 if (S_ISDIR(path.dentry->d_inode->i_mode)) {
3294 nfs4_set_recdir(recdir); 3294 nfs4_set_recdir(recdir);
3295 status = 0; 3295 status = 0;
3296 } 3296 }
3297 path_put(&nd.path); 3297 path_put(&path);
3298 return status; 3298 return status;
3299} 3299}
3300 3300
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 97543df58242..e3f9783fdcf7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -341,7 +341,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
341 341
342static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) 342static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
343{ 343{
344 struct nameidata nd; 344 struct path path;
345 char *fo_path; 345 char *fo_path;
346 int error; 346 int error;
347 347
@@ -356,13 +356,13 @@ static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
356 if (qword_get(&buf, fo_path, size) < 0) 356 if (qword_get(&buf, fo_path, size) < 0)
357 return -EINVAL; 357 return -EINVAL;
358 358
359 error = path_lookup(fo_path, 0, &nd); 359 error = kern_path(fo_path, 0, &path);
360 if (error) 360 if (error)
361 return error; 361 return error;
362 362
363 error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb); 363 error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb);
364 364
365 path_put(&nd.path); 365 path_put(&path);
366 return error; 366 return error;
367} 367}
368 368
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59eeb46f82c5..07e4f5d7baa8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -249,6 +249,10 @@ static int nfsd_init_socks(int port)
249 if (error < 0) 249 if (error < 0)
250 return error; 250 return error;
251 251
252 error = lockd_up();
253 if (error < 0)
254 return error;
255
252 error = svc_create_xprt(nfsd_serv, "tcp", port, 256 error = svc_create_xprt(nfsd_serv, "tcp", port,
253 SVC_SOCK_DEFAULTS); 257 SVC_SOCK_DEFAULTS);
254 if (error < 0) 258 if (error < 0)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index aa1d0d6489a1..4433c8f00163 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -410,6 +410,7 @@ out_nfserr:
410static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 410static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
411{ 411{
412 ssize_t buflen; 412 ssize_t buflen;
413 ssize_t ret;
413 414
414 buflen = vfs_getxattr(dentry, key, NULL, 0); 415 buflen = vfs_getxattr(dentry, key, NULL, 0);
415 if (buflen <= 0) 416 if (buflen <= 0)
@@ -419,7 +420,10 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
419 if (!*buf) 420 if (!*buf)
420 return -ENOMEM; 421 return -ENOMEM;
421 422
422 return vfs_getxattr(dentry, key, *buf, buflen); 423 ret = vfs_getxattr(dentry, key, *buf, buflen);
424 if (ret < 0)
425 kfree(*buf);
426 return ret;
423} 427}
424#endif 428#endif
425 429
@@ -1814,6 +1818,113 @@ out:
1814} 1818}
1815 1819
1816/* 1820/*
1821 * We do this buffering because we must not call back into the file
1822 * system's ->lookup() method from the filldir callback. That may well
1823 * deadlock a number of file systems.
1824 *
1825 * This is based heavily on the implementation of same in XFS.
1826 */
1827struct buffered_dirent {
1828 u64 ino;
1829 loff_t offset;
1830 int namlen;
1831 unsigned int d_type;
1832 char name[];
1833};
1834
1835struct readdir_data {
1836 char *dirent;
1837 size_t used;
1838 int full;
1839};
1840
1841static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1842 loff_t offset, u64 ino, unsigned int d_type)
1843{
1844 struct readdir_data *buf = __buf;
1845 struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
1846 unsigned int reclen;
1847
1848 reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
1849 if (buf->used + reclen > PAGE_SIZE) {
1850 buf->full = 1;
1851 return -EINVAL;
1852 }
1853
1854 de->namlen = namlen;
1855 de->offset = offset;
1856 de->ino = ino;
1857 de->d_type = d_type;
1858 memcpy(de->name, name, namlen);
1859 buf->used += reclen;
1860
1861 return 0;
1862}
1863
1864static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1865 struct readdir_cd *cdp, loff_t *offsetp)
1866{
1867 struct readdir_data buf;
1868 struct buffered_dirent *de;
1869 int host_err;
1870 int size;
1871 loff_t offset;
1872
1873 buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1874 if (!buf.dirent)
1875 return -ENOMEM;
1876
1877 offset = *offsetp;
1878
1879 while (1) {
1880 unsigned int reclen;
1881
1882 cdp->err = nfserr_eof; /* will be cleared on successful read */
1883 buf.used = 0;
1884 buf.full = 0;
1885
1886 host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
1887 if (buf.full)
1888 host_err = 0;
1889
1890 if (host_err < 0)
1891 break;
1892
1893 size = buf.used;
1894
1895 if (!size)
1896 break;
1897
1898 de = (struct buffered_dirent *)buf.dirent;
1899 while (size > 0) {
1900 offset = de->offset;
1901
1902 if (func(cdp, de->name, de->namlen, de->offset,
1903 de->ino, de->d_type))
1904 goto done;
1905
1906 if (cdp->err != nfs_ok)
1907 goto done;
1908
1909 reclen = ALIGN(sizeof(*de) + de->namlen,
1910 sizeof(u64));
1911 size -= reclen;
1912 de = (struct buffered_dirent *)((char *)de + reclen);
1913 }
1914 offset = vfs_llseek(file, 0, SEEK_CUR);
1915 }
1916
1917 done:
1918 free_page((unsigned long)(buf.dirent));
1919
1920 if (host_err)
1921 return nfserrno(host_err);
1922
1923 *offsetp = offset;
1924 return cdp->err;
1925}
1926
1927/*
1817 * Read entries from a directory. 1928 * Read entries from a directory.
1818 * The NFSv3/4 verifier we ignore for now. 1929 * The NFSv3/4 verifier we ignore for now.
1819 */ 1930 */
@@ -1822,7 +1933,6 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1822 struct readdir_cd *cdp, filldir_t func) 1933 struct readdir_cd *cdp, filldir_t func)
1823{ 1934{
1824 __be32 err; 1935 __be32 err;
1825 int host_err;
1826 struct file *file; 1936 struct file *file;
1827 loff_t offset = *offsetp; 1937 loff_t offset = *offsetp;
1828 1938
@@ -1836,21 +1946,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1836 goto out_close; 1946 goto out_close;
1837 } 1947 }
1838 1948
1839 /* 1949 err = nfsd_buffered_readdir(file, func, cdp, offsetp);
1840 * Read the directory entries. This silly loop is necessary because
1841 * readdir() is not guaranteed to fill up the entire buffer, but
1842 * may choose to do less.
1843 */
1844
1845 do {
1846 cdp->err = nfserr_eof; /* will be cleared on successful read */
1847 host_err = vfs_readdir(file, func, cdp);
1848 } while (host_err >=0 && cdp->err == nfs_ok);
1849 if (host_err)
1850 err = nfserrno(host_err);
1851 else
1852 err = cdp->err;
1853 *offsetp = vfs_llseek(file, 0, 1);
1854 1950
1855 if (err == nfserr_eof || err == nfserr_toosmall) 1951 if (err == nfserr_eof || err == nfserr_toosmall)
1856 err = nfs_ok; /* can still be found in ->err */ 1952 err = nfs_ok; /* can still be found in ->err */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d4232..3140a4429af1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
439 pages[nr] = *cached_page; 439 pages[nr] = *cached_page;
440 page_cache_get(*cached_page); 440 page_cache_get(*cached_page);
441 if (unlikely(!pagevec_add(lru_pvec, *cached_page))) 441 if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
442 __pagevec_lru_add(lru_pvec); 442 __pagevec_lru_add_file(lru_pvec);
443 *cached_page = NULL; 443 *cached_page = NULL;
444 } 444 }
445 index++; 445 index++;
@@ -2084,7 +2084,7 @@ err_out:
2084 OSYNC_METADATA|OSYNC_DATA); 2084 OSYNC_METADATA|OSYNC_DATA);
2085 } 2085 }
2086 } 2086 }
2087 pagevec_lru_add(&lru_pvec); 2087 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2089 written ? "written" : "status", (unsigned long)written,
2090 (long)status); 2090 (long)status);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 9e8a95be7a1e..2ca00153b6ec 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -304,8 +304,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
304 ntfs_attr_search_ctx *ctx; 304 ntfs_attr_search_ctx *ctx;
305 ATTR_RECORD *attr; 305 ATTR_RECORD *attr;
306 FILE_NAME_ATTR *fn; 306 FILE_NAME_ATTR *fn;
307 struct inode *parent_vi;
308 struct dentry *parent_dent;
309 unsigned long parent_ino; 307 unsigned long parent_ino;
310 int err; 308 int err;
311 309
@@ -345,24 +343,8 @@ try_next:
345 /* Release the search context and the mft record of the child. */ 343 /* Release the search context and the mft record of the child. */
346 ntfs_attr_put_search_ctx(ctx); 344 ntfs_attr_put_search_ctx(ctx);
347 unmap_mft_record(ni); 345 unmap_mft_record(ni);
348 /* Get the inode of the parent directory. */ 346
349 parent_vi = ntfs_iget(vi->i_sb, parent_ino); 347 return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino));
350 if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) {
351 if (!IS_ERR(parent_vi))
352 iput(parent_vi);
353 ntfs_error(vi->i_sb, "Failed to get parent directory inode "
354 "0x%lx of child inode 0x%lx.", parent_ino,
355 vi->i_ino);
356 return ERR_PTR(-EACCES);
357 }
358 /* Finally get a dentry for the parent directory and return it. */
359 parent_dent = d_alloc_anon(parent_vi);
360 if (unlikely(!parent_dent)) {
361 iput(parent_vi);
362 return ERR_PTR(-ENOMEM);
363 }
364 ntfs_debug("Done for inode 0x%lx.", vi->i_ino);
365 return parent_dent;
366} 348}
367 349
368static struct inode *ntfs_nfs_get_inode(struct super_block *sb, 350static struct inode *ntfs_nfs_get_inode(struct super_block *sb,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 7dce1612553e..6ebaa58e2c03 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item)
976 } 976 }
977 977
978 if (reg->hr_bdev) 978 if (reg->hr_bdev)
979 blkdev_put(reg->hr_bdev); 979 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
980 980
981 if (reg->hr_slots) 981 if (reg->hr_slots)
982 kfree(reg->hr_slots); 982 kfree(reg->hr_slots);
@@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1268 goto out; 1268 goto out;
1269 1269
1270 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1270 reg->hr_bdev = I_BDEV(filp->f_mapping->host);
1271 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); 1271 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
1272 if (ret) { 1272 if (ret) {
1273 reg->hr_bdev = NULL; 1273 reg->hr_bdev = NULL;
1274 goto out; 1274 goto out;
@@ -1358,7 +1358,7 @@ out:
1358 iput(inode); 1358 iput(inode);
1359 if (ret < 0) { 1359 if (ret < 0) {
1360 if (reg->hr_bdev) { 1360 if (reg->hr_bdev) {
1361 blkdev_put(reg->hr_bdev); 1361 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1362 reg->hr_bdev = NULL; 1362 reg->hr_bdev = NULL;
1363 } 1363 }
1364 } 1364 }
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 67527cebf214..2f27b332d8b3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -68,14 +68,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
68 return ERR_PTR(-ESTALE); 68 return ERR_PTR(-ESTALE);
69 } 69 }
70 70
71 result = d_alloc_anon(inode); 71 result = d_obtain_alias(inode);
72 72 if (!IS_ERR(result))
73 if (!result) { 73 result->d_op = &ocfs2_dentry_ops;
74 iput(inode);
75 mlog_errno(-ENOMEM);
76 return ERR_PTR(-ENOMEM);
77 }
78 result->d_op = &ocfs2_dentry_ops;
79 74
80 mlog_exit_ptr(result); 75 mlog_exit_ptr(result);
81 return result; 76 return result;
@@ -86,7 +81,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
86 int status; 81 int status;
87 u64 blkno; 82 u64 blkno;
88 struct dentry *parent; 83 struct dentry *parent;
89 struct inode *inode;
90 struct inode *dir = child->d_inode; 84 struct inode *dir = child->d_inode;
91 85
92 mlog_entry("(0x%p, '%.*s')\n", child, 86 mlog_entry("(0x%p, '%.*s')\n", child,
@@ -109,21 +103,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
109 goto bail_unlock; 103 goto bail_unlock;
110 } 104 }
111 105
112 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); 106 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
113 if (IS_ERR(inode)) { 107 if (!IS_ERR(parent))
114 mlog(ML_ERROR, "Unable to create inode %llu\n", 108 parent->d_op = &ocfs2_dentry_ops;
115 (unsigned long long)blkno);
116 parent = ERR_PTR(-EACCES);
117 goto bail_unlock;
118 }
119
120 parent = d_alloc_anon(inode);
121 if (!parent) {
122 iput(inode);
123 parent = ERR_PTR(-ENOMEM);
124 }
125
126 parent->d_op = &ocfs2_dentry_ops;
127 109
128bail_unlock: 110bail_unlock:
129 ocfs2_inode_unlock(dir, 0); 111 ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8d3225a78073..7efe937a415f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -679,8 +679,7 @@ leave:
679 679
680/* Some parts of this taken from generic_cont_expand, which turned out 680/* Some parts of this taken from generic_cont_expand, which turned out
681 * to be too fragile to do exactly what we need without us having to 681 * to be too fragile to do exactly what we need without us having to
682 * worry about recursive locking in ->prepare_write() and 682 * worry about recursive locking in ->write_begin() and ->write_end(). */
683 * ->commit_write(). */
684static int ocfs2_write_zero_page(struct inode *inode, 683static int ocfs2_write_zero_page(struct inode *inode,
685 u64 size) 684 u64 size)
686{ 685{
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c0757e998876..c7275cfbdcfb 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -501,4 +501,5 @@ struct inode_operations omfs_dir_inops = {
501struct file_operations omfs_dir_operations = { 501struct file_operations omfs_dir_operations = {
502 .read = generic_read_dir, 502 .read = generic_read_dir,
503 .readdir = omfs_readdir, 503 .readdir = omfs_readdir,
504 .llseek = generic_file_llseek,
504}; 505};
diff --git a/fs/open.c b/fs/open.c
index 5596049863bf..83cdb9dee0c1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
798 int error; 798 int error;
799 799
800 f->f_flags = flags; 800 f->f_flags = flags;
801 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 801 f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
802 FMODE_PREAD | FMODE_PWRITE; 802 FMODE_PREAD | FMODE_PWRITE;
803 inode = dentry->d_inode; 803 inode = dentry->d_inode;
804 if (f->f_mode & FMODE_WRITE) { 804 if (f->f_mode & FMODE_WRITE) {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 9f5b054f06b9..d41bdc784de4 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -167,6 +167,7 @@ static int openpromfs_readdir(struct file *, void *, filldir_t);
167static const struct file_operations openprom_operations = { 167static const struct file_operations openprom_operations = {
168 .read = generic_read_dir, 168 .read = generic_read_dir,
169 .readdir = openpromfs_readdir, 169 .readdir = openpromfs_readdir,
170 .llseek = generic_file_llseek,
170}; 171};
171 172
172static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); 173static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index cfb0c80690aa..633f7a0ebb2c 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -485,10 +485,10 @@ void register_disk(struct gendisk *disk)
485 goto exit; 485 goto exit;
486 486
487 bdev->bd_invalidated = 1; 487 bdev->bd_invalidated = 1;
488 err = blkdev_get(bdev, FMODE_READ, 0); 488 err = blkdev_get(bdev, FMODE_READ);
489 if (err < 0) 489 if (err < 0)
490 goto exit; 490 goto exit;
491 blkdev_put(bdev); 491 blkdev_put(bdev, FMODE_READ);
492 492
493exit: 493exit:
494 /* announce disk after possible partitions are created */ 494 /* announce disk after possible partitions are created */
diff --git a/fs/pipe.c b/fs/pipe.c
index fcba6542b8d0..7aea8b89baac 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -717,14 +717,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
717static int 717static int
718pipe_read_release(struct inode *inode, struct file *filp) 718pipe_read_release(struct inode *inode, struct file *filp)
719{ 719{
720 pipe_read_fasync(-1, filp, 0);
721 return pipe_release(inode, 1, 0); 720 return pipe_release(inode, 1, 0);
722} 721}
723 722
724static int 723static int
725pipe_write_release(struct inode *inode, struct file *filp) 724pipe_write_release(struct inode *inode, struct file *filp)
726{ 725{
727 pipe_write_fasync(-1, filp, 0);
728 return pipe_release(inode, 0, 1); 726 return pipe_release(inode, 0, 1);
729} 727}
730 728
@@ -733,7 +731,6 @@ pipe_rdwr_release(struct inode *inode, struct file *filp)
733{ 731{
734 int decr, decw; 732 int decr, decw;
735 733
736 pipe_rdwr_fasync(-1, filp, 0);
737 decr = (filp->f_mode & FMODE_READ) != 0; 734 decr = (filp->f_mode & FMODE_READ) != 0;
738 decw = (filp->f_mode & FMODE_WRITE) != 0; 735 decw = (filp->f_mode & FMODE_WRITE) != 0;
739 return pipe_release(inode, decr, decw); 736 return pipe_release(inode, decr, decw);
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index ebaba0213546..63d965193b22 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,11 +8,20 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o proc_misc.o 11 proc_tty.o
12 12proc-y += cmdline.o
13proc-y += cpuinfo.o
14proc-y += devices.o
15proc-y += interrupts.o
16proc-y += loadavg.o
17proc-y += meminfo.o
18proc-y += stat.o
19proc-y += uptime.o
20proc-y += version.o
13proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 21proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
14proc-$(CONFIG_NET) += proc_net.o 22proc-$(CONFIG_NET) += proc_net.o
15proc-$(CONFIG_PROC_KCORE) += kcore.o 23proc-$(CONFIG_PROC_KCORE) += kcore.o
16proc-$(CONFIG_PROC_VMCORE) += vmcore.o 24proc-$(CONFIG_PROC_VMCORE) += vmcore.o
17proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o 25proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
18proc-$(CONFIG_PRINTK) += kmsg.o 26proc-$(CONFIG_PRINTK) += kmsg.o
27proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f4bc0e789539..6af7fba7abb1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -40,7 +40,7 @@
40 * 40 *
41 * 41 *
42 * Alan Cox : security fixes. 42 * Alan Cox : security fixes.
43 * <Alan.Cox@linux.org> 43 * <alan@lxorguk.ukuu.org.uk>
44 * 44 *
45 * Al Viro : safe handling of mm_struct 45 * Al Viro : safe handling of mm_struct
46 * 46 *
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
388 388
389 /* add up live thread stats at the group level */ 389 /* add up live thread stats at the group level */
390 if (whole) { 390 if (whole) {
391 struct task_cputime cputime;
391 struct task_struct *t = task; 392 struct task_struct *t = task;
392 do { 393 do {
393 min_flt += t->min_flt; 394 min_flt += t->min_flt;
394 maj_flt += t->maj_flt; 395 maj_flt += t->maj_flt;
395 utime = cputime_add(utime, task_utime(t));
396 stime = cputime_add(stime, task_stime(t));
397 gtime = cputime_add(gtime, task_gtime(t)); 396 gtime = cputime_add(gtime, task_gtime(t));
398 t = next_thread(t); 397 t = next_thread(t);
399 } while (t != task); 398 } while (t != task);
400 399
401 min_flt += sig->min_flt; 400 min_flt += sig->min_flt;
402 maj_flt += sig->maj_flt; 401 maj_flt += sig->maj_flt;
403 utime = cputime_add(utime, sig->utime); 402 thread_group_cputime(task, &cputime);
404 stime = cputime_add(stime, sig->stime); 403 utime = cputime.utime;
404 stime = cputime.stime;
405 gtime = cputime_add(gtime, sig->gtime); 405 gtime = cputime_add(gtime, sig->gtime);
406 } 406 }
407 407
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b5918ae8ca79..486cf3fe7139 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1712 file = fcheck_files(files, fd); 1712 file = fcheck_files(files, fd);
1713 if (!file) 1713 if (!file)
1714 goto out_unlock; 1714 goto out_unlock;
1715 if (file->f_mode & 1) 1715 if (file->f_mode & FMODE_READ)
1716 inode->i_mode |= S_IRUSR | S_IXUSR; 1716 inode->i_mode |= S_IRUSR | S_IXUSR;
1717 if (file->f_mode & 2) 1717 if (file->f_mode & FMODE_WRITE)
1718 inode->i_mode |= S_IWUSR | S_IXUSR; 1718 inode->i_mode |= S_IWUSR | S_IXUSR;
1719 spin_unlock(&files->file_lock); 1719 spin_unlock(&files->file_lock);
1720 put_files_struct(files); 1720 put_files_struct(files);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
new file mode 100644
index 000000000000..82676e3fcd1d
--- /dev/null
+++ b/fs/proc/cmdline.c
@@ -0,0 +1,29 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6static int cmdline_proc_show(struct seq_file *m, void *v)
7{
8 seq_printf(m, "%s\n", saved_command_line);
9 return 0;
10}
11
12static int cmdline_proc_open(struct inode *inode, struct file *file)
13{
14 return single_open(file, cmdline_proc_show, NULL);
15}
16
17static const struct file_operations cmdline_proc_fops = {
18 .open = cmdline_proc_open,
19 .read = seq_read,
20 .llseek = seq_lseek,
21 .release = single_release,
22};
23
24static int __init proc_cmdline_init(void)
25{
26 proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
27 return 0;
28}
29module_init(proc_cmdline_init);
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
new file mode 100644
index 000000000000..5a1e539a234b
--- /dev/null
+++ b/fs/proc/cpuinfo.c
@@ -0,0 +1,24 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6extern const struct seq_operations cpuinfo_op;
7static int cpuinfo_open(struct inode *inode, struct file *file)
8{
9 return seq_open(file, &cpuinfo_op);
10}
11
12static const struct file_operations proc_cpuinfo_operations = {
13 .open = cpuinfo_open,
14 .read = seq_read,
15 .llseek = seq_lseek,
16 .release = seq_release,
17};
18
19static int __init proc_cpuinfo_init(void)
20{
21 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
22 return 0;
23}
24module_init(proc_cpuinfo_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
new file mode 100644
index 000000000000..59ee7da959c9
--- /dev/null
+++ b/fs/proc/devices.c
@@ -0,0 +1,70 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6static int devinfo_show(struct seq_file *f, void *v)
7{
8 int i = *(loff_t *) v;
9
10 if (i < CHRDEV_MAJOR_HASH_SIZE) {
11 if (i == 0)
12 seq_printf(f, "Character devices:\n");
13 chrdev_show(f, i);
14 }
15#ifdef CONFIG_BLOCK
16 else {
17 i -= CHRDEV_MAJOR_HASH_SIZE;
18 if (i == 0)
19 seq_printf(f, "\nBlock devices:\n");
20 blkdev_show(f, i);
21 }
22#endif
23 return 0;
24}
25
26static void *devinfo_start(struct seq_file *f, loff_t *pos)
27{
28 if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
29 return pos;
30 return NULL;
31}
32
33static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
34{
35 (*pos)++;
36 if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
37 return NULL;
38 return pos;
39}
40
41static void devinfo_stop(struct seq_file *f, void *v)
42{
43 /* Nothing to do */
44}
45
46static const struct seq_operations devinfo_ops = {
47 .start = devinfo_start,
48 .next = devinfo_next,
49 .stop = devinfo_stop,
50 .show = devinfo_show
51};
52
53static int devinfo_open(struct inode *inode, struct file *filp)
54{
55 return seq_open(filp, &devinfo_ops);
56}
57
58static const struct file_operations proc_devinfo_operations = {
59 .open = devinfo_open,
60 .read = seq_read,
61 .llseek = seq_lseek,
62 .release = seq_release,
63};
64
65static int __init proc_devices_init(void)
66{
67 proc_create("devices", 0, NULL, &proc_devinfo_operations);
68 return 0;
69}
70module_init(proc_devices_init);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 7821589a17d5..60a359b35582 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -547,9 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
547 547
548 for (tmp = dir->subdir; tmp; tmp = tmp->next) 548 for (tmp = dir->subdir; tmp; tmp = tmp->next)
549 if (strcmp(tmp->name, dp->name) == 0) { 549 if (strcmp(tmp->name, dp->name) == 0) {
550 printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", 550 WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
551 dir->name, dp->name); 551 dir->name, dp->name);
552 dump_stack();
553 break; 552 break;
554 } 553 }
555 554
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index c6b4fa7e3b49..2543fd00c658 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -106,14 +106,13 @@ static void init_once(void *foo)
106 inode_init_once(&ei->vfs_inode); 106 inode_init_once(&ei->vfs_inode);
107} 107}
108 108
109int __init proc_init_inodecache(void) 109void __init proc_init_inodecache(void)
110{ 110{
111 proc_inode_cachep = kmem_cache_create("proc_inode_cache", 111 proc_inode_cachep = kmem_cache_create("proc_inode_cache",
112 sizeof(struct proc_inode), 112 sizeof(struct proc_inode),
113 0, (SLAB_RECLAIM_ACCOUNT| 113 0, (SLAB_RECLAIM_ACCOUNT|
114 SLAB_MEM_SPREAD|SLAB_PANIC), 114 SLAB_MEM_SPREAD|SLAB_PANIC),
115 init_once); 115 init_once);
116 return 0;
117} 116}
118 117
119static const struct super_operations proc_sops = { 118static const struct super_operations proc_sops = {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3bfb7b8747b3..3e8aeb8b61ce 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,12 +61,11 @@ extern const struct file_operations proc_smaps_operations;
61extern const struct file_operations proc_clear_refs_operations; 61extern const struct file_operations proc_clear_refs_operations;
62extern const struct file_operations proc_pagemap_operations; 62extern const struct file_operations proc_pagemap_operations;
63extern const struct file_operations proc_net_operations; 63extern const struct file_operations proc_net_operations;
64extern const struct file_operations proc_kmsg_operations;
65extern const struct inode_operations proc_net_inode_operations; 64extern const struct inode_operations proc_net_inode_operations;
66 65
67void free_proc_entry(struct proc_dir_entry *de); 66void free_proc_entry(struct proc_dir_entry *de);
68 67
69int proc_init_inodecache(void); 68void proc_init_inodecache(void);
70 69
71static inline struct pid *proc_pid(struct inode *inode) 70static inline struct pid *proc_pid(struct inode *inode)
72{ 71{
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
new file mode 100644
index 000000000000..05029c0e2f24
--- /dev/null
+++ b/fs/proc/interrupts.c
@@ -0,0 +1,53 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/interrupt.h>
4#include <linux/irqnr.h>
5#include <linux/proc_fs.h>
6#include <linux/seq_file.h>
7
8/*
9 * /proc/interrupts
10 */
11static void *int_seq_start(struct seq_file *f, loff_t *pos)
12{
13 return (*pos <= nr_irqs) ? pos : NULL;
14}
15
16static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
17{
18 (*pos)++;
19 if (*pos > nr_irqs)
20 return NULL;
21 return pos;
22}
23
24static void int_seq_stop(struct seq_file *f, void *v)
25{
26 /* Nothing to do */
27}
28
29static const struct seq_operations int_seq_ops = {
30 .start = int_seq_start,
31 .next = int_seq_next,
32 .stop = int_seq_stop,
33 .show = show_interrupts
34};
35
36static int interrupts_open(struct inode *inode, struct file *filp)
37{
38 return seq_open(filp, &int_seq_ops);
39}
40
41static const struct file_operations proc_interrupts_operations = {
42 .open = interrupts_open,
43 .read = seq_read,
44 .llseek = seq_lseek,
45 .release = seq_release,
46};
47
48static int __init proc_interrupts_init(void)
49{
50 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
51 return 0;
52}
53module_init(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index c2370c76fb71..59b43a068872 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -27,6 +27,8 @@
27#define ELF_CORE_EFLAGS 0 27#define ELF_CORE_EFLAGS 0
28#endif 28#endif
29 29
30static struct proc_dir_entry *proc_root_kcore;
31
30static int open_kcore(struct inode * inode, struct file * filp) 32static int open_kcore(struct inode * inode, struct file * filp)
31{ 33{
32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 34 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -34,7 +36,7 @@ static int open_kcore(struct inode * inode, struct file * filp)
34 36
35static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); 37static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
36 38
37const struct file_operations proc_kcore_operations = { 39static const struct file_operations proc_kcore_operations = {
38 .read = read_kcore, 40 .read = read_kcore,
39 .open = open_kcore, 41 .open = open_kcore,
40}; 42};
@@ -399,3 +401,13 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
399 401
400 return acc; 402 return acc;
401} 403}
404
405static int __init proc_kcore_init(void)
406{
407 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
408 if (proc_root_kcore)
409 proc_root_kcore->size =
410 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
411 return 0;
412}
413module_init(proc_kcore_init);
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 9fd5df3f40ce..7ca78346d3f0 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -10,13 +10,12 @@
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/poll.h> 12#include <linux/poll.h>
13#include <linux/proc_fs.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
14 15
15#include <asm/uaccess.h> 16#include <asm/uaccess.h>
16#include <asm/io.h> 17#include <asm/io.h>
17 18
18#include "internal.h"
19
20extern wait_queue_head_t log_wait; 19extern wait_queue_head_t log_wait;
21 20
22extern int do_syslog(int type, char __user *bug, int count); 21extern int do_syslog(int type, char __user *bug, int count);
@@ -49,9 +48,16 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait)
49} 48}
50 49
51 50
52const struct file_operations proc_kmsg_operations = { 51static const struct file_operations proc_kmsg_operations = {
53 .read = kmsg_read, 52 .read = kmsg_read,
54 .poll = kmsg_poll, 53 .poll = kmsg_poll,
55 .open = kmsg_open, 54 .open = kmsg_open,
56 .release = kmsg_release, 55 .release = kmsg_release,
57}; 56};
57
58static int __init proc_kmsg_init(void)
59{
60 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
61 return 0;
62}
63module_init(proc_kmsg_init);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
new file mode 100644
index 000000000000..9bca39cf99ee
--- /dev/null
+++ b/fs/proc/loadavg.c
@@ -0,0 +1,51 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/pid_namespace.h>
4#include <linux/proc_fs.h>
5#include <linux/sched.h>
6#include <linux/seq_file.h>
7#include <linux/seqlock.h>
8#include <linux/time.h>
9
10#define LOAD_INT(x) ((x) >> FSHIFT)
11#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
12
13static int loadavg_proc_show(struct seq_file *m, void *v)
14{
15 int a, b, c;
16 unsigned long seq;
17
18 do {
19 seq = read_seqbegin(&xtime_lock);
20 a = avenrun[0] + (FIXED_1/200);
21 b = avenrun[1] + (FIXED_1/200);
22 c = avenrun[2] + (FIXED_1/200);
23 } while (read_seqretry(&xtime_lock, seq));
24
25 seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
26 LOAD_INT(a), LOAD_FRAC(a),
27 LOAD_INT(b), LOAD_FRAC(b),
28 LOAD_INT(c), LOAD_FRAC(c),
29 nr_running(), nr_threads,
30 task_active_pid_ns(current)->last_pid);
31 return 0;
32}
33
34static int loadavg_proc_open(struct inode *inode, struct file *file)
35{
36 return single_open(file, loadavg_proc_show, NULL);
37}
38
39static const struct file_operations loadavg_proc_fops = {
40 .open = loadavg_proc_open,
41 .read = seq_read,
42 .llseek = seq_lseek,
43 .release = single_release,
44};
45
46static int __init proc_loadavg_init(void)
47{
48 proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
49 return 0;
50}
51module_init(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
new file mode 100644
index 000000000000..b1675c4e66da
--- /dev/null
+++ b/fs/proc/meminfo.c
@@ -0,0 +1,168 @@
1#include <linux/fs.h>
2#include <linux/hugetlb.h>
3#include <linux/init.h>
4#include <linux/kernel.h>
5#include <linux/mm.h>
6#include <linux/mman.h>
7#include <linux/mmzone.h>
8#include <linux/proc_fs.h>
9#include <linux/quicklist.h>
10#include <linux/seq_file.h>
11#include <linux/swap.h>
12#include <linux/vmstat.h>
13#include <asm/atomic.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
16#include "internal.h"
17
18void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
19{
20}
21
22static int meminfo_proc_show(struct seq_file *m, void *v)
23{
24 struct sysinfo i;
25 unsigned long committed;
26 unsigned long allowed;
27 struct vmalloc_info vmi;
28 long cached;
29 unsigned long pages[NR_LRU_LISTS];
30 int lru;
31
32/*
33 * display in kilobytes.
34 */
35#define K(x) ((x) << (PAGE_SHIFT - 10))
36 si_meminfo(&i);
37 si_swapinfo(&i);
38 committed = atomic_long_read(&vm_committed_space);
39 allowed = ((totalram_pages - hugetlb_total_pages())
40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41
42 cached = global_page_state(NR_FILE_PAGES) -
43 total_swapcache_pages - i.bufferram;
44 if (cached < 0)
45 cached = 0;
46
47 get_vmalloc_info(&vmi);
48
49 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
50 pages[lru] = global_page_state(NR_LRU_BASE + lru);
51
52 /*
53 * Tagged format, for easy grepping and expansion.
54 */
55 seq_printf(m,
56 "MemTotal: %8lu kB\n"
57 "MemFree: %8lu kB\n"
58 "Buffers: %8lu kB\n"
59 "Cached: %8lu kB\n"
60 "SwapCached: %8lu kB\n"
61 "Active: %8lu kB\n"
62 "Inactive: %8lu kB\n"
63 "Active(anon): %8lu kB\n"
64 "Inactive(anon): %8lu kB\n"
65 "Active(file): %8lu kB\n"
66 "Inactive(file): %8lu kB\n"
67#ifdef CONFIG_UNEVICTABLE_LRU
68 "Unevictable: %8lu kB\n"
69 "Mlocked: %8lu kB\n"
70#endif
71#ifdef CONFIG_HIGHMEM
72 "HighTotal: %8lu kB\n"
73 "HighFree: %8lu kB\n"
74 "LowTotal: %8lu kB\n"
75 "LowFree: %8lu kB\n"
76#endif
77 "SwapTotal: %8lu kB\n"
78 "SwapFree: %8lu kB\n"
79 "Dirty: %8lu kB\n"
80 "Writeback: %8lu kB\n"
81 "AnonPages: %8lu kB\n"
82 "Mapped: %8lu kB\n"
83 "Slab: %8lu kB\n"
84 "SReclaimable: %8lu kB\n"
85 "SUnreclaim: %8lu kB\n"
86 "PageTables: %8lu kB\n"
87#ifdef CONFIG_QUICKLIST
88 "Quicklists: %8lu kB\n"
89#endif
90 "NFS_Unstable: %8lu kB\n"
91 "Bounce: %8lu kB\n"
92 "WritebackTmp: %8lu kB\n"
93 "CommitLimit: %8lu kB\n"
94 "Committed_AS: %8lu kB\n"
95 "VmallocTotal: %8lu kB\n"
96 "VmallocUsed: %8lu kB\n"
97 "VmallocChunk: %8lu kB\n",
98 K(i.totalram),
99 K(i.freeram),
100 K(i.bufferram),
101 K(cached),
102 K(total_swapcache_pages),
103 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
104 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
105 K(pages[LRU_ACTIVE_ANON]),
106 K(pages[LRU_INACTIVE_ANON]),
107 K(pages[LRU_ACTIVE_FILE]),
108 K(pages[LRU_INACTIVE_FILE]),
109#ifdef CONFIG_UNEVICTABLE_LRU
110 K(pages[LRU_UNEVICTABLE]),
111 K(global_page_state(NR_MLOCK)),
112#endif
113#ifdef CONFIG_HIGHMEM
114 K(i.totalhigh),
115 K(i.freehigh),
116 K(i.totalram-i.totalhigh),
117 K(i.freeram-i.freehigh),
118#endif
119 K(i.totalswap),
120 K(i.freeswap),
121 K(global_page_state(NR_FILE_DIRTY)),
122 K(global_page_state(NR_WRITEBACK)),
123 K(global_page_state(NR_ANON_PAGES)),
124 K(global_page_state(NR_FILE_MAPPED)),
125 K(global_page_state(NR_SLAB_RECLAIMABLE) +
126 global_page_state(NR_SLAB_UNRECLAIMABLE)),
127 K(global_page_state(NR_SLAB_RECLAIMABLE)),
128 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
129 K(global_page_state(NR_PAGETABLE)),
130#ifdef CONFIG_QUICKLIST
131 K(quicklist_total_size()),
132#endif
133 K(global_page_state(NR_UNSTABLE_NFS)),
134 K(global_page_state(NR_BOUNCE)),
135 K(global_page_state(NR_WRITEBACK_TEMP)),
136 K(allowed),
137 K(committed),
138 (unsigned long)VMALLOC_TOTAL >> 10,
139 vmi.used >> 10,
140 vmi.largest_chunk >> 10
141 );
142
143 hugetlb_report_meminfo(m);
144
145 arch_report_meminfo(m);
146
147 return 0;
148#undef K
149}
150
151static int meminfo_proc_open(struct inode *inode, struct file *file)
152{
153 return single_open(file, meminfo_proc_show, NULL);
154}
155
156static const struct file_operations meminfo_proc_fops = {
157 .open = meminfo_proc_open,
158 .read = seq_read,
159 .llseek = seq_lseek,
160 .release = single_release,
161};
162
163static int __init proc_meminfo_init(void)
164{
165 proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
166 return 0;
167}
168module_init(proc_meminfo_init);
diff --git a/fs/proc/page.c b/fs/proc/page.c
new file mode 100644
index 000000000000..767d95a6d1b1
--- /dev/null
+++ b/fs/proc/page.c
@@ -0,0 +1,147 @@
1#include <linux/bootmem.h>
2#include <linux/compiler.h>
3#include <linux/fs.h>
4#include <linux/init.h>
5#include <linux/mm.h>
6#include <linux/mmzone.h>
7#include <linux/proc_fs.h>
8#include <linux/seq_file.h>
9#include <asm/uaccess.h>
10#include "internal.h"
11
12#define KPMSIZE sizeof(u64)
13#define KPMMASK (KPMSIZE - 1)
14/* /proc/kpagecount - an array exposing page counts
15 *
16 * Each entry is a u64 representing the corresponding
17 * physical page count.
18 */
19static ssize_t kpagecount_read(struct file *file, char __user *buf,
20 size_t count, loff_t *ppos)
21{
22 u64 __user *out = (u64 __user *)buf;
23 struct page *ppage;
24 unsigned long src = *ppos;
25 unsigned long pfn;
26 ssize_t ret = 0;
27 u64 pcount;
28
29 pfn = src / KPMSIZE;
30 count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
31 if (src & KPMMASK || count & KPMMASK)
32 return -EINVAL;
33
34 while (count > 0) {
35 ppage = NULL;
36 if (pfn_valid(pfn))
37 ppage = pfn_to_page(pfn);
38 pfn++;
39 if (!ppage)
40 pcount = 0;
41 else
42 pcount = page_mapcount(ppage);
43
44 if (put_user(pcount, out++)) {
45 ret = -EFAULT;
46 break;
47 }
48
49 count -= KPMSIZE;
50 }
51
52 *ppos += (char __user *)out - buf;
53 if (!ret)
54 ret = (char __user *)out - buf;
55 return ret;
56}
57
58static const struct file_operations proc_kpagecount_operations = {
59 .llseek = mem_lseek,
60 .read = kpagecount_read,
61};
62
63/* /proc/kpageflags - an array exposing page flags
64 *
65 * Each entry is a u64 representing the corresponding
66 * physical page flags.
67 */
68
69/* These macros are used to decouple internal flags from exported ones */
70
71#define KPF_LOCKED 0
72#define KPF_ERROR 1
73#define KPF_REFERENCED 2
74#define KPF_UPTODATE 3
75#define KPF_DIRTY 4
76#define KPF_LRU 5
77#define KPF_ACTIVE 6
78#define KPF_SLAB 7
79#define KPF_WRITEBACK 8
80#define KPF_RECLAIM 9
81#define KPF_BUDDY 10
82
83#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
84
85static ssize_t kpageflags_read(struct file *file, char __user *buf,
86 size_t count, loff_t *ppos)
87{
88 u64 __user *out = (u64 __user *)buf;
89 struct page *ppage;
90 unsigned long src = *ppos;
91 unsigned long pfn;
92 ssize_t ret = 0;
93 u64 kflags, uflags;
94
95 pfn = src / KPMSIZE;
96 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
97 if (src & KPMMASK || count & KPMMASK)
98 return -EINVAL;
99
100 while (count > 0) {
101 ppage = NULL;
102 if (pfn_valid(pfn))
103 ppage = pfn_to_page(pfn);
104 pfn++;
105 if (!ppage)
106 kflags = 0;
107 else
108 kflags = ppage->flags;
109
110 uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) |
111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
114 kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
115 kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
116 kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
117 kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
118 kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
119 kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
120 kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
121
122 if (put_user(uflags, out++)) {
123 ret = -EFAULT;
124 break;
125 }
126
127 count -= KPMSIZE;
128 }
129
130 *ppos += (char __user *)out - buf;
131 if (!ret)
132 ret = (char __user *)out - buf;
133 return ret;
134}
135
136static const struct file_operations proc_kpageflags_operations = {
137 .llseek = mem_lseek,
138 .read = kpageflags_read,
139};
140
141static int __init proc_page_init(void)
142{
143 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
144 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
145 return 0;
146}
147module_init(proc_page_init);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index eca471bc8512..d777789b7a89 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -4,6 +4,7 @@
4 * Copyright 1997 Paul Mackerras 4 * Copyright 1997 Paul Mackerras
5 */ 5 */
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/init.h>
7#include <linux/time.h> 8#include <linux/time.h>
8#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
9#include <linux/stat.h> 10#include <linux/stat.h>
@@ -214,7 +215,7 @@ void proc_device_tree_add_node(struct device_node *np,
214/* 215/*
215 * Called on initialization to set up the /proc/device-tree subtree 216 * Called on initialization to set up the /proc/device-tree subtree
216 */ 217 */
217void proc_device_tree_init(void) 218void __init proc_device_tree_init(void)
218{ 219{
219 struct device_node *root; 220 struct device_node *root;
220 if ( !have_of ) 221 if ( !have_of )
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
deleted file mode 100644
index 59ea42e1ef03..000000000000
--- a/fs/proc/proc_misc.c
+++ /dev/null
@@ -1,914 +0,0 @@
1/*
2 * linux/fs/proc/proc_misc.c
3 *
4 * linux/fs/proc/array.c
5 * Copyright (C) 1992 by Linus Torvalds
6 * based on ideas by Darren Senn
7 *
8 * This used to be the part of array.c. See the rest of history and credits
9 * there. I took this into a separate file and switched the thing to generic
10 * proc_file_inode_operations, leaving in array.c only per-process stuff.
11 * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999.
12 *
13 * Changes:
14 * Fulton Green : Encapsulated position metric calculations.
15 * <kernel@FultonGreen.com>
16 */
17
18#include <linux/types.h>
19#include <linux/errno.h>
20#include <linux/time.h>
21#include <linux/kernel.h>
22#include <linux/kernel_stat.h>
23#include <linux/fs.h>
24#include <linux/tty.h>
25#include <linux/string.h>
26#include <linux/mman.h>
27#include <linux/quicklist.h>
28#include <linux/proc_fs.h>
29#include <linux/ioport.h>
30#include <linux/mm.h>
31#include <linux/mmzone.h>
32#include <linux/pagemap.h>
33#include <linux/interrupt.h>
34#include <linux/swap.h>
35#include <linux/slab.h>
36#include <linux/genhd.h>
37#include <linux/smp.h>
38#include <linux/signal.h>
39#include <linux/module.h>
40#include <linux/init.h>
41#include <linux/seq_file.h>
42#include <linux/times.h>
43#include <linux/profile.h>
44#include <linux/utsname.h>
45#include <linux/blkdev.h>
46#include <linux/hugetlb.h>
47#include <linux/jiffies.h>
48#include <linux/vmalloc.h>
49#include <linux/crash_dump.h>
50#include <linux/pid_namespace.h>
51#include <linux/bootmem.h>
52#include <asm/uaccess.h>
53#include <asm/pgtable.h>
54#include <asm/io.h>
55#include <asm/tlb.h>
56#include <asm/div64.h>
57#include "internal.h"
58
59#define LOAD_INT(x) ((x) >> FSHIFT)
60#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
61/*
62 * Warning: stuff below (imported functions) assumes that its output will fit
63 * into one page. For some of those functions it may be wrong. Moreover, we
64 * have a way to deal with that gracefully. Right now I used straightforward
65 * wrappers, but this needs further analysis wrt potential overflows.
66 */
67extern int get_hardware_list(char *);
68extern int get_stram_list(char *);
69extern int get_exec_domain_list(char *);
70
71static int proc_calc_metrics(char *page, char **start, off_t off,
72 int count, int *eof, int len)
73{
74 if (len <= off+count) *eof = 1;
75 *start = page + off;
76 len -= off;
77 if (len>count) len = count;
78 if (len<0) len = 0;
79 return len;
80}
81
82static int loadavg_read_proc(char *page, char **start, off_t off,
83 int count, int *eof, void *data)
84{
85 int a, b, c;
86 int len;
87 unsigned long seq;
88
89 do {
90 seq = read_seqbegin(&xtime_lock);
91 a = avenrun[0] + (FIXED_1/200);
92 b = avenrun[1] + (FIXED_1/200);
93 c = avenrun[2] + (FIXED_1/200);
94 } while (read_seqretry(&xtime_lock, seq));
95
96 len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
97 LOAD_INT(a), LOAD_FRAC(a),
98 LOAD_INT(b), LOAD_FRAC(b),
99 LOAD_INT(c), LOAD_FRAC(c),
100 nr_running(), nr_threads,
101 task_active_pid_ns(current)->last_pid);
102 return proc_calc_metrics(page, start, off, count, eof, len);
103}
104
105static int uptime_read_proc(char *page, char **start, off_t off,
106 int count, int *eof, void *data)
107{
108 struct timespec uptime;
109 struct timespec idle;
110 int len;
111 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
112
113 do_posix_clock_monotonic_gettime(&uptime);
114 monotonic_to_bootbased(&uptime);
115 cputime_to_timespec(idletime, &idle);
116 len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
117 (unsigned long) uptime.tv_sec,
118 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
119 (unsigned long) idle.tv_sec,
120 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
121
122 return proc_calc_metrics(page, start, off, count, eof, len);
123}
124
125int __attribute__((weak)) arch_report_meminfo(char *page)
126{
127 return 0;
128}
129
130static int meminfo_read_proc(char *page, char **start, off_t off,
131 int count, int *eof, void *data)
132{
133 struct sysinfo i;
134 int len;
135 unsigned long committed;
136 unsigned long allowed;
137 struct vmalloc_info vmi;
138 long cached;
139
140/*
141 * display in kilobytes.
142 */
143#define K(x) ((x) << (PAGE_SHIFT - 10))
144 si_meminfo(&i);
145 si_swapinfo(&i);
146 committed = atomic_long_read(&vm_committed_space);
147 allowed = ((totalram_pages - hugetlb_total_pages())
148 * sysctl_overcommit_ratio / 100) + total_swap_pages;
149
150 cached = global_page_state(NR_FILE_PAGES) -
151 total_swapcache_pages - i.bufferram;
152 if (cached < 0)
153 cached = 0;
154
155 get_vmalloc_info(&vmi);
156
157 /*
158 * Tagged format, for easy grepping and expansion.
159 */
160 len = sprintf(page,
161 "MemTotal: %8lu kB\n"
162 "MemFree: %8lu kB\n"
163 "Buffers: %8lu kB\n"
164 "Cached: %8lu kB\n"
165 "SwapCached: %8lu kB\n"
166 "Active: %8lu kB\n"
167 "Inactive: %8lu kB\n"
168#ifdef CONFIG_HIGHMEM
169 "HighTotal: %8lu kB\n"
170 "HighFree: %8lu kB\n"
171 "LowTotal: %8lu kB\n"
172 "LowFree: %8lu kB\n"
173#endif
174 "SwapTotal: %8lu kB\n"
175 "SwapFree: %8lu kB\n"
176 "Dirty: %8lu kB\n"
177 "Writeback: %8lu kB\n"
178 "AnonPages: %8lu kB\n"
179 "Mapped: %8lu kB\n"
180 "Slab: %8lu kB\n"
181 "SReclaimable: %8lu kB\n"
182 "SUnreclaim: %8lu kB\n"
183 "PageTables: %8lu kB\n"
184#ifdef CONFIG_QUICKLIST
185 "Quicklists: %8lu kB\n"
186#endif
187 "NFS_Unstable: %8lu kB\n"
188 "Bounce: %8lu kB\n"
189 "WritebackTmp: %8lu kB\n"
190 "CommitLimit: %8lu kB\n"
191 "Committed_AS: %8lu kB\n"
192 "VmallocTotal: %8lu kB\n"
193 "VmallocUsed: %8lu kB\n"
194 "VmallocChunk: %8lu kB\n",
195 K(i.totalram),
196 K(i.freeram),
197 K(i.bufferram),
198 K(cached),
199 K(total_swapcache_pages),
200 K(global_page_state(NR_ACTIVE)),
201 K(global_page_state(NR_INACTIVE)),
202#ifdef CONFIG_HIGHMEM
203 K(i.totalhigh),
204 K(i.freehigh),
205 K(i.totalram-i.totalhigh),
206 K(i.freeram-i.freehigh),
207#endif
208 K(i.totalswap),
209 K(i.freeswap),
210 K(global_page_state(NR_FILE_DIRTY)),
211 K(global_page_state(NR_WRITEBACK)),
212 K(global_page_state(NR_ANON_PAGES)),
213 K(global_page_state(NR_FILE_MAPPED)),
214 K(global_page_state(NR_SLAB_RECLAIMABLE) +
215 global_page_state(NR_SLAB_UNRECLAIMABLE)),
216 K(global_page_state(NR_SLAB_RECLAIMABLE)),
217 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
218 K(global_page_state(NR_PAGETABLE)),
219#ifdef CONFIG_QUICKLIST
220 K(quicklist_total_size()),
221#endif
222 K(global_page_state(NR_UNSTABLE_NFS)),
223 K(global_page_state(NR_BOUNCE)),
224 K(global_page_state(NR_WRITEBACK_TEMP)),
225 K(allowed),
226 K(committed),
227 (unsigned long)VMALLOC_TOTAL >> 10,
228 vmi.used >> 10,
229 vmi.largest_chunk >> 10
230 );
231
232 len += hugetlb_report_meminfo(page + len);
233
234 len += arch_report_meminfo(page + len);
235
236 return proc_calc_metrics(page, start, off, count, eof, len);
237#undef K
238}
239
240static int fragmentation_open(struct inode *inode, struct file *file)
241{
242 (void)inode;
243 return seq_open(file, &fragmentation_op);
244}
245
246static const struct file_operations fragmentation_file_operations = {
247 .open = fragmentation_open,
248 .read = seq_read,
249 .llseek = seq_lseek,
250 .release = seq_release,
251};
252
253static int pagetypeinfo_open(struct inode *inode, struct file *file)
254{
255 return seq_open(file, &pagetypeinfo_op);
256}
257
258static const struct file_operations pagetypeinfo_file_ops = {
259 .open = pagetypeinfo_open,
260 .read = seq_read,
261 .llseek = seq_lseek,
262 .release = seq_release,
263};
264
265static int zoneinfo_open(struct inode *inode, struct file *file)
266{
267 return seq_open(file, &zoneinfo_op);
268}
269
270static const struct file_operations proc_zoneinfo_file_operations = {
271 .open = zoneinfo_open,
272 .read = seq_read,
273 .llseek = seq_lseek,
274 .release = seq_release,
275};
276
277static int version_read_proc(char *page, char **start, off_t off,
278 int count, int *eof, void *data)
279{
280 int len;
281
282 len = snprintf(page, PAGE_SIZE, linux_proc_banner,
283 utsname()->sysname,
284 utsname()->release,
285 utsname()->version);
286 return proc_calc_metrics(page, start, off, count, eof, len);
287}
288
289extern const struct seq_operations cpuinfo_op;
290static int cpuinfo_open(struct inode *inode, struct file *file)
291{
292 return seq_open(file, &cpuinfo_op);
293}
294
295static const struct file_operations proc_cpuinfo_operations = {
296 .open = cpuinfo_open,
297 .read = seq_read,
298 .llseek = seq_lseek,
299 .release = seq_release,
300};
301
302static int devinfo_show(struct seq_file *f, void *v)
303{
304 int i = *(loff_t *) v;
305
306 if (i < CHRDEV_MAJOR_HASH_SIZE) {
307 if (i == 0)
308 seq_printf(f, "Character devices:\n");
309 chrdev_show(f, i);
310 }
311#ifdef CONFIG_BLOCK
312 else {
313 i -= CHRDEV_MAJOR_HASH_SIZE;
314 if (i == 0)
315 seq_printf(f, "\nBlock devices:\n");
316 blkdev_show(f, i);
317 }
318#endif
319 return 0;
320}
321
322static void *devinfo_start(struct seq_file *f, loff_t *pos)
323{
324 if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
325 return pos;
326 return NULL;
327}
328
329static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
330{
331 (*pos)++;
332 if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
333 return NULL;
334 return pos;
335}
336
337static void devinfo_stop(struct seq_file *f, void *v)
338{
339 /* Nothing to do */
340}
341
342static const struct seq_operations devinfo_ops = {
343 .start = devinfo_start,
344 .next = devinfo_next,
345 .stop = devinfo_stop,
346 .show = devinfo_show
347};
348
349static int devinfo_open(struct inode *inode, struct file *filp)
350{
351 return seq_open(filp, &devinfo_ops);
352}
353
354static const struct file_operations proc_devinfo_operations = {
355 .open = devinfo_open,
356 .read = seq_read,
357 .llseek = seq_lseek,
358 .release = seq_release,
359};
360
361static int vmstat_open(struct inode *inode, struct file *file)
362{
363 return seq_open(file, &vmstat_op);
364}
365static const struct file_operations proc_vmstat_file_operations = {
366 .open = vmstat_open,
367 .read = seq_read,
368 .llseek = seq_lseek,
369 .release = seq_release,
370};
371
372#ifdef CONFIG_PROC_HARDWARE
373static int hardware_read_proc(char *page, char **start, off_t off,
374 int count, int *eof, void *data)
375{
376 int len = get_hardware_list(page);
377 return proc_calc_metrics(page, start, off, count, eof, len);
378}
379#endif
380
381#ifdef CONFIG_STRAM_PROC
382static int stram_read_proc(char *page, char **start, off_t off,
383 int count, int *eof, void *data)
384{
385 int len = get_stram_list(page);
386 return proc_calc_metrics(page, start, off, count, eof, len);
387}
388#endif
389
390#ifdef CONFIG_BLOCK
391static int partitions_open(struct inode *inode, struct file *file)
392{
393 return seq_open(file, &partitions_op);
394}
395static const struct file_operations proc_partitions_operations = {
396 .open = partitions_open,
397 .read = seq_read,
398 .llseek = seq_lseek,
399 .release = seq_release,
400};
401
402static int diskstats_open(struct inode *inode, struct file *file)
403{
404 return seq_open(file, &diskstats_op);
405}
406static const struct file_operations proc_diskstats_operations = {
407 .open = diskstats_open,
408 .read = seq_read,
409 .llseek = seq_lseek,
410 .release = seq_release,
411};
412#endif
413
414#ifdef CONFIG_MODULES
415extern const struct seq_operations modules_op;
416static int modules_open(struct inode *inode, struct file *file)
417{
418 return seq_open(file, &modules_op);
419}
420static const struct file_operations proc_modules_operations = {
421 .open = modules_open,
422 .read = seq_read,
423 .llseek = seq_lseek,
424 .release = seq_release,
425};
426#endif
427
428#ifdef CONFIG_SLABINFO
429static int slabinfo_open(struct inode *inode, struct file *file)
430{
431 return seq_open(file, &slabinfo_op);
432}
433static const struct file_operations proc_slabinfo_operations = {
434 .open = slabinfo_open,
435 .read = seq_read,
436 .write = slabinfo_write,
437 .llseek = seq_lseek,
438 .release = seq_release,
439};
440
441#ifdef CONFIG_DEBUG_SLAB_LEAK
442extern const struct seq_operations slabstats_op;
443static int slabstats_open(struct inode *inode, struct file *file)
444{
445 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
446 int ret = -ENOMEM;
447 if (n) {
448 ret = seq_open(file, &slabstats_op);
449 if (!ret) {
450 struct seq_file *m = file->private_data;
451 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
452 m->private = n;
453 n = NULL;
454 }
455 kfree(n);
456 }
457 return ret;
458}
459
460static const struct file_operations proc_slabstats_operations = {
461 .open = slabstats_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release_private,
465};
466#endif
467#endif
468
469#ifdef CONFIG_MMU
470static int vmalloc_open(struct inode *inode, struct file *file)
471{
472 unsigned int *ptr = NULL;
473 int ret;
474
475 if (NUMA_BUILD)
476 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
477 ret = seq_open(file, &vmalloc_op);
478 if (!ret) {
479 struct seq_file *m = file->private_data;
480 m->private = ptr;
481 } else
482 kfree(ptr);
483 return ret;
484}
485
486static const struct file_operations proc_vmalloc_operations = {
487 .open = vmalloc_open,
488 .read = seq_read,
489 .llseek = seq_lseek,
490 .release = seq_release_private,
491};
492#endif
493
494#ifndef arch_irq_stat_cpu
495#define arch_irq_stat_cpu(cpu) 0
496#endif
497#ifndef arch_irq_stat
498#define arch_irq_stat() 0
499#endif
500
501static int show_stat(struct seq_file *p, void *v)
502{
503 int i;
504 unsigned long jif;
505 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
506 cputime64_t guest;
507 u64 sum = 0;
508 struct timespec boottime;
509 unsigned int *per_irq_sum;
510
511 per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
512 if (!per_irq_sum)
513 return -ENOMEM;
514
515 user = nice = system = idle = iowait =
516 irq = softirq = steal = cputime64_zero;
517 guest = cputime64_zero;
518 getboottime(&boottime);
519 jif = boottime.tv_sec;
520
521 for_each_possible_cpu(i) {
522 int j;
523
524 user = cputime64_add(user, kstat_cpu(i).cpustat.user);
525 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
526 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
527 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
528 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
529 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
530 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
531 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
532 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
533 for (j = 0; j < NR_IRQS; j++) {
534 unsigned int temp = kstat_cpu(i).irqs[j];
535 sum += temp;
536 per_irq_sum[j] += temp;
537 }
538 sum += arch_irq_stat_cpu(i);
539 }
540 sum += arch_irq_stat();
541
542 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
543 (unsigned long long)cputime64_to_clock_t(user),
544 (unsigned long long)cputime64_to_clock_t(nice),
545 (unsigned long long)cputime64_to_clock_t(system),
546 (unsigned long long)cputime64_to_clock_t(idle),
547 (unsigned long long)cputime64_to_clock_t(iowait),
548 (unsigned long long)cputime64_to_clock_t(irq),
549 (unsigned long long)cputime64_to_clock_t(softirq),
550 (unsigned long long)cputime64_to_clock_t(steal),
551 (unsigned long long)cputime64_to_clock_t(guest));
552 for_each_online_cpu(i) {
553
554 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
555 user = kstat_cpu(i).cpustat.user;
556 nice = kstat_cpu(i).cpustat.nice;
557 system = kstat_cpu(i).cpustat.system;
558 idle = kstat_cpu(i).cpustat.idle;
559 iowait = kstat_cpu(i).cpustat.iowait;
560 irq = kstat_cpu(i).cpustat.irq;
561 softirq = kstat_cpu(i).cpustat.softirq;
562 steal = kstat_cpu(i).cpustat.steal;
563 guest = kstat_cpu(i).cpustat.guest;
564 seq_printf(p,
565 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
566 i,
567 (unsigned long long)cputime64_to_clock_t(user),
568 (unsigned long long)cputime64_to_clock_t(nice),
569 (unsigned long long)cputime64_to_clock_t(system),
570 (unsigned long long)cputime64_to_clock_t(idle),
571 (unsigned long long)cputime64_to_clock_t(iowait),
572 (unsigned long long)cputime64_to_clock_t(irq),
573 (unsigned long long)cputime64_to_clock_t(softirq),
574 (unsigned long long)cputime64_to_clock_t(steal),
575 (unsigned long long)cputime64_to_clock_t(guest));
576 }
577 seq_printf(p, "intr %llu", (unsigned long long)sum);
578
579 for (i = 0; i < NR_IRQS; i++)
580 seq_printf(p, " %u", per_irq_sum[i]);
581
582 seq_printf(p,
583 "\nctxt %llu\n"
584 "btime %lu\n"
585 "processes %lu\n"
586 "procs_running %lu\n"
587 "procs_blocked %lu\n",
588 nr_context_switches(),
589 (unsigned long)jif,
590 total_forks,
591 nr_running(),
592 nr_iowait());
593
594 kfree(per_irq_sum);
595 return 0;
596}
597
598static int stat_open(struct inode *inode, struct file *file)
599{
600 unsigned size = 4096 * (1 + num_possible_cpus() / 32);
601 char *buf;
602 struct seq_file *m;
603 int res;
604
605 /* don't ask for more than the kmalloc() max size, currently 128 KB */
606 if (size > 128 * 1024)
607 size = 128 * 1024;
608 buf = kmalloc(size, GFP_KERNEL);
609 if (!buf)
610 return -ENOMEM;
611
612 res = single_open(file, show_stat, NULL);
613 if (!res) {
614 m = file->private_data;
615 m->buf = buf;
616 m->size = size;
617 } else
618 kfree(buf);
619 return res;
620}
621static const struct file_operations proc_stat_operations = {
622 .open = stat_open,
623 .read = seq_read,
624 .llseek = seq_lseek,
625 .release = single_release,
626};
627
628/*
629 * /proc/interrupts
630 */
631static void *int_seq_start(struct seq_file *f, loff_t *pos)
632{
633 return (*pos <= NR_IRQS) ? pos : NULL;
634}
635
636static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
637{
638 (*pos)++;
639 if (*pos > NR_IRQS)
640 return NULL;
641 return pos;
642}
643
644static void int_seq_stop(struct seq_file *f, void *v)
645{
646 /* Nothing to do */
647}
648
649
650static const struct seq_operations int_seq_ops = {
651 .start = int_seq_start,
652 .next = int_seq_next,
653 .stop = int_seq_stop,
654 .show = show_interrupts
655};
656
657static int interrupts_open(struct inode *inode, struct file *filp)
658{
659 return seq_open(filp, &int_seq_ops);
660}
661
662static const struct file_operations proc_interrupts_operations = {
663 .open = interrupts_open,
664 .read = seq_read,
665 .llseek = seq_lseek,
666 .release = seq_release,
667};
668
669static int filesystems_read_proc(char *page, char **start, off_t off,
670 int count, int *eof, void *data)
671{
672 int len = get_filesystem_list(page);
673 return proc_calc_metrics(page, start, off, count, eof, len);
674}
675
676static int cmdline_read_proc(char *page, char **start, off_t off,
677 int count, int *eof, void *data)
678{
679 int len;
680
681 len = sprintf(page, "%s\n", saved_command_line);
682 return proc_calc_metrics(page, start, off, count, eof, len);
683}
684
685#ifdef CONFIG_FILE_LOCKING
686static int locks_open(struct inode *inode, struct file *filp)
687{
688 return seq_open(filp, &locks_seq_operations);
689}
690
691static const struct file_operations proc_locks_operations = {
692 .open = locks_open,
693 .read = seq_read,
694 .llseek = seq_lseek,
695 .release = seq_release,
696};
697#endif /* CONFIG_FILE_LOCKING */
698
699static int execdomains_read_proc(char *page, char **start, off_t off,
700 int count, int *eof, void *data)
701{
702 int len = get_exec_domain_list(page);
703 return proc_calc_metrics(page, start, off, count, eof, len);
704}
705
706#ifdef CONFIG_PROC_PAGE_MONITOR
707#define KPMSIZE sizeof(u64)
708#define KPMMASK (KPMSIZE - 1)
709/* /proc/kpagecount - an array exposing page counts
710 *
711 * Each entry is a u64 representing the corresponding
712 * physical page count.
713 */
714static ssize_t kpagecount_read(struct file *file, char __user *buf,
715 size_t count, loff_t *ppos)
716{
717 u64 __user *out = (u64 __user *)buf;
718 struct page *ppage;
719 unsigned long src = *ppos;
720 unsigned long pfn;
721 ssize_t ret = 0;
722 u64 pcount;
723
724 pfn = src / KPMSIZE;
725 count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
726 if (src & KPMMASK || count & KPMMASK)
727 return -EINVAL;
728
729 while (count > 0) {
730 ppage = NULL;
731 if (pfn_valid(pfn))
732 ppage = pfn_to_page(pfn);
733 pfn++;
734 if (!ppage)
735 pcount = 0;
736 else
737 pcount = page_mapcount(ppage);
738
739 if (put_user(pcount, out++)) {
740 ret = -EFAULT;
741 break;
742 }
743
744 count -= KPMSIZE;
745 }
746
747 *ppos += (char __user *)out - buf;
748 if (!ret)
749 ret = (char __user *)out - buf;
750 return ret;
751}
752
753static struct file_operations proc_kpagecount_operations = {
754 .llseek = mem_lseek,
755 .read = kpagecount_read,
756};
757
758/* /proc/kpageflags - an array exposing page flags
759 *
760 * Each entry is a u64 representing the corresponding
761 * physical page flags.
762 */
763
764/* These macros are used to decouple internal flags from exported ones */
765
766#define KPF_LOCKED 0
767#define KPF_ERROR 1
768#define KPF_REFERENCED 2
769#define KPF_UPTODATE 3
770#define KPF_DIRTY 4
771#define KPF_LRU 5
772#define KPF_ACTIVE 6
773#define KPF_SLAB 7
774#define KPF_WRITEBACK 8
775#define KPF_RECLAIM 9
776#define KPF_BUDDY 10
777
778#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
779
780static ssize_t kpageflags_read(struct file *file, char __user *buf,
781 size_t count, loff_t *ppos)
782{
783 u64 __user *out = (u64 __user *)buf;
784 struct page *ppage;
785 unsigned long src = *ppos;
786 unsigned long pfn;
787 ssize_t ret = 0;
788 u64 kflags, uflags;
789
790 pfn = src / KPMSIZE;
791 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
792 if (src & KPMMASK || count & KPMMASK)
793 return -EINVAL;
794
795 while (count > 0) {
796 ppage = NULL;
797 if (pfn_valid(pfn))
798 ppage = pfn_to_page(pfn);
799 pfn++;
800 if (!ppage)
801 kflags = 0;
802 else
803 kflags = ppage->flags;
804
805 uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) |
806 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
807 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
808 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
809 kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
810 kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
811 kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
812 kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
813 kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
814 kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
815 kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
816
817 if (put_user(uflags, out++)) {
818 ret = -EFAULT;
819 break;
820 }
821
822 count -= KPMSIZE;
823 }
824
825 *ppos += (char __user *)out - buf;
826 if (!ret)
827 ret = (char __user *)out - buf;
828 return ret;
829}
830
831static struct file_operations proc_kpageflags_operations = {
832 .llseek = mem_lseek,
833 .read = kpageflags_read,
834};
835#endif /* CONFIG_PROC_PAGE_MONITOR */
836
837struct proc_dir_entry *proc_root_kcore;
838
839void __init proc_misc_init(void)
840{
841 static struct {
842 char *name;
843 int (*read_proc)(char*,char**,off_t,int,int*,void*);
844 } *p, simple_ones[] = {
845 {"loadavg", loadavg_read_proc},
846 {"uptime", uptime_read_proc},
847 {"meminfo", meminfo_read_proc},
848 {"version", version_read_proc},
849#ifdef CONFIG_PROC_HARDWARE
850 {"hardware", hardware_read_proc},
851#endif
852#ifdef CONFIG_STRAM_PROC
853 {"stram", stram_read_proc},
854#endif
855 {"filesystems", filesystems_read_proc},
856 {"cmdline", cmdline_read_proc},
857 {"execdomains", execdomains_read_proc},
858 {NULL,}
859 };
860 for (p = simple_ones; p->name; p++)
861 create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL);
862
863 proc_symlink("mounts", NULL, "self/mounts");
864
865 /* And now for trickier ones */
866#ifdef CONFIG_PRINTK
867 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
868#endif
869#ifdef CONFIG_FILE_LOCKING
870 proc_create("locks", 0, NULL, &proc_locks_operations);
871#endif
872 proc_create("devices", 0, NULL, &proc_devinfo_operations);
873 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
874#ifdef CONFIG_BLOCK
875 proc_create("partitions", 0, NULL, &proc_partitions_operations);
876#endif
877 proc_create("stat", 0, NULL, &proc_stat_operations);
878 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
879#ifdef CONFIG_SLABINFO
880 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
881#ifdef CONFIG_DEBUG_SLAB_LEAK
882 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
883#endif
884#endif
885#ifdef CONFIG_MMU
886 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
887#endif
888 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
889 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
890 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
891 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
892#ifdef CONFIG_BLOCK
893 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
894#endif
895#ifdef CONFIG_MODULES
896 proc_create("modules", 0, NULL, &proc_modules_operations);
897#endif
898#ifdef CONFIG_SCHEDSTATS
899 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
900#endif
901#ifdef CONFIG_PROC_KCORE
902 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
903 if (proc_root_kcore)
904 proc_root_kcore->size =
905 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
906#endif
907#ifdef CONFIG_PROC_PAGE_MONITOR
908 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
909 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
910#endif
911#ifdef CONFIG_PROC_VMCORE
912 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
913#endif
914}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 945a81043ba2..94fcfff6863a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * /proc/sys support 2 * /proc/sys support
3 */ 3 */
4 4#include <linux/init.h>
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/proc_fs.h> 6#include <linux/proc_fs.h>
7#include <linux/security.h> 7#include <linux/security.h>
@@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask)
298 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
299 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
300 */ 300 */
301 struct ctl_table_header *head = grab_header(inode); 301 struct ctl_table_header *head;
302 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 302 struct ctl_table *table;
303 int error; 303 int error;
304 304
305 /* Executable files are not allowed under /proc/sys/ */
306 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
307 return -EACCES;
308
309 head = grab_header(inode);
305 if (IS_ERR(head)) 310 if (IS_ERR(head))
306 return PTR_ERR(head); 311 return PTR_ERR(head);
307 312
313 table = PROC_I(inode)->sysctl_entry;
308 if (!table) /* global root - r-xr-xr-x */ 314 if (!table) /* global root - r-xr-xr-x */
309 error = mask & MAY_WRITE ? -EACCES : 0; 315 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */ 316 else /* Use the permissions on the sysctl table entry */
@@ -353,6 +359,7 @@ static const struct file_operations proc_sys_file_operations = {
353 359
354static const struct file_operations proc_sys_dir_file_operations = { 360static const struct file_operations proc_sys_dir_file_operations = {
355 .readdir = proc_sys_readdir, 361 .readdir = proc_sys_readdir,
362 .llseek = generic_file_llseek,
356}; 363};
357 364
358static const struct inode_operations proc_sys_inode_operations = { 365static const struct inode_operations proc_sys_inode_operations = {
@@ -395,7 +402,7 @@ static struct dentry_operations proc_sys_dentry_operations = {
395 .d_compare = proc_sys_compare, 402 .d_compare = proc_sys_compare,
396}; 403};
397 404
398int proc_sys_init(void) 405int __init proc_sys_init(void)
399{ 406{
400 struct proc_dir_entry *proc_sys_root; 407 struct proc_dir_entry *proc_sys_root;
401 408
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 95117538a4f6..7761602af9de 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -104,9 +104,9 @@ static struct file_system_type proc_fs_type = {
104 104
105void __init proc_root_init(void) 105void __init proc_root_init(void)
106{ 106{
107 int err = proc_init_inodecache(); 107 int err;
108 if (err) 108
109 return; 109 proc_init_inodecache();
110 err = register_filesystem(&proc_fs_type); 110 err = register_filesystem(&proc_fs_type);
111 if (err) 111 if (err)
112 return; 112 return;
@@ -117,7 +117,7 @@ void __init proc_root_init(void)
117 return; 117 return;
118 } 118 }
119 119
120 proc_misc_init(); 120 proc_symlink("mounts", NULL, "self/mounts");
121 121
122 proc_net_init(); 122 proc_net_init();
123 123
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
new file mode 100644
index 000000000000..81904f07679d
--- /dev/null
+++ b/fs/proc/stat.c
@@ -0,0 +1,153 @@
1#include <linux/cpumask.h>
2#include <linux/fs.h>
3#include <linux/gfp.h>
4#include <linux/init.h>
5#include <linux/interrupt.h>
6#include <linux/kernel_stat.h>
7#include <linux/proc_fs.h>
8#include <linux/sched.h>
9#include <linux/seq_file.h>
10#include <linux/slab.h>
11#include <linux/time.h>
12#include <asm/cputime.h>
13
14#ifndef arch_irq_stat_cpu
15#define arch_irq_stat_cpu(cpu) 0
16#endif
17#ifndef arch_irq_stat
18#define arch_irq_stat() 0
19#endif
20
21static int show_stat(struct seq_file *p, void *v)
22{
23 int i, j;
24 unsigned long jif;
25 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
26 cputime64_t guest;
27 u64 sum = 0;
28 struct timespec boottime;
29 unsigned int per_irq_sum;
30
31 user = nice = system = idle = iowait =
32 irq = softirq = steal = cputime64_zero;
33 guest = cputime64_zero;
34 getboottime(&boottime);
35 jif = boottime.tv_sec;
36
37 for_each_possible_cpu(i) {
38 user = cputime64_add(user, kstat_cpu(i).cpustat.user);
39 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
40 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
41 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
42 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
43 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
44 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
45 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
46 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
47
48 for_each_irq_nr(j)
49 sum += kstat_irqs_cpu(j, i);
50
51 sum += arch_irq_stat_cpu(i);
52 }
53 sum += arch_irq_stat();
54
55 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
56 (unsigned long long)cputime64_to_clock_t(user),
57 (unsigned long long)cputime64_to_clock_t(nice),
58 (unsigned long long)cputime64_to_clock_t(system),
59 (unsigned long long)cputime64_to_clock_t(idle),
60 (unsigned long long)cputime64_to_clock_t(iowait),
61 (unsigned long long)cputime64_to_clock_t(irq),
62 (unsigned long long)cputime64_to_clock_t(softirq),
63 (unsigned long long)cputime64_to_clock_t(steal),
64 (unsigned long long)cputime64_to_clock_t(guest));
65 for_each_online_cpu(i) {
66
67 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
68 user = kstat_cpu(i).cpustat.user;
69 nice = kstat_cpu(i).cpustat.nice;
70 system = kstat_cpu(i).cpustat.system;
71 idle = kstat_cpu(i).cpustat.idle;
72 iowait = kstat_cpu(i).cpustat.iowait;
73 irq = kstat_cpu(i).cpustat.irq;
74 softirq = kstat_cpu(i).cpustat.softirq;
75 steal = kstat_cpu(i).cpustat.steal;
76 guest = kstat_cpu(i).cpustat.guest;
77 seq_printf(p,
78 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
79 i,
80 (unsigned long long)cputime64_to_clock_t(user),
81 (unsigned long long)cputime64_to_clock_t(nice),
82 (unsigned long long)cputime64_to_clock_t(system),
83 (unsigned long long)cputime64_to_clock_t(idle),
84 (unsigned long long)cputime64_to_clock_t(iowait),
85 (unsigned long long)cputime64_to_clock_t(irq),
86 (unsigned long long)cputime64_to_clock_t(softirq),
87 (unsigned long long)cputime64_to_clock_t(steal),
88 (unsigned long long)cputime64_to_clock_t(guest));
89 }
90 seq_printf(p, "intr %llu", (unsigned long long)sum);
91
92 /* sum again ? it could be updated? */
93 for_each_irq_nr(j) {
94 per_irq_sum = 0;
95
96 for_each_possible_cpu(i)
97 per_irq_sum += kstat_irqs_cpu(j, i);
98
99 seq_printf(p, " %u", per_irq_sum);
100 }
101
102 seq_printf(p,
103 "\nctxt %llu\n"
104 "btime %lu\n"
105 "processes %lu\n"
106 "procs_running %lu\n"
107 "procs_blocked %lu\n",
108 nr_context_switches(),
109 (unsigned long)jif,
110 total_forks,
111 nr_running(),
112 nr_iowait());
113
114 return 0;
115}
116
117static int stat_open(struct inode *inode, struct file *file)
118{
119 unsigned size = 4096 * (1 + num_possible_cpus() / 32);
120 char *buf;
121 struct seq_file *m;
122 int res;
123
124 /* don't ask for more than the kmalloc() max size, currently 128 KB */
125 if (size > 128 * 1024)
126 size = 128 * 1024;
127 buf = kmalloc(size, GFP_KERNEL);
128 if (!buf)
129 return -ENOMEM;
130
131 res = single_open(file, show_stat, NULL);
132 if (!res) {
133 m = file->private_data;
134 m->buf = buf;
135 m->size = size;
136 } else
137 kfree(buf);
138 return res;
139}
140
141static const struct file_operations proc_stat_operations = {
142 .open = stat_open,
143 .read = seq_read,
144 .llseek = seq_lseek,
145 .release = single_release,
146};
147
148static int __init proc_stat_init(void)
149{
150 proc_create("stat", 0, NULL, &proc_stat_operations);
151 return 0;
152}
153module_init(proc_stat_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4806830ea2a1..b770c095e45c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -198,11 +198,8 @@ static int do_maps_open(struct inode *inode, struct file *file,
198 return ret; 198 return ret;
199} 199}
200 200
201static int show_map(struct seq_file *m, void *v) 201static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
202{ 202{
203 struct proc_maps_private *priv = m->private;
204 struct task_struct *task = priv->task;
205 struct vm_area_struct *vma = v;
206 struct mm_struct *mm = vma->vm_mm; 203 struct mm_struct *mm = vma->vm_mm;
207 struct file *file = vma->vm_file; 204 struct file *file = vma->vm_file;
208 int flags = vma->vm_flags; 205 int flags = vma->vm_flags;
@@ -254,6 +251,15 @@ static int show_map(struct seq_file *m, void *v)
254 } 251 }
255 } 252 }
256 seq_putc(m, '\n'); 253 seq_putc(m, '\n');
254}
255
256static int show_map(struct seq_file *m, void *v)
257{
258 struct vm_area_struct *vma = v;
259 struct proc_maps_private *priv = m->private;
260 struct task_struct *task = priv->task;
261
262 show_map_vma(m, vma);
257 263
258 if (m->count < m->size) /* vma is copied successfully */ 264 if (m->count < m->size) /* vma is copied successfully */
259 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 265 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
@@ -364,9 +370,10 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
364 370
365static int show_smap(struct seq_file *m, void *v) 371static int show_smap(struct seq_file *m, void *v)
366{ 372{
373 struct proc_maps_private *priv = m->private;
374 struct task_struct *task = priv->task;
367 struct vm_area_struct *vma = v; 375 struct vm_area_struct *vma = v;
368 struct mem_size_stats mss; 376 struct mem_size_stats mss;
369 int ret;
370 struct mm_walk smaps_walk = { 377 struct mm_walk smaps_walk = {
371 .pmd_entry = smaps_pte_range, 378 .pmd_entry = smaps_pte_range,
372 .mm = vma->vm_mm, 379 .mm = vma->vm_mm,
@@ -378,9 +385,7 @@ static int show_smap(struct seq_file *m, void *v)
378 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 385 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
379 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 386 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
380 387
381 ret = show_map(m, v); 388 show_map_vma(m, vma);
382 if (ret)
383 return ret;
384 389
385 seq_printf(m, 390 seq_printf(m,
386 "Size: %8lu kB\n" 391 "Size: %8lu kB\n"
@@ -402,7 +407,9 @@ static int show_smap(struct seq_file *m, void *v)
402 mss.referenced >> 10, 407 mss.referenced >> 10,
403 mss.swap >> 10); 408 mss.swap >> 10);
404 409
405 return ret; 410 if (m->count < m->size) /* vma is copied successfully */
411 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
412 return 0;
406} 413}
407 414
408static const struct seq_operations proc_pid_smaps_op = { 415static const struct seq_operations proc_pid_smaps_op = {
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
new file mode 100644
index 000000000000..df26aa88fa47
--- /dev/null
+++ b/fs/proc/uptime.c
@@ -0,0 +1,45 @@
1#include <linux/init.h>
2#include <linux/proc_fs.h>
3#include <linux/sched.h>
4#include <linux/time.h>
5#include <asm/cputime.h>
6
7static int proc_calc_metrics(char *page, char **start, off_t off,
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
23{
24 struct timespec uptime;
25 struct timespec idle;
26 int len;
27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
28
29 do_posix_clock_monotonic_gettime(&uptime);
30 monotonic_to_bootbased(&uptime);
31 cputime_to_timespec(idletime, &idle);
32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n",
33 (unsigned long) uptime.tv_sec,
34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
35 (unsigned long) idle.tv_sec,
36 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
37 return proc_calc_metrics(page, start, off, count, eof, len);
38}
39
40static int __init proc_uptime_init(void)
41{
42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL);
43 return 0;
44}
45module_init(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
new file mode 100644
index 000000000000..76817a60678c
--- /dev/null
+++ b/fs/proc/version.c
@@ -0,0 +1,34 @@
1#include <linux/fs.h>
2#include <linux/init.h>
3#include <linux/kernel.h>
4#include <linux/proc_fs.h>
5#include <linux/seq_file.h>
6#include <linux/utsname.h>
7
8static int version_proc_show(struct seq_file *m, void *v)
9{
10 seq_printf(m, linux_proc_banner,
11 utsname()->sysname,
12 utsname()->release,
13 utsname()->version);
14 return 0;
15}
16
17static int version_proc_open(struct inode *inode, struct file *file)
18{
19 return single_open(file, version_proc_show, NULL);
20}
21
22static const struct file_operations version_proc_fops = {
23 .open = version_proc_open,
24 .read = seq_read,
25 .llseek = seq_lseek,
26 .release = single_release,
27};
28
29static int __init proc_version_init(void)
30{
31 proc_create("version", 0, NULL, &version_proc_fops);
32 return 0;
33}
34module_init(proc_version_init);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a29..03ec59504906 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,10 +32,7 @@ static size_t elfcorebuf_sz;
32/* Total size of vmcore file. */ 32/* Total size of vmcore file. */
33static u64 vmcore_size; 33static u64 vmcore_size;
34 34
35/* Stores the physical address of elf header of crash image. */ 35static struct proc_dir_entry *proc_vmcore = NULL;
36unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
37
38struct proc_dir_entry *proc_vmcore = NULL;
39 36
40/* Reads a page from the oldmem device from given offset. */ 37/* Reads a page from the oldmem device from given offset. */
41static ssize_t read_from_oldmem(char *buf, size_t count, 38static ssize_t read_from_oldmem(char *buf, size_t count,
@@ -165,7 +162,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
165 return acc; 162 return acc;
166} 163}
167 164
168const struct file_operations proc_vmcore_operations = { 165static const struct file_operations proc_vmcore_operations = {
169 .read = read_vmcore, 166 .read = read_vmcore,
170}; 167};
171 168
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
647 int rc = 0; 644 int rc = 0;
648 645
649 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ 646 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
650 if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX)) 647 if (!(is_vmcore_usable()))
651 return rc; 648 return rc;
652 rc = parse_crash_elf_headers(); 649 rc = parse_crash_elf_headers();
653 if (rc) { 650 if (rc) {
@@ -655,7 +652,7 @@ static int __init vmcore_init(void)
655 return rc; 652 return rc;
656 } 653 }
657 654
658 /* Initialize /proc/vmcore size if proc is already up. */ 655 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
659 if (proc_vmcore) 656 if (proc_vmcore)
660 proc_vmcore->size = vmcore_size; 657 proc_vmcore->size = vmcore_size;
661 return 0; 658 return 0;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125af..76acdbc34611 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 goto add_error; 112 goto add_error;
113 113
114 if (!pagevec_add(&lru_pvec, page)) 114 if (!pagevec_add(&lru_pvec, page))
115 __pagevec_lru_add(&lru_pvec); 115 __pagevec_lru_add_file(&lru_pvec);
116 116
117 unlock_page(page); 117 unlock_page(page);
118 } 118 }
119 119
120 pagevec_lru_add(&lru_pvec); 120 pagevec_lru_add_file(&lru_pvec);
121 return 0; 121 return 0;
122 122
123 fsize_exceeded: 123 fsize_exceeded:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e49..f031d1c925f0 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
61 inode->i_mapping->a_ops = &ramfs_aops; 61 inode->i_mapping->a_ops = &ramfs_aops;
62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
64 mapping_set_unevictable(inode->i_mapping);
64 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 65 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
65 switch (mode & S_IFMT) { 66 switch (mode & S_IFMT) {
66 default: 67 default:
diff --git a/fs/read_write.c b/fs/read_write.c
index 9ba495d5a29b..969a6d9c020b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,39 +31,61 @@ const struct file_operations generic_ro_fops = {
31 31
32EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
33 33
34/**
35 * generic_file_llseek_unlocked - lockless generic llseek implementation
36 * @file: file structure to seek on
37 * @offset: file offset to seek to
38 * @origin: type of seek
39 *
40 * Updates the file offset to the value specified by @offset and @origin.
41 * Locking must be provided by the caller.
42 */
34loff_t 43loff_t
35generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 44generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
36{ 45{
37 loff_t retval;
38 struct inode *inode = file->f_mapping->host; 46 struct inode *inode = file->f_mapping->host;
39 47
40 switch (origin) { 48 switch (origin) {
41 case SEEK_END: 49 case SEEK_END:
42 offset += inode->i_size; 50 offset += inode->i_size;
43 break; 51 break;
44 case SEEK_CUR: 52 case SEEK_CUR:
45 offset += file->f_pos; 53 offset += file->f_pos;
54 break;
46 } 55 }
47 retval = -EINVAL; 56
48 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 57 if (offset < 0 || offset > inode->i_sb->s_maxbytes)
49 /* Special lock needed here? */ 58 return -EINVAL;
50 if (offset != file->f_pos) { 59
51 file->f_pos = offset; 60 /* Special lock needed here? */
52 file->f_version = 0; 61 if (offset != file->f_pos) {
53 } 62 file->f_pos = offset;
54 retval = offset; 63 file->f_version = 0;
55 } 64 }
56 return retval; 65
66 return offset;
57} 67}
58EXPORT_SYMBOL(generic_file_llseek_unlocked); 68EXPORT_SYMBOL(generic_file_llseek_unlocked);
59 69
70/**
71 * generic_file_llseek - generic llseek implementation for regular files
72 * @file: file structure to seek on
73 * @offset: file offset to seek to
74 * @origin: type of seek
75 *
76 * This is a generic implemenation of ->llseek useable for all normal local
77 * filesystems. It just updates the file offset to the value specified by
78 * @offset and @origin under i_mutex.
79 */
60loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 80loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
61{ 81{
62 loff_t n; 82 loff_t rval;
83
63 mutex_lock(&file->f_dentry->d_inode->i_mutex); 84 mutex_lock(&file->f_dentry->d_inode->i_mutex);
64 n = generic_file_llseek_unlocked(file, offset, origin); 85 rval = generic_file_llseek_unlocked(file, offset, origin);
65 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 86 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
66 return n; 87
88 return rval;
67} 89}
68EXPORT_SYMBOL(generic_file_llseek); 90EXPORT_SYMBOL(generic_file_llseek);
69 91
diff --git a/fs/readdir.c b/fs/readdir.c
index 93a7559bbfd8..b318d9b5af2e 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -117,7 +117,7 @@ asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * di
117 buf.dirent = dirent; 117 buf.dirent = dirent;
118 118
119 error = vfs_readdir(file, fillonedir, &buf); 119 error = vfs_readdir(file, fillonedir, &buf);
120 if (error >= 0) 120 if (buf.result)
121 error = buf.result; 121 error = buf.result;
122 122
123 fput(file); 123 fput(file);
@@ -209,9 +209,8 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren
209 buf.error = 0; 209 buf.error = 0;
210 210
211 error = vfs_readdir(file, filldir, &buf); 211 error = vfs_readdir(file, filldir, &buf);
212 if (error < 0) 212 if (error >= 0)
213 goto out_putf; 213 error = buf.error;
214 error = buf.error;
215 lastdirent = buf.previous; 214 lastdirent = buf.previous;
216 if (lastdirent) { 215 if (lastdirent) {
217 if (put_user(file->f_pos, &lastdirent->d_off)) 216 if (put_user(file->f_pos, &lastdirent->d_off))
@@ -219,8 +218,6 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren
219 else 218 else
220 error = count - buf.count; 219 error = count - buf.count;
221 } 220 }
222
223out_putf:
224 fput(file); 221 fput(file);
225out: 222out:
226 return error; 223 return error;
@@ -293,19 +290,16 @@ asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * d
293 buf.error = 0; 290 buf.error = 0;
294 291
295 error = vfs_readdir(file, filldir64, &buf); 292 error = vfs_readdir(file, filldir64, &buf);
296 if (error < 0) 293 if (error >= 0)
297 goto out_putf; 294 error = buf.error;
298 error = buf.error;
299 lastdirent = buf.previous; 295 lastdirent = buf.previous;
300 if (lastdirent) { 296 if (lastdirent) {
301 typeof(lastdirent->d_off) d_off = file->f_pos; 297 typeof(lastdirent->d_off) d_off = file->f_pos;
302 error = -EFAULT;
303 if (__put_user(d_off, &lastdirent->d_off)) 298 if (__put_user(d_off, &lastdirent->d_off))
304 goto out_putf; 299 error = -EFAULT;
305 error = count - buf.count; 300 else
301 error = count - buf.count;
306 } 302 }
307
308out_putf:
309 fput(file); 303 fput(file);
310out: 304out:
311 return error; 305 return error;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index a804903d31d1..33408417038c 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -296,6 +296,7 @@ const struct file_operations reiserfs_file_operations = {
296 .aio_write = generic_file_aio_write, 296 .aio_write = generic_file_aio_write,
297 .splice_read = generic_file_splice_read, 297 .splice_read = generic_file_splice_read,
298 .splice_write = generic_file_splice_write, 298 .splice_write = generic_file_splice_write,
299 .llseek = generic_file_llseek,
299}; 300};
300 301
301const struct inode_operations reiserfs_file_inode_operations = { 302const struct inode_operations reiserfs_file_inode_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 5699171212ae..6c4c2c69449f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1522,7 +1522,6 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1522 1522
1523{ 1523{
1524 struct cpu_key key; 1524 struct cpu_key key;
1525 struct dentry *result;
1526 struct inode *inode; 1525 struct inode *inode;
1527 1526
1528 key.on_disk_key.k_objectid = objectid; 1527 key.on_disk_key.k_objectid = objectid;
@@ -1535,16 +1534,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1535 inode = NULL; 1534 inode = NULL;
1536 } 1535 }
1537 reiserfs_write_unlock(sb); 1536 reiserfs_write_unlock(sb);
1538 if (!inode) 1537
1539 inode = ERR_PTR(-ESTALE); 1538 return d_obtain_alias(inode);
1540 if (IS_ERR(inode))
1541 return ERR_CAST(inode);
1542 result = d_alloc_anon(inode);
1543 if (!result) {
1544 iput(inode);
1545 return ERR_PTR(-ENOMEM);
1546 }
1547 return result;
1548} 1539}
1549 1540
1550struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, 1541struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c21df71943a6..9643c3bbeb3b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super,
2575 if (journal->j_dev_bd != NULL) { 2575 if (journal->j_dev_bd != NULL) {
2576 if (journal->j_dev_bd->bd_dev != super->s_dev) 2576 if (journal->j_dev_bd->bd_dev != super->s_dev)
2577 bd_release(journal->j_dev_bd); 2577 bd_release(journal->j_dev_bd);
2578 result = blkdev_put(journal->j_dev_bd); 2578 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
2579 journal->j_dev_bd = NULL; 2579 journal->j_dev_bd = NULL;
2580 } 2580 }
2581 2581
@@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super,
2593{ 2593{
2594 int result; 2594 int result;
2595 dev_t jdev; 2595 dev_t jdev;
2596 int blkdev_mode = FMODE_READ | FMODE_WRITE; 2596 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
2597 char b[BDEVNAME_SIZE]; 2597 char b[BDEVNAME_SIZE];
2598 2598
2599 result = 0; 2599 result = 0;
@@ -2608,6 +2608,7 @@ static int journal_init_dev(struct super_block *super,
2608 /* there is no "jdev" option and journal is on separate device */ 2608 /* there is no "jdev" option and journal is on separate device */
2609 if ((!jdev_name || !jdev_name[0])) { 2609 if ((!jdev_name || !jdev_name[0])) {
2610 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2610 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2611 journal->j_dev_mode = blkdev_mode;
2611 if (IS_ERR(journal->j_dev_bd)) { 2612 if (IS_ERR(journal->j_dev_bd)) {
2612 result = PTR_ERR(journal->j_dev_bd); 2613 result = PTR_ERR(journal->j_dev_bd);
2613 journal->j_dev_bd = NULL; 2614 journal->j_dev_bd = NULL;
@@ -2618,7 +2619,7 @@ static int journal_init_dev(struct super_block *super,
2618 } else if (jdev != super->s_dev) { 2619 } else if (jdev != super->s_dev) {
2619 result = bd_claim(journal->j_dev_bd, journal); 2620 result = bd_claim(journal->j_dev_bd, journal);
2620 if (result) { 2621 if (result) {
2621 blkdev_put(journal->j_dev_bd); 2622 blkdev_put(journal->j_dev_bd, blkdev_mode);
2622 return result; 2623 return result;
2623 } 2624 }
2624 2625
@@ -2628,7 +2629,9 @@ static int journal_init_dev(struct super_block *super,
2628 return 0; 2629 return 0;
2629 } 2630 }
2630 2631
2631 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); 2632 journal->j_dev_mode = blkdev_mode;
2633 journal->j_dev_bd = open_bdev_exclusive(jdev_name,
2634 blkdev_mode, journal);
2632 if (IS_ERR(journal->j_dev_bd)) { 2635 if (IS_ERR(journal->j_dev_bd)) {
2633 result = PTR_ERR(journal->j_dev_bd); 2636 result = PTR_ERR(journal->j_dev_bd);
2634 journal->j_dev_bd = NULL; 2637 journal->j_dev_bd = NULL;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index c1add28dd45e..f89ebb943f3f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -383,7 +383,6 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
383 struct inode *inode = NULL; 383 struct inode *inode = NULL;
384 struct reiserfs_dir_entry de; 384 struct reiserfs_dir_entry de;
385 INITIALIZE_PATH(path_to_entry); 385 INITIALIZE_PATH(path_to_entry);
386 struct dentry *parent;
387 struct inode *dir = child->d_inode; 386 struct inode *dir = child->d_inode;
388 387
389 if (dir->i_nlink == 0) { 388 if (dir->i_nlink == 0) {
@@ -401,15 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
401 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); 400 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
402 reiserfs_write_unlock(dir->i_sb); 401 reiserfs_write_unlock(dir->i_sb);
403 402
404 if (!inode || IS_ERR(inode)) { 403 return d_obtain_alias(inode);
405 return ERR_PTR(-EACCES);
406 }
407 parent = d_alloc_anon(inode);
408 if (!parent) {
409 iput(inode);
410 parent = ERR_PTR(-ENOMEM);
411 }
412 return parent;
413} 404}
414 405
415/* add entry to the directory (entry can be hidden). 406/* add entry to the directory (entry can be hidden).
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d318c7e663fa..663a91f5dce8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2058,10 +2058,10 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
2058 * Standard function to be called on quota_on 2058 * Standard function to be called on quota_on
2059 */ 2059 */
2060static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, 2060static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2061 char *path, int remount) 2061 char *name, int remount)
2062{ 2062{
2063 int err; 2063 int err;
2064 struct nameidata nd; 2064 struct path path;
2065 struct inode *inode; 2065 struct inode *inode;
2066 struct reiserfs_transaction_handle th; 2066 struct reiserfs_transaction_handle th;
2067 2067
@@ -2069,16 +2069,16 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2069 return -EINVAL; 2069 return -EINVAL;
2070 /* No more checks needed? Path and format_id are bogus anyway... */ 2070 /* No more checks needed? Path and format_id are bogus anyway... */
2071 if (remount) 2071 if (remount)
2072 return vfs_quota_on(sb, type, format_id, path, 1); 2072 return vfs_quota_on(sb, type, format_id, name, 1);
2073 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2073 err = kern_path(name, LOOKUP_FOLLOW, &path);
2074 if (err) 2074 if (err)
2075 return err; 2075 return err;
2076 /* Quotafile not on the same filesystem? */ 2076 /* Quotafile not on the same filesystem? */
2077 if (nd.path.mnt->mnt_sb != sb) { 2077 if (path.mnt->mnt_sb != sb) {
2078 err = -EXDEV; 2078 err = -EXDEV;
2079 goto out; 2079 goto out;
2080 } 2080 }
2081 inode = nd.path.dentry->d_inode; 2081 inode = path.dentry->d_inode;
2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2083 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2083 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2084 err = reiserfs_unpack(inode, NULL); 2084 err = reiserfs_unpack(inode, NULL);
@@ -2094,7 +2094,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2094 /* Journaling quota? */ 2094 /* Journaling quota? */
2095 if (REISERFS_SB(sb)->s_qf_names[type]) { 2095 if (REISERFS_SB(sb)->s_qf_names[type]) {
2096 /* Quotafile not of fs root? */ 2096 /* Quotafile not of fs root? */
2097 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2097 if (path.dentry->d_parent != sb->s_root)
2098 reiserfs_warning(sb, 2098 reiserfs_warning(sb,
2099 "reiserfs: Quota file not on filesystem root. " 2099 "reiserfs: Quota file not on filesystem root. "
2100 "Journalled quota will not work."); 2100 "Journalled quota will not work.");
@@ -2113,9 +2113,9 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2113 if (err) 2113 if (err)
2114 goto out; 2114 goto out;
2115 } 2115 }
2116 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 2116 err = vfs_quota_on_path(sb, type, format_id, &path);
2117out: 2117out:
2118 path_put(&nd.path); 2118 path_put(&path);
2119 return err; 2119 return err;
2120} 2120}
2121 2121
diff --git a/fs/select.c b/fs/select.c
index da0e88201c3a..87df51eadcf2 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,9 +24,64 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
31
32/*
33 * Estimate expected accuracy in ns from a timeval.
34 *
35 * After quite a bit of churning around, we've settled on
36 * a simple thing of taking 0.1% of the timeout as the
37 * slack, with a cap of 100 msec.
38 * "nice" tasks get a 0.5% slack instead.
39 *
40 * Consider this comment an open invitation to come up with even
41 * better solutions..
42 */
43
44static long __estimate_accuracy(struct timespec *tv)
45{
46 long slack;
47 int divfactor = 1000;
48
49 if (task_nice(current) > 0)
50 divfactor = divfactor / 5;
51
52 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54
55 if (slack > 100 * NSEC_PER_MSEC)
56 slack = 100 * NSEC_PER_MSEC;
57
58 if (slack < 0)
59 slack = 0;
60 return slack;
61}
62
63static long estimate_accuracy(struct timespec *tv)
64{
65 unsigned long ret;
66 struct timespec now;
67
68 /*
69 * Realtime tasks get a slack of 0 for obvious reasons.
70 */
71
72 if (rt_task(current))
73 return 0;
74
75 ktime_get_ts(&now);
76 now = timespec_sub(*tv, now);
77 ret = __estimate_accuracy(&now);
78 if (ret < current->timer_slack_ns)
79 return current->timer_slack_ns;
80 return ret;
81}
82
83
84
30struct poll_table_page { 85struct poll_table_page {
31 struct poll_table_page * next; 86 struct poll_table_page * next;
32 struct poll_table_entry * entry; 87 struct poll_table_entry * entry;
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
130 add_wait_queue(wait_address, &entry->wait); 185 add_wait_queue(wait_address, &entry->wait);
131} 186}
132 187
188/**
189 * poll_select_set_timeout - helper function to setup the timeout value
190 * @to: pointer to timespec variable for the final timeout
191 * @sec: seconds (from user space)
192 * @nsec: nanoseconds (from user space)
193 *
194 * Note, we do not use a timespec for the user space value here, That
195 * way we can use the function for timeval and compat interfaces as well.
196 *
197 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
198 */
199int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
200{
201 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
202
203 if (!timespec_valid(&ts))
204 return -EINVAL;
205
206 /* Optimize for the zero timeout value here */
207 if (!sec && !nsec) {
208 to->tv_sec = to->tv_nsec = 0;
209 } else {
210 ktime_get_ts(to);
211 *to = timespec_add_safe(*to, ts);
212 }
213 return 0;
214}
215
216static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
217 int timeval, int ret)
218{
219 struct timespec rts;
220 struct timeval rtv;
221
222 if (!p)
223 return ret;
224
225 if (current->personality & STICKY_TIMEOUTS)
226 goto sticky;
227
228 /* No update for zero timeout */
229 if (!end_time->tv_sec && !end_time->tv_nsec)
230 return ret;
231
232 ktime_get_ts(&rts);
233 rts = timespec_sub(*end_time, rts);
234 if (rts.tv_sec < 0)
235 rts.tv_sec = rts.tv_nsec = 0;
236
237 if (timeval) {
238 rtv.tv_sec = rts.tv_sec;
239 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
240
241 if (!copy_to_user(p, &rtv, sizeof(rtv)))
242 return ret;
243
244 } else if (!copy_to_user(p, &rts, sizeof(rts)))
245 return ret;
246
247 /*
248 * If an application puts its timeval in read-only memory, we
249 * don't want the Linux-specific update to the timeval to
250 * cause a fault after the select has completed
251 * successfully. However, because we're not updating the
252 * timeval, we can't restart the system call.
253 */
254
255sticky:
256 if (ret == -ERESTARTNOHAND)
257 ret = -EINTR;
258 return ret;
259}
260
133#define FDS_IN(fds, n) (fds->in + n) 261#define FDS_IN(fds, n) (fds->in + n)
134#define FDS_OUT(fds, n) (fds->out + n) 262#define FDS_OUT(fds, n) (fds->out + n)
135#define FDS_EX(fds, n) (fds->ex + n) 263#define FDS_EX(fds, n) (fds->ex + n)
@@ -182,11 +310,13 @@ get_max:
182#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 310#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
183#define POLLEX_SET (POLLPRI) 311#define POLLEX_SET (POLLPRI)
184 312
185int do_select(int n, fd_set_bits *fds, s64 *timeout) 313int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
186{ 314{
315 ktime_t expire, *to = NULL;
187 struct poll_wqueues table; 316 struct poll_wqueues table;
188 poll_table *wait; 317 poll_table *wait;
189 int retval, i; 318 int retval, i, timed_out = 0;
319 unsigned long slack = 0;
190 320
191 rcu_read_lock(); 321 rcu_read_lock();
192 retval = max_select_fd(n, fds); 322 retval = max_select_fd(n, fds);
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
198 328
199 poll_initwait(&table); 329 poll_initwait(&table);
200 wait = &table.pt; 330 wait = &table.pt;
201 if (!*timeout) 331 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
202 wait = NULL; 332 wait = NULL;
333 timed_out = 1;
334 }
335
336 if (end_time && !timed_out)
337 slack = estimate_accuracy(end_time);
338
203 retval = 0; 339 retval = 0;
204 for (;;) { 340 for (;;) {
205 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 341 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
206 long __timeout;
207 342
208 set_current_state(TASK_INTERRUPTIBLE); 343 set_current_state(TASK_INTERRUPTIBLE);
209 344
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
259 cond_resched(); 394 cond_resched();
260 } 395 }
261 wait = NULL; 396 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 397 if (retval || timed_out || signal_pending(current))
263 break; 398 break;
264 if (table.error) { 399 if (table.error) {
265 retval = table.error; 400 retval = table.error;
266 break; 401 break;
267 } 402 }
268 403
269 if (*timeout < 0) { 404 /*
270 /* Wait indefinitely */ 405 * If this is the first loop and we have a timeout
271 __timeout = MAX_SCHEDULE_TIMEOUT; 406 * given, then we convert to ktime_t and set the to
272 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 407 * pointer to the expiry value.
273 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 408 */
274 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 409 if (end_time && !to) {
275 *timeout -= __timeout; 410 expire = timespec_to_ktime(*end_time);
276 } else { 411 to = &expire;
277 __timeout = *timeout;
278 *timeout = 0;
279 } 412 }
280 __timeout = schedule_timeout(__timeout); 413
281 if (*timeout >= 0) 414 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
282 *timeout += __timeout; 415 timed_out = 1;
283 } 416 }
284 __set_current_state(TASK_RUNNING); 417 __set_current_state(TASK_RUNNING);
285 418
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 433 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
301 434
302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 435int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
303 fd_set __user *exp, s64 *timeout) 436 fd_set __user *exp, struct timespec *end_time)
304{ 437{
305 fd_set_bits fds; 438 fd_set_bits fds;
306 void *bits; 439 void *bits;
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
351 zero_fd_set(n, fds.res_out); 484 zero_fd_set(n, fds.res_out);
352 zero_fd_set(n, fds.res_ex); 485 zero_fd_set(n, fds.res_ex);
353 486
354 ret = do_select(n, &fds, timeout); 487 ret = do_select(n, &fds, end_time);
355 488
356 if (ret < 0) 489 if (ret < 0)
357 goto out; 490 goto out;
@@ -377,7 +510,7 @@ out_nofds:
377asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 510asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, struct timeval __user *tvp) 511 fd_set __user *exp, struct timeval __user *tvp)
379{ 512{
380 s64 timeout = -1; 513 struct timespec end_time, *to = NULL;
381 struct timeval tv; 514 struct timeval tv;
382 int ret; 515 int ret;
383 516
@@ -385,43 +518,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
385 if (copy_from_user(&tv, tvp, sizeof(tv))) 518 if (copy_from_user(&tv, tvp, sizeof(tv)))
386 return -EFAULT; 519 return -EFAULT;
387 520
388 if (tv.tv_sec < 0 || tv.tv_usec < 0) 521 to = &end_time;
522 if (poll_select_set_timeout(to,
523 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
524 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
389 return -EINVAL; 525 return -EINVAL;
390
391 /* Cast to u64 to make GCC stop complaining */
392 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
393 timeout = -1; /* infinite */
394 else {
395 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
396 timeout += tv.tv_sec * HZ;
397 }
398 } 526 }
399 527
400 ret = core_sys_select(n, inp, outp, exp, &timeout); 528 ret = core_sys_select(n, inp, outp, exp, to);
401 529 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
402 if (tvp) {
403 struct timeval rtv;
404
405 if (current->personality & STICKY_TIMEOUTS)
406 goto sticky;
407 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
408 rtv.tv_sec = timeout;
409 if (timeval_compare(&rtv, &tv) >= 0)
410 rtv = tv;
411 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
412sticky:
413 /*
414 * If an application puts its timeval in read-only
415 * memory, we don't want the Linux-specific update to
416 * the timeval to cause a fault after the select has
417 * completed successfully. However, because we're not
418 * updating the timeval, we can't restart the system
419 * call.
420 */
421 if (ret == -ERESTARTNOHAND)
422 ret = -EINTR;
423 }
424 }
425 530
426 return ret; 531 return ret;
427} 532}
@@ -431,25 +536,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
431 fd_set __user *exp, struct timespec __user *tsp, 536 fd_set __user *exp, struct timespec __user *tsp,
432 const sigset_t __user *sigmask, size_t sigsetsize) 537 const sigset_t __user *sigmask, size_t sigsetsize)
433{ 538{
434 s64 timeout = MAX_SCHEDULE_TIMEOUT;
435 sigset_t ksigmask, sigsaved; 539 sigset_t ksigmask, sigsaved;
436 struct timespec ts; 540 struct timespec ts, end_time, *to = NULL;
437 int ret; 541 int ret;
438 542
439 if (tsp) { 543 if (tsp) {
440 if (copy_from_user(&ts, tsp, sizeof(ts))) 544 if (copy_from_user(&ts, tsp, sizeof(ts)))
441 return -EFAULT; 545 return -EFAULT;
442 546
443 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 547 to = &end_time;
548 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
444 return -EINVAL; 549 return -EINVAL;
445
446 /* Cast to u64 to make GCC stop complaining */
447 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
448 timeout = -1; /* infinite */
449 else {
450 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
451 timeout += ts.tv_sec * HZ;
452 }
453 } 550 }
454 551
455 if (sigmask) { 552 if (sigmask) {
@@ -463,32 +560,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
463 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 560 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
464 } 561 }
465 562
466 ret = core_sys_select(n, inp, outp, exp, &timeout); 563 ret = core_sys_select(n, inp, outp, exp, &end_time);
467 564 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
468 if (tsp) {
469 struct timespec rts;
470
471 if (current->personality & STICKY_TIMEOUTS)
472 goto sticky;
473 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
474 1000;
475 rts.tv_sec = timeout;
476 if (timespec_compare(&rts, &ts) >= 0)
477 rts = ts;
478 if (copy_to_user(tsp, &rts, sizeof(rts))) {
479sticky:
480 /*
481 * If an application puts its timeval in read-only
482 * memory, we don't want the Linux-specific update to
483 * the timeval to cause a fault after the select has
484 * completed successfully. However, because we're not
485 * updating the timeval, we can't restart the system
486 * call.
487 */
488 if (ret == -ERESTARTNOHAND)
489 ret = -EINTR;
490 }
491 }
492 565
493 if (ret == -ERESTARTNOHAND) { 566 if (ret == -ERESTARTNOHAND) {
494 /* 567 /*
@@ -574,18 +647,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
574} 647}
575 648
576static int do_poll(unsigned int nfds, struct poll_list *list, 649static int do_poll(unsigned int nfds, struct poll_list *list,
577 struct poll_wqueues *wait, s64 *timeout) 650 struct poll_wqueues *wait, struct timespec *end_time)
578{ 651{
579 int count = 0;
580 poll_table* pt = &wait->pt; 652 poll_table* pt = &wait->pt;
653 ktime_t expire, *to = NULL;
654 int timed_out = 0, count = 0;
655 unsigned long slack = 0;
581 656
582 /* Optimise the no-wait case */ 657 /* Optimise the no-wait case */
583 if (!(*timeout)) 658 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
584 pt = NULL; 659 pt = NULL;
660 timed_out = 1;
661 }
662
663 if (end_time && !timed_out)
664 slack = estimate_accuracy(end_time);
585 665
586 for (;;) { 666 for (;;) {
587 struct poll_list *walk; 667 struct poll_list *walk;
588 long __timeout;
589 668
590 set_current_state(TASK_INTERRUPTIBLE); 669 set_current_state(TASK_INTERRUPTIBLE);
591 for (walk = list; walk != NULL; walk = walk->next) { 670 for (walk = list; walk != NULL; walk = walk->next) {
@@ -617,27 +696,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
617 if (signal_pending(current)) 696 if (signal_pending(current))
618 count = -EINTR; 697 count = -EINTR;
619 } 698 }
620 if (count || !*timeout) 699 if (count || timed_out)
621 break; 700 break;
622 701
623 if (*timeout < 0) { 702 /*
624 /* Wait indefinitely */ 703 * If this is the first loop and we have a timeout
625 __timeout = MAX_SCHEDULE_TIMEOUT; 704 * given, then we convert to ktime_t and set the to
626 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 705 * pointer to the expiry value.
627 /* 706 */
628 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 707 if (end_time && !to) {
629 * a loop 708 expire = timespec_to_ktime(*end_time);
630 */ 709 to = &expire;
631 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
632 *timeout -= __timeout;
633 } else {
634 __timeout = *timeout;
635 *timeout = 0;
636 } 710 }
637 711
638 __timeout = schedule_timeout(__timeout); 712 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
639 if (*timeout >= 0) 713 timed_out = 1;
640 *timeout += __timeout;
641 } 714 }
642 __set_current_state(TASK_RUNNING); 715 __set_current_state(TASK_RUNNING);
643 return count; 716 return count;
@@ -646,7 +719,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
646#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 719#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
647 sizeof(struct pollfd)) 720 sizeof(struct pollfd))
648 721
649int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 722int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
723 struct timespec *end_time)
650{ 724{
651 struct poll_wqueues table; 725 struct poll_wqueues table;
652 int err = -EFAULT, fdcount, len, size; 726 int err = -EFAULT, fdcount, len, size;
@@ -686,7 +760,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
686 } 760 }
687 761
688 poll_initwait(&table); 762 poll_initwait(&table);
689 fdcount = do_poll(nfds, head, &table, timeout); 763 fdcount = do_poll(nfds, head, &table, end_time);
690 poll_freewait(&table); 764 poll_freewait(&table);
691 765
692 for (walk = head; walk; walk = walk->next) { 766 for (walk = head; walk; walk = walk->next) {
@@ -712,16 +786,21 @@ out_fds:
712 786
713static long do_restart_poll(struct restart_block *restart_block) 787static long do_restart_poll(struct restart_block *restart_block)
714{ 788{
715 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 789 struct pollfd __user *ufds = restart_block->poll.ufds;
716 int nfds = restart_block->arg1; 790 int nfds = restart_block->poll.nfds;
717 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 791 struct timespec *to = NULL, end_time;
718 int ret; 792 int ret;
719 793
720 ret = do_sys_poll(ufds, nfds, &timeout); 794 if (restart_block->poll.has_timeout) {
795 end_time.tv_sec = restart_block->poll.tv_sec;
796 end_time.tv_nsec = restart_block->poll.tv_nsec;
797 to = &end_time;
798 }
799
800 ret = do_sys_poll(ufds, nfds, to);
801
721 if (ret == -EINTR) { 802 if (ret == -EINTR) {
722 restart_block->fn = do_restart_poll; 803 restart_block->fn = do_restart_poll;
723 restart_block->arg2 = timeout & 0xFFFFFFFF;
724 restart_block->arg3 = (u64)timeout >> 32;
725 ret = -ERESTART_RESTARTBLOCK; 804 ret = -ERESTART_RESTARTBLOCK;
726 } 805 }
727 return ret; 806 return ret;
@@ -730,31 +809,32 @@ static long do_restart_poll(struct restart_block *restart_block)
730asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 809asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
731 long timeout_msecs) 810 long timeout_msecs)
732{ 811{
733 s64 timeout_jiffies; 812 struct timespec end_time, *to = NULL;
734 int ret; 813 int ret;
735 814
736 if (timeout_msecs > 0) { 815 if (timeout_msecs >= 0) {
737#if HZ > 1000 816 to = &end_time;
738 /* We can only overflow if HZ > 1000 */ 817 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
739 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 818 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
740 timeout_jiffies = -1;
741 else
742#endif
743 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
744 } else {
745 /* Infinite (< 0) or no (0) timeout */
746 timeout_jiffies = timeout_msecs;
747 } 819 }
748 820
749 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 821 ret = do_sys_poll(ufds, nfds, to);
822
750 if (ret == -EINTR) { 823 if (ret == -EINTR) {
751 struct restart_block *restart_block; 824 struct restart_block *restart_block;
825
752 restart_block = &current_thread_info()->restart_block; 826 restart_block = &current_thread_info()->restart_block;
753 restart_block->fn = do_restart_poll; 827 restart_block->fn = do_restart_poll;
754 restart_block->arg0 = (unsigned long)ufds; 828 restart_block->poll.ufds = ufds;
755 restart_block->arg1 = nfds; 829 restart_block->poll.nfds = nfds;
756 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 830
757 restart_block->arg3 = (u64)timeout_jiffies >> 32; 831 if (timeout_msecs >= 0) {
832 restart_block->poll.tv_sec = end_time.tv_sec;
833 restart_block->poll.tv_nsec = end_time.tv_nsec;
834 restart_block->poll.has_timeout = 1;
835 } else
836 restart_block->poll.has_timeout = 0;
837
758 ret = -ERESTART_RESTARTBLOCK; 838 ret = -ERESTART_RESTARTBLOCK;
759 } 839 }
760 return ret; 840 return ret;
@@ -766,21 +846,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
766 size_t sigsetsize) 846 size_t sigsetsize)
767{ 847{
768 sigset_t ksigmask, sigsaved; 848 sigset_t ksigmask, sigsaved;
769 struct timespec ts; 849 struct timespec ts, end_time, *to = NULL;
770 s64 timeout = -1;
771 int ret; 850 int ret;
772 851
773 if (tsp) { 852 if (tsp) {
774 if (copy_from_user(&ts, tsp, sizeof(ts))) 853 if (copy_from_user(&ts, tsp, sizeof(ts)))
775 return -EFAULT; 854 return -EFAULT;
776 855
777 /* Cast to u64 to make GCC stop complaining */ 856 to = &end_time;
778 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 857 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
779 timeout = -1; /* infinite */ 858 return -EINVAL;
780 else {
781 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
782 timeout += ts.tv_sec * HZ;
783 }
784 } 859 }
785 860
786 if (sigmask) { 861 if (sigmask) {
@@ -794,7 +869,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
794 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 869 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
795 } 870 }
796 871
797 ret = do_sys_poll(ufds, nfds, &timeout); 872 ret = do_sys_poll(ufds, nfds, to);
798 873
799 /* We can restart this syscall, usually */ 874 /* We can restart this syscall, usually */
800 if (ret == -EINTR) { 875 if (ret == -EINTR) {
@@ -812,31 +887,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
812 } else if (sigmask) 887 } else if (sigmask)
813 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 888 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
814 889
815 if (tsp && timeout >= 0) { 890 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
816 struct timespec rts;
817
818 if (current->personality & STICKY_TIMEOUTS)
819 goto sticky;
820 /* Yes, we know it's actually an s64, but it's also positive. */
821 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
822 1000;
823 rts.tv_sec = timeout;
824 if (timespec_compare(&rts, &ts) >= 0)
825 rts = ts;
826 if (copy_to_user(tsp, &rts, sizeof(rts))) {
827 sticky:
828 /*
829 * If an application puts its timeval in read-only
830 * memory, we don't want the Linux-specific update to
831 * the timeval to cause a fault after the select has
832 * completed successfully. However, because we're not
833 * updating the timeval, we can't restart the system
834 * call.
835 */
836 if (ret == -ERESTARTNOHAND && timeout >= 0)
837 ret = -EINTR;
838 }
839 }
840 891
841 return ret; 892 return ret;
842} 893}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a933..eba2eabcd2b8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
452 452
453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) 453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
454{ 454{
455 size_t len = bitmap_scnprintf_len(nr_bits); 455 if (m->count < m->size) {
456 int len = bitmap_scnprintf(m->buf + m->count,
457 m->size - m->count, bits, nr_bits);
458 if (m->count + len < m->size) {
459 m->count += len;
460 return 0;
461 }
462 }
463 m->count = m->size;
464 return -1;
465}
466EXPORT_SYMBOL(seq_bitmap);
456 467
457 if (m->count + len < m->size) { 468int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
458 bitmap_scnprintf(m->buf + m->count, m->size - m->count, 469 unsigned int nr_bits)
459 bits, nr_bits); 470{
460 m->count += len; 471 if (m->count < m->size) {
461 return 0; 472 int len = bitmap_scnlistprintf(m->buf + m->count,
473 m->size - m->count, bits, nr_bits);
474 if (m->count + len < m->size) {
475 m->count += len;
476 return 0;
477 }
462 } 478 }
463 m->count = m->size; 479 m->count = m->size;
464 return -1; 480 return -1;
465} 481}
482EXPORT_SYMBOL(seq_bitmap_list);
466 483
467static void *single_start(struct seq_file *p, loff_t *pos) 484static void *single_start(struct seq_file *p, loff_t *pos)
468{ 485{
diff --git a/fs/splice.c b/fs/splice.c
index a1e701c27156..1abab5cee4ba 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
731 }; 731 };
732 732
733 /* 733 /*
734 * The actor worker might be calling ->prepare_write and 734 * The actor worker might be calling ->write_begin and
735 * ->commit_write. Most of the time, these expect i_mutex to 735 * ->write_end. Most of the time, these expect i_mutex to
736 * be held. Since this may result in an ABBA deadlock with 736 * be held. Since this may result in an ABBA deadlock with
737 * pipe->inode, we have to order lock acquiry here. 737 * pipe->inode, we have to order lock acquiry here.
738 */ 738 */
diff --git a/fs/super.c b/fs/super.c
index e931ae9511fe..400a7608f15e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -682,7 +682,7 @@ void emergency_remount(void)
682 * filesystems which don't use real block-devices. -- jrs 682 * filesystems which don't use real block-devices. -- jrs
683 */ 683 */
684 684
685static struct idr unnamed_dev_idr; 685static DEFINE_IDA(unnamed_dev_ida);
686static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 686static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
687 687
688int set_anon_super(struct super_block *s, void *data) 688int set_anon_super(struct super_block *s, void *data)
@@ -691,10 +691,10 @@ int set_anon_super(struct super_block *s, void *data)
691 int error; 691 int error;
692 692
693 retry: 693 retry:
694 if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) 694 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
695 return -ENOMEM; 695 return -ENOMEM;
696 spin_lock(&unnamed_dev_lock); 696 spin_lock(&unnamed_dev_lock);
697 error = idr_get_new(&unnamed_dev_idr, NULL, &dev); 697 error = ida_get_new(&unnamed_dev_ida, &dev);
698 spin_unlock(&unnamed_dev_lock); 698 spin_unlock(&unnamed_dev_lock);
699 if (error == -EAGAIN) 699 if (error == -EAGAIN)
700 /* We raced and lost with another CPU. */ 700 /* We raced and lost with another CPU. */
@@ -704,7 +704,7 @@ int set_anon_super(struct super_block *s, void *data)
704 704
705 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 705 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
706 spin_lock(&unnamed_dev_lock); 706 spin_lock(&unnamed_dev_lock);
707 idr_remove(&unnamed_dev_idr, dev); 707 ida_remove(&unnamed_dev_ida, dev);
708 spin_unlock(&unnamed_dev_lock); 708 spin_unlock(&unnamed_dev_lock);
709 return -EMFILE; 709 return -EMFILE;
710 } 710 }
@@ -720,17 +720,12 @@ void kill_anon_super(struct super_block *sb)
720 720
721 generic_shutdown_super(sb); 721 generic_shutdown_super(sb);
722 spin_lock(&unnamed_dev_lock); 722 spin_lock(&unnamed_dev_lock);
723 idr_remove(&unnamed_dev_idr, slot); 723 ida_remove(&unnamed_dev_ida, slot);
724 spin_unlock(&unnamed_dev_lock); 724 spin_unlock(&unnamed_dev_lock);
725} 725}
726 726
727EXPORT_SYMBOL(kill_anon_super); 727EXPORT_SYMBOL(kill_anon_super);
728 728
729void __init unnamed_dev_init(void)
730{
731 idr_init(&unnamed_dev_idr);
732}
733
734void kill_litter_super(struct super_block *sb) 729void kill_litter_super(struct super_block *sb)
735{ 730{
736 if (sb->s_root) 731 if (sb->s_root)
@@ -760,9 +755,13 @@ int get_sb_bdev(struct file_system_type *fs_type,
760{ 755{
761 struct block_device *bdev; 756 struct block_device *bdev;
762 struct super_block *s; 757 struct super_block *s;
758 fmode_t mode = FMODE_READ;
763 int error = 0; 759 int error = 0;
764 760
765 bdev = open_bdev_excl(dev_name, flags, fs_type); 761 if (!(flags & MS_RDONLY))
762 mode |= FMODE_WRITE;
763
764 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
766 if (IS_ERR(bdev)) 765 if (IS_ERR(bdev))
767 return PTR_ERR(bdev); 766 return PTR_ERR(bdev);
768 767
@@ -785,11 +784,12 @@ int get_sb_bdev(struct file_system_type *fs_type,
785 goto error_bdev; 784 goto error_bdev;
786 } 785 }
787 786
788 close_bdev_excl(bdev); 787 close_bdev_exclusive(bdev, mode);
789 } else { 788 } else {
790 char b[BDEVNAME_SIZE]; 789 char b[BDEVNAME_SIZE];
791 790
792 s->s_flags = flags; 791 s->s_flags = flags;
792 s->s_mode = mode;
793 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 793 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
794 sb_set_blocksize(s, block_size(bdev)); 794 sb_set_blocksize(s, block_size(bdev));
795 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 795 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
@@ -807,7 +807,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
807error_s: 807error_s:
808 error = PTR_ERR(s); 808 error = PTR_ERR(s);
809error_bdev: 809error_bdev:
810 close_bdev_excl(bdev); 810 close_bdev_exclusive(bdev, mode);
811error: 811error:
812 return error; 812 return error;
813} 813}
@@ -817,10 +817,11 @@ EXPORT_SYMBOL(get_sb_bdev);
817void kill_block_super(struct super_block *sb) 817void kill_block_super(struct super_block *sb)
818{ 818{
819 struct block_device *bdev = sb->s_bdev; 819 struct block_device *bdev = sb->s_bdev;
820 fmode_t mode = sb->s_mode;
820 821
821 generic_shutdown_super(sb); 822 generic_shutdown_super(sb);
822 sync_blockdev(bdev); 823 sync_blockdev(bdev);
823 close_bdev_excl(bdev); 824 close_bdev_exclusive(bdev, mode);
824} 825}
825 826
826EXPORT_SYMBOL(kill_block_super); 827EXPORT_SYMBOL(kill_block_super);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3a05a596e3b4..82d3b79d0e08 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -983,4 +983,5 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
983const struct file_operations sysfs_dir_operations = { 983const struct file_operations sysfs_dir_operations = {
984 .read = generic_read_dir, 984 .read = generic_read_dir,
985 .readdir = sysfs_readdir, 985 .readdir = sysfs_readdir,
986 .llseek = generic_file_llseek,
986}; 987};
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c502c60e4f54..0862f0e49d0c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
52 52
53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
54{ 54{
55 ktime_t now, remaining; 55 ktime_t remaining;
56
57 now = ctx->tmr.base->get_time();
58 remaining = ktime_sub(ctx->tmr.expires, now);
59 56
57 remaining = hrtimer_expires_remaining(&ctx->tmr);
60 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 58 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
61} 59}
62 60
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
74 ctx->ticks = 0; 72 ctx->ticks = 0;
75 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 73 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
76 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 74 hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
77 ctx->tmr.expires = texp; 75 hrtimer_set_expires(&ctx->tmr, texp);
78 ctx->tmr.function = timerfd_tmrproc; 76 ctx->tmr.function = timerfd_tmrproc;
79 if (texp.tv64 != 0) 77 if (texp.tv64 != 0)
80 hrtimer_start(&ctx->tmr, texp, htmode); 78 hrtimer_start(&ctx->tmr, texp, htmode);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 73db464cd08b..1a4973e10664 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c)
414 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - 414 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
415 * @c->lst.taken_empty_lebs 415 * @c->lst.taken_empty_lebs
416 * 416 *
417 * @empty_lebs are available because they are empty. @freeable_cnt are 417 * @c->lst.empty_lebs are available because they are empty.
418 * available because they contain only free and dirty space and the 418 * @c->freeable_cnt are available because they contain only free and
419 * index allocation always occurs after wbufs are synch'ed. 419 * dirty space, @c->idx_gc_cnt are available because they are index
420 * @idx_gc_cnt are available because they are index LEBs that have been 420 * LEBs that have been garbage collected and are awaiting the commit
421 * garbage collected (including trivial GC) and are awaiting the commit 421 * before they can be used. And the in-the-gaps method will grab these
422 * before they can be unmapped - note that the in-the-gaps method will 422 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
423 * grab these if it needs them. @taken_empty_lebs are empty_lebs that 423 * already been allocated for some purpose.
424 * have already been allocated for some purpose (also includes those
425 * LEBs on the @idx_gc list).
426 * 424 *
427 * Note, @taken_empty_lebs may temporarily be higher by one because of 425 * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because
428 * the way we serialize LEB allocations and budgeting. See a comment in 426 * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they
429 * 'ubifs_find_free_space()'. 427 * are taken until after the commit).
428 *
429 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
430 * because of the way we serialize LEB allocations and budgeting. See a
431 * comment in 'ubifs_find_free_space()'.
430 */ 432 */
431 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 433 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
432 c->lst.taken_empty_lebs; 434 c->lst.taken_empty_lebs;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 5bb51dac3c16..a0ada596b17c 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
91 * 91 *
92 * Note, if the input buffer was not compressed, it is copied to the output 92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. 93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 *
95 * This functions returns %0 on success or a negative error code on failure.
96 */ 94 */
97void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 95void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
98 int *compr_type) 96 int *compr_type)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d7f7645779f2..7186400750e7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
222{ 222{
223 const struct ubifs_inode *ui = ubifs_inode(inode); 223 const struct ubifs_inode *ui = ubifs_inode(inode);
224 224
225 printk(KERN_DEBUG "inode %lu\n", inode->i_ino); 225 printk(KERN_DEBUG "Dump in-memory inode:");
226 printk(KERN_DEBUG "size %llu\n", 226 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino);
227 printk(KERN_DEBUG "\tsize %llu\n",
227 (unsigned long long)i_size_read(inode)); 228 (unsigned long long)i_size_read(inode));
228 printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); 229 printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink);
229 printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); 230 printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid);
230 printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); 231 printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid);
231 printk(KERN_DEBUG "atime %u.%u\n", 232 printk(KERN_DEBUG "\tatime %u.%u\n",
232 (unsigned int)inode->i_atime.tv_sec, 233 (unsigned int)inode->i_atime.tv_sec,
233 (unsigned int)inode->i_atime.tv_nsec); 234 (unsigned int)inode->i_atime.tv_nsec);
234 printk(KERN_DEBUG "mtime %u.%u\n", 235 printk(KERN_DEBUG "\tmtime %u.%u\n",
235 (unsigned int)inode->i_mtime.tv_sec, 236 (unsigned int)inode->i_mtime.tv_sec,
236 (unsigned int)inode->i_mtime.tv_nsec); 237 (unsigned int)inode->i_mtime.tv_nsec);
237 printk(KERN_DEBUG "ctime %u.%u\n", 238 printk(KERN_DEBUG "\tctime %u.%u\n",
238 (unsigned int)inode->i_ctime.tv_sec, 239 (unsigned int)inode->i_ctime.tv_sec,
239 (unsigned int)inode->i_ctime.tv_nsec); 240 (unsigned int)inode->i_ctime.tv_nsec);
240 printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); 241 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum);
241 printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); 242 printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size);
242 printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); 243 printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt);
243 printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); 244 printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names);
244 printk(KERN_DEBUG "dirty %u\n", ui->dirty); 245 printk(KERN_DEBUG "\tdirty %u\n", ui->dirty);
245 printk(KERN_DEBUG "xattr %u\n", ui->xattr); 246 printk(KERN_DEBUG "\txattr %u\n", ui->xattr);
246 printk(KERN_DEBUG "flags %d\n", ui->flags); 247 printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr);
247 printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); 248 printk(KERN_DEBUG "\tsynced_i_size %llu\n",
248 printk(KERN_DEBUG "data_len %d\n", ui->data_len); 249 (unsigned long long)ui->synced_i_size);
250 printk(KERN_DEBUG "\tui_size %llu\n",
251 (unsigned long long)ui->ui_size);
252 printk(KERN_DEBUG "\tflags %d\n", ui->flags);
253 printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type);
254 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
255 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
256 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
249} 257}
250 258
251void dbg_dump_node(const struct ubifs_info *c, const void *node) 259void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -647,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c)
647 } 655 }
648} 656}
649 657
658void dbg_dump_lpt_info(struct ubifs_info *c)
659{
660 int i;
661
662 spin_lock(&dbg_lock);
663 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz);
664 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz);
665 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz);
666 printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz);
667 printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz);
668 printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt);
669 printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght);
670 printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt);
671 printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt);
672 printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
673 printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
674 printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt);
675 printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits);
676 printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
677 printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
678 printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
679 printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits);
680 printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits);
681 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
682 printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
683 c->nhead_lnum, c->nhead_offs);
684 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
685 if (c->big_lpt)
686 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
687 c->lsave_lnum, c->lsave_offs);
688 for (i = 0; i < c->lpt_lebs; i++)
689 printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d "
690 "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
691 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
692 spin_unlock(&dbg_lock);
693}
694
650void dbg_dump_leb(const struct ubifs_info *c, int lnum) 695void dbg_dump_leb(const struct ubifs_info *c, int lnum)
651{ 696{
652 struct ubifs_scan_leb *sleb; 697 struct ubifs_scan_leb *sleb;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 50315fc57185..33d6b95071e4 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c); 224void dbg_dump_budg(struct ubifs_info *c);
225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
226void dbg_dump_lprops(struct ubifs_info *c); 226void dbg_dump_lprops(struct ubifs_info *c);
227void dbg_dump_lpt_info(struct ubifs_info *c);
227void dbg_dump_leb(const struct ubifs_info *c, int lnum); 228void dbg_dump_leb(const struct ubifs_info *c, int lnum);
228void dbg_dump_znode(const struct ubifs_info *c, 229void dbg_dump_znode(const struct ubifs_info *c,
229 const struct ubifs_znode *znode); 230 const struct ubifs_znode *znode);
@@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
249int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); 250int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
250int dbg_check_cats(struct ubifs_info *c); 251int dbg_check_cats(struct ubifs_info *c);
251int dbg_check_ltab(struct ubifs_info *c); 252int dbg_check_ltab(struct ubifs_info *c);
253int dbg_chk_lpt_free_spc(struct ubifs_info *c);
254int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
252int dbg_check_synced_i_size(struct inode *inode); 255int dbg_check_synced_i_size(struct inode *inode);
253int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 256int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
254int dbg_check_tnc(struct ubifs_info *c, int extra); 257int dbg_check_tnc(struct ubifs_info *c, int extra);
@@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
367#define dbg_dump_budg(c) ({}) 370#define dbg_dump_budg(c) ({})
368#define dbg_dump_lprop(c, lp) ({}) 371#define dbg_dump_lprop(c, lp) ({})
369#define dbg_dump_lprops(c) ({}) 372#define dbg_dump_lprops(c) ({})
373#define dbg_dump_lpt_info(c) ({})
370#define dbg_dump_leb(c, lnum) ({}) 374#define dbg_dump_leb(c, lnum) ({})
371#define dbg_dump_znode(c, znode) ({}) 375#define dbg_dump_znode(c, znode) ({})
372#define dbg_dump_heap(c, heap, cat) ({}) 376#define dbg_dump_heap(c, heap, cat) ({})
@@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
379#define dbg_check_old_index(c, zroot) 0 383#define dbg_check_old_index(c, zroot) 0
380#define dbg_check_cats(c) 0 384#define dbg_check_cats(c) 0
381#define dbg_check_ltab(c) 0 385#define dbg_check_ltab(c) 0
386#define dbg_chk_lpt_free_spc(c) 0
387#define dbg_chk_lpt_sz(c, action, len) 0
382#define dbg_check_synced_i_size(inode) 0 388#define dbg_check_synced_i_size(inode) 0
383#define dbg_check_dir_size(c, dir) 0 389#define dbg_check_dir_size(c, dir) 0
384#define dbg_check_tnc(c, x) 0 390#define dbg_check_tnc(c, x) 0
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3d698e2022b1..51cf511d44d9 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page)
147 err = ret; 147 err = ret;
148 if (err != -ENOENT) 148 if (err != -ENOENT)
149 break; 149 break;
150 } else if (block + 1 == beyond) {
151 int dlen = le32_to_cpu(dn->size);
152 int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
153
154 if (ilen && ilen < dlen)
155 memset(addr + ilen, 0, dlen - ilen);
150 } 156 }
151 } 157 }
152 if (++i >= UBIFS_BLOCKS_PER_PAGE) 158 if (++i >= UBIFS_BLOCKS_PER_PAGE)
@@ -577,8 +583,262 @@ out:
577 return copied; 583 return copied;
578} 584}
579 585
586/**
587 * populate_page - copy data nodes into a page for bulk-read.
588 * @c: UBIFS file-system description object
589 * @page: page
590 * @bu: bulk-read information
591 * @n: next zbranch slot
592 *
593 * This function returns %0 on success and a negative error code on failure.
594 */
595static int populate_page(struct ubifs_info *c, struct page *page,
596 struct bu_info *bu, int *n)
597{
598 int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
599 struct inode *inode = page->mapping->host;
600 loff_t i_size = i_size_read(inode);
601 unsigned int page_block;
602 void *addr, *zaddr;
603 pgoff_t end_index;
604
605 dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
606 inode->i_ino, page->index, i_size, page->flags);
607
608 addr = zaddr = kmap(page);
609
610 end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
611 if (!i_size || page->index > end_index) {
612 hole = 1;
613 memset(addr, 0, PAGE_CACHE_SIZE);
614 goto out_hole;
615 }
616
617 page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
618 while (1) {
619 int err, len, out_len, dlen;
620
621 if (nn >= bu->cnt) {
622 hole = 1;
623 memset(addr, 0, UBIFS_BLOCK_SIZE);
624 } else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
625 struct ubifs_data_node *dn;
626
627 dn = bu->buf + (bu->zbranch[nn].offs - offs);
628
629 ubifs_assert(dn->ch.sqnum >
630 ubifs_inode(inode)->creat_sqnum);
631
632 len = le32_to_cpu(dn->size);
633 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
634 goto out_err;
635
636 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
637 out_len = UBIFS_BLOCK_SIZE;
638 err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
639 le16_to_cpu(dn->compr_type));
640 if (err || len != out_len)
641 goto out_err;
642
643 if (len < UBIFS_BLOCK_SIZE)
644 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
645
646 nn += 1;
647 read = (i << UBIFS_BLOCK_SHIFT) + len;
648 } else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
649 nn += 1;
650 continue;
651 } else {
652 hole = 1;
653 memset(addr, 0, UBIFS_BLOCK_SIZE);
654 }
655 if (++i >= UBIFS_BLOCKS_PER_PAGE)
656 break;
657 addr += UBIFS_BLOCK_SIZE;
658 page_block += 1;
659 }
660
661 if (end_index == page->index) {
662 int len = i_size & (PAGE_CACHE_SIZE - 1);
663
664 if (len && len < read)
665 memset(zaddr + len, 0, read - len);
666 }
667
668out_hole:
669 if (hole) {
670 SetPageChecked(page);
671 dbg_gen("hole");
672 }
673
674 SetPageUptodate(page);
675 ClearPageError(page);
676 flush_dcache_page(page);
677 kunmap(page);
678 *n = nn;
679 return 0;
680
681out_err:
682 ClearPageUptodate(page);
683 SetPageError(page);
684 flush_dcache_page(page);
685 kunmap(page);
686 ubifs_err("bad data node (block %u, inode %lu)",
687 page_block, inode->i_ino);
688 return -EINVAL;
689}
690
691/**
692 * ubifs_do_bulk_read - do bulk-read.
693 * @c: UBIFS file-system description object
694 * @page1: first page
695 *
696 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
697 */
698static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
699{
700 pgoff_t offset = page1->index, end_index;
701 struct address_space *mapping = page1->mapping;
702 struct inode *inode = mapping->host;
703 struct ubifs_inode *ui = ubifs_inode(inode);
704 struct bu_info *bu;
705 int err, page_idx, page_cnt, ret = 0, n = 0;
706 loff_t isize;
707
708 bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
709 if (!bu)
710 return 0;
711
712 bu->buf_len = c->bulk_read_buf_size;
713 bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
714 if (!bu->buf)
715 goto out_free;
716
717 data_key_init(c, &bu->key, inode->i_ino,
718 offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
719
720 err = ubifs_tnc_get_bu_keys(c, bu);
721 if (err)
722 goto out_warn;
723
724 if (bu->eof) {
725 /* Turn off bulk-read at the end of the file */
726 ui->read_in_a_row = 1;
727 ui->bulk_read = 0;
728 }
729
730 page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
731 if (!page_cnt) {
732 /*
733 * This happens when there are multiple blocks per page and the
734 * blocks for the first page we are looking for, are not
735 * together. If all the pages were like this, bulk-read would
736 * reduce performance, so we turn it off for a while.
737 */
738 ui->read_in_a_row = 0;
739 ui->bulk_read = 0;
740 goto out_free;
741 }
742
743 if (bu->cnt) {
744 err = ubifs_tnc_bulk_read(c, bu);
745 if (err)
746 goto out_warn;
747 }
748
749 err = populate_page(c, page1, bu, &n);
750 if (err)
751 goto out_warn;
752
753 unlock_page(page1);
754 ret = 1;
755
756 isize = i_size_read(inode);
757 if (isize == 0)
758 goto out_free;
759 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
760
761 for (page_idx = 1; page_idx < page_cnt; page_idx++) {
762 pgoff_t page_offset = offset + page_idx;
763 struct page *page;
764
765 if (page_offset > end_index)
766 break;
767 page = find_or_create_page(mapping, page_offset,
768 GFP_NOFS | __GFP_COLD);
769 if (!page)
770 break;
771 if (!PageUptodate(page))
772 err = populate_page(c, page, bu, &n);
773 unlock_page(page);
774 page_cache_release(page);
775 if (err)
776 break;
777 }
778
779 ui->last_page_read = offset + page_idx - 1;
780
781out_free:
782 kfree(bu->buf);
783 kfree(bu);
784 return ret;
785
786out_warn:
787 ubifs_warn("ignoring error %d and skipping bulk-read", err);
788 goto out_free;
789}
790
791/**
792 * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
793 * @page: page from which to start bulk-read.
794 *
795 * Some flash media are capable of reading sequentially at faster rates. UBIFS
796 * bulk-read facility is designed to take advantage of that, by reading in one
797 * go consecutive data nodes that are also located consecutively in the same
798 * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
799 */
800static int ubifs_bulk_read(struct page *page)
801{
802 struct inode *inode = page->mapping->host;
803 struct ubifs_info *c = inode->i_sb->s_fs_info;
804 struct ubifs_inode *ui = ubifs_inode(inode);
805 pgoff_t index = page->index, last_page_read = ui->last_page_read;
806 int ret = 0;
807
808 ui->last_page_read = index;
809
810 if (!c->bulk_read)
811 return 0;
812 /*
813 * Bulk-read is protected by ui_mutex, but it is an optimization, so
814 * don't bother if we cannot lock the mutex.
815 */
816 if (!mutex_trylock(&ui->ui_mutex))
817 return 0;
818 if (index != last_page_read + 1) {
819 /* Turn off bulk-read if we stop reading sequentially */
820 ui->read_in_a_row = 1;
821 if (ui->bulk_read)
822 ui->bulk_read = 0;
823 goto out_unlock;
824 }
825 if (!ui->bulk_read) {
826 ui->read_in_a_row += 1;
827 if (ui->read_in_a_row < 3)
828 goto out_unlock;
829 /* Three reads in a row, so switch on bulk-read */
830 ui->bulk_read = 1;
831 }
832 ret = ubifs_do_bulk_read(c, page);
833out_unlock:
834 mutex_unlock(&ui->ui_mutex);
835 return ret;
836}
837
580static int ubifs_readpage(struct file *file, struct page *page) 838static int ubifs_readpage(struct file *file, struct page *page)
581{ 839{
840 if (ubifs_bulk_read(page))
841 return 0;
582 do_readpage(page); 842 do_readpage(page);
583 unlock_page(page); 843 unlock_page(page);
584 return 0; 844 return 0;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 47814cde2407..717d79c97c5e 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c)
901 * it is needed now for this commit. 901 * it is needed now for this commit.
902 */ 902 */
903 lp = ubifs_lpt_lookup_dirty(c, lnum); 903 lp = ubifs_lpt_lookup_dirty(c, lnum);
904 if (unlikely(IS_ERR(lp))) 904 if (IS_ERR(lp))
905 return PTR_ERR(lp); 905 return PTR_ERR(lp);
906 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 906 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
907 lp->flags | LPROPS_INDEX, -1); 907 lp->flags | LPROPS_INDEX, -1);
908 if (unlikely(IS_ERR(lp))) 908 if (IS_ERR(lp))
909 return PTR_ERR(lp); 909 return PTR_ERR(lp);
910 dbg_find("LEB %d, dirty %d and free %d flags %#x", 910 dbg_find("LEB %d, dirty %d and free %d flags %#x",
911 lp->lnum, lp->dirty, lp->free, lp->flags); 911 lp->lnum, lp->dirty, lp->free, lp->flags);
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 02aba36fe3d4..0bef6501d58a 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -96,6 +96,48 @@ static int switch_gc_head(struct ubifs_info *c)
96} 96}
97 97
98/** 98/**
99 * joinup - bring data nodes for an inode together.
100 * @c: UBIFS file-system description object
101 * @sleb: describes scanned LEB
102 * @inum: inode number
103 * @blk: block number
104 * @data: list to which to add data nodes
105 *
106 * This function looks at the first few nodes in the scanned LEB @sleb and adds
107 * them to @data if they are data nodes from @inum and have a larger block
108 * number than @blk. This function returns %0 on success and a negative error
109 * code on failure.
110 */
111static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
112 unsigned int blk, struct list_head *data)
113{
114 int err, cnt = 6, lnum = sleb->lnum, offs;
115 struct ubifs_scan_node *snod, *tmp;
116 union ubifs_key *key;
117
118 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
119 key = &snod->key;
120 if (key_inum(c, key) == inum &&
121 key_type(c, key) == UBIFS_DATA_KEY &&
122 key_block(c, key) > blk) {
123 offs = snod->offs;
124 err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
125 if (err < 0)
126 return err;
127 list_del(&snod->list);
128 if (err) {
129 list_add_tail(&snod->list, data);
130 blk = key_block(c, key);
131 } else
132 kfree(snod);
133 cnt = 6;
134 } else if (--cnt == 0)
135 break;
136 }
137 return 0;
138}
139
140/**
99 * move_nodes - move nodes. 141 * move_nodes - move nodes.
100 * @c: UBIFS file-system description object 142 * @c: UBIFS file-system description object
101 * @sleb: describes nodes to move 143 * @sleb: describes nodes to move
@@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c)
116static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) 158static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
117{ 159{
118 struct ubifs_scan_node *snod, *tmp; 160 struct ubifs_scan_node *snod, *tmp;
119 struct list_head large, medium, small; 161 struct list_head data, large, medium, small;
120 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 162 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
121 int avail, err, min = INT_MAX; 163 int avail, err, min = INT_MAX;
164 unsigned int blk = 0;
165 ino_t inum = 0;
122 166
167 INIT_LIST_HEAD(&data);
123 INIT_LIST_HEAD(&large); 168 INIT_LIST_HEAD(&large);
124 INIT_LIST_HEAD(&medium); 169 INIT_LIST_HEAD(&medium);
125 INIT_LIST_HEAD(&small); 170 INIT_LIST_HEAD(&small);
126 171
127 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { 172 while (!list_empty(&sleb->nodes)) {
128 struct list_head *lst; 173 struct list_head *lst = sleb->nodes.next;
174
175 snod = list_entry(lst, struct ubifs_scan_node, list);
129 176
130 ubifs_assert(snod->type != UBIFS_IDX_NODE); 177 ubifs_assert(snod->type != UBIFS_IDX_NODE);
131 ubifs_assert(snod->type != UBIFS_REF_NODE); 178 ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
136 if (err < 0) 183 if (err < 0)
137 goto out; 184 goto out;
138 185
139 lst = &snod->list;
140 list_del(lst); 186 list_del(lst);
141 if (!err) { 187 if (!err) {
142 /* The node is obsolete, remove it from the list */ 188 /* The node is obsolete, remove it from the list */
@@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
145 } 191 }
146 192
147 /* 193 /*
148 * Sort the list of nodes so that large nodes go first, and 194 * Sort the list of nodes so that data nodes go first, large
149 * small nodes go last. 195 * nodes go second, and small nodes go last.
150 */ 196 */
151 if (snod->len > MEDIUM_NODE_WM) 197 if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
152 list_add(lst, &large); 198 if (inum != key_inum(c, &snod->key)) {
199 if (inum) {
200 /*
201 * Try to move data nodes from the same
202 * inode together.
203 */
204 err = joinup(c, sleb, inum, blk, &data);
205 if (err)
206 goto out;
207 }
208 inum = key_inum(c, &snod->key);
209 blk = key_block(c, &snod->key);
210 }
211 list_add_tail(lst, &data);
212 } else if (snod->len > MEDIUM_NODE_WM)
213 list_add_tail(lst, &large);
153 else if (snod->len > SMALL_NODE_WM) 214 else if (snod->len > SMALL_NODE_WM)
154 list_add(lst, &medium); 215 list_add_tail(lst, &medium);
155 else 216 else
156 list_add(lst, &small); 217 list_add_tail(lst, &small);
157 218
158 /* And find the smallest node */ 219 /* And find the smallest node */
159 if (snod->len < min) 220 if (snod->len < min)
@@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
164 * Join the tree lists so that we'd have one roughly sorted list 225 * Join the tree lists so that we'd have one roughly sorted list
165 * ('large' will be the head of the joined list). 226 * ('large' will be the head of the joined list).
166 */ 227 */
228 list_splice(&data, &large);
167 list_splice(&medium, large.prev); 229 list_splice(&medium, large.prev);
168 list_splice(&small, large.prev); 230 list_splice(&small, large.prev);
169 231
@@ -653,7 +715,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
653 */ 715 */
654 while (1) { 716 while (1) {
655 lp = ubifs_fast_find_freeable(c); 717 lp = ubifs_fast_find_freeable(c);
656 if (unlikely(IS_ERR(lp))) { 718 if (IS_ERR(lp)) {
657 err = PTR_ERR(lp); 719 err = PTR_ERR(lp);
658 goto out; 720 goto out;
659 } 721 }
@@ -665,7 +727,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
665 if (err) 727 if (err)
666 goto out; 728 goto out;
667 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); 729 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
668 if (unlikely(IS_ERR(lp))) { 730 if (IS_ERR(lp)) {
669 err = PTR_ERR(lp); 731 err = PTR_ERR(lp);
670 goto out; 732 goto out;
671 } 733 }
@@ -680,7 +742,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
680 /* Record index freeable LEBs for unmapping after commit */ 742 /* Record index freeable LEBs for unmapping after commit */
681 while (1) { 743 while (1) {
682 lp = ubifs_fast_find_frdi_idx(c); 744 lp = ubifs_fast_find_frdi_idx(c);
683 if (unlikely(IS_ERR(lp))) { 745 if (IS_ERR(lp)) {
684 err = PTR_ERR(lp); 746 err = PTR_ERR(lp);
685 goto out; 747 goto out;
686 } 748 }
@@ -696,7 +758,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
696 /* Don't release the LEB until after the next commit */ 758 /* Don't release the LEB until after the next commit */
697 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; 759 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
698 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); 760 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
699 if (unlikely(IS_ERR(lp))) { 761 if (IS_ERR(lp)) {
700 err = PTR_ERR(lp); 762 err = PTR_ERR(lp);
701 kfree(idx_gc); 763 kfree(idx_gc);
702 goto out; 764 goto out;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 054363f2b207..01682713af69 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
62{ 62{
63 if (!c->ro_media) { 63 if (!c->ro_media) {
64 c->ro_media = 1; 64 c->ro_media = 1;
65 c->no_chk_data_crc = 0;
65 ubifs_warn("switched to read-only mode, error %d", err); 66 ubifs_warn("switched to read-only mode, error %d", err);
66 dbg_dump_stack(); 67 dbg_dump_stack();
67 } 68 }
@@ -74,6 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
74 * @lnum: logical eraseblock number 75 * @lnum: logical eraseblock number
75 * @offs: offset within the logical eraseblock 76 * @offs: offset within the logical eraseblock
76 * @quiet: print no messages 77 * @quiet: print no messages
78 * @chk_crc: indicates whether to always check the CRC
77 * 79 *
78 * This function checks node magic number and CRC checksum. This function also 80 * This function checks node magic number and CRC checksum. This function also
79 * validates node length to prevent UBIFS from becoming crazy when an attacker 81 * validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -85,7 +87,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
85 * or magic. 87 * or magic.
86 */ 88 */
87int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 89int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
88 int offs, int quiet) 90 int offs, int quiet, int chk_crc)
89{ 91{
90 int err = -EINVAL, type, node_len; 92 int err = -EINVAL, type, node_len;
91 uint32_t crc, node_crc, magic; 93 uint32_t crc, node_crc, magic;
@@ -121,6 +123,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
121 node_len > c->ranges[type].max_len) 123 node_len > c->ranges[type].max_len)
122 goto out_len; 124 goto out_len;
123 125
126 if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc)
127 if (c->no_chk_data_crc)
128 return 0;
129
124 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 130 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
125 node_crc = le32_to_cpu(ch->crc); 131 node_crc = le32_to_cpu(ch->crc);
126 if (crc != node_crc) { 132 if (crc != node_crc) {
@@ -722,7 +728,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
722 goto out; 728 goto out;
723 } 729 }
724 730
725 err = ubifs_check_node(c, buf, lnum, offs, 0); 731 err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
726 if (err) { 732 if (err) {
727 ubifs_err("expected node type %d", type); 733 ubifs_err("expected node type %d", type);
728 return err; 734 return err;
@@ -781,7 +787,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
781 goto out; 787 goto out;
782 } 788 }
783 789
784 err = ubifs_check_node(c, buf, lnum, offs, 0); 790 err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
785 if (err) { 791 if (err) {
786 ubifs_err("expected node type %d", type); 792 ubifs_err("expected node type %d", type);
787 return err; 793 return err;
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 8f7476007549..9ee65086f627 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c,
484 * @key2: the second key to compare 484 * @key2: the second key to compare
485 * 485 *
486 * This function compares 2 keys and returns %-1 if @key1 is less than 486 * This function compares 2 keys and returns %-1 if @key1 is less than
487 * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. 487 * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2.
488 */ 488 */
489static inline int keys_cmp(const struct ubifs_info *c, 489static inline int keys_cmp(const struct ubifs_info *c,
490 const union ubifs_key *key1, 490 const union ubifs_key *key1,
@@ -503,6 +503,26 @@ static inline int keys_cmp(const struct ubifs_info *c,
503} 503}
504 504
505/** 505/**
506 * keys_eq - determine if keys are equivalent.
507 * @c: UBIFS file-system description object
508 * @key1: the first key to compare
509 * @key2: the second key to compare
510 *
511 * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and
512 * %0 if not.
513 */
514static inline int keys_eq(const struct ubifs_info *c,
515 const union ubifs_key *key1,
516 const union ubifs_key *key2)
517{
518 if (key1->u32[0] != key2->u32[0])
519 return 0;
520 if (key1->u32[1] != key2->u32[1])
521 return 0;
522 return 1;
523}
524
525/**
506 * is_hash_key - is a key vulnerable to hash collisions. 526 * is_hash_key - is a key vulnerable to hash collisions.
507 * @c: UBIFS file-system description object 527 * @c: UBIFS file-system description object
508 * @key: key 528 * @key: key
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 2ba93da71b65..f27176e9b70d 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
125 } 125 }
126 } 126 }
127 } 127 }
128
128 /* Not greater than parent, so compare to children */ 129 /* Not greater than parent, so compare to children */
129 while (1) { 130 while (1) {
130 /* Compare to left child */ 131 /* Compare to left child */
@@ -460,18 +461,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
460} 461}
461 462
462/** 463/**
463 * ubifs_get_lprops - get reference to LEB properties.
464 * @c: the UBIFS file-system description object
465 *
466 * This function locks lprops. Lprops have to be unlocked by
467 * 'ubifs_release_lprops()'.
468 */
469void ubifs_get_lprops(struct ubifs_info *c)
470{
471 mutex_lock(&c->lp_mutex);
472}
473
474/**
475 * calc_dark - calculate LEB dark space size. 464 * calc_dark - calculate LEB dark space size.
476 * @c: the UBIFS file-system description object 465 * @c: the UBIFS file-system description object
477 * @spc: amount of free and dirty space in the LEB 466 * @spc: amount of free and dirty space in the LEB
@@ -576,7 +565,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
576 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); 565 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
577 566
578 spin_lock(&c->space_lock); 567 spin_lock(&c->space_lock);
579
580 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) 568 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
581 c->lst.taken_empty_lebs -= 1; 569 c->lst.taken_empty_lebs -= 1;
582 570
@@ -637,31 +625,12 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
637 c->lst.taken_empty_lebs += 1; 625 c->lst.taken_empty_lebs += 1;
638 626
639 change_category(c, lprops); 627 change_category(c, lprops);
640
641 c->idx_gc_cnt += idx_gc_cnt; 628 c->idx_gc_cnt += idx_gc_cnt;
642
643 spin_unlock(&c->space_lock); 629 spin_unlock(&c->space_lock);
644
645 return lprops; 630 return lprops;
646} 631}
647 632
648/** 633/**
649 * ubifs_release_lprops - release lprops lock.
650 * @c: the UBIFS file-system description object
651 *
652 * This function has to be called after each 'ubifs_get_lprops()' call to
653 * unlock lprops.
654 */
655void ubifs_release_lprops(struct ubifs_info *c)
656{
657 ubifs_assert(mutex_is_locked(&c->lp_mutex));
658 ubifs_assert(c->lst.empty_lebs >= 0 &&
659 c->lst.empty_lebs <= c->main_lebs);
660
661 mutex_unlock(&c->lp_mutex);
662}
663
664/**
665 * ubifs_get_lp_stats - get lprops statistics. 634 * ubifs_get_lp_stats - get lprops statistics.
666 * @c: UBIFS file-system description object 635 * @c: UBIFS file-system description object
667 * @st: return statistics 636 * @st: return statistics
@@ -1262,7 +1231,6 @@ static int scan_check_cb(struct ubifs_info *c,
1262 } 1231 }
1263 1232
1264 ubifs_scan_destroy(sleb); 1233 ubifs_scan_destroy(sleb);
1265
1266 return LPT_SCAN_CONTINUE; 1234 return LPT_SCAN_CONTINUE;
1267 1235
1268out_print: 1236out_print:
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 9ff2463177e5..db8bd0e518b2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; 109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; 110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
111 c->lpt_sz += c->ltab_sz; 111 c->lpt_sz += c->ltab_sz;
112 c->lpt_sz += c->lsave_sz; 112 if (c->big_lpt)
113 c->lpt_sz += c->lsave_sz;
113 114
114 /* Add wastage */ 115 /* Add wastage */
115 sz = c->lpt_sz; 116 sz = c->lpt_sz;
@@ -287,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
287 const int k = 32 - nrbits; 288 const int k = 32 - nrbits;
288 uint8_t *p = *addr; 289 uint8_t *p = *addr;
289 int b = *pos; 290 int b = *pos;
290 uint32_t val; 291 uint32_t uninitialized_var(val);
292 const int bytes = (nrbits + b + 7) >> 3;
291 293
292 ubifs_assert(nrbits > 0); 294 ubifs_assert(nrbits > 0);
293 ubifs_assert(nrbits <= 32); 295 ubifs_assert(nrbits <= 32);
294 ubifs_assert(*pos >= 0); 296 ubifs_assert(*pos >= 0);
295 ubifs_assert(*pos < 8); 297 ubifs_assert(*pos < 8);
296 if (b) { 298 if (b) {
297 val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | 299 switch (bytes) {
298 ((uint32_t)p[4] << 24); 300 case 2:
301 val = p[1];
302 break;
303 case 3:
304 val = p[1] | ((uint32_t)p[2] << 8);
305 break;
306 case 4:
307 val = p[1] | ((uint32_t)p[2] << 8) |
308 ((uint32_t)p[3] << 16);
309 break;
310 case 5:
311 val = p[1] | ((uint32_t)p[2] << 8) |
312 ((uint32_t)p[3] << 16) |
313 ((uint32_t)p[4] << 24);
314 }
299 val <<= (8 - b); 315 val <<= (8 - b);
300 val |= *p >> b; 316 val |= *p >> b;
301 nrbits += b; 317 nrbits += b;
302 } else 318 } else {
303 val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | 319 switch (bytes) {
304 ((uint32_t)p[3] << 24); 320 case 1:
321 val = p[0];
322 break;
323 case 2:
324 val = p[0] | ((uint32_t)p[1] << 8);
325 break;
326 case 3:
327 val = p[0] | ((uint32_t)p[1] << 8) |
328 ((uint32_t)p[2] << 16);
329 break;
330 case 4:
331 val = p[0] | ((uint32_t)p[1] << 8) |
332 ((uint32_t)p[2] << 16) |
333 ((uint32_t)p[3] << 24);
334 break;
335 }
336 }
305 val <<= k; 337 val <<= k;
306 val >>= k; 338 val >>= k;
307 b = nrbits & 7; 339 b = nrbits & 7;
308 p += nrbits / 8; 340 p += nrbits >> 3;
309 *addr = p; 341 *addr = p;
310 *pos = b; 342 *pos = b;
311 ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); 343 ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5f0b83e20af6..eed5a0025d63 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
177 return 0; 177 return 0;
178 } 178 }
179 } 179 }
180 dbg_err("last LEB %d", *lnum);
181 dump_stack();
182 return -ENOSPC; 180 return -ENOSPC;
183} 181}
184 182
@@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c)
193 int lnum, offs, len, alen, done_lsave, done_ltab, err; 191 int lnum, offs, len, alen, done_lsave, done_ltab, err;
194 struct ubifs_cnode *cnode; 192 struct ubifs_cnode *cnode;
195 193
194 err = dbg_chk_lpt_sz(c, 0, 0);
195 if (err)
196 return err;
196 cnode = c->lpt_cnext; 197 cnode = c->lpt_cnext;
197 if (!cnode) 198 if (!cnode)
198 return 0; 199 return 0;
@@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c)
206 c->lsave_lnum = lnum; 207 c->lsave_lnum = lnum;
207 c->lsave_offs = offs; 208 c->lsave_offs = offs;
208 offs += c->lsave_sz; 209 offs += c->lsave_sz;
210 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
209 } 211 }
210 212
211 if (offs + c->ltab_sz <= c->leb_size) { 213 if (offs + c->ltab_sz <= c->leb_size) {
@@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c)
213 c->ltab_lnum = lnum; 215 c->ltab_lnum = lnum;
214 c->ltab_offs = offs; 216 c->ltab_offs = offs;
215 offs += c->ltab_sz; 217 offs += c->ltab_sz;
218 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
216 } 219 }
217 220
218 do { 221 do {
@@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c)
226 while (offs + len > c->leb_size) { 229 while (offs + len > c->leb_size) {
227 alen = ALIGN(offs, c->min_io_size); 230 alen = ALIGN(offs, c->min_io_size);
228 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 231 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
232 dbg_chk_lpt_sz(c, 2, alen - offs);
229 err = alloc_lpt_leb(c, &lnum); 233 err = alloc_lpt_leb(c, &lnum);
230 if (err) 234 if (err)
231 return err; 235 goto no_space;
232 offs = 0; 236 offs = 0;
233 ubifs_assert(lnum >= c->lpt_first && 237 ubifs_assert(lnum >= c->lpt_first &&
234 lnum <= c->lpt_last); 238 lnum <= c->lpt_last);
@@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c)
238 c->lsave_lnum = lnum; 242 c->lsave_lnum = lnum;
239 c->lsave_offs = offs; 243 c->lsave_offs = offs;
240 offs += c->lsave_sz; 244 offs += c->lsave_sz;
245 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
241 continue; 246 continue;
242 } 247 }
243 if (!done_ltab) { 248 if (!done_ltab) {
@@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c)
245 c->ltab_lnum = lnum; 250 c->ltab_lnum = lnum;
246 c->ltab_offs = offs; 251 c->ltab_offs = offs;
247 offs += c->ltab_sz; 252 offs += c->ltab_sz;
253 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
248 continue; 254 continue;
249 } 255 }
250 break; 256 break;
@@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c)
257 c->lpt_offs = offs; 263 c->lpt_offs = offs;
258 } 264 }
259 offs += len; 265 offs += len;
266 dbg_chk_lpt_sz(c, 1, len);
260 cnode = cnode->cnext; 267 cnode = cnode->cnext;
261 } while (cnode && cnode != c->lpt_cnext); 268 } while (cnode && cnode != c->lpt_cnext);
262 269
@@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c)
265 if (offs + c->lsave_sz > c->leb_size) { 272 if (offs + c->lsave_sz > c->leb_size) {
266 alen = ALIGN(offs, c->min_io_size); 273 alen = ALIGN(offs, c->min_io_size);
267 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 274 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
275 dbg_chk_lpt_sz(c, 2, alen - offs);
268 err = alloc_lpt_leb(c, &lnum); 276 err = alloc_lpt_leb(c, &lnum);
269 if (err) 277 if (err)
270 return err; 278 goto no_space;
271 offs = 0; 279 offs = 0;
272 ubifs_assert(lnum >= c->lpt_first && 280 ubifs_assert(lnum >= c->lpt_first &&
273 lnum <= c->lpt_last); 281 lnum <= c->lpt_last);
@@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c)
276 c->lsave_lnum = lnum; 284 c->lsave_lnum = lnum;
277 c->lsave_offs = offs; 285 c->lsave_offs = offs;
278 offs += c->lsave_sz; 286 offs += c->lsave_sz;
287 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
279 } 288 }
280 289
281 /* Make sure to place LPT's own lprops table */ 290 /* Make sure to place LPT's own lprops table */
@@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c)
283 if (offs + c->ltab_sz > c->leb_size) { 292 if (offs + c->ltab_sz > c->leb_size) {
284 alen = ALIGN(offs, c->min_io_size); 293 alen = ALIGN(offs, c->min_io_size);
285 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 294 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
295 dbg_chk_lpt_sz(c, 2, alen - offs);
286 err = alloc_lpt_leb(c, &lnum); 296 err = alloc_lpt_leb(c, &lnum);
287 if (err) 297 if (err)
288 return err; 298 goto no_space;
289 offs = 0; 299 offs = 0;
290 ubifs_assert(lnum >= c->lpt_first && 300 ubifs_assert(lnum >= c->lpt_first &&
291 lnum <= c->lpt_last); 301 lnum <= c->lpt_last);
@@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c)
294 c->ltab_lnum = lnum; 304 c->ltab_lnum = lnum;
295 c->ltab_offs = offs; 305 c->ltab_offs = offs;
296 offs += c->ltab_sz; 306 offs += c->ltab_sz;
307 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
297 } 308 }
298 309
299 alen = ALIGN(offs, c->min_io_size); 310 alen = ALIGN(offs, c->min_io_size);
300 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 311 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
312 dbg_chk_lpt_sz(c, 4, alen - offs);
313 err = dbg_chk_lpt_sz(c, 3, alen);
314 if (err)
315 return err;
301 return 0; 316 return 0;
317
318no_space:
319 ubifs_err("LPT out of space");
320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
322 dbg_dump_lpt_info(c);
323 return err;
302} 324}
303 325
304/** 326/**
@@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
333 *lnum = i + c->lpt_first; 355 *lnum = i + c->lpt_first;
334 return 0; 356 return 0;
335 } 357 }
336 dbg_err("last LEB %d", *lnum);
337 dump_stack();
338 return -ENOSPC; 358 return -ENOSPC;
339} 359}
340 360
@@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c)
369 done_lsave = 1; 389 done_lsave = 1;
370 ubifs_pack_lsave(c, buf + offs, c->lsave); 390 ubifs_pack_lsave(c, buf + offs, c->lsave);
371 offs += c->lsave_sz; 391 offs += c->lsave_sz;
392 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
372 } 393 }
373 394
374 if (offs + c->ltab_sz <= c->leb_size) { 395 if (offs + c->ltab_sz <= c->leb_size) {
375 done_ltab = 1; 396 done_ltab = 1;
376 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 397 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
377 offs += c->ltab_sz; 398 offs += c->ltab_sz;
399 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
378 } 400 }
379 401
380 /* Loop for each cnode */ 402 /* Loop for each cnode */
@@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c)
392 alen, UBI_SHORTTERM); 414 alen, UBI_SHORTTERM);
393 if (err) 415 if (err)
394 return err; 416 return err;
417 dbg_chk_lpt_sz(c, 4, alen - wlen);
395 } 418 }
419 dbg_chk_lpt_sz(c, 2, 0);
396 err = realloc_lpt_leb(c, &lnum); 420 err = realloc_lpt_leb(c, &lnum);
397 if (err) 421 if (err)
398 return err; 422 goto no_space;
399 offs = 0; 423 offs = 0;
400 from = 0; 424 from = 0;
401 ubifs_assert(lnum >= c->lpt_first && 425 ubifs_assert(lnum >= c->lpt_first &&
@@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c)
408 done_lsave = 1; 432 done_lsave = 1;
409 ubifs_pack_lsave(c, buf + offs, c->lsave); 433 ubifs_pack_lsave(c, buf + offs, c->lsave);
410 offs += c->lsave_sz; 434 offs += c->lsave_sz;
435 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
411 continue; 436 continue;
412 } 437 }
413 if (!done_ltab) { 438 if (!done_ltab) {
414 done_ltab = 1; 439 done_ltab = 1;
415 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 440 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
416 offs += c->ltab_sz; 441 offs += c->ltab_sz;
442 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
417 continue; 443 continue;
418 } 444 }
419 break; 445 break;
@@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c)
435 clear_bit(COW_ZNODE, &cnode->flags); 461 clear_bit(COW_ZNODE, &cnode->flags);
436 smp_mb__after_clear_bit(); 462 smp_mb__after_clear_bit();
437 offs += len; 463 offs += len;
464 dbg_chk_lpt_sz(c, 1, len);
438 cnode = cnode->cnext; 465 cnode = cnode->cnext;
439 } while (cnode && cnode != c->lpt_cnext); 466 } while (cnode && cnode != c->lpt_cnext);
440 467
@@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c)
448 UBI_SHORTTERM); 475 UBI_SHORTTERM);
449 if (err) 476 if (err)
450 return err; 477 return err;
478 dbg_chk_lpt_sz(c, 2, alen - wlen);
451 err = realloc_lpt_leb(c, &lnum); 479 err = realloc_lpt_leb(c, &lnum);
452 if (err) 480 if (err)
453 return err; 481 goto no_space;
454 offs = 0; 482 offs = 0;
455 ubifs_assert(lnum >= c->lpt_first && 483 ubifs_assert(lnum >= c->lpt_first &&
456 lnum <= c->lpt_last); 484 lnum <= c->lpt_last);
@@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c)
461 done_lsave = 1; 489 done_lsave = 1;
462 ubifs_pack_lsave(c, buf + offs, c->lsave); 490 ubifs_pack_lsave(c, buf + offs, c->lsave);
463 offs += c->lsave_sz; 491 offs += c->lsave_sz;
492 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
464 } 493 }
465 494
466 /* Make sure to place LPT's own lprops table */ 495 /* Make sure to place LPT's own lprops table */
@@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c)
473 UBI_SHORTTERM); 502 UBI_SHORTTERM);
474 if (err) 503 if (err)
475 return err; 504 return err;
505 dbg_chk_lpt_sz(c, 2, alen - wlen);
476 err = realloc_lpt_leb(c, &lnum); 506 err = realloc_lpt_leb(c, &lnum);
477 if (err) 507 if (err)
478 return err; 508 goto no_space;
479 offs = 0; 509 offs = 0;
480 ubifs_assert(lnum >= c->lpt_first && 510 ubifs_assert(lnum >= c->lpt_first &&
481 lnum <= c->lpt_last); 511 lnum <= c->lpt_last);
@@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c)
486 done_ltab = 1; 516 done_ltab = 1;
487 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 517 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
488 offs += c->ltab_sz; 518 offs += c->ltab_sz;
519 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
489 } 520 }
490 521
491 /* Write remaining data in buffer */ 522 /* Write remaining data in buffer */
@@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c)
495 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); 526 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
496 if (err) 527 if (err)
497 return err; 528 return err;
529
530 dbg_chk_lpt_sz(c, 4, alen - wlen);
531 err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size));
532 if (err)
533 return err;
534
498 c->nhead_lnum = lnum; 535 c->nhead_lnum = lnum;
499 c->nhead_offs = ALIGN(offs, c->min_io_size); 536 c->nhead_offs = ALIGN(offs, c->min_io_size);
500 537
@@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c)
503 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); 540 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
504 if (c->big_lpt) 541 if (c->big_lpt)
505 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); 542 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
543
506 return 0; 544 return 0;
545
546no_space:
547 ubifs_err("LPT out of space mismatch");
548 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
549 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
550 dbg_dump_lpt_info(c);
551 return err;
507} 552}
508 553
509/** 554/**
@@ -1044,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1044 int pos = 0, node_type, node_len; 1089 int pos = 0, node_type, node_len;
1045 uint16_t crc, calc_crc; 1090 uint16_t crc, calc_crc;
1046 1091
1092 if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
1093 return 0;
1047 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); 1094 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1048 if (node_type == UBIFS_LPT_NOT_A_NODE) 1095 if (node_type == UBIFS_LPT_NOT_A_NODE)
1049 return 0; 1096 return 0;
@@ -1156,6 +1203,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
1156 dbg_lp(""); 1203 dbg_lp("");
1157 1204
1158 mutex_lock(&c->lp_mutex); 1205 mutex_lock(&c->lp_mutex);
1206 err = dbg_chk_lpt_free_spc(c);
1207 if (err)
1208 goto out;
1159 err = dbg_check_ltab(c); 1209 err = dbg_check_ltab(c);
1160 if (err) 1210 if (err)
1161 goto out; 1211 goto out;
@@ -1645,4 +1695,121 @@ int dbg_check_ltab(struct ubifs_info *c)
1645 return 0; 1695 return 0;
1646} 1696}
1647 1697
1698/**
1699 * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT.
1700 * @c: the UBIFS file-system description object
1701 *
1702 * This function returns %0 on success and a negative error code on failure.
1703 */
1704int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1705{
1706 long long free = 0;
1707 int i;
1708
1709 for (i = 0; i < c->lpt_lebs; i++) {
1710 if (c->ltab[i].tgc || c->ltab[i].cmt)
1711 continue;
1712 if (i + c->lpt_first == c->nhead_lnum)
1713 free += c->leb_size - c->nhead_offs;
1714 else if (c->ltab[i].free == c->leb_size)
1715 free += c->leb_size;
1716 }
1717 if (free < c->lpt_sz) {
1718 dbg_err("LPT space error: free %lld lpt_sz %lld",
1719 free, c->lpt_sz);
1720 dbg_dump_lpt_info(c);
1721 return -EINVAL;
1722 }
1723 return 0;
1724}
1725
1726/**
1727 * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
1728 * @c: the UBIFS file-system description object
1729 * @action: action
1730 * @len: length written
1731 *
1732 * This function returns %0 on success and a negative error code on failure.
1733 */
1734int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1735{
1736 long long chk_lpt_sz, lpt_sz;
1737 int err = 0;
1738
1739 switch (action) {
1740 case 0:
1741 c->chk_lpt_sz = 0;
1742 c->chk_lpt_sz2 = 0;
1743 c->chk_lpt_lebs = 0;
1744 c->chk_lpt_wastage = 0;
1745 if (c->dirty_pn_cnt > c->pnode_cnt) {
1746 dbg_err("dirty pnodes %d exceed max %d",
1747 c->dirty_pn_cnt, c->pnode_cnt);
1748 err = -EINVAL;
1749 }
1750 if (c->dirty_nn_cnt > c->nnode_cnt) {
1751 dbg_err("dirty nnodes %d exceed max %d",
1752 c->dirty_nn_cnt, c->nnode_cnt);
1753 err = -EINVAL;
1754 }
1755 return err;
1756 case 1:
1757 c->chk_lpt_sz += len;
1758 return 0;
1759 case 2:
1760 c->chk_lpt_sz += len;
1761 c->chk_lpt_wastage += len;
1762 c->chk_lpt_lebs += 1;
1763 return 0;
1764 case 3:
1765 chk_lpt_sz = c->leb_size;
1766 chk_lpt_sz *= c->chk_lpt_lebs;
1767 chk_lpt_sz += len - c->nhead_offs;
1768 if (c->chk_lpt_sz != chk_lpt_sz) {
1769 dbg_err("LPT wrote %lld but space used was %lld",
1770 c->chk_lpt_sz, chk_lpt_sz);
1771 err = -EINVAL;
1772 }
1773 if (c->chk_lpt_sz > c->lpt_sz) {
1774 dbg_err("LPT wrote %lld but lpt_sz is %lld",
1775 c->chk_lpt_sz, c->lpt_sz);
1776 err = -EINVAL;
1777 }
1778 if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) {
1779 dbg_err("LPT layout size %lld but wrote %lld",
1780 c->chk_lpt_sz, c->chk_lpt_sz2);
1781 err = -EINVAL;
1782 }
1783 if (c->chk_lpt_sz2 && c->new_nhead_offs != len) {
1784 dbg_err("LPT new nhead offs: expected %d was %d",
1785 c->new_nhead_offs, len);
1786 err = -EINVAL;
1787 }
1788 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
1789 lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
1790 lpt_sz += c->ltab_sz;
1791 if (c->big_lpt)
1792 lpt_sz += c->lsave_sz;
1793 if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) {
1794 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
1795 c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz);
1796 err = -EINVAL;
1797 }
1798 if (err)
1799 dbg_dump_lpt_info(c);
1800 c->chk_lpt_sz2 = c->chk_lpt_sz;
1801 c->chk_lpt_sz = 0;
1802 c->chk_lpt_wastage = 0;
1803 c->chk_lpt_lebs = 0;
1804 c->new_nhead_offs = len;
1805 return err;
1806 case 4:
1807 c->chk_lpt_sz += len;
1808 c->chk_lpt_wastage += len;
1809 return 0;
1810 default:
1811 return -EINVAL;
1812 }
1813}
1814
1648#endif /* CONFIG_UBIFS_FS_DEBUG */ 1815#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4c12a9215d7f..4fa81d867e41 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c,
310 return ubifs_tnc_locate(c, key, node, NULL, NULL); 310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311} 311}
312 312
313/**
314 * ubifs_get_lprops - get reference to LEB properties.
315 * @c: the UBIFS file-system description object
316 *
317 * This function locks lprops. Lprops have to be unlocked by
318 * 'ubifs_release_lprops()'.
319 */
320static inline void ubifs_get_lprops(struct ubifs_info *c)
321{
322 mutex_lock(&c->lp_mutex);
323}
324
325/**
326 * ubifs_release_lprops - release lprops lock.
327 * @c: the UBIFS file-system description object
328 *
329 * This function has to be called after each 'ubifs_get_lprops()' call to
330 * unlock lprops.
331 */
332static inline void ubifs_release_lprops(struct ubifs_info *c)
333{
334 ubifs_assert(mutex_is_locked(&c->lp_mutex));
335 ubifs_assert(c->lst.empty_lebs >= 0 &&
336 c->lst.empty_lebs <= c->main_lebs);
337 mutex_unlock(&c->lp_mutex);
338}
339
313#endif /* __UBIFS_MISC_H__ */ 340#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index acf5c5fffc60..0ed82479b44b 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
87 87
88 dbg_scan("scanning %s", dbg_ntype(ch->node_type)); 88 dbg_scan("scanning %s", dbg_ntype(ch->node_type));
89 89
90 if (ubifs_check_node(c, buf, lnum, offs, quiet)) 90 if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
91 return SCANNED_A_CORRUPT_NODE; 91 return SCANNED_A_CORRUPT_NODE;
92 92
93 if (ch->node_type == UBIFS_PAD_NODE) { 93 if (ch->node_type == UBIFS_PAD_NODE) {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a9220333b3b..8780efbf40ac 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -401,6 +401,16 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
401 else if (c->mount_opts.unmount_mode == 1) 401 else if (c->mount_opts.unmount_mode == 1)
402 seq_printf(s, ",norm_unmount"); 402 seq_printf(s, ",norm_unmount");
403 403
404 if (c->mount_opts.bulk_read == 2)
405 seq_printf(s, ",bulk_read");
406 else if (c->mount_opts.bulk_read == 1)
407 seq_printf(s, ",no_bulk_read");
408
409 if (c->mount_opts.chk_data_crc == 2)
410 seq_printf(s, ",chk_data_crc");
411 else if (c->mount_opts.chk_data_crc == 1)
412 seq_printf(s, ",no_chk_data_crc");
413
404 return 0; 414 return 0;
405} 415}
406 416
@@ -408,13 +418,26 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
408{ 418{
409 struct ubifs_info *c = sb->s_fs_info; 419 struct ubifs_info *c = sb->s_fs_info;
410 int i, ret = 0, err; 420 int i, ret = 0, err;
421 long long bud_bytes;
411 422
412 if (c->jheads) 423 if (c->jheads) {
413 for (i = 0; i < c->jhead_cnt; i++) { 424 for (i = 0; i < c->jhead_cnt; i++) {
414 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 425 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
415 if (err && !ret) 426 if (err && !ret)
416 ret = err; 427 ret = err;
417 } 428 }
429
430 /* Commit the journal unless it has too little data */
431 spin_lock(&c->buds_lock);
432 bud_bytes = c->bud_bytes;
433 spin_unlock(&c->buds_lock);
434 if (bud_bytes > c->leb_size) {
435 err = ubifs_run_commit(c);
436 if (err)
437 return err;
438 }
439 }
440
418 /* 441 /*
419 * We ought to call sync for c->ubi but it does not have one. If it had 442 * We ought to call sync for c->ubi but it does not have one. If it had
420 * it would in turn call mtd->sync, however mtd operations are 443 * it would in turn call mtd->sync, however mtd operations are
@@ -538,6 +561,18 @@ static int init_constants_early(struct ubifs_info *c)
538 * calculations when reporting free space. 561 * calculations when reporting free space.
539 */ 562 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; 563 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
564 /* Buffer size for bulk-reads */
565 c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
566 if (c->bulk_read_buf_size > c->leb_size)
567 c->bulk_read_buf_size = c->leb_size;
568 if (c->bulk_read_buf_size > 128 * 1024) {
569 /* Check if we can kmalloc more than 128KiB */
570 void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
571
572 kfree(try);
573 if (!try)
574 c->bulk_read_buf_size = 128 * 1024;
575 }
541 return 0; 576 return 0;
542} 577}
543 578
@@ -840,17 +875,29 @@ static int check_volume_empty(struct ubifs_info *c)
840 * 875 *
841 * Opt_fast_unmount: do not run a journal commit before un-mounting 876 * Opt_fast_unmount: do not run a journal commit before un-mounting
842 * Opt_norm_unmount: run a journal commit before un-mounting 877 * Opt_norm_unmount: run a journal commit before un-mounting
878 * Opt_bulk_read: enable bulk-reads
879 * Opt_no_bulk_read: disable bulk-reads
880 * Opt_chk_data_crc: check CRCs when reading data nodes
881 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
843 * Opt_err: just end of array marker 882 * Opt_err: just end of array marker
844 */ 883 */
845enum { 884enum {
846 Opt_fast_unmount, 885 Opt_fast_unmount,
847 Opt_norm_unmount, 886 Opt_norm_unmount,
887 Opt_bulk_read,
888 Opt_no_bulk_read,
889 Opt_chk_data_crc,
890 Opt_no_chk_data_crc,
848 Opt_err, 891 Opt_err,
849}; 892};
850 893
851static const match_table_t tokens = { 894static const match_table_t tokens = {
852 {Opt_fast_unmount, "fast_unmount"}, 895 {Opt_fast_unmount, "fast_unmount"},
853 {Opt_norm_unmount, "norm_unmount"}, 896 {Opt_norm_unmount, "norm_unmount"},
897 {Opt_bulk_read, "bulk_read"},
898 {Opt_no_bulk_read, "no_bulk_read"},
899 {Opt_chk_data_crc, "chk_data_crc"},
900 {Opt_no_chk_data_crc, "no_chk_data_crc"},
854 {Opt_err, NULL}, 901 {Opt_err, NULL},
855}; 902};
856 903
@@ -888,6 +935,22 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
888 c->mount_opts.unmount_mode = 1; 935 c->mount_opts.unmount_mode = 1;
889 c->fast_unmount = 0; 936 c->fast_unmount = 0;
890 break; 937 break;
938 case Opt_bulk_read:
939 c->mount_opts.bulk_read = 2;
940 c->bulk_read = 1;
941 break;
942 case Opt_no_bulk_read:
943 c->mount_opts.bulk_read = 1;
944 c->bulk_read = 0;
945 break;
946 case Opt_chk_data_crc:
947 c->mount_opts.chk_data_crc = 2;
948 c->no_chk_data_crc = 0;
949 break;
950 case Opt_no_chk_data_crc:
951 c->mount_opts.chk_data_crc = 1;
952 c->no_chk_data_crc = 1;
953 break;
891 default: 954 default:
892 ubifs_err("unrecognized mount option \"%s\" " 955 ubifs_err("unrecognized mount option \"%s\" "
893 "or missing value", p); 956 "or missing value", p);
@@ -996,6 +1059,8 @@ static int mount_ubifs(struct ubifs_info *c)
996 goto out_free; 1059 goto out_free;
997 } 1060 }
998 1061
1062 c->always_chk_crc = 1;
1063
999 err = ubifs_read_superblock(c); 1064 err = ubifs_read_superblock(c);
1000 if (err) 1065 if (err)
1001 goto out_free; 1066 goto out_free;
@@ -1032,8 +1097,6 @@ static int mount_ubifs(struct ubifs_info *c)
1032 1097
1033 /* Create background thread */ 1098 /* Create background thread */
1034 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1099 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1035 if (!c->bgt)
1036 c->bgt = ERR_PTR(-EINVAL);
1037 if (IS_ERR(c->bgt)) { 1100 if (IS_ERR(c->bgt)) {
1038 err = PTR_ERR(c->bgt); 1101 err = PTR_ERR(c->bgt);
1039 c->bgt = NULL; 1102 c->bgt = NULL;
@@ -1139,24 +1202,28 @@ static int mount_ubifs(struct ubifs_info *c)
1139 if (err) 1202 if (err)
1140 goto out_infos; 1203 goto out_infos;
1141 1204
1205 c->always_chk_crc = 0;
1206
1142 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1207 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1143 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1208 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
1144 if (mounted_read_only) 1209 if (mounted_read_only)
1145 ubifs_msg("mounted read-only"); 1210 ubifs_msg("mounted read-only");
1146 x = (long long)c->main_lebs * c->leb_size; 1211 x = (long long)c->main_lebs * c->leb_size;
1147 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", 1212 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
1148 x, x >> 10, x >> 20, c->main_lebs); 1213 "LEBs)", x, x >> 10, x >> 20, c->main_lebs);
1149 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; 1214 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1150 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", 1215 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
1151 x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); 1216 "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
1152 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); 1217 ubifs_msg("media format: %d (latest is %d)",
1153 ubifs_msg("media format %d, latest format %d",
1154 c->fmt_version, UBIFS_FORMAT_VERSION); 1218 c->fmt_version, UBIFS_FORMAT_VERSION);
1219 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
1220 ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
1221 c->report_rp_size, c->report_rp_size >> 10);
1155 1222
1156 dbg_msg("compiled on: " __DATE__ " at " __TIME__); 1223 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1157 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); 1224 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1158 dbg_msg("LEB size: %d bytes (%d KiB)", 1225 dbg_msg("LEB size: %d bytes (%d KiB)",
1159 c->leb_size, c->leb_size / 1024); 1226 c->leb_size, c->leb_size >> 10);
1160 dbg_msg("data journal heads: %d", 1227 dbg_msg("data journal heads: %d",
1161 c->jhead_cnt - NONDATA_JHEADS_CNT); 1228 c->jhead_cnt - NONDATA_JHEADS_CNT);
1162 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" 1229 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
@@ -1282,6 +1349,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1282 1349
1283 mutex_lock(&c->umount_mutex); 1350 mutex_lock(&c->umount_mutex);
1284 c->remounting_rw = 1; 1351 c->remounting_rw = 1;
1352 c->always_chk_crc = 1;
1285 1353
1286 /* Check for enough free space */ 1354 /* Check for enough free space */
1287 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1355 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
@@ -1345,20 +1413,20 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1345 1413
1346 /* Create background thread */ 1414 /* Create background thread */
1347 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1415 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1348 if (!c->bgt)
1349 c->bgt = ERR_PTR(-EINVAL);
1350 if (IS_ERR(c->bgt)) { 1416 if (IS_ERR(c->bgt)) {
1351 err = PTR_ERR(c->bgt); 1417 err = PTR_ERR(c->bgt);
1352 c->bgt = NULL; 1418 c->bgt = NULL;
1353 ubifs_err("cannot spawn \"%s\", error %d", 1419 ubifs_err("cannot spawn \"%s\", error %d",
1354 c->bgt_name, err); 1420 c->bgt_name, err);
1355 return err; 1421 goto out;
1356 } 1422 }
1357 wake_up_process(c->bgt); 1423 wake_up_process(c->bgt);
1358 1424
1359 c->orph_buf = vmalloc(c->leb_size); 1425 c->orph_buf = vmalloc(c->leb_size);
1360 if (!c->orph_buf) 1426 if (!c->orph_buf) {
1361 return -ENOMEM; 1427 err = -ENOMEM;
1428 goto out;
1429 }
1362 1430
1363 /* Check for enough log space */ 1431 /* Check for enough log space */
1364 lnum = c->lhead_lnum + 1; 1432 lnum = c->lhead_lnum + 1;
@@ -1385,6 +1453,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1385 dbg_gen("re-mounted read-write"); 1453 dbg_gen("re-mounted read-write");
1386 c->vfs_sb->s_flags &= ~MS_RDONLY; 1454 c->vfs_sb->s_flags &= ~MS_RDONLY;
1387 c->remounting_rw = 0; 1455 c->remounting_rw = 0;
1456 c->always_chk_crc = 0;
1388 mutex_unlock(&c->umount_mutex); 1457 mutex_unlock(&c->umount_mutex);
1389 return 0; 1458 return 0;
1390 1459
@@ -1400,6 +1469,7 @@ out:
1400 c->ileb_buf = NULL; 1469 c->ileb_buf = NULL;
1401 ubifs_lpt_free(c, 1); 1470 ubifs_lpt_free(c, 1);
1402 c->remounting_rw = 0; 1471 c->remounting_rw = 0;
1472 c->always_chk_crc = 0;
1403 mutex_unlock(&c->umount_mutex); 1473 mutex_unlock(&c->umount_mutex);
1404 return err; 1474 return err;
1405} 1475}
@@ -1408,12 +1478,9 @@ out:
1408 * commit_on_unmount - commit the journal when un-mounting. 1478 * commit_on_unmount - commit the journal when un-mounting.
1409 * @c: UBIFS file-system description object 1479 * @c: UBIFS file-system description object
1410 * 1480 *
1411 * This function is called during un-mounting and it commits the journal unless 1481 * This function is called during un-mounting and re-mounting, and it commits
1412 * the "fast unmount" mode is enabled. It also avoids committing the journal if 1482 * the journal unless the "fast unmount" mode is enabled. It also avoids
1413 * it contains too few data. 1483 * committing the journal if it contains too few data.
1414 *
1415 * Sometimes recovery requires the journal to be committed at least once, and
1416 * this function takes care about this.
1417 */ 1484 */
1418static void commit_on_unmount(struct ubifs_info *c) 1485static void commit_on_unmount(struct ubifs_info *c)
1419{ 1486{
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 7634c5970887..d27fd918b9c9 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
284 } 284 }
285 285
286 zn = copy_znode(c, znode); 286 zn = copy_znode(c, znode);
287 if (unlikely(IS_ERR(zn))) 287 if (IS_ERR(zn))
288 return zn; 288 return zn;
289 289
290 if (zbr->len) { 290 if (zbr->len) {
@@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
470 if (node_len != len) 470 if (node_len != len)
471 return 0; 471 return 0;
472 472
473 if (type == UBIFS_DATA_NODE && !c->always_chk_crc)
474 if (c->no_chk_data_crc)
475 return 0;
476
473 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 477 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
474 node_crc = le32_to_cpu(ch->crc); 478 node_crc = le32_to_cpu(ch->crc);
475 if (crc != node_crc) 479 if (crc != node_crc)
@@ -1128,7 +1132,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1128 ubifs_assert(znode == c->zroot.znode); 1132 ubifs_assert(znode == c->zroot.znode);
1129 znode = dirty_cow_znode(c, &c->zroot); 1133 znode = dirty_cow_znode(c, &c->zroot);
1130 } 1134 }
1131 if (unlikely(IS_ERR(znode)) || !p) 1135 if (IS_ERR(znode) || !p)
1132 break; 1136 break;
1133 ubifs_assert(path[p - 1] >= 0); 1137 ubifs_assert(path[p - 1] >= 0);
1134 ubifs_assert(path[p - 1] < znode->child_cnt); 1138 ubifs_assert(path[p - 1] < znode->child_cnt);
@@ -1492,6 +1496,289 @@ out:
1492} 1496}
1493 1497
1494/** 1498/**
1499 * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
1500 * @c: UBIFS file-system description object
1501 * @bu: bulk-read parameters and results
1502 *
1503 * Lookup consecutive data node keys for the same inode that reside
1504 * consecutively in the same LEB.
1505 */
1506int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
1507{
1508 int n, err = 0, lnum = -1, uninitialized_var(offs);
1509 int uninitialized_var(len);
1510 unsigned int block = key_block(c, &bu->key);
1511 struct ubifs_znode *znode;
1512
1513 bu->cnt = 0;
1514 bu->blk_cnt = 0;
1515 bu->eof = 0;
1516
1517 mutex_lock(&c->tnc_mutex);
1518 /* Find first key */
1519 err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
1520 if (err < 0)
1521 goto out;
1522 if (err) {
1523 /* Key found */
1524 len = znode->zbranch[n].len;
1525 /* The buffer must be big enough for at least 1 node */
1526 if (len > bu->buf_len) {
1527 err = -EINVAL;
1528 goto out;
1529 }
1530 /* Add this key */
1531 bu->zbranch[bu->cnt++] = znode->zbranch[n];
1532 bu->blk_cnt += 1;
1533 lnum = znode->zbranch[n].lnum;
1534 offs = ALIGN(znode->zbranch[n].offs + len, 8);
1535 }
1536 while (1) {
1537 struct ubifs_zbranch *zbr;
1538 union ubifs_key *key;
1539 unsigned int next_block;
1540
1541 /* Find next key */
1542 err = tnc_next(c, &znode, &n);
1543 if (err)
1544 goto out;
1545 zbr = &znode->zbranch[n];
1546 key = &zbr->key;
1547 /* See if there is another data key for this file */
1548 if (key_inum(c, key) != key_inum(c, &bu->key) ||
1549 key_type(c, key) != UBIFS_DATA_KEY) {
1550 err = -ENOENT;
1551 goto out;
1552 }
1553 if (lnum < 0) {
1554 /* First key found */
1555 lnum = zbr->lnum;
1556 offs = ALIGN(zbr->offs + zbr->len, 8);
1557 len = zbr->len;
1558 if (len > bu->buf_len) {
1559 err = -EINVAL;
1560 goto out;
1561 }
1562 } else {
1563 /*
1564 * The data nodes must be in consecutive positions in
1565 * the same LEB.
1566 */
1567 if (zbr->lnum != lnum || zbr->offs != offs)
1568 goto out;
1569 offs += ALIGN(zbr->len, 8);
1570 len = ALIGN(len, 8) + zbr->len;
1571 /* Must not exceed buffer length */
1572 if (len > bu->buf_len)
1573 goto out;
1574 }
1575 /* Allow for holes */
1576 next_block = key_block(c, key);
1577 bu->blk_cnt += (next_block - block - 1);
1578 if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1579 goto out;
1580 block = next_block;
1581 /* Add this key */
1582 bu->zbranch[bu->cnt++] = *zbr;
1583 bu->blk_cnt += 1;
1584 /* See if we have room for more */
1585 if (bu->cnt >= UBIFS_MAX_BULK_READ)
1586 goto out;
1587 if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1588 goto out;
1589 }
1590out:
1591 if (err == -ENOENT) {
1592 bu->eof = 1;
1593 err = 0;
1594 }
1595 bu->gc_seq = c->gc_seq;
1596 mutex_unlock(&c->tnc_mutex);
1597 if (err)
1598 return err;
1599 /*
1600 * An enormous hole could cause bulk-read to encompass too many
1601 * page cache pages, so limit the number here.
1602 */
1603 if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
1604 bu->blk_cnt = UBIFS_MAX_BULK_READ;
1605 /*
1606 * Ensure that bulk-read covers a whole number of page cache
1607 * pages.
1608 */
1609 if (UBIFS_BLOCKS_PER_PAGE == 1 ||
1610 !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
1611 return 0;
1612 if (bu->eof) {
1613 /* At the end of file we can round up */
1614 bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
1615 return 0;
1616 }
1617 /* Exclude data nodes that do not make up a whole page cache page */
1618 block = key_block(c, &bu->key) + bu->blk_cnt;
1619 block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
1620 while (bu->cnt) {
1621 if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
1622 break;
1623 bu->cnt -= 1;
1624 }
1625 return 0;
1626}
1627
1628/**
1629 * read_wbuf - bulk-read from a LEB with a wbuf.
1630 * @wbuf: wbuf that may overlap the read
1631 * @buf: buffer into which to read
1632 * @len: read length
1633 * @lnum: LEB number from which to read
1634 * @offs: offset from which to read
1635 *
1636 * This functions returns %0 on success or a negative error code on failure.
1637 */
1638static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1639 int offs)
1640{
1641 const struct ubifs_info *c = wbuf->c;
1642 int rlen, overlap;
1643
1644 dbg_io("LEB %d:%d, length %d", lnum, offs, len);
1645 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
1646 ubifs_assert(!(offs & 7) && offs < c->leb_size);
1647 ubifs_assert(offs + len <= c->leb_size);
1648
1649 spin_lock(&wbuf->lock);
1650 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
1651 if (!overlap) {
1652 /* We may safely unlock the write-buffer and read the data */
1653 spin_unlock(&wbuf->lock);
1654 return ubi_read(c->ubi, lnum, buf, offs, len);
1655 }
1656
1657 /* Don't read under wbuf */
1658 rlen = wbuf->offs - offs;
1659 if (rlen < 0)
1660 rlen = 0;
1661
1662 /* Copy the rest from the write-buffer */
1663 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
1664 spin_unlock(&wbuf->lock);
1665
1666 if (rlen > 0)
1667 /* Read everything that goes before write-buffer */
1668 return ubi_read(c->ubi, lnum, buf, offs, rlen);
1669
1670 return 0;
1671}
1672
1673/**
1674 * validate_data_node - validate data nodes for bulk-read.
1675 * @c: UBIFS file-system description object
1676 * @buf: buffer containing data node to validate
1677 * @zbr: zbranch of data node to validate
1678 *
1679 * This functions returns %0 on success or a negative error code on failure.
1680 */
1681static int validate_data_node(struct ubifs_info *c, void *buf,
1682 struct ubifs_zbranch *zbr)
1683{
1684 union ubifs_key key1;
1685 struct ubifs_ch *ch = buf;
1686 int err, len;
1687
1688 if (ch->node_type != UBIFS_DATA_NODE) {
1689 ubifs_err("bad node type (%d but expected %d)",
1690 ch->node_type, UBIFS_DATA_NODE);
1691 goto out_err;
1692 }
1693
1694 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
1695 if (err) {
1696 ubifs_err("expected node type %d", UBIFS_DATA_NODE);
1697 goto out;
1698 }
1699
1700 len = le32_to_cpu(ch->len);
1701 if (len != zbr->len) {
1702 ubifs_err("bad node length %d, expected %d", len, zbr->len);
1703 goto out_err;
1704 }
1705
1706 /* Make sure the key of the read node is correct */
1707 key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
1708 if (!keys_eq(c, &zbr->key, &key1)) {
1709 ubifs_err("bad key in node at LEB %d:%d",
1710 zbr->lnum, zbr->offs);
1711 dbg_tnc("looked for key %s found node's key %s",
1712 DBGKEY(&zbr->key), DBGKEY1(&key1));
1713 goto out_err;
1714 }
1715
1716 return 0;
1717
1718out_err:
1719 err = -EINVAL;
1720out:
1721 ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
1722 dbg_dump_node(c, buf);
1723 dbg_dump_stack();
1724 return err;
1725}
1726
1727/**
1728 * ubifs_tnc_bulk_read - read a number of data nodes in one go.
1729 * @c: UBIFS file-system description object
1730 * @bu: bulk-read parameters and results
1731 *
1732 * This functions reads and validates the data nodes that were identified by the
1733 * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success,
1734 * -EAGAIN to indicate a race with GC, or another negative error code on
1735 * failure.
1736 */
1737int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1738{
1739 int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
1740 struct ubifs_wbuf *wbuf;
1741 void *buf;
1742
1743 len = bu->zbranch[bu->cnt - 1].offs;
1744 len += bu->zbranch[bu->cnt - 1].len - offs;
1745 if (len > bu->buf_len) {
1746 ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
1747 return -EINVAL;
1748 }
1749
1750 /* Do the read */
1751 wbuf = ubifs_get_wbuf(c, lnum);
1752 if (wbuf)
1753 err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
1754 else
1755 err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
1756
1757 /* Check for a race with GC */
1758 if (maybe_leb_gced(c, lnum, bu->gc_seq))
1759 return -EAGAIN;
1760
1761 if (err && err != -EBADMSG) {
1762 ubifs_err("failed to read from LEB %d:%d, error %d",
1763 lnum, offs, err);
1764 dbg_dump_stack();
1765 dbg_tnc("key %s", DBGKEY(&bu->key));
1766 return err;
1767 }
1768
1769 /* Validate the nodes read */
1770 buf = bu->buf;
1771 for (i = 0; i < bu->cnt; i++) {
1772 err = validate_data_node(c, buf, &bu->zbranch[i]);
1773 if (err)
1774 return err;
1775 buf = buf + ALIGN(bu->zbranch[i].len, 8);
1776 }
1777
1778 return 0;
1779}
1780
1781/**
1495 * do_lookup_nm- look up a "hashed" node. 1782 * do_lookup_nm- look up a "hashed" node.
1496 * @c: UBIFS file-system description object 1783 * @c: UBIFS file-system description object
1497 * @key: node key to lookup 1784 * @key: node key to lookup
@@ -1675,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
1675{ 1962{
1676 struct ubifs_znode *zn, *zi, *zp; 1963 struct ubifs_znode *zn, *zi, *zp;
1677 int i, keep, move, appending = 0; 1964 int i, keep, move, appending = 0;
1678 union ubifs_key *key = &zbr->key; 1965 union ubifs_key *key = &zbr->key, *key1;
1679 1966
1680 ubifs_assert(n >= 0 && n <= c->fanout); 1967 ubifs_assert(n >= 0 && n <= c->fanout);
1681 1968
@@ -1716,20 +2003,33 @@ again:
1716 zn->level = znode->level; 2003 zn->level = znode->level;
1717 2004
1718 /* Decide where to split */ 2005 /* Decide where to split */
1719 if (znode->level == 0 && n == c->fanout && 2006 if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
1720 key_type(c, key) == UBIFS_DATA_KEY) { 2007 /* Try not to split consecutive data keys */
1721 union ubifs_key *key1; 2008 if (n == c->fanout) {
1722 2009 key1 = &znode->zbranch[n - 1].key;
1723 /* 2010 if (key_inum(c, key1) == key_inum(c, key) &&
1724 * If this is an inode which is being appended - do not split 2011 key_type(c, key1) == UBIFS_DATA_KEY)
1725 * it because no other zbranches can be inserted between 2012 appending = 1;
1726 * zbranches of consecutive data nodes anyway. 2013 } else
1727 */ 2014 goto check_split;
1728 key1 = &znode->zbranch[n - 1].key; 2015 } else if (appending && n != c->fanout) {
1729 if (key_inum(c, key1) == key_inum(c, key) && 2016 /* Try not to split consecutive data keys */
1730 key_type(c, key1) == UBIFS_DATA_KEY && 2017 appending = 0;
1731 key_block(c, key1) == key_block(c, key) - 1) 2018check_split:
1732 appending = 1; 2019 if (n >= (c->fanout + 1) / 2) {
2020 key1 = &znode->zbranch[0].key;
2021 if (key_inum(c, key1) == key_inum(c, key) &&
2022 key_type(c, key1) == UBIFS_DATA_KEY) {
2023 key1 = &znode->zbranch[n].key;
2024 if (key_inum(c, key1) != key_inum(c, key) ||
2025 key_type(c, key1) != UBIFS_DATA_KEY) {
2026 keep = n;
2027 move = c->fanout - keep;
2028 zi = znode;
2029 goto do_split;
2030 }
2031 }
2032 }
1733 } 2033 }
1734 2034
1735 if (appending) { 2035 if (appending) {
@@ -1759,6 +2059,8 @@ again:
1759 zbr->znode->parent = zn; 2059 zbr->znode->parent = zn;
1760 } 2060 }
1761 2061
2062do_split:
2063
1762 __set_bit(DIRTY_ZNODE, &zn->flags); 2064 __set_bit(DIRTY_ZNODE, &zn->flags);
1763 atomic_long_inc(&c->dirty_zn_cnt); 2065 atomic_long_inc(&c->dirty_zn_cnt);
1764 2066
@@ -1785,14 +2087,11 @@ again:
1785 2087
1786 /* Insert new znode (produced by spitting) into the parent */ 2088 /* Insert new znode (produced by spitting) into the parent */
1787 if (zp) { 2089 if (zp) {
1788 i = n; 2090 if (n == 0 && zi == znode && znode->iip == 0)
2091 correct_parent_keys(c, znode);
2092
1789 /* Locate insertion point */ 2093 /* Locate insertion point */
1790 n = znode->iip + 1; 2094 n = znode->iip + 1;
1791 if (appending && n != c->fanout)
1792 appending = 0;
1793
1794 if (i == 0 && zi == znode && znode->iip == 0)
1795 correct_parent_keys(c, znode);
1796 2095
1797 /* Tail recursion */ 2096 /* Tail recursion */
1798 zbr->key = zn->zbranch[0].key; 2097 zbr->key = zn->zbranch[0].key;
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index a25c1cc1f8d9..b48db999903e 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
480 } 480 }
481 481
482 /* Make sure the key of the read node is correct */ 482 /* Make sure the key of the read node is correct */
483 key_read(c, key, &key1); 483 key_read(c, node + UBIFS_KEY_OFFSET, &key1);
484 if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { 484 if (!keys_eq(c, key, &key1)) {
485 ubifs_err("bad key in node at LEB %d:%d", 485 ubifs_err("bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs); 486 zbr->lnum, zbr->offs);
487 dbg_tnc("looked for key %s found node's key %s", 487 dbg_tnc("looked for key %s found node's key %s",
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index a9ecbd9af20d..0b378042a3a2 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -75,7 +75,6 @@
75 */ 75 */
76#define UBIFS_BLOCK_SIZE 4096 76#define UBIFS_BLOCK_SIZE 4096
77#define UBIFS_BLOCK_SHIFT 12 77#define UBIFS_BLOCK_SHIFT 12
78#define UBIFS_BLOCK_MASK 0x00000FFF
79 78
80/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ 79/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
81#define UBIFS_PADDING_BYTE 0xCE 80#define UBIFS_PADDING_BYTE 0xCE
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 17c620b93eec..a7bd32fa15b9 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -142,6 +142,9 @@
142/* Maximum expected tree height for use by bottom_up_buf */ 142/* Maximum expected tree height for use by bottom_up_buf */
143#define BOTTOM_UP_HEIGHT 64 143#define BOTTOM_UP_HEIGHT 64
144 144
145/* Maximum number of data nodes to bulk-read */
146#define UBIFS_MAX_BULK_READ 32
147
145/* 148/*
146 * Lockdep classes for UBIFS inode @ui_mutex. 149 * Lockdep classes for UBIFS inode @ui_mutex.
147 */ 150 */
@@ -328,9 +331,10 @@ struct ubifs_gced_idx_leb {
328 * this inode 331 * this inode
329 * @dirty: non-zero if the inode is dirty 332 * @dirty: non-zero if the inode is dirty
330 * @xattr: non-zero if this is an extended attribute inode 333 * @xattr: non-zero if this is an extended attribute inode
334 * @bulk_read: non-zero if bulk-read should be used
331 * @ui_mutex: serializes inode write-back with the rest of VFS operations, 335 * @ui_mutex: serializes inode write-back with the rest of VFS operations,
332 * serializes "clean <-> dirty" state changes, protects @dirty, 336 * serializes "clean <-> dirty" state changes, serializes bulk-read,
333 * @ui_size, and @xattr_size 337 * protects @dirty, @bulk_read, @ui_size, and @xattr_size
334 * @ui_lock: protects @synced_i_size 338 * @ui_lock: protects @synced_i_size
335 * @synced_i_size: synchronized size of inode, i.e. the value of inode size 339 * @synced_i_size: synchronized size of inode, i.e. the value of inode size
336 * currently stored on the flash; used only for regular file 340 * currently stored on the flash; used only for regular file
@@ -338,6 +342,8 @@ struct ubifs_gced_idx_leb {
338 * @ui_size: inode size used by UBIFS when writing to flash 342 * @ui_size: inode size used by UBIFS when writing to flash
339 * @flags: inode flags (@UBIFS_COMPR_FL, etc) 343 * @flags: inode flags (@UBIFS_COMPR_FL, etc)
340 * @compr_type: default compression type used for this inode 344 * @compr_type: default compression type used for this inode
345 * @last_page_read: page number of last page read (for bulk read)
346 * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
341 * @data_len: length of the data attached to the inode 347 * @data_len: length of the data attached to the inode
342 * @data: inode's data 348 * @data: inode's data
343 * 349 *
@@ -379,12 +385,15 @@ struct ubifs_inode {
379 unsigned int xattr_names; 385 unsigned int xattr_names;
380 unsigned int dirty:1; 386 unsigned int dirty:1;
381 unsigned int xattr:1; 387 unsigned int xattr:1;
388 unsigned int bulk_read:1;
382 struct mutex ui_mutex; 389 struct mutex ui_mutex;
383 spinlock_t ui_lock; 390 spinlock_t ui_lock;
384 loff_t synced_i_size; 391 loff_t synced_i_size;
385 loff_t ui_size; 392 loff_t ui_size;
386 int flags; 393 int flags;
387 int compr_type; 394 int compr_type;
395 pgoff_t last_page_read;
396 pgoff_t read_in_a_row;
388 int data_len; 397 int data_len;
389 void *data; 398 void *data;
390}; 399};
@@ -698,8 +707,8 @@ struct ubifs_jhead {
698 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. 707 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
699 * @key: key 708 * @key: key
700 * @znode: znode address in memory 709 * @znode: znode address in memory
701 * @lnum: LEB number of the indexing node 710 * @lnum: LEB number of the target node (indexing node or data node)
702 * @offs: offset of the indexing node within @lnum 711 * @offs: target node offset within @lnum
703 * @len: target node length 712 * @len: target node length
704 */ 713 */
705struct ubifs_zbranch { 714struct ubifs_zbranch {
@@ -744,6 +753,28 @@ struct ubifs_znode {
744}; 753};
745 754
746/** 755/**
756 * struct bu_info - bulk-read information
757 * @key: first data node key
758 * @zbranch: zbranches of data nodes to bulk read
759 * @buf: buffer to read into
760 * @buf_len: buffer length
761 * @gc_seq: GC sequence number to detect races with GC
762 * @cnt: number of data nodes for bulk read
763 * @blk_cnt: number of data blocks including holes
764 * @oef: end of file reached
765 */
766struct bu_info {
767 union ubifs_key key;
768 struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
769 void *buf;
770 int buf_len;
771 int gc_seq;
772 int cnt;
773 int blk_cnt;
774 int eof;
775};
776
777/**
747 * struct ubifs_node_range - node length range description data structure. 778 * struct ubifs_node_range - node length range description data structure.
748 * @len: fixed node length 779 * @len: fixed node length
749 * @min_len: minimum possible node length 780 * @min_len: minimum possible node length
@@ -862,9 +893,13 @@ struct ubifs_orphan {
862/** 893/**
863 * struct ubifs_mount_opts - UBIFS-specific mount options information. 894 * struct ubifs_mount_opts - UBIFS-specific mount options information.
864 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 895 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
896 * @bulk_read: enable bulk-reads
897 * @chk_data_crc: check CRCs when reading data nodes
865 */ 898 */
866struct ubifs_mount_opts { 899struct ubifs_mount_opts {
867 unsigned int unmount_mode:2; 900 unsigned int unmount_mode:2;
901 unsigned int bulk_read:2;
902 unsigned int chk_data_crc:2;
868}; 903};
869 904
870/** 905/**
@@ -905,13 +940,12 @@ struct ubifs_mount_opts {
905 * @cmt_state: commit state 940 * @cmt_state: commit state
906 * @cs_lock: commit state lock 941 * @cs_lock: commit state lock
907 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 942 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
943 *
908 * @fast_unmount: do not run journal commit before un-mounting 944 * @fast_unmount: do not run journal commit before un-mounting
909 * @big_lpt: flag that LPT is too big to write whole during commit 945 * @big_lpt: flag that LPT is too big to write whole during commit
910 * @check_lpt_free: flag that indicates LPT GC may be needed 946 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
911 * @nospace: non-zero if the file-system does not have flash space (used as 947 * recovery)
912 * optimization) 948 * @bulk_read: enable bulk-reads
913 * @nospace_rp: the same as @nospace, but additionally means that even reserved
914 * pool is full
915 * 949 *
916 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and 950 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
917 * @calc_idx_sz 951 * @calc_idx_sz
@@ -935,6 +969,7 @@ struct ubifs_mount_opts {
935 * @mst_node: master node 969 * @mst_node: master node
936 * @mst_offs: offset of valid master node 970 * @mst_offs: offset of valid master node
937 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs 971 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
972 * @bulk_read_buf_size: buffer size for bulk-reads
938 * 973 *
939 * @log_lebs: number of logical eraseblocks in the log 974 * @log_lebs: number of logical eraseblocks in the log
940 * @log_bytes: log size in bytes 975 * @log_bytes: log size in bytes
@@ -977,12 +1012,17 @@ struct ubifs_mount_opts {
977 * but which still have to be taken into account because 1012 * but which still have to be taken into account because
978 * the index has not been committed so far 1013 * the index has not been committed so far
979 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, 1014 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
980 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; 1015 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1016 * @nospace, and @nospace_rp;
981 * @min_idx_lebs: minimum number of LEBs required for the index 1017 * @min_idx_lebs: minimum number of LEBs required for the index
982 * @old_idx_sz: size of index on flash 1018 * @old_idx_sz: size of index on flash
983 * @calc_idx_sz: temporary variable which is used to calculate new index size 1019 * @calc_idx_sz: temporary variable which is used to calculate new index size
984 * (contains accurate new index size at end of TNC commit start) 1020 * (contains accurate new index size at end of TNC commit start)
985 * @lst: lprops statistics 1021 * @lst: lprops statistics
1022 * @nospace: non-zero if the file-system does not have flash space (used as
1023 * optimization)
1024 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1025 * pool is full
986 * 1026 *
987 * @page_budget: budget for a page 1027 * @page_budget: budget for a page
988 * @inode_budget: budget for an inode 1028 * @inode_budget: budget for an inode
@@ -1061,6 +1101,7 @@ struct ubifs_mount_opts {
1061 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab 1101 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
1062 * @dirty_nn_cnt: number of dirty nnodes 1102 * @dirty_nn_cnt: number of dirty nnodes
1063 * @dirty_pn_cnt: number of dirty pnodes 1103 * @dirty_pn_cnt: number of dirty pnodes
1104 * @check_lpt_free: flag that indicates LPT GC may be needed
1064 * @lpt_sz: LPT size 1105 * @lpt_sz: LPT size
1065 * @lpt_nod_buf: buffer for an on-flash nnode or pnode 1106 * @lpt_nod_buf: buffer for an on-flash nnode or pnode
1066 * @lpt_buf: buffer of LEB size used by LPT 1107 * @lpt_buf: buffer of LEB size used by LPT
@@ -1102,6 +1143,7 @@ struct ubifs_mount_opts {
1102 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw 1143 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
1103 * @size_tree: inode size information for recovery 1144 * @size_tree: inode size information for recovery
1104 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) 1145 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
1146 * @always_chk_crc: always check CRCs (while mounting and remounting rw)
1105 * @mount_opts: UBIFS-specific mount options 1147 * @mount_opts: UBIFS-specific mount options
1106 * 1148 *
1107 * @dbg_buf: a buffer of LEB size used for debugging purposes 1149 * @dbg_buf: a buffer of LEB size used for debugging purposes
@@ -1146,11 +1188,11 @@ struct ubifs_info {
1146 int cmt_state; 1188 int cmt_state;
1147 spinlock_t cs_lock; 1189 spinlock_t cs_lock;
1148 wait_queue_head_t cmt_wq; 1190 wait_queue_head_t cmt_wq;
1191
1149 unsigned int fast_unmount:1; 1192 unsigned int fast_unmount:1;
1150 unsigned int big_lpt:1; 1193 unsigned int big_lpt:1;
1151 unsigned int check_lpt_free:1; 1194 unsigned int no_chk_data_crc:1;
1152 unsigned int nospace:1; 1195 unsigned int bulk_read:1;
1153 unsigned int nospace_rp:1;
1154 1196
1155 struct mutex tnc_mutex; 1197 struct mutex tnc_mutex;
1156 struct ubifs_zbranch zroot; 1198 struct ubifs_zbranch zroot;
@@ -1175,6 +1217,7 @@ struct ubifs_info {
1175 struct ubifs_mst_node *mst_node; 1217 struct ubifs_mst_node *mst_node;
1176 int mst_offs; 1218 int mst_offs;
1177 struct mutex mst_mutex; 1219 struct mutex mst_mutex;
1220 int bulk_read_buf_size;
1178 1221
1179 int log_lebs; 1222 int log_lebs;
1180 long long log_bytes; 1223 long long log_bytes;
@@ -1218,6 +1261,8 @@ struct ubifs_info {
1218 unsigned long long old_idx_sz; 1261 unsigned long long old_idx_sz;
1219 unsigned long long calc_idx_sz; 1262 unsigned long long calc_idx_sz;
1220 struct ubifs_lp_stats lst; 1263 struct ubifs_lp_stats lst;
1264 unsigned int nospace:1;
1265 unsigned int nospace_rp:1;
1221 1266
1222 int page_budget; 1267 int page_budget;
1223 int inode_budget; 1268 int inode_budget;
@@ -1294,6 +1339,7 @@ struct ubifs_info {
1294 int lpt_drty_flgs; 1339 int lpt_drty_flgs;
1295 int dirty_nn_cnt; 1340 int dirty_nn_cnt;
1296 int dirty_pn_cnt; 1341 int dirty_pn_cnt;
1342 int check_lpt_free;
1297 long long lpt_sz; 1343 long long lpt_sz;
1298 void *lpt_nod_buf; 1344 void *lpt_nod_buf;
1299 void *lpt_buf; 1345 void *lpt_buf;
@@ -1335,6 +1381,7 @@ struct ubifs_info {
1335 struct ubifs_mst_node *rcvrd_mst_node; 1381 struct ubifs_mst_node *rcvrd_mst_node;
1336 struct rb_root size_tree; 1382 struct rb_root size_tree;
1337 int remounting_rw; 1383 int remounting_rw;
1384 int always_chk_crc;
1338 struct ubifs_mount_opts mount_opts; 1385 struct ubifs_mount_opts mount_opts;
1339 1386
1340#ifdef CONFIG_UBIFS_FS_DEBUG 1387#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -1347,6 +1394,12 @@ struct ubifs_info {
1347 unsigned long fail_timeout; 1394 unsigned long fail_timeout;
1348 unsigned int fail_cnt; 1395 unsigned int fail_cnt;
1349 unsigned int fail_cnt_max; 1396 unsigned int fail_cnt_max;
1397 long long chk_lpt_sz;
1398 long long chk_lpt_sz2;
1399 long long chk_lpt_wastage;
1400 int chk_lpt_lebs;
1401 int new_nhead_lnum;
1402 int new_nhead_offs;
1350#endif 1403#endif
1351}; 1404};
1352 1405
@@ -1377,7 +1430,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
1377int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, 1430int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
1378 int offs, int dtype); 1431 int offs, int dtype);
1379int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 1432int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
1380 int offs, int quiet); 1433 int offs, int quiet, int chk_crc);
1381void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); 1434void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
1382void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); 1435void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
1383int ubifs_io_init(struct ubifs_info *c); 1436int ubifs_io_init(struct ubifs_info *c);
@@ -1490,6 +1543,8 @@ void destroy_old_idx(struct ubifs_info *c);
1490int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, 1543int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
1491 int lnum, int offs); 1544 int lnum, int offs);
1492int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); 1545int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
1546int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
1547int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
1493 1548
1494/* tnc_misc.c */ 1549/* tnc_misc.c */
1495struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, 1550struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
@@ -1586,12 +1641,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c);
1586void ubifs_lpt_free(struct ubifs_info *c, int wr_only); 1641void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
1587 1642
1588/* lprops.c */ 1643/* lprops.c */
1589void ubifs_get_lprops(struct ubifs_info *c);
1590const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, 1644const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
1591 const struct ubifs_lprops *lp, 1645 const struct ubifs_lprops *lp,
1592 int free, int dirty, int flags, 1646 int free, int dirty, int flags,
1593 int idx_gc_cnt); 1647 int idx_gc_cnt);
1594void ubifs_release_lprops(struct ubifs_info *c);
1595void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); 1648void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
1596void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, 1649void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1597 int cat); 1650 int cat);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 649bec78b645..cfd31e229c89 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
446 int type; 446 int type;
447 447
448 xent = ubifs_tnc_next_ent(c, &key, &nm); 448 xent = ubifs_tnc_next_ent(c, &key, &nm);
449 if (unlikely(IS_ERR(xent))) { 449 if (IS_ERR(xent)) {
450 err = PTR_ERR(xent); 450 err = PTR_ERR(xent);
451 break; 451 break;
452 } 452 }
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index d3231947db19..082409cd4b8a 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -142,7 +142,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
142} 142}
143 143
144static struct fileIdentDesc *udf_find_entry(struct inode *dir, 144static struct fileIdentDesc *udf_find_entry(struct inode *dir,
145 struct dentry *dentry, 145 struct qstr *child,
146 struct udf_fileident_bh *fibh, 146 struct udf_fileident_bh *fibh,
147 struct fileIdentDesc *cfi) 147 struct fileIdentDesc *cfi)
148{ 148{
@@ -159,8 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
159 sector_t offset; 159 sector_t offset;
160 struct extent_position epos = {}; 160 struct extent_position epos = {};
161 struct udf_inode_info *dinfo = UDF_I(dir); 161 struct udf_inode_info *dinfo = UDF_I(dir);
162 int isdotdot = dentry->d_name.len == 2 && 162 int isdotdot = child->len == 2 &&
163 dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; 163 child->name[0] == '.' && child->name[1] == '.';
164 164
165 size = udf_ext0_offset(dir) + dir->i_size; 165 size = udf_ext0_offset(dir) + dir->i_size;
166 f_pos = udf_ext0_offset(dir); 166 f_pos = udf_ext0_offset(dir);
@@ -238,8 +238,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
238 continue; 238 continue;
239 239
240 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); 240 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
241 if (flen && udf_match(flen, fname, dentry->d_name.len, 241 if (flen && udf_match(flen, fname, child->len, child->name))
242 dentry->d_name.name))
243 goto out_ok; 242 goto out_ok;
244 } 243 }
245 244
@@ -283,7 +282,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
283 } else 282 } else
284#endif /* UDF_RECOVERY */ 283#endif /* UDF_RECOVERY */
285 284
286 if (udf_find_entry(dir, dentry, &fibh, &cfi)) { 285 if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
287 if (fibh.sbh != fibh.ebh) 286 if (fibh.sbh != fibh.ebh)
288 brelse(fibh.ebh); 287 brelse(fibh.ebh);
289 brelse(fibh.sbh); 288 brelse(fibh.sbh);
@@ -783,7 +782,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
783 782
784 retval = -ENOENT; 783 retval = -ENOENT;
785 lock_kernel(); 784 lock_kernel();
786 fi = udf_find_entry(dir, dentry, &fibh, &cfi); 785 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
787 if (!fi) 786 if (!fi)
788 goto out; 787 goto out;
789 788
@@ -829,7 +828,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
829 828
830 retval = -ENOENT; 829 retval = -ENOENT;
831 lock_kernel(); 830 lock_kernel();
832 fi = udf_find_entry(dir, dentry, &fibh, &cfi); 831 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
833 if (!fi) 832 if (!fi)
834 goto out; 833 goto out;
835 834
@@ -1113,7 +1112,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1113 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1112 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1114 1113
1115 lock_kernel(); 1114 lock_kernel();
1116 ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); 1115 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1117 if (ofi) { 1116 if (ofi) {
1118 if (ofibh.sbh != ofibh.ebh) 1117 if (ofibh.sbh != ofibh.ebh)
1119 brelse(ofibh.ebh); 1118 brelse(ofibh.ebh);
@@ -1124,7 +1123,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1124 != old_inode->i_ino) 1123 != old_inode->i_ino)
1125 goto end_rename; 1124 goto end_rename;
1126 1125
1127 nfi = udf_find_entry(new_dir, new_dentry, &nfibh, &ncfi); 1126 nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
1128 if (nfi) { 1127 if (nfi) {
1129 if (!new_inode) { 1128 if (!new_inode) {
1130 if (nfibh.sbh != nfibh.ebh) 1129 if (nfibh.sbh != nfibh.ebh)
@@ -1192,7 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1192 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); 1191 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
1193 1192
1194 /* The old fid may have moved - find it again */ 1193 /* The old fid may have moved - find it again */
1195 ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); 1194 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1196 udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); 1195 udf_delete_entry(old_dir, ofi, &ofibh, &ocfi);
1197 1196
1198 if (new_inode) { 1197 if (new_inode) {
@@ -1243,15 +1242,11 @@ end_rename:
1243 1242
1244static struct dentry *udf_get_parent(struct dentry *child) 1243static struct dentry *udf_get_parent(struct dentry *child)
1245{ 1244{
1246 struct dentry *parent;
1247 struct inode *inode = NULL; 1245 struct inode *inode = NULL;
1248 struct dentry dotdot; 1246 struct qstr dotdot = {.name = "..", .len = 2};
1249 struct fileIdentDesc cfi; 1247 struct fileIdentDesc cfi;
1250 struct udf_fileident_bh fibh; 1248 struct udf_fileident_bh fibh;
1251 1249
1252 dotdot.d_name.name = "..";
1253 dotdot.d_name.len = 2;
1254
1255 lock_kernel(); 1250 lock_kernel();
1256 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) 1251 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1257 goto out_unlock; 1252 goto out_unlock;
@@ -1266,13 +1261,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
1266 goto out_unlock; 1261 goto out_unlock;
1267 unlock_kernel(); 1262 unlock_kernel();
1268 1263
1269 parent = d_alloc_anon(inode); 1264 return d_obtain_alias(inode);
1270 if (!parent) {
1271 iput(inode);
1272 parent = ERR_PTR(-ENOMEM);
1273 }
1274
1275 return parent;
1276out_unlock: 1265out_unlock:
1277 unlock_kernel(); 1266 unlock_kernel();
1278 return ERR_PTR(-EACCES); 1267 return ERR_PTR(-EACCES);
@@ -1283,7 +1272,6 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1283 u16 partref, __u32 generation) 1272 u16 partref, __u32 generation)
1284{ 1273{
1285 struct inode *inode; 1274 struct inode *inode;
1286 struct dentry *result;
1287 kernel_lb_addr loc; 1275 kernel_lb_addr loc;
1288 1276
1289 if (block == 0) 1277 if (block == 0)
@@ -1300,12 +1288,7 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1300 iput(inode); 1288 iput(inode);
1301 return ERR_PTR(-ESTALE); 1289 return ERR_PTR(-ESTALE);
1302 } 1290 }
1303 result = d_alloc_anon(inode); 1291 return d_obtain_alias(inode);
1304 if (!result) {
1305 iput(inode);
1306 return ERR_PTR(-ENOMEM);
1307 }
1308 return result;
1309} 1292}
1310 1293
1311static struct dentry *udf_fh_to_dentry(struct super_block *sb, 1294static struct dentry *udf_fh_to_dentry(struct super_block *sb,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index df0bef18742d..dbbbc4668769 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -667,4 +667,5 @@ const struct file_operations ufs_dir_operations = {
667 .read = generic_read_dir, 667 .read = generic_read_dir,
668 .readdir = ufs_readdir, 668 .readdir = ufs_readdir,
669 .fsync = file_fsync, 669 .fsync = file_fsync,
670 .llseek = generic_file_llseek,
670}; 671};
diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile
deleted file mode 100644
index 40f2798a4f08..000000000000
--- a/fs/vfat/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the linux vfat-filesystem routines.
3#
4
5obj-$(CONFIG_VFAT_FS) += vfat.o
6
7vfat-y := namei.o
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 24fd598af846..7f7abec25e14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -148,7 +148,6 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
148{ 148{
149 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; 149 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
150 struct inode *inode = NULL; 150 struct inode *inode = NULL;
151 struct dentry *result;
152 151
153 if (fh_len < xfs_fileid_length(fileid_type)) 152 if (fh_len < xfs_fileid_length(fileid_type))
154 return NULL; 153 return NULL;
@@ -164,16 +163,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
164 break; 163 break;
165 } 164 }
166 165
167 if (!inode) 166 return d_obtain_alias(inode);
168 return NULL;
169 if (IS_ERR(inode))
170 return ERR_CAST(inode);
171 result = d_alloc_anon(inode);
172 if (!result) {
173 iput(inode);
174 return ERR_PTR(-ENOMEM);
175 }
176 return result;
177} 167}
178 168
179STATIC struct dentry * 169STATIC struct dentry *
@@ -182,7 +172,6 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
182{ 172{
183 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; 173 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
184 struct inode *inode = NULL; 174 struct inode *inode = NULL;
185 struct dentry *result;
186 175
187 switch (fileid_type) { 176 switch (fileid_type) {
188 case FILEID_INO32_GEN_PARENT: 177 case FILEID_INO32_GEN_PARENT:
@@ -195,16 +184,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
195 break; 184 break;
196 } 185 }
197 186
198 if (!inode) 187 return d_obtain_alias(inode);
199 return NULL;
200 if (IS_ERR(inode))
201 return ERR_CAST(inode);
202 result = d_alloc_anon(inode);
203 if (!result) {
204 iput(inode);
205 return ERR_PTR(-ENOMEM);
206 }
207 return result;
208} 188}
209 189
210STATIC struct dentry * 190STATIC struct dentry *
@@ -213,18 +193,12 @@ xfs_fs_get_parent(
213{ 193{
214 int error; 194 int error;
215 struct xfs_inode *cip; 195 struct xfs_inode *cip;
216 struct dentry *parent;
217 196
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); 197 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 198 if (unlikely(error))
220 return ERR_PTR(-error); 199 return ERR_PTR(-error);
221 200
222 parent = d_alloc_anon(VFS_I(cip)); 201 return d_obtain_alias(VFS_I(cip));
223 if (unlikely(!parent)) {
224 iput(VFS_I(cip));
225 return ERR_PTR(-ENOMEM);
226 }
227 return parent;
228} 202}
229 203
230const struct export_operations xfs_export_operations = { 204const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5311c1acdd40..3fee790f138b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -204,15 +204,6 @@ xfs_file_fsync(
204 return -xfs_fsync(XFS_I(dentry->d_inode)); 204 return -xfs_fsync(XFS_I(dentry->d_inode));
205} 205}
206 206
207/*
208 * Unfortunately we can't just use the clean and simple readdir implementation
209 * below, because nfs might call back into ->lookup from the filldir callback
210 * and that will deadlock the low-level btree code.
211 *
212 * Hopefully we'll find a better workaround that allows to use the optimal
213 * version at least for local readdirs for 2.6.25.
214 */
215#if 0
216STATIC int 207STATIC int
217xfs_file_readdir( 208xfs_file_readdir(
218 struct file *filp, 209 struct file *filp,
@@ -244,125 +235,6 @@ xfs_file_readdir(
244 return -error; 235 return -error;
245 return 0; 236 return 0;
246} 237}
247#else
248
249struct hack_dirent {
250 u64 ino;
251 loff_t offset;
252 int namlen;
253 unsigned int d_type;
254 char name[];
255};
256
257struct hack_callback {
258 char *dirent;
259 size_t len;
260 size_t used;
261};
262
263STATIC int
264xfs_hack_filldir(
265 void *__buf,
266 const char *name,
267 int namlen,
268 loff_t offset,
269 u64 ino,
270 unsigned int d_type)
271{
272 struct hack_callback *buf = __buf;
273 struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
274 unsigned int reclen;
275
276 reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
277 if (buf->used + reclen > buf->len)
278 return -EINVAL;
279
280 de->namlen = namlen;
281 de->offset = offset;
282 de->ino = ino;
283 de->d_type = d_type;
284 memcpy(de->name, name, namlen);
285 buf->used += reclen;
286 return 0;
287}
288
289STATIC int
290xfs_file_readdir(
291 struct file *filp,
292 void *dirent,
293 filldir_t filldir)
294{
295 struct inode *inode = filp->f_path.dentry->d_inode;
296 xfs_inode_t *ip = XFS_I(inode);
297 struct hack_callback buf;
298 struct hack_dirent *de;
299 int error;
300 loff_t size;
301 int eof = 0;
302 xfs_off_t start_offset, curr_offset, offset;
303
304 /*
305 * Try fairly hard to get memory
306 */
307 buf.len = PAGE_CACHE_SIZE;
308 do {
309 buf.dirent = kmalloc(buf.len, GFP_KERNEL);
310 if (buf.dirent)
311 break;
312 buf.len >>= 1;
313 } while (buf.len >= 1024);
314
315 if (!buf.dirent)
316 return -ENOMEM;
317
318 curr_offset = filp->f_pos;
319 if (curr_offset == 0x7fffffff)
320 offset = 0xffffffff;
321 else
322 offset = filp->f_pos;
323
324 while (!eof) {
325 unsigned int reclen;
326
327 start_offset = offset;
328
329 buf.used = 0;
330 error = -xfs_readdir(ip, &buf, buf.len, &offset,
331 xfs_hack_filldir);
332 if (error || offset == start_offset) {
333 size = 0;
334 break;
335 }
336
337 size = buf.used;
338 de = (struct hack_dirent *)buf.dirent;
339 while (size > 0) {
340 curr_offset = de->offset /* & 0x7fffffff */;
341 if (filldir(dirent, de->name, de->namlen,
342 curr_offset & 0x7fffffff,
343 de->ino, de->d_type)) {
344 goto done;
345 }
346
347 reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
348 sizeof(u64));
349 size -= reclen;
350 de = (struct hack_dirent *)((char *)de + reclen);
351 }
352 }
353
354 done:
355 if (!error) {
356 if (size == 0)
357 filp->f_pos = offset & 0x7fffffff;
358 else if (de)
359 filp->f_pos = curr_offset;
360 }
361
362 kfree(buf.dirent);
363 return error;
364}
365#endif
366 238
367STATIC int 239STATIC int
368xfs_file_mmap( 240xfs_file_mmap(
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 48799ba7e3e6..d3438c72dcaf 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -311,11 +311,10 @@ xfs_open_by_handle(
311 return new_fd; 311 return new_fd;
312 } 312 }
313 313
314 dentry = d_alloc_anon(inode); 314 dentry = d_obtain_alias(inode);
315 if (dentry == NULL) { 315 if (IS_ERR(dentry)) {
316 iput(inode);
317 put_unused_fd(new_fd); 316 put_unused_fd(new_fd);
318 return -XFS_ERROR(ENOMEM); 317 return PTR_ERR(dentry);
319 } 318 }
320 319
321 /* Ensure umount returns EBUSY on umounts while this file is open. */ 320 /* Ensure umount returns EBUSY on umounts while this file is open. */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index e39013619b26..37ebe36056eb 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -589,7 +589,7 @@ xfs_blkdev_get(
589{ 589{
590 int error = 0; 590 int error = 0;
591 591
592 *bdevp = open_bdev_excl(name, 0, mp); 592 *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
593 if (IS_ERR(*bdevp)) { 593 if (IS_ERR(*bdevp)) {
594 error = PTR_ERR(*bdevp); 594 error = PTR_ERR(*bdevp);
595 printk("XFS: Invalid device [%s], error=%d\n", name, error); 595 printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -603,7 +603,7 @@ xfs_blkdev_put(
603 struct block_device *bdev) 603 struct block_device *bdev)
604{ 604{
605 if (bdev) 605 if (bdev)
606 close_bdev_excl(bdev); 606 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
607} 607}
608 608
609/* 609/*