aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c4
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/9p/vfs_dir.c60
-rw-r--r--fs/9p/vfs_file.c93
-rw-r--r--fs/9p/vfs_inode.c39
-rw-r--r--fs/9p/vfs_super.c6
-rw-r--r--fs/Kconfig607
-rw-r--r--fs/Kconfig.binfmt24
-rw-r--r--fs/Makefile5
-rw-r--r--fs/afs/file.c4
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/write.c131
-rw-r--r--fs/autofs4/Makefile2
-rw-r--r--fs/autofs4/autofs_i.h44
-rw-r--r--fs/autofs4/dev-ioctl.c863
-rw-r--r--fs/autofs4/expire.c18
-rw-r--r--fs/autofs4/init.c11
-rw-r--r--fs/autofs4/inode.c8
-rw-r--r--fs/autofs4/waitq.c42
-rw-r--r--fs/befs/befs_fs_types.h4
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/befs/super.c6
-rw-r--r--fs/binfmt_elf.c37
-rw-r--r--fs/binfmt_elf_fdpic.c85
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_misc.c4
-rw-r--r--fs/binfmt_script.c5
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/cifs/Kconfig142
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/coda/psdev.c5
-rw-r--r--fs/compat.c41
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dquot.c2
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h28
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/keystore.c32
-rw-r--r--fs/ecryptfs/main.c19
-rw-r--r--fs/ecryptfs/messaging.c118
-rw-r--r--fs/ecryptfs/mmap.c81
-rw-r--r--fs/ecryptfs/netlink.c249
-rw-r--r--fs/eventpoll.c9
-rw-r--r--fs/exec.c22
-rw-r--r--fs/ext2/Kconfig55
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/dir.c60
-rw-r--r--fs/ext3/Kconfig67
-rw-r--r--fs/ext3/balloc.c3
-rw-r--r--fs/ext3/dir.c30
-rw-r--r--fs/ext3/inode.c7
-rw-r--r--fs/ext3/resize.c3
-rw-r--r--fs/ext3/super.c16
-rw-r--r--fs/ext4/Kconfig79
-rw-r--r--fs/ext4/balloc.c12
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/ext4_sb.h3
-rw-r--r--fs/ext4/inode.c143
-rw-r--r--fs/ext4/mballoc.c263
-rw-r--r--fs/ext4/mballoc.h31
-rw-r--r--fs/ext4/super.c132
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/fuse/fuse_i.h5
-rw-r--r--fs/fuse/inode.c3
-rw-r--r--fs/hfs/catalog.c4
-rw-r--r--fs/hfsplus/bitmap.c12
-rw-r--r--fs/hfsplus/catalog.c5
-rw-r--r--fs/hfsplus/extents.c3
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/jbd/Kconfig30
-rw-r--r--fs/jbd/commit.c10
-rw-r--r--fs/jbd/transaction.c16
-rw-r--r--fs/jbd2/Kconfig33
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--fs/jffs2/Kconfig188
-rw-r--r--fs/jffs2/compr.c4
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/erase.c4
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jffs2/nodemgmt.c4
-rw-r--r--fs/jffs2/wbuf.c5
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/nfs/callback.c19
-rw-r--r--fs/nfs/client.c5
-rw-r--r--fs/nfs/dir.c22
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/inode.c183
-rw-r--r--fs/nfs/internal.h25
-rw-r--r--fs/nfs/mount_clnt.c3
-rw-r--r--fs/nfs/namespace.c7
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c20
-rw-r--r--fs/nfs/nfs4namespace.c105
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/proc.c10
-rw-r--r--fs/nfs/super.c130
-rw-r--r--fs/nfs/unlink.c5
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nls/nls_base.c21
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ocfs2/xattr.c14
-rw-r--r--fs/partitions/acorn.c10
-rw-r--r--fs/partitions/check.c27
-rw-r--r--fs/proc/array.c8
-rw-r--r--fs/proc/proc_misc.c111
-rw-r--r--fs/proc/vmcore.c5
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/seq_file.c29
-rw-r--r--fs/sysfs/bin.c42
-rw-r--r--fs/sysfs/dir.c24
-rw-r--r--fs/sysfs/file.c46
-rw-r--r--fs/sysfs/mount.c15
-rw-r--r--fs/sysfs/sysfs.h6
-rw-r--r--fs/ubifs/budget.c26
-rw-r--r--fs/ubifs/compress.c2
-rw-r--r--fs/ubifs/debug.c79
-rw-r--r--fs/ubifs/debug.h6
-rw-r--r--fs/ubifs/file.c260
-rw-r--r--fs/ubifs/find.c4
-rw-r--r--fs/ubifs/gc.c90
-rw-r--r--fs/ubifs/io.c12
-rw-r--r--fs/ubifs/key.h22
-rw-r--r--fs/ubifs/lprops.c34
-rw-r--r--fs/ubifs/lpt.c48
-rw-r--r--fs/ubifs/lpt_commit.c187
-rw-r--r--fs/ubifs/misc.h27
-rw-r--r--fs/ubifs/scan.c2
-rw-r--r--fs/ubifs/super.c109
-rw-r--r--fs/ubifs/tnc.c345
-rw-r--r--fs/ubifs/tnc_misc.c4
-rw-r--r--fs/ubifs/ubifs-media.h1
-rw-r--r--fs/ubifs/ubifs.h85
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
145 files changed, 4252 insertions, 2207 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c061c3f18e7c..24eb01087b6d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -30,8 +30,8 @@
30#include <linux/parser.h> 30#include <linux/parser.h>
31#include <linux/idr.h> 31#include <linux/idr.h>
32#include <net/9p/9p.h> 32#include <net/9p/9p.h>
33#include <net/9p/transport.h>
34#include <net/9p/client.h> 33#include <net/9p/client.h>
34#include <net/9p/transport.h>
35#include "v9fs.h" 35#include "v9fs.h"
36#include "v9fs_vfs.h" 36#include "v9fs_vfs.h"
37 37
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
234 if (!v9ses->clnt->dotu) 234 if (!v9ses->clnt->dotu)
235 v9ses->flags &= ~V9FS_EXTENDED; 235 v9ses->flags &= ~V9FS_EXTENDED;
236 236
237 v9ses->maxdata = v9ses->clnt->msize; 237 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
238 238
239 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 239 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
240 if (!v9fs_extended(v9ses) && 240 if (!v9fs_extended(v9ses) &&
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 57997fa14e69..c295ba786edd 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -46,9 +46,11 @@ extern struct dentry_operations v9fs_cached_dentry_operations;
46 46
47struct inode *v9fs_get_inode(struct super_block *sb, int mode); 47struct inode *v9fs_get_inode(struct super_block *sb, int mode);
48ino_t v9fs_qid2ino(struct p9_qid *qid); 48ino_t v9fs_qid2ino(struct p9_qid *qid);
49void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *); 49void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); 52void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
54int v9fs_uflags2omode(int uflags, int extended); 54int v9fs_uflags2omode(int uflags, int extended);
55
56ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97d3aed57983..6fcb1e7095cf 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,7 +38,6 @@
38 38
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "fid.h"
42 41
43/** 42/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 43 * v9fs_vfs_readpage - read an entire page in from 9P
@@ -53,14 +52,12 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
53 int retval; 52 int retval;
54 loff_t offset; 53 loff_t offset;
55 char *buffer; 54 char *buffer;
56 struct p9_fid *fid;
57 55
58 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 56 P9_DPRINTK(P9_DEBUG_VFS, "\n");
59 fid = filp->private_data;
60 buffer = kmap(page); 57 buffer = kmap(page);
61 offset = page_offset(page); 58 offset = page_offset(page);
62 59
63 retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE); 60 retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE);
64 if (retval < 0) 61 if (retval < 0)
65 goto done; 62 goto done;
66 63
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e298fe194093..873cd31baa47 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -45,7 +45,7 @@
45 * 45 *
46 */ 46 */
47 47
48static inline int dt_type(struct p9_stat *mistat) 48static inline int dt_type(struct p9_wstat *mistat)
49{ 49{
50 unsigned long perm = mistat->mode; 50 unsigned long perm = mistat->mode;
51 int rettype = DT_REG; 51 int rettype = DT_REG;
@@ -69,32 +69,58 @@ static inline int dt_type(struct p9_stat *mistat)
69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) 69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
70{ 70{
71 int over; 71 int over;
72 struct p9_wstat st;
73 int err;
72 struct p9_fid *fid; 74 struct p9_fid *fid;
73 struct v9fs_session_info *v9ses; 75 int buflen;
74 struct inode *inode; 76 char *statbuf;
75 struct p9_stat *st; 77 int n, i = 0;
76 78
77 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 79 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
78 inode = filp->f_path.dentry->d_inode;
79 v9ses = v9fs_inode2v9ses(inode);
80 fid = filp->private_data; 80 fid = filp->private_data;
81 while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
82 if (IS_ERR(st))
83 return PTR_ERR(st);
84 81
85 over = filldir(dirent, st->name.str, st->name.len, filp->f_pos, 82 buflen = fid->clnt->msize - P9_IOHDRSZ;
86 v9fs_qid2ino(&st->qid), dt_type(st)); 83 statbuf = kmalloc(buflen, GFP_KERNEL);
84 if (!statbuf)
85 return -ENOMEM;
87 86
88 if (over) 87 while (1) {
88 err = v9fs_file_readn(filp, statbuf, NULL, buflen,
89 fid->rdir_fpos);
90 if (err <= 0)
89 break; 91 break;
90 92
91 filp->f_pos += st->size; 93 n = err;
92 kfree(st); 94 while (i < n) {
93 st = NULL; 95 err = p9stat_read(statbuf + i, buflen-i, &st,
96 fid->clnt->dotu);
97 if (err) {
98 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
99 err = -EIO;
100 p9stat_free(&st);
101 goto free_and_exit;
102 }
103
104 i += st.size+2;
105 fid->rdir_fpos += st.size+2;
106
107 over = filldir(dirent, st.name, strlen(st.name),
108 filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
109
110 filp->f_pos += st.size+2;
111
112 p9stat_free(&st);
113
114 if (over) {
115 err = 0;
116 goto free_and_exit;
117 }
118 }
94 } 119 }
95 120
96 kfree(st); 121free_and_exit:
97 return 0; 122 kfree(statbuf);
123 return err;
98} 124}
99 125
100 126
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 52944d2249a4..041c52692284 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
120} 120}
121 121
122/** 122/**
123 * v9fs_file_read - read from a file 123 * v9fs_file_readn - read from a file
124 * @filp: file pointer to read 124 * @filp: file pointer to read
125 * @data: data buffer to read data into 125 * @data: data buffer to read data into
126 * @udata: user data buffer to read data into
126 * @count: size of buffer 127 * @count: size of buffer
127 * @offset: offset at which to read data 128 * @offset: offset at which to read data
128 * 129 *
129 */ 130 */
131
132ssize_t
133v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
134 u64 offset)
135{
136 int n, total;
137 struct p9_fid *fid = filp->private_data;
138
139 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
140 (long long unsigned) offset, count);
141
142 n = 0;
143 total = 0;
144 do {
145 n = p9_client_read(fid, data, udata, offset, count);
146 if (n <= 0)
147 break;
148
149 if (data)
150 data += n;
151 if (udata)
152 udata += n;
153
154 offset += n;
155 count -= n;
156 total += n;
157 } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
158
159 if (n < 0)
160 total = n;
161
162 return total;
163}
164
165/**
166 * v9fs_file_read - read from a file
167 * @filp: file pointer to read
168 * @udata: user data buffer to read data into
169 * @count: size of buffer
170 * @offset: offset at which to read data
171 *
172 */
173
130static ssize_t 174static ssize_t
131v9fs_file_read(struct file *filp, char __user * data, size_t count, 175v9fs_file_read(struct file *filp, char __user *udata, size_t count,
132 loff_t * offset) 176 loff_t * offset)
133{ 177{
134 int ret; 178 int ret;
135 struct p9_fid *fid; 179 struct p9_fid *fid;
136 180
137 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 181 P9_DPRINTK(P9_DEBUG_VFS, "count %d offset %lld\n", count, *offset);
138 fid = filp->private_data; 182 fid = filp->private_data;
139 ret = p9_client_uread(fid, data, *offset, count); 183
184 if (count > (fid->clnt->msize - P9_IOHDRSZ))
185 ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
186 else
187 ret = p9_client_read(fid, NULL, udata, *offset, count);
188
140 if (ret > 0) 189 if (ret > 0)
141 *offset += ret; 190 *offset += ret;
142 191
@@ -156,19 +205,38 @@ static ssize_t
156v9fs_file_write(struct file *filp, const char __user * data, 205v9fs_file_write(struct file *filp, const char __user * data,
157 size_t count, loff_t * offset) 206 size_t count, loff_t * offset)
158{ 207{
159 int ret; 208 int n, rsize, total = 0;
160 struct p9_fid *fid; 209 struct p9_fid *fid;
210 struct p9_client *clnt;
161 struct inode *inode = filp->f_path.dentry->d_inode; 211 struct inode *inode = filp->f_path.dentry->d_inode;
212 int origin = *offset;
162 213
163 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 214 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
164 (int)count, (int)*offset); 215 (int)count, (int)*offset);
165 216
166 fid = filp->private_data; 217 fid = filp->private_data;
167 ret = p9_client_uwrite(fid, data, *offset, count); 218 clnt = fid->clnt;
168 if (ret > 0) { 219
169 invalidate_inode_pages2_range(inode->i_mapping, *offset, 220 rsize = fid->iounit;
170 *offset+ret); 221 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
171 *offset += ret; 222 rsize = clnt->msize - P9_IOHDRSZ;
223
224 do {
225 if (count < rsize)
226 rsize = count;
227
228 n = p9_client_write(fid, NULL, data+total, *offset+total,
229 rsize);
230 if (n <= 0)
231 break;
232 count -= n;
233 total += n;
234 } while (count > 0);
235
236 if (total > 0) {
237 invalidate_inode_pages2_range(inode->i_mapping, origin,
238 origin+total);
239 *offset += total;
172 } 240 }
173 241
174 if (*offset > inode->i_size) { 242 if (*offset > inode->i_size) {
@@ -176,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data,
176 inode->i_blocks = (inode->i_size + 512 - 1) >> 9; 244 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
177 } 245 }
178 246
179 return ret; 247 if (n < 0)
248 return n;
249
250 return total;
180} 251}
181 252
182static const struct file_operations v9fs_cached_file_operations = { 253static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e83aa5ebe861..8314d3f43b71 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
334{ 334{
335 int err, umode; 335 int err, umode;
336 struct inode *ret; 336 struct inode *ret;
337 struct p9_stat *st; 337 struct p9_wstat *st;
338 338
339 ret = NULL; 339 ret = NULL;
340 st = p9_client_stat(fid); 340 st = p9_client_stat(fid);
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
417 struct p9_fid *dfid, *ofid, *fid; 417 struct p9_fid *dfid, *ofid, *fid;
418 struct inode *inode; 418 struct inode *inode;
419 419
420 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
421
420 err = 0; 422 err = 0;
421 ofid = NULL; 423 ofid = NULL;
422 fid = NULL; 424 fid = NULL;
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
424 dfid = v9fs_fid_clone(dentry->d_parent); 426 dfid = v9fs_fid_clone(dentry->d_parent);
425 if (IS_ERR(dfid)) { 427 if (IS_ERR(dfid)) {
426 err = PTR_ERR(dfid); 428 err = PTR_ERR(dfid);
429 P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
427 dfid = NULL; 430 dfid = NULL;
428 goto error; 431 goto error;
429 } 432 }
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
432 ofid = p9_client_walk(dfid, 0, NULL, 1); 435 ofid = p9_client_walk(dfid, 0, NULL, 1);
433 if (IS_ERR(ofid)) { 436 if (IS_ERR(ofid)) {
434 err = PTR_ERR(ofid); 437 err = PTR_ERR(ofid);
438 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
435 ofid = NULL; 439 ofid = NULL;
436 goto error; 440 goto error;
437 } 441 }
438 442
439 err = p9_client_fcreate(ofid, name, perm, mode, extension); 443 err = p9_client_fcreate(ofid, name, perm, mode, extension);
440 if (err < 0) 444 if (err < 0) {
445 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
441 goto error; 446 goto error;
447 }
442 448
443 /* now walk from the parent so we can get unopened fid */ 449 /* now walk from the parent so we can get unopened fid */
444 fid = p9_client_walk(dfid, 1, &name, 0); 450 fid = p9_client_walk(dfid, 1, &name, 0);
445 if (IS_ERR(fid)) { 451 if (IS_ERR(fid)) {
446 err = PTR_ERR(fid); 452 err = PTR_ERR(fid);
453 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
447 fid = NULL; 454 fid = NULL;
448 goto error; 455 goto error;
449 } else 456 } else
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
453 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 460 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
454 if (IS_ERR(inode)) { 461 if (IS_ERR(inode)) {
455 err = PTR_ERR(inode); 462 err = PTR_ERR(inode);
463 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
456 goto error; 464 goto error;
457 } 465 }
458 466
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
734 int err; 742 int err;
735 struct v9fs_session_info *v9ses; 743 struct v9fs_session_info *v9ses;
736 struct p9_fid *fid; 744 struct p9_fid *fid;
737 struct p9_stat *st; 745 struct p9_wstat *st;
738 746
739 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 747 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
740 err = -EPERM; 748 err = -EPERM;
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
815 */ 823 */
816 824
817void 825void
818v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, 826v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
819 struct super_block *sb) 827 struct super_block *sb)
820{ 828{
821 int n;
822 char ext[32]; 829 char ext[32];
823 struct v9fs_session_info *v9ses = sb->s_fs_info; 830 struct v9fs_session_info *v9ses = sb->s_fs_info;
824 831
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
842 int major = -1; 849 int major = -1;
843 int minor = -1; 850 int minor = -1;
844 851
845 n = stat->extension.len; 852 strncpy(ext, stat->extension, sizeof(ext));
846 if (n > sizeof(ext)-1)
847 n = sizeof(ext)-1;
848 memmove(ext, stat->extension.str, n);
849 ext[n] = 0;
850 sscanf(ext, "%c %u %u", &type, &major, &minor); 853 sscanf(ext, "%c %u %u", &type, &major, &minor);
851 switch (type) { 854 switch (type) {
852 case 'c': 855 case 'c':
@@ -857,10 +860,11 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
857 break; 860 break;
858 default: 861 default:
859 P9_DPRINTK(P9_DEBUG_ERROR, 862 P9_DPRINTK(P9_DEBUG_ERROR,
860 "Unknown special type %c (%.*s)\n", type, 863 "Unknown special type %c %s\n", type,
861 stat->extension.len, stat->extension.str); 864 stat->extension);
862 }; 865 };
863 inode->i_rdev = MKDEV(major, minor); 866 inode->i_rdev = MKDEV(major, minor);
867 init_special_inode(inode, inode->i_mode, inode->i_rdev);
864 } else 868 } else
865 inode->i_rdev = 0; 869 inode->i_rdev = 0;
866 870
@@ -904,7 +908,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
904 908
905 struct v9fs_session_info *v9ses; 909 struct v9fs_session_info *v9ses;
906 struct p9_fid *fid; 910 struct p9_fid *fid;
907 struct p9_stat *st; 911 struct p9_wstat *st;
908 912
909 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); 913 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
910 retval = -EPERM; 914 retval = -EPERM;
@@ -926,15 +930,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
926 } 930 }
927 931
928 /* copy extension buffer into buffer */ 932 /* copy extension buffer into buffer */
929 if (st->extension.len < buflen) 933 strncpy(buffer, st->extension, buflen);
930 buflen = st->extension.len + 1;
931
932 memmove(buffer, st->extension.str, buflen - 1);
933 buffer[buflen-1] = 0;
934 934
935 P9_DPRINTK(P9_DEBUG_VFS, 935 P9_DPRINTK(P9_DEBUG_VFS,
936 "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len, 936 "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
937 st->extension.str, buffer);
938 937
939 retval = buflen; 938 retval = buflen;
940 939
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf59c3960494..d6cb1a0ca724 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
111 struct inode *inode = NULL; 111 struct inode *inode = NULL;
112 struct dentry *root = NULL; 112 struct dentry *root = NULL;
113 struct v9fs_session_info *v9ses = NULL; 113 struct v9fs_session_info *v9ses = NULL;
114 struct p9_stat *st = NULL; 114 struct p9_wstat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 115 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current->fsuid; 116 uid_t uid = current->fsuid;
117 gid_t gid = current->fsgid; 117 gid_t gid = current->fsgid;
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
161 161
162 sb->s_root = root; 162 sb->s_root = root;
163 root->d_inode->i_ino = v9fs_qid2ino(&st->qid); 163 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
164
164 v9fs_stat2inode(st, root->d_inode, sb); 165 v9fs_stat2inode(st, root->d_inode, sb);
166
165 v9fs_fid_add(root, fid); 167 v9fs_fid_add(root, fid);
168 p9stat_free(st);
166 kfree(st); 169 kfree(st);
167 170
171P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
168 return simple_set_mnt(mnt, sb); 172 return simple_set_mnt(mnt, sb);
169 173
170release_sb: 174release_sb:
diff --git a/fs/Kconfig b/fs/Kconfig
index 9e9d70c02a07..e46297f020c1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,61 +6,9 @@ menu "File systems"
6 6
7if BLOCK 7if BLOCK
8 8
9config EXT2_FS 9source "fs/ext2/Kconfig"
10 tristate "Second extended fs support" 10source "fs/ext3/Kconfig"
11 help 11source "fs/ext4/Kconfig"
12 Ext2 is a standard Linux file system for hard disks.
13
14 To compile this file system support as a module, choose M here: the
15 module will be called ext2.
16
17 If unsure, say Y.
18
19config EXT2_FS_XATTR
20 bool "Ext2 extended attributes"
21 depends on EXT2_FS
22 help
23 Extended attributes are name:value pairs associated with inodes by
24 the kernel or by users (see the attr(5) manual page, or visit
25 <http://acl.bestbits.at/> for details).
26
27 If unsure, say N.
28
29config EXT2_FS_POSIX_ACL
30 bool "Ext2 POSIX Access Control Lists"
31 depends on EXT2_FS_XATTR
32 select FS_POSIX_ACL
33 help
34 Posix Access Control Lists (ACLs) support permissions for users and
35 groups beyond the owner/group/world scheme.
36
37 To learn more about Access Control Lists, visit the Posix ACLs for
38 Linux website <http://acl.bestbits.at/>.
39
40 If you don't know what Access Control Lists are, say N
41
42config EXT2_FS_SECURITY
43 bool "Ext2 Security Labels"
44 depends on EXT2_FS_XATTR
45 help
46 Security labels support alternative access control models
47 implemented by security modules like SELinux. This option
48 enables an extended attribute handler for file security
49 labels in the ext2 filesystem.
50
51 If you are not using a security module that requires using
52 extended attributes for file security labels, say N.
53
54config EXT2_FS_XIP
55 bool "Ext2 execute in place support"
56 depends on EXT2_FS && MMU
57 help
58 Execute in place can be used on memory-backed block devices. If you
59 enable this option, you can select to mount block devices which are
60 capable of this feature without using the page cache.
61
62 If you do not use a block device that is capable of using this,
63 or if unsure, say N.
64 12
65config FS_XIP 13config FS_XIP
66# execute in place 14# execute in place
@@ -68,218 +16,8 @@ config FS_XIP
68 depends on EXT2_FS_XIP 16 depends on EXT2_FS_XIP
69 default y 17 default y
70 18
71config EXT3_FS 19source "fs/jbd/Kconfig"
72 tristate "Ext3 journalling file system support" 20source "fs/jbd2/Kconfig"
73 select JBD
74 help
75 This is the journalling version of the Second extended file system
76 (often called ext3), the de facto standard Linux file system
77 (method to organize files on a storage device) for hard disks.
78
79 The journalling code included in this driver means you do not have
80 to run e2fsck (file system checker) on your file systems after a
81 crash. The journal keeps track of any changes that were being made
82 at the time the system crashed, and can ensure that your file system
83 is consistent without the need for a lengthy check.
84
85 Other than adding the journal to the file system, the on-disk format
86 of ext3 is identical to ext2. It is possible to freely switch
87 between using the ext3 driver and the ext2 driver, as long as the
88 file system has been cleanly unmounted, or e2fsck is run on the file
89 system.
90
91 To add a journal on an existing ext2 file system or change the
92 behavior of ext3 file systems, you can use the tune2fs utility ("man
93 tune2fs"). To modify attributes of files and directories on ext3
94 file systems, use chattr ("man chattr"). You need to be using
95 e2fsprogs version 1.20 or later in order to create ext3 journals
96 (available at <http://sourceforge.net/projects/e2fsprogs/>).
97
98 To compile this file system support as a module, choose M here: the
99 module will be called ext3.
100
101config EXT3_FS_XATTR
102 bool "Ext3 extended attributes"
103 depends on EXT3_FS
104 default y
105 help
106 Extended attributes are name:value pairs associated with inodes by
107 the kernel or by users (see the attr(5) manual page, or visit
108 <http://acl.bestbits.at/> for details).
109
110 If unsure, say N.
111
112 You need this for POSIX ACL support on ext3.
113
114config EXT3_FS_POSIX_ACL
115 bool "Ext3 POSIX Access Control Lists"
116 depends on EXT3_FS_XATTR
117 select FS_POSIX_ACL
118 help
119 Posix Access Control Lists (ACLs) support permissions for users and
120 groups beyond the owner/group/world scheme.
121
122 To learn more about Access Control Lists, visit the Posix ACLs for
123 Linux website <http://acl.bestbits.at/>.
124
125 If you don't know what Access Control Lists are, say N
126
127config EXT3_FS_SECURITY
128 bool "Ext3 Security Labels"
129 depends on EXT3_FS_XATTR
130 help
131 Security labels support alternative access control models
132 implemented by security modules like SELinux. This option
133 enables an extended attribute handler for file security
134 labels in the ext3 filesystem.
135
136 If you are not using a security module that requires using
137 extended attributes for file security labels, say N.
138
139config EXT4_FS
140 tristate "The Extended 4 (ext4) filesystem"
141 select JBD2
142 select CRC16
143 help
144 This is the next generation of the ext3 filesystem.
145
146 Unlike the change from ext2 filesystem to ext3 filesystem,
147 the on-disk format of ext4 is not forwards compatible with
148 ext3; it is based on extent maps and it supports 48-bit
149 physical block numbers. The ext4 filesystem also supports delayed
150 allocation, persistent preallocation, high resolution time stamps,
151 and a number of other features to improve performance and speed
152 up fsck time. For more information, please see the web pages at
153 http://ext4.wiki.kernel.org.
154
155 The ext4 filesystem will support mounting an ext3
156 filesystem; while there will be some performance gains from
157 the delayed allocation and inode table readahead, the best
158 performance gains will require enabling ext4 features in the
159 filesystem, or formating a new filesystem as an ext4
160 filesystem initially.
161
162 To compile this file system support as a module, choose M here. The
163 module will be called ext4dev.
164
165 If unsure, say N.
166
167config EXT4DEV_COMPAT
168 bool "Enable ext4dev compatibility"
169 depends on EXT4_FS
170 help
171 Starting with 2.6.28, the name of the ext4 filesystem was
172 renamed from ext4dev to ext4. Unfortunately there are some
173 legacy userspace programs (such as klibc's fstype) have
174 "ext4dev" hardcoded.
175
176 To enable backwards compatibility so that systems that are
177 still expecting to mount ext4 filesystems using ext4dev,
178 chose Y here. This feature will go away by 2.6.31, so
179 please arrange to get your userspace programs fixed!
180
181config EXT4_FS_XATTR
182 bool "Ext4 extended attributes"
183 depends on EXT4_FS
184 default y
185 help
186 Extended attributes are name:value pairs associated with inodes by
187 the kernel or by users (see the attr(5) manual page, or visit
188 <http://acl.bestbits.at/> for details).
189
190 If unsure, say N.
191
192 You need this for POSIX ACL support on ext4.
193
194config EXT4_FS_POSIX_ACL
195 bool "Ext4 POSIX Access Control Lists"
196 depends on EXT4_FS_XATTR
197 select FS_POSIX_ACL
198 help
199 POSIX Access Control Lists (ACLs) support permissions for users and
200 groups beyond the owner/group/world scheme.
201
202 To learn more about Access Control Lists, visit the POSIX ACLs for
203 Linux website <http://acl.bestbits.at/>.
204
205 If you don't know what Access Control Lists are, say N
206
207config EXT4_FS_SECURITY
208 bool "Ext4 Security Labels"
209 depends on EXT4_FS_XATTR
210 help
211 Security labels support alternative access control models
212 implemented by security modules like SELinux. This option
213 enables an extended attribute handler for file security
214 labels in the ext4 filesystem.
215
216 If you are not using a security module that requires using
217 extended attributes for file security labels, say N.
218
219config JBD
220 tristate
221 help
222 This is a generic journalling layer for block devices. It is
223 currently used by the ext3 file system, but it could also be
224 used to add journal support to other file systems or block
225 devices such as RAID or LVM.
226
227 If you are using the ext3 file system, you need to say Y here.
228 If you are not using ext3 then you will probably want to say N.
229
230 To compile this device as a module, choose M here: the module will be
231 called jbd. If you are compiling ext3 into the kernel, you
232 cannot compile this code as a module.
233
234config JBD_DEBUG
235 bool "JBD (ext3) debugging support"
236 depends on JBD && DEBUG_FS
237 help
238 If you are using the ext3 journaled file system (or potentially any
239 other file system/device using JBD), this option allows you to
240 enable debugging output while the system is running, in order to
241 help track down any problems you are having. By default the
242 debugging output will be turned off.
243
244 If you select Y here, then you will be able to turn on debugging
245 with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
246 number between 1 and 5, the higher the number, the more debugging
247 output is generated. To turn debugging off again, do
248 "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
249
250config JBD2
251 tristate
252 select CRC32
253 help
254 This is a generic journaling layer for block devices that support
255 both 32-bit and 64-bit block numbers. It is currently used by
256 the ext4 and OCFS2 filesystems, but it could also be used to add
257 journal support to other file systems or block devices such
258 as RAID or LVM.
259
260 If you are using ext4 or OCFS2, you need to say Y here.
261 If you are not using ext4 or OCFS2 then you will
262 probably want to say N.
263
264 To compile this device as a module, choose M here. The module will be
265 called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
266 you cannot compile this code as a module.
267
268config JBD2_DEBUG
269 bool "JBD2 (ext4) debugging support"
270 depends on JBD2 && DEBUG_FS
271 help
272 If you are using the ext4 journaled file system (or
273 potentially any other filesystem/device using JBD2), this option
274 allows you to enable debugging output while the system is running,
275 in order to help track down any problems you are having.
276 By default, the debugging output will be turned off.
277
278 If you select Y here, then you will be able to turn on debugging
279 with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
280 number between 1 and 5. The higher the number, the more debugging
281 output is generated. To turn debugging off again, do
282 "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
283 21
284config FS_MBCACHE 22config FS_MBCACHE
285# Meta block cache for Extended Attributes (ext2/ext3/ext4) 23# Meta block cache for Extended Attributes (ext2/ext3/ext4)
@@ -665,7 +403,7 @@ config AUTOFS4_FS
665 N here. 403 N here.
666 404
667config FUSE_FS 405config FUSE_FS
668 tristate "Filesystem in Userspace support" 406 tristate "FUSE (Filesystem in Userspace) support"
669 help 407 help
670 With FUSE it is possible to implement a fully functional filesystem 408 With FUSE it is possible to implement a fully functional filesystem
671 in a userspace program. 409 in a userspace program.
@@ -1168,195 +906,7 @@ config EFS_FS
1168 To compile the EFS file system support as a module, choose M here: the 906 To compile the EFS file system support as a module, choose M here: the
1169 module will be called efs. 907 module will be called efs.
1170 908
1171config JFFS2_FS 909source "fs/jffs2/Kconfig"
1172 tristate "Journalling Flash File System v2 (JFFS2) support"
1173 select CRC32
1174 depends on MTD
1175 help
1176 JFFS2 is the second generation of the Journalling Flash File System
1177 for use on diskless embedded devices. It provides improved wear
1178 levelling, compression and support for hard links. You cannot use
1179 this on normal block devices, only on 'MTD' devices.
1180
1181 Further information on the design and implementation of JFFS2 is
1182 available at <http://sources.redhat.com/jffs2/>.
1183
1184config JFFS2_FS_DEBUG
1185 int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
1186 depends on JFFS2_FS
1187 default "0"
1188 help
1189 This controls the amount of debugging messages produced by the JFFS2
1190 code. Set it to zero for use in production systems. For evaluation,
1191 testing and debugging, it's advisable to set it to one. This will
1192 enable a few assertions and will print debugging messages at the
1193 KERN_DEBUG loglevel, where they won't normally be visible. Level 2
1194 is unlikely to be useful - it enables extra debugging in certain
1195 areas which at one point needed debugging, but when the bugs were
1196 located and fixed, the detailed messages were relegated to level 2.
1197
1198 If reporting bugs, please try to have available a full dump of the
1199 messages at debug level 1 while the misbehaviour was occurring.
1200
1201config JFFS2_FS_WRITEBUFFER
1202 bool "JFFS2 write-buffering support"
1203 depends on JFFS2_FS
1204 default y
1205 help
1206 This enables the write-buffering support in JFFS2.
1207
1208 This functionality is required to support JFFS2 on the following
1209 types of flash devices:
1210 - NAND flash
1211 - NOR flash with transparent ECC
1212 - DataFlash
1213
1214config JFFS2_FS_WBUF_VERIFY
1215 bool "Verify JFFS2 write-buffer reads"
1216 depends on JFFS2_FS_WRITEBUFFER
1217 default n
1218 help
1219 This causes JFFS2 to read back every page written through the
1220 write-buffer, and check for errors.
1221
1222config JFFS2_SUMMARY
1223 bool "JFFS2 summary support (EXPERIMENTAL)"
1224 depends on JFFS2_FS && EXPERIMENTAL
1225 default n
1226 help
1227 This feature makes it possible to use summary information
1228 for faster filesystem mount.
1229
1230 The summary information can be inserted into a filesystem image
1231 by the utility 'sumtool'.
1232
1233 If unsure, say 'N'.
1234
1235config JFFS2_FS_XATTR
1236 bool "JFFS2 XATTR support (EXPERIMENTAL)"
1237 depends on JFFS2_FS && EXPERIMENTAL
1238 default n
1239 help
1240 Extended attributes are name:value pairs associated with inodes by
1241 the kernel or by users (see the attr(5) manual page, or visit
1242 <http://acl.bestbits.at/> for details).
1243
1244 If unsure, say N.
1245
1246config JFFS2_FS_POSIX_ACL
1247 bool "JFFS2 POSIX Access Control Lists"
1248 depends on JFFS2_FS_XATTR
1249 default y
1250 select FS_POSIX_ACL
1251 help
1252 Posix Access Control Lists (ACLs) support permissions for users and
1253 groups beyond the owner/group/world scheme.
1254
1255 To learn more about Access Control Lists, visit the Posix ACLs for
1256 Linux website <http://acl.bestbits.at/>.
1257
1258 If you don't know what Access Control Lists are, say N
1259
1260config JFFS2_FS_SECURITY
1261 bool "JFFS2 Security Labels"
1262 depends on JFFS2_FS_XATTR
1263 default y
1264 help
1265 Security labels support alternative access control models
1266 implemented by security modules like SELinux. This option
1267 enables an extended attribute handler for file security
1268 labels in the jffs2 filesystem.
1269
1270 If you are not using a security module that requires using
1271 extended attributes for file security labels, say N.
1272
1273config JFFS2_COMPRESSION_OPTIONS
1274 bool "Advanced compression options for JFFS2"
1275 depends on JFFS2_FS
1276 default n
1277 help
1278 Enabling this option allows you to explicitly choose which
1279 compression modules, if any, are enabled in JFFS2. Removing
1280 compressors can mean you cannot read existing file systems,
1281 and enabling experimental compressors can mean that you
1282 write a file system which cannot be read by a standard kernel.
1283
1284 If unsure, you should _definitely_ say 'N'.
1285
1286config JFFS2_ZLIB
1287 bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
1288 select ZLIB_INFLATE
1289 select ZLIB_DEFLATE
1290 depends on JFFS2_FS
1291 default y
1292 help
1293 Zlib is designed to be a free, general-purpose, legally unencumbered,
1294 lossless data-compression library for use on virtually any computer
1295 hardware and operating system. See <http://www.gzip.org/zlib/> for
1296 further information.
1297
1298 Say 'Y' if unsure.
1299
1300config JFFS2_LZO
1301 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
1302 select LZO_COMPRESS
1303 select LZO_DECOMPRESS
1304 depends on JFFS2_FS
1305 default n
1306 help
1307 minilzo-based compression. Generally works better than Zlib.
1308
1309 This feature was added in July, 2007. Say 'N' if you need
1310 compatibility with older bootloaders or kernels.
1311
1312config JFFS2_RTIME
1313 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
1314 depends on JFFS2_FS
1315 default y
1316 help
1317 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
1318
1319config JFFS2_RUBIN
1320 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
1321 depends on JFFS2_FS
1322 default n
1323 help
1324 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
1325
1326choice
1327 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
1328 default JFFS2_CMODE_PRIORITY
1329 depends on JFFS2_FS
1330 help
1331 You can set here the default compression mode of JFFS2 from
1332 the available compression modes. Don't touch if unsure.
1333
1334config JFFS2_CMODE_NONE
1335 bool "no compression"
1336 help
1337 Uses no compression.
1338
1339config JFFS2_CMODE_PRIORITY
1340 bool "priority"
1341 help
1342 Tries the compressors in a predefined order and chooses the first
1343 successful one.
1344
1345config JFFS2_CMODE_SIZE
1346 bool "size (EXPERIMENTAL)"
1347 help
1348 Tries all compressors and chooses the one which has the smallest
1349 result.
1350
1351config JFFS2_CMODE_FAVOURLZO
1352 bool "Favour LZO"
1353 help
1354 Tries all compressors and chooses the one which has the smallest
1355 result but gives some preference to LZO (which has faster
1356 decompression) at the expense of size.
1357
1358endchoice
1359
1360# UBIFS File system configuration 910# UBIFS File system configuration
1361source "fs/ubifs/Kconfig" 911source "fs/ubifs/Kconfig"
1362 912
@@ -1913,148 +1463,7 @@ config SMB_NLS_REMOTE
1913 1463
1914 smbmount from samba 2.2.0 or later supports this. 1464 smbmount from samba 2.2.0 or later supports this.
1915 1465
1916config CIFS 1466source "fs/cifs/Kconfig"
1917 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
1918 depends on INET
1919 select NLS
1920 help
1921 This is the client VFS module for the Common Internet File System
1922 (CIFS) protocol which is the successor to the Server Message Block
1923 (SMB) protocol, the native file sharing mechanism for most early
1924 PC operating systems. The CIFS protocol is fully supported by
1925 file servers such as Windows 2000 (including Windows 2003, NT 4
1926 and Windows XP) as well by Samba (which provides excellent CIFS
1927 server support for Linux and many other operating systems). Limited
1928 support for OS/2 and Windows ME and similar servers is provided as
1929 well.
1930
1931 The cifs module provides an advanced network file system
1932 client for mounting to CIFS compliant servers. It includes
1933 support for DFS (hierarchical name space), secure per-user
1934 session establishment via Kerberos or NTLM or NTLMv2,
1935 safe distributed caching (oplock), optional packet
1936 signing, Unicode and other internationalization improvements.
1937 If you need to mount to Samba or Windows from this machine, say Y.
1938
1939config CIFS_STATS
1940 bool "CIFS statistics"
1941 depends on CIFS
1942 help
1943 Enabling this option will cause statistics for each server share
1944 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
1945
1946config CIFS_STATS2
1947 bool "Extended statistics"
1948 depends on CIFS_STATS
1949 help
1950 Enabling this option will allow more detailed statistics on SMB
1951 request timing to be displayed in /proc/fs/cifs/DebugData and also
1952 allow optional logging of slow responses to dmesg (depending on the
1953 value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
1954 These additional statistics may have a minor effect on performance
1955 and memory utilization.
1956
1957 Unless you are a developer or are doing network performance analysis
1958 or tuning, say N.
1959
1960config CIFS_WEAK_PW_HASH
1961 bool "Support legacy servers which use weaker LANMAN security"
1962 depends on CIFS
1963 help
1964 Modern CIFS servers including Samba and most Windows versions
1965 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
1966 security mechanisms. These hash the password more securely
1967 than the mechanisms used in the older LANMAN version of the
1968 SMB protocol but LANMAN based authentication is needed to
1969 establish sessions with some old SMB servers.
1970
1971 Enabling this option allows the cifs module to mount to older
1972 LANMAN based servers such as OS/2 and Windows 95, but such
1973 mounts may be less secure than mounts using NTLM or more recent
1974 security mechanisms if you are on a public network. Unless you
1975 have a need to access old SMB servers (and are on a private
1976 network) you probably want to say N. Even if this support
1977 is enabled in the kernel build, LANMAN authentication will not be
1978 used automatically. At runtime LANMAN mounts are disabled but
1979 can be set to required (or optional) either in
1980 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
1981 option on the mount command. This support is disabled by
1982 default in order to reduce the possibility of a downgrade
1983 attack.
1984
1985 If unsure, say N.
1986
1987config CIFS_UPCALL
1988 bool "Kerberos/SPNEGO advanced session setup"
1989 depends on CIFS && KEYS
1990 help
1991 Enables an upcall mechanism for CIFS which accesses
1992 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
1993 Kerberos tickets which are needed to mount to certain secure servers
1994 (for which more secure Kerberos authentication is required). If
1995 unsure, say N.
1996
1997config CIFS_XATTR
1998 bool "CIFS extended attributes"
1999 depends on CIFS
2000 help
2001 Extended attributes are name:value pairs associated with inodes by
2002 the kernel or by users (see the attr(5) manual page, or visit
2003 <http://acl.bestbits.at/> for details). CIFS maps the name of
2004 extended attributes beginning with the user namespace prefix
2005 to SMB/CIFS EAs. EAs are stored on Windows servers without the
2006 user namespace prefix, but their names are seen by Linux cifs clients
2007 prefaced by the user namespace prefix. The system namespace
2008 (used by some filesystems to store ACLs) is not supported at
2009 this time.
2010
2011 If unsure, say N.
2012
2013config CIFS_POSIX
2014 bool "CIFS POSIX Extensions"
2015 depends on CIFS_XATTR
2016 help
2017 Enabling this option will cause the cifs client to attempt to
2018 negotiate a newer dialect with servers, such as Samba 3.0.5
2019 or later, that optionally can handle more POSIX like (rather
2020 than Windows like) file behavior. It also enables
2021 support for POSIX ACLs (getfacl and setfacl) to servers
2022 (such as Samba 3.10 and later) which can negotiate
2023 CIFS POSIX ACL support. If unsure, say N.
2024
2025config CIFS_DEBUG2
2026 bool "Enable additional CIFS debugging routines"
2027 depends on CIFS
2028 help
2029 Enabling this option adds a few more debugging routines
2030 to the cifs code which slightly increases the size of
2031 the cifs module and can cause additional logging of debug
2032 messages in some error paths, slowing performance. This
2033 option can be turned off unless you are debugging
2034 cifs problems. If unsure, say N.
2035
2036config CIFS_EXPERIMENTAL
2037 bool "CIFS Experimental Features (EXPERIMENTAL)"
2038 depends on CIFS && EXPERIMENTAL
2039 help
2040 Enables cifs features under testing. These features are
2041 experimental and currently include DFS support and directory
2042 change notification ie fcntl(F_DNOTIFY), as well as the upcall
2043 mechanism which will be used for Kerberos session negotiation
2044 and uid remapping. Some of these features also may depend on
2045 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
2046 (which is disabled by default). See the file fs/cifs/README
2047 for more details. If unsure, say N.
2048
2049config CIFS_DFS_UPCALL
2050 bool "DFS feature support (EXPERIMENTAL)"
2051 depends on CIFS_EXPERIMENTAL
2052 depends on KEYS
2053 help
2054 Enables an upcall mechanism for CIFS which contacts userspace
2055 helper utilities to provide server name resolution (host names to
2056 IP addresses) which is needed for implicit mounts of DFS junction
2057 points. If unsure, say N.
2058 1467
2059config NCP_FS 1468config NCP_FS
2060 tristate "NCP file system support (to mount NetWare volumes)" 1469 tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 17c9c5ec14c5..ce9fb3fbfae4 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -25,7 +25,7 @@ config BINFMT_ELF
25 25
26config COMPAT_BINFMT_ELF 26config COMPAT_BINFMT_ELF
27 bool 27 bool
28 depends on COMPAT && MMU 28 depends on COMPAT && BINFMT_ELF
29 29
30config BINFMT_ELF_FDPIC 30config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 31 bool "Kernel support for FDPIC ELF binaries"
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
40 40
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config CORE_DUMP_DEFAULT_ELF_HEADERS
44 bool "Write ELF core dumps with partial segments"
45 default n
46 depends on BINFMT_ELF
47 help
48 ELF core dump files describe each memory mapping of the crashed
49 process, and can contain or omit the memory contents of each one.
50 The contents of an unmodified text mapping are omitted by default.
51
52 For an unmodified text mapping of an ELF object, including just
53 the first page of the file in a core dump makes it possible to
54 identify the build ID bits in the file, without paying the i/o
55 cost and disk space to dump all the text. However, versions of
56 GDB before 6.7 are confused by ELF core dump files in this format.
57
58 The core dump behavior can be controlled per process using
59 the /proc/PID/coredump_filter pseudo-file; this setting is
60 inherited. See Documentation/filesystems/proc.txt for details.
61
62 This config option changes the default setting of coredump_filter
63 seen at boot time. If unsure, say N.
64
43config BINFMT_FLAT 65config BINFMT_FLAT
44 bool "Kernel support for flat binaries" 66 bool "Kernel support for flat binaries"
45 depends on !MMU && (!FRV || BROKEN) 67 depends on !MMU && (!FRV || BROKEN)
diff --git a/fs/Makefile b/fs/Makefile
index b6f27dc26b72..2168c902d5ca 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -8,7 +8,7 @@
8obj-y := open.o read_write.o file_table.o super.o \ 8obj-y := open.o read_write.o file_table.o super.o \
9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ 9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
10 ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 13 pnode.o drop_caches.o splice.o sync.o utimes.o \
14 stack.o 14 stack.o
@@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o
27obj-$(CONFIG_SIGNALFD) += signalfd.o 27obj-$(CONFIG_SIGNALFD) += signalfd.o
28obj-$(CONFIG_TIMERFD) += timerfd.o 28obj-$(CONFIG_TIMERFD) += timerfd.o
29obj-$(CONFIG_EVENTFD) += eventfd.o 29obj-$(CONFIG_EVENTFD) += eventfd.o
30obj-$(CONFIG_AIO) += aio.o
30obj-$(CONFIG_FILE_LOCKING) += locks.o 31obj-$(CONFIG_FILE_LOCKING) += locks.o
31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 32obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
32 33
@@ -70,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/
70# Do not add any filesystems before this line 71# Do not add any filesystems before this line
71obj-$(CONFIG_REISERFS_FS) += reiserfs/ 72obj-$(CONFIG_REISERFS_FS) += reiserfs/
72obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 73obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
73obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev 74obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4
74obj-$(CONFIG_JBD) += jbd/ 75obj-$(CONFIG_JBD) += jbd/
75obj-$(CONFIG_JBD2) += jbd2/ 76obj-$(CONFIG_JBD2) += jbd2/
76obj-$(CONFIG_EXT2_FS) += ext2/ 77obj-$(CONFIG_EXT2_FS) += ext2/
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 525f7c56e068..a3901769a96c 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -50,8 +50,8 @@ const struct address_space_operations afs_fs_aops = {
50 .launder_page = afs_launder_page, 50 .launder_page = afs_launder_page,
51 .releasepage = afs_releasepage, 51 .releasepage = afs_releasepage,
52 .invalidatepage = afs_invalidatepage, 52 .invalidatepage = afs_invalidatepage,
53 .prepare_write = afs_prepare_write, 53 .write_begin = afs_write_begin,
54 .commit_write = afs_commit_write, 54 .write_end = afs_write_end,
55 .writepage = afs_writepage, 55 .writepage = afs_writepage,
56 .writepages = afs_writepages, 56 .writepages = afs_writepages,
57}; 57};
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3cb6920ff30b..67f259d99cd6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -728,8 +728,12 @@ extern int afs_volume_release_fileserver(struct afs_vnode *,
728 */ 728 */
729extern int afs_set_page_dirty(struct page *); 729extern int afs_set_page_dirty(struct page *);
730extern void afs_put_writeback(struct afs_writeback *); 730extern void afs_put_writeback(struct afs_writeback *);
731extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); 731extern int afs_write_begin(struct file *file, struct address_space *mapping,
732extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); 732 loff_t pos, unsigned len, unsigned flags,
733 struct page **pagep, void **fsdata);
734extern int afs_write_end(struct file *file, struct address_space *mapping,
735 loff_t pos, unsigned len, unsigned copied,
736 struct page *page, void *fsdata);
733extern int afs_writepage(struct page *, struct writeback_control *); 737extern int afs_writepage(struct page *, struct writeback_control *);
734extern int afs_writepages(struct address_space *, struct writeback_control *); 738extern int afs_writepages(struct address_space *, struct writeback_control *);
735extern int afs_write_inode(struct inode *, int); 739extern int afs_write_inode(struct inode *, int);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 065b4e10681a..d6b85dab35fc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,15 +84,23 @@ void afs_put_writeback(struct afs_writeback *wb)
84 * partly or wholly fill a page that's under preparation for writing 84 * partly or wholly fill a page that's under preparation for writing
85 */ 85 */
86static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 86static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87 unsigned start, unsigned len, struct page *page) 87 loff_t pos, unsigned len, struct page *page)
88{ 88{
89 loff_t i_size;
90 unsigned eof;
89 int ret; 91 int ret;
90 92
91 _enter(",,%u,%u", start, len); 93 _enter(",,%llu,%u", (unsigned long long)pos, len);
92 94
93 ASSERTCMP(start + len, <=, PAGE_SIZE); 95 ASSERTCMP(len, <=, PAGE_CACHE_SIZE);
94 96
95 ret = afs_vnode_fetch_data(vnode, key, start, len, page); 97 i_size = i_size_read(&vnode->vfs_inode);
98 if (pos + len > i_size)
99 eof = i_size;
100 else
101 eof = PAGE_CACHE_SIZE;
102
103 ret = afs_vnode_fetch_data(vnode, key, 0, eof, page);
96 if (ret < 0) { 104 if (ret < 0) {
97 if (ret == -ENOENT) { 105 if (ret == -ENOENT) {
98 _debug("got NOENT from server" 106 _debug("got NOENT from server"
@@ -107,109 +115,55 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
107} 115}
108 116
109/* 117/*
110 * prepare a page for being written to
111 */
112static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
113 struct key *key, unsigned offset, unsigned to)
114{
115 unsigned eof, tail, start, stop, len;
116 loff_t i_size, pos;
117 void *p;
118 int ret;
119
120 _enter("");
121
122 if (offset == 0 && to == PAGE_SIZE)
123 return 0;
124
125 p = kmap_atomic(page, KM_USER0);
126
127 i_size = i_size_read(&vnode->vfs_inode);
128 pos = (loff_t) page->index << PAGE_SHIFT;
129 if (pos >= i_size) {
130 /* partial write, page beyond EOF */
131 _debug("beyond");
132 if (offset > 0)
133 memset(p, 0, offset);
134 if (to < PAGE_SIZE)
135 memset(p + to, 0, PAGE_SIZE - to);
136 kunmap_atomic(p, KM_USER0);
137 return 0;
138 }
139
140 if (i_size - pos >= PAGE_SIZE) {
141 /* partial write, page entirely before EOF */
142 _debug("before");
143 tail = eof = PAGE_SIZE;
144 } else {
145 /* partial write, page overlaps EOF */
146 eof = i_size - pos;
147 _debug("overlap %u", eof);
148 tail = max(eof, to);
149 if (tail < PAGE_SIZE)
150 memset(p + tail, 0, PAGE_SIZE - tail);
151 if (offset > eof)
152 memset(p + eof, 0, PAGE_SIZE - eof);
153 }
154
155 kunmap_atomic(p, KM_USER0);
156
157 ret = 0;
158 if (offset > 0 || eof > to) {
159 /* need to fill one or two bits that aren't going to be written
160 * (cover both fillers in one read if there are two) */
161 start = (offset > 0) ? 0 : to;
162 stop = (eof > to) ? eof : offset;
163 len = stop - start;
164 _debug("wr=%u-%u av=0-%u rd=%u@%u",
165 offset, to, eof, start, len);
166 ret = afs_fill_page(vnode, key, start, len, page);
167 }
168
169 _leave(" = %d", ret);
170 return ret;
171}
172
173/*
174 * prepare to perform part of a write to a page 118 * prepare to perform part of a write to a page
175 * - the caller holds the page locked, preventing it from being written out or
176 * modified by anyone else
177 */ 119 */
178int afs_prepare_write(struct file *file, struct page *page, 120int afs_write_begin(struct file *file, struct address_space *mapping,
179 unsigned offset, unsigned to) 121 loff_t pos, unsigned len, unsigned flags,
122 struct page **pagep, void **fsdata)
180{ 123{
181 struct afs_writeback *candidate, *wb; 124 struct afs_writeback *candidate, *wb;
182 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 125 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
126 struct page *page;
183 struct key *key = file->private_data; 127 struct key *key = file->private_data;
184 pgoff_t index; 128 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
129 unsigned to = from + len;
130 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
185 int ret; 131 int ret;
186 132
187 _enter("{%x:%u},{%lx},%u,%u", 133 _enter("{%x:%u},{%lx},%u,%u",
188 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); 134 vnode->fid.vid, vnode->fid.vnode, index, from, to);
189 135
190 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); 136 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
191 if (!candidate) 137 if (!candidate)
192 return -ENOMEM; 138 return -ENOMEM;
193 candidate->vnode = vnode; 139 candidate->vnode = vnode;
194 candidate->first = candidate->last = page->index; 140 candidate->first = candidate->last = index;
195 candidate->offset_first = offset; 141 candidate->offset_first = from;
196 candidate->to_last = to; 142 candidate->to_last = to;
197 candidate->usage = 1; 143 candidate->usage = 1;
198 candidate->state = AFS_WBACK_PENDING; 144 candidate->state = AFS_WBACK_PENDING;
199 init_waitqueue_head(&candidate->waitq); 145 init_waitqueue_head(&candidate->waitq);
200 146
147 page = __grab_cache_page(mapping, index);
148 if (!page) {
149 kfree(candidate);
150 return -ENOMEM;
151 }
152 *pagep = page;
153 /* page won't leak in error case: it eventually gets cleaned off LRU */
154
201 if (!PageUptodate(page)) { 155 if (!PageUptodate(page)) {
202 _debug("not up to date"); 156 _debug("not up to date");
203 ret = afs_prepare_page(vnode, page, key, offset, to); 157 ret = afs_fill_page(vnode, key, pos, len, page);
204 if (ret < 0) { 158 if (ret < 0) {
205 kfree(candidate); 159 kfree(candidate);
206 _leave(" = %d [prep]", ret); 160 _leave(" = %d [prep]", ret);
207 return ret; 161 return ret;
208 } 162 }
163 SetPageUptodate(page);
209 } 164 }
210 165
211try_again: 166try_again:
212 index = page->index;
213 spin_lock(&vnode->writeback_lock); 167 spin_lock(&vnode->writeback_lock);
214 168
215 /* see if this page is already pending a writeback under a suitable key 169 /* see if this page is already pending a writeback under a suitable key
@@ -242,8 +196,8 @@ try_again:
242subsume_in_current_wb: 196subsume_in_current_wb:
243 _debug("subsume"); 197 _debug("subsume");
244 ASSERTRANGE(wb->first, <=, index, <=, wb->last); 198 ASSERTRANGE(wb->first, <=, index, <=, wb->last);
245 if (index == wb->first && offset < wb->offset_first) 199 if (index == wb->first && from < wb->offset_first)
246 wb->offset_first = offset; 200 wb->offset_first = from;
247 if (index == wb->last && to > wb->to_last) 201 if (index == wb->last && to > wb->to_last)
248 wb->to_last = to; 202 wb->to_last = to;
249 spin_unlock(&vnode->writeback_lock); 203 spin_unlock(&vnode->writeback_lock);
@@ -289,17 +243,17 @@ flush_conflicting_wb:
289/* 243/*
290 * finalise part of a write to a page 244 * finalise part of a write to a page
291 */ 245 */
292int afs_commit_write(struct file *file, struct page *page, 246int afs_write_end(struct file *file, struct address_space *mapping,
293 unsigned offset, unsigned to) 247 loff_t pos, unsigned len, unsigned copied,
248 struct page *page, void *fsdata)
294{ 249{
295 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 250 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
296 loff_t i_size, maybe_i_size; 251 loff_t i_size, maybe_i_size;
297 252
298 _enter("{%x:%u},{%lx},%u,%u", 253 _enter("{%x:%u},{%lx}",
299 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); 254 vnode->fid.vid, vnode->fid.vnode, page->index);
300 255
301 maybe_i_size = (loff_t) page->index << PAGE_SHIFT; 256 maybe_i_size = pos + copied;
302 maybe_i_size += to;
303 257
304 i_size = i_size_read(&vnode->vfs_inode); 258 i_size = i_size_read(&vnode->vfs_inode);
305 if (maybe_i_size > i_size) { 259 if (maybe_i_size > i_size) {
@@ -310,12 +264,13 @@ int afs_commit_write(struct file *file, struct page *page,
310 spin_unlock(&vnode->writeback_lock); 264 spin_unlock(&vnode->writeback_lock);
311 } 265 }
312 266
313 SetPageUptodate(page);
314 set_page_dirty(page); 267 set_page_dirty(page);
315 if (PageDirty(page)) 268 if (PageDirty(page))
316 _debug("dirtied"); 269 _debug("dirtied");
270 unlock_page(page);
271 page_cache_release(page);
317 272
318 return 0; 273 return copied;
319} 274}
320 275
321/* 276/*
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile
index f2c3b79e94d2..a811c1f7d9ab 100644
--- a/fs/autofs4/Makefile
+++ b/fs/autofs4/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_AUTOFS4_FS) += autofs4.o 5obj-$(CONFIG_AUTOFS4_FS) += autofs4.o
6 6
7autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o 7autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 69a2f5c92319..e0f16da00e54 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -14,6 +14,7 @@
14/* Internal header file for autofs */ 14/* Internal header file for autofs */
15 15
16#include <linux/auto_fs4.h> 16#include <linux/auto_fs4.h>
17#include <linux/auto_dev-ioctl.h>
17#include <linux/mutex.h> 18#include <linux/mutex.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
@@ -21,6 +22,11 @@
21#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY 22#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
22#define AUTOFS_IOC_COUNT 32 23#define AUTOFS_IOC_COUNT 32
23 24
25#define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION)
26#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11)
27
28#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
29
24#include <linux/kernel.h> 30#include <linux/kernel.h>
25#include <linux/slab.h> 31#include <linux/slab.h>
26#include <linux/time.h> 32#include <linux/time.h>
@@ -35,11 +41,27 @@
35/* #define DEBUG */ 41/* #define DEBUG */
36 42
37#ifdef DEBUG 43#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) 44#define DPRINTK(fmt, args...) \
45do { \
46 printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \
47 current->pid, __func__, ##args); \
48} while (0)
39#else 49#else
40#define DPRINTK(fmt,args...) do {} while(0) 50#define DPRINTK(fmt, args...) do {} while (0)
41#endif 51#endif
42 52
53#define AUTOFS_WARN(fmt, args...) \
54do { \
55 printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
56 current->pid, __func__, ##args); \
57} while (0)
58
59#define AUTOFS_ERROR(fmt, args...) \
60do { \
61 printk(KERN_ERR "pid %d: %s: " fmt "\n", \
62 current->pid, __func__, ##args); \
63} while (0)
64
43/* Unified info structure. This is pointed to by both the dentry and 65/* Unified info structure. This is pointed to by both the dentry and
44 inode structures. Each file in the filesystem has an instance of this 66 inode structures. Each file in the filesystem has an instance of this
45 structure. It holds a reference to the dentry, so dentries are never 67 structure. It holds a reference to the dentry, so dentries are never
@@ -61,6 +83,9 @@ struct autofs_info {
61 unsigned long last_used; 83 unsigned long last_used;
62 atomic_t count; 84 atomic_t count;
63 85
86 uid_t uid;
87 gid_t gid;
88
64 mode_t mode; 89 mode_t mode;
65 size_t size; 90 size_t size;
66 91
@@ -92,10 +117,6 @@ struct autofs_wait_queue {
92 117
93#define AUTOFS_SBI_MAGIC 0x6d4a556d 118#define AUTOFS_SBI_MAGIC 0x6d4a556d
94 119
95#define AUTOFS_TYPE_INDIRECT 0x0001
96#define AUTOFS_TYPE_DIRECT 0x0002
97#define AUTOFS_TYPE_OFFSET 0x0004
98
99struct autofs_sb_info { 120struct autofs_sb_info {
100 u32 magic; 121 u32 magic;
101 int pipefd; 122 int pipefd;
@@ -169,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *,
169 struct autofs_packet_expire __user *); 190 struct autofs_packet_expire __user *);
170int autofs4_expire_multi(struct super_block *, struct vfsmount *, 191int autofs4_expire_multi(struct super_block *, struct vfsmount *,
171 struct autofs_sb_info *, int __user *); 192 struct autofs_sb_info *, int __user *);
193struct dentry *autofs4_expire_direct(struct super_block *sb,
194 struct vfsmount *mnt,
195 struct autofs_sb_info *sbi, int how);
196struct dentry *autofs4_expire_indirect(struct super_block *sb,
197 struct vfsmount *mnt,
198 struct autofs_sb_info *sbi, int how);
199
200/* Device node initialization */
201
202int autofs_dev_ioctl_init(void);
203void autofs_dev_ioctl_exit(void);
172 204
173/* Operations structures */ 205/* Operations structures */
174 206
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
new file mode 100644
index 000000000000..625abf5422e2
--- /dev/null
+++ b/fs/autofs4/dev-ioctl.c
@@ -0,0 +1,863 @@
1/*
2 * Copyright 2008 Red Hat, Inc. All rights reserved.
3 * Copyright 2008 Ian Kent <raven@themaw.net>
4 *
5 * This file is part of the Linux kernel and is made available under
6 * the terms of the GNU General Public License, version 2, or at your
7 * option, any later version, incorporated herein by reference.
8 */
9
10#include <linux/module.h>
11#include <linux/vmalloc.h>
12#include <linux/miscdevice.h>
13#include <linux/init.h>
14#include <linux/wait.h>
15#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/file.h>
18#include <linux/fdtable.h>
19#include <linux/sched.h>
20#include <linux/compat.h>
21#include <linux/syscalls.h>
22#include <linux/smp_lock.h>
23#include <linux/magic.h>
24#include <linux/dcache.h>
25#include <linux/uaccess.h>
26
27#include "autofs_i.h"
28
29/*
30 * This module implements an interface for routing autofs ioctl control
31 * commands via a miscellaneous device file.
32 *
33 * The alternate interface is needed because we need to be able open
34 * an ioctl file descriptor on an autofs mount that may be covered by
35 * another mount. This situation arises when starting automount(8)
36 * or other user space daemon which uses direct mounts or offset
37 * mounts (used for autofs lazy mount/umount of nested mount trees),
38 * which have been left busy at at service shutdown.
39 */
40
41#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
42
43typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
44 struct autofs_dev_ioctl *);
45
46static int check_name(const char *name)
47{
48 if (!strchr(name, '/'))
49 return -EINVAL;
50 return 0;
51}
52
53/*
54 * Check a string doesn't overrun the chunk of
55 * memory we copied from user land.
56 */
57static int invalid_str(char *str, void *end)
58{
59 while ((void *) str <= end)
60 if (!*str++)
61 return 0;
62 return -EINVAL;
63}
64
65/*
66 * Check that the user compiled against correct version of autofs
67 * misc device code.
68 *
69 * As well as checking the version compatibility this always copies
70 * the kernel interface version out.
71 */
72static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
73{
74 int err = 0;
75
76 if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
77 (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
78 AUTOFS_WARN("ioctl control interface version mismatch: "
79 "kernel(%u.%u), user(%u.%u), cmd(%d)",
80 AUTOFS_DEV_IOCTL_VERSION_MAJOR,
81 AUTOFS_DEV_IOCTL_VERSION_MINOR,
82 param->ver_major, param->ver_minor, cmd);
83 err = -EINVAL;
84 }
85
86 /* Fill in the kernel version. */
87 param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
88 param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
89
90 return err;
91}
92
93/*
94 * Copy parameter control struct, including a possible path allocated
95 * at the end of the struct.
96 */
97static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
98{
99 struct autofs_dev_ioctl tmp, *ads;
100
101 if (copy_from_user(&tmp, in, sizeof(tmp)))
102 return ERR_PTR(-EFAULT);
103
104 if (tmp.size < sizeof(tmp))
105 return ERR_PTR(-EINVAL);
106
107 ads = kmalloc(tmp.size, GFP_KERNEL);
108 if (!ads)
109 return ERR_PTR(-ENOMEM);
110
111 if (copy_from_user(ads, in, tmp.size)) {
112 kfree(ads);
113 return ERR_PTR(-EFAULT);
114 }
115
116 return ads;
117}
118
119static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
120{
121 kfree(param);
122 return;
123}
124
125/*
126 * Check sanity of parameter control fields and if a path is present
127 * check that it has a "/" and is terminated.
128 */
129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
130{
131 int err = -EINVAL;
132
133 if (check_dev_ioctl_version(cmd, param)) {
134 AUTOFS_WARN("invalid device control module version "
135 "supplied for cmd(0x%08x)", cmd);
136 goto out;
137 }
138
139 if (param->size > sizeof(*param)) {
140 err = check_name(param->path);
141 if (err) {
142 AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
143 cmd);
144 goto out;
145 }
146
147 err = invalid_str(param->path,
148 (void *) ((size_t) param + param->size));
149 if (err) {
150 AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
151 cmd);
152 goto out;
153 }
154 }
155
156 err = 0;
157out:
158 return err;
159}
160
161/*
162 * Get the autofs super block info struct from the file opened on
163 * the autofs mount point.
164 */
165static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
166{
167 struct autofs_sb_info *sbi = NULL;
168 struct inode *inode;
169
170 if (f) {
171 inode = f->f_path.dentry->d_inode;
172 sbi = autofs4_sbi(inode->i_sb);
173 }
174 return sbi;
175}
176
177/* Return autofs module protocol version */
178static int autofs_dev_ioctl_protover(struct file *fp,
179 struct autofs_sb_info *sbi,
180 struct autofs_dev_ioctl *param)
181{
182 param->arg1 = sbi->version;
183 return 0;
184}
185
186/* Return autofs module protocol sub version */
187static int autofs_dev_ioctl_protosubver(struct file *fp,
188 struct autofs_sb_info *sbi,
189 struct autofs_dev_ioctl *param)
190{
191 param->arg1 = sbi->sub_version;
192 return 0;
193}
194
195/*
196 * Walk down the mount stack looking for an autofs mount that
197 * has the requested device number (aka. new_encode_dev(sb->s_dev).
198 */
199static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
200{
201 struct dentry *dentry;
202 struct inode *inode;
203 struct super_block *sb;
204 dev_t s_dev;
205 unsigned int err;
206
207 err = -ENOENT;
208
209 /* Lookup the dentry name at the base of our mount point */
210 dentry = d_lookup(nd->path.dentry, &nd->last);
211 if (!dentry)
212 goto out;
213
214 dput(nd->path.dentry);
215 nd->path.dentry = dentry;
216
217 /* And follow the mount stack looking for our autofs mount */
218 while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
219 inode = nd->path.dentry->d_inode;
220 if (!inode)
221 break;
222
223 sb = inode->i_sb;
224 s_dev = new_encode_dev(sb->s_dev);
225 if (devno == s_dev) {
226 if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
227 err = 0;
228 break;
229 }
230 }
231 }
232out:
233 return err;
234}
235
236/*
237 * Walk down the mount stack looking for an autofs mount that
238 * has the requested mount type (ie. indirect, direct or offset).
239 */
240static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
241{
242 struct dentry *dentry;
243 struct autofs_info *ino;
244 unsigned int err;
245
246 err = -ENOENT;
247
248 /* Lookup the dentry name at the base of our mount point */
249 dentry = d_lookup(nd->path.dentry, &nd->last);
250 if (!dentry)
251 goto out;
252
253 dput(nd->path.dentry);
254 nd->path.dentry = dentry;
255
256 /* And follow the mount stack looking for our autofs mount */
257 while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
258 ino = autofs4_dentry_ino(nd->path.dentry);
259 if (ino && ino->sbi->type & type) {
260 err = 0;
261 break;
262 }
263 }
264out:
265 return err;
266}
267
268static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
269{
270 struct files_struct *files = current->files;
271 struct fdtable *fdt;
272
273 spin_lock(&files->file_lock);
274 fdt = files_fdtable(files);
275 BUG_ON(fdt->fd[fd] != NULL);
276 rcu_assign_pointer(fdt->fd[fd], file);
277 FD_SET(fd, fdt->close_on_exec);
278 spin_unlock(&files->file_lock);
279}
280
281
282/*
283 * Open a file descriptor on the autofs mount point corresponding
284 * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
285 */
286static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
287{
288 struct file *filp;
289 struct nameidata nd;
290 int err, fd;
291
292 fd = get_unused_fd();
293 if (likely(fd >= 0)) {
294 /* Get nameidata of the parent directory */
295 err = path_lookup(path, LOOKUP_PARENT, &nd);
296 if (err)
297 goto out;
298
299 /*
300 * Search down, within the parent, looking for an
301 * autofs super block that has the device number
302 * corresponding to the autofs fs we want to open.
303 */
304 err = autofs_dev_ioctl_find_super(&nd, devid);
305 if (err) {
306 path_put(&nd.path);
307 goto out;
308 }
309
310 filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
311 if (IS_ERR(filp)) {
312 err = PTR_ERR(filp);
313 goto out;
314 }
315
316 autofs_dev_ioctl_fd_install(fd, filp);
317 }
318
319 return fd;
320
321out:
322 put_unused_fd(fd);
323 return err;
324}
325
326/* Open a file descriptor on an autofs mount point */
327static int autofs_dev_ioctl_openmount(struct file *fp,
328 struct autofs_sb_info *sbi,
329 struct autofs_dev_ioctl *param)
330{
331 const char *path;
332 dev_t devid;
333 int err, fd;
334
335 /* param->path has already been checked */
336 if (!param->arg1)
337 return -EINVAL;
338
339 param->ioctlfd = -1;
340
341 path = param->path;
342 devid = param->arg1;
343
344 err = 0;
345 fd = autofs_dev_ioctl_open_mountpoint(path, devid);
346 if (unlikely(fd < 0)) {
347 err = fd;
348 goto out;
349 }
350
351 param->ioctlfd = fd;
352out:
353 return err;
354}
355
356/* Close file descriptor allocated above (user can also use close(2)). */
357static int autofs_dev_ioctl_closemount(struct file *fp,
358 struct autofs_sb_info *sbi,
359 struct autofs_dev_ioctl *param)
360{
361 return sys_close(param->ioctlfd);
362}
363
364/*
365 * Send "ready" status for an existing wait (either a mount or an expire
366 * request).
367 */
368static int autofs_dev_ioctl_ready(struct file *fp,
369 struct autofs_sb_info *sbi,
370 struct autofs_dev_ioctl *param)
371{
372 autofs_wqt_t token;
373
374 token = (autofs_wqt_t) param->arg1;
375 return autofs4_wait_release(sbi, token, 0);
376}
377
378/*
379 * Send "fail" status for an existing wait (either a mount or an expire
380 * request).
381 */
382static int autofs_dev_ioctl_fail(struct file *fp,
383 struct autofs_sb_info *sbi,
384 struct autofs_dev_ioctl *param)
385{
386 autofs_wqt_t token;
387 int status;
388
389 token = (autofs_wqt_t) param->arg1;
390 status = param->arg2 ? param->arg2 : -ENOENT;
391 return autofs4_wait_release(sbi, token, status);
392}
393
394/*
395 * Set the pipe fd for kernel communication to the daemon.
396 *
397 * Normally this is set at mount using an option but if we
398 * are reconnecting to a busy mount then we need to use this
399 * to tell the autofs mount about the new kernel pipe fd. In
400 * order to protect mounts against incorrectly setting the
401 * pipefd we also require that the autofs mount be catatonic.
402 *
403 * This also sets the process group id used to identify the
404 * controlling process (eg. the owning automount(8) daemon).
405 */
406static int autofs_dev_ioctl_setpipefd(struct file *fp,
407 struct autofs_sb_info *sbi,
408 struct autofs_dev_ioctl *param)
409{
410 int pipefd;
411 int err = 0;
412
413 if (param->arg1 == -1)
414 return -EINVAL;
415
416 pipefd = param->arg1;
417
418 mutex_lock(&sbi->wq_mutex);
419 if (!sbi->catatonic) {
420 mutex_unlock(&sbi->wq_mutex);
421 return -EBUSY;
422 } else {
423 struct file *pipe = fget(pipefd);
424 if (!pipe->f_op || !pipe->f_op->write) {
425 err = -EPIPE;
426 fput(pipe);
427 goto out;
428 }
429 sbi->oz_pgrp = task_pgrp_nr(current);
430 sbi->pipefd = pipefd;
431 sbi->pipe = pipe;
432 sbi->catatonic = 0;
433 }
434out:
435 mutex_unlock(&sbi->wq_mutex);
436 return err;
437}
438
439/*
440 * Make the autofs mount point catatonic, no longer responsive to
441 * mount requests. Also closes the kernel pipe file descriptor.
442 */
443static int autofs_dev_ioctl_catatonic(struct file *fp,
444 struct autofs_sb_info *sbi,
445 struct autofs_dev_ioctl *param)
446{
447 autofs4_catatonic_mode(sbi);
448 return 0;
449}
450
451/* Set the autofs mount timeout */
452static int autofs_dev_ioctl_timeout(struct file *fp,
453 struct autofs_sb_info *sbi,
454 struct autofs_dev_ioctl *param)
455{
456 unsigned long timeout;
457
458 timeout = param->arg1;
459 param->arg1 = sbi->exp_timeout / HZ;
460 sbi->exp_timeout = timeout * HZ;
461 return 0;
462}
463
464/*
465 * Return the uid and gid of the last request for the mount
466 *
467 * When reconstructing an autofs mount tree with active mounts
468 * we need to re-connect to mounts that may have used the original
469 * process uid and gid (or string variations of them) for mount
470 * lookups within the map entry.
471 */
472static int autofs_dev_ioctl_requester(struct file *fp,
473 struct autofs_sb_info *sbi,
474 struct autofs_dev_ioctl *param)
475{
476 struct autofs_info *ino;
477 struct nameidata nd;
478 const char *path;
479 dev_t devid;
480 int err = -ENOENT;
481
482 if (param->size <= sizeof(*param)) {
483 err = -EINVAL;
484 goto out;
485 }
486
487 path = param->path;
488 devid = sbi->sb->s_dev;
489
490 param->arg1 = param->arg2 = -1;
491
492 /* Get nameidata of the parent directory */
493 err = path_lookup(path, LOOKUP_PARENT, &nd);
494 if (err)
495 goto out;
496
497 err = autofs_dev_ioctl_find_super(&nd, devid);
498 if (err)
499 goto out_release;
500
501 ino = autofs4_dentry_ino(nd.path.dentry);
502 if (ino) {
503 err = 0;
504 autofs4_expire_wait(nd.path.dentry);
505 spin_lock(&sbi->fs_lock);
506 param->arg1 = ino->uid;
507 param->arg2 = ino->gid;
508 spin_unlock(&sbi->fs_lock);
509 }
510
511out_release:
512 path_put(&nd.path);
513out:
514 return err;
515}
516
517/*
518 * Call repeatedly until it returns -EAGAIN, meaning there's nothing
519 * more that can be done.
520 */
521static int autofs_dev_ioctl_expire(struct file *fp,
522 struct autofs_sb_info *sbi,
523 struct autofs_dev_ioctl *param)
524{
525 struct dentry *dentry;
526 struct vfsmount *mnt;
527 int err = -EAGAIN;
528 int how;
529
530 how = param->arg1;
531 mnt = fp->f_path.mnt;
532
533 if (sbi->type & AUTOFS_TYPE_TRIGGER)
534 dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
535 else
536 dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
537
538 if (dentry) {
539 struct autofs_info *ino = autofs4_dentry_ino(dentry);
540
541 /*
542 * This is synchronous because it makes the daemon a
543 * little easier
544 */
545 err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
546
547 spin_lock(&sbi->fs_lock);
548 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
549 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
550 sbi->sb->s_root->d_mounted++;
551 }
552 ino->flags &= ~AUTOFS_INF_EXPIRING;
553 complete_all(&ino->expire_complete);
554 spin_unlock(&sbi->fs_lock);
555 dput(dentry);
556 }
557
558 return err;
559}
560
561/* Check if autofs mount point is in use */
562static int autofs_dev_ioctl_askumount(struct file *fp,
563 struct autofs_sb_info *sbi,
564 struct autofs_dev_ioctl *param)
565{
566 param->arg1 = 0;
567 if (may_umount(fp->f_path.mnt))
568 param->arg1 = 1;
569 return 0;
570}
571
572/*
573 * Check if the given path is a mountpoint.
574 *
575 * If we are supplied with the file descriptor of an autofs
576 * mount we're looking for a specific mount. In this case
577 * the path is considered a mountpoint if it is itself a
578 * mountpoint or contains a mount, such as a multi-mount
579 * without a root mount. In this case we return 1 if the
580 * path is a mount point and the super magic of the covering
581 * mount if there is one or 0 if it isn't a mountpoint.
582 *
583 * If we aren't supplied with a file descriptor then we
584 * lookup the nameidata of the path and check if it is the
585 * root of a mount. If a type is given we are looking for
586 * a particular autofs mount and if we don't find a match
587 * we return fail. If the located nameidata path is the
588 * root of a mount we return 1 along with the super magic
589 * of the mount or 0 otherwise.
590 *
591 * In both cases the the device number (as returned by
592 * new_encode_dev()) is also returned.
593 */
594static int autofs_dev_ioctl_ismountpoint(struct file *fp,
595 struct autofs_sb_info *sbi,
596 struct autofs_dev_ioctl *param)
597{
598 struct nameidata nd;
599 const char *path;
600 unsigned int type;
601 int err = -ENOENT;
602
603 if (param->size <= sizeof(*param)) {
604 err = -EINVAL;
605 goto out;
606 }
607
608 path = param->path;
609 type = param->arg1;
610
611 param->arg1 = 0;
612 param->arg2 = 0;
613
614 if (!fp || param->ioctlfd == -1) {
615 if (type == AUTOFS_TYPE_ANY) {
616 struct super_block *sb;
617
618 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
619 if (err)
620 goto out;
621
622 sb = nd.path.dentry->d_sb;
623 param->arg1 = new_encode_dev(sb->s_dev);
624 } else {
625 struct autofs_info *ino;
626
627 err = path_lookup(path, LOOKUP_PARENT, &nd);
628 if (err)
629 goto out;
630
631 err = autofs_dev_ioctl_find_sbi_type(&nd, type);
632 if (err)
633 goto out_release;
634
635 ino = autofs4_dentry_ino(nd.path.dentry);
636 param->arg1 = autofs4_get_dev(ino->sbi);
637 }
638
639 err = 0;
640 if (nd.path.dentry->d_inode &&
641 nd.path.mnt->mnt_root == nd.path.dentry) {
642 err = 1;
643 param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic;
644 }
645 } else {
646 dev_t devid = new_encode_dev(sbi->sb->s_dev);
647
648 err = path_lookup(path, LOOKUP_PARENT, &nd);
649 if (err)
650 goto out;
651
652 err = autofs_dev_ioctl_find_super(&nd, devid);
653 if (err)
654 goto out_release;
655
656 param->arg1 = autofs4_get_dev(sbi);
657
658 err = have_submounts(nd.path.dentry);
659
660 if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
661 if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
662 struct inode *inode = nd.path.dentry->d_inode;
663 param->arg2 = inode->i_sb->s_magic;
664 }
665 }
666 }
667
668out_release:
669 path_put(&nd.path);
670out:
671 return err;
672}
673
674/*
675 * Our range of ioctl numbers isn't 0 based so we need to shift
676 * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table
677 * lookup.
678 */
679#define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST))
680
681static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
682{
683 static struct {
684 int cmd;
685 ioctl_fn fn;
686 } _ioctls[] = {
687 {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL},
688 {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD),
689 autofs_dev_ioctl_protover},
690 {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD),
691 autofs_dev_ioctl_protosubver},
692 {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD),
693 autofs_dev_ioctl_openmount},
694 {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD),
695 autofs_dev_ioctl_closemount},
696 {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD),
697 autofs_dev_ioctl_ready},
698 {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD),
699 autofs_dev_ioctl_fail},
700 {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD),
701 autofs_dev_ioctl_setpipefd},
702 {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD),
703 autofs_dev_ioctl_catatonic},
704 {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD),
705 autofs_dev_ioctl_timeout},
706 {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD),
707 autofs_dev_ioctl_requester},
708 {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD),
709 autofs_dev_ioctl_expire},
710 {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD),
711 autofs_dev_ioctl_askumount},
712 {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD),
713 autofs_dev_ioctl_ismountpoint}
714 };
715 unsigned int idx = cmd_idx(cmd);
716
717 return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn;
718}
719
720/* ioctl dispatcher */
721static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user)
722{
723 struct autofs_dev_ioctl *param;
724 struct file *fp;
725 struct autofs_sb_info *sbi;
726 unsigned int cmd_first, cmd;
727 ioctl_fn fn = NULL;
728 int err = 0;
729
730 /* only root can play with this */
731 if (!capable(CAP_SYS_ADMIN))
732 return -EPERM;
733
734 cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
735 cmd = _IOC_NR(command);
736
737 if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) ||
738 cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) {
739 return -ENOTTY;
740 }
741
742 /* Copy the parameters into kernel space. */
743 param = copy_dev_ioctl(user);
744 if (IS_ERR(param))
745 return PTR_ERR(param);
746
747 err = validate_dev_ioctl(command, param);
748 if (err)
749 goto out;
750
751 /* The validate routine above always sets the version */
752 if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD)
753 goto done;
754
755 fn = lookup_dev_ioctl(cmd);
756 if (!fn) {
757 AUTOFS_WARN("unknown command 0x%08x", command);
758 return -ENOTTY;
759 }
760
761 fp = NULL;
762 sbi = NULL;
763
764 /*
765 * For obvious reasons the openmount can't have a file
766 * descriptor yet. We don't take a reference to the
767 * file during close to allow for immediate release.
768 */
769 if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
770 cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
771 fp = fget(param->ioctlfd);
772 if (!fp) {
773 if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD)
774 goto cont;
775 err = -EBADF;
776 goto out;
777 }
778
779 if (!fp->f_op) {
780 err = -ENOTTY;
781 fput(fp);
782 goto out;
783 }
784
785 sbi = autofs_dev_ioctl_sbi(fp);
786 if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
787 err = -EINVAL;
788 fput(fp);
789 goto out;
790 }
791
792 /*
793 * Admin needs to be able to set the mount catatonic in
794 * order to be able to perform the re-open.
795 */
796 if (!autofs4_oz_mode(sbi) &&
797 cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) {
798 err = -EACCES;
799 fput(fp);
800 goto out;
801 }
802 }
803cont:
804 err = fn(fp, sbi, param);
805
806 if (fp)
807 fput(fp);
808done:
809 if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE))
810 err = -EFAULT;
811out:
812 free_dev_ioctl(param);
813 return err;
814}
815
816static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
817{
818 int err;
819 err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u);
820 return (long) err;
821}
822
823#ifdef CONFIG_COMPAT
824static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
825{
826 return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u));
827}
828#else
829#define autofs_dev_ioctl_compat NULL
830#endif
831
832static const struct file_operations _dev_ioctl_fops = {
833 .unlocked_ioctl = autofs_dev_ioctl,
834 .compat_ioctl = autofs_dev_ioctl_compat,
835 .owner = THIS_MODULE,
836};
837
838static struct miscdevice _autofs_dev_ioctl_misc = {
839 .minor = MISC_DYNAMIC_MINOR,
840 .name = AUTOFS_DEVICE_NAME,
841 .fops = &_dev_ioctl_fops
842};
843
844/* Register/deregister misc character device */
845int autofs_dev_ioctl_init(void)
846{
847 int r;
848
849 r = misc_register(&_autofs_dev_ioctl_misc);
850 if (r) {
851 AUTOFS_ERROR("misc_register failed for control device");
852 return r;
853 }
854
855 return 0;
856}
857
858void autofs_dev_ioctl_exit(void)
859{
860 misc_deregister(&_autofs_dev_ioctl_misc);
861 return;
862}
863
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cdabb796ff01..cde2f8e8935a 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -244,10 +244,10 @@ cont:
244} 244}
245 245
246/* Check if we can expire a direct mount (possibly a tree) */ 246/* Check if we can expire a direct mount (possibly a tree) */
247static struct dentry *autofs4_expire_direct(struct super_block *sb, 247struct dentry *autofs4_expire_direct(struct super_block *sb,
248 struct vfsmount *mnt, 248 struct vfsmount *mnt,
249 struct autofs_sb_info *sbi, 249 struct autofs_sb_info *sbi,
250 int how) 250 int how)
251{ 251{
252 unsigned long timeout; 252 unsigned long timeout;
253 struct dentry *root = dget(sb->s_root); 253 struct dentry *root = dget(sb->s_root);
@@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
283 * - it is unused by any user process 283 * - it is unused by any user process
284 * - it has been unused for exp_timeout time 284 * - it has been unused for exp_timeout time
285 */ 285 */
286static struct dentry *autofs4_expire_indirect(struct super_block *sb, 286struct dentry *autofs4_expire_indirect(struct super_block *sb,
287 struct vfsmount *mnt, 287 struct vfsmount *mnt,
288 struct autofs_sb_info *sbi, 288 struct autofs_sb_info *sbi,
289 int how) 289 int how)
290{ 290{
291 unsigned long timeout; 291 unsigned long timeout;
292 struct dentry *root = sb->s_root; 292 struct dentry *root = sb->s_root;
@@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
479 if (arg && get_user(do_now, arg)) 479 if (arg && get_user(do_now, arg))
480 return -EFAULT; 480 return -EFAULT;
481 481
482 if (sbi->type & AUTOFS_TYPE_DIRECT) 482 if (sbi->type & AUTOFS_TYPE_TRIGGER)
483 dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); 483 dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
484 else 484 else
485 dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); 485 dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 723a1c5e361b..9722e4bd8957 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = {
29 29
30static int __init init_autofs4_fs(void) 30static int __init init_autofs4_fs(void)
31{ 31{
32 return register_filesystem(&autofs_fs_type); 32 int err;
33
34 err = register_filesystem(&autofs_fs_type);
35 if (err)
36 return err;
37
38 autofs_dev_ioctl_init();
39
40 return err;
33} 41}
34 42
35static void __exit exit_autofs4_fs(void) 43static void __exit exit_autofs4_fs(void)
36{ 44{
45 autofs_dev_ioctl_exit();
37 unregister_filesystem(&autofs_fs_type); 46 unregister_filesystem(&autofs_fs_type);
38} 47}
39 48
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 45d55819203d..c7e65bb30ba0 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
53 atomic_set(&ino->count, 0); 53 atomic_set(&ino->count, 0);
54 } 54 }
55 55
56 ino->uid = 0;
57 ino->gid = 0;
56 ino->mode = mode; 58 ino->mode = mode;
57 ino->last_used = jiffies; 59 ino->last_used = jiffies;
58 60
@@ -288,7 +290,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
288 *type = AUTOFS_TYPE_DIRECT; 290 *type = AUTOFS_TYPE_DIRECT;
289 break; 291 break;
290 case Opt_offset: 292 case Opt_offset:
291 *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; 293 *type = AUTOFS_TYPE_OFFSET;
292 break; 294 break;
293 default: 295 default:
294 return 1; 296 return 1;
@@ -336,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
336 sbi->sb = s; 338 sbi->sb = s;
337 sbi->version = 0; 339 sbi->version = 0;
338 sbi->sub_version = 0; 340 sbi->sub_version = 0;
339 sbi->type = 0; 341 sbi->type = AUTOFS_TYPE_INDIRECT;
340 sbi->min_proto = 0; 342 sbi->min_proto = 0;
341 sbi->max_proto = 0; 343 sbi->max_proto = 0;
342 mutex_init(&sbi->wq_mutex); 344 mutex_init(&sbi->wq_mutex);
@@ -378,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
378 } 380 }
379 381
380 root_inode->i_fop = &autofs4_root_operations; 382 root_inode->i_fop = &autofs4_root_operations;
381 root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? 383 root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ?
382 &autofs4_direct_root_inode_operations : 384 &autofs4_direct_root_inode_operations :
383 &autofs4_indirect_root_inode_operations; 385 &autofs4_indirect_root_inode_operations;
384 386
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 35216d18d8b5..4b67c2a2d77c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
337 * is very similar for indirect mounts except only dentrys 337 * is very similar for indirect mounts except only dentrys
338 * in the root of the autofs file system may be negative. 338 * in the root of the autofs file system may be negative.
339 */ 339 */
340 if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)) 340 if (sbi->type & AUTOFS_TYPE_TRIGGER)
341 return -ENOENT; 341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent)) 342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT; 343 return -ENOENT;
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
348 return -ENOMEM; 348 return -ENOMEM;
349 349
350 /* If this is a direct mount request create a dummy name */ 350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) 351 if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER)
352 qstr.len = sprintf(name, "%p", dentry); 352 qstr.len = sprintf(name, "%p", dentry);
353 else { 353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name); 354 qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
406 type = autofs_ptype_expire_multi; 406 type = autofs_ptype_expire_multi;
407 } else { 407 } else {
408 if (notify == NFY_MOUNT) 408 if (notify == NFY_MOUNT)
409 type = (sbi->type & AUTOFS_TYPE_DIRECT) ? 409 type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
410 autofs_ptype_missing_direct : 410 autofs_ptype_missing_direct :
411 autofs_ptype_missing_indirect; 411 autofs_ptype_missing_indirect;
412 else 412 else
413 type = (sbi->type & AUTOFS_TYPE_DIRECT) ? 413 type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
414 autofs_ptype_expire_direct : 414 autofs_ptype_expire_direct :
415 autofs_ptype_expire_indirect; 415 autofs_ptype_expire_indirect;
416 } 416 }
@@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
457 457
458 status = wq->status; 458 status = wq->status;
459 459
460 /*
461 * For direct and offset mounts we need to track the requester's
462 * uid and gid in the dentry info struct. This is so it can be
463 * supplied, on request, by the misc device ioctl interface.
464 * This is needed during daemon resatart when reconnecting
465 * to existing, active, autofs mounts. The uid and gid (and
466 * related string values) may be used for macro substitution
467 * in autofs mount maps.
468 */
469 if (!status) {
470 struct autofs_info *ino;
471 struct dentry *de = NULL;
472
473 /* direct mount or browsable map */
474 ino = autofs4_dentry_ino(dentry);
475 if (!ino) {
476 /* If not lookup actual dentry used */
477 de = d_lookup(dentry->d_parent, &dentry->d_name);
478 if (de)
479 ino = autofs4_dentry_ino(de);
480 }
481
482 /* Set mount requester */
483 if (ino) {
484 spin_lock(&sbi->fs_lock);
485 ino->uid = wq->uid;
486 ino->gid = wq->gid;
487 spin_unlock(&sbi->fs_lock);
488 }
489
490 if (de)
491 dput(de);
492 }
493
460 /* Are we the last process to need status? */ 494 /* Are we the last process to need status? */
461 mutex_lock(&sbi->wq_mutex); 495 mutex_lock(&sbi->wq_mutex);
462 if (!--wq->wait_ctr) 496 if (!--wq->wait_ctr)
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index e2595c2c403a..7893eaa1e58c 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -55,8 +55,12 @@ enum super_flags {
55}; 55};
56 56
57#define BEFS_BYTEORDER_NATIVE 0x42494745 57#define BEFS_BYTEORDER_NATIVE 0x42494745
58#define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE)
59#define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE)
58 60
59#define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 61#define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1
62#define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1)
63#define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1)
60 64
61/* 65/*
62 * Flags of inode 66 * Flags of inode
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 9286b2af893a..b6dfee37c7b7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -809,8 +809,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
809 809
810 /* account for offset of super block on x86 */ 810 /* account for offset of super block on x86 */
811 disk_sb = (befs_super_block *) bh->b_data; 811 disk_sb = (befs_super_block *) bh->b_data;
812 if ((le32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1) || 812 if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) ||
813 (be32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1)) { 813 (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) {
814 befs_debug(sb, "Using PPC superblock location"); 814 befs_debug(sb, "Using PPC superblock location");
815 } else { 815 } else {
816 befs_debug(sb, "Using x86 superblock location"); 816 befs_debug(sb, "Using x86 superblock location");
diff --git a/fs/befs/super.c b/fs/befs/super.c
index 8c3401ff6d6a..41f2b4d0093e 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -26,10 +26,10 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
26 befs_sb_info *befs_sb = BEFS_SB(sb); 26 befs_sb_info *befs_sb = BEFS_SB(sb);
27 27
28 /* Check the byte order of the filesystem */ 28 /* Check the byte order of the filesystem */
29 if (le32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) 29 if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
30 befs_sb->byte_order = BEFS_BYTESEX_LE; 30 befs_sb->byte_order = BEFS_BYTESEX_LE;
31 else if (be32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) 31 else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE)
32 befs_sb->byte_order = BEFS_BYTESEX_BE; 32 befs_sb->byte_order = BEFS_BYTESEX_BE;
33 33
34 befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); 34 befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1);
35 befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); 35 befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 655ed8d30a86..8fcfa398d350 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -683,7 +683,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
683 * switch really is going to happen - do this in 683 * switch really is going to happen - do this in
684 * flush_thread(). - akpm 684 * flush_thread(). - akpm
685 */ 685 */
686 SET_PERSONALITY(loc->elf_ex, 0); 686 SET_PERSONALITY(loc->elf_ex);
687 687
688 interpreter = open_exec(elf_interpreter); 688 interpreter = open_exec(elf_interpreter);
689 retval = PTR_ERR(interpreter); 689 retval = PTR_ERR(interpreter);
@@ -734,7 +734,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
734 goto out_free_dentry; 734 goto out_free_dentry;
735 } else { 735 } else {
736 /* Executables without an interpreter also need a personality */ 736 /* Executables without an interpreter also need a personality */
737 SET_PERSONALITY(loc->elf_ex, 0); 737 SET_PERSONALITY(loc->elf_ex);
738 } 738 }
739 739
740 /* Flush all traces of the currently running executable */ 740 /* Flush all traces of the currently running executable */
@@ -748,7 +748,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
748 748
749 /* Do this immediately, since STACK_TOP as used in setup_arg_pages 749 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
750 may depend on the personality. */ 750 may depend on the personality. */
751 SET_PERSONALITY(loc->elf_ex, 0); 751 SET_PERSONALITY(loc->elf_ex);
752 if (elf_read_implies_exec(loc->elf_ex, executable_stack)) 752 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
753 current->personality |= READ_IMPLIES_EXEC; 753 current->personality |= READ_IMPLIES_EXEC;
754 754
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
1156static unsigned long vma_dump_size(struct vm_area_struct *vma, 1156static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157 unsigned long mm_flags) 1157 unsigned long mm_flags)
1158{ 1158{
1159#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1160
1159 /* The vma can be set up to tell us the answer directly. */ 1161 /* The vma can be set up to tell us the answer directly. */
1160 if (vma->vm_flags & VM_ALWAYSDUMP) 1162 if (vma->vm_flags & VM_ALWAYSDUMP)
1161 goto whole; 1163 goto whole;
1162 1164
1165 /* Hugetlb memory check */
1166 if (vma->vm_flags & VM_HUGETLB) {
1167 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1168 goto whole;
1169 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1170 goto whole;
1171 }
1172
1163 /* Do not dump I/O mapped devices or special mappings */ 1173 /* Do not dump I/O mapped devices or special mappings */
1164 if (vma->vm_flags & (VM_IO | VM_RESERVED)) 1174 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1165 return 0; 1175 return 0;
1166 1176
1167#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1168
1169 /* By default, dump shared memory if mapped from an anonymous file. */ 1177 /* By default, dump shared memory if mapped from an anonymous file. */
1170 if (vma->vm_flags & VM_SHARED) { 1178 if (vma->vm_flags & VM_SHARED) {
1171 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? 1179 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
@@ -1333,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1333 prstatus->pr_pgrp = task_pgrp_vnr(p); 1341 prstatus->pr_pgrp = task_pgrp_vnr(p);
1334 prstatus->pr_sid = task_session_vnr(p); 1342 prstatus->pr_sid = task_session_vnr(p);
1335 if (thread_group_leader(p)) { 1343 if (thread_group_leader(p)) {
1344 struct task_cputime cputime;
1345
1336 /* 1346 /*
1337 * This is the record for the group leader. Add in the 1347 * This is the record for the group leader. It shows the
1338 * cumulative times of previous dead threads. This total 1348 * group-wide total, not its individual thread total.
1339 * won't include the time of each live thread whose state
1340 * is included in the core dump. The final total reported
1341 * to our parent process when it calls wait4 will include
1342 * those sums as well as the little bit more time it takes
1343 * this and each other thread to finish dying after the
1344 * core dump synchronization phase.
1345 */ 1349 */
1346 cputime_to_timeval(cputime_add(p->utime, p->signal->utime), 1350 thread_group_cputime(p, &cputime);
1347 &prstatus->pr_utime); 1351 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1348 cputime_to_timeval(cputime_add(p->stime, p->signal->stime), 1352 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1349 &prstatus->pr_stime);
1350 } else { 1353 } else {
1351 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1354 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1352 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1355 cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 80c1f952ef78..0e8367c54624 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -25,6 +25,7 @@
25#include <linux/fcntl.h> 25#include <linux/fcntl.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/security.h>
28#include <linux/highmem.h> 29#include <linux/highmem.h>
29#include <linux/highuid.h> 30#include <linux/highuid.h>
30#include <linux/personality.h> 31#include <linux/personality.h>
@@ -455,8 +456,19 @@ error_kill:
455} 456}
456 457
457/*****************************************************************************/ 458/*****************************************************************************/
459
460#ifndef ELF_BASE_PLATFORM
458/* 461/*
459 * present useful information to the program 462 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
463 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
464 * will be copied to the user stack in the same manner as AT_PLATFORM.
465 */
466#define ELF_BASE_PLATFORM NULL
467#endif
468
469/*
470 * present useful information to the program by shovelling it onto the new
471 * process's stack
460 */ 472 */
461static int create_elf_fdpic_tables(struct linux_binprm *bprm, 473static int create_elf_fdpic_tables(struct linux_binprm *bprm,
462 struct mm_struct *mm, 474 struct mm_struct *mm,
@@ -466,15 +478,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
466 unsigned long sp, csp, nitems; 478 unsigned long sp, csp, nitems;
467 elf_caddr_t __user *argv, *envp; 479 elf_caddr_t __user *argv, *envp;
468 size_t platform_len = 0, len; 480 size_t platform_len = 0, len;
469 char *k_platform; 481 char *k_platform, *k_base_platform;
470 char __user *u_platform, *p; 482 char __user *u_platform, *u_base_platform, *p;
471 long hwcap; 483 long hwcap;
472 int loop; 484 int loop;
473 int nr; /* reset for each csp adjustment */ 485 int nr; /* reset for each csp adjustment */
474 486
475 /* we're going to shovel a whole load of stuff onto the stack */
476#ifdef CONFIG_MMU 487#ifdef CONFIG_MMU
477 sp = bprm->p; 488 /* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
489 * by the processes running on the same package. One thing we can do is
490 * to shuffle the initial stack for them, so we give the architecture
491 * an opportunity to do so here.
492 */
493 sp = arch_align_stack(bprm->p);
478#else 494#else
479 sp = mm->start_stack; 495 sp = mm->start_stack;
480 496
@@ -483,11 +499,14 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
483 return -EFAULT; 499 return -EFAULT;
484#endif 500#endif
485 501
486 /* get hold of platform and hardware capabilities masks for the machine
487 * we are running on. In some cases (Sparc), this info is impossible
488 * to get, in others (i386) it is merely difficult.
489 */
490 hwcap = ELF_HWCAP; 502 hwcap = ELF_HWCAP;
503
504 /*
505 * If this architecture has a platform capability string, copy it
506 * to userspace. In some cases (Sparc), this info is impossible
507 * for userspace to get any other way, in others (i386) it is
508 * merely difficult.
509 */
491 k_platform = ELF_PLATFORM; 510 k_platform = ELF_PLATFORM;
492 u_platform = NULL; 511 u_platform = NULL;
493 512
@@ -499,19 +518,20 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
499 return -EFAULT; 518 return -EFAULT;
500 } 519 }
501 520
502#if defined(__i386__) && defined(CONFIG_SMP) 521 /*
503 /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions 522 * If this architecture has a "base" platform capability
504 * by the processes running on the same package. One thing we can do is 523 * string, copy it to userspace.
505 * to shuffle the initial stack for them.
506 *
507 * the conditionals here are unneeded, but kept in to make the code
508 * behaviour the same as pre change unless we have hyperthreaded
509 * processors. This keeps Mr Marcelo Person happier but should be
510 * removed for 2.5
511 */ 524 */
512 if (smp_num_siblings > 1) 525 k_base_platform = ELF_BASE_PLATFORM;
513 sp = sp - ((current->pid % 64) << 7); 526 u_base_platform = NULL;
514#endif 527
528 if (k_base_platform) {
529 platform_len = strlen(k_base_platform) + 1;
530 sp -= platform_len;
531 u_base_platform = (char __user *) sp;
532 if (__copy_to_user(u_base_platform, k_base_platform, platform_len) != 0)
533 return -EFAULT;
534 }
515 535
516 sp &= ~7UL; 536 sp &= ~7UL;
517 537
@@ -541,9 +561,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
541 } 561 }
542 562
543 /* force 16 byte _final_ alignment here for generality */ 563 /* force 16 byte _final_ alignment here for generality */
544#define DLINFO_ITEMS 13 564#define DLINFO_ITEMS 15
565
566 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) +
567 (k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
545 568
546 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; 569 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD)
570 nitems++;
547 571
548 csp = sp; 572 csp = sp;
549 sp -= nitems * 2 * sizeof(unsigned long); 573 sp -= nitems * 2 * sizeof(unsigned long);
@@ -575,6 +599,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
575 (elf_addr_t) (unsigned long) u_platform); 599 (elf_addr_t) (unsigned long) u_platform);
576 } 600 }
577 601
602 if (k_base_platform) {
603 nr = 0;
604 csp -= 2 * sizeof(unsigned long);
605 NEW_AUX_ENT(AT_BASE_PLATFORM,
606 (elf_addr_t) (unsigned long) u_base_platform);
607 }
608
609 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
610 nr = 0;
611 csp -= 2 * sizeof(unsigned long);
612 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
613 }
614
578 nr = 0; 615 nr = 0;
579 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); 616 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
580 NEW_AUX_ENT(AT_HWCAP, hwcap); 617 NEW_AUX_ENT(AT_HWCAP, hwcap);
@@ -590,6 +627,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
590 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); 627 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
591 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); 628 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
592 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); 629 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
630 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
631 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
593 632
594#ifdef ARCH_DLINFO 633#ifdef ARCH_DLINFO
595 nr = 0; 634 nr = 0;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f9c88d0c8ced..32fb00b52cd0 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ 46 bprm->recursion_depth++; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index dfc0197905ca..ccb781a6a804 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -229,13 +229,13 @@ static int decompress_exec(
229 ret = 10; 229 ret = 10;
230 if (buf[3] & EXTRA_FIELD) { 230 if (buf[3] & EXTRA_FIELD) {
231 ret += 2 + buf[10] + (buf[11] << 8); 231 ret += 2 + buf[10] + (buf[11] << 8);
232 if (unlikely(LBUFSIZE == ret)) { 232 if (unlikely(LBUFSIZE <= ret)) {
233 DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); 233 DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n");
234 goto out_free_buf; 234 goto out_free_buf;
235 } 235 }
236 } 236 }
237 if (buf[3] & ORIG_NAME) { 237 if (buf[3] & ORIG_NAME) {
238 for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) 238 while (ret < LBUFSIZE && buf[ret++] != 0)
239 ; 239 ;
240 if (unlikely(LBUFSIZE == ret)) { 240 if (unlikely(LBUFSIZE == ret)) {
241 DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); 241 DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n");
@@ -243,7 +243,7 @@ static int decompress_exec(
243 } 243 }
244 } 244 }
245 if (buf[3] & COMMENT) { 245 if (buf[3] & COMMENT) {
246 for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) 246 while (ret < LBUFSIZE && buf[ret++] != 0)
247 ; 247 ;
248 if (unlikely(LBUFSIZE == ret)) { 248 if (unlikely(LBUFSIZE == ret)) {
249 DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); 249 DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n");
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 8d7e88e02e0f..f2744ab4e5b3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -117,7 +117,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
117 goto _ret; 117 goto _ret;
118 118
119 retval = -ENOEXEC; 119 retval = -ENOEXEC;
120 if (bprm->misc_bang) 120 if (bprm->recursion_depth > BINPRM_MAX_RECURSION)
121 goto _ret; 121 goto _ret;
122 122
123 /* to keep locking time low, we copy the interpreter string */ 123 /* to keep locking time low, we copy the interpreter string */
@@ -197,7 +197,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
197 if (retval < 0) 197 if (retval < 0)
198 goto _error; 198 goto _error;
199 199
200 bprm->misc_bang = 1; 200 bprm->recursion_depth++;
201 201
202 retval = search_binary_handler (bprm, regs); 202 retval = search_binary_handler (bprm, regs);
203 if (retval < 0) 203 if (retval < 0)
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 9e3963f7ebf1..08343505e184 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -22,14 +22,15 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
22 char interp[BINPRM_BUF_SIZE]; 22 char interp[BINPRM_BUF_SIZE];
23 int retval; 23 int retval;
24 24
25 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || (bprm->sh_bang)) 25 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') ||
26 (bprm->recursion_depth > BINPRM_MAX_RECURSION))
26 return -ENOEXEC; 27 return -ENOEXEC;
27 /* 28 /*
28 * This section does the #! interpretation. 29 * This section does the #! interpretation.
29 * Sorta complicated, but hopefully it will work. -TYT 30 * Sorta complicated, but hopefully it will work. -TYT
30 */ 31 */
31 32
32 bprm->sh_bang = 1; 33 bprm->recursion_depth++;
33 allow_write_access(bprm->file); 34 allow_write_access(bprm->file);
34 fput(bprm->file); 35 fput(bprm->file);
35 bprm->file = NULL; 36 bprm->file = NULL;
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 68be580ba289..74e587a52796 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -306,3 +306,5 @@ static void __exit exit_som_binfmt(void)
306 306
307core_initcall(init_som_binfmt); 307core_initcall(init_som_binfmt);
308module_exit(exit_som_binfmt); 308module_exit(exit_som_binfmt);
309
310MODULE_LICENSE("GPL");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d84f0469a016..218408eed1bb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1262 1262
1263/** 1263/**
1264 * lookup_bdev - lookup a struct block_device by name 1264 * lookup_bdev - lookup a struct block_device by name
1265 * @pathname: special file representing the block device 1265 * @path: special file representing the block device
1266 * 1266 *
1267 * Get a reference to the blockdevice at @pathname in the current 1267 * Get a reference to the blockdevice at @pathname in the current
1268 * namespace if possible and return it. Return ERR_PTR(error) 1268 * namespace if possible and return it. Return ERR_PTR(error)
diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3b..6569fda5cfed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
76 76
77void unlock_buffer(struct buffer_head *bh) 77void unlock_buffer(struct buffer_head *bh)
78{ 78{
79 smp_mb__before_clear_bit(); 79 clear_bit_unlock(BH_Lock, &bh->b_state);
80 clear_buffer_locked(bh);
81 smp_mb__after_clear_bit(); 80 smp_mb__after_clear_bit();
82 wake_up_bit(&bh->b_state, BH_Lock); 81 wake_up_bit(&bh->b_state, BH_Lock);
83} 82}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3cb7cda3d780..262fa10e213d 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -22,9 +22,6 @@
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24 24
25#ifdef CONFIG_KMOD
26#include <linux/kmod.h>
27#endif
28#include "internal.h" 25#include "internal.h"
29 26
30/* 27/*
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 000000000000..341a98965bd0
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
1config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET
4 select NLS
5 help
6 This is the client VFS module for the Common Internet File System
7 (CIFS) protocol which is the successor to the Server Message Block
8 (SMB) protocol, the native file sharing mechanism for most early
9 PC operating systems. The CIFS protocol is fully supported by
10 file servers such as Windows 2000 (including Windows 2003, NT 4
11 and Windows XP) as well by Samba (which provides excellent CIFS
12 server support for Linux and many other operating systems). Limited
13 support for OS/2 and Windows ME and similar servers is provided as
14 well.
15
16 The cifs module provides an advanced network file system
17 client for mounting to CIFS compliant servers. It includes
18 support for DFS (hierarchical name space), secure per-user
19 session establishment via Kerberos or NTLM or NTLMv2,
20 safe distributed caching (oplock), optional packet
21 signing, Unicode and other internationalization improvements.
22 If you need to mount to Samba or Windows from this machine, say Y.
23
24config CIFS_STATS
25 bool "CIFS statistics"
26 depends on CIFS
27 help
28 Enabling this option will cause statistics for each server share
29 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
30
31config CIFS_STATS2
32 bool "Extended statistics"
33 depends on CIFS_STATS
34 help
35 Enabling this option will allow more detailed statistics on SMB
36 request timing to be displayed in /proc/fs/cifs/DebugData and also
37 allow optional logging of slow responses to dmesg (depending on the
38 value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
39 These additional statistics may have a minor effect on performance
40 and memory utilization.
41
42 Unless you are a developer or are doing network performance analysis
43 or tuning, say N.
44
45config CIFS_WEAK_PW_HASH
46 bool "Support legacy servers which use weaker LANMAN security"
47 depends on CIFS
48 help
49 Modern CIFS servers including Samba and most Windows versions
50 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
51 security mechanisms. These hash the password more securely
52 than the mechanisms used in the older LANMAN version of the
53 SMB protocol but LANMAN based authentication is needed to
54 establish sessions with some old SMB servers.
55
56 Enabling this option allows the cifs module to mount to older
57 LANMAN based servers such as OS/2 and Windows 95, but such
58 mounts may be less secure than mounts using NTLM or more recent
59 security mechanisms if you are on a public network. Unless you
60 have a need to access old SMB servers (and are on a private
61 network) you probably want to say N. Even if this support
62 is enabled in the kernel build, LANMAN authentication will not be
63 used automatically. At runtime LANMAN mounts are disabled but
64 can be set to required (or optional) either in
65 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
66 option on the mount command. This support is disabled by
67 default in order to reduce the possibility of a downgrade
68 attack.
69
70 If unsure, say N.
71
72config CIFS_UPCALL
73 bool "Kerberos/SPNEGO advanced session setup"
74 depends on CIFS && KEYS
75 help
76 Enables an upcall mechanism for CIFS which accesses
77 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
78 Kerberos tickets which are needed to mount to certain secure servers
79 (for which more secure Kerberos authentication is required). If
80 unsure, say N.
81
82config CIFS_XATTR
83 bool "CIFS extended attributes"
84 depends on CIFS
85 help
86 Extended attributes are name:value pairs associated with inodes by
87 the kernel or by users (see the attr(5) manual page, or visit
88 <http://acl.bestbits.at/> for details). CIFS maps the name of
89 extended attributes beginning with the user namespace prefix
90 to SMB/CIFS EAs. EAs are stored on Windows servers without the
91 user namespace prefix, but their names are seen by Linux cifs clients
92 prefaced by the user namespace prefix. The system namespace
93 (used by some filesystems to store ACLs) is not supported at
94 this time.
95
96 If unsure, say N.
97
98config CIFS_POSIX
99 bool "CIFS POSIX Extensions"
100 depends on CIFS_XATTR
101 help
102 Enabling this option will cause the cifs client to attempt to
103 negotiate a newer dialect with servers, such as Samba 3.0.5
104 or later, that optionally can handle more POSIX like (rather
105 than Windows like) file behavior. It also enables
106 support for POSIX ACLs (getfacl and setfacl) to servers
107 (such as Samba 3.10 and later) which can negotiate
108 CIFS POSIX ACL support. If unsure, say N.
109
110config CIFS_DEBUG2
111 bool "Enable additional CIFS debugging routines"
112 depends on CIFS
113 help
114 Enabling this option adds a few more debugging routines
115 to the cifs code which slightly increases the size of
116 the cifs module and can cause additional logging of debug
117 messages in some error paths, slowing performance. This
118 option can be turned off unless you are debugging
119 cifs problems. If unsure, say N.
120
121config CIFS_EXPERIMENTAL
122 bool "CIFS Experimental Features (EXPERIMENTAL)"
123 depends on CIFS && EXPERIMENTAL
124 help
125 Enables cifs features under testing. These features are
126 experimental and currently include DFS support and directory
127 change notification ie fcntl(F_DNOTIFY), as well as the upcall
128 mechanism which will be used for Kerberos session negotiation
129 and uid remapping. Some of these features also may depend on
130 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
131 (which is disabled by default). See the file fs/cifs/README
132 for more details. If unsure, say N.
133
134config CIFS_DFS_UPCALL
135 bool "DFS feature support (EXPERIMENTAL)"
136 depends on CIFS_EXPERIMENTAL
137 depends on KEYS
138 help
139 Enables an upcall mechanism for CIFS which contacts userspace
140 helper utilities to provide server name resolution (host names to
141 IP addresses) which is needed for implicit mounts of DFS junction
142 points. If unsure, say N.
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a0605125..62d8bd8f14c0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1791 SetPageUptodate(page); 1791 SetPageUptodate(page);
1792 unlock_page(page); 1792 unlock_page(page);
1793 if (!pagevec_add(plru_pvec, page)) 1793 if (!pagevec_add(plru_pvec, page))
1794 __pagevec_lru_add(plru_pvec); 1794 __pagevec_lru_add_file(plru_pvec);
1795 data += PAGE_CACHE_SIZE; 1795 data += PAGE_CACHE_SIZE;
1796 } 1796 }
1797 return; 1797 return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1925 bytes_read = 0; 1925 bytes_read = 0;
1926 } 1926 }
1927 1927
1928 pagevec_lru_add(&lru_pvec); 1928 pagevec_lru_add_file(&lru_pvec);
1929 1929
1930/* need to free smb_read_data buf before exit */ 1930/* need to free smb_read_data buf before exit */
1931 if (smb_read_data) { 1931 if (smb_read_data) {
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 0d9b80ec689c..cfd29da714d1 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,9 +362,8 @@ static int init_coda_psdev(void)
362 goto out_chrdev; 362 goto out_chrdev;
363 } 363 }
364 for (i = 0; i < MAX_CODADEVS; i++) 364 for (i = 0; i < MAX_CODADEVS; i++)
365 device_create_drvdata(coda_psdev_class, NULL, 365 device_create(coda_psdev_class, NULL,
366 MKDEV(CODA_PSDEV_MAJOR, i), 366 MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i);
367 NULL, "cfs%d", i);
368 coda_sysctl_init(); 367 coda_sysctl_init();
369 goto out; 368 goto out;
370 369
diff --git a/fs/compat.c b/fs/compat.c
index 075d0509970d..5f9ec449c799 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _
137 return compat_sys_futimesat(AT_FDCWD, filename, t); 137 return compat_sys_futimesat(AT_FDCWD, filename, t);
138} 138}
139 139
140static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
141{
142 compat_ino_t ino = stat->ino;
143 typeof(ubuf->st_uid) uid = 0;
144 typeof(ubuf->st_gid) gid = 0;
145 int err;
146
147 SET_UID(uid, stat->uid);
148 SET_GID(gid, stat->gid);
149
150 if ((u64) stat->size > MAX_NON_LFS ||
151 !old_valid_dev(stat->dev) ||
152 !old_valid_dev(stat->rdev))
153 return -EOVERFLOW;
154 if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
155 return -EOVERFLOW;
156
157 if (clear_user(ubuf, sizeof(*ubuf)))
158 return -EFAULT;
159
160 err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
161 err |= __put_user(ino, &ubuf->st_ino);
162 err |= __put_user(stat->mode, &ubuf->st_mode);
163 err |= __put_user(stat->nlink, &ubuf->st_nlink);
164 err |= __put_user(uid, &ubuf->st_uid);
165 err |= __put_user(gid, &ubuf->st_gid);
166 err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
167 err |= __put_user(stat->size, &ubuf->st_size);
168 err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
169 err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
170 err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
171 err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
172 err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
173 err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
174 err |= __put_user(stat->blksize, &ubuf->st_blksize);
175 err |= __put_user(stat->blocks, &ubuf->st_blocks);
176 return err;
177}
178
140asmlinkage long compat_sys_newstat(char __user * filename, 179asmlinkage long compat_sys_newstat(char __user * filename,
141 struct compat_stat __user *statbuf) 180 struct compat_stat __user *statbuf)
142{ 181{
@@ -1239,7 +1278,7 @@ static int compat_count(compat_uptr_t __user *argv, int max)
1239 if (!p) 1278 if (!p)
1240 break; 1279 break;
1241 argv++; 1280 argv++;
1242 if(++i > max) 1281 if (i++ >= max)
1243 return -E2BIG; 1282 return -E2BIG;
1244 } 1283 }
1245 } 1284 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9606ee848fd8..af0558dbe8b7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -5,11 +5,11 @@
5 * 5 *
6 * O_DIRECT 6 * O_DIRECT
7 * 7 *
8 * 04Jul2002 akpm@zip.com.au 8 * 04Jul2002 Andrew Morton
9 * Initial version 9 * Initial version
10 * 11Sep2002 janetinc@us.ibm.com 10 * 11Sep2002 janetinc@us.ibm.com
11 * added readv/writev support. 11 * added readv/writev support.
12 * 29Oct2002 akpm@zip.com.au 12 * 29Oct2002 Andrew Morton
13 * rewrote bio_add_page() support. 13 * rewrote bio_add_page() support.
14 * 30Oct2002 pbadari@us.ibm.com 14 * 30Oct2002 pbadari@us.ibm.com
15 * added support for non-aligned IO. 15 * added support for non-aligned IO.
diff --git a/fs/dquot.c b/fs/dquot.c
index ad7e59003e04..da30a27f2242 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -9,8 +9,6 @@
9 * implementation is based on one of the several variants of the LINUX 9 * implementation is based on one of the several variants of the LINUX
10 * inode-subsystem with added complexity of the diskquota system. 10 * inode-subsystem with added complexity of the diskquota system.
11 * 11 *
12 * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $
13 *
14 * Author: Marco van Wieringen <mvw@planets.elm.net> 12 * Author: Marco van Wieringen <mvw@planets.elm.net>
15 * 13 *
16 * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 14 * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index b4755a85996e..2cc9ee4ad2eb 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index b73fb752c5f8..3504cf9df358 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -79,11 +79,6 @@
79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
80#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
81#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
82#define ECRYPTFS_TRANSPORT_NETLINK 0
83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
84#define ECRYPTFS_TRANSPORT_RELAYFS 2
85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 82#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 83
89#define RFC2440_CIPHER_DES3_EDE 0x02 84#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -400,8 +395,6 @@ struct ecryptfs_msg_ctx {
400 struct mutex mux; 395 struct mutex mux;
401}; 396};
402 397
403extern unsigned int ecryptfs_transport;
404
405struct ecryptfs_daemon; 398struct ecryptfs_daemon;
406 399
407struct ecryptfs_daemon { 400struct ecryptfs_daemon {
@@ -627,31 +620,20 @@ int
627ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 620ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
628 size_t size, int flags); 621 size_t size, int flags);
629int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 622int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
630int ecryptfs_process_helo(unsigned int transport, uid_t euid, 623int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
631 struct user_namespace *user_ns, struct pid *pid); 624 struct pid *pid);
632int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, 625int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
633 struct pid *pid); 626 struct pid *pid);
634int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, 627int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
635 struct user_namespace *user_ns, struct pid *pid, 628 struct user_namespace *user_ns, struct pid *pid,
636 u32 seq); 629 u32 seq);
637int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 630int ecryptfs_send_message(char *data, int data_len,
638 struct ecryptfs_msg_ctx **msg_ctx); 631 struct ecryptfs_msg_ctx **msg_ctx);
639int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 632int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
640 struct ecryptfs_message **emsg); 633 struct ecryptfs_message **emsg);
641int ecryptfs_init_messaging(unsigned int transport); 634int ecryptfs_init_messaging(void);
642void ecryptfs_release_messaging(unsigned int transport); 635void ecryptfs_release_messaging(void);
643 636
644int ecryptfs_send_netlink(char *data, int data_len,
645 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
646 u16 msg_flags, struct pid *daemon_pid);
647int ecryptfs_init_netlink(void);
648void ecryptfs_release_netlink(void);
649
650int ecryptfs_send_connector(char *data, int data_len,
651 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
652 u16 msg_flags, struct pid *daemon_pid);
653int ecryptfs_init_connector(void);
654void ecryptfs_release_connector(void);
655void 637void
656ecryptfs_write_header_metadata(char *virt, 638ecryptfs_write_header_metadata(char *virt,
657 struct ecryptfs_crypt_stat *crypt_stat, 639 struct ecryptfs_crypt_stat *crypt_stat,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9244d653743e..eb3dc4c7ac06 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -71,12 +71,11 @@ struct ecryptfs_getdents_callback {
71 void *dirent; 71 void *dirent;
72 struct dentry *dentry; 72 struct dentry *dentry;
73 filldir_t filldir; 73 filldir_t filldir;
74 int err;
75 int filldir_called; 74 int filldir_called;
76 int entries_written; 75 int entries_written;
77}; 76};
78 77
79/* Inspired by generic filldir in fs/readir.c */ 78/* Inspired by generic filldir in fs/readdir.c */
80static int 79static int
81ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, 80ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset,
82 u64 ino, unsigned int d_type) 81 u64 ino, unsigned int d_type)
@@ -125,18 +124,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
125 buf.dirent = dirent; 124 buf.dirent = dirent;
126 buf.dentry = file->f_path.dentry; 125 buf.dentry = file->f_path.dentry;
127 buf.filldir = filldir; 126 buf.filldir = filldir;
128retry:
129 buf.filldir_called = 0; 127 buf.filldir_called = 0;
130 buf.entries_written = 0; 128 buf.entries_written = 0;
131 buf.err = 0;
132 rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); 129 rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf);
133 if (buf.err)
134 rc = buf.err;
135 if (buf.filldir_called && !buf.entries_written)
136 goto retry;
137 file->f_pos = lower_file->f_pos; 130 file->f_pos = lower_file->f_pos;
131 if (rc < 0)
132 goto out;
133 if (buf.filldir_called && !buf.entries_written)
134 goto out;
138 if (rc >= 0) 135 if (rc >= 0)
139 fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode); 136 fsstack_copy_attr_atime(inode,
137 lower_file->f_path.dentry->d_inode);
138out:
140 return rc; 139 return rc;
141} 140}
142 141
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index f5b76a331b9c..e22bc3961345 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -234,8 +234,8 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
234 } 234 }
235 i += data_len; 235 i += data_len;
236 if (message_len < (i + m_size)) { 236 if (message_len < (i + m_size)) {
237 ecryptfs_printk(KERN_ERR, "The received netlink message is " 237 ecryptfs_printk(KERN_ERR, "The message received from ecryptfsd "
238 "shorter than expected\n"); 238 "is shorter than expected\n");
239 rc = -EIO; 239 rc = -EIO;
240 goto out; 240 goto out;
241 } 241 }
@@ -438,8 +438,8 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
438 struct ecryptfs_msg_ctx *msg_ctx; 438 struct ecryptfs_msg_ctx *msg_ctx;
439 struct ecryptfs_message *msg = NULL; 439 struct ecryptfs_message *msg = NULL;
440 char *auth_tok_sig; 440 char *auth_tok_sig;
441 char *netlink_message; 441 char *payload;
442 size_t netlink_message_length; 442 size_t payload_len;
443 int rc; 443 int rc;
444 444
445 rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); 445 rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
@@ -449,15 +449,15 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
449 goto out; 449 goto out;
450 } 450 }
451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
452 &netlink_message, &netlink_message_length); 452 &payload, &payload_len);
453 if (rc) { 453 if (rc) {
454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
455 goto out; 455 goto out;
456 } 456 }
457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
458 netlink_message_length, &msg_ctx);
459 if (rc) { 458 if (rc) {
460 ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); 459 ecryptfs_printk(KERN_ERR, "Error sending message to "
460 "ecryptfsd\n");
461 goto out; 461 goto out;
462 } 462 }
463 rc = ecryptfs_wait_for_response(msg_ctx, &msg); 463 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
@@ -1333,23 +1333,22 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
1333 struct ecryptfs_key_record *key_rec) 1333 struct ecryptfs_key_record *key_rec)
1334{ 1334{
1335 struct ecryptfs_msg_ctx *msg_ctx = NULL; 1335 struct ecryptfs_msg_ctx *msg_ctx = NULL;
1336 char *netlink_payload; 1336 char *payload = NULL;
1337 size_t netlink_payload_length; 1337 size_t payload_len;
1338 struct ecryptfs_message *msg; 1338 struct ecryptfs_message *msg;
1339 int rc; 1339 int rc;
1340 1340
1341 rc = write_tag_66_packet(auth_tok->token.private_key.signature, 1341 rc = write_tag_66_packet(auth_tok->token.private_key.signature,
1342 ecryptfs_code_for_cipher_string(crypt_stat), 1342 ecryptfs_code_for_cipher_string(crypt_stat),
1343 crypt_stat, &netlink_payload, 1343 crypt_stat, &payload, &payload_len);
1344 &netlink_payload_length);
1345 if (rc) { 1344 if (rc) {
1346 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); 1345 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
1347 goto out; 1346 goto out;
1348 } 1347 }
1349 rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload, 1348 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
1350 netlink_payload_length, &msg_ctx);
1351 if (rc) { 1349 if (rc) {
1352 ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); 1350 ecryptfs_printk(KERN_ERR, "Error sending message to "
1351 "ecryptfsd\n");
1353 goto out; 1352 goto out;
1354 } 1353 }
1355 rc = ecryptfs_wait_for_response(msg_ctx, &msg); 1354 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
@@ -1364,8 +1363,7 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
1364 ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); 1363 ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n");
1365 kfree(msg); 1364 kfree(msg);
1366out: 1365out:
1367 if (netlink_payload) 1366 kfree(payload);
1368 kfree(netlink_payload);
1369 return rc; 1367 return rc;
1370} 1368}
1371/** 1369/**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 8ebe9a5d1d99..046e027a4cb1 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -30,7 +30,6 @@
30#include <linux/namei.h> 30#include <linux/namei.h>
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/netlink.h>
34#include <linux/mount.h> 33#include <linux/mount.h>
35#include <linux/pagemap.h> 34#include <linux/pagemap.h>
36#include <linux/key.h> 35#include <linux/key.h>
@@ -49,8 +48,7 @@ MODULE_PARM_DESC(ecryptfs_verbosity,
49 "0, which is Quiet)"); 48 "0, which is Quiet)");
50 49
51/** 50/**
52 * Module parameter that defines the number of netlink message buffer 51 * Module parameter that defines the number of message buffer elements
53 * elements
54 */ 52 */
55unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; 53unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS;
56 54
@@ -60,9 +58,9 @@ MODULE_PARM_DESC(ecryptfs_message_buf_len,
60 58
61/** 59/**
62 * Module parameter that defines the maximum guaranteed amount of time to wait 60 * Module parameter that defines the maximum guaranteed amount of time to wait
63 * for a response through netlink. The actual sleep time will be, more than 61 * for a response from ecryptfsd. The actual sleep time will be, more than
64 * likely, a small amount greater than this specified value, but only less if 62 * likely, a small amount greater than this specified value, but only less if
65 * the netlink message successfully arrives. 63 * the message successfully arrives.
66 */ 64 */
67signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; 65signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ;
68 66
@@ -83,8 +81,6 @@ module_param(ecryptfs_number_of_users, uint, 0);
83MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " 81MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of "
84 "concurrent users of eCryptfs"); 82 "concurrent users of eCryptfs");
85 83
86unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT;
87
88void __ecryptfs_printk(const char *fmt, ...) 84void __ecryptfs_printk(const char *fmt, ...)
89{ 85{
90 va_list args; 86 va_list args;
@@ -779,10 +775,11 @@ static int __init ecryptfs_init(void)
779 "rc = [%d]\n", __func__, rc); 775 "rc = [%d]\n", __func__, rc);
780 goto out_do_sysfs_unregistration; 776 goto out_do_sysfs_unregistration;
781 } 777 }
782 rc = ecryptfs_init_messaging(ecryptfs_transport); 778 rc = ecryptfs_init_messaging();
783 if (rc) { 779 if (rc) {
784 printk(KERN_ERR "Failure occured while attempting to " 780 printk(KERN_ERR "Failure occured while attempting to "
785 "initialize the eCryptfs netlink socket\n"); 781 "initialize the communications channel to "
782 "ecryptfsd\n");
786 goto out_destroy_kthread; 783 goto out_destroy_kthread;
787 } 784 }
788 rc = ecryptfs_init_crypto(); 785 rc = ecryptfs_init_crypto();
@@ -797,7 +794,7 @@ static int __init ecryptfs_init(void)
797 794
798 goto out; 795 goto out;
799out_release_messaging: 796out_release_messaging:
800 ecryptfs_release_messaging(ecryptfs_transport); 797 ecryptfs_release_messaging();
801out_destroy_kthread: 798out_destroy_kthread:
802 ecryptfs_destroy_kthread(); 799 ecryptfs_destroy_kthread();
803out_do_sysfs_unregistration: 800out_do_sysfs_unregistration:
@@ -818,7 +815,7 @@ static void __exit ecryptfs_exit(void)
818 if (rc) 815 if (rc)
819 printk(KERN_ERR "Failure whilst attempting to destroy crypto; " 816 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
820 "rc = [%d]\n", rc); 817 "rc = [%d]\n", rc);
821 ecryptfs_release_messaging(ecryptfs_transport); 818 ecryptfs_release_messaging();
822 ecryptfs_destroy_kthread(); 819 ecryptfs_destroy_kthread();
823 do_sysfs_unregistration(); 820 do_sysfs_unregistration();
824 unregister_filesystem(&ecryptfs_fs_type); 821 unregister_filesystem(&ecryptfs_fs_type);
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 1b5c20058acb..c6983978a31e 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -134,12 +134,11 @@ out:
134} 134}
135 135
136static int 136static int
137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, 137ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); 138 struct ecryptfs_msg_ctx **msg_ctx);
139 139
140/** 140/**
141 * ecryptfs_send_raw_message 141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type 142 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message 143 * @daemon: Daemon struct for recipient of message
145 * 144 *
@@ -150,38 +149,25 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
150 * 149 *
151 * Returns zero on success; non-zero otherwise 150 * Returns zero on success; non-zero otherwise
152 */ 151 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, 152static int ecryptfs_send_raw_message(u8 msg_type,
154 struct ecryptfs_daemon *daemon) 153 struct ecryptfs_daemon *daemon)
155{ 154{
156 struct ecryptfs_msg_ctx *msg_ctx; 155 struct ecryptfs_msg_ctx *msg_ctx;
157 int rc; 156 int rc;
158 157
159 switch(transport) { 158 rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx);
160 case ECRYPTFS_TRANSPORT_NETLINK: 159 if (rc) {
161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, 160 printk(KERN_ERR "%s: Error whilst attempting to send "
162 daemon->pid); 161 "message to ecryptfsd; rc = [%d]\n", __func__, rc);
163 break; 162 goto out;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
179 break;
180 case ECRYPTFS_TRANSPORT_CONNECTOR:
181 case ECRYPTFS_TRANSPORT_RELAYFS:
182 default:
183 rc = -ENOSYS;
184 } 163 }
164 /* Raw messages are logically context-free (e.g., no
165 * reply is expected), so we set the state of the
166 * ecryptfs_msg_ctx object to indicate that it should
167 * be freed as soon as the message is sent. */
168 mutex_lock(&msg_ctx->mux);
169 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
170 mutex_unlock(&msg_ctx->mux);
185out: 171out:
186 return rc; 172 return rc;
187} 173}
@@ -227,7 +213,6 @@ out:
227 213
228/** 214/**
229 * ecryptfs_process_helo 215 * ecryptfs_process_helo
230 * @transport: The underlying transport (netlink, etc.)
231 * @euid: The user ID owner of the message 216 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies 217 * @user_ns: The namespace in which @euid applies
233 * @pid: The process ID for the userspace program that sent the 218 * @pid: The process ID for the userspace program that sent the
@@ -239,8 +224,8 @@ out:
239 * Returns zero after adding a new daemon to the hash list; 224 * Returns zero after adding a new daemon to the hash list;
240 * non-zero otherwise. 225 * non-zero otherwise.
241 */ 226 */
242int ecryptfs_process_helo(unsigned int transport, uid_t euid, 227int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
243 struct user_namespace *user_ns, struct pid *pid) 228 struct pid *pid)
244{ 229{
245 struct ecryptfs_daemon *new_daemon; 230 struct ecryptfs_daemon *new_daemon;
246 struct ecryptfs_daemon *old_daemon; 231 struct ecryptfs_daemon *old_daemon;
@@ -252,8 +237,7 @@ int ecryptfs_process_helo(unsigned int transport, uid_t euid,
252 printk(KERN_WARNING "Received request from user [%d] " 237 printk(KERN_WARNING "Received request from user [%d] "
253 "to register daemon [0x%p]; unregistering daemon " 238 "to register daemon [0x%p]; unregistering daemon "
254 "[0x%p]\n", euid, pid, old_daemon->pid); 239 "[0x%p]\n", euid, pid, old_daemon->pid);
255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, 240 rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon);
256 old_daemon);
257 if (rc) 241 if (rc)
258 printk(KERN_WARNING "Failed to send QUIT " 242 printk(KERN_WARNING "Failed to send QUIT "
259 "message to daemon [0x%p]; rc = [%d]\n", 243 "message to daemon [0x%p]; rc = [%d]\n",
@@ -467,8 +451,6 @@ out:
467 451
468/** 452/**
469 * ecryptfs_send_message_locked 453 * ecryptfs_send_message_locked
470 * @transport: The transport over which to send the message (i.e.,
471 * netlink)
472 * @data: The data to send 454 * @data: The data to send
473 * @data_len: The length of data 455 * @data_len: The length of data
474 * @msg_ctx: The message context allocated for the send 456 * @msg_ctx: The message context allocated for the send
@@ -478,8 +460,8 @@ out:
478 * Returns zero on success; non-zero otherwise 460 * Returns zero on success; non-zero otherwise
479 */ 461 */
480static int 462static int
481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, 463ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) 464 struct ecryptfs_msg_ctx **msg_ctx)
483{ 465{
484 struct ecryptfs_daemon *daemon; 466 struct ecryptfs_daemon *daemon;
485 int rc; 467 int rc;
@@ -503,20 +485,8 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 485 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
504 mutex_unlock(&(*msg_ctx)->mux); 486 mutex_unlock(&(*msg_ctx)->mux);
505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 487 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
506 switch (transport) { 488 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, 0,
507 case ECRYPTFS_TRANSPORT_NETLINK: 489 daemon);
508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
514 break;
515 case ECRYPTFS_TRANSPORT_CONNECTOR:
516 case ECRYPTFS_TRANSPORT_RELAYFS:
517 default:
518 rc = -ENOSYS;
519 }
520 if (rc) 490 if (rc)
521 printk(KERN_ERR "%s: Error attempting to send message to " 491 printk(KERN_ERR "%s: Error attempting to send message to "
522 "userspace daemon; rc = [%d]\n", __func__, rc); 492 "userspace daemon; rc = [%d]\n", __func__, rc);
@@ -526,8 +496,6 @@ out:
526 496
527/** 497/**
528 * ecryptfs_send_message 498 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send 499 * @data: The data to send
532 * @data_len: The length of data 500 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send 501 * @msg_ctx: The message context allocated for the send
@@ -536,14 +504,14 @@ out:
536 * 504 *
537 * Returns zero on success; non-zero otherwise 505 * Returns zero on success; non-zero otherwise
538 */ 506 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 507int ecryptfs_send_message(char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx) 508 struct ecryptfs_msg_ctx **msg_ctx)
541{ 509{
542 int rc; 510 int rc;
543 511
544 mutex_lock(&ecryptfs_daemon_hash_mux); 512 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len, 513 rc = ecryptfs_send_message_locked(data, data_len, ECRYPTFS_MSG_REQUEST,
546 ECRYPTFS_MSG_REQUEST, msg_ctx); 514 msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux); 515 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc; 516 return rc;
549} 517}
@@ -586,7 +554,7 @@ sleep:
586 return rc; 554 return rc;
587} 555}
588 556
589int ecryptfs_init_messaging(unsigned int transport) 557int ecryptfs_init_messaging(void)
590{ 558{
591 int i; 559 int i;
592 int rc = 0; 560 int rc = 0;
@@ -639,27 +607,14 @@ int ecryptfs_init_messaging(unsigned int transport)
639 mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); 607 mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux);
640 } 608 }
641 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 609 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
642 switch(transport) { 610 rc = ecryptfs_init_ecryptfs_miscdev();
643 case ECRYPTFS_TRANSPORT_NETLINK: 611 if (rc)
644 rc = ecryptfs_init_netlink(); 612 ecryptfs_release_messaging();
645 if (rc)
646 ecryptfs_release_messaging(transport);
647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
653 case ECRYPTFS_TRANSPORT_CONNECTOR:
654 case ECRYPTFS_TRANSPORT_RELAYFS:
655 default:
656 rc = -ENOSYS;
657 }
658out: 613out:
659 return rc; 614 return rc;
660} 615}
661 616
662void ecryptfs_release_messaging(unsigned int transport) 617void ecryptfs_release_messaging(void)
663{ 618{
664 if (ecryptfs_msg_ctx_arr) { 619 if (ecryptfs_msg_ctx_arr) {
665 int i; 620 int i;
@@ -698,17 +653,6 @@ void ecryptfs_release_messaging(unsigned int transport)
698 kfree(ecryptfs_daemon_hash); 653 kfree(ecryptfs_daemon_hash);
699 mutex_unlock(&ecryptfs_daemon_hash_mux); 654 mutex_unlock(&ecryptfs_daemon_hash_mux);
700 } 655 }
701 switch(transport) { 656 ecryptfs_destroy_ecryptfs_miscdev();
702 case ECRYPTFS_TRANSPORT_NETLINK:
703 ecryptfs_release_netlink();
704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
708 case ECRYPTFS_TRANSPORT_CONNECTOR:
709 case ECRYPTFS_TRANSPORT_RELAYFS:
710 default:
711 break;
712 }
713 return; 657 return;
714} 658}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 245c2dc02d5c..04d7b3fa1ac6 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -265,22 +265,34 @@ out:
265} 265}
266 266
267/** 267/**
268 * ecryptfs_prepare_write 268 * ecryptfs_write_begin
269 * @file: The eCryptfs file 269 * @file: The eCryptfs file
270 * @page: The eCryptfs page 270 * @mapping: The eCryptfs object
271 * @from: The start byte from which we will write 271 * @pos: The file offset at which to start writing
272 * @to: The end byte to which we will write 272 * @len: Length of the write
273 * @flags: Various flags
274 * @pagep: Pointer to return the page
275 * @fsdata: Pointer to return fs data (unused)
273 * 276 *
274 * This function must zero any hole we create 277 * This function must zero any hole we create
275 * 278 *
276 * Returns zero on success; non-zero otherwise 279 * Returns zero on success; non-zero otherwise
277 */ 280 */
278static int ecryptfs_prepare_write(struct file *file, struct page *page, 281static int ecryptfs_write_begin(struct file *file,
279 unsigned from, unsigned to) 282 struct address_space *mapping,
283 loff_t pos, unsigned len, unsigned flags,
284 struct page **pagep, void **fsdata)
280{ 285{
286 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
287 struct page *page;
281 loff_t prev_page_end_size; 288 loff_t prev_page_end_size;
282 int rc = 0; 289 int rc = 0;
283 290
291 page = __grab_cache_page(mapping, index);
292 if (!page)
293 return -ENOMEM;
294 *pagep = page;
295
284 if (!PageUptodate(page)) { 296 if (!PageUptodate(page)) {
285 struct ecryptfs_crypt_stat *crypt_stat = 297 struct ecryptfs_crypt_stat *crypt_stat =
286 &ecryptfs_inode_to_private( 298 &ecryptfs_inode_to_private(
@@ -289,8 +301,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
289 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) 301 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
290 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { 302 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
291 rc = ecryptfs_read_lower_page_segment( 303 rc = ecryptfs_read_lower_page_segment(
292 page, page->index, 0, PAGE_CACHE_SIZE, 304 page, index, 0, PAGE_CACHE_SIZE, mapping->host);
293 page->mapping->host);
294 if (rc) { 305 if (rc) {
295 printk(KERN_ERR "%s: Error attemping to read " 306 printk(KERN_ERR "%s: Error attemping to read "
296 "lower page segment; rc = [%d]\n", 307 "lower page segment; rc = [%d]\n",
@@ -316,8 +327,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
316 SetPageUptodate(page); 327 SetPageUptodate(page);
317 } else { 328 } else {
318 rc = ecryptfs_read_lower_page_segment( 329 rc = ecryptfs_read_lower_page_segment(
319 page, page->index, 0, PAGE_CACHE_SIZE, 330 page, index, 0, PAGE_CACHE_SIZE,
320 page->mapping->host); 331 mapping->host);
321 if (rc) { 332 if (rc) {
322 printk(KERN_ERR "%s: Error reading " 333 printk(KERN_ERR "%s: Error reading "
323 "page; rc = [%d]\n", 334 "page; rc = [%d]\n",
@@ -339,10 +350,10 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
339 SetPageUptodate(page); 350 SetPageUptodate(page);
340 } 351 }
341 } 352 }
342 prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); 353 prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
343 /* If creating a page or more of holes, zero them out via truncate. 354 /* If creating a page or more of holes, zero them out via truncate.
344 * Note, this will increase i_size. */ 355 * Note, this will increase i_size. */
345 if (page->index != 0) { 356 if (index != 0) {
346 if (prev_page_end_size > i_size_read(page->mapping->host)) { 357 if (prev_page_end_size > i_size_read(page->mapping->host)) {
347 rc = ecryptfs_truncate(file->f_path.dentry, 358 rc = ecryptfs_truncate(file->f_path.dentry,
348 prev_page_end_size); 359 prev_page_end_size);
@@ -357,8 +368,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
357 } 368 }
358 /* Writing to a new page, and creating a small hole from start 369 /* Writing to a new page, and creating a small hole from start
359 * of page? Zero it out. */ 370 * of page? Zero it out. */
360 if ((i_size_read(page->mapping->host) == prev_page_end_size) 371 if ((i_size_read(mapping->host) == prev_page_end_size)
361 && (from != 0)) 372 && (pos != 0))
362 zero_user(page, 0, PAGE_CACHE_SIZE); 373 zero_user(page, 0, PAGE_CACHE_SIZE);
363out: 374out:
364 return rc; 375 return rc;
@@ -445,21 +456,28 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
445} 456}
446 457
447/** 458/**
448 * ecryptfs_commit_write 459 * ecryptfs_write_end
449 * @file: The eCryptfs file object 460 * @file: The eCryptfs file object
461 * @mapping: The eCryptfs object
462 * @pos: The file position
463 * @len: The length of the data (unused)
464 * @copied: The amount of data copied
450 * @page: The eCryptfs page 465 * @page: The eCryptfs page
451 * @from: Ignored (we rotate the page IV on each write) 466 * @fsdata: The fsdata (unused)
452 * @to: Ignored
453 * 467 *
454 * This is where we encrypt the data and pass the encrypted data to 468 * This is where we encrypt the data and pass the encrypted data to
455 * the lower filesystem. In OpenPGP-compatible mode, we operate on 469 * the lower filesystem. In OpenPGP-compatible mode, we operate on
456 * entire underlying packets. 470 * entire underlying packets.
457 */ 471 */
458static int ecryptfs_commit_write(struct file *file, struct page *page, 472static int ecryptfs_write_end(struct file *file,
459 unsigned from, unsigned to) 473 struct address_space *mapping,
474 loff_t pos, unsigned len, unsigned copied,
475 struct page *page, void *fsdata)
460{ 476{
461 loff_t pos; 477 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
462 struct inode *ecryptfs_inode = page->mapping->host; 478 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
479 unsigned to = from + copied;
480 struct inode *ecryptfs_inode = mapping->host;
463 struct ecryptfs_crypt_stat *crypt_stat = 481 struct ecryptfs_crypt_stat *crypt_stat =
464 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; 482 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat;
465 int rc; 483 int rc;
@@ -471,25 +489,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
471 } else 489 } else
472 ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); 490 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
473 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 491 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
474 "(page w/ index = [0x%.16x], to = [%d])\n", page->index, 492 "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
475 to);
476 /* Fills in zeros if 'to' goes beyond inode size */ 493 /* Fills in zeros if 'to' goes beyond inode size */
477 rc = fill_zeros_to_end_of_page(page, to); 494 rc = fill_zeros_to_end_of_page(page, to);
478 if (rc) { 495 if (rc) {
479 ecryptfs_printk(KERN_WARNING, "Error attempting to fill " 496 ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
480 "zeros in page with index = [0x%.16x]\n", 497 "zeros in page with index = [0x%.16x]\n", index);
481 page->index);
482 goto out; 498 goto out;
483 } 499 }
484 rc = ecryptfs_encrypt_page(page); 500 rc = ecryptfs_encrypt_page(page);
485 if (rc) { 501 if (rc) {
486 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " 502 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
487 "index [0x%.16x])\n", page->index); 503 "index [0x%.16x])\n", index);
488 goto out; 504 goto out;
489 } 505 }
490 pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to; 506 if (pos + copied > i_size_read(ecryptfs_inode)) {
491 if (pos > i_size_read(ecryptfs_inode)) { 507 i_size_write(ecryptfs_inode, pos + copied);
492 i_size_write(ecryptfs_inode, pos);
493 ecryptfs_printk(KERN_DEBUG, "Expanded file size to " 508 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
494 "[0x%.16x]\n", i_size_read(ecryptfs_inode)); 509 "[0x%.16x]\n", i_size_read(ecryptfs_inode));
495 } 510 }
@@ -497,7 +512,11 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
497 if (rc) 512 if (rc)
498 printk(KERN_ERR "Error writing inode size to metadata; " 513 printk(KERN_ERR "Error writing inode size to metadata; "
499 "rc = [%d]\n", rc); 514 "rc = [%d]\n", rc);
515 else
516 rc = copied;
500out: 517out:
518 unlock_page(page);
519 page_cache_release(page);
501 return rc; 520 return rc;
502} 521}
503 522
@@ -518,7 +537,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
518struct address_space_operations ecryptfs_aops = { 537struct address_space_operations ecryptfs_aops = {
519 .writepage = ecryptfs_writepage, 538 .writepage = ecryptfs_writepage,
520 .readpage = ecryptfs_readpage, 539 .readpage = ecryptfs_readpage,
521 .prepare_write = ecryptfs_prepare_write, 540 .write_begin = ecryptfs_write_begin,
522 .commit_write = ecryptfs_commit_write, 541 .write_end = ecryptfs_write_end,
523 .bmap = ecryptfs_bmap, 542 .bmap = ecryptfs_bmap,
524}; 543};
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
deleted file mode 100644
index e0abad62b395..000000000000
--- a/fs/ecryptfs/netlink.c
+++ /dev/null
@@ -1,249 +0,0 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <net/sock.h>
24#include <linux/hash.h>
25#include <linux/random.h>
26#include "ecryptfs_kernel.h"
27
28static struct sock *ecryptfs_nl_sock;
29
30/**
31 * ecryptfs_send_netlink
32 * @data: The data to include as the payload
33 * @data_len: The byte count of the data
34 * @msg_ctx: The netlink context that will be used to handle the
35 * response message
36 * @msg_type: The type of netlink message to send
37 * @msg_flags: The flags to include in the netlink header
38 * @daemon_pid: The process id of the daemon to send the message to
39 *
40 * Sends the data to the specified daemon pid and uses the netlink
41 * context element to store the data needed for validation upon
42 * receiving the response. The data and the netlink context can be
43 * null if just sending a netlink header is sufficient. Returns zero
44 * upon sending the message; non-zero upon error.
45 */
46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, struct pid *daemon_pid)
49{
50 struct sk_buff *skb;
51 struct nlmsghdr *nlh;
52 struct ecryptfs_message *msg;
53 size_t payload_len;
54 int rc;
55
56 payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0);
57 skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL);
58 if (!skb) {
59 rc = -ENOMEM;
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out;
62 }
63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) {
67 msg = (struct ecryptfs_message *)NLMSG_DATA(nlh);
68 msg->index = msg_ctx->index;
69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len);
71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc);
76 goto out;
77 }
78 rc = 0;
79 goto out;
80nlmsg_failure:
81 rc = -EMSGSIZE;
82 kfree_skb(skb);
83out:
84 return rc;
85}
86
87/**
88 * ecryptfs_process_nl_reponse
89 * @skb: The socket buffer containing the netlink message of state
90 * RESPONSE
91 *
92 * Processes a response message after sending a operation request to
93 * userspace. Attempts to assign the msg to a netlink context element
94 * at the index specified in the msg. The sk_buff and nlmsghdr must
95 * be validated before this function. Returns zero upon delivery to
96 * desired context element; non-zero upon delivery failure or error.
97 */
98static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
103 int rc;
104
105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
106 rc = -EINVAL;
107 ecryptfs_printk(KERN_ERR, "Received netlink message with "
108 "incorrectly specified data length\n");
109 goto out;
110 }
111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
115 if (rc)
116 printk(KERN_ERR
117 "Error processing response message; rc = [%d]\n", rc);
118out:
119 return rc;
120}
121
122/**
123 * ecryptfs_process_nl_helo
124 * @skb: The socket buffer containing the nlmsghdr in HELO state
125 *
126 * Gets uid and pid of the skb and adds the values to the daemon id
127 * hash. Returns zero after adding a new daemon id to the hash list;
128 * non-zero otherwise.
129 */
130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
131{
132 struct pid *pid;
133 int rc;
134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
137 NETLINK_CREDS(skb)->uid, NULL, pid);
138 put_pid(pid);
139 if (rc)
140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
141 return rc;
142}
143
144/**
145 * ecryptfs_process_nl_quit
146 * @skb: The socket buffer containing the nlmsghdr in QUIT state
147 *
148 * Gets uid and pid of the skb and deletes the corresponding daemon
149 * id, if it is the registered that is requesting the
150 * deletion. Returns zero after deleting the desired daemon id;
151 * non-zero otherwise.
152 */
153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
154{
155 struct pid *pid;
156 int rc;
157
158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
161 if (rc)
162 printk(KERN_WARNING
163 "Error processing QUIT message; rc = [%d]\n", rc);
164 return rc;
165}
166
167/**
168 * ecryptfs_receive_nl_message
169 *
170 * Callback function called by netlink system when a message arrives.
171 * If the message looks to be valid, then an attempt is made to assign
172 * it to its desired netlink context element and wake up the process
173 * that is waiting for a response.
174 */
175static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176{
177 struct nlmsghdr *nlh;
178
179 nlh = nlmsg_hdr(skb);
180 if (!NLMSG_OK(nlh, skb->len)) {
181 ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
182 "message\n");
183 goto free;
184 }
185 switch (nlh->nlmsg_type) {
186 case ECRYPTFS_MSG_RESPONSE:
187 if (ecryptfs_process_nl_response(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "deliver netlink response to "
190 "requesting operation\n");
191 }
192 break;
193 case ECRYPTFS_MSG_HELO:
194 if (ecryptfs_process_nl_helo(skb)) {
195 ecryptfs_printk(KERN_WARNING, "Failed to "
196 "fulfill HELO request\n");
197 }
198 break;
199 case ECRYPTFS_MSG_QUIT:
200 if (ecryptfs_process_nl_quit(skb)) {
201 ecryptfs_printk(KERN_WARNING, "Failed to "
202 "fulfill QUIT request\n");
203 }
204 break;
205 default:
206 ecryptfs_printk(KERN_WARNING, "Dropping netlink "
207 "message of unrecognized type [%d]\n",
208 nlh->nlmsg_type);
209 break;
210 }
211free:
212 kfree_skb(skb);
213}
214
215/**
216 * ecryptfs_init_netlink
217 *
218 * Initializes the daemon id hash list, netlink context array, and
219 * necessary locks. Returns zero upon success; non-zero upon error.
220 */
221int ecryptfs_init_netlink(void)
222{
223 int rc;
224
225 ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0,
226 ecryptfs_receive_nl_message,
227 NULL, THIS_MODULE);
228 if (!ecryptfs_nl_sock) {
229 rc = -EIO;
230 ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
231 goto out;
232 }
233 ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT;
234 rc = 0;
235out:
236 return rc;
237}
238
239/**
240 * ecryptfs_release_netlink
241 *
242 * Frees all memory used by the netlink context array and releases the
243 * netlink socket.
244 */
245void ecryptfs_release_netlink(void)
246{
247 netlink_kernel_release(ecryptfs_nl_sock);
248 ecryptfs_nl_sock = NULL;
249}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7cc0eb756b55..99368bda0261 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -927,14 +927,11 @@ errxit:
927 /* 927 /*
928 * During the time we spent in the loop above, some other events 928 * During the time we spent in the loop above, some other events
929 * might have been queued by the poll callback. We re-insert them 929 * might have been queued by the poll callback. We re-insert them
930 * here (in case they are not already queued, or they're one-shot). 930 * inside the main ready-list here.
931 */ 931 */
932 for (nepi = ep->ovflist; (epi = nepi) != NULL; 932 for (nepi = ep->ovflist; (epi = nepi) != NULL;
933 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { 933 nepi = epi->next, epi->next = EP_UNACTIVE_PTR)
934 if (!ep_is_linked(&epi->rdllink) && 934 list_add_tail(&epi->rdllink, &ep->rdllist);
935 (epi->event.events & ~EP_PRIVATE_BITS))
936 list_add_tail(&epi->rdllink, &ep->rdllist);
937 }
938 /* 935 /*
939 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after 936 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
940 * releasing the lock, events will be queued in the normal way inside 937 * releasing the lock, events will be queued in the normal way inside
diff --git a/fs/exec.c b/fs/exec.c
index cecee501ce78..4e834f16d9da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,15 +50,12 @@
50#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
51#include <linux/audit.h> 51#include <linux/audit.h>
52#include <linux/tracehook.h> 52#include <linux/tracehook.h>
53#include <linux/kmod.h>
53 54
54#include <asm/uaccess.h> 55#include <asm/uaccess.h>
55#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
56#include <asm/tlb.h> 57#include <asm/tlb.h>
57 58
58#ifdef CONFIG_KMOD
59#include <linux/kmod.h>
60#endif
61
62#ifdef __alpha__ 59#ifdef __alpha__
63/* for /sbin/loader handling in search_binary_handler() */ 60/* for /sbin/loader handling in search_binary_handler() */
64#include <linux/a.out.h> 61#include <linux/a.out.h>
@@ -391,7 +388,7 @@ static int count(char __user * __user * argv, int max)
391 if (!p) 388 if (!p)
392 break; 389 break;
393 argv++; 390 argv++;
394 if(++i > max) 391 if (i++ >= max)
395 return -E2BIG; 392 return -E2BIG;
396 cond_resched(); 393 cond_resched();
397 } 394 }
@@ -825,8 +822,6 @@ static int de_thread(struct task_struct *tsk)
825 schedule(); 822 schedule();
826 } 823 }
827 824
828 if (unlikely(task_child_reaper(tsk) == leader))
829 task_active_pid_ns(tsk)->child_reaper = tsk;
830 /* 825 /*
831 * The only record we have of the real-time age of a 826 * The only record we have of the real-time age of a
832 * process, regardless of execs it's done, is start_time. 827 * process, regardless of execs it's done, is start_time.
@@ -1189,7 +1184,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1189 return retval; 1184 return retval;
1190 1185
1191 /* Remember if the application is TASO. */ 1186 /* Remember if the application is TASO. */
1192 bprm->sh_bang = eh->ah.entry < 0x100000000UL; 1187 bprm->taso = eh->ah.entry < 0x100000000UL;
1193 1188
1194 bprm->file = file; 1189 bprm->file = file;
1195 bprm->loader = loader; 1190 bprm->loader = loader;
@@ -1247,8 +1242,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1247 read_unlock(&binfmt_lock); 1242 read_unlock(&binfmt_lock);
1248 if (retval != -ENOEXEC || bprm->mm == NULL) { 1243 if (retval != -ENOEXEC || bprm->mm == NULL) {
1249 break; 1244 break;
1250#ifdef CONFIG_KMOD 1245#ifdef CONFIG_MODULES
1251 }else{ 1246 } else {
1252#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) 1247#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
1253 if (printable(bprm->buf[0]) && 1248 if (printable(bprm->buf[0]) &&
1254 printable(bprm->buf[1]) && 1249 printable(bprm->buf[1]) &&
@@ -1391,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
1391 * name into corename, which must have space for at least 1386 * name into corename, which must have space for at least
1392 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1387 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1393 */ 1388 */
1394static int format_corename(char *corename, int nr_threads, long signr) 1389static int format_corename(char *corename, long signr)
1395{ 1390{
1396 const char *pat_ptr = core_pattern; 1391 const char *pat_ptr = core_pattern;
1397 int ispipe = (*pat_ptr == '|'); 1392 int ispipe = (*pat_ptr == '|');
@@ -1498,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
1498 * If core_pattern does not include a %p (as is the default) 1493 * If core_pattern does not include a %p (as is the default)
1499 * and core_uses_pid is set, then .%pid will be appended to 1494 * and core_uses_pid is set, then .%pid will be appended to
1500 * the filename. Do not do this for piped commands. */ 1495 * the filename. Do not do this for piped commands. */
1501 if (!ispipe && !pid_in_pattern 1496 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1502 && (core_uses_pid || nr_threads)) {
1503 rc = snprintf(out_ptr, out_end - out_ptr, 1497 rc = snprintf(out_ptr, out_end - out_ptr,
1504 ".%d", task_tgid_vnr(current)); 1498 ".%d", task_tgid_vnr(current));
1505 if (rc > out_end - out_ptr) 1499 if (rc > out_end - out_ptr)
@@ -1762,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1762 * uses lock_kernel() 1756 * uses lock_kernel()
1763 */ 1757 */
1764 lock_kernel(); 1758 lock_kernel();
1765 ispipe = format_corename(corename, retval, signr); 1759 ispipe = format_corename(corename, signr);
1766 unlock_kernel(); 1760 unlock_kernel();
1767 /* 1761 /*
1768 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1762 * Don't bother to check the RLIMIT_CORE value if core_pattern points
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
new file mode 100644
index 000000000000..14a6780fd034
--- /dev/null
+++ b/fs/ext2/Kconfig
@@ -0,0 +1,55 @@
1config EXT2_FS
2 tristate "Second extended fs support"
3 help
4 Ext2 is a standard Linux file system for hard disks.
5
6 To compile this file system support as a module, choose M here: the
7 module will be called ext2.
8
9 If unsure, say Y.
10
11config EXT2_FS_XATTR
12 bool "Ext2 extended attributes"
13 depends on EXT2_FS
14 help
15 Extended attributes are name:value pairs associated with inodes by
16 the kernel or by users (see the attr(5) manual page, or visit
17 <http://acl.bestbits.at/> for details).
18
19 If unsure, say N.
20
21config EXT2_FS_POSIX_ACL
22 bool "Ext2 POSIX Access Control Lists"
23 depends on EXT2_FS_XATTR
24 select FS_POSIX_ACL
25 help
26 Posix Access Control Lists (ACLs) support permissions for users and
27 groups beyond the owner/group/world scheme.
28
29 To learn more about Access Control Lists, visit the Posix ACLs for
30 Linux website <http://acl.bestbits.at/>.
31
32 If you don't know what Access Control Lists are, say N
33
34config EXT2_FS_SECURITY
35 bool "Ext2 Security Labels"
36 depends on EXT2_FS_XATTR
37 help
38 Security labels support alternative access control models
39 implemented by security modules like SELinux. This option
40 enables an extended attribute handler for file security
41 labels in the ext2 filesystem.
42
43 If you are not using a security module that requires using
44 extended attributes for file security labels, say N.
45
46config EXT2_FS_XIP
47 bool "Ext2 execute in place support"
48 depends on EXT2_FS && MMU
49 help
50 Execute in place can be used on memory-backed block devices. If you
51 enable this option, you can select to mount block devices which are
52 capable of this feature without using the page cache.
53
54 If you do not use a block device that is capable of using this,
55 or if unsure, say N.
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 10bb02c3f25c..6dac7ba2d22d 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1295,6 +1295,7 @@ retry_alloc:
1295 * turn off reservation for this allocation 1295 * turn off reservation for this allocation
1296 */ 1296 */
1297 if (my_rsv && (free_blocks < windowsz) 1297 if (my_rsv && (free_blocks < windowsz)
1298 && (free_blocks > 0)
1298 && (rsv_is_empty(&my_rsv->rsv_window))) 1299 && (rsv_is_empty(&my_rsv->rsv_window)))
1299 my_rsv = NULL; 1300 my_rsv = NULL;
1300 1301
@@ -1332,7 +1333,7 @@ retry_alloc:
1332 * free blocks is less than half of the reservation 1333 * free blocks is less than half of the reservation
1333 * window size. 1334 * window size.
1334 */ 1335 */
1335 if (free_blocks <= (windowsz/2)) 1336 if (my_rsv && (free_blocks <= (windowsz/2)))
1336 continue; 1337 continue;
1337 1338
1338 brelse(bitmap_bh); 1339 brelse(bitmap_bh);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index a78c6b4af060..11a49ce84392 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -103,7 +103,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
103 return err; 103 return err;
104} 104}
105 105
106static void ext2_check_page(struct page *page) 106static void ext2_check_page(struct page *page, int quiet)
107{ 107{
108 struct inode *dir = page->mapping->host; 108 struct inode *dir = page->mapping->host;
109 struct super_block *sb = dir->i_sb; 109 struct super_block *sb = dir->i_sb;
@@ -146,10 +146,10 @@ out:
146 /* Too bad, we had an error */ 146 /* Too bad, we had an error */
147 147
148Ebadsize: 148Ebadsize:
149 ext2_error(sb, "ext2_check_page", 149 if (!quiet)
150 "size of directory #%lu is not a multiple of chunk size", 150 ext2_error(sb, __func__,
151 dir->i_ino 151 "size of directory #%lu is not a multiple "
152 ); 152 "of chunk size", dir->i_ino);
153 goto fail; 153 goto fail;
154Eshort: 154Eshort:
155 error = "rec_len is smaller than minimal"; 155 error = "rec_len is smaller than minimal";
@@ -166,32 +166,36 @@ Espan:
166Einumber: 166Einumber:
167 error = "inode out of bounds"; 167 error = "inode out of bounds";
168bad_entry: 168bad_entry:
169 ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " 169 if (!quiet)
170 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 170 ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
171 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, 171 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
172 (unsigned long) le32_to_cpu(p->inode), 172 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
173 rec_len, p->name_len); 173 (unsigned long) le32_to_cpu(p->inode),
174 rec_len, p->name_len);
174 goto fail; 175 goto fail;
175Eend: 176Eend:
176 p = (ext2_dirent *)(kaddr + offs); 177 if (!quiet) {
177 ext2_error (sb, "ext2_check_page", 178 p = (ext2_dirent *)(kaddr + offs);
178 "entry in directory #%lu spans the page boundary" 179 ext2_error(sb, "ext2_check_page",
179 "offset=%lu, inode=%lu", 180 "entry in directory #%lu spans the page boundary"
180 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, 181 "offset=%lu, inode=%lu",
181 (unsigned long) le32_to_cpu(p->inode)); 182 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
183 (unsigned long) le32_to_cpu(p->inode));
184 }
182fail: 185fail:
183 SetPageChecked(page); 186 SetPageChecked(page);
184 SetPageError(page); 187 SetPageError(page);
185} 188}
186 189
187static struct page * ext2_get_page(struct inode *dir, unsigned long n) 190static struct page * ext2_get_page(struct inode *dir, unsigned long n,
191 int quiet)
188{ 192{
189 struct address_space *mapping = dir->i_mapping; 193 struct address_space *mapping = dir->i_mapping;
190 struct page *page = read_mapping_page(mapping, n, NULL); 194 struct page *page = read_mapping_page(mapping, n, NULL);
191 if (!IS_ERR(page)) { 195 if (!IS_ERR(page)) {
192 kmap(page); 196 kmap(page);
193 if (!PageChecked(page)) 197 if (!PageChecked(page))
194 ext2_check_page(page); 198 ext2_check_page(page, quiet);
195 if (PageError(page)) 199 if (PageError(page))
196 goto fail; 200 goto fail;
197 } 201 }
@@ -292,7 +296,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
292 for ( ; n < npages; n++, offset = 0) { 296 for ( ; n < npages; n++, offset = 0) {
293 char *kaddr, *limit; 297 char *kaddr, *limit;
294 ext2_dirent *de; 298 ext2_dirent *de;
295 struct page *page = ext2_get_page(inode, n); 299 struct page *page = ext2_get_page(inode, n, 0);
296 300
297 if (IS_ERR(page)) { 301 if (IS_ERR(page)) {
298 ext2_error(sb, __func__, 302 ext2_error(sb, __func__,
@@ -361,6 +365,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
361 struct page *page = NULL; 365 struct page *page = NULL;
362 struct ext2_inode_info *ei = EXT2_I(dir); 366 struct ext2_inode_info *ei = EXT2_I(dir);
363 ext2_dirent * de; 367 ext2_dirent * de;
368 int dir_has_error = 0;
364 369
365 if (npages == 0) 370 if (npages == 0)
366 goto out; 371 goto out;
@@ -374,7 +379,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
374 n = start; 379 n = start;
375 do { 380 do {
376 char *kaddr; 381 char *kaddr;
377 page = ext2_get_page(dir, n); 382 page = ext2_get_page(dir, n, dir_has_error);
378 if (!IS_ERR(page)) { 383 if (!IS_ERR(page)) {
379 kaddr = page_address(page); 384 kaddr = page_address(page);
380 de = (ext2_dirent *) kaddr; 385 de = (ext2_dirent *) kaddr;
@@ -391,7 +396,9 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
391 de = ext2_next_entry(de); 396 de = ext2_next_entry(de);
392 } 397 }
393 ext2_put_page(page); 398 ext2_put_page(page);
394 } 399 } else
400 dir_has_error = 1;
401
395 if (++n >= npages) 402 if (++n >= npages)
396 n = 0; 403 n = 0;
397 /* next page is past the blocks we've got */ 404 /* next page is past the blocks we've got */
@@ -414,7 +421,7 @@ found:
414 421
415struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) 422struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
416{ 423{
417 struct page *page = ext2_get_page(dir, 0); 424 struct page *page = ext2_get_page(dir, 0, 0);
418 ext2_dirent *de = NULL; 425 ext2_dirent *de = NULL;
419 426
420 if (!IS_ERR(page)) { 427 if (!IS_ERR(page)) {
@@ -487,7 +494,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
487 for (n = 0; n <= npages; n++) { 494 for (n = 0; n <= npages; n++) {
488 char *dir_end; 495 char *dir_end;
489 496
490 page = ext2_get_page(dir, n); 497 page = ext2_get_page(dir, n, 0);
491 err = PTR_ERR(page); 498 err = PTR_ERR(page);
492 if (IS_ERR(page)) 499 if (IS_ERR(page))
493 goto out; 500 goto out;
@@ -655,14 +662,17 @@ int ext2_empty_dir (struct inode * inode)
655{ 662{
656 struct page *page = NULL; 663 struct page *page = NULL;
657 unsigned long i, npages = dir_pages(inode); 664 unsigned long i, npages = dir_pages(inode);
665 int dir_has_error = 0;
658 666
659 for (i = 0; i < npages; i++) { 667 for (i = 0; i < npages; i++) {
660 char *kaddr; 668 char *kaddr;
661 ext2_dirent * de; 669 ext2_dirent * de;
662 page = ext2_get_page(inode, i); 670 page = ext2_get_page(inode, i, dir_has_error);
663 671
664 if (IS_ERR(page)) 672 if (IS_ERR(page)) {
673 dir_has_error = 1;
665 continue; 674 continue;
675 }
666 676
667 kaddr = page_address(page); 677 kaddr = page_address(page);
668 de = (ext2_dirent *)kaddr; 678 de = (ext2_dirent *)kaddr;
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
new file mode 100644
index 000000000000..8e0cfe44b0fc
--- /dev/null
+++ b/fs/ext3/Kconfig
@@ -0,0 +1,67 @@
1config EXT3_FS
2 tristate "Ext3 journalling file system support"
3 select JBD
4 help
5 This is the journalling version of the Second extended file system
6 (often called ext3), the de facto standard Linux file system
7 (method to organize files on a storage device) for hard disks.
8
9 The journalling code included in this driver means you do not have
10 to run e2fsck (file system checker) on your file systems after a
11 crash. The journal keeps track of any changes that were being made
12 at the time the system crashed, and can ensure that your file system
13 is consistent without the need for a lengthy check.
14
15 Other than adding the journal to the file system, the on-disk format
16 of ext3 is identical to ext2. It is possible to freely switch
17 between using the ext3 driver and the ext2 driver, as long as the
18 file system has been cleanly unmounted, or e2fsck is run on the file
19 system.
20
21 To add a journal on an existing ext2 file system or change the
22 behavior of ext3 file systems, you can use the tune2fs utility ("man
23 tune2fs"). To modify attributes of files and directories on ext3
24 file systems, use chattr ("man chattr"). You need to be using
25 e2fsprogs version 1.20 or later in order to create ext3 journals
26 (available at <http://sourceforge.net/projects/e2fsprogs/>).
27
28 To compile this file system support as a module, choose M here: the
29 module will be called ext3.
30
31config EXT3_FS_XATTR
32 bool "Ext3 extended attributes"
33 depends on EXT3_FS
34 default y
35 help
36 Extended attributes are name:value pairs associated with inodes by
37 the kernel or by users (see the attr(5) manual page, or visit
38 <http://acl.bestbits.at/> for details).
39
40 If unsure, say N.
41
42 You need this for POSIX ACL support on ext3.
43
44config EXT3_FS_POSIX_ACL
45 bool "Ext3 POSIX Access Control Lists"
46 depends on EXT3_FS_XATTR
47 select FS_POSIX_ACL
48 help
49 Posix Access Control Lists (ACLs) support permissions for users and
50 groups beyond the owner/group/world scheme.
51
52 To learn more about Access Control Lists, visit the Posix ACLs for
53 Linux website <http://acl.bestbits.at/>.
54
55 If you don't know what Access Control Lists are, say N
56
57config EXT3_FS_SECURITY
58 bool "Ext3 Security Labels"
59 depends on EXT3_FS_XATTR
60 help
61 Security labels support alternative access control models
62 implemented by security modules like SELinux. This option
63 enables an extended attribute handler for file security
64 labels in the ext3 filesystem.
65
66 If you are not using a security module that requires using
67 extended attributes for file security labels, say N.
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6eb..f5b57a2ca35a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
1547 * turn off reservation for this allocation 1547 * turn off reservation for this allocation
1548 */ 1548 */
1549 if (my_rsv && (free_blocks < windowsz) 1549 if (my_rsv && (free_blocks < windowsz)
1550 && (free_blocks > 0)
1550 && (rsv_is_empty(&my_rsv->rsv_window))) 1551 && (rsv_is_empty(&my_rsv->rsv_window)))
1551 my_rsv = NULL; 1552 my_rsv = NULL;
1552 1553
@@ -1585,7 +1586,7 @@ retry_alloc:
1585 * free blocks is less than half of the reservation 1586 * free blocks is less than half of the reservation
1586 * window size. 1587 * window size.
1587 */ 1588 */
1588 if (free_blocks <= (windowsz/2)) 1589 if (my_rsv && (free_blocks <= (windowsz/2)))
1589 continue; 1590 continue;
1590 1591
1591 brelse(bitmap_bh); 1592 brelse(bitmap_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78ed..4c82531ea0a8 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
102 int err; 102 int err;
103 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
104 int ret = 0; 104 int ret = 0;
105 int dir_has_error = 0;
105 106
106 sb = inode->i_sb; 107 sb = inode->i_sb;
107 108
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
148 * of recovering data when there's a bad sector 149 * of recovering data when there's a bad sector
149 */ 150 */
150 if (!bh) { 151 if (!bh) {
151 ext3_error (sb, "ext3_readdir", 152 if (!dir_has_error) {
152 "directory #%lu contains a hole at offset %lu", 153 ext3_error(sb, __func__, "directory #%lu "
153 inode->i_ino, (unsigned long)filp->f_pos); 154 "contains a hole at offset %lld",
155 inode->i_ino, filp->f_pos);
156 dir_has_error = 1;
157 }
154 /* corrupt size? Maybe no more blocks to read */ 158 /* corrupt size? Maybe no more blocks to read */
155 if (filp->f_pos > inode->i_blocks << 9) 159 if (filp->f_pos > inode->i_blocks << 9)
156 break; 160 break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
410 get_dtype(sb, fname->file_type)); 414 get_dtype(sb, fname->file_type));
411 if (error) { 415 if (error) {
412 filp->f_pos = curr_pos; 416 filp->f_pos = curr_pos;
413 info->extra_fname = fname->next; 417 info->extra_fname = fname;
414 return error; 418 return error;
415 } 419 }
416 fname = fname->next; 420 fname = fname->next;
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp,
449 * If there are any leftover names on the hash collision 453 * If there are any leftover names on the hash collision
450 * chain, return them first. 454 * chain, return them first.
451 */ 455 */
452 if (info->extra_fname && 456 if (info->extra_fname) {
453 call_filldir(filp, dirent, filldir, info->extra_fname)) 457 if (call_filldir(filp, dirent, filldir, info->extra_fname))
454 goto finished; 458 goto finished;
455 459
456 if (!info->curr_node) 460 info->extra_fname = NULL;
461 info->curr_node = rb_next(info->curr_node);
462 if (!info->curr_node) {
463 if (info->next_hash == ~0) {
464 filp->f_pos = EXT3_HTREE_EOF;
465 goto finished;
466 }
467 info->curr_hash = info->next_hash;
468 info->curr_minor_hash = 0;
469 }
470 } else if (!info->curr_node)
457 info->curr_node = rb_first(&info->root); 471 info->curr_node = rb_first(&info->root);
458 472
459 while (1) { 473 while (1) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148e..f8424ad89971 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
1186 ext3_journal_stop(handle); 1186 ext3_journal_stop(handle);
1187 unlock_page(page); 1187 unlock_page(page);
1188 page_cache_release(page); 1188 page_cache_release(page);
1189 /*
1190 * block_write_begin may have instantiated a few blocks
1191 * outside i_size. Trim these off again. Don't need
1192 * i_size_read because we hold i_mutex.
1193 */
1194 if (pos + len > inode->i_size)
1195 vmtruncate(inode, inode->i_size);
1189 } 1196 }
1190 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1197 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1191 goto retry; 1198 goto retry;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e94..78fdf3836370 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
790 790
791 if (reserved_gdb || gdb_off == 0) { 791 if (reserved_gdb || gdb_off == 0) {
792 if (!EXT3_HAS_COMPAT_FEATURE(sb, 792 if (!EXT3_HAS_COMPAT_FEATURE(sb,
793 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 793 EXT3_FEATURE_COMPAT_RESIZE_INODE)
794 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
794 ext3_warning(sb, __func__, 795 ext3_warning(sb, __func__,
795 "No reserved GDT blocks, can't resize"); 796 "No reserved GDT blocks, can't resize");
796 return -EPERM; 797 return -EPERM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c556..3a260af5544d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
625 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) 625 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
626 seq_puts(seq, ",data=writeback"); 626 seq_puts(seq, ",data=writeback");
627 627
628 if (test_opt(sb, DATA_ERR_ABORT))
629 seq_puts(seq, ",data_err=abort");
630
628 ext3_show_quota_options(seq, sb); 631 ext3_show_quota_options(seq, sb);
629 632
630 return 0; 633 return 0;
@@ -754,6 +757,7 @@ enum {
754 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 757 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
755 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 758 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
756 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 759 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
760 Opt_data_err_abort, Opt_data_err_ignore,
757 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 761 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
758 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 762 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
759 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 763 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
796 {Opt_data_journal, "data=journal"}, 800 {Opt_data_journal, "data=journal"},
797 {Opt_data_ordered, "data=ordered"}, 801 {Opt_data_ordered, "data=ordered"},
798 {Opt_data_writeback, "data=writeback"}, 802 {Opt_data_writeback, "data=writeback"},
803 {Opt_data_err_abort, "data_err=abort"},
804 {Opt_data_err_ignore, "data_err=ignore"},
799 {Opt_offusrjquota, "usrjquota="}, 805 {Opt_offusrjquota, "usrjquota="},
800 {Opt_usrjquota, "usrjquota=%s"}, 806 {Opt_usrjquota, "usrjquota=%s"},
801 {Opt_offgrpjquota, "grpjquota="}, 807 {Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
1011 sbi->s_mount_opt |= data_opt; 1017 sbi->s_mount_opt |= data_opt;
1012 } 1018 }
1013 break; 1019 break;
1020 case Opt_data_err_abort:
1021 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1022 break;
1023 case Opt_data_err_ignore:
1024 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1025 break;
1014#ifdef CONFIG_QUOTA 1026#ifdef CONFIG_QUOTA
1015 case Opt_usrjquota: 1027 case Opt_usrjquota:
1016 qtype = USRQUOTA; 1028 qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
1986 journal->j_flags |= JFS_BARRIER; 1998 journal->j_flags |= JFS_BARRIER;
1987 else 1999 else
1988 journal->j_flags &= ~JFS_BARRIER; 2000 journal->j_flags &= ~JFS_BARRIER;
2001 if (test_opt(sb, DATA_ERR_ABORT))
2002 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2003 else
2004 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
1989 spin_unlock(&journal->j_state_lock); 2005 spin_unlock(&journal->j_state_lock);
1990} 2006}
1991 2007
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 000000000000..7505482a08fa
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
1config EXT4_FS
2 tristate "The Extended 4 (ext4) filesystem"
3 select JBD2
4 select CRC16
5 help
6 This is the next generation of the ext3 filesystem.
7
8 Unlike the change from ext2 filesystem to ext3 filesystem,
9 the on-disk format of ext4 is not forwards compatible with
10 ext3; it is based on extent maps and it supports 48-bit
11 physical block numbers. The ext4 filesystem also supports delayed
12 allocation, persistent preallocation, high resolution time stamps,
13 and a number of other features to improve performance and speed
14 up fsck time. For more information, please see the web pages at
15 http://ext4.wiki.kernel.org.
16
17 The ext4 filesystem will support mounting an ext3
18 filesystem; while there will be some performance gains from
19 the delayed allocation and inode table readahead, the best
20 performance gains will require enabling ext4 features in the
21 filesystem, or formating a new filesystem as an ext4
22 filesystem initially.
23
24 To compile this file system support as a module, choose M here. The
25 module will be called ext4.
26
27 If unsure, say N.
28
29config EXT4DEV_COMPAT
30 bool "Enable ext4dev compatibility"
31 depends on EXT4_FS
32 help
33 Starting with 2.6.28, the name of the ext4 filesystem was
34 renamed from ext4dev to ext4. Unfortunately there are some
35 legacy userspace programs (such as klibc's fstype) have
36 "ext4dev" hardcoded.
37
38 To enable backwards compatibility so that systems that are
39 still expecting to mount ext4 filesystems using ext4dev,
40 chose Y here. This feature will go away by 2.6.31, so
41 please arrange to get your userspace programs fixed!
42
43config EXT4_FS_XATTR
44 bool "Ext4 extended attributes"
45 depends on EXT4_FS
46 default y
47 help
48 Extended attributes are name:value pairs associated with inodes by
49 the kernel or by users (see the attr(5) manual page, or visit
50 <http://acl.bestbits.at/> for details).
51
52 If unsure, say N.
53
54 You need this for POSIX ACL support on ext4.
55
56config EXT4_FS_POSIX_ACL
57 bool "Ext4 POSIX Access Control Lists"
58 depends on EXT4_FS_XATTR
59 select FS_POSIX_ACL
60 help
61 POSIX Access Control Lists (ACLs) support permissions for users and
62 groups beyond the owner/group/world scheme.
63
64 To learn more about Access Control Lists, visit the POSIX ACLs for
65 Linux website <http://acl.bestbits.at/>.
66
67 If you don't know what Access Control Lists are, say N
68
69config EXT4_FS_SECURITY
70 bool "Ext4 Security Labels"
71 depends on EXT4_FS_XATTR
72 help
73 Security labels support alternative access control models
74 implemented by security modules like SELinux. This option
75 enables an extended attribute handler for file security
76 labels in the ext4 filesystem.
77
78 If you are not using a security module that requires using
79 extended attributes for file security labels, say N.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd2ece228827..b9821be709bd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
568 568
569 /* this isn't the right place to decide whether block is metadata 569 /* this isn't the right place to decide whether block is metadata
570 * inode.c/extents.c knows better, but for safety ... */ 570 * inode.c/extents.c knows better, but for safety ... */
571 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || 571 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
572 ext4_should_journal_data(inode)) 572 metadata = 1;
573
574 /* We need to make sure we don't reuse
575 * block released untill the transaction commit.
576 * writeback mode have weak data consistency so
577 * don't force data as metadata when freeing block
578 * for writeback mode.
579 */
580 if (metadata == 0 && !ext4_should_writeback_data(inode))
573 metadata = 1; 581 metadata = 1;
574 582
575 sb = inode->i_sb; 583 sb = inode->i_sb;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6690a41cdd9f..4880cc3e6727 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,7 +511,6 @@ do { \
511/* 511/*
512 * Mount flags 512 * Mount flags
513 */ 513 */
514#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
515#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ 514#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
516#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 515#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
517#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 516#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6a0b40d43264..445fde603df8 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -99,9 +99,6 @@ struct ext4_sb_info {
99 struct inode *s_buddy_cache; 99 struct inode *s_buddy_cache;
100 long s_blocks_reserved; 100 long s_blocks_reserved;
101 spinlock_t s_reserve_lock; 101 spinlock_t s_reserve_lock;
102 struct list_head s_active_transaction;
103 struct list_head s_closed_transaction;
104 struct list_head s_committed_transaction;
105 spinlock_t s_md_lock; 102 spinlock_t s_md_lock;
106 tid_t s_last_transaction; 103 tid_t s_last_transaction;
107 unsigned short *s_mb_offsets, *s_mb_maxs; 104 unsigned short *s_mb_offsets, *s_mb_maxs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b4ec9decfd1..8dbf6953845b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1648 int ret = 0, err, nr_pages, i; 1648 int ret = 0, err, nr_pages, i;
1649 unsigned long index, end; 1649 unsigned long index, end;
1650 struct pagevec pvec; 1650 struct pagevec pvec;
1651 long pages_skipped;
1651 1652
1652 BUG_ON(mpd->next_page <= mpd->first_page); 1653 BUG_ON(mpd->next_page <= mpd->first_page);
1653 pagevec_init(&pvec, 0); 1654 pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1655 end = mpd->next_page - 1; 1656 end = mpd->next_page - 1;
1656 1657
1657 while (index <= end) { 1658 while (index <= end) {
1658 /* XXX: optimize tail */ 1659 /*
1659 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 1660 * We can use PAGECACHE_TAG_DIRTY lookup here because
1661 * even though we have cleared the dirty flag on the page
1662 * We still keep the page in the radix tree with tag
1663 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
1664 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
1665 * which is called via the below writepage callback.
1666 */
1667 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1668 PAGECACHE_TAG_DIRTY,
1669 min(end - index,
1670 (pgoff_t)PAGEVEC_SIZE-1) + 1);
1660 if (nr_pages == 0) 1671 if (nr_pages == 0)
1661 break; 1672 break;
1662 for (i = 0; i < nr_pages; i++) { 1673 for (i = 0; i < nr_pages; i++) {
1663 struct page *page = pvec.pages[i]; 1674 struct page *page = pvec.pages[i];
1664 1675
1665 index = page->index; 1676 pages_skipped = mpd->wbc->pages_skipped;
1666 if (index > end)
1667 break;
1668 index++;
1669
1670 err = mapping->a_ops->writepage(page, mpd->wbc); 1677 err = mapping->a_ops->writepage(page, mpd->wbc);
1671 if (!err) 1678 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
1679 /*
1680 * have successfully written the page
1681 * without skipping the same
1682 */
1672 mpd->pages_written++; 1683 mpd->pages_written++;
1673 /* 1684 /*
1674 * In error case, we have to continue because 1685 * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
2104 struct writeback_control *wbc, 2115 struct writeback_control *wbc,
2105 struct mpage_da_data *mpd) 2116 struct mpage_da_data *mpd)
2106{ 2117{
2107 long to_write;
2108 int ret; 2118 int ret;
2109 2119
2110 if (!mpd->get_block) 2120 if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
2119 mpd->pages_written = 0; 2129 mpd->pages_written = 0;
2120 mpd->retval = 0; 2130 mpd->retval = 0;
2121 2131
2122 to_write = wbc->nr_to_write;
2123
2124 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); 2132 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2125
2126 /* 2133 /*
2127 * Handle last extent of pages 2134 * Handle last extent of pages
2128 */ 2135 */
2129 if (!mpd->io_done && mpd->next_page != mpd->first_page) { 2136 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2130 if (mpage_da_map_blocks(mpd) == 0) 2137 if (mpage_da_map_blocks(mpd) == 0)
2131 mpage_da_submit_io(mpd); 2138 mpage_da_submit_io(mpd);
2132 }
2133 2139
2134 wbc->nr_to_write = to_write - mpd->pages_written; 2140 mpd->io_done = 1;
2141 ret = MPAGE_DA_EXTENT_TAIL;
2142 }
2143 wbc->nr_to_write -= mpd->pages_written;
2135 return ret; 2144 return ret;
2136} 2145}
2137 2146
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2360static int ext4_da_writepages(struct address_space *mapping, 2369static int ext4_da_writepages(struct address_space *mapping,
2361 struct writeback_control *wbc) 2370 struct writeback_control *wbc)
2362{ 2371{
2372 pgoff_t index;
2373 int range_whole = 0;
2363 handle_t *handle = NULL; 2374 handle_t *handle = NULL;
2364 loff_t range_start = 0;
2365 struct mpage_da_data mpd; 2375 struct mpage_da_data mpd;
2366 struct inode *inode = mapping->host; 2376 struct inode *inode = mapping->host;
2377 int no_nrwrite_index_update;
2378 long pages_written = 0, pages_skipped;
2367 int needed_blocks, ret = 0, nr_to_writebump = 0; 2379 int needed_blocks, ret = 0, nr_to_writebump = 0;
2368 long to_write, pages_skipped = 0;
2369 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2380 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2370 2381
2371 /* 2382 /*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
2385 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; 2396 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2386 wbc->nr_to_write = sbi->s_mb_stream_request; 2397 wbc->nr_to_write = sbi->s_mb_stream_request;
2387 } 2398 }
2399 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2400 range_whole = 1;
2388 2401
2389 if (!wbc->range_cyclic) 2402 if (wbc->range_cyclic)
2390 /* 2403 index = mapping->writeback_index;
2391 * If range_cyclic is not set force range_cont 2404 else
2392 * and save the old writeback_index 2405 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2393 */
2394 wbc->range_cont = 1;
2395
2396 range_start = wbc->range_start;
2397 pages_skipped = wbc->pages_skipped;
2398 2406
2399 mpd.wbc = wbc; 2407 mpd.wbc = wbc;
2400 mpd.inode = mapping->host; 2408 mpd.inode = mapping->host;
2401 2409
2402restart_loop: 2410 /*
2403 to_write = wbc->nr_to_write; 2411 * we don't want write_cache_pages to update
2404 while (!ret && to_write > 0) { 2412 * nr_to_write and writeback_index
2413 */
2414 no_nrwrite_index_update = wbc->no_nrwrite_index_update;
2415 wbc->no_nrwrite_index_update = 1;
2416 pages_skipped = wbc->pages_skipped;
2417
2418 while (!ret && wbc->nr_to_write > 0) {
2405 2419
2406 /* 2420 /*
2407 * we insert one extent at a time. So we need 2421 * we insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
2422 dump_stack(); 2436 dump_stack();
2423 goto out_writepages; 2437 goto out_writepages;
2424 } 2438 }
2425 to_write -= wbc->nr_to_write;
2426
2427 mpd.get_block = ext4_da_get_block_write; 2439 mpd.get_block = ext4_da_get_block_write;
2428 ret = mpage_da_writepages(mapping, wbc, &mpd); 2440 ret = mpage_da_writepages(mapping, wbc, &mpd);
2429 2441
2430 ext4_journal_stop(handle); 2442 ext4_journal_stop(handle);
2431 2443
2432 if (mpd.retval == -ENOSPC) 2444 if (mpd.retval == -ENOSPC) {
2445 /* commit the transaction which would
2446 * free blocks released in the transaction
2447 * and try again
2448 */
2433 jbd2_journal_force_commit_nested(sbi->s_journal); 2449 jbd2_journal_force_commit_nested(sbi->s_journal);
2434 2450 wbc->pages_skipped = pages_skipped;
2435 /* reset the retry count */ 2451 ret = 0;
2436 if (ret == MPAGE_DA_EXTENT_TAIL) { 2452 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2437 /* 2453 /*
2438 * got one extent now try with 2454 * got one extent now try with
2439 * rest of the pages 2455 * rest of the pages
2440 */ 2456 */
2441 to_write += wbc->nr_to_write; 2457 pages_written += mpd.pages_written;
2458 wbc->pages_skipped = pages_skipped;
2442 ret = 0; 2459 ret = 0;
2443 } else if (wbc->nr_to_write) { 2460 } else if (wbc->nr_to_write)
2444 /* 2461 /*
2445 * There is no more writeout needed 2462 * There is no more writeout needed
2446 * or we requested for a noblocking writeout 2463 * or we requested for a noblocking writeout
2447 * and we found the device congested 2464 * and we found the device congested
2448 */ 2465 */
2449 to_write += wbc->nr_to_write;
2450 break; 2466 break;
2451 }
2452 wbc->nr_to_write = to_write;
2453 }
2454
2455 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2456 /* We skipped pages in this loop */
2457 wbc->range_start = range_start;
2458 wbc->nr_to_write = to_write +
2459 wbc->pages_skipped - pages_skipped;
2460 wbc->pages_skipped = pages_skipped;
2461 goto restart_loop;
2462 } 2467 }
2468 if (pages_skipped != wbc->pages_skipped)
2469 printk(KERN_EMERG "This should not happen leaving %s "
2470 "with nr_to_write = %ld ret = %d\n",
2471 __func__, wbc->nr_to_write, ret);
2472
2473 /* Update index */
2474 index += pages_written;
2475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476 /*
2477 * set the writeback_index so that range_cyclic
2478 * mode will write it back later
2479 */
2480 mapping->writeback_index = index;
2463 2481
2464out_writepages: 2482out_writepages:
2465 wbc->nr_to_write = to_write - nr_to_writebump; 2483 if (!no_nrwrite_index_update)
2466 wbc->range_start = range_start; 2484 wbc->no_nrwrite_index_update = 0;
2485 wbc->nr_to_write -= nr_to_writebump;
2467 return ret; 2486 return ret;
2468} 2487}
2469 2488
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
4175 struct inode *inode = &(ei->vfs_inode); 4194 struct inode *inode = &(ei->vfs_inode);
4176 u64 i_blocks = inode->i_blocks; 4195 u64 i_blocks = inode->i_blocks;
4177 struct super_block *sb = inode->i_sb; 4196 struct super_block *sb = inode->i_sb;
4178 int err = 0;
4179 4197
4180 if (i_blocks <= ~0U) { 4198 if (i_blocks <= ~0U) {
4181 /* 4199 /*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
4185 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4203 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4186 raw_inode->i_blocks_high = 0; 4204 raw_inode->i_blocks_high = 0;
4187 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 4205 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
4188 } else if (i_blocks <= 0xffffffffffffULL) { 4206 return 0;
4207 }
4208 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
4209 return -EFBIG;
4210
4211 if (i_blocks <= 0xffffffffffffULL) {
4189 /* 4212 /*
4190 * i_blocks can be represented in a 48 bit variable 4213 * i_blocks can be represented in a 48 bit variable
4191 * as multiple of 512 bytes 4214 * as multiple of 512 bytes
4192 */ 4215 */
4193 err = ext4_update_rocompat_feature(handle, sb,
4194 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4195 if (err)
4196 goto err_out;
4197 /* i_block is stored in the split 48 bit fields */
4198 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4216 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4199 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4217 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4200 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 4218 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
4201 } else { 4219 } else {
4202 /*
4203 * i_blocks should be represented in a 48 bit variable
4204 * as multiple of file system block size
4205 */
4206 err = ext4_update_rocompat_feature(handle, sb,
4207 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4208 if (err)
4209 goto err_out;
4210 ei->i_flags |= EXT4_HUGE_FILE_FL; 4220 ei->i_flags |= EXT4_HUGE_FILE_FL;
4211 /* i_block is stored in file system block size */ 4221 /* i_block is stored in file system block size */
4212 i_blocks = i_blocks >> (inode->i_blkbits - 9); 4222 i_blocks = i_blocks >> (inode->i_blkbits - 9);
4213 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4223 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4214 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4224 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4215 } 4225 }
4216err_out: 4226 return 0;
4217 return err;
4218} 4227}
4219 4228
4220/* 4229/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b580714f0d85..dfe17a134052 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2300 } 2300 }
2301 2301
2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2303 2304
2304#ifdef DOUBLE_CHECK 2305#ifdef DOUBLE_CHECK
2305 { 2306 {
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2522 } 2523 }
2523 2524
2524 spin_lock_init(&sbi->s_md_lock); 2525 spin_lock_init(&sbi->s_md_lock);
2525 INIT_LIST_HEAD(&sbi->s_active_transaction);
2526 INIT_LIST_HEAD(&sbi->s_closed_transaction);
2527 INIT_LIST_HEAD(&sbi->s_committed_transaction);
2528 spin_lock_init(&sbi->s_bal_lock); 2526 spin_lock_init(&sbi->s_bal_lock);
2529 2527
2530 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; 2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2553 ext4_mb_init_per_dev_proc(sb); 2551 ext4_mb_init_per_dev_proc(sb);
2554 ext4_mb_history_init(sb); 2552 ext4_mb_history_init(sb);
2555 2553
2554 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2555
2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); 2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2557 return 0; 2557 return 0;
2558} 2558}
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); 2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2569 list_del(&pa->pa_group_list); 2569 list_del(&pa->pa_group_list);
2570 count++; 2570 count++;
2571 kfree(pa); 2571 kmem_cache_free(ext4_pspace_cachep, pa);
2572 } 2572 }
2573 if (count) 2573 if (count)
2574 mb_debug("mballoc: %u PAs left\n", count); 2574 mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
2582 struct ext4_group_info *grinfo; 2582 struct ext4_group_info *grinfo;
2583 struct ext4_sb_info *sbi = EXT4_SB(sb); 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2584 2584
2585 /* release freed, non-committed blocks */
2586 spin_lock(&sbi->s_md_lock);
2587 list_splice_init(&sbi->s_closed_transaction,
2588 &sbi->s_committed_transaction);
2589 list_splice_init(&sbi->s_active_transaction,
2590 &sbi->s_committed_transaction);
2591 spin_unlock(&sbi->s_md_lock);
2592 ext4_mb_free_committed_blocks(sb);
2593
2594 if (sbi->s_group_info) { 2585 if (sbi->s_group_info) {
2595 for (i = 0; i < sbi->s_groups_count; i++) { 2586 for (i = 0; i < sbi->s_groups_count; i++) {
2596 grinfo = ext4_get_group_info(sb, i); 2587 grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
2644 return 0; 2635 return 0;
2645} 2636}
2646 2637
2647static noinline_for_stack void 2638/*
2648ext4_mb_free_committed_blocks(struct super_block *sb) 2639 * This function is called by the jbd2 layer once the commit has finished,
2640 * so we know we can free the blocks that were released with that commit.
2641 */
2642static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2649{ 2643{
2650 struct ext4_sb_info *sbi = EXT4_SB(sb); 2644 struct super_block *sb = journal->j_private;
2651 int err;
2652 int i;
2653 int count = 0;
2654 int count2 = 0;
2655 struct ext4_free_metadata *md;
2656 struct ext4_buddy e4b; 2645 struct ext4_buddy e4b;
2646 struct ext4_group_info *db;
2647 int err, count = 0, count2 = 0;
2648 struct ext4_free_data *entry;
2649 ext4_fsblk_t discard_block;
2650 struct list_head *l, *ltmp;
2657 2651
2658 if (list_empty(&sbi->s_committed_transaction)) 2652 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2659 return; 2653 entry = list_entry(l, struct ext4_free_data, list);
2660
2661 /* there is committed blocks to be freed yet */
2662 do {
2663 /* get next array of blocks */
2664 md = NULL;
2665 spin_lock(&sbi->s_md_lock);
2666 if (!list_empty(&sbi->s_committed_transaction)) {
2667 md = list_entry(sbi->s_committed_transaction.next,
2668 struct ext4_free_metadata, list);
2669 list_del(&md->list);
2670 }
2671 spin_unlock(&sbi->s_md_lock);
2672
2673 if (md == NULL)
2674 break;
2675 2654
2676 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2655 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2677 md->num, md->group, md); 2656 entry->count, entry->group, entry);
2678 2657
2679 err = ext4_mb_load_buddy(sb, md->group, &e4b); 2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2680 /* we expect to find existing buddy because it's pinned */ 2659 /* we expect to find existing buddy because it's pinned */
2681 BUG_ON(err != 0); 2660 BUG_ON(err != 0);
2682 2661
2662 db = e4b.bd_info;
2683 /* there are blocks to put in buddy to make them really free */ 2663 /* there are blocks to put in buddy to make them really free */
2684 count += md->num; 2664 count += entry->count;
2685 count2++; 2665 count2++;
2686 ext4_lock_group(sb, md->group); 2666 ext4_lock_group(sb, entry->group);
2687 for (i = 0; i < md->num; i++) { 2667 /* Take it out of per group rb tree */
2688 mb_debug(" %u", md->blocks[i]); 2668 rb_erase(&entry->node, &(db->bb_free_root));
2689 mb_free_blocks(NULL, &e4b, md->blocks[i], 1); 2669 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2670
2671 if (!db->bb_free_root.rb_node) {
2672 /* No more items in the per group rb tree
2673 * balance refcounts from ext4_mb_free_metadata()
2674 */
2675 page_cache_release(e4b.bd_buddy_page);
2676 page_cache_release(e4b.bd_bitmap_page);
2690 } 2677 }
2691 mb_debug("\n"); 2678 ext4_unlock_group(sb, entry->group);
2692 ext4_unlock_group(sb, md->group); 2679 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2693 2680 + entry->start_blk
2694 /* balance refcounts from ext4_mb_free_metadata() */ 2681 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2695 page_cache_release(e4b.bd_buddy_page); 2682 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
2696 page_cache_release(e4b.bd_bitmap_page); 2683 (unsigned long long) discard_block, entry->count);
2697 2684 sb_issue_discard(sb, discard_block, entry->count);
2698 kfree(md); 2685
2686 kmem_cache_free(ext4_free_ext_cachep, entry);
2699 ext4_mb_release_desc(&e4b); 2687 ext4_mb_release_desc(&e4b);
2700 2688 }
2701 } while (md);
2702 2689
2703 mb_debug("freed %u blocks in %u structures\n", count, count2); 2690 mb_debug("freed %u blocks in %u structures\n", count, count2);
2704} 2691}
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2712 2699
2713static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2700static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2714{ 2701{
2702#ifdef CONFIG_PROC_FS
2715 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; 2703 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2716 struct ext4_sb_info *sbi = EXT4_SB(sb); 2704 struct ext4_sb_info *sbi = EXT4_SB(sb);
2717 struct proc_dir_entry *proc; 2705 struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
2735 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); 2723 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2736 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); 2724 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2737 return -ENOMEM; 2725 return -ENOMEM;
2726#else
2727 return 0;
2728#endif
2738} 2729}
2739 2730
2740static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) 2731static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2741{ 2732{
2733#ifdef CONFIG_PROC_FS
2742 struct ext4_sb_info *sbi = EXT4_SB(sb); 2734 struct ext4_sb_info *sbi = EXT4_SB(sb);
2743 2735
2744 if (sbi->s_proc == NULL) 2736 if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2750 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); 2742 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2751 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); 2743 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2752 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); 2744 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2753 2745#endif
2754 return 0; 2746 return 0;
2755} 2747}
2756 2748
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
2771 kmem_cache_destroy(ext4_pspace_cachep); 2763 kmem_cache_destroy(ext4_pspace_cachep);
2772 return -ENOMEM; 2764 return -ENOMEM;
2773 } 2765 }
2766
2767 ext4_free_ext_cachep =
2768 kmem_cache_create("ext4_free_block_extents",
2769 sizeof(struct ext4_free_data),
2770 0, SLAB_RECLAIM_ACCOUNT, NULL);
2771 if (ext4_free_ext_cachep == NULL) {
2772 kmem_cache_destroy(ext4_pspace_cachep);
2773 kmem_cache_destroy(ext4_ac_cachep);
2774 return -ENOMEM;
2775 }
2774 return 0; 2776 return 0;
2775} 2777}
2776 2778
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
2779 /* XXX: synchronize_rcu(); */ 2781 /* XXX: synchronize_rcu(); */
2780 kmem_cache_destroy(ext4_pspace_cachep); 2782 kmem_cache_destroy(ext4_pspace_cachep);
2781 kmem_cache_destroy(ext4_ac_cachep); 2783 kmem_cache_destroy(ext4_ac_cachep);
2784 kmem_cache_destroy(ext4_free_ext_cachep);
2782} 2785}
2783 2786
2784 2787
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4324 goto out1; 4327 goto out1;
4325 } 4328 }
4326 4329
4327 ext4_mb_poll_new_transaction(sb, handle);
4328
4329 *errp = ext4_mb_initialize_context(ac, ar); 4330 *errp = ext4_mb_initialize_context(ac, ar);
4330 if (*errp) { 4331 if (*errp) {
4331 ar->len = 0; 4332 ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
4384 4385
4385 return block; 4386 return block;
4386} 4387}
4387static void ext4_mb_poll_new_transaction(struct super_block *sb,
4388 handle_t *handle)
4389{
4390 struct ext4_sb_info *sbi = EXT4_SB(sb);
4391
4392 if (sbi->s_last_transaction == handle->h_transaction->t_tid)
4393 return;
4394
4395 /* new transaction! time to close last one and free blocks for
4396 * committed transaction. we know that only transaction can be
4397 * active, so previos transaction can be being logged and we
4398 * know that transaction before previous is known to be already
4399 * logged. this means that now we may free blocks freed in all
4400 * transactions before previous one. hope I'm clear enough ... */
4401 4388
4402 spin_lock(&sbi->s_md_lock); 4389/*
4403 if (sbi->s_last_transaction != handle->h_transaction->t_tid) { 4390 * We can merge two free data extents only if the physical blocks
4404 mb_debug("new transaction %lu, old %lu\n", 4391 * are contiguous, AND the extents were freed by the same transaction,
4405 (unsigned long) handle->h_transaction->t_tid, 4392 * AND the blocks are associated with the same group.
4406 (unsigned long) sbi->s_last_transaction); 4393 */
4407 list_splice_init(&sbi->s_closed_transaction, 4394static int can_merge(struct ext4_free_data *entry1,
4408 &sbi->s_committed_transaction); 4395 struct ext4_free_data *entry2)
4409 list_splice_init(&sbi->s_active_transaction, 4396{
4410 &sbi->s_closed_transaction); 4397 if ((entry1->t_tid == entry2->t_tid) &&
4411 sbi->s_last_transaction = handle->h_transaction->t_tid; 4398 (entry1->group == entry2->group) &&
4412 } 4399 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4413 spin_unlock(&sbi->s_md_lock); 4400 return 1;
4414 4401 return 0;
4415 ext4_mb_free_committed_blocks(sb);
4416} 4402}
4417 4403
4418static noinline_for_stack int 4404static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 struct ext4_group_info *db = e4b->bd_info; 4408 struct ext4_group_info *db = e4b->bd_info;
4423 struct super_block *sb = e4b->bd_sb; 4409 struct super_block *sb = e4b->bd_sb;
4424 struct ext4_sb_info *sbi = EXT4_SB(sb); 4410 struct ext4_sb_info *sbi = EXT4_SB(sb);
4425 struct ext4_free_metadata *md; 4411 struct ext4_free_data *entry, *new_entry;
4426 int i; 4412 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4413 struct rb_node *parent = NULL, *new_node;
4414
4427 4415
4428 BUG_ON(e4b->bd_bitmap_page == NULL); 4416 BUG_ON(e4b->bd_bitmap_page == NULL);
4429 BUG_ON(e4b->bd_buddy_page == NULL); 4417 BUG_ON(e4b->bd_buddy_page == NULL);
4430 4418
4419 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4420 new_entry->start_blk = block;
4421 new_entry->group = group;
4422 new_entry->count = count;
4423 new_entry->t_tid = handle->h_transaction->t_tid;
4424 new_node = &new_entry->node;
4425
4431 ext4_lock_group(sb, group); 4426 ext4_lock_group(sb, group);
4432 for (i = 0; i < count; i++) { 4427 if (!*n) {
4433 md = db->bb_md_cur; 4428 /* first free block exent. We need to
4434 if (md && db->bb_tid != handle->h_transaction->t_tid) { 4429 protect buddy cache from being freed,
4435 db->bb_md_cur = NULL; 4430 * otherwise we'll refresh it from
4436 md = NULL; 4431 * on-disk bitmap and lose not-yet-available
4432 * blocks */
4433 page_cache_get(e4b->bd_buddy_page);
4434 page_cache_get(e4b->bd_bitmap_page);
4435 }
4436 while (*n) {
4437 parent = *n;
4438 entry = rb_entry(parent, struct ext4_free_data, node);
4439 if (block < entry->start_blk)
4440 n = &(*n)->rb_left;
4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right;
4443 else {
4444 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count);
4447 return 0;
4437 } 4448 }
4449 }
4438 4450
4439 if (md == NULL) { 4451 rb_link_node(new_node, parent, n);
4440 ext4_unlock_group(sb, group); 4452 rb_insert_color(new_node, &db->bb_free_root);
4441 md = kmalloc(sizeof(*md), GFP_NOFS); 4453
4442 if (md == NULL) 4454 /* Now try to see the extent can be merged to left and right */
4443 return -ENOMEM; 4455 node = rb_prev(new_node);
4444 md->num = 0; 4456 if (node) {
4445 md->group = group; 4457 entry = rb_entry(node, struct ext4_free_data, node);
4446 4458 if (can_merge(entry, new_entry)) {
4447 ext4_lock_group(sb, group); 4459 new_entry->start_blk = entry->start_blk;
4448 if (db->bb_md_cur == NULL) { 4460 new_entry->count += entry->count;
4449 spin_lock(&sbi->s_md_lock); 4461 rb_erase(node, &(db->bb_free_root));
4450 list_add(&md->list, &sbi->s_active_transaction); 4462 spin_lock(&sbi->s_md_lock);
4451 spin_unlock(&sbi->s_md_lock); 4463 list_del(&entry->list);
4452 /* protect buddy cache from being freed, 4464 spin_unlock(&sbi->s_md_lock);
4453 * otherwise we'll refresh it from 4465 kmem_cache_free(ext4_free_ext_cachep, entry);
4454 * on-disk bitmap and lose not-yet-available
4455 * blocks */
4456 page_cache_get(e4b->bd_buddy_page);
4457 page_cache_get(e4b->bd_bitmap_page);
4458 db->bb_md_cur = md;
4459 db->bb_tid = handle->h_transaction->t_tid;
4460 mb_debug("new md 0x%p for group %lu\n",
4461 md, md->group);
4462 } else {
4463 kfree(md);
4464 md = db->bb_md_cur;
4465 }
4466 } 4466 }
4467 }
4467 4468
4468 BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); 4469 node = rb_next(new_node);
4469 md->blocks[md->num] = block + i; 4470 if (node) {
4470 md->num++; 4471 entry = rb_entry(node, struct ext4_free_data, node);
4471 if (md->num == EXT4_BB_MAX_BLOCKS) { 4472 if (can_merge(new_entry, entry)) {
4472 /* no more space, put full container on a sb's list */ 4473 new_entry->count += entry->count;
4473 db->bb_md_cur = NULL; 4474 rb_erase(node, &(db->bb_free_root));
4475 spin_lock(&sbi->s_md_lock);
4476 list_del(&entry->list);
4477 spin_unlock(&sbi->s_md_lock);
4478 kmem_cache_free(ext4_free_ext_cachep, entry);
4474 } 4479 }
4475 } 4480 }
4481 /* Add the extent to transaction's private list */
4482 spin_lock(&sbi->s_md_lock);
4483 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4484 spin_unlock(&sbi->s_md_lock);
4476 ext4_unlock_group(sb, group); 4485 ext4_unlock_group(sb, group);
4477 return 0; 4486 return 0;
4478} 4487}
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4500 4509
4501 *freed = 0; 4510 *freed = 0;
4502 4511
4503 ext4_mb_poll_new_transaction(sb, handle);
4504
4505 sbi = EXT4_SB(sb); 4512 sbi = EXT4_SB(sb);
4506 es = EXT4_SB(sb)->s_es; 4513 es = EXT4_SB(sb)->s_es;
4507 if (block < le32_to_cpu(es->s_first_data_block) || 4514 if (block < le32_to_cpu(es->s_first_data_block) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b3b4828f8b89..b5dff1fff1e5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h>
22#include <linux/marker.h>
21#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
22#include "ext4.h" 24#include "ext4.h"
23#include "group.h" 25#include "group.h"
@@ -98,23 +100,29 @@
98 100
99static struct kmem_cache *ext4_pspace_cachep; 101static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep; 102static struct kmem_cache *ext4_ac_cachep;
103static struct kmem_cache *ext4_free_ext_cachep;
101 104
102#ifdef EXT4_BB_MAX_BLOCKS 105struct ext4_free_data {
103#undef EXT4_BB_MAX_BLOCKS 106 /* this links the free block information from group_info */
104#endif 107 struct rb_node node;
105#define EXT4_BB_MAX_BLOCKS 30
106 108
107struct ext4_free_metadata { 109 /* this links the free block information from ext4_sb_info */
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list; 110 struct list_head list;
111
112 /* group which free block extent belongs */
113 ext4_group_t group;
114
115 /* free block extent */
116 ext4_grpblk_t start_blk;
117 ext4_grpblk_t count;
118
119 /* transaction which freed this extent */
120 tid_t t_tid;
112}; 121};
113 122
114struct ext4_group_info { 123struct ext4_group_info {
115 unsigned long bb_state; 124 unsigned long bb_state;
116 unsigned long bb_tid; 125 struct rb_root bb_free_root;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free; 126 unsigned short bb_first_free;
119 unsigned short bb_free; 127 unsigned short bb_free;
120 unsigned short bb_fragments; 128 unsigned short bb_fragments;
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
261 269
262static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 270static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
263 ext4_group_t group); 271 ext4_group_t group);
264static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
265static void ext4_mb_free_committed_blocks(struct super_block *);
266static void ext4_mb_return_to_preallocation(struct inode *inode, 272static void ext4_mb_return_to_preallocation(struct inode *inode,
267 struct ext4_buddy *e4b, sector_t block, 273 struct ext4_buddy *e4b, sector_t block,
268 int count); 274 int count);
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
270 struct super_block *, struct ext4_prealloc_space *pa); 276 struct super_block *, struct ext4_prealloc_space *pa);
271static int ext4_mb_init_per_dev_proc(struct super_block *sb); 277static int ext4_mb_init_per_dev_proc(struct super_block *sb);
272static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); 278static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
279static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
273 280
274 281
275static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) 282static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dea8f13c2fd9..9b2b2bc4ec17 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
374 */ 374 */
375} 375}
376 376
377int ext4_update_compat_feature(handle_t *handle,
378 struct super_block *sb, __u32 compat)
379{
380 int err = 0;
381 if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
382 err = ext4_journal_get_write_access(handle,
383 EXT4_SB(sb)->s_sbh);
384 if (err)
385 return err;
386 EXT4_SET_COMPAT_FEATURE(sb, compat);
387 sb->s_dirt = 1;
388 handle->h_sync = 1;
389 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
390 "call ext4_journal_dirty_met adata");
391 err = ext4_journal_dirty_metadata(handle,
392 EXT4_SB(sb)->s_sbh);
393 }
394 return err;
395}
396
397int ext4_update_rocompat_feature(handle_t *handle,
398 struct super_block *sb, __u32 rocompat)
399{
400 int err = 0;
401 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
402 err = ext4_journal_get_write_access(handle,
403 EXT4_SB(sb)->s_sbh);
404 if (err)
405 return err;
406 EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
407 sb->s_dirt = 1;
408 handle->h_sync = 1;
409 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
410 "call ext4_journal_dirty_met adata");
411 err = ext4_journal_dirty_metadata(handle,
412 EXT4_SB(sb)->s_sbh);
413 }
414 return err;
415}
416
417int ext4_update_incompat_feature(handle_t *handle,
418 struct super_block *sb, __u32 incompat)
419{
420 int err = 0;
421 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
422 err = ext4_journal_get_write_access(handle,
423 EXT4_SB(sb)->s_sbh);
424 if (err)
425 return err;
426 EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
427 sb->s_dirt = 1;
428 handle->h_sync = 1;
429 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
430 "call ext4_journal_dirty_met adata");
431 err = ext4_journal_dirty_metadata(handle,
432 EXT4_SB(sb)->s_sbh);
433 }
434 return err;
435}
436
437/* 377/*
438 * Open the external journal device 378 * Open the external journal device
439 */ 379 */
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
904enum { 844enum {
905 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 845 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
906 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 846 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
907 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 847 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
908 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 848 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
909 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 849 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
910 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 850 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
@@ -915,7 +855,7 @@ enum {
915 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 855 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
916 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 856 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
917 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 857 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
918 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, 858 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
919 Opt_inode_readahead_blks 859 Opt_inode_readahead_blks
920}; 860};
921 861
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
933 {Opt_err_panic, "errors=panic"}, 873 {Opt_err_panic, "errors=panic"},
934 {Opt_err_ro, "errors=remount-ro"}, 874 {Opt_err_ro, "errors=remount-ro"},
935 {Opt_nouid32, "nouid32"}, 875 {Opt_nouid32, "nouid32"},
936 {Opt_nocheck, "nocheck"},
937 {Opt_nocheck, "check=none"},
938 {Opt_debug, "debug"}, 876 {Opt_debug, "debug"},
939 {Opt_oldalloc, "oldalloc"}, 877 {Opt_oldalloc, "oldalloc"},
940 {Opt_orlov, "orlov"}, 878 {Opt_orlov, "orlov"},
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
973 {Opt_extents, "extents"}, 911 {Opt_extents, "extents"},
974 {Opt_noextents, "noextents"}, 912 {Opt_noextents, "noextents"},
975 {Opt_i_version, "i_version"}, 913 {Opt_i_version, "i_version"},
976 {Opt_mballoc, "mballoc"},
977 {Opt_nomballoc, "nomballoc"},
978 {Opt_stripe, "stripe=%u"}, 914 {Opt_stripe, "stripe=%u"},
979 {Opt_resize, "resize"}, 915 {Opt_resize, "resize"},
980 {Opt_delalloc, "delalloc"}, 916 {Opt_delalloc, "delalloc"},
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
1073 case Opt_nouid32: 1009 case Opt_nouid32:
1074 set_opt(sbi->s_mount_opt, NO_UID32); 1010 set_opt(sbi->s_mount_opt, NO_UID32);
1075 break; 1011 break;
1076 case Opt_nocheck:
1077 clear_opt(sbi->s_mount_opt, CHECK);
1078 break;
1079 case Opt_debug: 1012 case Opt_debug:
1080 set_opt(sbi->s_mount_opt, DEBUG); 1013 set_opt(sbi->s_mount_opt, DEBUG);
1081 break; 1014 break;
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1618 if (block_bitmap < first_block || block_bitmap > last_block) { 1551 if (block_bitmap < first_block || block_bitmap > last_block) {
1619 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1552 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1620 "Block bitmap for group %lu not in group " 1553 "Block bitmap for group %lu not in group "
1621 "(block %llu)!", i, block_bitmap); 1554 "(block %llu)!\n", i, block_bitmap);
1622 return 0; 1555 return 0;
1623 } 1556 }
1624 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1557 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1625 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1558 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1626 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1559 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1627 "Inode bitmap for group %lu not in group " 1560 "Inode bitmap for group %lu not in group "
1628 "(block %llu)!", i, inode_bitmap); 1561 "(block %llu)!\n", i, inode_bitmap);
1629 return 0; 1562 return 0;
1630 } 1563 }
1631 inode_table = ext4_inode_table(sb, gdp); 1564 inode_table = ext4_inode_table(sb, gdp);
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1633 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1566 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1634 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1567 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1635 "Inode table for group %lu not in group " 1568 "Inode table for group %lu not in group "
1636 "(block %llu)!", i, inode_table); 1569 "(block %llu)!\n", i, inode_table);
1637 return 0; 1570 return 0;
1638 } 1571 }
1639 spin_lock(sb_bgl_lock(sbi, i)); 1572 spin_lock(sb_bgl_lock(sbi, i));
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1778 * 1711 *
1779 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 1712 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1780 */ 1713 */
1781static loff_t ext4_max_size(int blkbits) 1714static loff_t ext4_max_size(int blkbits, int has_huge_files)
1782{ 1715{
1783 loff_t res; 1716 loff_t res;
1784 loff_t upper_limit = MAX_LFS_FILESIZE; 1717 loff_t upper_limit = MAX_LFS_FILESIZE;
1785 1718
1786 /* small i_blocks in vfs inode? */ 1719 /* small i_blocks in vfs inode? */
1787 if (sizeof(blkcnt_t) < sizeof(u64)) { 1720 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1788 /* 1721 /*
1789 * CONFIG_LSF is not enabled implies the inode 1722 * CONFIG_LSF is not enabled implies the inode
1790 * i_block represent total blocks in 512 bytes 1723 * i_block represent total blocks in 512 bytes
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
1814 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 1747 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1815 * We need to be 1 filesystem block less than the 2^48 sector limit. 1748 * We need to be 1 filesystem block less than the 2^48 sector limit.
1816 */ 1749 */
1817static loff_t ext4_max_bitmap_size(int bits) 1750static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1818{ 1751{
1819 loff_t res = EXT4_NDIR_BLOCKS; 1752 loff_t res = EXT4_NDIR_BLOCKS;
1820 int meta_blocks; 1753 int meta_blocks;
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
1827 * total number of 512 bytes blocks of the file 1760 * total number of 512 bytes blocks of the file
1828 */ 1761 */
1829 1762
1830 if (sizeof(blkcnt_t) < sizeof(u64)) { 1763 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1831 /* 1764 /*
1832 * CONFIG_LSF is not enabled implies the inode 1765 * !has_huge_files or CONFIG_LSF is not enabled
1833 * i_block represent total blocks in 512 bytes 1766 * implies the inode i_block represent total blocks in
1834 * 32 == size of vfs inode i_blocks * 8 1767 * 512 bytes 32 == size of vfs inode i_blocks * 8
1835 */ 1768 */
1836 upper_limit = (1LL << 32) - 1; 1769 upper_limit = (1LL << 32) - 1;
1837 1770
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1940 int blocksize; 1873 int blocksize;
1941 int db_count; 1874 int db_count;
1942 int i; 1875 int i;
1943 int needs_recovery; 1876 int needs_recovery, has_huge_files;
1944 __le32 features; 1877 __le32 features;
1945 __u64 blocks_count; 1878 __u64 blocks_count;
1946 int err; 1879 int err;
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2081 sb->s_id, le32_to_cpu(features)); 2014 sb->s_id, le32_to_cpu(features));
2082 goto failed_mount; 2015 goto failed_mount;
2083 } 2016 }
2084 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2017 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2018 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2019 if (has_huge_files) {
2085 /* 2020 /*
2086 * Large file size enabled file system can only be 2021 * Large file size enabled file system can only be
2087 * mount if kernel is build with CONFIG_LSF 2022 * mount if kernel is build with CONFIG_LSF
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2131 } 2066 }
2132 } 2067 }
2133 2068
2134 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); 2069 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2135 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); 2070 has_huge_files);
2071 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2136 2072
2137 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2073 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2138 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2074 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2456 "available.\n"); 2392 "available.\n");
2457 } 2393 }
2458 2394
2395 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2396 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2397 "requested data journaling mode\n");
2398 clear_opt(sbi->s_mount_opt, DELALLOC);
2399 } else if (test_opt(sb, DELALLOC))
2400 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2401
2402 ext4_ext_init(sb);
2403 err = ext4_mb_init(sb, needs_recovery);
2404 if (err) {
2405 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2406 err);
2407 goto failed_mount4;
2408 }
2409
2459 /* 2410 /*
2460 * akpm: core read_super() calls in here with the superblock locked. 2411 * akpm: core read_super() calls in here with the superblock locked.
2461 * That deadlocks, because orphan cleanup needs to lock the superblock 2412 * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2475 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2426 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2476 "writeback"); 2427 "writeback");
2477 2428
2478 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2479 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2480 "requested data journaling mode\n");
2481 clear_opt(sbi->s_mount_opt, DELALLOC);
2482 } else if (test_opt(sb, DELALLOC))
2483 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2484
2485 ext4_ext_init(sb);
2486 err = ext4_mb_init(sb, needs_recovery);
2487 if (err) {
2488 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2489 err);
2490 goto failed_mount4;
2491 }
2492
2493 lock_kernel(); 2429 lock_kernel();
2494 return 0; 2430 return 0;
2495 2431
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 25adfc3c693a..d0ff0b8cf309 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -8,7 +8,7 @@
8 * pages against inodes. ie: data writeback. Writeout of the 8 * pages against inodes. ie: data writeback. Writeout of the
9 * inode itself is not handled here. 9 * inode itself is not handled here.
10 * 10 *
11 * 10Apr2002 akpm@zip.com.au 11 * 10Apr2002 Andrew Morton
12 * Split out of fs/inode.c 12 * Split out of fs/inode.c
13 * Additions for address_space-based writeback 13 * Additions for address_space-based writeback
14 */ 14 */
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..34930a964b82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
101 file->f_op = &fuse_direct_io_file_operations; 101 file->f_op = &fuse_direct_io_file_operations;
102 if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) 102 if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
103 invalidate_inode_pages2(inode->i_mapping); 103 invalidate_inode_pages2(inode->i_mapping);
104 if (outarg->open_flags & FOPEN_NONSEEKABLE)
105 nonseekable_open(inode, file);
104 ff->fh = outarg->fh; 106 ff->fh = outarg->fh;
105 file->private_data = fuse_file_get(ff); 107 file->private_data = fuse_file_get(ff);
106} 108}
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1448 mutex_lock(&inode->i_mutex); 1450 mutex_lock(&inode->i_mutex);
1449 switch (origin) { 1451 switch (origin) {
1450 case SEEK_END: 1452 case SEEK_END:
1453 retval = fuse_update_attributes(inode, NULL, file, NULL);
1454 if (retval)
1455 return retval;
1451 offset += i_size_read(inode); 1456 offset += i_size_read(inode);
1452 break; 1457 break;
1453 case SEEK_CUR: 1458 case SEEK_CUR:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd1..35accfdd747f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
6 See the file COPYING. 6 See the file COPYING.
7*/ 7*/
8 8
9#ifndef _FS_FUSE_I_H
10#define _FS_FUSE_I_H
11
9#include <linux/fuse.h> 12#include <linux/fuse.h>
10#include <linux/fs.h> 13#include <linux/fs.h>
11#include <linux/mount.h> 14#include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
655void fuse_release_nowrite(struct inode *inode); 658void fuse_release_nowrite(struct inode *inode);
656 659
657u64 fuse_get_attr_version(struct fuse_conn *fc); 660u64 fuse_get_attr_version(struct fuse_conn *fc);
661
662#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacff..54b1f0e1ef58 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
865 if (is_bdev) { 865 if (is_bdev) {
866 fc->destroy_req = fuse_request_alloc(); 866 fc->destroy_req = fuse_request_alloc();
867 if (!fc->destroy_req) 867 if (!fc->destroy_req)
868 goto err_put_root; 868 goto err_free_init_req;
869 } 869 }
870 870
871 mutex_lock(&fuse_mutex); 871 mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
895 895
896 err_unlock: 896 err_unlock:
897 mutex_unlock(&fuse_mutex); 897 mutex_unlock(&fuse_mutex);
898 err_free_init_req:
898 fuse_request_free(init_req); 899 fuse_request_free(init_req);
899 err_put_root: 900 err_put_root:
900 dput(root_dentry); 901 dput(root_dentry);
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index ba851576ebb1..6d98f116ca03 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -190,6 +190,10 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
190 190
191 fd->search_key->cat.ParID = rec.thread.ParID; 191 fd->search_key->cat.ParID = rec.thread.ParID;
192 len = fd->search_key->cat.CName.len = rec.thread.CName.len; 192 len = fd->search_key->cat.CName.len = rec.thread.CName.len;
193 if (len > HFS_NAMELEN) {
194 printk(KERN_ERR "hfs: bad catalog namelength\n");
195 return -EIO;
196 }
193 memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); 197 memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len);
194 return hfs_brec_find(fd); 198 return hfs_brec_find(fd);
195} 199}
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index d128a25b74d2..ea30afc2a03c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -32,6 +32,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
32 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); 32 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; 33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
34 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); 34 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL);
35 if (IS_ERR(page)) {
36 start = size;
37 goto out;
38 }
35 pptr = kmap(page); 39 pptr = kmap(page);
36 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; 40 curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
37 i = offset % 32; 41 i = offset % 32;
@@ -73,6 +77,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
73 break; 77 break;
74 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, 78 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
75 NULL); 79 NULL);
80 if (IS_ERR(page)) {
81 start = size;
82 goto out;
83 }
76 curr = pptr = kmap(page); 84 curr = pptr = kmap(page);
77 if ((size ^ offset) / PAGE_CACHE_BITS) 85 if ((size ^ offset) / PAGE_CACHE_BITS)
78 end = pptr + PAGE_CACHE_BITS / 32; 86 end = pptr + PAGE_CACHE_BITS / 32;
@@ -120,6 +128,10 @@ found:
120 offset += PAGE_CACHE_BITS; 128 offset += PAGE_CACHE_BITS;
121 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, 129 page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
122 NULL); 130 NULL);
131 if (IS_ERR(page)) {
132 start = size;
133 goto out;
134 }
123 pptr = kmap(page); 135 pptr = kmap(page);
124 curr = pptr; 136 curr = pptr;
125 end = pptr + PAGE_CACHE_BITS / 32; 137 end = pptr + PAGE_CACHE_BITS / 32;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index ba117c445e78..f6874acb2cf2 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -168,6 +168,11 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
168 return -EIO; 168 return -EIO;
169 } 169 }
170 170
171 if (be16_to_cpu(tmp.thread.nodeName.length) > 255) {
172 printk(KERN_ERR "hfs: catalog name length corrupted\n");
173 return -EIO;
174 }
175
171 hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), 176 hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID),
172 &tmp.thread.nodeName); 177 &tmp.thread.nodeName);
173 return hfs_brec_find(fd); 178 return hfs_brec_find(fd);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227ff..0022eec63cda 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 if (inode->i_ino == HFSPLUS_EXT_CNID)
203 return -EIO;
204
202 mutex_lock(&HFSPLUS_I(inode).extents_lock); 205 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 206 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 207 if (!res) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b67..963be644297a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
254{ 254{
255 if (HFSPLUS_IS_RSRC(inode)) 255 if (HFSPLUS_IS_RSRC(inode))
256 inode = HFSPLUS_I(inode).rsrc_inode; 256 inode = HFSPLUS_I(inode).rsrc_inode;
257 if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
258 return -EOVERFLOW;
257 atomic_inc(&HFSPLUS_I(inode).opencnt); 259 atomic_inc(&HFSPLUS_I(inode).opencnt);
258 return 0; 260 return 0;
259} 261}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index e834e578c93f..eb74531a0a8e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -356,7 +356,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
356 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { 356 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) {
360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 000000000000..4e28beeed157
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
1config JBD
2 tristate
3 help
4 This is a generic journalling layer for block devices. It is
5 currently used by the ext3 file system, but it could also be
6 used to add journal support to other file systems or block
7 devices such as RAID or LVM.
8
9 If you are using the ext3 file system, you need to say Y here.
10 If you are not using ext3 then you will probably want to say N.
11
12 To compile this device as a module, choose M here: the module will be
13 called jbd. If you are compiling ext3 into the kernel, you
14 cannot compile this code as a module.
15
16config JBD_DEBUG
17 bool "JBD (ext3) debugging support"
18 depends on JBD && DEBUG_FS
19 help
20 If you are using the ext3 journaled file system (or potentially any
21 other file system/device using JBD), this option allows you to
22 enable debugging output while the system is running, in order to
23 help track down any problems you are having. By default the
24 debugging output will be turned off.
25
26 If you select Y here, then you will be able to turn on debugging
27 with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
28 number between 1 and 5, the higher the number, the more debugging
29 output is generated. To turn debugging off again, do
30 "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e751..25719d902c51 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
482 printk(KERN_WARNING 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data " 483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b)); 484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
486 journal_abort(journal, err);
485 err = 0; 487 err = 0;
486 } 488 }
487 489
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
518 jh = commit_transaction->t_buffers; 520 jh = commit_transaction->t_buffers;
519 521
520 /* If we're in abort mode, we just un-journal the buffer and 522 /* If we're in abort mode, we just un-journal the buffer and
521 release it for background writing. */ 523 release it. */
522 524
523 if (is_journal_aborted(journal)) { 525 if (is_journal_aborted(journal)) {
526 clear_buffer_jbddirty(jh2bh(jh));
524 JBUFFER_TRACE(jh, "journal is aborting: refile"); 527 JBUFFER_TRACE(jh, "journal is aborting: refile");
525 journal_refile_buffer(journal, jh); 528 journal_refile_buffer(journal, jh);
526 /* If that was the last one, we need to clean up 529 /* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
762 /* AKPM: bforget here */ 765 /* AKPM: bforget here */
763 } 766 }
764 767
768 if (err)
769 journal_abort(journal, err);
770
765 jbd_debug(3, "JBD: commit phase 6\n"); 771 jbd_debug(3, "JBD: commit phase 6\n");
766 772
767 if (journal_write_commit_record(journal, commit_transaction)) 773 if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
852 if (buffer_jbddirty(bh)) { 858 if (buffer_jbddirty(bh)) {
853 JBUFFER_TRACE(jh, "add to new checkpointing trans"); 859 JBUFFER_TRACE(jh, "add to new checkpointing trans");
854 __journal_insert_checkpoint(jh, commit_transaction); 860 __journal_insert_checkpoint(jh, commit_transaction);
861 if (is_journal_aborted(journal))
862 clear_buffer_jbddirty(bh);
855 JBUFFER_TRACE(jh, "refile for checkpoint writeback"); 863 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
856 __journal_refile_buffer(jh); 864 __journal_refile_buffer(jh);
857 jbd_unlock_bh_state(bh); 865 jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a446..d15cd6e7251e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
954 journal_t *journal = handle->h_transaction->t_journal; 954 journal_t *journal = handle->h_transaction->t_journal;
955 int need_brelse = 0; 955 int need_brelse = 0;
956 struct journal_head *jh; 956 struct journal_head *jh;
957 int ret = 0;
957 958
958 if (is_handle_aborted(handle)) 959 if (is_handle_aborted(handle))
959 return 0; 960 return ret;
960 961
961 jh = journal_add_journal_head(bh); 962 jh = journal_add_journal_head(bh);
962 JBUFFER_TRACE(jh, "entry"); 963 JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1067 time if it is redirtied */ 1068 time if it is redirtied */
1068 } 1069 }
1069 1070
1070 /* journal_clean_data_list() may have got there first */ 1071 /*
1072 * We cannot remove the buffer with io error from the
1073 * committing transaction, because otherwise it would
1074 * miss the error and the commit would not abort.
1075 */
1076 if (unlikely(!buffer_uptodate(bh))) {
1077 ret = -EIO;
1078 goto no_journal;
1079 }
1080
1071 if (jh->b_transaction != NULL) { 1081 if (jh->b_transaction != NULL) {
1072 JBUFFER_TRACE(jh, "unfile from commit"); 1082 JBUFFER_TRACE(jh, "unfile from commit");
1073 __journal_temp_unlink_buffer(jh); 1083 __journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
1108 } 1118 }
1109 JBUFFER_TRACE(jh, "exit"); 1119 JBUFFER_TRACE(jh, "exit");
1110 journal_put_journal_head(jh); 1120 journal_put_journal_head(jh);
1111 return 0; 1121 return ret;
1112} 1122}
1113 1123
1114/** 1124/**
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
new file mode 100644
index 000000000000..f32f346f4b0a
--- /dev/null
+++ b/fs/jbd2/Kconfig
@@ -0,0 +1,33 @@
1config JBD2
2 tristate
3 select CRC32
4 help
5 This is a generic journaling layer for block devices that support
6 both 32-bit and 64-bit block numbers. It is currently used by
7 the ext4 and OCFS2 filesystems, but it could also be used to add
8 journal support to other file systems or block devices such
9 as RAID or LVM.
10
11 If you are using ext4 or OCFS2, you need to say Y here.
12 If you are not using ext4 or OCFS2 then you will
13 probably want to say N.
14
15 To compile this device as a module, choose M here. The module will be
16 called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
17 you cannot compile this code as a module.
18
19config JBD2_DEBUG
20 bool "JBD2 (ext4) debugging support"
21 depends on JBD2 && DEBUG_FS
22 help
23 If you are using the ext4 journaled file system (or
24 potentially any other filesystem/device using JBD2), this option
25 allows you to enable debugging output while the system is running,
26 in order to help track down any problems you are having.
27 By default, the debugging output will be turned off.
28
29 If you select Y here, then you will be able to turn on debugging
30 with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
31 number between 1 and 5. The higher the number, the more debugging
32 output is generated. To turn debugging off again, do
33 "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0abe02c4242a..8b119e16aa36 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -995,6 +995,9 @@ restart_loop:
995 } 995 }
996 spin_unlock(&journal->j_list_lock); 996 spin_unlock(&journal->j_list_lock);
997 997
998 if (journal->j_commit_callback)
999 journal->j_commit_callback(journal, commit_transaction);
1000
998 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
999 journal->j_devname, commit_transaction->t_tid, 1002 journal->j_devname, commit_transaction->t_tid,
1000 journal->j_tail_sequence); 1003 journal->j_tail_sequence);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e5d540588fa9..39b7805a599a 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
52 transaction->t_expires = jiffies + journal->j_commit_interval; 52 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 53 spin_lock_init(&transaction->t_handle_lock);
54 INIT_LIST_HEAD(&transaction->t_inode_list); 54 INIT_LIST_HEAD(&transaction->t_inode_list);
55 INIT_LIST_HEAD(&transaction->t_private_list);
55 56
56 /* Set up the commit timer for the new transaction. */ 57 /* Set up the commit timer for the new transaction. */
57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); 58 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 000000000000..6ae169cd8faa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
1config JFFS2_FS
2 tristate "Journalling Flash File System v2 (JFFS2) support"
3 select CRC32
4 depends on MTD
5 help
6 JFFS2 is the second generation of the Journalling Flash File System
7 for use on diskless embedded devices. It provides improved wear
8 levelling, compression and support for hard links. You cannot use
9 this on normal block devices, only on 'MTD' devices.
10
11 Further information on the design and implementation of JFFS2 is
12 available at <http://sources.redhat.com/jffs2/>.
13
14config JFFS2_FS_DEBUG
15 int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
16 depends on JFFS2_FS
17 default "0"
18 help
19 This controls the amount of debugging messages produced by the JFFS2
20 code. Set it to zero for use in production systems. For evaluation,
21 testing and debugging, it's advisable to set it to one. This will
22 enable a few assertions and will print debugging messages at the
23 KERN_DEBUG loglevel, where they won't normally be visible. Level 2
24 is unlikely to be useful - it enables extra debugging in certain
25 areas which at one point needed debugging, but when the bugs were
26 located and fixed, the detailed messages were relegated to level 2.
27
28 If reporting bugs, please try to have available a full dump of the
29 messages at debug level 1 while the misbehaviour was occurring.
30
31config JFFS2_FS_WRITEBUFFER
32 bool "JFFS2 write-buffering support"
33 depends on JFFS2_FS
34 default y
35 help
36 This enables the write-buffering support in JFFS2.
37
38 This functionality is required to support JFFS2 on the following
39 types of flash devices:
40 - NAND flash
41 - NOR flash with transparent ECC
42 - DataFlash
43
44config JFFS2_FS_WBUF_VERIFY
45 bool "Verify JFFS2 write-buffer reads"
46 depends on JFFS2_FS_WRITEBUFFER
47 default n
48 help
49 This causes JFFS2 to read back every page written through the
50 write-buffer, and check for errors.
51
52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)"
54 depends on JFFS2_FS && EXPERIMENTAL
55 default n
56 help
57 This feature makes it possible to use summary information
58 for faster filesystem mount.
59
60 The summary information can be inserted into a filesystem image
61 by the utility 'sumtool'.
62
63 If unsure, say 'N'.
64
65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)"
67 depends on JFFS2_FS && EXPERIMENTAL
68 default n
69 help
70 Extended attributes are name:value pairs associated with inodes by
71 the kernel or by users (see the attr(5) manual page, or visit
72 <http://acl.bestbits.at/> for details).
73
74 If unsure, say N.
75
76config JFFS2_FS_POSIX_ACL
77 bool "JFFS2 POSIX Access Control Lists"
78 depends on JFFS2_FS_XATTR
79 default y
80 select FS_POSIX_ACL
81 help
82 Posix Access Control Lists (ACLs) support permissions for users and
83 groups beyond the owner/group/world scheme.
84
85 To learn more about Access Control Lists, visit the Posix ACLs for
86 Linux website <http://acl.bestbits.at/>.
87
88 If you don't know what Access Control Lists are, say N
89
90config JFFS2_FS_SECURITY
91 bool "JFFS2 Security Labels"
92 depends on JFFS2_FS_XATTR
93 default y
94 help
95 Security labels support alternative access control models
96 implemented by security modules like SELinux. This option
97 enables an extended attribute handler for file security
98 labels in the jffs2 filesystem.
99
100 If you are not using a security module that requires using
101 extended attributes for file security labels, say N.
102
103config JFFS2_COMPRESSION_OPTIONS
104 bool "Advanced compression options for JFFS2"
105 depends on JFFS2_FS
106 default n
107 help
108 Enabling this option allows you to explicitly choose which
109 compression modules, if any, are enabled in JFFS2. Removing
110 compressors can mean you cannot read existing file systems,
111 and enabling experimental compressors can mean that you
112 write a file system which cannot be read by a standard kernel.
113
114 If unsure, you should _definitely_ say 'N'.
115
116config JFFS2_ZLIB
117 bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
118 select ZLIB_INFLATE
119 select ZLIB_DEFLATE
120 depends on JFFS2_FS
121 default y
122 help
123 Zlib is designed to be a free, general-purpose, legally unencumbered,
124 lossless data-compression library for use on virtually any computer
125 hardware and operating system. See <http://www.gzip.org/zlib/> for
126 further information.
127
128 Say 'Y' if unsure.
129
130config JFFS2_LZO
131 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
132 select LZO_COMPRESS
133 select LZO_DECOMPRESS
134 depends on JFFS2_FS
135 default n
136 help
137 minilzo-based compression. Generally works better than Zlib.
138
139 This feature was added in July, 2007. Say 'N' if you need
140 compatibility with older bootloaders or kernels.
141
142config JFFS2_RTIME
143 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
144 depends on JFFS2_FS
145 default y
146 help
147 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
148
149config JFFS2_RUBIN
150 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
151 depends on JFFS2_FS
152 default n
153 help
154 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
155
156choice
157 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
158 default JFFS2_CMODE_PRIORITY
159 depends on JFFS2_FS
160 help
161 You can set here the default compression mode of JFFS2 from
162 the available compression modes. Don't touch if unsure.
163
164config JFFS2_CMODE_NONE
165 bool "no compression"
166 help
167 Uses no compression.
168
169config JFFS2_CMODE_PRIORITY
170 bool "priority"
171 help
172 Tries the compressors in a predefined order and chooses the first
173 successful one.
174
175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)"
177 help
178 Tries all compressors and chooses the one which has the smallest
179 result.
180
181config JFFS2_CMODE_FAVOURLZO
182 bool "Favour LZO"
183 help
184 Tries all compressors and chooses the one which has the smallest
185 result but gives some preference to LZO (which has faster
186 decompression) at the expense of size.
187
188endchoice
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b37..f25e70c1b51c 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
53} 53}
54 54
55/* jffs2_compress: 55/* jffs2_compress:
56 * @data: Pointer to uncompressed data 56 * @data_in: Pointer to uncompressed data
57 * @cdata: Pointer to returned pointer to buffer for compressed data 57 * @cpage_out: Pointer to returned pointer to buffer for compressed data
58 * @datalen: On entry, holds the amount of data available for compression. 58 * @datalen: On entry, holds the amount of data available for compression.
59 * On exit, expected to hold the amount of data actually compressed. 59 * On exit, expected to hold the amount of data actually compressed.
60 * @cdatalen: On entry, holds the amount of space available for compressed 60 * @cdatalen: On entry, holds the amount of space available for compressed
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef55254..b1aaae823a52 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
311 /* FIXME: If you care. We'd need to use frags for the target 311 /* FIXME: If you care. We'd need to use frags for the target
312 if it grows much more than this */ 312 if it grows much more than this */
313 if (targetlen > 254) 313 if (targetlen > 254)
314 return -EINVAL; 314 return -ENAMETOOLONG;
315 315
316 ri = jffs2_alloc_raw_inode(); 316 ri = jffs2_alloc_raw_inode();
317 317
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2c..259461b910af 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
68 instr->len = c->sector_size; 68 instr->len = c->sector_size;
69 instr->callback = jffs2_erase_callback; 69 instr->callback = jffs2_erase_callback;
70 instr->priv = (unsigned long)(&instr[1]); 70 instr->priv = (unsigned long)(&instr[1]);
71 instr->fail_addr = 0xffffffff; 71 instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
72 72
73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb; 73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
74 ((struct erase_priv_struct *)instr->priv)->c = c; 74 ((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
175{ 175{
176 /* For NAND, if the failure did not occur at the device level for a 176 /* For NAND, if the failure did not occur at the device level for a
177 specific physical page, don't bother updating the bad block table. */ 177 specific physical page, don't bother updating the bad block table. */
178 if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { 178 if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
179 /* We had a device-level failure to erase. Let's see if we've 179 /* We had a device-level failure to erase. Let's see if we've
180 failed too many times. */ 180 failed too many times. */
181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c43830221..249305d65d5b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
207 buf->f_files = 0; 207 buf->f_files = 0;
208 buf->f_ffree = 0; 208 buf->f_ffree = 0;
209 buf->f_namelen = JFFS2_MAX_NAME_LEN; 209 buf->f_namelen = JFFS2_MAX_NAME_LEN;
210 buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
211 buf->f_fsid.val[1] = c->mtd->index;
210 212
211 spin_lock(&c->erase_completion_lock); 213 spin_lock(&c->erase_completion_lock);
212 avail = c->dirty_size + c->free_size; 214 avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
440 442
441 memset(ri, 0, sizeof(*ri)); 443 memset(ri, 0, sizeof(*ri));
442 /* Set OS-specific defaults for new inodes */ 444 /* Set OS-specific defaults for new inodes */
443 ri->uid = cpu_to_je16(current->fsuid); 445 ri->uid = cpu_to_je16(current_fsuid());
444 446
445 if (dir_i->i_mode & S_ISGID) { 447 if (dir_i->i_mode & S_ISGID) {
446 ri->gid = cpu_to_je16(dir_i->i_gid); 448 ri->gid = cpu_to_je16(dir_i->i_gid);
447 if (S_ISDIR(mode)) 449 if (S_ISDIR(mode))
448 mode |= S_ISGID; 450 mode |= S_ISGID;
449 } else { 451 } else {
450 ri->gid = cpu_to_je16(current->fsgid); 452 ri->gid = cpu_to_je16(current_fsgid());
451 } 453 }
452 454
453 /* POSIX ACLs have to be processed now, at least partly. 455 /* POSIX ACLs have to be processed now, at least partly.
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1ba..0875b60b4bf7 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
261 261
262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */ 262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */
263 263
264 /* adjust write buffer offset, else we get a non contiguous write bug */
265 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
266 c->wbuf_ofs = 0xffffffff;
267
264 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); 268 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
265 269
266 return 0; 270 return 0;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e4..d9a721e6db70 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
679 679
680 memset(c->wbuf,0xff,c->wbuf_pagesize); 680 memset(c->wbuf,0xff,c->wbuf_pagesize);
681 /* adjust write buffer offset, else we get a non contiguous write bug */ 681 /* adjust write buffer offset, else we get a non contiguous write bug */
682 if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) 682 c->wbuf_ofs += c->wbuf_pagesize;
683 c->wbuf_ofs += c->wbuf_pagesize;
684 else
685 c->wbuf_ofs = 0xffffffff;
686 c->wbuf_len = 0; 683 c->wbuf_len = 0;
687 return 0; 684 return 0;
688} 685}
diff --git a/fs/mpage.c b/fs/mpage.c
index dbcc7af76a15..552b80b3facc 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -6,7 +6,7 @@
6 * Contains functions related to preparing and submitting BIOs which contain 6 * Contains functions related to preparing and submitting BIOs which contain
7 * multiple pagecache pages. 7 * multiple pagecache pages.
8 * 8 *
9 * 15May2002 akpm@zip.com.au 9 * 15May2002 Andrew Morton
10 * Initial version 10 * Initial version
11 * 27Jun2002 axboe@suse.de 11 * 27Jun2002 axboe@suse.de
12 * use bio_add_page() to build bio's just the right size 12 * use bio_add_page() to build bio's just the right size
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6a09760c5960..c2e9cfd9e5a4 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport;
40static const int nfs_set_port_min = 0; 40static const int nfs_set_port_min = 0;
41static const int nfs_set_port_max = 65535; 41static const int nfs_set_port_max = 65535;
42 42
43/*
44 * If the kernel has IPv6 support available, always listen for
45 * both AF_INET and AF_INET6 requests.
46 */
47#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
48static const sa_family_t nfs_callback_family = AF_INET6;
49#else
50static const sa_family_t nfs_callback_family = AF_INET;
51#endif
52
43static int param_set_port(const char *val, struct kernel_param *kp) 53static int param_set_port(const char *val, struct kernel_param *kp)
44{ 54{
45 char *endp; 55 char *endp;
@@ -106,7 +116,7 @@ int nfs_callback_up(void)
106 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) 116 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
107 goto out; 117 goto out;
108 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, 118 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
109 AF_INET, NULL); 119 nfs_callback_family, NULL);
110 ret = -ENOMEM; 120 ret = -ENOMEM;
111 if (!serv) 121 if (!serv)
112 goto out_err; 122 goto out_err;
@@ -116,7 +126,8 @@ int nfs_callback_up(void)
116 if (ret <= 0) 126 if (ret <= 0)
117 goto out_err; 127 goto out_err;
118 nfs_callback_tcpport = ret; 128 nfs_callback_tcpport = ret;
119 dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); 129 dprintk("NFS: Callback listener port = %u (af %u)\n",
130 nfs_callback_tcpport, nfs_callback_family);
120 131
121 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 132 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
122 if (IS_ERR(nfs_callback_info.rqst)) { 133 if (IS_ERR(nfs_callback_info.rqst)) {
@@ -149,8 +160,8 @@ out:
149 mutex_unlock(&nfs_callback_mutex); 160 mutex_unlock(&nfs_callback_mutex);
150 return ret; 161 return ret;
151out_err: 162out_err:
152 dprintk("Couldn't create callback socket or server thread; err = %d\n", 163 dprintk("NFS: Couldn't create callback socket or server thread; "
153 ret); 164 "err = %d\n", ret);
154 nfs_callback_info.users--; 165 nfs_callback_info.users--;
155 goto out; 166 goto out;
156} 167}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5ee23e7058b3..7547600b6174 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server,
675 server->nfs_client = clp; 675 server->nfs_client = clp;
676 676
677 /* Initialise the client representation from the mount data */ 677 /* Initialise the client representation from the mount data */
678 server->flags = data->flags & NFS_MOUNT_FLAGMASK; 678 server->flags = data->flags;
679 679
680 if (data->rsize) 680 if (data->rsize)
681 server->rsize = nfs_block_size(data->rsize, NULL); 681 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void)
850 INIT_LIST_HEAD(&server->client_link); 850 INIT_LIST_HEAD(&server->client_link);
851 INIT_LIST_HEAD(&server->master_link); 851 INIT_LIST_HEAD(&server->master_link);
852 852
853 init_waitqueue_head(&server->active_wq);
854 atomic_set(&server->active, 0); 853 atomic_set(&server->active, 0);
855 854
856 server->io_stats = nfs_alloc_iostats(); 855 server->io_stats = nfs_alloc_iostats();
@@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server,
1073 goto error; 1072 goto error;
1074 1073
1075 /* Initialise the client representation from the mount data */ 1074 /* Initialise the client representation from the mount data */
1076 server->flags = data->flags & NFS_MOUNT_FLAGMASK; 1075 server->flags = data->flags;
1077 server->caps |= NFS_CAP_ATOMIC_OPEN; 1076 server->caps |= NFS_CAP_ATOMIC_OPEN;
1078 1077
1079 if (data->rsize) 1078 if (data->rsize)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 74f92b717f78..efdba2e802d7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -156,6 +156,7 @@ typedef struct {
156 decode_dirent_t decode; 156 decode_dirent_t decode;
157 int plus; 157 int plus;
158 unsigned long timestamp; 158 unsigned long timestamp;
159 unsigned long gencount;
159 int timestamp_valid; 160 int timestamp_valid;
160} nfs_readdir_descriptor_t; 161} nfs_readdir_descriptor_t;
161 162
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
177 struct file *file = desc->file; 178 struct file *file = desc->file;
178 struct inode *inode = file->f_path.dentry->d_inode; 179 struct inode *inode = file->f_path.dentry->d_inode;
179 struct rpc_cred *cred = nfs_file_cred(file); 180 struct rpc_cred *cred = nfs_file_cred(file);
180 unsigned long timestamp; 181 unsigned long timestamp, gencount;
181 int error; 182 int error;
182 183
183 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", 184 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
186 187
187 again: 188 again:
188 timestamp = jiffies; 189 timestamp = jiffies;
190 gencount = nfs_inc_attr_generation_counter();
189 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, 191 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
190 NFS_SERVER(inode)->dtsize, desc->plus); 192 NFS_SERVER(inode)->dtsize, desc->plus);
191 if (error < 0) { 193 if (error < 0) {
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
199 goto error; 201 goto error;
200 } 202 }
201 desc->timestamp = timestamp; 203 desc->timestamp = timestamp;
204 desc->gencount = gencount;
202 desc->timestamp_valid = 1; 205 desc->timestamp_valid = 1;
203 SetPageUptodate(page); 206 SetPageUptodate(page);
204 /* Ensure consistent page alignment of the data. 207 /* Ensure consistent page alignment of the data.
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
224 if (IS_ERR(p)) 227 if (IS_ERR(p))
225 return PTR_ERR(p); 228 return PTR_ERR(p);
226 desc->ptr = p; 229 desc->ptr = p;
227 if (desc->timestamp_valid) 230 if (desc->timestamp_valid) {
228 desc->entry->fattr->time_start = desc->timestamp; 231 desc->entry->fattr->time_start = desc->timestamp;
229 else 232 desc->entry->fattr->gencount = desc->gencount;
233 } else
230 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; 234 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
231 return 0; 235 return 0;
232} 236}
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
471 struct rpc_cred *cred = nfs_file_cred(file); 475 struct rpc_cred *cred = nfs_file_cred(file);
472 struct page *page = NULL; 476 struct page *page = NULL;
473 int status; 477 int status;
474 unsigned long timestamp; 478 unsigned long timestamp, gencount;
475 479
476 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 480 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
477 (unsigned long long)*desc->dir_cookie); 481 (unsigned long long)*desc->dir_cookie);
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
482 goto out; 486 goto out;
483 } 487 }
484 timestamp = jiffies; 488 timestamp = jiffies;
489 gencount = nfs_inc_attr_generation_counter();
485 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, 490 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
486 *desc->dir_cookie, page, 491 *desc->dir_cookie, page,
487 NFS_SERVER(inode)->dtsize, 492 NFS_SERVER(inode)->dtsize,
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
490 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 495 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
491 if (status >= 0) { 496 if (status >= 0) {
492 desc->timestamp = timestamp; 497 desc->timestamp = timestamp;
498 desc->gencount = gencount;
493 desc->timestamp_valid = 1; 499 desc->timestamp_valid = 1;
494 if ((status = dir_decode(desc)) == 0) 500 if ((status = dir_decode(desc)) == 0)
495 desc->entry->prev_cookie = *desc->dir_cookie; 501 desc->entry->prev_cookie = *desc->dir_cookie;
@@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
655 */ 661 */
656void nfs_force_lookup_revalidate(struct inode *dir) 662void nfs_force_lookup_revalidate(struct inode *dir)
657{ 663{
658 NFS_I(dir)->cache_change_attribute = jiffies; 664 NFS_I(dir)->cache_change_attribute++;
659} 665}
660 666
661/* 667/*
@@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
667{ 673{
668 if (IS_ROOT(dentry)) 674 if (IS_ROOT(dentry))
669 return 1; 675 return 1;
676 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
677 return 0;
670 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 678 if (!nfs_verify_change_attribute(dir, dentry->d_time))
671 return 0; 679 return 0;
672 /* Revalidate nfsi->cache_change_attribute before we declare a match */ 680 /* Revalidate nfsi->cache_change_attribute before we declare a match */
@@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
750 /* Don't revalidate a negative dentry if we're creating a new file */ 758 /* Don't revalidate a negative dentry if we're creating a new file */
751 if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) 759 if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
752 return 0; 760 return 0;
761 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
762 return 1;
753 return !nfs_check_verifier(dir, dentry); 763 return !nfs_check_verifier(dir, dentry);
754} 764}
755 765
@@ -1507,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1507 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, 1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1508 GFP_KERNEL)) { 1518 GFP_KERNEL)) {
1509 pagevec_add(&lru_pvec, page); 1519 pagevec_add(&lru_pvec, page);
1510 pagevec_lru_add(&lru_pvec); 1520 pagevec_lru_add_file(&lru_pvec);
1511 SetPageUptodate(page); 1521 SetPageUptodate(page);
1512 unlock_page(page); 1522 unlock_page(page);
1513 } else 1523 } else
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 78460657f5cb..d319b49f8f06 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
188 /* origin == SEEK_END => we must revalidate the cached file length */ 188 /* origin == SEEK_END => we must revalidate the cached file length */
189 if (origin == SEEK_END) { 189 if (origin == SEEK_END) {
190 struct inode *inode = filp->f_mapping->host; 190 struct inode *inode = filp->f_mapping->host;
191
191 int retval = nfs_revalidate_file_size(inode, filp); 192 int retval = nfs_revalidate_file_size(inode, filp);
192 if (retval < 0) 193 if (retval < 0)
193 return (loff_t)retval; 194 return (loff_t)retval;
194 } 195
195 lock_kernel(); /* BKL needed? */ 196 spin_lock(&inode->i_lock);
196 loff = generic_file_llseek_unlocked(filp, offset, origin); 197 loff = generic_file_llseek_unlocked(filp, offset, origin);
197 unlock_kernel(); 198 spin_unlock(&inode->i_lock);
199 } else
200 loff = generic_file_llseek_unlocked(filp, offset, origin);
198 return loff; 201 return loff;
199} 202}
200 203
@@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
699 filp->f_path.dentry->d_name.name, 702 filp->f_path.dentry->d_name.name,
700 fl->fl_type, fl->fl_flags); 703 fl->fl_type, fl->fl_flags);
701 704
702 /*
703 * No BSD flocks over NFS allowed.
704 * Note: we could try to fake a POSIX lock request here by
705 * using ((u32) filp | 0x80000000) or some such as the pid.
706 * Not sure whether that would be unique, though, or whether
707 * that would break in other places.
708 */
709 if (!(fl->fl_flags & FL_FLOCK)) 705 if (!(fl->fl_flags & FL_FLOCK))
710 return -ENOLCK; 706 return -ENOLCK;
711 707
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52daefa2f521..b9195c02a863 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
305 init_special_inode(inode, inode->i_mode, fattr->rdev); 305 init_special_inode(inode, inode->i_mode, fattr->rdev);
306 306
307 nfsi->read_cache_jiffies = fattr->time_start; 307 nfsi->read_cache_jiffies = fattr->time_start;
308 nfsi->last_updated = now; 308 nfsi->attr_gencount = fattr->gencount;
309 nfsi->cache_change_attribute = now;
310 inode->i_atime = fattr->atime; 309 inode->i_atime = fattr->atime;
311 inode->i_mtime = fattr->mtime; 310 inode->i_mtime = fattr->mtime;
312 inode->i_ctime = fattr->ctime; 311 inode->i_ctime = fattr->ctime;
@@ -453,6 +452,7 @@ out_big:
453void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) 452void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
454{ 453{
455 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { 454 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
455 spin_lock(&inode->i_lock);
456 if ((attr->ia_valid & ATTR_MODE) != 0) { 456 if ((attr->ia_valid & ATTR_MODE) != 0) {
457 int mode = attr->ia_mode & S_IALLUGO; 457 int mode = attr->ia_mode & S_IALLUGO;
458 mode |= inode->i_mode & ~S_IALLUGO; 458 mode |= inode->i_mode & ~S_IALLUGO;
@@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
462 inode->i_uid = attr->ia_uid; 462 inode->i_uid = attr->ia_uid;
463 if ((attr->ia_valid & ATTR_GID) != 0) 463 if ((attr->ia_valid & ATTR_GID) != 0)
464 inode->i_gid = attr->ia_gid; 464 inode->i_gid = attr->ia_gid;
465 spin_lock(&inode->i_lock);
466 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 465 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
467 spin_unlock(&inode->i_lock); 466 spin_unlock(&inode->i_lock);
468 } 467 }
@@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
472 } 471 }
473} 472}
474 473
475static int nfs_wait_schedule(void *word)
476{
477 if (signal_pending(current))
478 return -ERESTARTSYS;
479 schedule();
480 return 0;
481}
482
483/*
484 * Wait for the inode to get unlocked.
485 */
486static int nfs_wait_on_inode(struct inode *inode)
487{
488 struct nfs_inode *nfsi = NFS_I(inode);
489 int error;
490
491 error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
492 nfs_wait_schedule, TASK_KILLABLE);
493
494 return error;
495}
496
497static void nfs_wake_up_inode(struct inode *inode)
498{
499 struct nfs_inode *nfsi = NFS_I(inode);
500
501 clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
502 smp_mb__after_clear_bit();
503 wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
504}
505
506int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 474int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
507{ 475{
508 struct inode *inode = dentry->d_inode; 476 struct inode *inode = dentry->d_inode;
@@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
697 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", 665 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
698 inode->i_sb->s_id, (long long)NFS_FILEID(inode)); 666 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
699 667
700 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
701 if (is_bad_inode(inode)) 668 if (is_bad_inode(inode))
702 goto out_nowait; 669 goto out;
703 if (NFS_STALE(inode)) 670 if (NFS_STALE(inode))
704 goto out_nowait;
705
706 status = nfs_wait_on_inode(inode);
707 if (status < 0)
708 goto out; 671 goto out;
709 672
710 status = -ESTALE;
711 if (NFS_STALE(inode)) 673 if (NFS_STALE(inode))
712 goto out; 674 goto out;
713 675
676 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
714 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 677 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
715 if (status != 0) { 678 if (status != 0) {
716 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", 679 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
@@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
724 goto out; 687 goto out;
725 } 688 }
726 689
727 spin_lock(&inode->i_lock); 690 status = nfs_refresh_inode(inode, &fattr);
728 status = nfs_update_inode(inode, &fattr);
729 if (status) { 691 if (status) {
730 spin_unlock(&inode->i_lock);
731 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", 692 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
732 inode->i_sb->s_id, 693 inode->i_sb->s_id,
733 (long long)NFS_FILEID(inode), status); 694 (long long)NFS_FILEID(inode), status);
734 goto out; 695 goto out;
735 } 696 }
736 spin_unlock(&inode->i_lock);
737 697
738 if (nfsi->cache_validity & NFS_INO_INVALID_ACL) 698 if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
739 nfs_zap_acl_cache(inode); 699 nfs_zap_acl_cache(inode);
@@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
743 (long long)NFS_FILEID(inode)); 703 (long long)NFS_FILEID(inode));
744 704
745 out: 705 out:
746 nfs_wake_up_inode(inode);
747
748 out_nowait:
749 return status; 706 return status;
750} 707}
751 708
@@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
908 return -EIO; 865 return -EIO;
909 } 866 }
910 867
911 /* Do atomic weak cache consistency updates */
912 nfs_wcc_update_inode(inode, fattr);
913
914 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 868 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
915 nfsi->change_attr != fattr->change_attr) 869 nfsi->change_attr != fattr->change_attr)
916 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 870 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
@@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
939 893
940 if (invalid != 0) 894 if (invalid != 0)
941 nfsi->cache_validity |= invalid; 895 nfsi->cache_validity |= invalid;
942 else
943 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
944 | NFS_INO_INVALID_ATIME
945 | NFS_INO_REVAL_PAGECACHE);
946 896
947 nfsi->read_cache_jiffies = fattr->time_start; 897 nfsi->read_cache_jiffies = fattr->time_start;
948 return 0; 898 return 0;
949} 899}
950 900
901static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
902{
903 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
904}
905
906static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
907{
908 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
909}
910
911static unsigned long nfs_attr_generation_counter;
912
913static unsigned long nfs_read_attr_generation_counter(void)
914{
915 smp_rmb();
916 return nfs_attr_generation_counter;
917}
918
919unsigned long nfs_inc_attr_generation_counter(void)
920{
921 unsigned long ret;
922 smp_rmb();
923 ret = ++nfs_attr_generation_counter;
924 smp_wmb();
925 return ret;
926}
927
928void nfs_fattr_init(struct nfs_fattr *fattr)
929{
930 fattr->valid = 0;
931 fattr->time_start = jiffies;
932 fattr->gencount = nfs_inc_attr_generation_counter();
933}
934
935/**
936 * nfs_inode_attrs_need_update - check if the inode attributes need updating
937 * @inode - pointer to inode
938 * @fattr - attributes
939 *
940 * Attempt to divine whether or not an RPC call reply carrying stale
941 * attributes got scheduled after another call carrying updated ones.
942 *
943 * To do so, the function first assumes that a more recent ctime means
944 * that the attributes in fattr are newer, however it also attempt to
945 * catch the case where ctime either didn't change, or went backwards
946 * (if someone reset the clock on the server) by looking at whether
947 * or not this RPC call was started after the inode was last updated.
948 * Note also the check for wraparound of 'attr_gencount'
949 *
950 * The function returns 'true' if it thinks the attributes in 'fattr' are
951 * more recent than the ones cached in the inode.
952 *
953 */
954static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
955{
956 const struct nfs_inode *nfsi = NFS_I(inode);
957
958 return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
959 nfs_ctime_need_update(inode, fattr) ||
960 nfs_size_need_update(inode, fattr) ||
961 ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
962}
963
964static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
965{
966 if (nfs_inode_attrs_need_update(inode, fattr))
967 return nfs_update_inode(inode, fattr);
968 return nfs_check_inode_attributes(inode, fattr);
969}
970
951/** 971/**
952 * nfs_refresh_inode - try to update the inode attribute cache 972 * nfs_refresh_inode - try to update the inode attribute cache
953 * @inode - pointer to inode 973 * @inode - pointer to inode
@@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
960 */ 980 */
961int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) 981int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
962{ 982{
963 struct nfs_inode *nfsi = NFS_I(inode);
964 int status; 983 int status;
965 984
966 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 985 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
967 return 0; 986 return 0;
968 spin_lock(&inode->i_lock); 987 spin_lock(&inode->i_lock);
969 if (time_after(fattr->time_start, nfsi->last_updated)) 988 status = nfs_refresh_inode_locked(inode, fattr);
970 status = nfs_update_inode(inode, fattr);
971 else
972 status = nfs_check_inode_attributes(inode, fattr);
973
974 spin_unlock(&inode->i_lock); 989 spin_unlock(&inode->i_lock);
975 return status; 990 return status;
976} 991}
977 992
993static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
994{
995 struct nfs_inode *nfsi = NFS_I(inode);
996
997 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
998 if (S_ISDIR(inode->i_mode))
999 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
1000 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1001 return 0;
1002 return nfs_refresh_inode_locked(inode, fattr);
1003}
1004
978/** 1005/**
979 * nfs_post_op_update_inode - try to update the inode attribute cache 1006 * nfs_post_op_update_inode - try to update the inode attribute cache
980 * @inode - pointer to inode 1007 * @inode - pointer to inode
@@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
991 */ 1018 */
992int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1019int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
993{ 1020{
994 struct nfs_inode *nfsi = NFS_I(inode); 1021 int status;
995 1022
996 spin_lock(&inode->i_lock); 1023 spin_lock(&inode->i_lock);
997 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1024 status = nfs_post_op_update_inode_locked(inode, fattr);
998 if (S_ISDIR(inode->i_mode))
999 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
1000 spin_unlock(&inode->i_lock); 1025 spin_unlock(&inode->i_lock);
1001 return nfs_refresh_inode(inode, fattr); 1026 return status;
1002} 1027}
1003 1028
1004/** 1029/**
@@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1014 */ 1039 */
1015int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) 1040int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
1016{ 1041{
1042 int status;
1043
1044 spin_lock(&inode->i_lock);
1045 /* Don't do a WCC update if these attributes are already stale */
1046 if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
1047 !nfs_inode_attrs_need_update(inode, fattr)) {
1048 fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC);
1049 goto out_noforce;
1050 }
1017 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 1051 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
1018 (fattr->valid & NFS_ATTR_WCC_V4) == 0) { 1052 (fattr->valid & NFS_ATTR_WCC_V4) == 0) {
1019 fattr->pre_change_attr = NFS_I(inode)->change_attr; 1053 fattr->pre_change_attr = NFS_I(inode)->change_attr;
@@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
1026 fattr->pre_size = i_size_read(inode); 1060 fattr->pre_size = i_size_read(inode);
1027 fattr->valid |= NFS_ATTR_WCC; 1061 fattr->valid |= NFS_ATTR_WCC;
1028 } 1062 }
1029 return nfs_post_op_update_inode(inode, fattr); 1063out_noforce:
1064 status = nfs_post_op_update_inode_locked(inode, fattr);
1065 spin_unlock(&inode->i_lock);
1066 return status;
1030} 1067}
1031 1068
1032/* 1069/*
@@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1092 } 1129 }
1093 /* If ctime has changed we should definitely clear access+acl caches */ 1130 /* If ctime has changed we should definitely clear access+acl caches */
1094 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) 1131 if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
1095 invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1132 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1096 } else if (nfsi->change_attr != fattr->change_attr) { 1133 } else if (nfsi->change_attr != fattr->change_attr) {
1097 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1134 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1098 inode->i_sb->s_id, inode->i_ino); 1135 inode->i_sb->s_id, inode->i_ino);
@@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1126 inode->i_gid != fattr->gid) 1163 inode->i_gid != fattr->gid)
1127 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1164 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1128 1165
1166 if (inode->i_nlink != fattr->nlink)
1167 invalid |= NFS_INO_INVALID_ATTR;
1168
1129 inode->i_mode = fattr->mode; 1169 inode->i_mode = fattr->mode;
1130 inode->i_nlink = fattr->nlink; 1170 inode->i_nlink = fattr->nlink;
1131 inode->i_uid = fattr->uid; 1171 inode->i_uid = fattr->uid;
@@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1145 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 1185 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
1146 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 1186 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1147 nfsi->attrtimeo_timestamp = now; 1187 nfsi->attrtimeo_timestamp = now;
1148 nfsi->last_updated = now; 1188 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1149 } else { 1189 } else {
1150 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { 1190 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
1151 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) 1191 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1152 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); 1192 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1153 nfsi->attrtimeo_timestamp = now; 1193 nfsi->attrtimeo_timestamp = now;
1154 } 1194 }
1155 /*
1156 * Avoid jiffy wraparound issues with nfsi->last_updated
1157 */
1158 if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
1159 nfsi->last_updated = nfsi->read_cache_jiffies;
1160 } 1195 }
1161 invalid &= ~NFS_INO_INVALID_ATTR; 1196 invalid &= ~NFS_INO_INVALID_ATTR;
1162 /* Don't invalidate the data if we were to blame */ 1197 /* Don't invalidate the data if we were to blame */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 24241fcbb98d..d212ee41caf2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *);
153void nfs_zap_acl_cache(struct inode *inode); 153void nfs_zap_acl_cache(struct inode *inode);
154 154
155/* super.c */ 155/* super.c */
156void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
156extern struct file_system_type nfs_xdev_fs_type; 157extern struct file_system_type nfs_xdev_fs_type;
157#ifdef CONFIG_NFS_V4 158#ifdef CONFIG_NFS_V4
158extern struct file_system_type nfs4_xdev_fs_type; 159extern struct file_system_type nfs4_xdev_fs_type;
@@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat;
163 164
164extern int __init register_nfs_fs(void); 165extern int __init register_nfs_fs(void);
165extern void __exit unregister_nfs_fs(void); 166extern void __exit unregister_nfs_fs(void);
166extern void nfs_sb_active(struct nfs_server *server); 167extern void nfs_sb_active(struct super_block *sb);
167extern void nfs_sb_deactive(struct nfs_server *server); 168extern void nfs_sb_deactive(struct super_block *sb);
168 169
169/* namespace.c */ 170/* namespace.c */
170extern char *nfs_path(const char *base, 171extern char *nfs_path(const char *base,
@@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
276 PAGE_SIZE - 1) >> PAGE_SHIFT; 277 PAGE_SIZE - 1) >> PAGE_SHIFT;
277} 278}
278 279
280#define IPV6_SCOPE_DELIMITER '%'
281
282/*
283 * Set the port number in an address. Be agnostic about the address
284 * family.
285 */
286static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
287{
288 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
289 struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
290
291 switch (sap->sa_family) {
292 case AF_INET:
293 ap->sin_port = htons(port);
294 break;
295 case AF_INET6:
296 ap6->sin6_port = htons(port);
297 break;
298 }
299}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 779d2eb649c5..086a6830d785 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,6 +14,7 @@
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/sched.h> 15#include <linux/sunrpc/sched.h>
16#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
17#include "internal.h"
17 18
18#ifdef RPC_DEBUG 19#ifdef RPC_DEBUG
19# define NFSDBG_FACILITY NFSDBG_MOUNT 20# define NFSDBG_FACILITY NFSDBG_MOUNT
@@ -98,7 +99,7 @@ out_call_err:
98 99
99out_mnt_err: 100out_mnt_err:
100 dprintk("NFS: MNT server returned result %d\n", result.status); 101 dprintk("NFS: MNT server returned result %d\n", result.status);
101 status = -EACCES; 102 status = nfs_stat_to_errno(result.status);
102 goto out; 103 goto out;
103} 104}
104 105
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 66df08dd1caf..64a288ee046d 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
105 105
106 dprintk("--> nfs_follow_mountpoint()\n"); 106 dprintk("--> nfs_follow_mountpoint()\n");
107 107
108 BUG_ON(IS_ROOT(dentry)); 108 err = -ESTALE;
109 if (IS_ROOT(dentry))
110 goto out_err;
111
109 dprintk("%s: enter\n", __func__); 112 dprintk("%s: enter\n", __func__);
110 dput(nd->path.dentry); 113 dput(nd->path.dentry);
111 nd->path.dentry = dget(dentry); 114 nd->path.dentry = dget(dentry);
@@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
189 struct nfs_clone_mount *mountdata) 192 struct nfs_clone_mount *mountdata)
190{ 193{
191#ifdef CONFIG_NFS_V4 194#ifdef CONFIG_NFS_V4
192 struct vfsmount *mnt = NULL; 195 struct vfsmount *mnt = ERR_PTR(-EINVAL);
193 switch (server->nfs_client->rpc_ops->version) { 196 switch (server->nfs_client->rpc_ops->version) {
194 case 2: 197 case 2:
195 case 3: 198 case 3:
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 423842f51ac9..cef62557c87d 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
229 229
230 dprintk("NFS call getacl\n"); 230 dprintk("NFS call getacl\n");
231 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; 231 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
232 nfs_fattr_init(&fattr);
232 status = rpc_call_sync(server->client_acl, &msg, 0); 233 status = rpc_call_sync(server->client_acl, &msg, 0);
233 dprintk("NFS reply getacl: %d\n", status); 234 dprintk("NFS reply getacl: %d\n", status);
234 235
@@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
322 323
323 dprintk("NFS call setacl\n"); 324 dprintk("NFS call setacl\n");
324 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; 325 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
326 nfs_fattr_init(&fattr);
325 status = rpc_call_sync(server->client_acl, &msg, 0); 327 status = rpc_call_sync(server->client_acl, &msg, 0);
326 nfs_access_zap_cache(inode); 328 nfs_access_zap_cache(inode);
327 nfs_zap_acl_cache(inode); 329 nfs_zap_acl_cache(inode);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1e750e4574a9..c55be7a7679e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
699} 699}
700 700
701static int 701static int
702nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, 702do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle,
703 struct nfs_fsinfo *info) 703 struct nfs_fsinfo *info)
704{ 704{
705 struct rpc_message msg = { 705 struct rpc_message msg = {
@@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
711 711
712 dprintk("NFS call fsinfo\n"); 712 dprintk("NFS call fsinfo\n");
713 nfs_fattr_init(info->fattr); 713 nfs_fattr_init(info->fattr);
714 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); 714 status = rpc_call_sync(client, &msg, 0);
715 dprintk("NFS reply fsinfo: %d\n", status); 715 dprintk("NFS reply fsinfo: %d\n", status);
716 return status; 716 return status;
717} 717}
718 718
719/*
720 * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via
721 * nfs_create_server
722 */
723static int
724nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
725 struct nfs_fsinfo *info)
726{
727 int status;
728
729 status = do_proc_fsinfo(server->client, fhandle, info);
730 if (status && server->nfs_client->cl_rpcclient != server->client)
731 status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info);
732 return status;
733}
734
719static int 735static int
720nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 736nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
721 struct nfs_pathconf *info) 737 struct nfs_pathconf *info)
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index b112857301f7..30befc39b3c6 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
93 return 0; 93 return 0;
94} 94}
95 95
96/* 96static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
97 * Check if the string represents a "valid" IPv4 address 97 char *page, char *page2,
98 */ 98 const struct nfs4_fs_location *location)
99static inline int valid_ipaddr4(const char *buf)
100{ 99{
101 int rc, count, in[4]; 100 struct vfsmount *mnt = ERR_PTR(-ENOENT);
102 101 char *mnt_path;
103 rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); 102 int page2len;
104 if (rc != 4) 103 unsigned int s;
105 return -EINVAL; 104
106 for (count = 0; count < 4; count++) { 105 mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
107 if (in[count] > 255) 106 if (IS_ERR(mnt_path))
108 return -EINVAL; 107 return mnt;
108 mountdata->mnt_path = mnt_path;
109 page2 += strlen(mnt_path) + 1;
110 page2len = PAGE_SIZE - strlen(mnt_path) - 1;
111
112 for (s = 0; s < location->nservers; s++) {
113 const struct nfs4_string *buf = &location->servers[s];
114 struct sockaddr_storage addr;
115
116 if (buf->len <= 0 || buf->len >= PAGE_SIZE)
117 continue;
118
119 mountdata->addr = (struct sockaddr *)&addr;
120
121 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
122 continue;
123 nfs_parse_ip_address(buf->data, buf->len,
124 mountdata->addr, &mountdata->addrlen);
125 if (mountdata->addr->sa_family == AF_UNSPEC)
126 continue;
127 nfs_set_port(mountdata->addr, NFS_PORT);
128
129 strncpy(page2, buf->data, page2len);
130 page2[page2len] = '\0';
131 mountdata->hostname = page2;
132
133 snprintf(page, PAGE_SIZE, "%s:%s",
134 mountdata->hostname,
135 mountdata->mnt_path);
136
137 mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
138 if (!IS_ERR(mnt))
139 break;
109 } 140 }
110 return 0; 141 return mnt;
111} 142}
112 143
113/** 144/**
@@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
128 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 159 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
129 }; 160 };
130 char *page = NULL, *page2 = NULL; 161 char *page = NULL, *page2 = NULL;
131 unsigned int s;
132 int loc, error; 162 int loc, error;
133 163
134 if (locations == NULL || locations->nlocations <= 0) 164 if (locations == NULL || locations->nlocations <= 0)
@@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
152 goto out; 182 goto out;
153 } 183 }
154 184
155 loc = 0; 185 for (loc = 0; loc < locations->nlocations; loc++) {
156 while (loc < locations->nlocations && IS_ERR(mnt)) {
157 const struct nfs4_fs_location *location = &locations->locations[loc]; 186 const struct nfs4_fs_location *location = &locations->locations[loc];
158 char *mnt_path;
159 187
160 if (location == NULL || location->nservers <= 0 || 188 if (location == NULL || location->nservers <= 0 ||
161 location->rootpath.ncomponents == 0) { 189 location->rootpath.ncomponents == 0)
162 loc++;
163 continue; 190 continue;
164 }
165 191
166 mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); 192 mnt = try_location(&mountdata, page, page2, location);
167 if (IS_ERR(mnt_path)) { 193 if (!IS_ERR(mnt))
168 loc++; 194 break;
169 continue;
170 }
171 mountdata.mnt_path = mnt_path;
172
173 s = 0;
174 while (s < location->nservers) {
175 struct sockaddr_in addr = {
176 .sin_family = AF_INET,
177 .sin_port = htons(NFS_PORT),
178 };
179
180 if (location->servers[s].len <= 0 ||
181 valid_ipaddr4(location->servers[s].data) < 0) {
182 s++;
183 continue;
184 }
185
186 mountdata.hostname = location->servers[s].data;
187 addr.sin_addr.s_addr = in_aton(mountdata.hostname),
188 mountdata.addr = (struct sockaddr *)&addr;
189 mountdata.addrlen = sizeof(addr);
190
191 snprintf(page, PAGE_SIZE, "%s:%s",
192 mountdata.hostname,
193 mountdata.mnt_path);
194
195 mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata);
196 if (!IS_ERR(mnt)) {
197 break;
198 }
199 s++;
200 }
201 loc++;
202 } 195 }
203 196
204out: 197out:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c910413eaeca..83e700a2b0c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1659 struct nfs_open_context *ctx; 1659 struct nfs_open_context *ctx;
1660 1660
1661 ctx = nfs_file_open_context(sattr->ia_file); 1661 ctx = nfs_file_open_context(sattr->ia_file);
1662 cred = ctx->cred; 1662 if (ctx) {
1663 state = ctx->state; 1663 cred = ctx->cred;
1664 state = ctx->state;
1665 }
1664 } 1666 }
1665 1667
1666 status = nfs4_do_setattr(inode, cred, fattr, sattr, state); 1668 status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4dbb84df1b68..193465210d7c 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
65 65
66 dprintk("%s: call getattr\n", __func__); 66 dprintk("%s: call getattr\n", __func__);
67 nfs_fattr_init(fattr); 67 nfs_fattr_init(fattr);
68 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); 68 status = rpc_call_sync(server->client, &msg, 0);
69 /* Retry with default authentication if different */
70 if (status && server->nfs_client->cl_rpcclient != server->client)
71 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
69 dprintk("%s: reply getattr: %d\n", __func__, status); 72 dprintk("%s: reply getattr: %d\n", __func__, status);
70 if (status) 73 if (status)
71 return status; 74 return status;
72 dprintk("%s: call statfs\n", __func__); 75 dprintk("%s: call statfs\n", __func__);
73 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; 76 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
74 msg.rpc_resp = &fsinfo; 77 msg.rpc_resp = &fsinfo;
75 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); 78 status = rpc_call_sync(server->client, &msg, 0);
79 /* Retry with default authentication if different */
80 if (status && server->nfs_client->cl_rpcclient != server->client)
81 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
76 dprintk("%s: reply statfs: %d\n", __func__, status); 82 dprintk("%s: reply statfs: %d\n", __func__, status);
77 if (status) 83 if (status)
78 return status; 84 return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ffb697416cb1..a3b0061dfd45 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -91,6 +91,7 @@ enum {
91 /* Mount options that take string arguments */ 91 /* Mount options that take string arguments */
92 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 92 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
93 Opt_addr, Opt_mountaddr, Opt_clientaddr, 93 Opt_addr, Opt_mountaddr, Opt_clientaddr,
94 Opt_lookupcache,
94 95
95 /* Special mount options */ 96 /* Special mount options */
96 Opt_userspace, Opt_deprecated, Opt_sloppy, 97 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -154,6 +155,8 @@ static const match_table_t nfs_mount_option_tokens = {
154 { Opt_mounthost, "mounthost=%s" }, 155 { Opt_mounthost, "mounthost=%s" },
155 { Opt_mountaddr, "mountaddr=%s" }, 156 { Opt_mountaddr, "mountaddr=%s" },
156 157
158 { Opt_lookupcache, "lookupcache=%s" },
159
157 { Opt_err, NULL } 160 { Opt_err, NULL }
158}; 161};
159 162
@@ -200,6 +203,22 @@ static const match_table_t nfs_secflavor_tokens = {
200 { Opt_sec_err, NULL } 203 { Opt_sec_err, NULL }
201}; 204};
202 205
206enum {
207 Opt_lookupcache_all, Opt_lookupcache_positive,
208 Opt_lookupcache_none,
209
210 Opt_lookupcache_err
211};
212
213static match_table_t nfs_lookupcache_tokens = {
214 { Opt_lookupcache_all, "all" },
215 { Opt_lookupcache_positive, "pos" },
216 { Opt_lookupcache_positive, "positive" },
217 { Opt_lookupcache_none, "none" },
218
219 { Opt_lookupcache_err, NULL }
220};
221
203 222
204static void nfs_umount_begin(struct super_block *); 223static void nfs_umount_begin(struct super_block *);
205static int nfs_statfs(struct dentry *, struct kstatfs *); 224static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru
209static int nfs_xdev_get_sb(struct file_system_type *fs_type, 228static int nfs_xdev_get_sb(struct file_system_type *fs_type,
210 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 229 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
211static void nfs_kill_super(struct super_block *); 230static void nfs_kill_super(struct super_block *);
212static void nfs_put_super(struct super_block *);
213static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 231static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
214 232
215static struct file_system_type nfs_fs_type = { 233static struct file_system_type nfs_fs_type = {
@@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = {
232 .alloc_inode = nfs_alloc_inode, 250 .alloc_inode = nfs_alloc_inode,
233 .destroy_inode = nfs_destroy_inode, 251 .destroy_inode = nfs_destroy_inode,
234 .write_inode = nfs_write_inode, 252 .write_inode = nfs_write_inode,
235 .put_super = nfs_put_super,
236 .statfs = nfs_statfs, 253 .statfs = nfs_statfs,
237 .clear_inode = nfs_clear_inode, 254 .clear_inode = nfs_clear_inode,
238 .umount_begin = nfs_umount_begin, 255 .umount_begin = nfs_umount_begin,
@@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void)
337 unregister_filesystem(&nfs_fs_type); 354 unregister_filesystem(&nfs_fs_type);
338} 355}
339 356
340void nfs_sb_active(struct nfs_server *server) 357void nfs_sb_active(struct super_block *sb)
341{ 358{
342 atomic_inc(&server->active); 359 struct nfs_server *server = NFS_SB(sb);
343}
344 360
345void nfs_sb_deactive(struct nfs_server *server) 361 if (atomic_inc_return(&server->active) == 1)
346{ 362 atomic_inc(&sb->s_active);
347 if (atomic_dec_and_test(&server->active))
348 wake_up(&server->active_wq);
349} 363}
350 364
351static void nfs_put_super(struct super_block *sb) 365void nfs_sb_deactive(struct super_block *sb)
352{ 366{
353 struct nfs_server *server = NFS_SB(sb); 367 struct nfs_server *server = NFS_SB(sb);
354 /* 368
355 * Make sure there are no outstanding ops to this server. 369 if (atomic_dec_and_test(&server->active))
356 * If so, wait for them to finish before allowing the 370 deactivate_super(sb);
357 * unmount to continue.
358 */
359 wait_event(server->active_wq, atomic_read(&server->active) == 0);
360} 371}
361 372
362/* 373/*
@@ -664,25 +675,6 @@ static void nfs_umount_begin(struct super_block *sb)
664} 675}
665 676
666/* 677/*
667 * Set the port number in an address. Be agnostic about the address family.
668 */
669static void nfs_set_port(struct sockaddr *sap, unsigned short port)
670{
671 switch (sap->sa_family) {
672 case AF_INET: {
673 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
674 ap->sin_port = htons(port);
675 break;
676 }
677 case AF_INET6: {
678 struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
679 ap->sin6_port = htons(port);
680 break;
681 }
682 }
683}
684
685/*
686 * Sanity-check a server address provided by the mount command. 678 * Sanity-check a server address provided by the mount command.
687 * 679 *
688 * Address family must be initialized, and address must not be 680 * Address family must be initialized, and address must not be
@@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len,
724 *addr_len = 0; 716 *addr_len = 0;
725} 717}
726 718
727#define IPV6_SCOPE_DELIMITER '%'
728
729#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 719#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
730static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, 720static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
731 const char *delim, 721 const char *delim,
732 struct sockaddr_in6 *sin6) 722 struct sockaddr_in6 *sin6)
733{ 723{
734 char *p; 724 char *p;
735 size_t len; 725 size_t len;
736 726
737 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 727 if ((string + str_len) == delim)
738 return ; 728 return 1;
729
739 if (*delim != IPV6_SCOPE_DELIMITER) 730 if (*delim != IPV6_SCOPE_DELIMITER)
740 return; 731 return 0;
732
733 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
734 return 0;
741 735
742 len = (string + str_len) - delim - 1; 736 len = (string + str_len) - delim - 1;
743 p = kstrndup(delim + 1, len, GFP_KERNEL); 737 p = kstrndup(delim + 1, len, GFP_KERNEL);
@@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
750 scope_id = dev->ifindex; 744 scope_id = dev->ifindex;
751 dev_put(dev); 745 dev_put(dev);
752 } else { 746 } else {
753 /* scope_id is set to zero on error */ 747 if (strict_strtoul(p, 10, &scope_id) == 0) {
754 strict_strtoul(p, 10, &scope_id); 748 kfree(p);
749 return 0;
750 }
755 } 751 }
756 752
757 kfree(p); 753 kfree(p);
754
758 sin6->sin6_scope_id = scope_id; 755 sin6->sin6_scope_id = scope_id;
759 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); 756 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
757 return 1;
760 } 758 }
759
760 return 0;
761} 761}
762 762
763static void nfs_parse_ipv6_address(char *string, size_t str_len, 763static void nfs_parse_ipv6_address(char *string, size_t str_len,
@@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
773 773
774 sin6->sin6_family = AF_INET6; 774 sin6->sin6_family = AF_INET6;
775 *addr_len = sizeof(*sin6); 775 *addr_len = sizeof(*sin6);
776 if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { 776 if (in6_pton(string, str_len, addr,
777 nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); 777 IPV6_SCOPE_DELIMITER, &delim) != 0) {
778 return; 778 if (nfs_parse_ipv6_scope_id(string, str_len,
779 delim, sin6) != 0)
780 return;
779 } 781 }
780 } 782 }
781 783
@@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
798 * If there is a problem constructing the new sockaddr, set the address 800 * If there is a problem constructing the new sockaddr, set the address
799 * family to AF_UNSPEC. 801 * family to AF_UNSPEC.
800 */ 802 */
801static void nfs_parse_ip_address(char *string, size_t str_len, 803void nfs_parse_ip_address(char *string, size_t str_len,
802 struct sockaddr *sap, size_t *addr_len) 804 struct sockaddr *sap, size_t *addr_len)
803{ 805{
804 unsigned int i, colons; 806 unsigned int i, colons;
@@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw,
1258 &mnt->mount_server.addrlen); 1260 &mnt->mount_server.addrlen);
1259 kfree(string); 1261 kfree(string);
1260 break; 1262 break;
1263 case Opt_lookupcache:
1264 string = match_strdup(args);
1265 if (string == NULL)
1266 goto out_nomem;
1267 token = match_token(string,
1268 nfs_lookupcache_tokens, args);
1269 kfree(string);
1270 switch (token) {
1271 case Opt_lookupcache_all:
1272 mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
1273 break;
1274 case Opt_lookupcache_positive:
1275 mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
1276 mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
1277 break;
1278 case Opt_lookupcache_none:
1279 mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
1280 break;
1281 default:
1282 errors++;
1283 dfprintk(MOUNT, "NFS: invalid "
1284 "lookupcache argument\n");
1285 };
1286 break;
1261 1287
1262 /* 1288 /*
1263 * Special options 1289 * Special options
@@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options,
1558 * Translate to nfs_parsed_mount_data, which nfs_fill_super 1584 * Translate to nfs_parsed_mount_data, which nfs_fill_super
1559 * can deal with. 1585 * can deal with.
1560 */ 1586 */
1561 args->flags = data->flags; 1587 args->flags = data->flags & NFS_MOUNT_FLAGMASK;
1562 args->rsize = data->rsize; 1588 args->rsize = data->rsize;
1563 args->wsize = data->wsize; 1589 args->wsize = data->wsize;
1564 args->timeo = data->timeo; 1590 args->timeo = data->timeo;
@@ -2433,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2433 compare_super = NULL; 2459 compare_super = NULL;
2434 2460
2435 /* Get a superblock - note that we may end up sharing one that already exists */ 2461 /* Get a superblock - note that we may end up sharing one that already exists */
2436 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2462 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2437 if (IS_ERR(s)) { 2463 if (IS_ERR(s)) {
2438 error = PTR_ERR(s); 2464 error = PTR_ERR(s);
2439 goto out_err_nosb; 2465 goto out_err_nosb;
@@ -2518,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2518 compare_super = NULL; 2544 compare_super = NULL;
2519 2545
2520 /* Get a superblock - note that we may end up sharing one that already exists */ 2546 /* Get a superblock - note that we may end up sharing one that already exists */
2521 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2547 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2522 if (IS_ERR(s)) { 2548 if (IS_ERR(s)) {
2523 error = PTR_ERR(s); 2549 error = PTR_ERR(s);
2524 goto out_err_nosb; 2550 goto out_err_nosb;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index f089e5839d7d..ecc295347775 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata)
99 99
100 nfs_dec_sillycount(data->dir); 100 nfs_dec_sillycount(data->dir);
101 nfs_free_unlinkdata(data); 101 nfs_free_unlinkdata(data);
102 nfs_sb_deactive(NFS_SB(sb)); 102 nfs_sb_deactive(sb);
103} 103}
104 104
105static const struct rpc_call_ops nfs_unlink_ops = { 105static const struct rpc_call_ops nfs_unlink_ops = {
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
118 .rpc_message = &msg, 118 .rpc_message = &msg,
119 .callback_ops = &nfs_unlink_ops, 119 .callback_ops = &nfs_unlink_ops,
120 .callback_data = data, 120 .callback_data = data,
121 .workqueue = nfsiod_workqueue,
121 .flags = RPC_TASK_ASYNC, 122 .flags = RPC_TASK_ASYNC,
122 }; 123 };
123 struct rpc_task *task; 124 struct rpc_task *task;
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
149 nfs_dec_sillycount(dir); 150 nfs_dec_sillycount(dir);
150 return 0; 151 return 0;
151 } 152 }
152 nfs_sb_active(NFS_SERVER(dir)); 153 nfs_sb_active(dir->i_sb);
153 data->args.fh = NFS_FH(dir); 154 data->args.fh = NFS_FH(dir);
154 nfs_fattr_init(&data->res.dir_attr); 155 nfs_fattr_init(&data->res.dir_attr);
155 156
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3229e217c773..9f9845859fc1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1427 .bdi = mapping->backing_dev_info, 1427 .bdi = mapping->backing_dev_info,
1428 .sync_mode = WB_SYNC_NONE, 1428 .sync_mode = WB_SYNC_NONE,
1429 .nr_to_write = LONG_MAX, 1429 .nr_to_write = LONG_MAX,
1430 .range_start = 0,
1431 .range_end = LLONG_MAX,
1430 .for_writepages = 1, 1432 .for_writepages = 1,
1431 .range_cyclic = 1,
1432 }; 1433 };
1433 int ret; 1434 int ret;
1434 1435
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 64965e1c21c4..9b0efdad8910 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -13,9 +13,7 @@
13#include <linux/nls.h> 13#include <linux/nls.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#ifdef CONFIG_KMOD
17#include <linux/kmod.h> 16#include <linux/kmod.h>
18#endif
19#include <linux/spinlock.h> 17#include <linux/spinlock.h>
20 18
21static struct nls_table default_table; 19static struct nls_table default_table;
@@ -215,24 +213,7 @@ static struct nls_table *find_nls(char *charset)
215 213
216struct nls_table *load_nls(char *charset) 214struct nls_table *load_nls(char *charset)
217{ 215{
218 struct nls_table *nls; 216 return try_then_request_module(find_nls(charset), "nls_%s", charset);
219#ifdef CONFIG_KMOD
220 int ret;
221#endif
222
223 nls = find_nls(charset);
224 if (nls)
225 return nls;
226
227#ifdef CONFIG_KMOD
228 ret = request_module("nls_%s", charset);
229 if (ret != 0) {
230 printk("Unable to load NLS charset %s\n", charset);
231 return NULL;
232 }
233 nls = find_nls(charset);
234#endif
235 return nls;
236} 217}
237 218
238void unload_nls(struct nls_table *nls) 219void unload_nls(struct nls_table *nls)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d4232..3140a4429af1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
439 pages[nr] = *cached_page; 439 pages[nr] = *cached_page;
440 page_cache_get(*cached_page); 440 page_cache_get(*cached_page);
441 if (unlikely(!pagevec_add(lru_pvec, *cached_page))) 441 if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
442 __pagevec_lru_add(lru_pvec); 442 __pagevec_lru_add_file(lru_pvec);
443 *cached_page = NULL; 443 *cached_page = NULL;
444 } 444 }
445 index++; 445 index++;
@@ -2084,7 +2084,7 @@ err_out:
2084 OSYNC_METADATA|OSYNC_DATA); 2084 OSYNC_METADATA|OSYNC_DATA);
2085 } 2085 }
2086 } 2086 }
2087 pagevec_lru_add(&lru_pvec); 2087 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2089 written ? "written" : "status", (unsigned long)written,
2090 (long)status); 2090 (long)status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c25780a70dfd..802c41492214 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2349,7 +2349,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2349 */ 2349 */
2350 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, 2350 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
2351 blk_per_bucket - 1, &xs->bucket.bhs[1], 2351 blk_per_bucket - 1, &xs->bucket.bhs[1],
2352 OCFS2_BH_CACHED); 2352 0);
2353 if (ret) { 2353 if (ret) {
2354 mlog_errno(ret); 2354 mlog_errno(ret);
2355 goto out; 2355 goto out;
@@ -2426,7 +2426,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2426 2426
2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { 2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, 2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
2429 bucket.bhs, OCFS2_BH_CACHED); 2429 bucket.bhs, 0);
2430 if (ret) { 2430 if (ret) {
2431 mlog_errno(ret); 2431 mlog_errno(ret);
2432 goto out; 2432 goto out;
@@ -2694,7 +2694,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2694 ret = ocfs2_read_blocks(inode, 2694 ret = ocfs2_read_blocks(inode,
2695 xs->bucket.bhs[0]->b_blocknr + 1, 2695 xs->bucket.bhs[0]->b_blocknr + 1,
2696 blk_per_bucket - 1, &xs->bucket.bhs[1], 2696 blk_per_bucket - 1, &xs->bucket.bhs[1],
2697 OCFS2_BH_CACHED); 2697 0);
2698 if (ret) { 2698 if (ret) {
2699 mlog_errno(ret); 2699 mlog_errno(ret);
2700 return ret; 2700 return ret;
@@ -2898,8 +2898,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2898 if (!bhs) 2898 if (!bhs)
2899 return -ENOMEM; 2899 return -ENOMEM;
2900 2900
2901 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 2901 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
2902 OCFS2_BH_CACHED);
2903 if (ret) 2902 if (ret)
2904 goto out; 2903 goto out;
2905 2904
@@ -3153,8 +3152,7 @@ static int ocfs2_read_xattr_bucket(struct inode *inode,
3153 3152
3154 if (!new) 3153 if (!new)
3155 return ocfs2_read_blocks(inode, blkno, 3154 return ocfs2_read_blocks(inode, blkno,
3156 blk_per_bucket, bhs, 3155 blk_per_bucket, bhs, 0);
3157 OCFS2_BH_CACHED);
3158 3156
3159 for (i = 0; i < blk_per_bucket; i++) { 3157 for (i = 0; i < blk_per_bucket; i++) {
3160 bhs[i] = sb_getblk(inode->i_sb, blkno + i); 3158 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
@@ -4101,7 +4099,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4101 ret = ocfs2_read_blocks(inode, 4099 ret = ocfs2_read_blocks(inode,
4102 xs->bucket.bhs[0]->b_blocknr + 1, 4100 xs->bucket.bhs[0]->b_blocknr + 1,
4103 blk_per_bucket - 1, &xs->bucket.bhs[1], 4101 blk_per_bucket - 1, &xs->bucket.bhs[1],
4104 OCFS2_BH_CACHED); 4102 0);
4105 if (ret) { 4103 if (ret) {
4106 mlog_errno(ret); 4104 mlog_errno(ret);
4107 goto out; 4105 goto out;
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index 3d3e16631472..a97b477ac0fc 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -275,16 +275,6 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
275 id = data[0x1fc] & 15; 275 id = data[0x1fc] & 15;
276 put_dev_sector(sect); 276 put_dev_sector(sect);
277 277
278#ifdef CONFIG_BLK_DEV_MFM
279 if (MAJOR(bdev->bd_dev) == MFM_ACORN_MAJOR) {
280 extern void xd_set_geometry(struct block_device *,
281 unsigned char, unsigned char, unsigned int);
282 xd_set_geometry(bdev, dr->secspertrack, heads, 1);
283 invalidate_bh_lrus();
284 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
285 }
286#endif
287
288 /* 278 /*
289 * Work out start of non-adfs partition. 279 * Work out start of non-adfs partition.
290 */ 280 */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7408227c49c9..cfb0c80690aa 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -195,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
195 return ERR_PTR(res); 195 return ERR_PTR(res);
196} 196}
197 197
198static ssize_t part_partition_show(struct device *dev,
199 struct device_attribute *attr, char *buf)
200{
201 struct hd_struct *p = dev_to_part(dev);
202
203 return sprintf(buf, "%d\n", p->partno);
204}
205
198static ssize_t part_start_show(struct device *dev, 206static ssize_t part_start_show(struct device *dev,
199 struct device_attribute *attr, char *buf) 207 struct device_attribute *attr, char *buf)
200{ 208{
@@ -260,6 +268,7 @@ ssize_t part_fail_store(struct device *dev,
260} 268}
261#endif 269#endif
262 270
271static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
263static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 272static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
264static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 273static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
265static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 274static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail =
269#endif 278#endif
270 279
271static struct attribute *part_attrs[] = { 280static struct attribute *part_attrs[] = {
281 &dev_attr_partition.attr,
272 &dev_attr_start.attr, 282 &dev_attr_start.attr,
273 &dev_attr_size.attr, 283 &dev_attr_size.attr,
274 &dev_attr_stat.attr, 284 &dev_attr_stat.attr,
@@ -538,10 +548,23 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
538 sector_t from = state->parts[p].from; 548 sector_t from = state->parts[p].from;
539 if (!size) 549 if (!size)
540 continue; 550 continue;
551 if (from >= get_capacity(disk)) {
552 printk(KERN_WARNING
553 "%s: p%d ignored, start %llu is behind the end of the disk\n",
554 disk->disk_name, p, (unsigned long long) from);
555 continue;
556 }
541 if (from + size > get_capacity(disk)) { 557 if (from + size > get_capacity(disk)) {
558 /*
559 * we can not ignore partitions of broken tables
560 * created by for example camera firmware, but we
561 * limit them to the end of the disk to avoid
562 * creating invalid block devices
563 */
542 printk(KERN_WARNING 564 printk(KERN_WARNING
543 "%s: p%d exceeds device capacity\n", 565 "%s: p%d size %llu limited to end of disk\n",
544 disk->disk_name, p); 566 disk->disk_name, p, (unsigned long long) size);
567 size = get_capacity(disk) - from;
545 } 568 }
546 res = add_partition(disk, p, from, size, state->parts[p].flags); 569 res = add_partition(disk, p, from, size, state->parts[p].flags);
547 if (res) { 570 if (res) {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f4bc0e789539..bb9f4b05703d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
388 388
389 /* add up live thread stats at the group level */ 389 /* add up live thread stats at the group level */
390 if (whole) { 390 if (whole) {
391 struct task_cputime cputime;
391 struct task_struct *t = task; 392 struct task_struct *t = task;
392 do { 393 do {
393 min_flt += t->min_flt; 394 min_flt += t->min_flt;
394 maj_flt += t->maj_flt; 395 maj_flt += t->maj_flt;
395 utime = cputime_add(utime, task_utime(t));
396 stime = cputime_add(stime, task_stime(t));
397 gtime = cputime_add(gtime, task_gtime(t)); 396 gtime = cputime_add(gtime, task_gtime(t));
398 t = next_thread(t); 397 t = next_thread(t);
399 } while (t != task); 398 } while (t != task);
400 399
401 min_flt += sig->min_flt; 400 min_flt += sig->min_flt;
402 maj_flt += sig->maj_flt; 401 maj_flt += sig->maj_flt;
403 utime = cputime_add(utime, sig->utime); 402 thread_group_cputime(task, &cputime);
404 stime = cputime_add(stime, sig->stime); 403 utime = cputime.utime;
404 stime = cputime.stime;
405 gtime = cputime_add(gtime, sig->gtime); 405 gtime = cputime_add(gtime, sig->gtime);
406 } 406 }
407 407
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 97b4579134d5..7ea52c79b2da 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -46,7 +46,6 @@
46#include <linux/blkdev.h> 46#include <linux/blkdev.h>
47#include <linux/hugetlb.h> 47#include <linux/hugetlb.h>
48#include <linux/jiffies.h> 48#include <linux/jiffies.h>
49#include <linux/sysrq.h>
50#include <linux/vmalloc.h> 49#include <linux/vmalloc.h>
51#include <linux/crash_dump.h> 50#include <linux/crash_dump.h>
52#include <linux/pid_namespace.h> 51#include <linux/pid_namespace.h>
@@ -138,6 +137,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
138 unsigned long allowed; 137 unsigned long allowed;
139 struct vmalloc_info vmi; 138 struct vmalloc_info vmi;
140 long cached; 139 long cached;
140 unsigned long pages[NR_LRU_LISTS];
141 int lru;
141 142
142/* 143/*
143 * display in kilobytes. 144 * display in kilobytes.
@@ -156,51 +157,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
156 157
157 get_vmalloc_info(&vmi); 158 get_vmalloc_info(&vmi);
158 159
160 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
161 pages[lru] = global_page_state(NR_LRU_BASE + lru);
162
159 /* 163 /*
160 * Tagged format, for easy grepping and expansion. 164 * Tagged format, for easy grepping and expansion.
161 */ 165 */
162 len = sprintf(page, 166 len = sprintf(page,
163 "MemTotal: %8lu kB\n" 167 "MemTotal: %8lu kB\n"
164 "MemFree: %8lu kB\n" 168 "MemFree: %8lu kB\n"
165 "Buffers: %8lu kB\n" 169 "Buffers: %8lu kB\n"
166 "Cached: %8lu kB\n" 170 "Cached: %8lu kB\n"
167 "SwapCached: %8lu kB\n" 171 "SwapCached: %8lu kB\n"
168 "Active: %8lu kB\n" 172 "Active: %8lu kB\n"
169 "Inactive: %8lu kB\n" 173 "Inactive: %8lu kB\n"
174 "Active(anon): %8lu kB\n"
175 "Inactive(anon): %8lu kB\n"
176 "Active(file): %8lu kB\n"
177 "Inactive(file): %8lu kB\n"
178#ifdef CONFIG_UNEVICTABLE_LRU
179 "Unevictable: %8lu kB\n"
180 "Mlocked: %8lu kB\n"
181#endif
170#ifdef CONFIG_HIGHMEM 182#ifdef CONFIG_HIGHMEM
171 "HighTotal: %8lu kB\n" 183 "HighTotal: %8lu kB\n"
172 "HighFree: %8lu kB\n" 184 "HighFree: %8lu kB\n"
173 "LowTotal: %8lu kB\n" 185 "LowTotal: %8lu kB\n"
174 "LowFree: %8lu kB\n" 186 "LowFree: %8lu kB\n"
175#endif 187#endif
176 "SwapTotal: %8lu kB\n" 188 "SwapTotal: %8lu kB\n"
177 "SwapFree: %8lu kB\n" 189 "SwapFree: %8lu kB\n"
178 "Dirty: %8lu kB\n" 190 "Dirty: %8lu kB\n"
179 "Writeback: %8lu kB\n" 191 "Writeback: %8lu kB\n"
180 "AnonPages: %8lu kB\n" 192 "AnonPages: %8lu kB\n"
181 "Mapped: %8lu kB\n" 193 "Mapped: %8lu kB\n"
182 "Slab: %8lu kB\n" 194 "Slab: %8lu kB\n"
183 "SReclaimable: %8lu kB\n" 195 "SReclaimable: %8lu kB\n"
184 "SUnreclaim: %8lu kB\n" 196 "SUnreclaim: %8lu kB\n"
185 "PageTables: %8lu kB\n" 197 "PageTables: %8lu kB\n"
186#ifdef CONFIG_QUICKLIST 198#ifdef CONFIG_QUICKLIST
187 "Quicklists: %8lu kB\n" 199 "Quicklists: %8lu kB\n"
188#endif 200#endif
189 "NFS_Unstable: %8lu kB\n" 201 "NFS_Unstable: %8lu kB\n"
190 "Bounce: %8lu kB\n" 202 "Bounce: %8lu kB\n"
191 "WritebackTmp: %8lu kB\n" 203 "WritebackTmp: %8lu kB\n"
192 "CommitLimit: %8lu kB\n" 204 "CommitLimit: %8lu kB\n"
193 "Committed_AS: %8lu kB\n" 205 "Committed_AS: %8lu kB\n"
194 "VmallocTotal: %8lu kB\n" 206 "VmallocTotal: %8lu kB\n"
195 "VmallocUsed: %8lu kB\n" 207 "VmallocUsed: %8lu kB\n"
196 "VmallocChunk: %8lu kB\n", 208 "VmallocChunk: %8lu kB\n",
197 K(i.totalram), 209 K(i.totalram),
198 K(i.freeram), 210 K(i.freeram),
199 K(i.bufferram), 211 K(i.bufferram),
200 K(cached), 212 K(cached),
201 K(total_swapcache_pages), 213 K(total_swapcache_pages),
202 K(global_page_state(NR_ACTIVE)), 214 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
203 K(global_page_state(NR_INACTIVE)), 215 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
216 K(pages[LRU_ACTIVE_ANON]),
217 K(pages[LRU_INACTIVE_ANON]),
218 K(pages[LRU_ACTIVE_FILE]),
219 K(pages[LRU_INACTIVE_FILE]),
220#ifdef CONFIG_UNEVICTABLE_LRU
221 K(pages[LRU_UNEVICTABLE]),
222 K(global_page_state(NR_MLOCK)),
223#endif
204#ifdef CONFIG_HIGHMEM 224#ifdef CONFIG_HIGHMEM
205 K(i.totalhigh), 225 K(i.totalhigh),
206 K(i.freehigh), 226 K(i.freehigh),
@@ -702,28 +722,6 @@ static int execdomains_read_proc(char *page, char **start, off_t off,
702 return proc_calc_metrics(page, start, off, count, eof, len); 722 return proc_calc_metrics(page, start, off, count, eof, len);
703} 723}
704 724
705#ifdef CONFIG_MAGIC_SYSRQ
706/*
707 * writing 'C' to /proc/sysrq-trigger is like sysrq-C
708 */
709static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
710 size_t count, loff_t *ppos)
711{
712 if (count) {
713 char c;
714
715 if (get_user(c, buf))
716 return -EFAULT;
717 __handle_sysrq(c, NULL, 0);
718 }
719 return count;
720}
721
722static const struct file_operations proc_sysrq_trigger_operations = {
723 .write = write_sysrq_trigger,
724};
725#endif
726
727#ifdef CONFIG_PROC_PAGE_MONITOR 725#ifdef CONFIG_PROC_PAGE_MONITOR
728#define KPMSIZE sizeof(u64) 726#define KPMSIZE sizeof(u64)
729#define KPMMASK (KPMSIZE - 1) 727#define KPMMASK (KPMSIZE - 1)
@@ -932,7 +930,4 @@ void __init proc_misc_init(void)
932#ifdef CONFIG_PROC_VMCORE 930#ifdef CONFIG_PROC_VMCORE
933 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); 931 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
934#endif 932#endif
935#ifdef CONFIG_MAGIC_SYSRQ
936 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
937#endif
938} 933}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a29..cd9ca67f841b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
32/* Total size of vmcore file. */ 32/* Total size of vmcore file. */
33static u64 vmcore_size; 33static u64 vmcore_size;
34 34
35/* Stores the physical address of elf header of crash image. */
36unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
37
38struct proc_dir_entry *proc_vmcore = NULL; 35struct proc_dir_entry *proc_vmcore = NULL;
39 36
40/* Reads a page from the oldmem device from given offset. */ 37/* Reads a page from the oldmem device from given offset. */
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
647 int rc = 0; 644 int rc = 0;
648 645
649 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ 646 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
650 if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX)) 647 if (!(is_vmcore_usable()))
651 return rc; 648 return rc;
652 rc = parse_crash_elf_headers(); 649 rc = parse_crash_elf_headers();
653 if (rc) { 650 if (rc) {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125af..76acdbc34611 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 goto add_error; 112 goto add_error;
113 113
114 if (!pagevec_add(&lru_pvec, page)) 114 if (!pagevec_add(&lru_pvec, page))
115 __pagevec_lru_add(&lru_pvec); 115 __pagevec_lru_add_file(&lru_pvec);
116 116
117 unlock_page(page); 117 unlock_page(page);
118 } 118 }
119 119
120 pagevec_lru_add(&lru_pvec); 120 pagevec_lru_add_file(&lru_pvec);
121 return 0; 121 return 0;
122 122
123 fsize_exceeded: 123 fsize_exceeded:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e49..f031d1c925f0 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
61 inode->i_mapping->a_ops = &ramfs_aops; 61 inode->i_mapping->a_ops = &ramfs_aops;
62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
64 mapping_set_unevictable(inode->i_mapping);
64 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 65 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
65 switch (mode & S_IFMT) { 66 switch (mode & S_IFMT) {
66 default: 67 default:
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index b9dbeeca7049..37173fa07d15 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -8,8 +8,6 @@
8 8
9/* proc info support a la one created by Sizif@Botik.RU for PGC */ 9/* proc info support a la one created by Sizif@Botik.RU for PGC */
10 10
11/* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */
12
13#include <linux/module.h> 11#include <linux/module.h>
14#include <linux/time.h> 12#include <linux/time.h>
15#include <linux/seq_file.h> 13#include <linux/seq_file.h>
@@ -621,7 +619,6 @@ int reiserfs_global_version_in_proc(char *buffer, char **start,
621#endif 619#endif
622 620
623/* 621/*
624 * $Log: procfs.c,v $
625 * Revision 1.1.8.2 2001/07/15 17:08:42 god 622 * Revision 1.1.8.2 2001/07/15 17:08:42 god
626 * . use get_super() in procfs.c 623 * . use get_super() in procfs.c
627 * . remove remove_save_link() from reiserfs_do_truncate() 624 * . remove remove_save_link() from reiserfs_do_truncate()
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index bb3cb5b7cdb2..ad92461cbfc3 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode,
155 xadir = open_xa_dir(inode, flags); 155 xadir = open_xa_dir(inode, flags);
156 if (IS_ERR(xadir)) { 156 if (IS_ERR(xadir)) {
157 return ERR_CAST(xadir); 157 return ERR_CAST(xadir);
158 } else if (xadir && !xadir->d_inode) { 158 } else if (!xadir->d_inode) {
159 dput(xadir); 159 dput(xadir);
160 return ERR_PTR(-ENODATA); 160 return ERR_PTR(-ENODATA);
161 } 161 }
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a933..eba2eabcd2b8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
452 452
453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) 453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
454{ 454{
455 size_t len = bitmap_scnprintf_len(nr_bits); 455 if (m->count < m->size) {
456 int len = bitmap_scnprintf(m->buf + m->count,
457 m->size - m->count, bits, nr_bits);
458 if (m->count + len < m->size) {
459 m->count += len;
460 return 0;
461 }
462 }
463 m->count = m->size;
464 return -1;
465}
466EXPORT_SYMBOL(seq_bitmap);
456 467
457 if (m->count + len < m->size) { 468int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
458 bitmap_scnprintf(m->buf + m->count, m->size - m->count, 469 unsigned int nr_bits)
459 bits, nr_bits); 470{
460 m->count += len; 471 if (m->count < m->size) {
461 return 0; 472 int len = bitmap_scnlistprintf(m->buf + m->count,
473 m->size - m->count, bits, nr_bits);
474 if (m->count + len < m->size) {
475 m->count += len;
476 return 0;
477 }
462 } 478 }
463 m->count = m->size; 479 m->count = m->size;
464 return -1; 480 return -1;
465} 481}
482EXPORT_SYMBOL(seq_bitmap_list);
466 483
467static void *single_start(struct seq_file *p, loff_t *pos) 484static void *single_start(struct seq_file *p, loff_t *pos)
468{ 485{
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 006fc64227dd..66f6e58a7e4b 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -61,6 +61,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
61 int size = dentry->d_inode->i_size; 61 int size = dentry->d_inode->i_size;
62 loff_t offs = *off; 62 loff_t offs = *off;
63 int count = min_t(size_t, bytes, PAGE_SIZE); 63 int count = min_t(size_t, bytes, PAGE_SIZE);
64 char *temp;
64 65
65 if (size) { 66 if (size) {
66 if (offs > size) 67 if (offs > size)
@@ -69,23 +70,33 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
69 count = size - offs; 70 count = size - offs;
70 } 71 }
71 72
73 temp = kmalloc(count, GFP_KERNEL);
74 if (!temp)
75 return -ENOMEM;
76
72 mutex_lock(&bb->mutex); 77 mutex_lock(&bb->mutex);
73 78
74 count = fill_read(dentry, bb->buffer, offs, count); 79 count = fill_read(dentry, bb->buffer, offs, count);
75 if (count < 0) 80 if (count < 0) {
76 goto out_unlock; 81 mutex_unlock(&bb->mutex);
82 goto out_free;
83 }
77 84
78 if (copy_to_user(userbuf, bb->buffer, count)) { 85 memcpy(temp, bb->buffer, count);
86
87 mutex_unlock(&bb->mutex);
88
89 if (copy_to_user(userbuf, temp, count)) {
79 count = -EFAULT; 90 count = -EFAULT;
80 goto out_unlock; 91 goto out_free;
81 } 92 }
82 93
83 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); 94 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
84 95
85 *off = offs + count; 96 *off = offs + count;
86 97
87 out_unlock: 98 out_free:
88 mutex_unlock(&bb->mutex); 99 kfree(temp);
89 return count; 100 return count;
90} 101}
91 102
@@ -118,6 +129,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
118 int size = dentry->d_inode->i_size; 129 int size = dentry->d_inode->i_size;
119 loff_t offs = *off; 130 loff_t offs = *off;
120 int count = min_t(size_t, bytes, PAGE_SIZE); 131 int count = min_t(size_t, bytes, PAGE_SIZE);
132 char *temp;
121 133
122 if (size) { 134 if (size) {
123 if (offs > size) 135 if (offs > size)
@@ -126,19 +138,27 @@ static ssize_t write(struct file *file, const char __user *userbuf,
126 count = size - offs; 138 count = size - offs;
127 } 139 }
128 140
129 mutex_lock(&bb->mutex); 141 temp = kmalloc(count, GFP_KERNEL);
142 if (!temp)
143 return -ENOMEM;
130 144
131 if (copy_from_user(bb->buffer, userbuf, count)) { 145 if (copy_from_user(temp, userbuf, count)) {
132 count = -EFAULT; 146 count = -EFAULT;
133 goto out_unlock; 147 goto out_free;
134 } 148 }
135 149
150 mutex_lock(&bb->mutex);
151
152 memcpy(bb->buffer, temp, count);
153
136 count = flush_write(dentry, bb->buffer, offs, count); 154 count = flush_write(dentry, bb->buffer, offs, count);
155 mutex_unlock(&bb->mutex);
156
137 if (count > 0) 157 if (count > 0)
138 *off = offs + count; 158 *off = offs + count;
139 159
140 out_unlock: 160out_free:
141 mutex_unlock(&bb->mutex); 161 kfree(temp);
142 return count; 162 return count;
143} 163}
144 164
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index aedaeba82ae5..3a05a596e3b4 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -370,17 +370,17 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
370 memset(acxt, 0, sizeof(*acxt)); 370 memset(acxt, 0, sizeof(*acxt));
371 acxt->parent_sd = parent_sd; 371 acxt->parent_sd = parent_sd;
372 372
373 /* Lookup parent inode. inode initialization and I_NEW 373 /* Lookup parent inode. inode initialization is protected by
374 * clearing are protected by sysfs_mutex. By grabbing it and 374 * sysfs_mutex, so inode existence can be determined by
375 * looking up with _nowait variant, inode state can be 375 * looking up inode while holding sysfs_mutex.
376 * determined reliably.
377 */ 376 */
378 mutex_lock(&sysfs_mutex); 377 mutex_lock(&sysfs_mutex);
379 378
380 inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, 379 inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
381 parent_sd); 380 parent_sd);
381 if (inode) {
382 WARN_ON(inode->i_state & I_NEW);
382 383
383 if (inode && !(inode->i_state & I_NEW)) {
384 /* parent inode available */ 384 /* parent inode available */
385 acxt->parent_inode = inode; 385 acxt->parent_inode = inode;
386 386
@@ -393,8 +393,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
393 mutex_lock(&inode->i_mutex); 393 mutex_lock(&inode->i_mutex);
394 mutex_lock(&sysfs_mutex); 394 mutex_lock(&sysfs_mutex);
395 } 395 }
396 } else 396 }
397 iput(inode);
398} 397}
399 398
400/** 399/**
@@ -636,6 +635,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
636 635
637 return sd; 636 return sd;
638} 637}
638EXPORT_SYMBOL_GPL(sysfs_get_dirent);
639 639
640static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, 640static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
641 const char *name, struct sysfs_dirent **p_sd) 641 const char *name, struct sysfs_dirent **p_sd)
@@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
829 if (!new_dentry) 829 if (!new_dentry)
830 goto out_unlock; 830 goto out_unlock;
831 831
832 /* rename kobject and sysfs_dirent */ 832 /* rename sysfs_dirent */
833 error = -ENOMEM; 833 error = -ENOMEM;
834 new_name = dup_name = kstrdup(new_name, GFP_KERNEL); 834 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
835 if (!new_name) 835 if (!new_name)
836 goto out_unlock; 836 goto out_unlock;
837 837
838 error = kobject_set_name(kobj, "%s", new_name);
839 if (error)
840 goto out_unlock;
841
842 dup_name = sd->s_name; 838 dup_name = sd->s_name;
843 sd->s_name = new_name; 839 sd->s_name = new_name;
844 840
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index c9e4e5091da1..1f4a3f877262 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -19,10 +19,18 @@
19#include <linux/poll.h> 19#include <linux/poll.h>
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/mutex.h> 21#include <linux/mutex.h>
22#include <linux/limits.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
24#include "sysfs.h" 25#include "sysfs.h"
25 26
27/* used in crash dumps to help with debugging */
28static char last_sysfs_file[PATH_MAX];
29void sysfs_printk_last_file(void)
30{
31 printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
32}
33
26/* 34/*
27 * There's one sysfs_buffer for each open file and one 35 * There's one sysfs_buffer for each open file and one
28 * sysfs_open_dirent for each sysfs_dirent with one or more open 36 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
328 struct sysfs_buffer *buffer; 336 struct sysfs_buffer *buffer;
329 struct sysfs_ops *ops; 337 struct sysfs_ops *ops;
330 int error = -EACCES; 338 int error = -EACCES;
339 char *p;
340
341 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
342 if (p)
343 memmove(last_sysfs_file, p, strlen(p) + 1);
331 344
332 /* need attr_sd for attr and ops, its parent for kobj */ 345 /* need attr_sd for attr and ops, its parent for kobj */
333 if (!sysfs_get_active_two(attr_sd)) 346 if (!sysfs_get_active_two(attr_sd))
@@ -440,7 +453,23 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
440 return POLLERR|POLLPRI; 453 return POLLERR|POLLPRI;
441} 454}
442 455
443void sysfs_notify(struct kobject *k, char *dir, char *attr) 456void sysfs_notify_dirent(struct sysfs_dirent *sd)
457{
458 struct sysfs_open_dirent *od;
459
460 spin_lock(&sysfs_open_dirent_lock);
461
462 od = sd->s_attr.open;
463 if (od) {
464 atomic_inc(&od->event);
465 wake_up_interruptible(&od->poll);
466 }
467
468 spin_unlock(&sysfs_open_dirent_lock);
469}
470EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
471
472void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
444{ 473{
445 struct sysfs_dirent *sd = k->sd; 474 struct sysfs_dirent *sd = k->sd;
446 475
@@ -450,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr)
450 sd = sysfs_find_dirent(sd, dir); 479 sd = sysfs_find_dirent(sd, dir);
451 if (sd && attr) 480 if (sd && attr)
452 sd = sysfs_find_dirent(sd, attr); 481 sd = sysfs_find_dirent(sd, attr);
453 if (sd) { 482 if (sd)
454 struct sysfs_open_dirent *od; 483 sysfs_notify_dirent(sd);
455
456 spin_lock(&sysfs_open_dirent_lock);
457
458 od = sd->s_attr.open;
459 if (od) {
460 atomic_inc(&od->event);
461 wake_up_interruptible(&od->poll);
462 }
463
464 spin_unlock(&sysfs_open_dirent_lock);
465 }
466 484
467 mutex_unlock(&sysfs_mutex); 485 mutex_unlock(&sysfs_mutex);
468} 486}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 14f0023984d7..ab343e371d64 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,6 +16,7 @@
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h>
19 20
20#include "sysfs.h" 21#include "sysfs.h"
21 22
@@ -115,3 +116,17 @@ out_err:
115 sysfs_dir_cachep = NULL; 116 sysfs_dir_cachep = NULL;
116 goto out; 117 goto out;
117} 118}
119
120#undef sysfs_get
121struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
122{
123 return __sysfs_get(sd);
124}
125EXPORT_SYMBOL_GPL(sysfs_get);
126
127#undef sysfs_put
128void sysfs_put(struct sysfs_dirent *sd)
129{
130 __sysfs_put(sd);
131}
132EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a5db496f71c7..93c6d6b27c4d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
124 struct sysfs_dirent **p_sd); 124 struct sysfs_dirent **p_sd);
125void sysfs_remove_subdir(struct sysfs_dirent *sd); 125void sysfs_remove_subdir(struct sysfs_dirent *sd);
126 126
127static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) 127static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
128{ 128{
129 if (sd) { 129 if (sd) {
130 WARN_ON(!atomic_read(&sd->s_count)); 130 WARN_ON(!atomic_read(&sd->s_count));
@@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
132 } 132 }
133 return sd; 133 return sd;
134} 134}
135#define sysfs_get(sd) __sysfs_get(sd)
135 136
136static inline void sysfs_put(struct sysfs_dirent *sd) 137static inline void __sysfs_put(struct sysfs_dirent *sd)
137{ 138{
138 if (sd && atomic_dec_and_test(&sd->s_count)) 139 if (sd && atomic_dec_and_test(&sd->s_count))
139 release_sysfs_dirent(sd); 140 release_sysfs_dirent(sd);
140} 141}
142#define sysfs_put(sd) __sysfs_put(sd)
141 143
142/* 144/*
143 * inode.c 145 * inode.c
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 73db464cd08b..1a4973e10664 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c)
414 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - 414 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
415 * @c->lst.taken_empty_lebs 415 * @c->lst.taken_empty_lebs
416 * 416 *
417 * @empty_lebs are available because they are empty. @freeable_cnt are 417 * @c->lst.empty_lebs are available because they are empty.
418 * available because they contain only free and dirty space and the 418 * @c->freeable_cnt are available because they contain only free and
419 * index allocation always occurs after wbufs are synch'ed. 419 * dirty space, @c->idx_gc_cnt are available because they are index
420 * @idx_gc_cnt are available because they are index LEBs that have been 420 * LEBs that have been garbage collected and are awaiting the commit
421 * garbage collected (including trivial GC) and are awaiting the commit 421 * before they can be used. And the in-the-gaps method will grab these
422 * before they can be unmapped - note that the in-the-gaps method will 422 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
423 * grab these if it needs them. @taken_empty_lebs are empty_lebs that 423 * already been allocated for some purpose.
424 * have already been allocated for some purpose (also includes those
425 * LEBs on the @idx_gc list).
426 * 424 *
427 * Note, @taken_empty_lebs may temporarily be higher by one because of 425 * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because
428 * the way we serialize LEB allocations and budgeting. See a comment in 426 * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they
429 * 'ubifs_find_free_space()'. 427 * are taken until after the commit).
428 *
429 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
430 * because of the way we serialize LEB allocations and budgeting. See a
431 * comment in 'ubifs_find_free_space()'.
430 */ 432 */
431 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 433 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
432 c->lst.taken_empty_lebs; 434 c->lst.taken_empty_lebs;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 5bb51dac3c16..a0ada596b17c 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
91 * 91 *
92 * Note, if the input buffer was not compressed, it is copied to the output 92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. 93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 *
95 * This functions returns %0 on success or a negative error code on failure.
96 */ 94 */
97void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 95void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
98 int *compr_type) 96 int *compr_type)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d7f7645779f2..7186400750e7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
222{ 222{
223 const struct ubifs_inode *ui = ubifs_inode(inode); 223 const struct ubifs_inode *ui = ubifs_inode(inode);
224 224
225 printk(KERN_DEBUG "inode %lu\n", inode->i_ino); 225 printk(KERN_DEBUG "Dump in-memory inode:");
226 printk(KERN_DEBUG "size %llu\n", 226 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino);
227 printk(KERN_DEBUG "\tsize %llu\n",
227 (unsigned long long)i_size_read(inode)); 228 (unsigned long long)i_size_read(inode));
228 printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); 229 printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink);
229 printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); 230 printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid);
230 printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); 231 printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid);
231 printk(KERN_DEBUG "atime %u.%u\n", 232 printk(KERN_DEBUG "\tatime %u.%u\n",
232 (unsigned int)inode->i_atime.tv_sec, 233 (unsigned int)inode->i_atime.tv_sec,
233 (unsigned int)inode->i_atime.tv_nsec); 234 (unsigned int)inode->i_atime.tv_nsec);
234 printk(KERN_DEBUG "mtime %u.%u\n", 235 printk(KERN_DEBUG "\tmtime %u.%u\n",
235 (unsigned int)inode->i_mtime.tv_sec, 236 (unsigned int)inode->i_mtime.tv_sec,
236 (unsigned int)inode->i_mtime.tv_nsec); 237 (unsigned int)inode->i_mtime.tv_nsec);
237 printk(KERN_DEBUG "ctime %u.%u\n", 238 printk(KERN_DEBUG "\tctime %u.%u\n",
238 (unsigned int)inode->i_ctime.tv_sec, 239 (unsigned int)inode->i_ctime.tv_sec,
239 (unsigned int)inode->i_ctime.tv_nsec); 240 (unsigned int)inode->i_ctime.tv_nsec);
240 printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); 241 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum);
241 printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); 242 printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size);
242 printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); 243 printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt);
243 printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); 244 printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names);
244 printk(KERN_DEBUG "dirty %u\n", ui->dirty); 245 printk(KERN_DEBUG "\tdirty %u\n", ui->dirty);
245 printk(KERN_DEBUG "xattr %u\n", ui->xattr); 246 printk(KERN_DEBUG "\txattr %u\n", ui->xattr);
246 printk(KERN_DEBUG "flags %d\n", ui->flags); 247 printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr);
247 printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); 248 printk(KERN_DEBUG "\tsynced_i_size %llu\n",
248 printk(KERN_DEBUG "data_len %d\n", ui->data_len); 249 (unsigned long long)ui->synced_i_size);
250 printk(KERN_DEBUG "\tui_size %llu\n",
251 (unsigned long long)ui->ui_size);
252 printk(KERN_DEBUG "\tflags %d\n", ui->flags);
253 printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type);
254 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
255 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
256 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
249} 257}
250 258
251void dbg_dump_node(const struct ubifs_info *c, const void *node) 259void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -647,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c)
647 } 655 }
648} 656}
649 657
658void dbg_dump_lpt_info(struct ubifs_info *c)
659{
660 int i;
661
662 spin_lock(&dbg_lock);
663 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz);
664 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz);
665 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz);
666 printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz);
667 printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz);
668 printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt);
669 printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght);
670 printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt);
671 printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt);
672 printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
673 printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
674 printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt);
675 printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits);
676 printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
677 printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
678 printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
679 printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits);
680 printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits);
681 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
682 printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
683 c->nhead_lnum, c->nhead_offs);
684 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
685 if (c->big_lpt)
686 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
687 c->lsave_lnum, c->lsave_offs);
688 for (i = 0; i < c->lpt_lebs; i++)
689 printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d "
690 "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
691 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
692 spin_unlock(&dbg_lock);
693}
694
650void dbg_dump_leb(const struct ubifs_info *c, int lnum) 695void dbg_dump_leb(const struct ubifs_info *c, int lnum)
651{ 696{
652 struct ubifs_scan_leb *sleb; 697 struct ubifs_scan_leb *sleb;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 50315fc57185..33d6b95071e4 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c); 224void dbg_dump_budg(struct ubifs_info *c);
225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
226void dbg_dump_lprops(struct ubifs_info *c); 226void dbg_dump_lprops(struct ubifs_info *c);
227void dbg_dump_lpt_info(struct ubifs_info *c);
227void dbg_dump_leb(const struct ubifs_info *c, int lnum); 228void dbg_dump_leb(const struct ubifs_info *c, int lnum);
228void dbg_dump_znode(const struct ubifs_info *c, 229void dbg_dump_znode(const struct ubifs_info *c,
229 const struct ubifs_znode *znode); 230 const struct ubifs_znode *znode);
@@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
249int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); 250int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
250int dbg_check_cats(struct ubifs_info *c); 251int dbg_check_cats(struct ubifs_info *c);
251int dbg_check_ltab(struct ubifs_info *c); 252int dbg_check_ltab(struct ubifs_info *c);
253int dbg_chk_lpt_free_spc(struct ubifs_info *c);
254int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
252int dbg_check_synced_i_size(struct inode *inode); 255int dbg_check_synced_i_size(struct inode *inode);
253int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 256int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
254int dbg_check_tnc(struct ubifs_info *c, int extra); 257int dbg_check_tnc(struct ubifs_info *c, int extra);
@@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
367#define dbg_dump_budg(c) ({}) 370#define dbg_dump_budg(c) ({})
368#define dbg_dump_lprop(c, lp) ({}) 371#define dbg_dump_lprop(c, lp) ({})
369#define dbg_dump_lprops(c) ({}) 372#define dbg_dump_lprops(c) ({})
373#define dbg_dump_lpt_info(c) ({})
370#define dbg_dump_leb(c, lnum) ({}) 374#define dbg_dump_leb(c, lnum) ({})
371#define dbg_dump_znode(c, znode) ({}) 375#define dbg_dump_znode(c, znode) ({})
372#define dbg_dump_heap(c, heap, cat) ({}) 376#define dbg_dump_heap(c, heap, cat) ({})
@@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
379#define dbg_check_old_index(c, zroot) 0 383#define dbg_check_old_index(c, zroot) 0
380#define dbg_check_cats(c) 0 384#define dbg_check_cats(c) 0
381#define dbg_check_ltab(c) 0 385#define dbg_check_ltab(c) 0
386#define dbg_chk_lpt_free_spc(c) 0
387#define dbg_chk_lpt_sz(c, action, len) 0
382#define dbg_check_synced_i_size(inode) 0 388#define dbg_check_synced_i_size(inode) 0
383#define dbg_check_dir_size(c, dir) 0 389#define dbg_check_dir_size(c, dir) 0
384#define dbg_check_tnc(c, x) 0 390#define dbg_check_tnc(c, x) 0
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3d698e2022b1..51cf511d44d9 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page)
147 err = ret; 147 err = ret;
148 if (err != -ENOENT) 148 if (err != -ENOENT)
149 break; 149 break;
150 } else if (block + 1 == beyond) {
151 int dlen = le32_to_cpu(dn->size);
152 int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
153
154 if (ilen && ilen < dlen)
155 memset(addr + ilen, 0, dlen - ilen);
150 } 156 }
151 } 157 }
152 if (++i >= UBIFS_BLOCKS_PER_PAGE) 158 if (++i >= UBIFS_BLOCKS_PER_PAGE)
@@ -577,8 +583,262 @@ out:
577 return copied; 583 return copied;
578} 584}
579 585
586/**
587 * populate_page - copy data nodes into a page for bulk-read.
588 * @c: UBIFS file-system description object
589 * @page: page
590 * @bu: bulk-read information
591 * @n: next zbranch slot
592 *
593 * This function returns %0 on success and a negative error code on failure.
594 */
595static int populate_page(struct ubifs_info *c, struct page *page,
596 struct bu_info *bu, int *n)
597{
598 int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
599 struct inode *inode = page->mapping->host;
600 loff_t i_size = i_size_read(inode);
601 unsigned int page_block;
602 void *addr, *zaddr;
603 pgoff_t end_index;
604
605 dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
606 inode->i_ino, page->index, i_size, page->flags);
607
608 addr = zaddr = kmap(page);
609
610 end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
611 if (!i_size || page->index > end_index) {
612 hole = 1;
613 memset(addr, 0, PAGE_CACHE_SIZE);
614 goto out_hole;
615 }
616
617 page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
618 while (1) {
619 int err, len, out_len, dlen;
620
621 if (nn >= bu->cnt) {
622 hole = 1;
623 memset(addr, 0, UBIFS_BLOCK_SIZE);
624 } else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
625 struct ubifs_data_node *dn;
626
627 dn = bu->buf + (bu->zbranch[nn].offs - offs);
628
629 ubifs_assert(dn->ch.sqnum >
630 ubifs_inode(inode)->creat_sqnum);
631
632 len = le32_to_cpu(dn->size);
633 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
634 goto out_err;
635
636 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
637 out_len = UBIFS_BLOCK_SIZE;
638 err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
639 le16_to_cpu(dn->compr_type));
640 if (err || len != out_len)
641 goto out_err;
642
643 if (len < UBIFS_BLOCK_SIZE)
644 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
645
646 nn += 1;
647 read = (i << UBIFS_BLOCK_SHIFT) + len;
648 } else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
649 nn += 1;
650 continue;
651 } else {
652 hole = 1;
653 memset(addr, 0, UBIFS_BLOCK_SIZE);
654 }
655 if (++i >= UBIFS_BLOCKS_PER_PAGE)
656 break;
657 addr += UBIFS_BLOCK_SIZE;
658 page_block += 1;
659 }
660
661 if (end_index == page->index) {
662 int len = i_size & (PAGE_CACHE_SIZE - 1);
663
664 if (len && len < read)
665 memset(zaddr + len, 0, read - len);
666 }
667
668out_hole:
669 if (hole) {
670 SetPageChecked(page);
671 dbg_gen("hole");
672 }
673
674 SetPageUptodate(page);
675 ClearPageError(page);
676 flush_dcache_page(page);
677 kunmap(page);
678 *n = nn;
679 return 0;
680
681out_err:
682 ClearPageUptodate(page);
683 SetPageError(page);
684 flush_dcache_page(page);
685 kunmap(page);
686 ubifs_err("bad data node (block %u, inode %lu)",
687 page_block, inode->i_ino);
688 return -EINVAL;
689}
690
691/**
692 * ubifs_do_bulk_read - do bulk-read.
693 * @c: UBIFS file-system description object
694 * @page1: first page
695 *
696 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
697 */
698static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
699{
700 pgoff_t offset = page1->index, end_index;
701 struct address_space *mapping = page1->mapping;
702 struct inode *inode = mapping->host;
703 struct ubifs_inode *ui = ubifs_inode(inode);
704 struct bu_info *bu;
705 int err, page_idx, page_cnt, ret = 0, n = 0;
706 loff_t isize;
707
708 bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
709 if (!bu)
710 return 0;
711
712 bu->buf_len = c->bulk_read_buf_size;
713 bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
714 if (!bu->buf)
715 goto out_free;
716
717 data_key_init(c, &bu->key, inode->i_ino,
718 offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
719
720 err = ubifs_tnc_get_bu_keys(c, bu);
721 if (err)
722 goto out_warn;
723
724 if (bu->eof) {
725 /* Turn off bulk-read at the end of the file */
726 ui->read_in_a_row = 1;
727 ui->bulk_read = 0;
728 }
729
730 page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
731 if (!page_cnt) {
732 /*
733 * This happens when there are multiple blocks per page and the
734 * blocks for the first page we are looking for, are not
735 * together. If all the pages were like this, bulk-read would
736 * reduce performance, so we turn it off for a while.
737 */
738 ui->read_in_a_row = 0;
739 ui->bulk_read = 0;
740 goto out_free;
741 }
742
743 if (bu->cnt) {
744 err = ubifs_tnc_bulk_read(c, bu);
745 if (err)
746 goto out_warn;
747 }
748
749 err = populate_page(c, page1, bu, &n);
750 if (err)
751 goto out_warn;
752
753 unlock_page(page1);
754 ret = 1;
755
756 isize = i_size_read(inode);
757 if (isize == 0)
758 goto out_free;
759 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
760
761 for (page_idx = 1; page_idx < page_cnt; page_idx++) {
762 pgoff_t page_offset = offset + page_idx;
763 struct page *page;
764
765 if (page_offset > end_index)
766 break;
767 page = find_or_create_page(mapping, page_offset,
768 GFP_NOFS | __GFP_COLD);
769 if (!page)
770 break;
771 if (!PageUptodate(page))
772 err = populate_page(c, page, bu, &n);
773 unlock_page(page);
774 page_cache_release(page);
775 if (err)
776 break;
777 }
778
779 ui->last_page_read = offset + page_idx - 1;
780
781out_free:
782 kfree(bu->buf);
783 kfree(bu);
784 return ret;
785
786out_warn:
787 ubifs_warn("ignoring error %d and skipping bulk-read", err);
788 goto out_free;
789}
790
791/**
792 * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
793 * @page: page from which to start bulk-read.
794 *
795 * Some flash media are capable of reading sequentially at faster rates. UBIFS
796 * bulk-read facility is designed to take advantage of that, by reading in one
797 * go consecutive data nodes that are also located consecutively in the same
798 * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
799 */
800static int ubifs_bulk_read(struct page *page)
801{
802 struct inode *inode = page->mapping->host;
803 struct ubifs_info *c = inode->i_sb->s_fs_info;
804 struct ubifs_inode *ui = ubifs_inode(inode);
805 pgoff_t index = page->index, last_page_read = ui->last_page_read;
806 int ret = 0;
807
808 ui->last_page_read = index;
809
810 if (!c->bulk_read)
811 return 0;
812 /*
813 * Bulk-read is protected by ui_mutex, but it is an optimization, so
814 * don't bother if we cannot lock the mutex.
815 */
816 if (!mutex_trylock(&ui->ui_mutex))
817 return 0;
818 if (index != last_page_read + 1) {
819 /* Turn off bulk-read if we stop reading sequentially */
820 ui->read_in_a_row = 1;
821 if (ui->bulk_read)
822 ui->bulk_read = 0;
823 goto out_unlock;
824 }
825 if (!ui->bulk_read) {
826 ui->read_in_a_row += 1;
827 if (ui->read_in_a_row < 3)
828 goto out_unlock;
829 /* Three reads in a row, so switch on bulk-read */
830 ui->bulk_read = 1;
831 }
832 ret = ubifs_do_bulk_read(c, page);
833out_unlock:
834 mutex_unlock(&ui->ui_mutex);
835 return ret;
836}
837
580static int ubifs_readpage(struct file *file, struct page *page) 838static int ubifs_readpage(struct file *file, struct page *page)
581{ 839{
840 if (ubifs_bulk_read(page))
841 return 0;
582 do_readpage(page); 842 do_readpage(page);
583 unlock_page(page); 843 unlock_page(page);
584 return 0; 844 return 0;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 47814cde2407..717d79c97c5e 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c)
901 * it is needed now for this commit. 901 * it is needed now for this commit.
902 */ 902 */
903 lp = ubifs_lpt_lookup_dirty(c, lnum); 903 lp = ubifs_lpt_lookup_dirty(c, lnum);
904 if (unlikely(IS_ERR(lp))) 904 if (IS_ERR(lp))
905 return PTR_ERR(lp); 905 return PTR_ERR(lp);
906 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 906 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
907 lp->flags | LPROPS_INDEX, -1); 907 lp->flags | LPROPS_INDEX, -1);
908 if (unlikely(IS_ERR(lp))) 908 if (IS_ERR(lp))
909 return PTR_ERR(lp); 909 return PTR_ERR(lp);
910 dbg_find("LEB %d, dirty %d and free %d flags %#x", 910 dbg_find("LEB %d, dirty %d and free %d flags %#x",
911 lp->lnum, lp->dirty, lp->free, lp->flags); 911 lp->lnum, lp->dirty, lp->free, lp->flags);
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 02aba36fe3d4..0bef6501d58a 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -96,6 +96,48 @@ static int switch_gc_head(struct ubifs_info *c)
96} 96}
97 97
98/** 98/**
99 * joinup - bring data nodes for an inode together.
100 * @c: UBIFS file-system description object
101 * @sleb: describes scanned LEB
102 * @inum: inode number
103 * @blk: block number
104 * @data: list to which to add data nodes
105 *
106 * This function looks at the first few nodes in the scanned LEB @sleb and adds
107 * them to @data if they are data nodes from @inum and have a larger block
108 * number than @blk. This function returns %0 on success and a negative error
109 * code on failure.
110 */
111static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
112 unsigned int blk, struct list_head *data)
113{
114 int err, cnt = 6, lnum = sleb->lnum, offs;
115 struct ubifs_scan_node *snod, *tmp;
116 union ubifs_key *key;
117
118 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
119 key = &snod->key;
120 if (key_inum(c, key) == inum &&
121 key_type(c, key) == UBIFS_DATA_KEY &&
122 key_block(c, key) > blk) {
123 offs = snod->offs;
124 err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
125 if (err < 0)
126 return err;
127 list_del(&snod->list);
128 if (err) {
129 list_add_tail(&snod->list, data);
130 blk = key_block(c, key);
131 } else
132 kfree(snod);
133 cnt = 6;
134 } else if (--cnt == 0)
135 break;
136 }
137 return 0;
138}
139
140/**
99 * move_nodes - move nodes. 141 * move_nodes - move nodes.
100 * @c: UBIFS file-system description object 142 * @c: UBIFS file-system description object
101 * @sleb: describes nodes to move 143 * @sleb: describes nodes to move
@@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c)
116static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) 158static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
117{ 159{
118 struct ubifs_scan_node *snod, *tmp; 160 struct ubifs_scan_node *snod, *tmp;
119 struct list_head large, medium, small; 161 struct list_head data, large, medium, small;
120 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 162 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
121 int avail, err, min = INT_MAX; 163 int avail, err, min = INT_MAX;
164 unsigned int blk = 0;
165 ino_t inum = 0;
122 166
167 INIT_LIST_HEAD(&data);
123 INIT_LIST_HEAD(&large); 168 INIT_LIST_HEAD(&large);
124 INIT_LIST_HEAD(&medium); 169 INIT_LIST_HEAD(&medium);
125 INIT_LIST_HEAD(&small); 170 INIT_LIST_HEAD(&small);
126 171
127 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { 172 while (!list_empty(&sleb->nodes)) {
128 struct list_head *lst; 173 struct list_head *lst = sleb->nodes.next;
174
175 snod = list_entry(lst, struct ubifs_scan_node, list);
129 176
130 ubifs_assert(snod->type != UBIFS_IDX_NODE); 177 ubifs_assert(snod->type != UBIFS_IDX_NODE);
131 ubifs_assert(snod->type != UBIFS_REF_NODE); 178 ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
136 if (err < 0) 183 if (err < 0)
137 goto out; 184 goto out;
138 185
139 lst = &snod->list;
140 list_del(lst); 186 list_del(lst);
141 if (!err) { 187 if (!err) {
142 /* The node is obsolete, remove it from the list */ 188 /* The node is obsolete, remove it from the list */
@@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
145 } 191 }
146 192
147 /* 193 /*
148 * Sort the list of nodes so that large nodes go first, and 194 * Sort the list of nodes so that data nodes go first, large
149 * small nodes go last. 195 * nodes go second, and small nodes go last.
150 */ 196 */
151 if (snod->len > MEDIUM_NODE_WM) 197 if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
152 list_add(lst, &large); 198 if (inum != key_inum(c, &snod->key)) {
199 if (inum) {
200 /*
201 * Try to move data nodes from the same
202 * inode together.
203 */
204 err = joinup(c, sleb, inum, blk, &data);
205 if (err)
206 goto out;
207 }
208 inum = key_inum(c, &snod->key);
209 blk = key_block(c, &snod->key);
210 }
211 list_add_tail(lst, &data);
212 } else if (snod->len > MEDIUM_NODE_WM)
213 list_add_tail(lst, &large);
153 else if (snod->len > SMALL_NODE_WM) 214 else if (snod->len > SMALL_NODE_WM)
154 list_add(lst, &medium); 215 list_add_tail(lst, &medium);
155 else 216 else
156 list_add(lst, &small); 217 list_add_tail(lst, &small);
157 218
158 /* And find the smallest node */ 219 /* And find the smallest node */
159 if (snod->len < min) 220 if (snod->len < min)
@@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
164 * Join the tree lists so that we'd have one roughly sorted list 225 * Join the tree lists so that we'd have one roughly sorted list
165 * ('large' will be the head of the joined list). 226 * ('large' will be the head of the joined list).
166 */ 227 */
228 list_splice(&data, &large);
167 list_splice(&medium, large.prev); 229 list_splice(&medium, large.prev);
168 list_splice(&small, large.prev); 230 list_splice(&small, large.prev);
169 231
@@ -653,7 +715,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
653 */ 715 */
654 while (1) { 716 while (1) {
655 lp = ubifs_fast_find_freeable(c); 717 lp = ubifs_fast_find_freeable(c);
656 if (unlikely(IS_ERR(lp))) { 718 if (IS_ERR(lp)) {
657 err = PTR_ERR(lp); 719 err = PTR_ERR(lp);
658 goto out; 720 goto out;
659 } 721 }
@@ -665,7 +727,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
665 if (err) 727 if (err)
666 goto out; 728 goto out;
667 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); 729 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
668 if (unlikely(IS_ERR(lp))) { 730 if (IS_ERR(lp)) {
669 err = PTR_ERR(lp); 731 err = PTR_ERR(lp);
670 goto out; 732 goto out;
671 } 733 }
@@ -680,7 +742,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
680 /* Record index freeable LEBs for unmapping after commit */ 742 /* Record index freeable LEBs for unmapping after commit */
681 while (1) { 743 while (1) {
682 lp = ubifs_fast_find_frdi_idx(c); 744 lp = ubifs_fast_find_frdi_idx(c);
683 if (unlikely(IS_ERR(lp))) { 745 if (IS_ERR(lp)) {
684 err = PTR_ERR(lp); 746 err = PTR_ERR(lp);
685 goto out; 747 goto out;
686 } 748 }
@@ -696,7 +758,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
696 /* Don't release the LEB until after the next commit */ 758 /* Don't release the LEB until after the next commit */
697 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; 759 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
698 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); 760 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
699 if (unlikely(IS_ERR(lp))) { 761 if (IS_ERR(lp)) {
700 err = PTR_ERR(lp); 762 err = PTR_ERR(lp);
701 kfree(idx_gc); 763 kfree(idx_gc);
702 goto out; 764 goto out;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 054363f2b207..01682713af69 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
62{ 62{
63 if (!c->ro_media) { 63 if (!c->ro_media) {
64 c->ro_media = 1; 64 c->ro_media = 1;
65 c->no_chk_data_crc = 0;
65 ubifs_warn("switched to read-only mode, error %d", err); 66 ubifs_warn("switched to read-only mode, error %d", err);
66 dbg_dump_stack(); 67 dbg_dump_stack();
67 } 68 }
@@ -74,6 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
74 * @lnum: logical eraseblock number 75 * @lnum: logical eraseblock number
75 * @offs: offset within the logical eraseblock 76 * @offs: offset within the logical eraseblock
76 * @quiet: print no messages 77 * @quiet: print no messages
78 * @chk_crc: indicates whether to always check the CRC
77 * 79 *
78 * This function checks node magic number and CRC checksum. This function also 80 * This function checks node magic number and CRC checksum. This function also
79 * validates node length to prevent UBIFS from becoming crazy when an attacker 81 * validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -85,7 +87,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
85 * or magic. 87 * or magic.
86 */ 88 */
87int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 89int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
88 int offs, int quiet) 90 int offs, int quiet, int chk_crc)
89{ 91{
90 int err = -EINVAL, type, node_len; 92 int err = -EINVAL, type, node_len;
91 uint32_t crc, node_crc, magic; 93 uint32_t crc, node_crc, magic;
@@ -121,6 +123,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
121 node_len > c->ranges[type].max_len) 123 node_len > c->ranges[type].max_len)
122 goto out_len; 124 goto out_len;
123 125
126 if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc)
127 if (c->no_chk_data_crc)
128 return 0;
129
124 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 130 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
125 node_crc = le32_to_cpu(ch->crc); 131 node_crc = le32_to_cpu(ch->crc);
126 if (crc != node_crc) { 132 if (crc != node_crc) {
@@ -722,7 +728,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
722 goto out; 728 goto out;
723 } 729 }
724 730
725 err = ubifs_check_node(c, buf, lnum, offs, 0); 731 err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
726 if (err) { 732 if (err) {
727 ubifs_err("expected node type %d", type); 733 ubifs_err("expected node type %d", type);
728 return err; 734 return err;
@@ -781,7 +787,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
781 goto out; 787 goto out;
782 } 788 }
783 789
784 err = ubifs_check_node(c, buf, lnum, offs, 0); 790 err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
785 if (err) { 791 if (err) {
786 ubifs_err("expected node type %d", type); 792 ubifs_err("expected node type %d", type);
787 return err; 793 return err;
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 8f7476007549..9ee65086f627 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c,
484 * @key2: the second key to compare 484 * @key2: the second key to compare
485 * 485 *
486 * This function compares 2 keys and returns %-1 if @key1 is less than 486 * This function compares 2 keys and returns %-1 if @key1 is less than
487 * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. 487 * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2.
488 */ 488 */
489static inline int keys_cmp(const struct ubifs_info *c, 489static inline int keys_cmp(const struct ubifs_info *c,
490 const union ubifs_key *key1, 490 const union ubifs_key *key1,
@@ -503,6 +503,26 @@ static inline int keys_cmp(const struct ubifs_info *c,
503} 503}
504 504
505/** 505/**
506 * keys_eq - determine if keys are equivalent.
507 * @c: UBIFS file-system description object
508 * @key1: the first key to compare
509 * @key2: the second key to compare
510 *
511 * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and
512 * %0 if not.
513 */
514static inline int keys_eq(const struct ubifs_info *c,
515 const union ubifs_key *key1,
516 const union ubifs_key *key2)
517{
518 if (key1->u32[0] != key2->u32[0])
519 return 0;
520 if (key1->u32[1] != key2->u32[1])
521 return 0;
522 return 1;
523}
524
525/**
506 * is_hash_key - is a key vulnerable to hash collisions. 526 * is_hash_key - is a key vulnerable to hash collisions.
507 * @c: UBIFS file-system description object 527 * @c: UBIFS file-system description object
508 * @key: key 528 * @key: key
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 2ba93da71b65..f27176e9b70d 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
125 } 125 }
126 } 126 }
127 } 127 }
128
128 /* Not greater than parent, so compare to children */ 129 /* Not greater than parent, so compare to children */
129 while (1) { 130 while (1) {
130 /* Compare to left child */ 131 /* Compare to left child */
@@ -460,18 +461,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
460} 461}
461 462
462/** 463/**
463 * ubifs_get_lprops - get reference to LEB properties.
464 * @c: the UBIFS file-system description object
465 *
466 * This function locks lprops. Lprops have to be unlocked by
467 * 'ubifs_release_lprops()'.
468 */
469void ubifs_get_lprops(struct ubifs_info *c)
470{
471 mutex_lock(&c->lp_mutex);
472}
473
474/**
475 * calc_dark - calculate LEB dark space size. 464 * calc_dark - calculate LEB dark space size.
476 * @c: the UBIFS file-system description object 465 * @c: the UBIFS file-system description object
477 * @spc: amount of free and dirty space in the LEB 466 * @spc: amount of free and dirty space in the LEB
@@ -576,7 +565,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
576 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); 565 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
577 566
578 spin_lock(&c->space_lock); 567 spin_lock(&c->space_lock);
579
580 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) 568 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
581 c->lst.taken_empty_lebs -= 1; 569 c->lst.taken_empty_lebs -= 1;
582 570
@@ -637,31 +625,12 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
637 c->lst.taken_empty_lebs += 1; 625 c->lst.taken_empty_lebs += 1;
638 626
639 change_category(c, lprops); 627 change_category(c, lprops);
640
641 c->idx_gc_cnt += idx_gc_cnt; 628 c->idx_gc_cnt += idx_gc_cnt;
642
643 spin_unlock(&c->space_lock); 629 spin_unlock(&c->space_lock);
644
645 return lprops; 630 return lprops;
646} 631}
647 632
648/** 633/**
649 * ubifs_release_lprops - release lprops lock.
650 * @c: the UBIFS file-system description object
651 *
652 * This function has to be called after each 'ubifs_get_lprops()' call to
653 * unlock lprops.
654 */
655void ubifs_release_lprops(struct ubifs_info *c)
656{
657 ubifs_assert(mutex_is_locked(&c->lp_mutex));
658 ubifs_assert(c->lst.empty_lebs >= 0 &&
659 c->lst.empty_lebs <= c->main_lebs);
660
661 mutex_unlock(&c->lp_mutex);
662}
663
664/**
665 * ubifs_get_lp_stats - get lprops statistics. 634 * ubifs_get_lp_stats - get lprops statistics.
666 * @c: UBIFS file-system description object 635 * @c: UBIFS file-system description object
667 * @st: return statistics 636 * @st: return statistics
@@ -1262,7 +1231,6 @@ static int scan_check_cb(struct ubifs_info *c,
1262 } 1231 }
1263 1232
1264 ubifs_scan_destroy(sleb); 1233 ubifs_scan_destroy(sleb);
1265
1266 return LPT_SCAN_CONTINUE; 1234 return LPT_SCAN_CONTINUE;
1267 1235
1268out_print: 1236out_print:
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 9ff2463177e5..db8bd0e518b2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; 109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; 110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
111 c->lpt_sz += c->ltab_sz; 111 c->lpt_sz += c->ltab_sz;
112 c->lpt_sz += c->lsave_sz; 112 if (c->big_lpt)
113 c->lpt_sz += c->lsave_sz;
113 114
114 /* Add wastage */ 115 /* Add wastage */
115 sz = c->lpt_sz; 116 sz = c->lpt_sz;
@@ -287,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
287 const int k = 32 - nrbits; 288 const int k = 32 - nrbits;
288 uint8_t *p = *addr; 289 uint8_t *p = *addr;
289 int b = *pos; 290 int b = *pos;
290 uint32_t val; 291 uint32_t uninitialized_var(val);
292 const int bytes = (nrbits + b + 7) >> 3;
291 293
292 ubifs_assert(nrbits > 0); 294 ubifs_assert(nrbits > 0);
293 ubifs_assert(nrbits <= 32); 295 ubifs_assert(nrbits <= 32);
294 ubifs_assert(*pos >= 0); 296 ubifs_assert(*pos >= 0);
295 ubifs_assert(*pos < 8); 297 ubifs_assert(*pos < 8);
296 if (b) { 298 if (b) {
297 val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | 299 switch (bytes) {
298 ((uint32_t)p[4] << 24); 300 case 2:
301 val = p[1];
302 break;
303 case 3:
304 val = p[1] | ((uint32_t)p[2] << 8);
305 break;
306 case 4:
307 val = p[1] | ((uint32_t)p[2] << 8) |
308 ((uint32_t)p[3] << 16);
309 break;
310 case 5:
311 val = p[1] | ((uint32_t)p[2] << 8) |
312 ((uint32_t)p[3] << 16) |
313 ((uint32_t)p[4] << 24);
314 }
299 val <<= (8 - b); 315 val <<= (8 - b);
300 val |= *p >> b; 316 val |= *p >> b;
301 nrbits += b; 317 nrbits += b;
302 } else 318 } else {
303 val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | 319 switch (bytes) {
304 ((uint32_t)p[3] << 24); 320 case 1:
321 val = p[0];
322 break;
323 case 2:
324 val = p[0] | ((uint32_t)p[1] << 8);
325 break;
326 case 3:
327 val = p[0] | ((uint32_t)p[1] << 8) |
328 ((uint32_t)p[2] << 16);
329 break;
330 case 4:
331 val = p[0] | ((uint32_t)p[1] << 8) |
332 ((uint32_t)p[2] << 16) |
333 ((uint32_t)p[3] << 24);
334 break;
335 }
336 }
305 val <<= k; 337 val <<= k;
306 val >>= k; 338 val >>= k;
307 b = nrbits & 7; 339 b = nrbits & 7;
308 p += nrbits / 8; 340 p += nrbits >> 3;
309 *addr = p; 341 *addr = p;
310 *pos = b; 342 *pos = b;
311 ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); 343 ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5f0b83e20af6..eed5a0025d63 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
177 return 0; 177 return 0;
178 } 178 }
179 } 179 }
180 dbg_err("last LEB %d", *lnum);
181 dump_stack();
182 return -ENOSPC; 180 return -ENOSPC;
183} 181}
184 182
@@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c)
193 int lnum, offs, len, alen, done_lsave, done_ltab, err; 191 int lnum, offs, len, alen, done_lsave, done_ltab, err;
194 struct ubifs_cnode *cnode; 192 struct ubifs_cnode *cnode;
195 193
194 err = dbg_chk_lpt_sz(c, 0, 0);
195 if (err)
196 return err;
196 cnode = c->lpt_cnext; 197 cnode = c->lpt_cnext;
197 if (!cnode) 198 if (!cnode)
198 return 0; 199 return 0;
@@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c)
206 c->lsave_lnum = lnum; 207 c->lsave_lnum = lnum;
207 c->lsave_offs = offs; 208 c->lsave_offs = offs;
208 offs += c->lsave_sz; 209 offs += c->lsave_sz;
210 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
209 } 211 }
210 212
211 if (offs + c->ltab_sz <= c->leb_size) { 213 if (offs + c->ltab_sz <= c->leb_size) {
@@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c)
213 c->ltab_lnum = lnum; 215 c->ltab_lnum = lnum;
214 c->ltab_offs = offs; 216 c->ltab_offs = offs;
215 offs += c->ltab_sz; 217 offs += c->ltab_sz;
218 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
216 } 219 }
217 220
218 do { 221 do {
@@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c)
226 while (offs + len > c->leb_size) { 229 while (offs + len > c->leb_size) {
227 alen = ALIGN(offs, c->min_io_size); 230 alen = ALIGN(offs, c->min_io_size);
228 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 231 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
232 dbg_chk_lpt_sz(c, 2, alen - offs);
229 err = alloc_lpt_leb(c, &lnum); 233 err = alloc_lpt_leb(c, &lnum);
230 if (err) 234 if (err)
231 return err; 235 goto no_space;
232 offs = 0; 236 offs = 0;
233 ubifs_assert(lnum >= c->lpt_first && 237 ubifs_assert(lnum >= c->lpt_first &&
234 lnum <= c->lpt_last); 238 lnum <= c->lpt_last);
@@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c)
238 c->lsave_lnum = lnum; 242 c->lsave_lnum = lnum;
239 c->lsave_offs = offs; 243 c->lsave_offs = offs;
240 offs += c->lsave_sz; 244 offs += c->lsave_sz;
245 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
241 continue; 246 continue;
242 } 247 }
243 if (!done_ltab) { 248 if (!done_ltab) {
@@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c)
245 c->ltab_lnum = lnum; 250 c->ltab_lnum = lnum;
246 c->ltab_offs = offs; 251 c->ltab_offs = offs;
247 offs += c->ltab_sz; 252 offs += c->ltab_sz;
253 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
248 continue; 254 continue;
249 } 255 }
250 break; 256 break;
@@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c)
257 c->lpt_offs = offs; 263 c->lpt_offs = offs;
258 } 264 }
259 offs += len; 265 offs += len;
266 dbg_chk_lpt_sz(c, 1, len);
260 cnode = cnode->cnext; 267 cnode = cnode->cnext;
261 } while (cnode && cnode != c->lpt_cnext); 268 } while (cnode && cnode != c->lpt_cnext);
262 269
@@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c)
265 if (offs + c->lsave_sz > c->leb_size) { 272 if (offs + c->lsave_sz > c->leb_size) {
266 alen = ALIGN(offs, c->min_io_size); 273 alen = ALIGN(offs, c->min_io_size);
267 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 274 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
275 dbg_chk_lpt_sz(c, 2, alen - offs);
268 err = alloc_lpt_leb(c, &lnum); 276 err = alloc_lpt_leb(c, &lnum);
269 if (err) 277 if (err)
270 return err; 278 goto no_space;
271 offs = 0; 279 offs = 0;
272 ubifs_assert(lnum >= c->lpt_first && 280 ubifs_assert(lnum >= c->lpt_first &&
273 lnum <= c->lpt_last); 281 lnum <= c->lpt_last);
@@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c)
276 c->lsave_lnum = lnum; 284 c->lsave_lnum = lnum;
277 c->lsave_offs = offs; 285 c->lsave_offs = offs;
278 offs += c->lsave_sz; 286 offs += c->lsave_sz;
287 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
279 } 288 }
280 289
281 /* Make sure to place LPT's own lprops table */ 290 /* Make sure to place LPT's own lprops table */
@@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c)
283 if (offs + c->ltab_sz > c->leb_size) { 292 if (offs + c->ltab_sz > c->leb_size) {
284 alen = ALIGN(offs, c->min_io_size); 293 alen = ALIGN(offs, c->min_io_size);
285 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 294 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
295 dbg_chk_lpt_sz(c, 2, alen - offs);
286 err = alloc_lpt_leb(c, &lnum); 296 err = alloc_lpt_leb(c, &lnum);
287 if (err) 297 if (err)
288 return err; 298 goto no_space;
289 offs = 0; 299 offs = 0;
290 ubifs_assert(lnum >= c->lpt_first && 300 ubifs_assert(lnum >= c->lpt_first &&
291 lnum <= c->lpt_last); 301 lnum <= c->lpt_last);
@@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c)
294 c->ltab_lnum = lnum; 304 c->ltab_lnum = lnum;
295 c->ltab_offs = offs; 305 c->ltab_offs = offs;
296 offs += c->ltab_sz; 306 offs += c->ltab_sz;
307 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
297 } 308 }
298 309
299 alen = ALIGN(offs, c->min_io_size); 310 alen = ALIGN(offs, c->min_io_size);
300 upd_ltab(c, lnum, c->leb_size - alen, alen - offs); 311 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
312 dbg_chk_lpt_sz(c, 4, alen - offs);
313 err = dbg_chk_lpt_sz(c, 3, alen);
314 if (err)
315 return err;
301 return 0; 316 return 0;
317
318no_space:
319 ubifs_err("LPT out of space");
320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
322 dbg_dump_lpt_info(c);
323 return err;
302} 324}
303 325
304/** 326/**
@@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
333 *lnum = i + c->lpt_first; 355 *lnum = i + c->lpt_first;
334 return 0; 356 return 0;
335 } 357 }
336 dbg_err("last LEB %d", *lnum);
337 dump_stack();
338 return -ENOSPC; 358 return -ENOSPC;
339} 359}
340 360
@@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c)
369 done_lsave = 1; 389 done_lsave = 1;
370 ubifs_pack_lsave(c, buf + offs, c->lsave); 390 ubifs_pack_lsave(c, buf + offs, c->lsave);
371 offs += c->lsave_sz; 391 offs += c->lsave_sz;
392 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
372 } 393 }
373 394
374 if (offs + c->ltab_sz <= c->leb_size) { 395 if (offs + c->ltab_sz <= c->leb_size) {
375 done_ltab = 1; 396 done_ltab = 1;
376 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 397 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
377 offs += c->ltab_sz; 398 offs += c->ltab_sz;
399 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
378 } 400 }
379 401
380 /* Loop for each cnode */ 402 /* Loop for each cnode */
@@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c)
392 alen, UBI_SHORTTERM); 414 alen, UBI_SHORTTERM);
393 if (err) 415 if (err)
394 return err; 416 return err;
417 dbg_chk_lpt_sz(c, 4, alen - wlen);
395 } 418 }
419 dbg_chk_lpt_sz(c, 2, 0);
396 err = realloc_lpt_leb(c, &lnum); 420 err = realloc_lpt_leb(c, &lnum);
397 if (err) 421 if (err)
398 return err; 422 goto no_space;
399 offs = 0; 423 offs = 0;
400 from = 0; 424 from = 0;
401 ubifs_assert(lnum >= c->lpt_first && 425 ubifs_assert(lnum >= c->lpt_first &&
@@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c)
408 done_lsave = 1; 432 done_lsave = 1;
409 ubifs_pack_lsave(c, buf + offs, c->lsave); 433 ubifs_pack_lsave(c, buf + offs, c->lsave);
410 offs += c->lsave_sz; 434 offs += c->lsave_sz;
435 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
411 continue; 436 continue;
412 } 437 }
413 if (!done_ltab) { 438 if (!done_ltab) {
414 done_ltab = 1; 439 done_ltab = 1;
415 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 440 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
416 offs += c->ltab_sz; 441 offs += c->ltab_sz;
442 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
417 continue; 443 continue;
418 } 444 }
419 break; 445 break;
@@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c)
435 clear_bit(COW_ZNODE, &cnode->flags); 461 clear_bit(COW_ZNODE, &cnode->flags);
436 smp_mb__after_clear_bit(); 462 smp_mb__after_clear_bit();
437 offs += len; 463 offs += len;
464 dbg_chk_lpt_sz(c, 1, len);
438 cnode = cnode->cnext; 465 cnode = cnode->cnext;
439 } while (cnode && cnode != c->lpt_cnext); 466 } while (cnode && cnode != c->lpt_cnext);
440 467
@@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c)
448 UBI_SHORTTERM); 475 UBI_SHORTTERM);
449 if (err) 476 if (err)
450 return err; 477 return err;
478 dbg_chk_lpt_sz(c, 2, alen - wlen);
451 err = realloc_lpt_leb(c, &lnum); 479 err = realloc_lpt_leb(c, &lnum);
452 if (err) 480 if (err)
453 return err; 481 goto no_space;
454 offs = 0; 482 offs = 0;
455 ubifs_assert(lnum >= c->lpt_first && 483 ubifs_assert(lnum >= c->lpt_first &&
456 lnum <= c->lpt_last); 484 lnum <= c->lpt_last);
@@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c)
461 done_lsave = 1; 489 done_lsave = 1;
462 ubifs_pack_lsave(c, buf + offs, c->lsave); 490 ubifs_pack_lsave(c, buf + offs, c->lsave);
463 offs += c->lsave_sz; 491 offs += c->lsave_sz;
492 dbg_chk_lpt_sz(c, 1, c->lsave_sz);
464 } 493 }
465 494
466 /* Make sure to place LPT's own lprops table */ 495 /* Make sure to place LPT's own lprops table */
@@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c)
473 UBI_SHORTTERM); 502 UBI_SHORTTERM);
474 if (err) 503 if (err)
475 return err; 504 return err;
505 dbg_chk_lpt_sz(c, 2, alen - wlen);
476 err = realloc_lpt_leb(c, &lnum); 506 err = realloc_lpt_leb(c, &lnum);
477 if (err) 507 if (err)
478 return err; 508 goto no_space;
479 offs = 0; 509 offs = 0;
480 ubifs_assert(lnum >= c->lpt_first && 510 ubifs_assert(lnum >= c->lpt_first &&
481 lnum <= c->lpt_last); 511 lnum <= c->lpt_last);
@@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c)
486 done_ltab = 1; 516 done_ltab = 1;
487 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 517 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
488 offs += c->ltab_sz; 518 offs += c->ltab_sz;
519 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
489 } 520 }
490 521
491 /* Write remaining data in buffer */ 522 /* Write remaining data in buffer */
@@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c)
495 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); 526 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
496 if (err) 527 if (err)
497 return err; 528 return err;
529
530 dbg_chk_lpt_sz(c, 4, alen - wlen);
531 err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size));
532 if (err)
533 return err;
534
498 c->nhead_lnum = lnum; 535 c->nhead_lnum = lnum;
499 c->nhead_offs = ALIGN(offs, c->min_io_size); 536 c->nhead_offs = ALIGN(offs, c->min_io_size);
500 537
@@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c)
503 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); 540 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
504 if (c->big_lpt) 541 if (c->big_lpt)
505 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); 542 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
543
506 return 0; 544 return 0;
545
546no_space:
547 ubifs_err("LPT out of space mismatch");
548 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
549 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
550 dbg_dump_lpt_info(c);
551 return err;
507} 552}
508 553
509/** 554/**
@@ -1044,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1044 int pos = 0, node_type, node_len; 1089 int pos = 0, node_type, node_len;
1045 uint16_t crc, calc_crc; 1090 uint16_t crc, calc_crc;
1046 1091
1092 if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
1093 return 0;
1047 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); 1094 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1048 if (node_type == UBIFS_LPT_NOT_A_NODE) 1095 if (node_type == UBIFS_LPT_NOT_A_NODE)
1049 return 0; 1096 return 0;
@@ -1156,6 +1203,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
1156 dbg_lp(""); 1203 dbg_lp("");
1157 1204
1158 mutex_lock(&c->lp_mutex); 1205 mutex_lock(&c->lp_mutex);
1206 err = dbg_chk_lpt_free_spc(c);
1207 if (err)
1208 goto out;
1159 err = dbg_check_ltab(c); 1209 err = dbg_check_ltab(c);
1160 if (err) 1210 if (err)
1161 goto out; 1211 goto out;
@@ -1645,4 +1695,121 @@ int dbg_check_ltab(struct ubifs_info *c)
1645 return 0; 1695 return 0;
1646} 1696}
1647 1697
1698/**
1699 * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT.
1700 * @c: the UBIFS file-system description object
1701 *
1702 * This function returns %0 on success and a negative error code on failure.
1703 */
1704int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1705{
1706 long long free = 0;
1707 int i;
1708
1709 for (i = 0; i < c->lpt_lebs; i++) {
1710 if (c->ltab[i].tgc || c->ltab[i].cmt)
1711 continue;
1712 if (i + c->lpt_first == c->nhead_lnum)
1713 free += c->leb_size - c->nhead_offs;
1714 else if (c->ltab[i].free == c->leb_size)
1715 free += c->leb_size;
1716 }
1717 if (free < c->lpt_sz) {
1718 dbg_err("LPT space error: free %lld lpt_sz %lld",
1719 free, c->lpt_sz);
1720 dbg_dump_lpt_info(c);
1721 return -EINVAL;
1722 }
1723 return 0;
1724}
1725
1726/**
1727 * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
1728 * @c: the UBIFS file-system description object
1729 * @action: action
1730 * @len: length written
1731 *
1732 * This function returns %0 on success and a negative error code on failure.
1733 */
1734int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1735{
1736 long long chk_lpt_sz, lpt_sz;
1737 int err = 0;
1738
1739 switch (action) {
1740 case 0:
1741 c->chk_lpt_sz = 0;
1742 c->chk_lpt_sz2 = 0;
1743 c->chk_lpt_lebs = 0;
1744 c->chk_lpt_wastage = 0;
1745 if (c->dirty_pn_cnt > c->pnode_cnt) {
1746 dbg_err("dirty pnodes %d exceed max %d",
1747 c->dirty_pn_cnt, c->pnode_cnt);
1748 err = -EINVAL;
1749 }
1750 if (c->dirty_nn_cnt > c->nnode_cnt) {
1751 dbg_err("dirty nnodes %d exceed max %d",
1752 c->dirty_nn_cnt, c->nnode_cnt);
1753 err = -EINVAL;
1754 }
1755 return err;
1756 case 1:
1757 c->chk_lpt_sz += len;
1758 return 0;
1759 case 2:
1760 c->chk_lpt_sz += len;
1761 c->chk_lpt_wastage += len;
1762 c->chk_lpt_lebs += 1;
1763 return 0;
1764 case 3:
1765 chk_lpt_sz = c->leb_size;
1766 chk_lpt_sz *= c->chk_lpt_lebs;
1767 chk_lpt_sz += len - c->nhead_offs;
1768 if (c->chk_lpt_sz != chk_lpt_sz) {
1769 dbg_err("LPT wrote %lld but space used was %lld",
1770 c->chk_lpt_sz, chk_lpt_sz);
1771 err = -EINVAL;
1772 }
1773 if (c->chk_lpt_sz > c->lpt_sz) {
1774 dbg_err("LPT wrote %lld but lpt_sz is %lld",
1775 c->chk_lpt_sz, c->lpt_sz);
1776 err = -EINVAL;
1777 }
1778 if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) {
1779 dbg_err("LPT layout size %lld but wrote %lld",
1780 c->chk_lpt_sz, c->chk_lpt_sz2);
1781 err = -EINVAL;
1782 }
1783 if (c->chk_lpt_sz2 && c->new_nhead_offs != len) {
1784 dbg_err("LPT new nhead offs: expected %d was %d",
1785 c->new_nhead_offs, len);
1786 err = -EINVAL;
1787 }
1788 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
1789 lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
1790 lpt_sz += c->ltab_sz;
1791 if (c->big_lpt)
1792 lpt_sz += c->lsave_sz;
1793 if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) {
1794 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
1795 c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz);
1796 err = -EINVAL;
1797 }
1798 if (err)
1799 dbg_dump_lpt_info(c);
1800 c->chk_lpt_sz2 = c->chk_lpt_sz;
1801 c->chk_lpt_sz = 0;
1802 c->chk_lpt_wastage = 0;
1803 c->chk_lpt_lebs = 0;
1804 c->new_nhead_offs = len;
1805 return err;
1806 case 4:
1807 c->chk_lpt_sz += len;
1808 c->chk_lpt_wastage += len;
1809 return 0;
1810 default:
1811 return -EINVAL;
1812 }
1813}
1814
1648#endif /* CONFIG_UBIFS_FS_DEBUG */ 1815#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4c12a9215d7f..4fa81d867e41 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c,
310 return ubifs_tnc_locate(c, key, node, NULL, NULL); 310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311} 311}
312 312
313/**
314 * ubifs_get_lprops - get reference to LEB properties.
315 * @c: the UBIFS file-system description object
316 *
317 * This function locks lprops. Lprops have to be unlocked by
318 * 'ubifs_release_lprops()'.
319 */
320static inline void ubifs_get_lprops(struct ubifs_info *c)
321{
322 mutex_lock(&c->lp_mutex);
323}
324
325/**
326 * ubifs_release_lprops - release lprops lock.
327 * @c: the UBIFS file-system description object
328 *
329 * This function has to be called after each 'ubifs_get_lprops()' call to
330 * unlock lprops.
331 */
332static inline void ubifs_release_lprops(struct ubifs_info *c)
333{
334 ubifs_assert(mutex_is_locked(&c->lp_mutex));
335 ubifs_assert(c->lst.empty_lebs >= 0 &&
336 c->lst.empty_lebs <= c->main_lebs);
337 mutex_unlock(&c->lp_mutex);
338}
339
313#endif /* __UBIFS_MISC_H__ */ 340#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index acf5c5fffc60..0ed82479b44b 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
87 87
88 dbg_scan("scanning %s", dbg_ntype(ch->node_type)); 88 dbg_scan("scanning %s", dbg_ntype(ch->node_type));
89 89
90 if (ubifs_check_node(c, buf, lnum, offs, quiet)) 90 if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
91 return SCANNED_A_CORRUPT_NODE; 91 return SCANNED_A_CORRUPT_NODE;
92 92
93 if (ch->node_type == UBIFS_PAD_NODE) { 93 if (ch->node_type == UBIFS_PAD_NODE) {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a9220333b3b..8780efbf40ac 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -401,6 +401,16 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
401 else if (c->mount_opts.unmount_mode == 1) 401 else if (c->mount_opts.unmount_mode == 1)
402 seq_printf(s, ",norm_unmount"); 402 seq_printf(s, ",norm_unmount");
403 403
404 if (c->mount_opts.bulk_read == 2)
405 seq_printf(s, ",bulk_read");
406 else if (c->mount_opts.bulk_read == 1)
407 seq_printf(s, ",no_bulk_read");
408
409 if (c->mount_opts.chk_data_crc == 2)
410 seq_printf(s, ",chk_data_crc");
411 else if (c->mount_opts.chk_data_crc == 1)
412 seq_printf(s, ",no_chk_data_crc");
413
404 return 0; 414 return 0;
405} 415}
406 416
@@ -408,13 +418,26 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
408{ 418{
409 struct ubifs_info *c = sb->s_fs_info; 419 struct ubifs_info *c = sb->s_fs_info;
410 int i, ret = 0, err; 420 int i, ret = 0, err;
421 long long bud_bytes;
411 422
412 if (c->jheads) 423 if (c->jheads) {
413 for (i = 0; i < c->jhead_cnt; i++) { 424 for (i = 0; i < c->jhead_cnt; i++) {
414 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 425 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
415 if (err && !ret) 426 if (err && !ret)
416 ret = err; 427 ret = err;
417 } 428 }
429
430 /* Commit the journal unless it has too little data */
431 spin_lock(&c->buds_lock);
432 bud_bytes = c->bud_bytes;
433 spin_unlock(&c->buds_lock);
434 if (bud_bytes > c->leb_size) {
435 err = ubifs_run_commit(c);
436 if (err)
437 return err;
438 }
439 }
440
418 /* 441 /*
419 * We ought to call sync for c->ubi but it does not have one. If it had 442 * We ought to call sync for c->ubi but it does not have one. If it had
420 * it would in turn call mtd->sync, however mtd operations are 443 * it would in turn call mtd->sync, however mtd operations are
@@ -538,6 +561,18 @@ static int init_constants_early(struct ubifs_info *c)
538 * calculations when reporting free space. 561 * calculations when reporting free space.
539 */ 562 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; 563 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
564 /* Buffer size for bulk-reads */
565 c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
566 if (c->bulk_read_buf_size > c->leb_size)
567 c->bulk_read_buf_size = c->leb_size;
568 if (c->bulk_read_buf_size > 128 * 1024) {
569 /* Check if we can kmalloc more than 128KiB */
570 void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
571
572 kfree(try);
573 if (!try)
574 c->bulk_read_buf_size = 128 * 1024;
575 }
541 return 0; 576 return 0;
542} 577}
543 578
@@ -840,17 +875,29 @@ static int check_volume_empty(struct ubifs_info *c)
840 * 875 *
841 * Opt_fast_unmount: do not run a journal commit before un-mounting 876 * Opt_fast_unmount: do not run a journal commit before un-mounting
842 * Opt_norm_unmount: run a journal commit before un-mounting 877 * Opt_norm_unmount: run a journal commit before un-mounting
878 * Opt_bulk_read: enable bulk-reads
879 * Opt_no_bulk_read: disable bulk-reads
880 * Opt_chk_data_crc: check CRCs when reading data nodes
881 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
843 * Opt_err: just end of array marker 882 * Opt_err: just end of array marker
844 */ 883 */
845enum { 884enum {
846 Opt_fast_unmount, 885 Opt_fast_unmount,
847 Opt_norm_unmount, 886 Opt_norm_unmount,
887 Opt_bulk_read,
888 Opt_no_bulk_read,
889 Opt_chk_data_crc,
890 Opt_no_chk_data_crc,
848 Opt_err, 891 Opt_err,
849}; 892};
850 893
851static const match_table_t tokens = { 894static const match_table_t tokens = {
852 {Opt_fast_unmount, "fast_unmount"}, 895 {Opt_fast_unmount, "fast_unmount"},
853 {Opt_norm_unmount, "norm_unmount"}, 896 {Opt_norm_unmount, "norm_unmount"},
897 {Opt_bulk_read, "bulk_read"},
898 {Opt_no_bulk_read, "no_bulk_read"},
899 {Opt_chk_data_crc, "chk_data_crc"},
900 {Opt_no_chk_data_crc, "no_chk_data_crc"},
854 {Opt_err, NULL}, 901 {Opt_err, NULL},
855}; 902};
856 903
@@ -888,6 +935,22 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
888 c->mount_opts.unmount_mode = 1; 935 c->mount_opts.unmount_mode = 1;
889 c->fast_unmount = 0; 936 c->fast_unmount = 0;
890 break; 937 break;
938 case Opt_bulk_read:
939 c->mount_opts.bulk_read = 2;
940 c->bulk_read = 1;
941 break;
942 case Opt_no_bulk_read:
943 c->mount_opts.bulk_read = 1;
944 c->bulk_read = 0;
945 break;
946 case Opt_chk_data_crc:
947 c->mount_opts.chk_data_crc = 2;
948 c->no_chk_data_crc = 0;
949 break;
950 case Opt_no_chk_data_crc:
951 c->mount_opts.chk_data_crc = 1;
952 c->no_chk_data_crc = 1;
953 break;
891 default: 954 default:
892 ubifs_err("unrecognized mount option \"%s\" " 955 ubifs_err("unrecognized mount option \"%s\" "
893 "or missing value", p); 956 "or missing value", p);
@@ -996,6 +1059,8 @@ static int mount_ubifs(struct ubifs_info *c)
996 goto out_free; 1059 goto out_free;
997 } 1060 }
998 1061
1062 c->always_chk_crc = 1;
1063
999 err = ubifs_read_superblock(c); 1064 err = ubifs_read_superblock(c);
1000 if (err) 1065 if (err)
1001 goto out_free; 1066 goto out_free;
@@ -1032,8 +1097,6 @@ static int mount_ubifs(struct ubifs_info *c)
1032 1097
1033 /* Create background thread */ 1098 /* Create background thread */
1034 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1099 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1035 if (!c->bgt)
1036 c->bgt = ERR_PTR(-EINVAL);
1037 if (IS_ERR(c->bgt)) { 1100 if (IS_ERR(c->bgt)) {
1038 err = PTR_ERR(c->bgt); 1101 err = PTR_ERR(c->bgt);
1039 c->bgt = NULL; 1102 c->bgt = NULL;
@@ -1139,24 +1202,28 @@ static int mount_ubifs(struct ubifs_info *c)
1139 if (err) 1202 if (err)
1140 goto out_infos; 1203 goto out_infos;
1141 1204
1205 c->always_chk_crc = 0;
1206
1142 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1207 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1143 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1208 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
1144 if (mounted_read_only) 1209 if (mounted_read_only)
1145 ubifs_msg("mounted read-only"); 1210 ubifs_msg("mounted read-only");
1146 x = (long long)c->main_lebs * c->leb_size; 1211 x = (long long)c->main_lebs * c->leb_size;
1147 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", 1212 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
1148 x, x >> 10, x >> 20, c->main_lebs); 1213 "LEBs)", x, x >> 10, x >> 20, c->main_lebs);
1149 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; 1214 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1150 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", 1215 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
1151 x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); 1216 "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
1152 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); 1217 ubifs_msg("media format: %d (latest is %d)",
1153 ubifs_msg("media format %d, latest format %d",
1154 c->fmt_version, UBIFS_FORMAT_VERSION); 1218 c->fmt_version, UBIFS_FORMAT_VERSION);
1219 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
1220 ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
1221 c->report_rp_size, c->report_rp_size >> 10);
1155 1222
1156 dbg_msg("compiled on: " __DATE__ " at " __TIME__); 1223 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1157 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); 1224 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1158 dbg_msg("LEB size: %d bytes (%d KiB)", 1225 dbg_msg("LEB size: %d bytes (%d KiB)",
1159 c->leb_size, c->leb_size / 1024); 1226 c->leb_size, c->leb_size >> 10);
1160 dbg_msg("data journal heads: %d", 1227 dbg_msg("data journal heads: %d",
1161 c->jhead_cnt - NONDATA_JHEADS_CNT); 1228 c->jhead_cnt - NONDATA_JHEADS_CNT);
1162 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" 1229 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
@@ -1282,6 +1349,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1282 1349
1283 mutex_lock(&c->umount_mutex); 1350 mutex_lock(&c->umount_mutex);
1284 c->remounting_rw = 1; 1351 c->remounting_rw = 1;
1352 c->always_chk_crc = 1;
1285 1353
1286 /* Check for enough free space */ 1354 /* Check for enough free space */
1287 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1355 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
@@ -1345,20 +1413,20 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1345 1413
1346 /* Create background thread */ 1414 /* Create background thread */
1347 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1415 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1348 if (!c->bgt)
1349 c->bgt = ERR_PTR(-EINVAL);
1350 if (IS_ERR(c->bgt)) { 1416 if (IS_ERR(c->bgt)) {
1351 err = PTR_ERR(c->bgt); 1417 err = PTR_ERR(c->bgt);
1352 c->bgt = NULL; 1418 c->bgt = NULL;
1353 ubifs_err("cannot spawn \"%s\", error %d", 1419 ubifs_err("cannot spawn \"%s\", error %d",
1354 c->bgt_name, err); 1420 c->bgt_name, err);
1355 return err; 1421 goto out;
1356 } 1422 }
1357 wake_up_process(c->bgt); 1423 wake_up_process(c->bgt);
1358 1424
1359 c->orph_buf = vmalloc(c->leb_size); 1425 c->orph_buf = vmalloc(c->leb_size);
1360 if (!c->orph_buf) 1426 if (!c->orph_buf) {
1361 return -ENOMEM; 1427 err = -ENOMEM;
1428 goto out;
1429 }
1362 1430
1363 /* Check for enough log space */ 1431 /* Check for enough log space */
1364 lnum = c->lhead_lnum + 1; 1432 lnum = c->lhead_lnum + 1;
@@ -1385,6 +1453,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1385 dbg_gen("re-mounted read-write"); 1453 dbg_gen("re-mounted read-write");
1386 c->vfs_sb->s_flags &= ~MS_RDONLY; 1454 c->vfs_sb->s_flags &= ~MS_RDONLY;
1387 c->remounting_rw = 0; 1455 c->remounting_rw = 0;
1456 c->always_chk_crc = 0;
1388 mutex_unlock(&c->umount_mutex); 1457 mutex_unlock(&c->umount_mutex);
1389 return 0; 1458 return 0;
1390 1459
@@ -1400,6 +1469,7 @@ out:
1400 c->ileb_buf = NULL; 1469 c->ileb_buf = NULL;
1401 ubifs_lpt_free(c, 1); 1470 ubifs_lpt_free(c, 1);
1402 c->remounting_rw = 0; 1471 c->remounting_rw = 0;
1472 c->always_chk_crc = 0;
1403 mutex_unlock(&c->umount_mutex); 1473 mutex_unlock(&c->umount_mutex);
1404 return err; 1474 return err;
1405} 1475}
@@ -1408,12 +1478,9 @@ out:
1408 * commit_on_unmount - commit the journal when un-mounting. 1478 * commit_on_unmount - commit the journal when un-mounting.
1409 * @c: UBIFS file-system description object 1479 * @c: UBIFS file-system description object
1410 * 1480 *
1411 * This function is called during un-mounting and it commits the journal unless 1481 * This function is called during un-mounting and re-mounting, and it commits
1412 * the "fast unmount" mode is enabled. It also avoids committing the journal if 1482 * the journal unless the "fast unmount" mode is enabled. It also avoids
1413 * it contains too few data. 1483 * committing the journal if it contains too few data.
1414 *
1415 * Sometimes recovery requires the journal to be committed at least once, and
1416 * this function takes care about this.
1417 */ 1484 */
1418static void commit_on_unmount(struct ubifs_info *c) 1485static void commit_on_unmount(struct ubifs_info *c)
1419{ 1486{
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 7634c5970887..d27fd918b9c9 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
284 } 284 }
285 285
286 zn = copy_znode(c, znode); 286 zn = copy_znode(c, znode);
287 if (unlikely(IS_ERR(zn))) 287 if (IS_ERR(zn))
288 return zn; 288 return zn;
289 289
290 if (zbr->len) { 290 if (zbr->len) {
@@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
470 if (node_len != len) 470 if (node_len != len)
471 return 0; 471 return 0;
472 472
473 if (type == UBIFS_DATA_NODE && !c->always_chk_crc)
474 if (c->no_chk_data_crc)
475 return 0;
476
473 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 477 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
474 node_crc = le32_to_cpu(ch->crc); 478 node_crc = le32_to_cpu(ch->crc);
475 if (crc != node_crc) 479 if (crc != node_crc)
@@ -1128,7 +1132,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1128 ubifs_assert(znode == c->zroot.znode); 1132 ubifs_assert(znode == c->zroot.znode);
1129 znode = dirty_cow_znode(c, &c->zroot); 1133 znode = dirty_cow_znode(c, &c->zroot);
1130 } 1134 }
1131 if (unlikely(IS_ERR(znode)) || !p) 1135 if (IS_ERR(znode) || !p)
1132 break; 1136 break;
1133 ubifs_assert(path[p - 1] >= 0); 1137 ubifs_assert(path[p - 1] >= 0);
1134 ubifs_assert(path[p - 1] < znode->child_cnt); 1138 ubifs_assert(path[p - 1] < znode->child_cnt);
@@ -1492,6 +1496,289 @@ out:
1492} 1496}
1493 1497
1494/** 1498/**
1499 * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
1500 * @c: UBIFS file-system description object
1501 * @bu: bulk-read parameters and results
1502 *
1503 * Lookup consecutive data node keys for the same inode that reside
1504 * consecutively in the same LEB.
1505 */
1506int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
1507{
1508 int n, err = 0, lnum = -1, uninitialized_var(offs);
1509 int uninitialized_var(len);
1510 unsigned int block = key_block(c, &bu->key);
1511 struct ubifs_znode *znode;
1512
1513 bu->cnt = 0;
1514 bu->blk_cnt = 0;
1515 bu->eof = 0;
1516
1517 mutex_lock(&c->tnc_mutex);
1518 /* Find first key */
1519 err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
1520 if (err < 0)
1521 goto out;
1522 if (err) {
1523 /* Key found */
1524 len = znode->zbranch[n].len;
1525 /* The buffer must be big enough for at least 1 node */
1526 if (len > bu->buf_len) {
1527 err = -EINVAL;
1528 goto out;
1529 }
1530 /* Add this key */
1531 bu->zbranch[bu->cnt++] = znode->zbranch[n];
1532 bu->blk_cnt += 1;
1533 lnum = znode->zbranch[n].lnum;
1534 offs = ALIGN(znode->zbranch[n].offs + len, 8);
1535 }
1536 while (1) {
1537 struct ubifs_zbranch *zbr;
1538 union ubifs_key *key;
1539 unsigned int next_block;
1540
1541 /* Find next key */
1542 err = tnc_next(c, &znode, &n);
1543 if (err)
1544 goto out;
1545 zbr = &znode->zbranch[n];
1546 key = &zbr->key;
1547 /* See if there is another data key for this file */
1548 if (key_inum(c, key) != key_inum(c, &bu->key) ||
1549 key_type(c, key) != UBIFS_DATA_KEY) {
1550 err = -ENOENT;
1551 goto out;
1552 }
1553 if (lnum < 0) {
1554 /* First key found */
1555 lnum = zbr->lnum;
1556 offs = ALIGN(zbr->offs + zbr->len, 8);
1557 len = zbr->len;
1558 if (len > bu->buf_len) {
1559 err = -EINVAL;
1560 goto out;
1561 }
1562 } else {
1563 /*
1564 * The data nodes must be in consecutive positions in
1565 * the same LEB.
1566 */
1567 if (zbr->lnum != lnum || zbr->offs != offs)
1568 goto out;
1569 offs += ALIGN(zbr->len, 8);
1570 len = ALIGN(len, 8) + zbr->len;
1571 /* Must not exceed buffer length */
1572 if (len > bu->buf_len)
1573 goto out;
1574 }
1575 /* Allow for holes */
1576 next_block = key_block(c, key);
1577 bu->blk_cnt += (next_block - block - 1);
1578 if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1579 goto out;
1580 block = next_block;
1581 /* Add this key */
1582 bu->zbranch[bu->cnt++] = *zbr;
1583 bu->blk_cnt += 1;
1584 /* See if we have room for more */
1585 if (bu->cnt >= UBIFS_MAX_BULK_READ)
1586 goto out;
1587 if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1588 goto out;
1589 }
1590out:
1591 if (err == -ENOENT) {
1592 bu->eof = 1;
1593 err = 0;
1594 }
1595 bu->gc_seq = c->gc_seq;
1596 mutex_unlock(&c->tnc_mutex);
1597 if (err)
1598 return err;
1599 /*
1600 * An enormous hole could cause bulk-read to encompass too many
1601 * page cache pages, so limit the number here.
1602 */
1603 if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
1604 bu->blk_cnt = UBIFS_MAX_BULK_READ;
1605 /*
1606 * Ensure that bulk-read covers a whole number of page cache
1607 * pages.
1608 */
1609 if (UBIFS_BLOCKS_PER_PAGE == 1 ||
1610 !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
1611 return 0;
1612 if (bu->eof) {
1613 /* At the end of file we can round up */
1614 bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
1615 return 0;
1616 }
1617 /* Exclude data nodes that do not make up a whole page cache page */
1618 block = key_block(c, &bu->key) + bu->blk_cnt;
1619 block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
1620 while (bu->cnt) {
1621 if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
1622 break;
1623 bu->cnt -= 1;
1624 }
1625 return 0;
1626}
1627
1628/**
1629 * read_wbuf - bulk-read from a LEB with a wbuf.
1630 * @wbuf: wbuf that may overlap the read
1631 * @buf: buffer into which to read
1632 * @len: read length
1633 * @lnum: LEB number from which to read
1634 * @offs: offset from which to read
1635 *
1636 * This functions returns %0 on success or a negative error code on failure.
1637 */
1638static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1639 int offs)
1640{
1641 const struct ubifs_info *c = wbuf->c;
1642 int rlen, overlap;
1643
1644 dbg_io("LEB %d:%d, length %d", lnum, offs, len);
1645 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
1646 ubifs_assert(!(offs & 7) && offs < c->leb_size);
1647 ubifs_assert(offs + len <= c->leb_size);
1648
1649 spin_lock(&wbuf->lock);
1650 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
1651 if (!overlap) {
1652 /* We may safely unlock the write-buffer and read the data */
1653 spin_unlock(&wbuf->lock);
1654 return ubi_read(c->ubi, lnum, buf, offs, len);
1655 }
1656
1657 /* Don't read under wbuf */
1658 rlen = wbuf->offs - offs;
1659 if (rlen < 0)
1660 rlen = 0;
1661
1662 /* Copy the rest from the write-buffer */
1663 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
1664 spin_unlock(&wbuf->lock);
1665
1666 if (rlen > 0)
1667 /* Read everything that goes before write-buffer */
1668 return ubi_read(c->ubi, lnum, buf, offs, rlen);
1669
1670 return 0;
1671}
1672
1673/**
1674 * validate_data_node - validate data nodes for bulk-read.
1675 * @c: UBIFS file-system description object
1676 * @buf: buffer containing data node to validate
1677 * @zbr: zbranch of data node to validate
1678 *
1679 * This functions returns %0 on success or a negative error code on failure.
1680 */
1681static int validate_data_node(struct ubifs_info *c, void *buf,
1682 struct ubifs_zbranch *zbr)
1683{
1684 union ubifs_key key1;
1685 struct ubifs_ch *ch = buf;
1686 int err, len;
1687
1688 if (ch->node_type != UBIFS_DATA_NODE) {
1689 ubifs_err("bad node type (%d but expected %d)",
1690 ch->node_type, UBIFS_DATA_NODE);
1691 goto out_err;
1692 }
1693
1694 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
1695 if (err) {
1696 ubifs_err("expected node type %d", UBIFS_DATA_NODE);
1697 goto out;
1698 }
1699
1700 len = le32_to_cpu(ch->len);
1701 if (len != zbr->len) {
1702 ubifs_err("bad node length %d, expected %d", len, zbr->len);
1703 goto out_err;
1704 }
1705
1706 /* Make sure the key of the read node is correct */
1707 key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
1708 if (!keys_eq(c, &zbr->key, &key1)) {
1709 ubifs_err("bad key in node at LEB %d:%d",
1710 zbr->lnum, zbr->offs);
1711 dbg_tnc("looked for key %s found node's key %s",
1712 DBGKEY(&zbr->key), DBGKEY1(&key1));
1713 goto out_err;
1714 }
1715
1716 return 0;
1717
1718out_err:
1719 err = -EINVAL;
1720out:
1721 ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
1722 dbg_dump_node(c, buf);
1723 dbg_dump_stack();
1724 return err;
1725}
1726
1727/**
1728 * ubifs_tnc_bulk_read - read a number of data nodes in one go.
1729 * @c: UBIFS file-system description object
1730 * @bu: bulk-read parameters and results
1731 *
1732 * This functions reads and validates the data nodes that were identified by the
1733 * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success,
1734 * -EAGAIN to indicate a race with GC, or another negative error code on
1735 * failure.
1736 */
1737int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1738{
1739 int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
1740 struct ubifs_wbuf *wbuf;
1741 void *buf;
1742
1743 len = bu->zbranch[bu->cnt - 1].offs;
1744 len += bu->zbranch[bu->cnt - 1].len - offs;
1745 if (len > bu->buf_len) {
1746 ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
1747 return -EINVAL;
1748 }
1749
1750 /* Do the read */
1751 wbuf = ubifs_get_wbuf(c, lnum);
1752 if (wbuf)
1753 err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
1754 else
1755 err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
1756
1757 /* Check for a race with GC */
1758 if (maybe_leb_gced(c, lnum, bu->gc_seq))
1759 return -EAGAIN;
1760
1761 if (err && err != -EBADMSG) {
1762 ubifs_err("failed to read from LEB %d:%d, error %d",
1763 lnum, offs, err);
1764 dbg_dump_stack();
1765 dbg_tnc("key %s", DBGKEY(&bu->key));
1766 return err;
1767 }
1768
1769 /* Validate the nodes read */
1770 buf = bu->buf;
1771 for (i = 0; i < bu->cnt; i++) {
1772 err = validate_data_node(c, buf, &bu->zbranch[i]);
1773 if (err)
1774 return err;
1775 buf = buf + ALIGN(bu->zbranch[i].len, 8);
1776 }
1777
1778 return 0;
1779}
1780
1781/**
1495 * do_lookup_nm- look up a "hashed" node. 1782 * do_lookup_nm- look up a "hashed" node.
1496 * @c: UBIFS file-system description object 1783 * @c: UBIFS file-system description object
1497 * @key: node key to lookup 1784 * @key: node key to lookup
@@ -1675,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
1675{ 1962{
1676 struct ubifs_znode *zn, *zi, *zp; 1963 struct ubifs_znode *zn, *zi, *zp;
1677 int i, keep, move, appending = 0; 1964 int i, keep, move, appending = 0;
1678 union ubifs_key *key = &zbr->key; 1965 union ubifs_key *key = &zbr->key, *key1;
1679 1966
1680 ubifs_assert(n >= 0 && n <= c->fanout); 1967 ubifs_assert(n >= 0 && n <= c->fanout);
1681 1968
@@ -1716,20 +2003,33 @@ again:
1716 zn->level = znode->level; 2003 zn->level = znode->level;
1717 2004
1718 /* Decide where to split */ 2005 /* Decide where to split */
1719 if (znode->level == 0 && n == c->fanout && 2006 if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
1720 key_type(c, key) == UBIFS_DATA_KEY) { 2007 /* Try not to split consecutive data keys */
1721 union ubifs_key *key1; 2008 if (n == c->fanout) {
1722 2009 key1 = &znode->zbranch[n - 1].key;
1723 /* 2010 if (key_inum(c, key1) == key_inum(c, key) &&
1724 * If this is an inode which is being appended - do not split 2011 key_type(c, key1) == UBIFS_DATA_KEY)
1725 * it because no other zbranches can be inserted between 2012 appending = 1;
1726 * zbranches of consecutive data nodes anyway. 2013 } else
1727 */ 2014 goto check_split;
1728 key1 = &znode->zbranch[n - 1].key; 2015 } else if (appending && n != c->fanout) {
1729 if (key_inum(c, key1) == key_inum(c, key) && 2016 /* Try not to split consecutive data keys */
1730 key_type(c, key1) == UBIFS_DATA_KEY && 2017 appending = 0;
1731 key_block(c, key1) == key_block(c, key) - 1) 2018check_split:
1732 appending = 1; 2019 if (n >= (c->fanout + 1) / 2) {
2020 key1 = &znode->zbranch[0].key;
2021 if (key_inum(c, key1) == key_inum(c, key) &&
2022 key_type(c, key1) == UBIFS_DATA_KEY) {
2023 key1 = &znode->zbranch[n].key;
2024 if (key_inum(c, key1) != key_inum(c, key) ||
2025 key_type(c, key1) != UBIFS_DATA_KEY) {
2026 keep = n;
2027 move = c->fanout - keep;
2028 zi = znode;
2029 goto do_split;
2030 }
2031 }
2032 }
1733 } 2033 }
1734 2034
1735 if (appending) { 2035 if (appending) {
@@ -1759,6 +2059,8 @@ again:
1759 zbr->znode->parent = zn; 2059 zbr->znode->parent = zn;
1760 } 2060 }
1761 2061
2062do_split:
2063
1762 __set_bit(DIRTY_ZNODE, &zn->flags); 2064 __set_bit(DIRTY_ZNODE, &zn->flags);
1763 atomic_long_inc(&c->dirty_zn_cnt); 2065 atomic_long_inc(&c->dirty_zn_cnt);
1764 2066
@@ -1785,14 +2087,11 @@ again:
1785 2087
1786 /* Insert new znode (produced by spitting) into the parent */ 2088 /* Insert new znode (produced by spitting) into the parent */
1787 if (zp) { 2089 if (zp) {
1788 i = n; 2090 if (n == 0 && zi == znode && znode->iip == 0)
2091 correct_parent_keys(c, znode);
2092
1789 /* Locate insertion point */ 2093 /* Locate insertion point */
1790 n = znode->iip + 1; 2094 n = znode->iip + 1;
1791 if (appending && n != c->fanout)
1792 appending = 0;
1793
1794 if (i == 0 && zi == znode && znode->iip == 0)
1795 correct_parent_keys(c, znode);
1796 2095
1797 /* Tail recursion */ 2096 /* Tail recursion */
1798 zbr->key = zn->zbranch[0].key; 2097 zbr->key = zn->zbranch[0].key;
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index a25c1cc1f8d9..b48db999903e 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
480 } 480 }
481 481
482 /* Make sure the key of the read node is correct */ 482 /* Make sure the key of the read node is correct */
483 key_read(c, key, &key1); 483 key_read(c, node + UBIFS_KEY_OFFSET, &key1);
484 if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { 484 if (!keys_eq(c, key, &key1)) {
485 ubifs_err("bad key in node at LEB %d:%d", 485 ubifs_err("bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs); 486 zbr->lnum, zbr->offs);
487 dbg_tnc("looked for key %s found node's key %s", 487 dbg_tnc("looked for key %s found node's key %s",
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index a9ecbd9af20d..0b378042a3a2 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -75,7 +75,6 @@
75 */ 75 */
76#define UBIFS_BLOCK_SIZE 4096 76#define UBIFS_BLOCK_SIZE 4096
77#define UBIFS_BLOCK_SHIFT 12 77#define UBIFS_BLOCK_SHIFT 12
78#define UBIFS_BLOCK_MASK 0x00000FFF
79 78
80/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ 79/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
81#define UBIFS_PADDING_BYTE 0xCE 80#define UBIFS_PADDING_BYTE 0xCE
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 17c620b93eec..a7bd32fa15b9 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -142,6 +142,9 @@
142/* Maximum expected tree height for use by bottom_up_buf */ 142/* Maximum expected tree height for use by bottom_up_buf */
143#define BOTTOM_UP_HEIGHT 64 143#define BOTTOM_UP_HEIGHT 64
144 144
145/* Maximum number of data nodes to bulk-read */
146#define UBIFS_MAX_BULK_READ 32
147
145/* 148/*
146 * Lockdep classes for UBIFS inode @ui_mutex. 149 * Lockdep classes for UBIFS inode @ui_mutex.
147 */ 150 */
@@ -328,9 +331,10 @@ struct ubifs_gced_idx_leb {
328 * this inode 331 * this inode
329 * @dirty: non-zero if the inode is dirty 332 * @dirty: non-zero if the inode is dirty
330 * @xattr: non-zero if this is an extended attribute inode 333 * @xattr: non-zero if this is an extended attribute inode
334 * @bulk_read: non-zero if bulk-read should be used
331 * @ui_mutex: serializes inode write-back with the rest of VFS operations, 335 * @ui_mutex: serializes inode write-back with the rest of VFS operations,
332 * serializes "clean <-> dirty" state changes, protects @dirty, 336 * serializes "clean <-> dirty" state changes, serializes bulk-read,
333 * @ui_size, and @xattr_size 337 * protects @dirty, @bulk_read, @ui_size, and @xattr_size
334 * @ui_lock: protects @synced_i_size 338 * @ui_lock: protects @synced_i_size
335 * @synced_i_size: synchronized size of inode, i.e. the value of inode size 339 * @synced_i_size: synchronized size of inode, i.e. the value of inode size
336 * currently stored on the flash; used only for regular file 340 * currently stored on the flash; used only for regular file
@@ -338,6 +342,8 @@ struct ubifs_gced_idx_leb {
338 * @ui_size: inode size used by UBIFS when writing to flash 342 * @ui_size: inode size used by UBIFS when writing to flash
339 * @flags: inode flags (@UBIFS_COMPR_FL, etc) 343 * @flags: inode flags (@UBIFS_COMPR_FL, etc)
340 * @compr_type: default compression type used for this inode 344 * @compr_type: default compression type used for this inode
345 * @last_page_read: page number of last page read (for bulk read)
346 * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
341 * @data_len: length of the data attached to the inode 347 * @data_len: length of the data attached to the inode
342 * @data: inode's data 348 * @data: inode's data
343 * 349 *
@@ -379,12 +385,15 @@ struct ubifs_inode {
379 unsigned int xattr_names; 385 unsigned int xattr_names;
380 unsigned int dirty:1; 386 unsigned int dirty:1;
381 unsigned int xattr:1; 387 unsigned int xattr:1;
388 unsigned int bulk_read:1;
382 struct mutex ui_mutex; 389 struct mutex ui_mutex;
383 spinlock_t ui_lock; 390 spinlock_t ui_lock;
384 loff_t synced_i_size; 391 loff_t synced_i_size;
385 loff_t ui_size; 392 loff_t ui_size;
386 int flags; 393 int flags;
387 int compr_type; 394 int compr_type;
395 pgoff_t last_page_read;
396 pgoff_t read_in_a_row;
388 int data_len; 397 int data_len;
389 void *data; 398 void *data;
390}; 399};
@@ -698,8 +707,8 @@ struct ubifs_jhead {
698 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. 707 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
699 * @key: key 708 * @key: key
700 * @znode: znode address in memory 709 * @znode: znode address in memory
701 * @lnum: LEB number of the indexing node 710 * @lnum: LEB number of the target node (indexing node or data node)
702 * @offs: offset of the indexing node within @lnum 711 * @offs: target node offset within @lnum
703 * @len: target node length 712 * @len: target node length
704 */ 713 */
705struct ubifs_zbranch { 714struct ubifs_zbranch {
@@ -744,6 +753,28 @@ struct ubifs_znode {
744}; 753};
745 754
746/** 755/**
756 * struct bu_info - bulk-read information
757 * @key: first data node key
758 * @zbranch: zbranches of data nodes to bulk read
759 * @buf: buffer to read into
760 * @buf_len: buffer length
761 * @gc_seq: GC sequence number to detect races with GC
762 * @cnt: number of data nodes for bulk read
763 * @blk_cnt: number of data blocks including holes
764 * @oef: end of file reached
765 */
766struct bu_info {
767 union ubifs_key key;
768 struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
769 void *buf;
770 int buf_len;
771 int gc_seq;
772 int cnt;
773 int blk_cnt;
774 int eof;
775};
776
777/**
747 * struct ubifs_node_range - node length range description data structure. 778 * struct ubifs_node_range - node length range description data structure.
748 * @len: fixed node length 779 * @len: fixed node length
749 * @min_len: minimum possible node length 780 * @min_len: minimum possible node length
@@ -862,9 +893,13 @@ struct ubifs_orphan {
862/** 893/**
863 * struct ubifs_mount_opts - UBIFS-specific mount options information. 894 * struct ubifs_mount_opts - UBIFS-specific mount options information.
864 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 895 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
896 * @bulk_read: enable bulk-reads
897 * @chk_data_crc: check CRCs when reading data nodes
865 */ 898 */
866struct ubifs_mount_opts { 899struct ubifs_mount_opts {
867 unsigned int unmount_mode:2; 900 unsigned int unmount_mode:2;
901 unsigned int bulk_read:2;
902 unsigned int chk_data_crc:2;
868}; 903};
869 904
870/** 905/**
@@ -905,13 +940,12 @@ struct ubifs_mount_opts {
905 * @cmt_state: commit state 940 * @cmt_state: commit state
906 * @cs_lock: commit state lock 941 * @cs_lock: commit state lock
907 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 942 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
943 *
908 * @fast_unmount: do not run journal commit before un-mounting 944 * @fast_unmount: do not run journal commit before un-mounting
909 * @big_lpt: flag that LPT is too big to write whole during commit 945 * @big_lpt: flag that LPT is too big to write whole during commit
910 * @check_lpt_free: flag that indicates LPT GC may be needed 946 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
911 * @nospace: non-zero if the file-system does not have flash space (used as 947 * recovery)
912 * optimization) 948 * @bulk_read: enable bulk-reads
913 * @nospace_rp: the same as @nospace, but additionally means that even reserved
914 * pool is full
915 * 949 *
916 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and 950 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
917 * @calc_idx_sz 951 * @calc_idx_sz
@@ -935,6 +969,7 @@ struct ubifs_mount_opts {
935 * @mst_node: master node 969 * @mst_node: master node
936 * @mst_offs: offset of valid master node 970 * @mst_offs: offset of valid master node
937 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs 971 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
972 * @bulk_read_buf_size: buffer size for bulk-reads
938 * 973 *
939 * @log_lebs: number of logical eraseblocks in the log 974 * @log_lebs: number of logical eraseblocks in the log
940 * @log_bytes: log size in bytes 975 * @log_bytes: log size in bytes
@@ -977,12 +1012,17 @@ struct ubifs_mount_opts {
977 * but which still have to be taken into account because 1012 * but which still have to be taken into account because
978 * the index has not been committed so far 1013 * the index has not been committed so far
979 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, 1014 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
980 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; 1015 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1016 * @nospace, and @nospace_rp;
981 * @min_idx_lebs: minimum number of LEBs required for the index 1017 * @min_idx_lebs: minimum number of LEBs required for the index
982 * @old_idx_sz: size of index on flash 1018 * @old_idx_sz: size of index on flash
983 * @calc_idx_sz: temporary variable which is used to calculate new index size 1019 * @calc_idx_sz: temporary variable which is used to calculate new index size
984 * (contains accurate new index size at end of TNC commit start) 1020 * (contains accurate new index size at end of TNC commit start)
985 * @lst: lprops statistics 1021 * @lst: lprops statistics
1022 * @nospace: non-zero if the file-system does not have flash space (used as
1023 * optimization)
1024 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1025 * pool is full
986 * 1026 *
987 * @page_budget: budget for a page 1027 * @page_budget: budget for a page
988 * @inode_budget: budget for an inode 1028 * @inode_budget: budget for an inode
@@ -1061,6 +1101,7 @@ struct ubifs_mount_opts {
1061 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab 1101 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
1062 * @dirty_nn_cnt: number of dirty nnodes 1102 * @dirty_nn_cnt: number of dirty nnodes
1063 * @dirty_pn_cnt: number of dirty pnodes 1103 * @dirty_pn_cnt: number of dirty pnodes
1104 * @check_lpt_free: flag that indicates LPT GC may be needed
1064 * @lpt_sz: LPT size 1105 * @lpt_sz: LPT size
1065 * @lpt_nod_buf: buffer for an on-flash nnode or pnode 1106 * @lpt_nod_buf: buffer for an on-flash nnode or pnode
1066 * @lpt_buf: buffer of LEB size used by LPT 1107 * @lpt_buf: buffer of LEB size used by LPT
@@ -1102,6 +1143,7 @@ struct ubifs_mount_opts {
1102 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw 1143 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
1103 * @size_tree: inode size information for recovery 1144 * @size_tree: inode size information for recovery
1104 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) 1145 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
1146 * @always_chk_crc: always check CRCs (while mounting and remounting rw)
1105 * @mount_opts: UBIFS-specific mount options 1147 * @mount_opts: UBIFS-specific mount options
1106 * 1148 *
1107 * @dbg_buf: a buffer of LEB size used for debugging purposes 1149 * @dbg_buf: a buffer of LEB size used for debugging purposes
@@ -1146,11 +1188,11 @@ struct ubifs_info {
1146 int cmt_state; 1188 int cmt_state;
1147 spinlock_t cs_lock; 1189 spinlock_t cs_lock;
1148 wait_queue_head_t cmt_wq; 1190 wait_queue_head_t cmt_wq;
1191
1149 unsigned int fast_unmount:1; 1192 unsigned int fast_unmount:1;
1150 unsigned int big_lpt:1; 1193 unsigned int big_lpt:1;
1151 unsigned int check_lpt_free:1; 1194 unsigned int no_chk_data_crc:1;
1152 unsigned int nospace:1; 1195 unsigned int bulk_read:1;
1153 unsigned int nospace_rp:1;
1154 1196
1155 struct mutex tnc_mutex; 1197 struct mutex tnc_mutex;
1156 struct ubifs_zbranch zroot; 1198 struct ubifs_zbranch zroot;
@@ -1175,6 +1217,7 @@ struct ubifs_info {
1175 struct ubifs_mst_node *mst_node; 1217 struct ubifs_mst_node *mst_node;
1176 int mst_offs; 1218 int mst_offs;
1177 struct mutex mst_mutex; 1219 struct mutex mst_mutex;
1220 int bulk_read_buf_size;
1178 1221
1179 int log_lebs; 1222 int log_lebs;
1180 long long log_bytes; 1223 long long log_bytes;
@@ -1218,6 +1261,8 @@ struct ubifs_info {
1218 unsigned long long old_idx_sz; 1261 unsigned long long old_idx_sz;
1219 unsigned long long calc_idx_sz; 1262 unsigned long long calc_idx_sz;
1220 struct ubifs_lp_stats lst; 1263 struct ubifs_lp_stats lst;
1264 unsigned int nospace:1;
1265 unsigned int nospace_rp:1;
1221 1266
1222 int page_budget; 1267 int page_budget;
1223 int inode_budget; 1268 int inode_budget;
@@ -1294,6 +1339,7 @@ struct ubifs_info {
1294 int lpt_drty_flgs; 1339 int lpt_drty_flgs;
1295 int dirty_nn_cnt; 1340 int dirty_nn_cnt;
1296 int dirty_pn_cnt; 1341 int dirty_pn_cnt;
1342 int check_lpt_free;
1297 long long lpt_sz; 1343 long long lpt_sz;
1298 void *lpt_nod_buf; 1344 void *lpt_nod_buf;
1299 void *lpt_buf; 1345 void *lpt_buf;
@@ -1335,6 +1381,7 @@ struct ubifs_info {
1335 struct ubifs_mst_node *rcvrd_mst_node; 1381 struct ubifs_mst_node *rcvrd_mst_node;
1336 struct rb_root size_tree; 1382 struct rb_root size_tree;
1337 int remounting_rw; 1383 int remounting_rw;
1384 int always_chk_crc;
1338 struct ubifs_mount_opts mount_opts; 1385 struct ubifs_mount_opts mount_opts;
1339 1386
1340#ifdef CONFIG_UBIFS_FS_DEBUG 1387#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -1347,6 +1394,12 @@ struct ubifs_info {
1347 unsigned long fail_timeout; 1394 unsigned long fail_timeout;
1348 unsigned int fail_cnt; 1395 unsigned int fail_cnt;
1349 unsigned int fail_cnt_max; 1396 unsigned int fail_cnt_max;
1397 long long chk_lpt_sz;
1398 long long chk_lpt_sz2;
1399 long long chk_lpt_wastage;
1400 int chk_lpt_lebs;
1401 int new_nhead_lnum;
1402 int new_nhead_offs;
1350#endif 1403#endif
1351}; 1404};
1352 1405
@@ -1377,7 +1430,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
1377int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, 1430int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
1378 int offs, int dtype); 1431 int offs, int dtype);
1379int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 1432int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
1380 int offs, int quiet); 1433 int offs, int quiet, int chk_crc);
1381void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); 1434void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
1382void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); 1435void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
1383int ubifs_io_init(struct ubifs_info *c); 1436int ubifs_io_init(struct ubifs_info *c);
@@ -1490,6 +1543,8 @@ void destroy_old_idx(struct ubifs_info *c);
1490int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, 1543int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
1491 int lnum, int offs); 1544 int lnum, int offs);
1492int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); 1545int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
1546int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
1547int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
1493 1548
1494/* tnc_misc.c */ 1549/* tnc_misc.c */
1495struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, 1550struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
@@ -1586,12 +1641,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c);
1586void ubifs_lpt_free(struct ubifs_info *c, int wr_only); 1641void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
1587 1642
1588/* lprops.c */ 1643/* lprops.c */
1589void ubifs_get_lprops(struct ubifs_info *c);
1590const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, 1644const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
1591 const struct ubifs_lprops *lp, 1645 const struct ubifs_lprops *lp,
1592 int free, int dirty, int flags, 1646 int free, int dirty, int flags,
1593 int idx_gc_cnt); 1647 int idx_gc_cnt);
1594void ubifs_release_lprops(struct ubifs_info *c);
1595void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); 1648void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
1596void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, 1649void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1597 int cat); 1650 int cat);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 649bec78b645..cfd31e229c89 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
446 int type; 446 int type;
447 447
448 xent = ubifs_tnc_next_ent(c, &key, &nm); 448 xent = ubifs_tnc_next_ent(c, &key, &nm);
449 if (unlikely(IS_ERR(xent))) { 449 if (IS_ERR(xent)) {
450 err = PTR_ERR(xent); 450 err = PTR_ERR(xent);
451 break; 451 break;
452 } 452 }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 7227b2efef22..e39013619b26 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1323,7 +1323,7 @@ xfs_fs_remount(
1323 "XFS: mount option \"%s\" not supported for remount\n", p); 1323 "XFS: mount option \"%s\" not supported for remount\n", p);
1324 return -EINVAL; 1324 return -EINVAL;
1325#else 1325#else
1326 return 0; 1326 break;
1327#endif 1327#endif
1328 } 1328 }
1329 } 1329 }