diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-24 06:48:46 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-24 06:48:46 -0400 |
commit | 8c82a17e9c924c0e9f13e75e4c2f6bca19a4b516 (patch) | |
tree | d535f46a917e14e90deccb29ad00aac016ad18dd /fs | |
parent | 4ce72a2c063a7fa8e42a9435440ae3364115a58d (diff) | |
parent | 57f8f7b60db6f1ed2c6918ab9230c4623a9dbe37 (diff) |
Merge commit 'v2.6.28-rc1' into sched/urgent
Diffstat (limited to 'fs')
186 files changed, 4929 insertions, 3793 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index c061c3f18e7c..24eb01087b6d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #include <linux/parser.h> | 30 | #include <linux/parser.h> |
31 | #include <linux/idr.h> | 31 | #include <linux/idr.h> |
32 | #include <net/9p/9p.h> | 32 | #include <net/9p/9p.h> |
33 | #include <net/9p/transport.h> | ||
34 | #include <net/9p/client.h> | 33 | #include <net/9p/client.h> |
34 | #include <net/9p/transport.h> | ||
35 | #include "v9fs.h" | 35 | #include "v9fs.h" |
36 | #include "v9fs_vfs.h" | 36 | #include "v9fs_vfs.h" |
37 | 37 | ||
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
234 | if (!v9ses->clnt->dotu) | 234 | if (!v9ses->clnt->dotu) |
235 | v9ses->flags &= ~V9FS_EXTENDED; | 235 | v9ses->flags &= ~V9FS_EXTENDED; |
236 | 236 | ||
237 | v9ses->maxdata = v9ses->clnt->msize; | 237 | v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; |
238 | 238 | ||
239 | /* for legacy mode, fall back to V9FS_ACCESS_ANY */ | 239 | /* for legacy mode, fall back to V9FS_ACCESS_ANY */ |
240 | if (!v9fs_extended(v9ses) && | 240 | if (!v9fs_extended(v9ses) && |
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 57997fa14e69..c295ba786edd 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h | |||
@@ -46,9 +46,11 @@ extern struct dentry_operations v9fs_cached_dentry_operations; | |||
46 | 46 | ||
47 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); | 47 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); |
48 | ino_t v9fs_qid2ino(struct p9_qid *qid); | 48 | ino_t v9fs_qid2ino(struct p9_qid *qid); |
49 | void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *); | 49 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); |
50 | int v9fs_dir_release(struct inode *inode, struct file *filp); | 50 | int v9fs_dir_release(struct inode *inode, struct file *filp); |
51 | int v9fs_file_open(struct inode *inode, struct file *file); | 51 | int v9fs_file_open(struct inode *inode, struct file *file); |
52 | void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); | 52 | void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); |
53 | void v9fs_dentry_release(struct dentry *); | 53 | void v9fs_dentry_release(struct dentry *); |
54 | int v9fs_uflags2omode(int uflags, int extended); | 54 | int v9fs_uflags2omode(int uflags, int extended); |
55 | |||
56 | ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); | ||
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 97d3aed57983..6fcb1e7095cf 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c | |||
@@ -38,7 +38,6 @@ | |||
38 | 38 | ||
39 | #include "v9fs.h" | 39 | #include "v9fs.h" |
40 | #include "v9fs_vfs.h" | 40 | #include "v9fs_vfs.h" |
41 | #include "fid.h" | ||
42 | 41 | ||
43 | /** | 42 | /** |
44 | * v9fs_vfs_readpage - read an entire page in from 9P | 43 | * v9fs_vfs_readpage - read an entire page in from 9P |
@@ -53,14 +52,12 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) | |||
53 | int retval; | 52 | int retval; |
54 | loff_t offset; | 53 | loff_t offset; |
55 | char *buffer; | 54 | char *buffer; |
56 | struct p9_fid *fid; | ||
57 | 55 | ||
58 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 56 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); |
59 | fid = filp->private_data; | ||
60 | buffer = kmap(page); | 57 | buffer = kmap(page); |
61 | offset = page_offset(page); | 58 | offset = page_offset(page); |
62 | 59 | ||
63 | retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE); | 60 | retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE); |
64 | if (retval < 0) | 61 | if (retval < 0) |
65 | goto done; | 62 | goto done; |
66 | 63 | ||
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index e298fe194093..873cd31baa47 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -45,7 +45,7 @@ | |||
45 | * | 45 | * |
46 | */ | 46 | */ |
47 | 47 | ||
48 | static inline int dt_type(struct p9_stat *mistat) | 48 | static inline int dt_type(struct p9_wstat *mistat) |
49 | { | 49 | { |
50 | unsigned long perm = mistat->mode; | 50 | unsigned long perm = mistat->mode; |
51 | int rettype = DT_REG; | 51 | int rettype = DT_REG; |
@@ -69,32 +69,58 @@ static inline int dt_type(struct p9_stat *mistat) | |||
69 | static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | 69 | static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) |
70 | { | 70 | { |
71 | int over; | 71 | int over; |
72 | struct p9_wstat st; | ||
73 | int err; | ||
72 | struct p9_fid *fid; | 74 | struct p9_fid *fid; |
73 | struct v9fs_session_info *v9ses; | 75 | int buflen; |
74 | struct inode *inode; | 76 | char *statbuf; |
75 | struct p9_stat *st; | 77 | int n, i = 0; |
76 | 78 | ||
77 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 79 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); |
78 | inode = filp->f_path.dentry->d_inode; | ||
79 | v9ses = v9fs_inode2v9ses(inode); | ||
80 | fid = filp->private_data; | 80 | fid = filp->private_data; |
81 | while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) { | ||
82 | if (IS_ERR(st)) | ||
83 | return PTR_ERR(st); | ||
84 | 81 | ||
85 | over = filldir(dirent, st->name.str, st->name.len, filp->f_pos, | 82 | buflen = fid->clnt->msize - P9_IOHDRSZ; |
86 | v9fs_qid2ino(&st->qid), dt_type(st)); | 83 | statbuf = kmalloc(buflen, GFP_KERNEL); |
84 | if (!statbuf) | ||
85 | return -ENOMEM; | ||
87 | 86 | ||
88 | if (over) | 87 | while (1) { |
88 | err = v9fs_file_readn(filp, statbuf, NULL, buflen, | ||
89 | fid->rdir_fpos); | ||
90 | if (err <= 0) | ||
89 | break; | 91 | break; |
90 | 92 | ||
91 | filp->f_pos += st->size; | 93 | n = err; |
92 | kfree(st); | 94 | while (i < n) { |
93 | st = NULL; | 95 | err = p9stat_read(statbuf + i, buflen-i, &st, |
96 | fid->clnt->dotu); | ||
97 | if (err) { | ||
98 | P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); | ||
99 | err = -EIO; | ||
100 | p9stat_free(&st); | ||
101 | goto free_and_exit; | ||
102 | } | ||
103 | |||
104 | i += st.size+2; | ||
105 | fid->rdir_fpos += st.size+2; | ||
106 | |||
107 | over = filldir(dirent, st.name, strlen(st.name), | ||
108 | filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st)); | ||
109 | |||
110 | filp->f_pos += st.size+2; | ||
111 | |||
112 | p9stat_free(&st); | ||
113 | |||
114 | if (over) { | ||
115 | err = 0; | ||
116 | goto free_and_exit; | ||
117 | } | ||
118 | } | ||
94 | } | 119 | } |
95 | 120 | ||
96 | kfree(st); | 121 | free_and_exit: |
97 | return 0; | 122 | kfree(statbuf); |
123 | return err; | ||
98 | } | 124 | } |
99 | 125 | ||
100 | 126 | ||
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 52944d2249a4..68bf2af6c389 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
120 | } | 120 | } |
121 | 121 | ||
122 | /** | 122 | /** |
123 | * v9fs_file_read - read from a file | 123 | * v9fs_file_readn - read from a file |
124 | * @filp: file pointer to read | 124 | * @filp: file pointer to read |
125 | * @data: data buffer to read data into | 125 | * @data: data buffer to read data into |
126 | * @udata: user data buffer to read data into | ||
126 | * @count: size of buffer | 127 | * @count: size of buffer |
127 | * @offset: offset at which to read data | 128 | * @offset: offset at which to read data |
128 | * | 129 | * |
129 | */ | 130 | */ |
131 | |||
132 | ssize_t | ||
133 | v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, | ||
134 | u64 offset) | ||
135 | { | ||
136 | int n, total; | ||
137 | struct p9_fid *fid = filp->private_data; | ||
138 | |||
139 | P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, | ||
140 | (long long unsigned) offset, count); | ||
141 | |||
142 | n = 0; | ||
143 | total = 0; | ||
144 | do { | ||
145 | n = p9_client_read(fid, data, udata, offset, count); | ||
146 | if (n <= 0) | ||
147 | break; | ||
148 | |||
149 | if (data) | ||
150 | data += n; | ||
151 | if (udata) | ||
152 | udata += n; | ||
153 | |||
154 | offset += n; | ||
155 | count -= n; | ||
156 | total += n; | ||
157 | } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ)); | ||
158 | |||
159 | if (n < 0) | ||
160 | total = n; | ||
161 | |||
162 | return total; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * v9fs_file_read - read from a file | ||
167 | * @filp: file pointer to read | ||
168 | * @udata: user data buffer to read data into | ||
169 | * @count: size of buffer | ||
170 | * @offset: offset at which to read data | ||
171 | * | ||
172 | */ | ||
173 | |||
130 | static ssize_t | 174 | static ssize_t |
131 | v9fs_file_read(struct file *filp, char __user * data, size_t count, | 175 | v9fs_file_read(struct file *filp, char __user *udata, size_t count, |
132 | loff_t * offset) | 176 | loff_t * offset) |
133 | { | 177 | { |
134 | int ret; | 178 | int ret; |
135 | struct p9_fid *fid; | 179 | struct p9_fid *fid; |
136 | 180 | ||
137 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 181 | P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); |
138 | fid = filp->private_data; | 182 | fid = filp->private_data; |
139 | ret = p9_client_uread(fid, data, *offset, count); | 183 | |
184 | if (count > (fid->clnt->msize - P9_IOHDRSZ)) | ||
185 | ret = v9fs_file_readn(filp, NULL, udata, count, *offset); | ||
186 | else | ||
187 | ret = p9_client_read(fid, NULL, udata, *offset, count); | ||
188 | |||
140 | if (ret > 0) | 189 | if (ret > 0) |
141 | *offset += ret; | 190 | *offset += ret; |
142 | 191 | ||
@@ -156,19 +205,38 @@ static ssize_t | |||
156 | v9fs_file_write(struct file *filp, const char __user * data, | 205 | v9fs_file_write(struct file *filp, const char __user * data, |
157 | size_t count, loff_t * offset) | 206 | size_t count, loff_t * offset) |
158 | { | 207 | { |
159 | int ret; | 208 | int n, rsize, total = 0; |
160 | struct p9_fid *fid; | 209 | struct p9_fid *fid; |
210 | struct p9_client *clnt; | ||
161 | struct inode *inode = filp->f_path.dentry->d_inode; | 211 | struct inode *inode = filp->f_path.dentry->d_inode; |
212 | int origin = *offset; | ||
162 | 213 | ||
163 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, | 214 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, |
164 | (int)count, (int)*offset); | 215 | (int)count, (int)*offset); |
165 | 216 | ||
166 | fid = filp->private_data; | 217 | fid = filp->private_data; |
167 | ret = p9_client_uwrite(fid, data, *offset, count); | 218 | clnt = fid->clnt; |
168 | if (ret > 0) { | 219 | |
169 | invalidate_inode_pages2_range(inode->i_mapping, *offset, | 220 | rsize = fid->iounit; |
170 | *offset+ret); | 221 | if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) |
171 | *offset += ret; | 222 | rsize = clnt->msize - P9_IOHDRSZ; |
223 | |||
224 | do { | ||
225 | if (count < rsize) | ||
226 | rsize = count; | ||
227 | |||
228 | n = p9_client_write(fid, NULL, data+total, *offset+total, | ||
229 | rsize); | ||
230 | if (n <= 0) | ||
231 | break; | ||
232 | count -= n; | ||
233 | total += n; | ||
234 | } while (count > 0); | ||
235 | |||
236 | if (total > 0) { | ||
237 | invalidate_inode_pages2_range(inode->i_mapping, origin, | ||
238 | origin+total); | ||
239 | *offset += total; | ||
172 | } | 240 | } |
173 | 241 | ||
174 | if (*offset > inode->i_size) { | 242 | if (*offset > inode->i_size) { |
@@ -176,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data, | |||
176 | inode->i_blocks = (inode->i_size + 512 - 1) >> 9; | 244 | inode->i_blocks = (inode->i_size + 512 - 1) >> 9; |
177 | } | 245 | } |
178 | 246 | ||
179 | return ret; | 247 | if (n < 0) |
248 | return n; | ||
249 | |||
250 | return total; | ||
180 | } | 251 | } |
181 | 252 | ||
182 | static const struct file_operations v9fs_cached_file_operations = { | 253 | static const struct file_operations v9fs_cached_file_operations = { |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index e83aa5ebe861..8314d3f43b71 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | |||
334 | { | 334 | { |
335 | int err, umode; | 335 | int err, umode; |
336 | struct inode *ret; | 336 | struct inode *ret; |
337 | struct p9_stat *st; | 337 | struct p9_wstat *st; |
338 | 338 | ||
339 | ret = NULL; | 339 | ret = NULL; |
340 | st = p9_client_stat(fid); | 340 | st = p9_client_stat(fid); |
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
417 | struct p9_fid *dfid, *ofid, *fid; | 417 | struct p9_fid *dfid, *ofid, *fid; |
418 | struct inode *inode; | 418 | struct inode *inode; |
419 | 419 | ||
420 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); | ||
421 | |||
420 | err = 0; | 422 | err = 0; |
421 | ofid = NULL; | 423 | ofid = NULL; |
422 | fid = NULL; | 424 | fid = NULL; |
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
424 | dfid = v9fs_fid_clone(dentry->d_parent); | 426 | dfid = v9fs_fid_clone(dentry->d_parent); |
425 | if (IS_ERR(dfid)) { | 427 | if (IS_ERR(dfid)) { |
426 | err = PTR_ERR(dfid); | 428 | err = PTR_ERR(dfid); |
429 | P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err); | ||
427 | dfid = NULL; | 430 | dfid = NULL; |
428 | goto error; | 431 | goto error; |
429 | } | 432 | } |
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
432 | ofid = p9_client_walk(dfid, 0, NULL, 1); | 435 | ofid = p9_client_walk(dfid, 0, NULL, 1); |
433 | if (IS_ERR(ofid)) { | 436 | if (IS_ERR(ofid)) { |
434 | err = PTR_ERR(ofid); | 437 | err = PTR_ERR(ofid); |
438 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | ||
435 | ofid = NULL; | 439 | ofid = NULL; |
436 | goto error; | 440 | goto error; |
437 | } | 441 | } |
438 | 442 | ||
439 | err = p9_client_fcreate(ofid, name, perm, mode, extension); | 443 | err = p9_client_fcreate(ofid, name, perm, mode, extension); |
440 | if (err < 0) | 444 | if (err < 0) { |
445 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); | ||
441 | goto error; | 446 | goto error; |
447 | } | ||
442 | 448 | ||
443 | /* now walk from the parent so we can get unopened fid */ | 449 | /* now walk from the parent so we can get unopened fid */ |
444 | fid = p9_client_walk(dfid, 1, &name, 0); | 450 | fid = p9_client_walk(dfid, 1, &name, 0); |
445 | if (IS_ERR(fid)) { | 451 | if (IS_ERR(fid)) { |
446 | err = PTR_ERR(fid); | 452 | err = PTR_ERR(fid); |
453 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | ||
447 | fid = NULL; | 454 | fid = NULL; |
448 | goto error; | 455 | goto error; |
449 | } else | 456 | } else |
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
453 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 460 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); |
454 | if (IS_ERR(inode)) { | 461 | if (IS_ERR(inode)) { |
455 | err = PTR_ERR(inode); | 462 | err = PTR_ERR(inode); |
463 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | ||
456 | goto error; | 464 | goto error; |
457 | } | 465 | } |
458 | 466 | ||
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
734 | int err; | 742 | int err; |
735 | struct v9fs_session_info *v9ses; | 743 | struct v9fs_session_info *v9ses; |
736 | struct p9_fid *fid; | 744 | struct p9_fid *fid; |
737 | struct p9_stat *st; | 745 | struct p9_wstat *st; |
738 | 746 | ||
739 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 747 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
740 | err = -EPERM; | 748 | err = -EPERM; |
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
815 | */ | 823 | */ |
816 | 824 | ||
817 | void | 825 | void |
818 | v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, | 826 | v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, |
819 | struct super_block *sb) | 827 | struct super_block *sb) |
820 | { | 828 | { |
821 | int n; | ||
822 | char ext[32]; | 829 | char ext[32]; |
823 | struct v9fs_session_info *v9ses = sb->s_fs_info; | 830 | struct v9fs_session_info *v9ses = sb->s_fs_info; |
824 | 831 | ||
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, | |||
842 | int major = -1; | 849 | int major = -1; |
843 | int minor = -1; | 850 | int minor = -1; |
844 | 851 | ||
845 | n = stat->extension.len; | 852 | strncpy(ext, stat->extension, sizeof(ext)); |
846 | if (n > sizeof(ext)-1) | ||
847 | n = sizeof(ext)-1; | ||
848 | memmove(ext, stat->extension.str, n); | ||
849 | ext[n] = 0; | ||
850 | sscanf(ext, "%c %u %u", &type, &major, &minor); | 853 | sscanf(ext, "%c %u %u", &type, &major, &minor); |
851 | switch (type) { | 854 | switch (type) { |
852 | case 'c': | 855 | case 'c': |
@@ -857,10 +860,11 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, | |||
857 | break; | 860 | break; |
858 | default: | 861 | default: |
859 | P9_DPRINTK(P9_DEBUG_ERROR, | 862 | P9_DPRINTK(P9_DEBUG_ERROR, |
860 | "Unknown special type %c (%.*s)\n", type, | 863 | "Unknown special type %c %s\n", type, |
861 | stat->extension.len, stat->extension.str); | 864 | stat->extension); |
862 | }; | 865 | }; |
863 | inode->i_rdev = MKDEV(major, minor); | 866 | inode->i_rdev = MKDEV(major, minor); |
867 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
864 | } else | 868 | } else |
865 | inode->i_rdev = 0; | 869 | inode->i_rdev = 0; |
866 | 870 | ||
@@ -904,7 +908,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
904 | 908 | ||
905 | struct v9fs_session_info *v9ses; | 909 | struct v9fs_session_info *v9ses; |
906 | struct p9_fid *fid; | 910 | struct p9_fid *fid; |
907 | struct p9_stat *st; | 911 | struct p9_wstat *st; |
908 | 912 | ||
909 | P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); | 913 | P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); |
910 | retval = -EPERM; | 914 | retval = -EPERM; |
@@ -926,15 +930,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
926 | } | 930 | } |
927 | 931 | ||
928 | /* copy extension buffer into buffer */ | 932 | /* copy extension buffer into buffer */ |
929 | if (st->extension.len < buflen) | 933 | strncpy(buffer, st->extension, buflen); |
930 | buflen = st->extension.len + 1; | ||
931 | |||
932 | memmove(buffer, st->extension.str, buflen - 1); | ||
933 | buffer[buflen-1] = 0; | ||
934 | 934 | ||
935 | P9_DPRINTK(P9_DEBUG_VFS, | 935 | P9_DPRINTK(P9_DEBUG_VFS, |
936 | "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len, | 936 | "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer); |
937 | st->extension.str, buffer); | ||
938 | 937 | ||
939 | retval = buflen; | 938 | retval = buflen; |
940 | 939 | ||
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index bf59c3960494..d6cb1a0ca724 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
111 | struct inode *inode = NULL; | 111 | struct inode *inode = NULL; |
112 | struct dentry *root = NULL; | 112 | struct dentry *root = NULL; |
113 | struct v9fs_session_info *v9ses = NULL; | 113 | struct v9fs_session_info *v9ses = NULL; |
114 | struct p9_stat *st = NULL; | 114 | struct p9_wstat *st = NULL; |
115 | int mode = S_IRWXUGO | S_ISVTX; | 115 | int mode = S_IRWXUGO | S_ISVTX; |
116 | uid_t uid = current->fsuid; | 116 | uid_t uid = current->fsuid; |
117 | gid_t gid = current->fsgid; | 117 | gid_t gid = current->fsgid; |
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
161 | 161 | ||
162 | sb->s_root = root; | 162 | sb->s_root = root; |
163 | root->d_inode->i_ino = v9fs_qid2ino(&st->qid); | 163 | root->d_inode->i_ino = v9fs_qid2ino(&st->qid); |
164 | |||
164 | v9fs_stat2inode(st, root->d_inode, sb); | 165 | v9fs_stat2inode(st, root->d_inode, sb); |
166 | |||
165 | v9fs_fid_add(root, fid); | 167 | v9fs_fid_add(root, fid); |
168 | p9stat_free(st); | ||
166 | kfree(st); | 169 | kfree(st); |
167 | 170 | ||
171 | P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n"); | ||
168 | return simple_set_mnt(mnt, sb); | 172 | return simple_set_mnt(mnt, sb); |
169 | 173 | ||
170 | release_sb: | 174 | release_sb: |
diff --git a/fs/Kconfig b/fs/Kconfig index 9e9d70c02a07..522469a7eca3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -6,61 +6,9 @@ menu "File systems" | |||
6 | 6 | ||
7 | if BLOCK | 7 | if BLOCK |
8 | 8 | ||
9 | config EXT2_FS | 9 | source "fs/ext2/Kconfig" |
10 | tristate "Second extended fs support" | 10 | source "fs/ext3/Kconfig" |
11 | help | 11 | source "fs/ext4/Kconfig" |
12 | Ext2 is a standard Linux file system for hard disks. | ||
13 | |||
14 | To compile this file system support as a module, choose M here: the | ||
15 | module will be called ext2. | ||
16 | |||
17 | If unsure, say Y. | ||
18 | |||
19 | config EXT2_FS_XATTR | ||
20 | bool "Ext2 extended attributes" | ||
21 | depends on EXT2_FS | ||
22 | help | ||
23 | Extended attributes are name:value pairs associated with inodes by | ||
24 | the kernel or by users (see the attr(5) manual page, or visit | ||
25 | <http://acl.bestbits.at/> for details). | ||
26 | |||
27 | If unsure, say N. | ||
28 | |||
29 | config EXT2_FS_POSIX_ACL | ||
30 | bool "Ext2 POSIX Access Control Lists" | ||
31 | depends on EXT2_FS_XATTR | ||
32 | select FS_POSIX_ACL | ||
33 | help | ||
34 | Posix Access Control Lists (ACLs) support permissions for users and | ||
35 | groups beyond the owner/group/world scheme. | ||
36 | |||
37 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
38 | Linux website <http://acl.bestbits.at/>. | ||
39 | |||
40 | If you don't know what Access Control Lists are, say N | ||
41 | |||
42 | config EXT2_FS_SECURITY | ||
43 | bool "Ext2 Security Labels" | ||
44 | depends on EXT2_FS_XATTR | ||
45 | help | ||
46 | Security labels support alternative access control models | ||
47 | implemented by security modules like SELinux. This option | ||
48 | enables an extended attribute handler for file security | ||
49 | labels in the ext2 filesystem. | ||
50 | |||
51 | If you are not using a security module that requires using | ||
52 | extended attributes for file security labels, say N. | ||
53 | |||
54 | config EXT2_FS_XIP | ||
55 | bool "Ext2 execute in place support" | ||
56 | depends on EXT2_FS && MMU | ||
57 | help | ||
58 | Execute in place can be used on memory-backed block devices. If you | ||
59 | enable this option, you can select to mount block devices which are | ||
60 | capable of this feature without using the page cache. | ||
61 | |||
62 | If you do not use a block device that is capable of using this, | ||
63 | or if unsure, say N. | ||
64 | 12 | ||
65 | config FS_XIP | 13 | config FS_XIP |
66 | # execute in place | 14 | # execute in place |
@@ -68,225 +16,16 @@ config FS_XIP | |||
68 | depends on EXT2_FS_XIP | 16 | depends on EXT2_FS_XIP |
69 | default y | 17 | default y |
70 | 18 | ||
71 | config EXT3_FS | 19 | source "fs/jbd/Kconfig" |
72 | tristate "Ext3 journalling file system support" | 20 | source "fs/jbd2/Kconfig" |
73 | select JBD | ||
74 | help | ||
75 | This is the journalling version of the Second extended file system | ||
76 | (often called ext3), the de facto standard Linux file system | ||
77 | (method to organize files on a storage device) for hard disks. | ||
78 | |||
79 | The journalling code included in this driver means you do not have | ||
80 | to run e2fsck (file system checker) on your file systems after a | ||
81 | crash. The journal keeps track of any changes that were being made | ||
82 | at the time the system crashed, and can ensure that your file system | ||
83 | is consistent without the need for a lengthy check. | ||
84 | |||
85 | Other than adding the journal to the file system, the on-disk format | ||
86 | of ext3 is identical to ext2. It is possible to freely switch | ||
87 | between using the ext3 driver and the ext2 driver, as long as the | ||
88 | file system has been cleanly unmounted, or e2fsck is run on the file | ||
89 | system. | ||
90 | |||
91 | To add a journal on an existing ext2 file system or change the | ||
92 | behavior of ext3 file systems, you can use the tune2fs utility ("man | ||
93 | tune2fs"). To modify attributes of files and directories on ext3 | ||
94 | file systems, use chattr ("man chattr"). You need to be using | ||
95 | e2fsprogs version 1.20 or later in order to create ext3 journals | ||
96 | (available at <http://sourceforge.net/projects/e2fsprogs/>). | ||
97 | |||
98 | To compile this file system support as a module, choose M here: the | ||
99 | module will be called ext3. | ||
100 | |||
101 | config EXT3_FS_XATTR | ||
102 | bool "Ext3 extended attributes" | ||
103 | depends on EXT3_FS | ||
104 | default y | ||
105 | help | ||
106 | Extended attributes are name:value pairs associated with inodes by | ||
107 | the kernel or by users (see the attr(5) manual page, or visit | ||
108 | <http://acl.bestbits.at/> for details). | ||
109 | |||
110 | If unsure, say N. | ||
111 | |||
112 | You need this for POSIX ACL support on ext3. | ||
113 | |||
114 | config EXT3_FS_POSIX_ACL | ||
115 | bool "Ext3 POSIX Access Control Lists" | ||
116 | depends on EXT3_FS_XATTR | ||
117 | select FS_POSIX_ACL | ||
118 | help | ||
119 | Posix Access Control Lists (ACLs) support permissions for users and | ||
120 | groups beyond the owner/group/world scheme. | ||
121 | |||
122 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
123 | Linux website <http://acl.bestbits.at/>. | ||
124 | |||
125 | If you don't know what Access Control Lists are, say N | ||
126 | |||
127 | config EXT3_FS_SECURITY | ||
128 | bool "Ext3 Security Labels" | ||
129 | depends on EXT3_FS_XATTR | ||
130 | help | ||
131 | Security labels support alternative access control models | ||
132 | implemented by security modules like SELinux. This option | ||
133 | enables an extended attribute handler for file security | ||
134 | labels in the ext3 filesystem. | ||
135 | |||
136 | If you are not using a security module that requires using | ||
137 | extended attributes for file security labels, say N. | ||
138 | |||
139 | config EXT4_FS | ||
140 | tristate "The Extended 4 (ext4) filesystem" | ||
141 | select JBD2 | ||
142 | select CRC16 | ||
143 | help | ||
144 | This is the next generation of the ext3 filesystem. | ||
145 | |||
146 | Unlike the change from ext2 filesystem to ext3 filesystem, | ||
147 | the on-disk format of ext4 is not forwards compatible with | ||
148 | ext3; it is based on extent maps and it supports 48-bit | ||
149 | physical block numbers. The ext4 filesystem also supports delayed | ||
150 | allocation, persistent preallocation, high resolution time stamps, | ||
151 | and a number of other features to improve performance and speed | ||
152 | up fsck time. For more information, please see the web pages at | ||
153 | http://ext4.wiki.kernel.org. | ||
154 | |||
155 | The ext4 filesystem will support mounting an ext3 | ||
156 | filesystem; while there will be some performance gains from | ||
157 | the delayed allocation and inode table readahead, the best | ||
158 | performance gains will require enabling ext4 features in the | ||
159 | filesystem, or formatting a new filesystem as an ext4 | ||
160 | filesystem initially. | ||
161 | |||
162 | To compile this file system support as a module, choose M here. The | ||
163 | module will be called ext4dev. | ||
164 | |||
165 | If unsure, say N. | ||
166 | |||
167 | config EXT4DEV_COMPAT | ||
168 | bool "Enable ext4dev compatibility" | ||
169 | depends on EXT4_FS | ||
170 | help | ||
171 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
172 | renamed from ext4dev to ext4. Unfortunately there are some | ||
173 | legacy userspace programs (such as klibc's fstype) have | ||
174 | "ext4dev" hardcoded. | ||
175 | |||
176 | To enable backwards compatibility for systems that are | ||
177 | still expecting to mount ext4 filesystems using ext4dev, | ||
178 | choose Y here. This feature will go away by 2.6.31, so | ||
179 | please arrange to get your userspace programs fixed! | ||
180 | |||
181 | config EXT4_FS_XATTR | ||
182 | bool "Ext4 extended attributes" | ||
183 | depends on EXT4_FS | ||
184 | default y | ||
185 | help | ||
186 | Extended attributes are name:value pairs associated with inodes by | ||
187 | the kernel or by users (see the attr(5) manual page, or visit | ||
188 | <http://acl.bestbits.at/> for details). | ||
189 | |||
190 | If unsure, say N. | ||
191 | |||
192 | You need this for POSIX ACL support on ext4. | ||
193 | |||
194 | config EXT4_FS_POSIX_ACL | ||
195 | bool "Ext4 POSIX Access Control Lists" | ||
196 | depends on EXT4_FS_XATTR | ||
197 | select FS_POSIX_ACL | ||
198 | help | ||
199 | POSIX Access Control Lists (ACLs) support permissions for users and | ||
200 | groups beyond the owner/group/world scheme. | ||
201 | |||
202 | To learn more about Access Control Lists, visit the POSIX ACLs for | ||
203 | Linux website <http://acl.bestbits.at/>. | ||
204 | |||
205 | If you don't know what Access Control Lists are, say N | ||
206 | |||
207 | config EXT4_FS_SECURITY | ||
208 | bool "Ext4 Security Labels" | ||
209 | depends on EXT4_FS_XATTR | ||
210 | help | ||
211 | Security labels support alternative access control models | ||
212 | implemented by security modules like SELinux. This option | ||
213 | enables an extended attribute handler for file security | ||
214 | labels in the ext4 filesystem. | ||
215 | |||
216 | If you are not using a security module that requires using | ||
217 | extended attributes for file security labels, say N. | ||
218 | |||
219 | config JBD | ||
220 | tristate | ||
221 | help | ||
222 | This is a generic journalling layer for block devices. It is | ||
223 | currently used by the ext3 file system, but it could also be | ||
224 | used to add journal support to other file systems or block | ||
225 | devices such as RAID or LVM. | ||
226 | |||
227 | If you are using the ext3 file system, you need to say Y here. | ||
228 | If you are not using ext3 then you will probably want to say N. | ||
229 | |||
230 | To compile this device as a module, choose M here: the module will be | ||
231 | called jbd. If you are compiling ext3 into the kernel, you | ||
232 | cannot compile this code as a module. | ||
233 | |||
234 | config JBD_DEBUG | ||
235 | bool "JBD (ext3) debugging support" | ||
236 | depends on JBD && DEBUG_FS | ||
237 | help | ||
238 | If you are using the ext3 journaled file system (or potentially any | ||
239 | other file system/device using JBD), this option allows you to | ||
240 | enable debugging output while the system is running, in order to | ||
241 | help track down any problems you are having. By default the | ||
242 | debugging output will be turned off. | ||
243 | |||
244 | If you select Y here, then you will be able to turn on debugging | ||
245 | with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a | ||
246 | number between 1 and 5, the higher the number, the more debugging | ||
247 | output is generated. To turn debugging off again, do | ||
248 | "echo 0 > /sys/kernel/debug/jbd/jbd-debug". | ||
249 | |||
250 | config JBD2 | ||
251 | tristate | ||
252 | select CRC32 | ||
253 | help | ||
254 | This is a generic journaling layer for block devices that support | ||
255 | both 32-bit and 64-bit block numbers. It is currently used by | ||
256 | the ext4 and OCFS2 filesystems, but it could also be used to add | ||
257 | journal support to other file systems or block devices such | ||
258 | as RAID or LVM. | ||
259 | |||
260 | If you are using ext4 or OCFS2, you need to say Y here. | ||
261 | If you are not using ext4 or OCFS2 then you will | ||
262 | probably want to say N. | ||
263 | |||
264 | To compile this device as a module, choose M here. The module will be | ||
265 | called jbd2. If you are compiling ext4 or OCFS2 into the kernel, | ||
266 | you cannot compile this code as a module. | ||
267 | |||
268 | config JBD2_DEBUG | ||
269 | bool "JBD2 (ext4) debugging support" | ||
270 | depends on JBD2 && DEBUG_FS | ||
271 | help | ||
272 | If you are using the ext4 journaled file system (or | ||
273 | potentially any other filesystem/device using JBD2), this option | ||
274 | allows you to enable debugging output while the system is running, | ||
275 | in order to help track down any problems you are having. | ||
276 | By default, the debugging output will be turned off. | ||
277 | |||
278 | If you select Y here, then you will be able to turn on debugging | ||
279 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | ||
280 | number between 1 and 5. The higher the number, the more debugging | ||
281 | output is generated. To turn debugging off again, do | ||
282 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | ||
283 | 21 | ||
284 | config FS_MBCACHE | 22 | config FS_MBCACHE |
285 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) | 23 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) |
286 | tristate | 24 | tristate |
287 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR | 25 | default y if EXT2_FS=y && EXT2_FS_XATTR |
288 | default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y | 26 | default y if EXT3_FS=y && EXT3_FS_XATTR |
289 | default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m | 27 | default y if EXT4_FS=y && EXT4_FS_XATTR |
28 | default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR | ||
290 | 29 | ||
291 | config REISERFS_FS | 30 | config REISERFS_FS |
292 | tristate "Reiserfs support" | 31 | tristate "Reiserfs support" |
@@ -665,7 +404,7 @@ config AUTOFS4_FS | |||
665 | N here. | 404 | N here. |
666 | 405 | ||
667 | config FUSE_FS | 406 | config FUSE_FS |
668 | tristate "Filesystem in Userspace support" | 407 | tristate "FUSE (Filesystem in Userspace) support" |
669 | help | 408 | help |
670 | With FUSE it is possible to implement a fully functional filesystem | 409 | With FUSE it is possible to implement a fully functional filesystem |
671 | in a userspace program. | 410 | in a userspace program. |
@@ -1168,195 +907,7 @@ config EFS_FS | |||
1168 | To compile the EFS file system support as a module, choose M here: the | 907 | To compile the EFS file system support as a module, choose M here: the |
1169 | module will be called efs. | 908 | module will be called efs. |
1170 | 909 | ||
1171 | config JFFS2_FS | 910 | source "fs/jffs2/Kconfig" |
1172 | tristate "Journalling Flash File System v2 (JFFS2) support" | ||
1173 | select CRC32 | ||
1174 | depends on MTD | ||
1175 | help | ||
1176 | JFFS2 is the second generation of the Journalling Flash File System | ||
1177 | for use on diskless embedded devices. It provides improved wear | ||
1178 | levelling, compression and support for hard links. You cannot use | ||
1179 | this on normal block devices, only on 'MTD' devices. | ||
1180 | |||
1181 | Further information on the design and implementation of JFFS2 is | ||
1182 | available at <http://sources.redhat.com/jffs2/>. | ||
1183 | |||
1184 | config JFFS2_FS_DEBUG | ||
1185 | int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)" | ||
1186 | depends on JFFS2_FS | ||
1187 | default "0" | ||
1188 | help | ||
1189 | This controls the amount of debugging messages produced by the JFFS2 | ||
1190 | code. Set it to zero for use in production systems. For evaluation, | ||
1191 | testing and debugging, it's advisable to set it to one. This will | ||
1192 | enable a few assertions and will print debugging messages at the | ||
1193 | KERN_DEBUG loglevel, where they won't normally be visible. Level 2 | ||
1194 | is unlikely to be useful - it enables extra debugging in certain | ||
1195 | areas which at one point needed debugging, but when the bugs were | ||
1196 | located and fixed, the detailed messages were relegated to level 2. | ||
1197 | |||
1198 | If reporting bugs, please try to have available a full dump of the | ||
1199 | messages at debug level 1 while the misbehaviour was occurring. | ||
1200 | |||
1201 | config JFFS2_FS_WRITEBUFFER | ||
1202 | bool "JFFS2 write-buffering support" | ||
1203 | depends on JFFS2_FS | ||
1204 | default y | ||
1205 | help | ||
1206 | This enables the write-buffering support in JFFS2. | ||
1207 | |||
1208 | This functionality is required to support JFFS2 on the following | ||
1209 | types of flash devices: | ||
1210 | - NAND flash | ||
1211 | - NOR flash with transparent ECC | ||
1212 | - DataFlash | ||
1213 | |||
1214 | config JFFS2_FS_WBUF_VERIFY | ||
1215 | bool "Verify JFFS2 write-buffer reads" | ||
1216 | depends on JFFS2_FS_WRITEBUFFER | ||
1217 | default n | ||
1218 | help | ||
1219 | This causes JFFS2 to read back every page written through the | ||
1220 | write-buffer, and check for errors. | ||
1221 | |||
1222 | config JFFS2_SUMMARY | ||
1223 | bool "JFFS2 summary support (EXPERIMENTAL)" | ||
1224 | depends on JFFS2_FS && EXPERIMENTAL | ||
1225 | default n | ||
1226 | help | ||
1227 | This feature makes it possible to use summary information | ||
1228 | for faster filesystem mount. | ||
1229 | |||
1230 | The summary information can be inserted into a filesystem image | ||
1231 | by the utility 'sumtool'. | ||
1232 | |||
1233 | If unsure, say 'N'. | ||
1234 | |||
1235 | config JFFS2_FS_XATTR | ||
1236 | bool "JFFS2 XATTR support (EXPERIMENTAL)" | ||
1237 | depends on JFFS2_FS && EXPERIMENTAL | ||
1238 | default n | ||
1239 | help | ||
1240 | Extended attributes are name:value pairs associated with inodes by | ||
1241 | the kernel or by users (see the attr(5) manual page, or visit | ||
1242 | <http://acl.bestbits.at/> for details). | ||
1243 | |||
1244 | If unsure, say N. | ||
1245 | |||
1246 | config JFFS2_FS_POSIX_ACL | ||
1247 | bool "JFFS2 POSIX Access Control Lists" | ||
1248 | depends on JFFS2_FS_XATTR | ||
1249 | default y | ||
1250 | select FS_POSIX_ACL | ||
1251 | help | ||
1252 | Posix Access Control Lists (ACLs) support permissions for users and | ||
1253 | groups beyond the owner/group/world scheme. | ||
1254 | |||
1255 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
1256 | Linux website <http://acl.bestbits.at/>. | ||
1257 | |||
1258 | If you don't know what Access Control Lists are, say N | ||
1259 | |||
1260 | config JFFS2_FS_SECURITY | ||
1261 | bool "JFFS2 Security Labels" | ||
1262 | depends on JFFS2_FS_XATTR | ||
1263 | default y | ||
1264 | help | ||
1265 | Security labels support alternative access control models | ||
1266 | implemented by security modules like SELinux. This option | ||
1267 | enables an extended attribute handler for file security | ||
1268 | labels in the jffs2 filesystem. | ||
1269 | |||
1270 | If you are not using a security module that requires using | ||
1271 | extended attributes for file security labels, say N. | ||
1272 | |||
1273 | config JFFS2_COMPRESSION_OPTIONS | ||
1274 | bool "Advanced compression options for JFFS2" | ||
1275 | depends on JFFS2_FS | ||
1276 | default n | ||
1277 | help | ||
1278 | Enabling this option allows you to explicitly choose which | ||
1279 | compression modules, if any, are enabled in JFFS2. Removing | ||
1280 | compressors can mean you cannot read existing file systems, | ||
1281 | and enabling experimental compressors can mean that you | ||
1282 | write a file system which cannot be read by a standard kernel. | ||
1283 | |||
1284 | If unsure, you should _definitely_ say 'N'. | ||
1285 | |||
1286 | config JFFS2_ZLIB | ||
1287 | bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS | ||
1288 | select ZLIB_INFLATE | ||
1289 | select ZLIB_DEFLATE | ||
1290 | depends on JFFS2_FS | ||
1291 | default y | ||
1292 | help | ||
1293 | Zlib is designed to be a free, general-purpose, legally unencumbered, | ||
1294 | lossless data-compression library for use on virtually any computer | ||
1295 | hardware and operating system. See <http://www.gzip.org/zlib/> for | ||
1296 | further information. | ||
1297 | |||
1298 | Say 'Y' if unsure. | ||
1299 | |||
1300 | config JFFS2_LZO | ||
1301 | bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS | ||
1302 | select LZO_COMPRESS | ||
1303 | select LZO_DECOMPRESS | ||
1304 | depends on JFFS2_FS | ||
1305 | default n | ||
1306 | help | ||
1307 | minilzo-based compression. Generally works better than Zlib. | ||
1308 | |||
1309 | This feature was added in July, 2007. Say 'N' if you need | ||
1310 | compatibility with older bootloaders or kernels. | ||
1311 | |||
1312 | config JFFS2_RTIME | ||
1313 | bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS | ||
1314 | depends on JFFS2_FS | ||
1315 | default y | ||
1316 | help | ||
1317 | Rtime does manage to recompress already-compressed data. Say 'Y' if unsure. | ||
1318 | |||
1319 | config JFFS2_RUBIN | ||
1320 | bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS | ||
1321 | depends on JFFS2_FS | ||
1322 | default n | ||
1323 | help | ||
1324 | RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. | ||
1325 | |||
1326 | choice | ||
1327 | prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS | ||
1328 | default JFFS2_CMODE_PRIORITY | ||
1329 | depends on JFFS2_FS | ||
1330 | help | ||
1331 | You can set here the default compression mode of JFFS2 from | ||
1332 | the available compression modes. Don't touch if unsure. | ||
1333 | |||
1334 | config JFFS2_CMODE_NONE | ||
1335 | bool "no compression" | ||
1336 | help | ||
1337 | Uses no compression. | ||
1338 | |||
1339 | config JFFS2_CMODE_PRIORITY | ||
1340 | bool "priority" | ||
1341 | help | ||
1342 | Tries the compressors in a predefined order and chooses the first | ||
1343 | successful one. | ||
1344 | |||
1345 | config JFFS2_CMODE_SIZE | ||
1346 | bool "size (EXPERIMENTAL)" | ||
1347 | help | ||
1348 | Tries all compressors and chooses the one which has the smallest | ||
1349 | result. | ||
1350 | |||
1351 | config JFFS2_CMODE_FAVOURLZO | ||
1352 | bool "Favour LZO" | ||
1353 | help | ||
1354 | Tries all compressors and chooses the one which has the smallest | ||
1355 | result but gives some preference to LZO (which has faster | ||
1356 | decompression) at the expense of size. | ||
1357 | |||
1358 | endchoice | ||
1359 | |||
1360 | # UBIFS File system configuration | 911 | # UBIFS File system configuration |
1361 | source "fs/ubifs/Kconfig" | 912 | source "fs/ubifs/Kconfig" |
1362 | 913 | ||
@@ -1913,148 +1464,7 @@ config SMB_NLS_REMOTE | |||
1913 | 1464 | ||
1914 | smbmount from samba 2.2.0 or later supports this. | 1465 | smbmount from samba 2.2.0 or later supports this. |
1915 | 1466 | ||
1916 | config CIFS | 1467 | source "fs/cifs/Kconfig" |
1917 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" | ||
1918 | depends on INET | ||
1919 | select NLS | ||
1920 | help | ||
1921 | This is the client VFS module for the Common Internet File System | ||
1922 | (CIFS) protocol which is the successor to the Server Message Block | ||
1923 | (SMB) protocol, the native file sharing mechanism for most early | ||
1924 | PC operating systems. The CIFS protocol is fully supported by | ||
1925 | file servers such as Windows 2000 (including Windows 2003, NT 4 | ||
1926 | and Windows XP) as well by Samba (which provides excellent CIFS | ||
1927 | server support for Linux and many other operating systems). Limited | ||
1928 | support for OS/2 and Windows ME and similar servers is provided as | ||
1929 | well. | ||
1930 | |||
1931 | The cifs module provides an advanced network file system | ||
1932 | client for mounting to CIFS compliant servers. It includes | ||
1933 | support for DFS (hierarchical name space), secure per-user | ||
1934 | session establishment via Kerberos or NTLM or NTLMv2, | ||
1935 | safe distributed caching (oplock), optional packet | ||
1936 | signing, Unicode and other internationalization improvements. | ||
1937 | If you need to mount to Samba or Windows from this machine, say Y. | ||
1938 | |||
1939 | config CIFS_STATS | ||
1940 | bool "CIFS statistics" | ||
1941 | depends on CIFS | ||
1942 | help | ||
1943 | Enabling this option will cause statistics for each server share | ||
1944 | mounted by the cifs client to be displayed in /proc/fs/cifs/Stats | ||
1945 | |||
1946 | config CIFS_STATS2 | ||
1947 | bool "Extended statistics" | ||
1948 | depends on CIFS_STATS | ||
1949 | help | ||
1950 | Enabling this option will allow more detailed statistics on SMB | ||
1951 | request timing to be displayed in /proc/fs/cifs/DebugData and also | ||
1952 | allow optional logging of slow responses to dmesg (depending on the | ||
1953 | value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details). | ||
1954 | These additional statistics may have a minor effect on performance | ||
1955 | and memory utilization. | ||
1956 | |||
1957 | Unless you are a developer or are doing network performance analysis | ||
1958 | or tuning, say N. | ||
1959 | |||
1960 | config CIFS_WEAK_PW_HASH | ||
1961 | bool "Support legacy servers which use weaker LANMAN security" | ||
1962 | depends on CIFS | ||
1963 | help | ||
1964 | Modern CIFS servers including Samba and most Windows versions | ||
1965 | (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) | ||
1966 | security mechanisms. These hash the password more securely | ||
1967 | than the mechanisms used in the older LANMAN version of the | ||
1968 | SMB protocol but LANMAN based authentication is needed to | ||
1969 | establish sessions with some old SMB servers. | ||
1970 | |||
1971 | Enabling this option allows the cifs module to mount to older | ||
1972 | LANMAN based servers such as OS/2 and Windows 95, but such | ||
1973 | mounts may be less secure than mounts using NTLM or more recent | ||
1974 | security mechanisms if you are on a public network. Unless you | ||
1975 | have a need to access old SMB servers (and are on a private | ||
1976 | network) you probably want to say N. Even if this support | ||
1977 | is enabled in the kernel build, LANMAN authentication will not be | ||
1978 | used automatically. At runtime LANMAN mounts are disabled but | ||
1979 | can be set to required (or optional) either in | ||
1980 | /proc/fs/cifs (see fs/cifs/README for more detail) or via an | ||
1981 | option on the mount command. This support is disabled by | ||
1982 | default in order to reduce the possibility of a downgrade | ||
1983 | attack. | ||
1984 | |||
1985 | If unsure, say N. | ||
1986 | |||
1987 | config CIFS_UPCALL | ||
1988 | bool "Kerberos/SPNEGO advanced session setup" | ||
1989 | depends on CIFS && KEYS | ||
1990 | help | ||
1991 | Enables an upcall mechanism for CIFS which accesses | ||
1992 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1993 | Kerberos tickets which are needed to mount to certain secure servers | ||
1994 | (for which more secure Kerberos authentication is required). If | ||
1995 | unsure, say N. | ||
1996 | |||
1997 | config CIFS_XATTR | ||
1998 | bool "CIFS extended attributes" | ||
1999 | depends on CIFS | ||
2000 | help | ||
2001 | Extended attributes are name:value pairs associated with inodes by | ||
2002 | the kernel or by users (see the attr(5) manual page, or visit | ||
2003 | <http://acl.bestbits.at/> for details). CIFS maps the name of | ||
2004 | extended attributes beginning with the user namespace prefix | ||
2005 | to SMB/CIFS EAs. EAs are stored on Windows servers without the | ||
2006 | user namespace prefix, but their names are seen by Linux cifs clients | ||
2007 | prefaced by the user namespace prefix. The system namespace | ||
2008 | (used by some filesystems to store ACLs) is not supported at | ||
2009 | this time. | ||
2010 | |||
2011 | If unsure, say N. | ||
2012 | |||
2013 | config CIFS_POSIX | ||
2014 | bool "CIFS POSIX Extensions" | ||
2015 | depends on CIFS_XATTR | ||
2016 | help | ||
2017 | Enabling this option will cause the cifs client to attempt to | ||
2018 | negotiate a newer dialect with servers, such as Samba 3.0.5 | ||
2019 | or later, that optionally can handle more POSIX like (rather | ||
2020 | than Windows like) file behavior. It also enables | ||
2021 | support for POSIX ACLs (getfacl and setfacl) to servers | ||
2022 | (such as Samba 3.10 and later) which can negotiate | ||
2023 | CIFS POSIX ACL support. If unsure, say N. | ||
2024 | |||
2025 | config CIFS_DEBUG2 | ||
2026 | bool "Enable additional CIFS debugging routines" | ||
2027 | depends on CIFS | ||
2028 | help | ||
2029 | Enabling this option adds a few more debugging routines | ||
2030 | to the cifs code which slightly increases the size of | ||
2031 | the cifs module and can cause additional logging of debug | ||
2032 | messages in some error paths, slowing performance. This | ||
2033 | option can be turned off unless you are debugging | ||
2034 | cifs problems. If unsure, say N. | ||
2035 | |||
2036 | config CIFS_EXPERIMENTAL | ||
2037 | bool "CIFS Experimental Features (EXPERIMENTAL)" | ||
2038 | depends on CIFS && EXPERIMENTAL | ||
2039 | help | ||
2040 | Enables cifs features under testing. These features are | ||
2041 | experimental and currently include DFS support and directory | ||
2042 | change notification ie fcntl(F_DNOTIFY), as well as the upcall | ||
2043 | mechanism which will be used for Kerberos session negotiation | ||
2044 | and uid remapping. Some of these features also may depend on | ||
2045 | setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental | ||
2046 | (which is disabled by default). See the file fs/cifs/README | ||
2047 | for more details. If unsure, say N. | ||
2048 | |||
2049 | config CIFS_DFS_UPCALL | ||
2050 | bool "DFS feature support (EXPERIMENTAL)" | ||
2051 | depends on CIFS_EXPERIMENTAL | ||
2052 | depends on KEYS | ||
2053 | help | ||
2054 | Enables an upcall mechanism for CIFS which contacts userspace | ||
2055 | helper utilities to provide server name resolution (host names to | ||
2056 | IP addresses) which is needed for implicit mounts of DFS junction | ||
2057 | points. If unsure, say N. | ||
2058 | 1468 | ||
2059 | config NCP_FS | 1469 | config NCP_FS |
2060 | tristate "NCP file system support (to mount NetWare volumes)" | 1470 | tristate "NCP file system support (to mount NetWare volumes)" |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 801db1341811..ce9fb3fbfae4 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC | |||
40 | 40 | ||
41 | It is also possible to run FDPIC ELF binaries on MMU linux also. | 41 | It is also possible to run FDPIC ELF binaries on MMU linux also. |
42 | 42 | ||
43 | config CORE_DUMP_DEFAULT_ELF_HEADERS | ||
44 | bool "Write ELF core dumps with partial segments" | ||
45 | default n | ||
46 | depends on BINFMT_ELF | ||
47 | help | ||
48 | ELF core dump files describe each memory mapping of the crashed | ||
49 | process, and can contain or omit the memory contents of each one. | ||
50 | The contents of an unmodified text mapping are omitted by default. | ||
51 | |||
52 | For an unmodified text mapping of an ELF object, including just | ||
53 | the first page of the file in a core dump makes it possible to | ||
54 | identify the build ID bits in the file, without paying the i/o | ||
55 | cost and disk space to dump all the text. However, versions of | ||
56 | GDB before 6.7 are confused by ELF core dump files in this format. | ||
57 | |||
58 | The core dump behavior can be controlled per process using | ||
59 | the /proc/PID/coredump_filter pseudo-file; this setting is | ||
60 | inherited. See Documentation/filesystems/proc.txt for details. | ||
61 | |||
62 | This config option changes the default setting of coredump_filter | ||
63 | seen at boot time. If unsure, say N. | ||
64 | |||
43 | config BINFMT_FLAT | 65 | config BINFMT_FLAT |
44 | bool "Kernel support for flat binaries" | 66 | bool "Kernel support for flat binaries" |
45 | depends on !MMU && (!FRV || BROKEN) | 67 | depends on !MMU && (!FRV || BROKEN) |
diff --git a/fs/Makefile b/fs/Makefile index d0c69f57e5bf..2168c902d5ca 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
71 | # Do not add any filesystems before this line | 71 | # Do not add any filesystems before this line |
72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 |
75 | obj-$(CONFIG_JBD) += jbd/ | 75 | obj-$(CONFIG_JBD) += jbd/ |
76 | obj-$(CONFIG_JBD2) += jbd2/ | 76 | obj-$(CONFIG_JBD2) += jbd2/ |
77 | obj-$(CONFIG_EXT2_FS) += ext2/ | 77 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index dfda03d4397d..99cf390641f7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -45,6 +45,7 @@ const struct file_operations afs_dir_file_operations = { | |||
45 | .release = afs_release, | 45 | .release = afs_release, |
46 | .readdir = afs_readdir, | 46 | .readdir = afs_readdir, |
47 | .lock = afs_lock, | 47 | .lock = afs_lock, |
48 | .llseek = generic_file_llseek, | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | const struct inode_operations afs_dir_inode_operations = { | 51 | const struct inode_operations afs_dir_inode_operations = { |
@@ -159,17 +159,17 @@ int notify_change(struct dentry * dentry, struct iattr * attr) | |||
159 | if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) | 159 | if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) |
160 | return 0; | 160 | return 0; |
161 | 161 | ||
162 | error = security_inode_setattr(dentry, attr); | ||
163 | if (error) | ||
164 | return error; | ||
165 | |||
162 | if (ia_valid & ATTR_SIZE) | 166 | if (ia_valid & ATTR_SIZE) |
163 | down_write(&dentry->d_inode->i_alloc_sem); | 167 | down_write(&dentry->d_inode->i_alloc_sem); |
164 | 168 | ||
165 | if (inode->i_op && inode->i_op->setattr) { | 169 | if (inode->i_op && inode->i_op->setattr) { |
166 | error = security_inode_setattr(dentry, attr); | 170 | error = inode->i_op->setattr(dentry, attr); |
167 | if (!error) | ||
168 | error = inode->i_op->setattr(dentry, attr); | ||
169 | } else { | 171 | } else { |
170 | error = inode_change_ok(inode, attr); | 172 | error = inode_change_ok(inode, attr); |
171 | if (!error) | ||
172 | error = security_inode_setattr(dentry, attr); | ||
173 | if (!error) { | 173 | if (!error) { |
174 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 174 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
175 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) | 175 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index ed8feb052df9..daae463068e4 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -80,6 +80,7 @@ const struct file_operations bfs_dir_operations = { | |||
80 | .read = generic_read_dir, | 80 | .read = generic_read_dir, |
81 | .readdir = bfs_readdir, | 81 | .readdir = bfs_readdir, |
82 | .fsync = file_fsync, | 82 | .fsync = file_fsync, |
83 | .llseek = generic_file_llseek, | ||
83 | }; | 84 | }; |
84 | 85 | ||
85 | extern void dump_imap(const char *, struct super_block *); | 86 | extern void dump_imap(const char *, struct super_block *); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c76afa26edf7..8fcfa398d350 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off) | |||
1156 | static unsigned long vma_dump_size(struct vm_area_struct *vma, | 1156 | static unsigned long vma_dump_size(struct vm_area_struct *vma, |
1157 | unsigned long mm_flags) | 1157 | unsigned long mm_flags) |
1158 | { | 1158 | { |
1159 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) | ||
1160 | |||
1159 | /* The vma can be set up to tell us the answer directly. */ | 1161 | /* The vma can be set up to tell us the answer directly. */ |
1160 | if (vma->vm_flags & VM_ALWAYSDUMP) | 1162 | if (vma->vm_flags & VM_ALWAYSDUMP) |
1161 | goto whole; | 1163 | goto whole; |
1162 | 1164 | ||
1165 | /* Hugetlb memory check */ | ||
1166 | if (vma->vm_flags & VM_HUGETLB) { | ||
1167 | if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) | ||
1168 | goto whole; | ||
1169 | if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) | ||
1170 | goto whole; | ||
1171 | } | ||
1172 | |||
1163 | /* Do not dump I/O mapped devices or special mappings */ | 1173 | /* Do not dump I/O mapped devices or special mappings */ |
1164 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) | 1174 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) |
1165 | return 0; | 1175 | return 0; |
1166 | 1176 | ||
1167 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) | ||
1168 | |||
1169 | /* By default, dump shared memory if mapped from an anonymous file. */ | 1177 | /* By default, dump shared memory if mapped from an anonymous file. */ |
1170 | if (vma->vm_flags & VM_SHARED) { | 1178 | if (vma->vm_flags & VM_SHARED) { |
1171 | if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? | 1179 | if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? |
@@ -1333,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1333 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1341 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1334 | prstatus->pr_sid = task_session_vnr(p); | 1342 | prstatus->pr_sid = task_session_vnr(p); |
1335 | if (thread_group_leader(p)) { | 1343 | if (thread_group_leader(p)) { |
1344 | struct task_cputime cputime; | ||
1345 | |||
1336 | /* | 1346 | /* |
1337 | * This is the record for the group leader. Add in the | 1347 | * This is the record for the group leader. It shows the |
1338 | * cumulative times of previous dead threads. This total | 1348 | * group-wide total, not its individual thread total. |
1339 | * won't include the time of each live thread whose state | ||
1340 | * is included in the core dump. The final total reported | ||
1341 | * to our parent process when it calls wait4 will include | ||
1342 | * those sums as well as the little bit more time it takes | ||
1343 | * this and each other thread to finish dying after the | ||
1344 | * core dump synchronization phase. | ||
1345 | */ | 1349 | */ |
1346 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1350 | thread_group_cputime(p, &cputime); |
1347 | &prstatus->pr_utime); | 1351 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
1348 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1352 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
1349 | &prstatus->pr_stime); | ||
1350 | } else { | 1353 | } else { |
1351 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1354 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
1352 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1355 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 0e8367c54624..5b5424cb3391 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1390 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1390 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1391 | prstatus->pr_sid = task_session_vnr(p); | 1391 | prstatus->pr_sid = task_session_vnr(p); |
1392 | if (thread_group_leader(p)) { | 1392 | if (thread_group_leader(p)) { |
1393 | struct task_cputime cputime; | ||
1394 | |||
1393 | /* | 1395 | /* |
1394 | * This is the record for the group leader. Add in the | 1396 | * This is the record for the group leader. It shows the |
1395 | * cumulative times of previous dead threads. This total | 1397 | * group-wide total, not its individual thread total. |
1396 | * won't include the time of each live thread whose state | ||
1397 | * is included in the core dump. The final total reported | ||
1398 | * to our parent process when it calls wait4 will include | ||
1399 | * those sums as well as the little bit more time it takes | ||
1400 | * this and each other thread to finish dying after the | ||
1401 | * core dump synchronization phase. | ||
1402 | */ | 1398 | */ |
1403 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1399 | thread_group_cputime(p, &cputime); |
1404 | &prstatus->pr_utime); | 1400 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
1405 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1401 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
1406 | &prstatus->pr_stime); | ||
1407 | } else { | 1402 | } else { |
1408 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1403 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
1409 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1404 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index d84f0469a016..88a776fa0ef6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -840,13 +840,12 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk); | |||
840 | * to be used for internal purposes. If you ever need it - reconsider | 840 | * to be used for internal purposes. If you ever need it - reconsider |
841 | * your API. | 841 | * your API. |
842 | */ | 842 | */ |
843 | struct block_device *open_by_devnum(dev_t dev, unsigned mode) | 843 | struct block_device *open_by_devnum(dev_t dev, fmode_t mode) |
844 | { | 844 | { |
845 | struct block_device *bdev = bdget(dev); | 845 | struct block_device *bdev = bdget(dev); |
846 | int err = -ENOMEM; | 846 | int err = -ENOMEM; |
847 | int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; | ||
848 | if (bdev) | 847 | if (bdev) |
849 | err = blkdev_get(bdev, mode, flags); | 848 | err = blkdev_get(bdev, mode); |
850 | return err ? ERR_PTR(err) : bdev; | 849 | return err ? ERR_PTR(err) : bdev; |
851 | } | 850 | } |
852 | 851 | ||
@@ -975,9 +974,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) | |||
975 | } | 974 | } |
976 | EXPORT_SYMBOL(bd_set_size); | 975 | EXPORT_SYMBOL(bd_set_size); |
977 | 976 | ||
978 | static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, | 977 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); |
979 | int for_part); | ||
980 | static int __blkdev_put(struct block_device *bdev, int for_part); | ||
981 | 978 | ||
982 | /* | 979 | /* |
983 | * bd_mutex locking: | 980 | * bd_mutex locking: |
@@ -986,7 +983,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
986 | * mutex_lock_nested(whole->bd_mutex, 1) | 983 | * mutex_lock_nested(whole->bd_mutex, 1) |
987 | */ | 984 | */ |
988 | 985 | ||
989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 986 | static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) |
990 | { | 987 | { |
991 | struct gendisk *disk; | 988 | struct gendisk *disk; |
992 | struct hd_struct *part = NULL; | 989 | struct hd_struct *part = NULL; |
@@ -994,9 +991,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
994 | int partno; | 991 | int partno; |
995 | int perm = 0; | 992 | int perm = 0; |
996 | 993 | ||
997 | if (file->f_mode & FMODE_READ) | 994 | if (mode & FMODE_READ) |
998 | perm |= MAY_READ; | 995 | perm |= MAY_READ; |
999 | if (file->f_mode & FMODE_WRITE) | 996 | if (mode & FMODE_WRITE) |
1000 | perm |= MAY_WRITE; | 997 | perm |= MAY_WRITE; |
1001 | /* | 998 | /* |
1002 | * hooks: /n/, see "layering violations". | 999 | * hooks: /n/, see "layering violations". |
@@ -1008,7 +1005,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1008 | } | 1005 | } |
1009 | 1006 | ||
1010 | ret = -ENXIO; | 1007 | ret = -ENXIO; |
1011 | file->f_mapping = bdev->bd_inode->i_mapping; | ||
1012 | 1008 | ||
1013 | lock_kernel(); | 1009 | lock_kernel(); |
1014 | 1010 | ||
@@ -1027,7 +1023,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1027 | if (!partno) { | 1023 | if (!partno) { |
1028 | struct backing_dev_info *bdi; | 1024 | struct backing_dev_info *bdi; |
1029 | if (disk->fops->open) { | 1025 | if (disk->fops->open) { |
1030 | ret = disk->fops->open(bdev->bd_inode, file); | 1026 | ret = disk->fops->open(bdev, mode); |
1031 | if (ret) | 1027 | if (ret) |
1032 | goto out_clear; | 1028 | goto out_clear; |
1033 | } | 1029 | } |
@@ -1047,7 +1043,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1047 | if (!whole) | 1043 | if (!whole) |
1048 | goto out_clear; | 1044 | goto out_clear; |
1049 | BUG_ON(for_part); | 1045 | BUG_ON(for_part); |
1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1046 | ret = __blkdev_get(whole, mode, 1); |
1051 | if (ret) | 1047 | if (ret) |
1052 | goto out_clear; | 1048 | goto out_clear; |
1053 | bdev->bd_contains = whole; | 1049 | bdev->bd_contains = whole; |
@@ -1068,7 +1064,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1068 | disk = NULL; | 1064 | disk = NULL; |
1069 | if (bdev->bd_contains == bdev) { | 1065 | if (bdev->bd_contains == bdev) { |
1070 | if (bdev->bd_disk->fops->open) { | 1066 | if (bdev->bd_disk->fops->open) { |
1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1067 | ret = bdev->bd_disk->fops->open(bdev, mode); |
1072 | if (ret) | 1068 | if (ret) |
1073 | goto out_unlock_bdev; | 1069 | goto out_unlock_bdev; |
1074 | } | 1070 | } |
@@ -1088,7 +1084,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1088 | bdev->bd_part = NULL; | 1084 | bdev->bd_part = NULL; |
1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1085 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1090 | if (bdev != bdev->bd_contains) | 1086 | if (bdev != bdev->bd_contains) |
1091 | __blkdev_put(bdev->bd_contains, 1); | 1087 | __blkdev_put(bdev->bd_contains, mode, 1); |
1092 | bdev->bd_contains = NULL; | 1088 | bdev->bd_contains = NULL; |
1093 | out_unlock_bdev: | 1089 | out_unlock_bdev: |
1094 | mutex_unlock(&bdev->bd_mutex); | 1090 | mutex_unlock(&bdev->bd_mutex); |
@@ -1104,28 +1100,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1104 | return ret; | 1100 | return ret; |
1105 | } | 1101 | } |
1106 | 1102 | ||
1107 | static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, | 1103 | int blkdev_get(struct block_device *bdev, fmode_t mode) |
1108 | int for_part) | ||
1109 | { | 1104 | { |
1110 | /* | 1105 | return __blkdev_get(bdev, mode, 0); |
1111 | * This crockload is due to bad choice of ->open() type. | ||
1112 | * It will go away. | ||
1113 | * For now, block device ->open() routine must _not_ | ||
1114 | * examine anything in 'inode' argument except ->i_rdev. | ||
1115 | */ | ||
1116 | struct file fake_file = {}; | ||
1117 | struct dentry fake_dentry = {}; | ||
1118 | fake_file.f_mode = mode; | ||
1119 | fake_file.f_flags = flags; | ||
1120 | fake_file.f_path.dentry = &fake_dentry; | ||
1121 | fake_dentry.d_inode = bdev->bd_inode; | ||
1122 | |||
1123 | return do_open(bdev, &fake_file, for_part); | ||
1124 | } | ||
1125 | |||
1126 | int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) | ||
1127 | { | ||
1128 | return __blkdev_get(bdev, mode, flags, 0); | ||
1129 | } | 1106 | } |
1130 | EXPORT_SYMBOL(blkdev_get); | 1107 | EXPORT_SYMBOL(blkdev_get); |
1131 | 1108 | ||
@@ -1142,28 +1119,36 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1142 | */ | 1119 | */ |
1143 | filp->f_flags |= O_LARGEFILE; | 1120 | filp->f_flags |= O_LARGEFILE; |
1144 | 1121 | ||
1122 | if (filp->f_flags & O_NDELAY) | ||
1123 | filp->f_mode |= FMODE_NDELAY; | ||
1124 | if (filp->f_flags & O_EXCL) | ||
1125 | filp->f_mode |= FMODE_EXCL; | ||
1126 | if ((filp->f_flags & O_ACCMODE) == 3) | ||
1127 | filp->f_mode |= FMODE_WRITE_IOCTL; | ||
1128 | |||
1145 | bdev = bd_acquire(inode); | 1129 | bdev = bd_acquire(inode); |
1146 | if (bdev == NULL) | 1130 | if (bdev == NULL) |
1147 | return -ENOMEM; | 1131 | return -ENOMEM; |
1148 | 1132 | ||
1149 | res = do_open(bdev, filp, 0); | 1133 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1134 | |||
1135 | res = blkdev_get(bdev, filp->f_mode); | ||
1150 | if (res) | 1136 | if (res) |
1151 | return res; | 1137 | return res; |
1152 | 1138 | ||
1153 | if (!(filp->f_flags & O_EXCL) ) | 1139 | if (!(filp->f_mode & FMODE_EXCL)) |
1154 | return 0; | 1140 | return 0; |
1155 | 1141 | ||
1156 | if (!(res = bd_claim(bdev, filp))) | 1142 | if (!(res = bd_claim(bdev, filp))) |
1157 | return 0; | 1143 | return 0; |
1158 | 1144 | ||
1159 | blkdev_put(bdev); | 1145 | blkdev_put(bdev, filp->f_mode); |
1160 | return res; | 1146 | return res; |
1161 | } | 1147 | } |
1162 | 1148 | ||
1163 | static int __blkdev_put(struct block_device *bdev, int for_part) | 1149 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) |
1164 | { | 1150 | { |
1165 | int ret = 0; | 1151 | int ret = 0; |
1166 | struct inode *bd_inode = bdev->bd_inode; | ||
1167 | struct gendisk *disk = bdev->bd_disk; | 1152 | struct gendisk *disk = bdev->bd_disk; |
1168 | struct block_device *victim = NULL; | 1153 | struct block_device *victim = NULL; |
1169 | 1154 | ||
@@ -1178,7 +1163,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1178 | } | 1163 | } |
1179 | if (bdev->bd_contains == bdev) { | 1164 | if (bdev->bd_contains == bdev) { |
1180 | if (disk->fops->release) | 1165 | if (disk->fops->release) |
1181 | ret = disk->fops->release(bd_inode, NULL); | 1166 | ret = disk->fops->release(disk, mode); |
1182 | } | 1167 | } |
1183 | if (!bdev->bd_openers) { | 1168 | if (!bdev->bd_openers) { |
1184 | struct module *owner = disk->fops->owner; | 1169 | struct module *owner = disk->fops->owner; |
@@ -1197,13 +1182,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1197 | mutex_unlock(&bdev->bd_mutex); | 1182 | mutex_unlock(&bdev->bd_mutex); |
1198 | bdput(bdev); | 1183 | bdput(bdev); |
1199 | if (victim) | 1184 | if (victim) |
1200 | __blkdev_put(victim, 1); | 1185 | __blkdev_put(victim, mode, 1); |
1201 | return ret; | 1186 | return ret; |
1202 | } | 1187 | } |
1203 | 1188 | ||
1204 | int blkdev_put(struct block_device *bdev) | 1189 | int blkdev_put(struct block_device *bdev, fmode_t mode) |
1205 | { | 1190 | { |
1206 | return __blkdev_put(bdev, 0); | 1191 | return __blkdev_put(bdev, mode, 0); |
1207 | } | 1192 | } |
1208 | EXPORT_SYMBOL(blkdev_put); | 1193 | EXPORT_SYMBOL(blkdev_put); |
1209 | 1194 | ||
@@ -1212,12 +1197,16 @@ static int blkdev_close(struct inode * inode, struct file * filp) | |||
1212 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1197 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
1213 | if (bdev->bd_holder == filp) | 1198 | if (bdev->bd_holder == filp) |
1214 | bd_release(bdev); | 1199 | bd_release(bdev); |
1215 | return blkdev_put(bdev); | 1200 | return blkdev_put(bdev, filp->f_mode); |
1216 | } | 1201 | } |
1217 | 1202 | ||
1218 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 1203 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
1219 | { | 1204 | { |
1220 | return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); | 1205 | struct block_device *bdev = I_BDEV(file->f_mapping->host); |
1206 | fmode_t mode = file->f_mode; | ||
1207 | if (file->f_flags & O_NDELAY) | ||
1208 | mode |= FMODE_NDELAY_NOW; | ||
1209 | return blkdev_ioctl(bdev, mode, cmd, arg); | ||
1221 | } | 1210 | } |
1222 | 1211 | ||
1223 | static const struct address_space_operations def_blk_aops = { | 1212 | static const struct address_space_operations def_blk_aops = { |
@@ -1253,7 +1242,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) | |||
1253 | int res; | 1242 | int res; |
1254 | mm_segment_t old_fs = get_fs(); | 1243 | mm_segment_t old_fs = get_fs(); |
1255 | set_fs(KERNEL_DS); | 1244 | set_fs(KERNEL_DS); |
1256 | res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); | 1245 | res = blkdev_ioctl(bdev, 0, cmd, arg); |
1257 | set_fs(old_fs); | 1246 | set_fs(old_fs); |
1258 | return res; | 1247 | return res; |
1259 | } | 1248 | } |
@@ -1262,39 +1251,39 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
1262 | 1251 | ||
1263 | /** | 1252 | /** |
1264 | * lookup_bdev - lookup a struct block_device by name | 1253 | * lookup_bdev - lookup a struct block_device by name |
1265 | * @pathname: special file representing the block device | 1254 | * @pathname: special file representing the block device |
1266 | * | 1255 | * |
1267 | * Get a reference to the blockdevice at @pathname in the current | 1256 | * Get a reference to the blockdevice at @pathname in the current |
1268 | * namespace if possible and return it. Return ERR_PTR(error) | 1257 | * namespace if possible and return it. Return ERR_PTR(error) |
1269 | * otherwise. | 1258 | * otherwise. |
1270 | */ | 1259 | */ |
1271 | struct block_device *lookup_bdev(const char *path) | 1260 | struct block_device *lookup_bdev(const char *pathname) |
1272 | { | 1261 | { |
1273 | struct block_device *bdev; | 1262 | struct block_device *bdev; |
1274 | struct inode *inode; | 1263 | struct inode *inode; |
1275 | struct nameidata nd; | 1264 | struct path path; |
1276 | int error; | 1265 | int error; |
1277 | 1266 | ||
1278 | if (!path || !*path) | 1267 | if (!pathname || !*pathname) |
1279 | return ERR_PTR(-EINVAL); | 1268 | return ERR_PTR(-EINVAL); |
1280 | 1269 | ||
1281 | error = path_lookup(path, LOOKUP_FOLLOW, &nd); | 1270 | error = kern_path(pathname, LOOKUP_FOLLOW, &path); |
1282 | if (error) | 1271 | if (error) |
1283 | return ERR_PTR(error); | 1272 | return ERR_PTR(error); |
1284 | 1273 | ||
1285 | inode = nd.path.dentry->d_inode; | 1274 | inode = path.dentry->d_inode; |
1286 | error = -ENOTBLK; | 1275 | error = -ENOTBLK; |
1287 | if (!S_ISBLK(inode->i_mode)) | 1276 | if (!S_ISBLK(inode->i_mode)) |
1288 | goto fail; | 1277 | goto fail; |
1289 | error = -EACCES; | 1278 | error = -EACCES; |
1290 | if (nd.path.mnt->mnt_flags & MNT_NODEV) | 1279 | if (path.mnt->mnt_flags & MNT_NODEV) |
1291 | goto fail; | 1280 | goto fail; |
1292 | error = -ENOMEM; | 1281 | error = -ENOMEM; |
1293 | bdev = bd_acquire(inode); | 1282 | bdev = bd_acquire(inode); |
1294 | if (!bdev) | 1283 | if (!bdev) |
1295 | goto fail; | 1284 | goto fail; |
1296 | out: | 1285 | out: |
1297 | path_put(&nd.path); | 1286 | path_put(&path); |
1298 | return bdev; | 1287 | return bdev; |
1299 | fail: | 1288 | fail: |
1300 | bdev = ERR_PTR(error); | 1289 | bdev = ERR_PTR(error); |
@@ -1303,32 +1292,29 @@ fail: | |||
1303 | EXPORT_SYMBOL(lookup_bdev); | 1292 | EXPORT_SYMBOL(lookup_bdev); |
1304 | 1293 | ||
1305 | /** | 1294 | /** |
1306 | * open_bdev_excl - open a block device by name and set it up for use | 1295 | * open_bdev_exclusive - open a block device by name and set it up for use |
1307 | * | 1296 | * |
1308 | * @path: special file representing the block device | 1297 | * @path: special file representing the block device |
1309 | * @flags: %MS_RDONLY for opening read-only | 1298 | * @mode: FMODE_... combination to be used |
1310 | * @holder: owner for exclusion | 1299 | * @holder: owner for exclusion |
1311 | * | 1300 | * |
1312 | * Open the blockdevice described by the special file at @path, claim it | 1301 | * Open the blockdevice described by the special file at @path, claim it |
1313 | * for the @holder. | 1302 | * for the @holder. |
1314 | */ | 1303 | */ |
1315 | struct block_device *open_bdev_excl(const char *path, int flags, void *holder) | 1304 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) |
1316 | { | 1305 | { |
1317 | struct block_device *bdev; | 1306 | struct block_device *bdev; |
1318 | mode_t mode = FMODE_READ; | ||
1319 | int error = 0; | 1307 | int error = 0; |
1320 | 1308 | ||
1321 | bdev = lookup_bdev(path); | 1309 | bdev = lookup_bdev(path); |
1322 | if (IS_ERR(bdev)) | 1310 | if (IS_ERR(bdev)) |
1323 | return bdev; | 1311 | return bdev; |
1324 | 1312 | ||
1325 | if (!(flags & MS_RDONLY)) | 1313 | error = blkdev_get(bdev, mode); |
1326 | mode |= FMODE_WRITE; | ||
1327 | error = blkdev_get(bdev, mode, 0); | ||
1328 | if (error) | 1314 | if (error) |
1329 | return ERR_PTR(error); | 1315 | return ERR_PTR(error); |
1330 | error = -EACCES; | 1316 | error = -EACCES; |
1331 | if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) | 1317 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) |
1332 | goto blkdev_put; | 1318 | goto blkdev_put; |
1333 | error = bd_claim(bdev, holder); | 1319 | error = bd_claim(bdev, holder); |
1334 | if (error) | 1320 | if (error) |
@@ -1337,26 +1323,27 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder) | |||
1337 | return bdev; | 1323 | return bdev; |
1338 | 1324 | ||
1339 | blkdev_put: | 1325 | blkdev_put: |
1340 | blkdev_put(bdev); | 1326 | blkdev_put(bdev, mode); |
1341 | return ERR_PTR(error); | 1327 | return ERR_PTR(error); |
1342 | } | 1328 | } |
1343 | 1329 | ||
1344 | EXPORT_SYMBOL(open_bdev_excl); | 1330 | EXPORT_SYMBOL(open_bdev_exclusive); |
1345 | 1331 | ||
1346 | /** | 1332 | /** |
1347 | * close_bdev_excl - release a blockdevice openen by open_bdev_excl() | 1333 | * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() |
1348 | * | 1334 | * |
1349 | * @bdev: blockdevice to close | 1335 | * @bdev: blockdevice to close |
1336 | * @mode: mode, must match that used to open. | ||
1350 | * | 1337 | * |
1351 | * This is the counterpart to open_bdev_excl(). | 1338 | * This is the counterpart to open_bdev_exclusive(). |
1352 | */ | 1339 | */ |
1353 | void close_bdev_excl(struct block_device *bdev) | 1340 | void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) |
1354 | { | 1341 | { |
1355 | bd_release(bdev); | 1342 | bd_release(bdev); |
1356 | blkdev_put(bdev); | 1343 | blkdev_put(bdev, mode); |
1357 | } | 1344 | } |
1358 | 1345 | ||
1359 | EXPORT_SYMBOL(close_bdev_excl); | 1346 | EXPORT_SYMBOL(close_bdev_exclusive); |
1360 | 1347 | ||
1361 | int __invalidate_device(struct block_device *bdev) | 1348 | int __invalidate_device(struct block_device *bdev) |
1362 | { | 1349 | { |
diff --git a/fs/buffer.c b/fs/buffer.c index ac78d4c19b3b..6569fda5cfed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer); | |||
76 | 76 | ||
77 | void unlock_buffer(struct buffer_head *bh) | 77 | void unlock_buffer(struct buffer_head *bh) |
78 | { | 78 | { |
79 | smp_mb__before_clear_bit(); | 79 | clear_bit_unlock(BH_Lock, &bh->b_state); |
80 | clear_buffer_locked(bh); | ||
81 | smp_mb__after_clear_bit(); | 80 | smp_mb__after_clear_bit(); |
82 | wake_up_bit(&bh->b_state, BH_Lock); | 81 | wake_up_bit(&bh->b_state, BH_Lock); |
83 | } | 82 | } |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 262fa10e213d..700697a72618 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -386,15 +386,22 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
386 | cdev_put(new); | 386 | cdev_put(new); |
387 | if (ret) | 387 | if (ret) |
388 | return ret; | 388 | return ret; |
389 | |||
390 | ret = -ENXIO; | ||
389 | filp->f_op = fops_get(p->ops); | 391 | filp->f_op = fops_get(p->ops); |
390 | if (!filp->f_op) { | 392 | if (!filp->f_op) |
391 | cdev_put(p); | 393 | goto out_cdev_put; |
392 | return -ENXIO; | 394 | |
393 | } | 395 | if (filp->f_op->open) { |
394 | if (filp->f_op->open) | ||
395 | ret = filp->f_op->open(inode,filp); | 396 | ret = filp->f_op->open(inode,filp); |
396 | if (ret) | 397 | if (ret) |
397 | cdev_put(p); | 398 | goto out_cdev_put; |
399 | } | ||
400 | |||
401 | return 0; | ||
402 | |||
403 | out_cdev_put: | ||
404 | cdev_put(p); | ||
398 | return ret; | 405 | return ret; |
399 | } | 406 | } |
400 | 407 | ||
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 06e521a945c3..8f528ea24c48 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -1,3 +1,11 @@ | |||
1 | Version 1.55 | ||
2 | ------------ | ||
3 | Various fixes to make delete of open files behavior more predictable | ||
4 | (when delete of an open file fails we mark the file as "delete-on-close" | ||
5 | in a way that more servers accept, but only if we can first rename the | ||
6 | file to a temporary name). Add experimental support for more safely | ||
7 | handling fcntl(F_SETLEASE). | ||
8 | |||
1 | Version 1.54 | 9 | Version 1.54 |
2 | ------------ | 10 | ------------ |
3 | Fix premature write failure on congested networks (we would give up | 11 | Fix premature write failure on congested networks (we would give up |
@@ -13,6 +21,7 @@ on dns_upcall (resolving DFS referralls). Fix plain text password | |||
13 | authentication (requires setting SecurityFlags to 0x30030 to enable | 21 | authentication (requires setting SecurityFlags to 0x30030 to enable |
14 | lanman and plain text though). Fix writes to be at correct offset when | 22 | lanman and plain text though). Fix writes to be at correct offset when |
15 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. | 23 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. |
24 | Fix bug in rewinding readdir directory searches. Add nodfs mount option. | ||
16 | 25 | ||
17 | Version 1.53 | 26 | Version 1.53 |
18 | ------------ | 27 | ------------ |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig new file mode 100644 index 000000000000..341a98965bd0 --- /dev/null +++ b/fs/cifs/Kconfig | |||
@@ -0,0 +1,142 @@ | |||
1 | config CIFS | ||
2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" | ||
3 | depends on INET | ||
4 | select NLS | ||
5 | help | ||
6 | This is the client VFS module for the Common Internet File System | ||
7 | (CIFS) protocol which is the successor to the Server Message Block | ||
8 | (SMB) protocol, the native file sharing mechanism for most early | ||
9 | PC operating systems. The CIFS protocol is fully supported by | ||
10 | file servers such as Windows 2000 (including Windows 2003, NT 4 | ||
11 | and Windows XP) as well as by Samba (which provides excellent CIFS | ||
12 | server support for Linux and many other operating systems). Limited | ||
13 | support for OS/2 and Windows ME and similar servers is provided as | ||
14 | well. | ||
15 | |||
16 | The cifs module provides an advanced network file system | ||
17 | client for mounting to CIFS compliant servers. It includes | ||
18 | support for DFS (hierarchical name space), secure per-user | ||
19 | session establishment via Kerberos or NTLM or NTLMv2, | ||
20 | safe distributed caching (oplock), optional packet | ||
21 | signing, Unicode and other internationalization improvements. | ||
22 | If you need to mount to Samba or Windows from this machine, say Y. | ||
23 | |||
24 | config CIFS_STATS | ||
25 | bool "CIFS statistics" | ||
26 | depends on CIFS | ||
27 | help | ||
28 | Enabling this option will cause statistics for each server share | ||
29 | mounted by the cifs client to be displayed in /proc/fs/cifs/Stats | ||
30 | |||
31 | config CIFS_STATS2 | ||
32 | bool "Extended statistics" | ||
33 | depends on CIFS_STATS | ||
34 | help | ||
35 | Enabling this option will allow more detailed statistics on SMB | ||
36 | request timing to be displayed in /proc/fs/cifs/DebugData and also | ||
37 | allow optional logging of slow responses to dmesg (depending on the | ||
38 | value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details). | ||
39 | These additional statistics may have a minor effect on performance | ||
40 | and memory utilization. | ||
41 | |||
42 | Unless you are a developer or are doing network performance analysis | ||
43 | or tuning, say N. | ||
44 | |||
45 | config CIFS_WEAK_PW_HASH | ||
46 | bool "Support legacy servers which use weaker LANMAN security" | ||
47 | depends on CIFS | ||
48 | help | ||
49 | Modern CIFS servers including Samba and most Windows versions | ||
50 | (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) | ||
51 | security mechanisms. These hash the password more securely | ||
52 | than the mechanisms used in the older LANMAN version of the | ||
53 | SMB protocol but LANMAN based authentication is needed to | ||
54 | establish sessions with some old SMB servers. | ||
55 | |||
56 | Enabling this option allows the cifs module to mount to older | ||
57 | LANMAN based servers such as OS/2 and Windows 95, but such | ||
58 | mounts may be less secure than mounts using NTLM or more recent | ||
59 | security mechanisms if you are on a public network. Unless you | ||
60 | have a need to access old SMB servers (and are on a private | ||
61 | network) you probably want to say N. Even if this support | ||
62 | is enabled in the kernel build, LANMAN authentication will not be | ||
63 | used automatically. At runtime LANMAN mounts are disabled but | ||
64 | can be set to required (or optional) either in | ||
65 | /proc/fs/cifs (see fs/cifs/README for more detail) or via an | ||
66 | option on the mount command. This support is disabled by | ||
67 | default in order to reduce the possibility of a downgrade | ||
68 | attack. | ||
69 | |||
70 | If unsure, say N. | ||
71 | |||
72 | config CIFS_UPCALL | ||
73 | bool "Kerberos/SPNEGO advanced session setup" | ||
74 | depends on CIFS && KEYS | ||
75 | help | ||
76 | Enables an upcall mechanism for CIFS which accesses | ||
77 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
78 | Kerberos tickets which are needed to mount to certain secure servers | ||
79 | (for which more secure Kerberos authentication is required). If | ||
80 | unsure, say N. | ||
81 | |||
82 | config CIFS_XATTR | ||
83 | bool "CIFS extended attributes" | ||
84 | depends on CIFS | ||
85 | help | ||
86 | Extended attributes are name:value pairs associated with inodes by | ||
87 | the kernel or by users (see the attr(5) manual page, or visit | ||
88 | <http://acl.bestbits.at/> for details). CIFS maps the name of | ||
89 | extended attributes beginning with the user namespace prefix | ||
90 | to SMB/CIFS EAs. EAs are stored on Windows servers without the | ||
91 | user namespace prefix, but their names are seen by Linux cifs clients | ||
92 | prefaced by the user namespace prefix. The system namespace | ||
93 | (used by some filesystems to store ACLs) is not supported at | ||
94 | this time. | ||
95 | |||
96 | If unsure, say N. | ||
97 | |||
98 | config CIFS_POSIX | ||
99 | bool "CIFS POSIX Extensions" | ||
100 | depends on CIFS_XATTR | ||
101 | help | ||
102 | Enabling this option will cause the cifs client to attempt to | ||
103 | negotiate a newer dialect with servers, such as Samba 3.0.5 | ||
104 | or later, that optionally can handle more POSIX like (rather | ||
105 | than Windows like) file behavior. It also enables | ||
106 | support for POSIX ACLs (getfacl and setfacl) to servers | ||
107 | (such as Samba 3.10 and later) which can negotiate | ||
108 | CIFS POSIX ACL support. If unsure, say N. | ||
109 | |||
110 | config CIFS_DEBUG2 | ||
111 | bool "Enable additional CIFS debugging routines" | ||
112 | depends on CIFS | ||
113 | help | ||
114 | Enabling this option adds a few more debugging routines | ||
115 | to the cifs code which slightly increases the size of | ||
116 | the cifs module and can cause additional logging of debug | ||
117 | messages in some error paths, slowing performance. This | ||
118 | option can be turned off unless you are debugging | ||
119 | cifs problems. If unsure, say N. | ||
120 | |||
121 | config CIFS_EXPERIMENTAL | ||
122 | bool "CIFS Experimental Features (EXPERIMENTAL)" | ||
123 | depends on CIFS && EXPERIMENTAL | ||
124 | help | ||
125 | Enables cifs features under testing. These features are | ||
126 | experimental and currently include DFS support and directory | ||
127 | change notification ie fcntl(F_DNOTIFY), as well as the upcall | ||
128 | mechanism which will be used for Kerberos session negotiation | ||
129 | and uid remapping. Some of these features also may depend on | ||
130 | setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental | ||
131 | (which is disabled by default). See the file fs/cifs/README | ||
132 | for more details. If unsure, say N. | ||
133 | |||
134 | config CIFS_DFS_UPCALL | ||
135 | bool "DFS feature support (EXPERIMENTAL)" | ||
136 | depends on CIFS_EXPERIMENTAL | ||
137 | depends on KEYS | ||
138 | help | ||
139 | Enables an upcall mechanism for CIFS which contacts userspace | ||
140 | helper utilities to provide server name resolution (host names to | ||
141 | IP addresses) which is needed for implicit mounts of DFS junction | ||
142 | points. If unsure, say N. | ||
diff --git a/fs/cifs/README b/fs/cifs/README index bd2343d4c6a6..a439dc1739b3 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -463,6 +463,9 @@ A partial list of the supported mount options follows: | |||
463 | with cifs style mandatory byte range locks (and most | 463 | with cifs style mandatory byte range locks (and most |
464 | cifs servers do not yet support requesting advisory | 464 | cifs servers do not yet support requesting advisory |
465 | byte range locks). | 465 | byte range locks). |
466 | nodfs Disable DFS (global name space support) even if the | ||
467 | server claims to support it. This can help work around | ||
468 | a problem with parsing of DFS paths with Samba 3.0.24 server. | ||
466 | remount remount the share (often used to change from ro to rw mounts | 469 | remount remount the share (often used to change from ro to rw mounts |
467 | or vice versa) | 470 | or vice versa) |
468 | cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for | 471 | cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for |
@@ -488,6 +491,19 @@ A partial list of the supported mount options follows: | |||
488 | Note that this differs from the sign mount option in that it | 491 | Note that this differs from the sign mount option in that it |
489 | causes encryption of data sent over this mounted share but other | 492 | causes encryption of data sent over this mounted share but other |
490 | shares mounted to the same server are unaffected. | 493 | shares mounted to the same server are unaffected. |
494 | locallease This option is rarely needed. Fcntl F_SETLEASE is | ||
495 | used by some applications such as Samba and NFSv4 server to | ||
496 | check to see whether a file is cacheable. CIFS has no way | ||
497 | to explicitly request a lease, but can check whether a file | ||
498 | is cacheable (oplocked). Unfortunately, even if a file | ||
499 | is not oplocked, it could still be cacheable (ie cifs client | ||
500 | could grant fcntl leases if no other local processes are using | ||
501 | the file) for cases such as when the server does not | ||
502 | support oplocks and the user is sure that the only updates to | ||
503 | the file will be from this client. Specifying this mount option | ||
504 | will allow the cifs client to check for leases (only) locally | ||
505 | for files which are not oplocked instead of denying leases | ||
506 | in that case. (EXPERIMENTAL) | ||
491 | sec Security mode. Allowed values are: | 507 | sec Security mode. Allowed values are: |
492 | none attempt to connect as a null user (no name) | 508 | none attempt to connect as a null user (no name) |
493 | krb5 Use Kerberos version 5 authentication | 509 | krb5 Use Kerberos version 5 authentication |
@@ -638,6 +654,9 @@ requires enabling CONFIG_CIFS_EXPERIMENTAL | |||
638 | cifsacl support needed to retrieve approximated mode bits based on | 654 | cifsacl support needed to retrieve approximated mode bits based on |
639 | the contents of the CIFS ACL. | 655 | the contents of the CIFS ACL. |
640 | 656 | ||
657 | lease support: cifs will check the oplock state before calling into | ||
658 | the vfs to see if we can grant a lease on a file. | ||
659 | |||
641 | DNOTIFY fcntl: needed for support of directory change | 660 | DNOTIFY fcntl: needed for support of directory change |
642 | notification (and perhaps later for file leases) | 661 | notification (and perhaps later for file leases) |
643 | 662 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 25ecbd5b0404..ac5915d61dca 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask) | |||
275 | 275 | ||
276 | cifs_sb = CIFS_SB(inode->i_sb); | 276 | cifs_sb = CIFS_SB(inode->i_sb); |
277 | 277 | ||
278 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) | 278 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { |
279 | return 0; | 279 | if ((mask & MAY_EXEC) && !execute_ok(inode)) |
280 | else /* file mode might have been restricted at mount time | 280 | return -EACCES; |
281 | else | ||
282 | return 0; | ||
283 | } else /* file mode might have been restricted at mount time | ||
281 | on the client (above and beyond ACL on servers) for | 284 | on the client (above and beyond ACL on servers) for |
282 | servers which do not support setting and viewing mode bits, | 285 | servers which do not support setting and viewing mode bits, |
283 | so allowing client to check permissions is useful */ | 286 | so allowing client to check permissions is useful */ |
@@ -309,6 +312,7 @@ cifs_alloc_inode(struct super_block *sb) | |||
309 | file data or metadata */ | 312 | file data or metadata */ |
310 | cifs_inode->clientCanCacheRead = false; | 313 | cifs_inode->clientCanCacheRead = false; |
311 | cifs_inode->clientCanCacheAll = false; | 314 | cifs_inode->clientCanCacheAll = false; |
315 | cifs_inode->delete_pending = false; | ||
312 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ | 316 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ |
313 | 317 | ||
314 | /* Can not set i_flags here - they get immediately overwritten | 318 | /* Can not set i_flags here - they get immediately overwritten |
@@ -617,6 +621,37 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
617 | return generic_file_llseek_unlocked(file, offset, origin); | 621 | return generic_file_llseek_unlocked(file, offset, origin); |
618 | } | 622 | } |
619 | 623 | ||
624 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
625 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | ||
626 | { | ||
627 | /* note that this is called by vfs setlease with the BKL held | ||
628 | although I doubt that BKL is needed here in cifs */ | ||
629 | struct inode *inode = file->f_path.dentry->d_inode; | ||
630 | |||
631 | if (!(S_ISREG(inode->i_mode))) | ||
632 | return -EINVAL; | ||
633 | |||
634 | /* check if file is oplocked */ | ||
635 | if (((arg == F_RDLCK) && | ||
636 | (CIFS_I(inode)->clientCanCacheRead)) || | ||
637 | ((arg == F_WRLCK) && | ||
638 | (CIFS_I(inode)->clientCanCacheAll))) | ||
639 | return generic_setlease(file, arg, lease); | ||
640 | else if (CIFS_SB(inode->i_sb)->tcon->local_lease && | ||
641 | !CIFS_I(inode)->clientCanCacheRead) | ||
642 | /* If the server claims to support oplock on this | ||
643 | file, then we still need to check oplock even | ||
644 | if the local_lease mount option is set, but there | ||
645 | are servers which do not support oplock for which | ||
646 | this mount option may be useful if the user | ||
647 | knows that the file won't be changed on the server | ||
648 | by anyone else */ | ||
649 | return generic_setlease(file, arg, lease); | ||
650 | else | ||
651 | return -EAGAIN; | ||
652 | } | ||
653 | #endif | ||
654 | |||
620 | struct file_system_type cifs_fs_type = { | 655 | struct file_system_type cifs_fs_type = { |
621 | .owner = THIS_MODULE, | 656 | .owner = THIS_MODULE, |
622 | .name = "cifs", | 657 | .name = "cifs", |
@@ -695,6 +730,7 @@ const struct file_operations cifs_file_ops = { | |||
695 | 730 | ||
696 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 731 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
697 | .dir_notify = cifs_dir_notify, | 732 | .dir_notify = cifs_dir_notify, |
733 | .setlease = cifs_setlease, | ||
698 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 734 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
699 | }; | 735 | }; |
700 | 736 | ||
@@ -715,6 +751,7 @@ const struct file_operations cifs_file_direct_ops = { | |||
715 | .llseek = cifs_llseek, | 751 | .llseek = cifs_llseek, |
716 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 752 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
717 | .dir_notify = cifs_dir_notify, | 753 | .dir_notify = cifs_dir_notify, |
754 | .setlease = cifs_setlease, | ||
718 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 755 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
719 | }; | 756 | }; |
720 | const struct file_operations cifs_file_nobrl_ops = { | 757 | const struct file_operations cifs_file_nobrl_ops = { |
@@ -735,6 +772,7 @@ const struct file_operations cifs_file_nobrl_ops = { | |||
735 | 772 | ||
736 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 773 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
737 | .dir_notify = cifs_dir_notify, | 774 | .dir_notify = cifs_dir_notify, |
775 | .setlease = cifs_setlease, | ||
738 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 776 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
739 | }; | 777 | }; |
740 | 778 | ||
@@ -754,6 +792,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { | |||
754 | .llseek = cifs_llseek, | 792 | .llseek = cifs_llseek, |
755 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 793 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
756 | .dir_notify = cifs_dir_notify, | 794 | .dir_notify = cifs_dir_notify, |
795 | .setlease = cifs_setlease, | ||
757 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 796 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
758 | }; | 797 | }; |
759 | 798 | ||
@@ -765,6 +804,7 @@ const struct file_operations cifs_dir_ops = { | |||
765 | .dir_notify = cifs_dir_notify, | 804 | .dir_notify = cifs_dir_notify, |
766 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 805 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
767 | .unlocked_ioctl = cifs_ioctl, | 806 | .unlocked_ioctl = cifs_ioctl, |
807 | .llseek = generic_file_llseek, | ||
768 | }; | 808 | }; |
769 | 809 | ||
770 | static void | 810 | static void |
@@ -945,6 +985,12 @@ static int cifs_oplock_thread(void *dummyarg) | |||
945 | the call */ | 985 | the call */ |
946 | /* mutex_lock(&inode->i_mutex);*/ | 986 | /* mutex_lock(&inode->i_mutex);*/ |
947 | if (S_ISREG(inode->i_mode)) { | 987 | if (S_ISREG(inode->i_mode)) { |
988 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
989 | if (CIFS_I(inode)->clientCanCacheAll == 0) | ||
990 | break_lease(inode, FMODE_READ); | ||
991 | else if (CIFS_I(inode)->clientCanCacheRead == 0) | ||
992 | break_lease(inode, FMODE_WRITE); | ||
993 | #endif | ||
948 | rc = filemap_fdatawrite(inode->i_mapping); | 994 | rc = filemap_fdatawrite(inode->i_mapping); |
949 | if (CIFS_I(inode)->clientCanCacheRead == 0) { | 995 | if (CIFS_I(inode)->clientCanCacheRead == 0) { |
950 | waitrc = filemap_fdatawait( | 996 | waitrc = filemap_fdatawait( |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index f7b4a5cd837b..074de0b5064d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
101 | extern const struct export_operations cifs_export_ops; | 101 | extern const struct export_operations cifs_export_ops; |
102 | #endif /* EXPERIMENTAL */ | 102 | #endif /* EXPERIMENTAL */ |
103 | 103 | ||
104 | #define CIFS_VERSION "1.54" | 104 | #define CIFS_VERSION "1.55" |
105 | #endif /* _CIFSFS_H */ | 105 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0d22479d99b7..c791e5b5a914 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -285,6 +285,7 @@ struct cifsTconInfo { | |||
285 | bool seal:1; /* transport encryption for this mounted share */ | 285 | bool seal:1; /* transport encryption for this mounted share */ |
286 | bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol | 286 | bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol |
287 | for this mount even if server would support */ | 287 | for this mount even if server would support */ |
288 | bool local_lease:1; /* check leases (only) on local system not remote */ | ||
288 | /* BB add field for back pointer to sb struct(s)? */ | 289 | /* BB add field for back pointer to sb struct(s)? */ |
289 | }; | 290 | }; |
290 | 291 | ||
@@ -353,6 +354,7 @@ struct cifsInodeInfo { | |||
353 | bool clientCanCacheRead:1; /* read oplock */ | 354 | bool clientCanCacheRead:1; /* read oplock */ |
354 | bool clientCanCacheAll:1; /* read and writebehind oplock */ | 355 | bool clientCanCacheAll:1; /* read and writebehind oplock */ |
355 | bool oplockPending:1; | 356 | bool oplockPending:1; |
357 | bool delete_pending:1; /* DELETE_ON_CLOSE is set */ | ||
356 | struct inode vfs_inode; | 358 | struct inode vfs_inode; |
357 | }; | 359 | }; |
358 | 360 | ||
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6f4ffe15d68d..843a85fb8b9a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -1309,6 +1309,7 @@ OldOpenRetry: | |||
1309 | cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); | 1309 | cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); |
1310 | pfile_info->EndOfFile = pfile_info->AllocationSize; | 1310 | pfile_info->EndOfFile = pfile_info->AllocationSize; |
1311 | pfile_info->NumberOfLinks = cpu_to_le32(1); | 1311 | pfile_info->NumberOfLinks = cpu_to_le32(1); |
1312 | pfile_info->DeletePending = 0; | ||
1312 | } | 1313 | } |
1313 | } | 1314 | } |
1314 | 1315 | ||
@@ -1410,6 +1411,7 @@ openRetry: | |||
1410 | pfile_info->AllocationSize = pSMBr->AllocationSize; | 1411 | pfile_info->AllocationSize = pSMBr->AllocationSize; |
1411 | pfile_info->EndOfFile = pSMBr->EndOfFile; | 1412 | pfile_info->EndOfFile = pSMBr->EndOfFile; |
1412 | pfile_info->NumberOfLinks = cpu_to_le32(1); | 1413 | pfile_info->NumberOfLinks = cpu_to_le32(1); |
1414 | pfile_info->DeletePending = 0; | ||
1413 | } | 1415 | } |
1414 | } | 1416 | } |
1415 | 1417 | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4c13bcdb92a5..71b7661e2260 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -90,6 +90,8 @@ struct smb_vol { | |||
90 | bool nocase:1; /* request case insensitive filenames */ | 90 | bool nocase:1; /* request case insensitive filenames */ |
91 | bool nobrl:1; /* disable sending byte range locks to srv */ | 91 | bool nobrl:1; /* disable sending byte range locks to srv */ |
92 | bool seal:1; /* request transport encryption on share */ | 92 | bool seal:1; /* request transport encryption on share */ |
93 | bool nodfs:1; /* Do not request DFS, even if available */ | ||
94 | bool local_lease:1; /* check leases only on local system, not remote */ | ||
93 | unsigned int rsize; | 95 | unsigned int rsize; |
94 | unsigned int wsize; | 96 | unsigned int wsize; |
95 | unsigned int sockopt; | 97 | unsigned int sockopt; |
@@ -124,7 +126,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
124 | struct mid_q_entry *mid_entry; | 126 | struct mid_q_entry *mid_entry; |
125 | 127 | ||
126 | spin_lock(&GlobalMid_Lock); | 128 | spin_lock(&GlobalMid_Lock); |
127 | if (kthread_should_stop()) { | 129 | if (server->tcpStatus == CifsExiting) { |
128 | /* the demux thread will exit normally | 130 | /* the demux thread will exit normally |
129 | next time through the loop */ | 131 | next time through the loop */ |
130 | spin_unlock(&GlobalMid_Lock); | 132 | spin_unlock(&GlobalMid_Lock); |
@@ -184,7 +186,8 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
184 | spin_unlock(&GlobalMid_Lock); | 186 | spin_unlock(&GlobalMid_Lock); |
185 | up(&server->tcpSem); | 187 | up(&server->tcpSem); |
186 | 188 | ||
187 | while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { | 189 | while ((server->tcpStatus != CifsExiting) && |
190 | (server->tcpStatus != CifsGood)) { | ||
188 | try_to_freeze(); | 191 | try_to_freeze(); |
189 | if (server->protocolType == IPV6) { | 192 | if (server->protocolType == IPV6) { |
190 | rc = ipv6_connect(&server->addr.sockAddr6, | 193 | rc = ipv6_connect(&server->addr.sockAddr6, |
@@ -201,7 +204,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
201 | } else { | 204 | } else { |
202 | atomic_inc(&tcpSesReconnectCount); | 205 | atomic_inc(&tcpSesReconnectCount); |
203 | spin_lock(&GlobalMid_Lock); | 206 | spin_lock(&GlobalMid_Lock); |
204 | if (!kthread_should_stop()) | 207 | if (server->tcpStatus != CifsExiting) |
205 | server->tcpStatus = CifsGood; | 208 | server->tcpStatus = CifsGood; |
206 | server->sequence_number = 0; | 209 | server->sequence_number = 0; |
207 | spin_unlock(&GlobalMid_Lock); | 210 | spin_unlock(&GlobalMid_Lock); |
@@ -356,7 +359,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) | |||
356 | GFP_KERNEL); | 359 | GFP_KERNEL); |
357 | 360 | ||
358 | set_freezable(); | 361 | set_freezable(); |
359 | while (!kthread_should_stop()) { | 362 | while (server->tcpStatus != CifsExiting) { |
360 | if (try_to_freeze()) | 363 | if (try_to_freeze()) |
361 | continue; | 364 | continue; |
362 | if (bigbuf == NULL) { | 365 | if (bigbuf == NULL) { |
@@ -397,7 +400,7 @@ incomplete_rcv: | |||
397 | kernel_recvmsg(csocket, &smb_msg, | 400 | kernel_recvmsg(csocket, &smb_msg, |
398 | &iov, 1, pdu_length, 0 /* BB other flags? */); | 401 | &iov, 1, pdu_length, 0 /* BB other flags? */); |
399 | 402 | ||
400 | if (kthread_should_stop()) { | 403 | if (server->tcpStatus == CifsExiting) { |
401 | break; | 404 | break; |
402 | } else if (server->tcpStatus == CifsNeedReconnect) { | 405 | } else if (server->tcpStatus == CifsNeedReconnect) { |
403 | cFYI(1, ("Reconnect after server stopped responding")); | 406 | cFYI(1, ("Reconnect after server stopped responding")); |
@@ -522,7 +525,7 @@ incomplete_rcv: | |||
522 | total_read += length) { | 525 | total_read += length) { |
523 | length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, | 526 | length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, |
524 | pdu_length - total_read, 0); | 527 | pdu_length - total_read, 0); |
525 | if (kthread_should_stop() || | 528 | if ((server->tcpStatus == CifsExiting) || |
526 | (length == -EINTR)) { | 529 | (length == -EINTR)) { |
527 | /* then will exit */ | 530 | /* then will exit */ |
528 | reconnect = 2; | 531 | reconnect = 2; |
@@ -651,14 +654,6 @@ multi_t2_fnd: | |||
651 | spin_unlock(&GlobalMid_Lock); | 654 | spin_unlock(&GlobalMid_Lock); |
652 | wake_up_all(&server->response_q); | 655 | wake_up_all(&server->response_q); |
653 | 656 | ||
654 | /* don't exit until kthread_stop is called */ | ||
655 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
656 | while (!kthread_should_stop()) { | ||
657 | schedule(); | ||
658 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
659 | } | ||
660 | set_current_state(TASK_RUNNING); | ||
661 | |||
662 | /* check if we have blocked requests that need to free */ | 657 | /* check if we have blocked requests that need to free */ |
663 | /* Note that cifs_max_pending is normally 50, but | 658 | /* Note that cifs_max_pending is normally 50, but |
664 | can be set at module install time to as little as two */ | 659 | can be set at module install time to as little as two */ |
@@ -755,6 +750,7 @@ multi_t2_fnd: | |||
755 | write_unlock(&GlobalSMBSeslock); | 750 | write_unlock(&GlobalSMBSeslock); |
756 | 751 | ||
757 | kfree(server->hostname); | 752 | kfree(server->hostname); |
753 | task_to_wake = xchg(&server->tsk, NULL); | ||
758 | kfree(server); | 754 | kfree(server); |
759 | 755 | ||
760 | length = atomic_dec_return(&tcpSesAllocCount); | 756 | length = atomic_dec_return(&tcpSesAllocCount); |
@@ -762,6 +758,16 @@ multi_t2_fnd: | |||
762 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 758 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, |
763 | GFP_KERNEL); | 759 | GFP_KERNEL); |
764 | 760 | ||
761 | /* if server->tsk was NULL then wait for a signal before exiting */ | ||
762 | if (!task_to_wake) { | ||
763 | set_current_state(TASK_INTERRUPTIBLE); | ||
764 | while (!signal_pending(current)) { | ||
765 | schedule(); | ||
766 | set_current_state(TASK_INTERRUPTIBLE); | ||
767 | } | ||
768 | set_current_state(TASK_RUNNING); | ||
769 | } | ||
770 | |||
765 | return 0; | 771 | return 0; |
766 | } | 772 | } |
767 | 773 | ||
@@ -1218,6 +1224,8 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1218 | vol->sfu_emul = 1; | 1224 | vol->sfu_emul = 1; |
1219 | } else if (strnicmp(data, "nosfu", 5) == 0) { | 1225 | } else if (strnicmp(data, "nosfu", 5) == 0) { |
1220 | vol->sfu_emul = 0; | 1226 | vol->sfu_emul = 0; |
1227 | } else if (strnicmp(data, "nodfs", 5) == 0) { | ||
1228 | vol->nodfs = 1; | ||
1221 | } else if (strnicmp(data, "posixpaths", 10) == 0) { | 1229 | } else if (strnicmp(data, "posixpaths", 10) == 0) { |
1222 | vol->posix_paths = 1; | 1230 | vol->posix_paths = 1; |
1223 | } else if (strnicmp(data, "noposixpaths", 12) == 0) { | 1231 | } else if (strnicmp(data, "noposixpaths", 12) == 0) { |
@@ -1268,6 +1276,10 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1268 | vol->no_psx_acl = 0; | 1276 | vol->no_psx_acl = 0; |
1269 | } else if (strnicmp(data, "noacl", 5) == 0) { | 1277 | } else if (strnicmp(data, "noacl", 5) == 0) { |
1270 | vol->no_psx_acl = 1; | 1278 | vol->no_psx_acl = 1; |
1279 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1280 | } else if (strnicmp(data, "locallease", 6) == 0) { | ||
1281 | vol->local_lease = 1; | ||
1282 | #endif | ||
1271 | } else if (strnicmp(data, "sign", 4) == 0) { | 1283 | } else if (strnicmp(data, "sign", 4) == 0) { |
1272 | vol->secFlg |= CIFSSEC_MUST_SIGN; | 1284 | vol->secFlg |= CIFSSEC_MUST_SIGN; |
1273 | } else if (strnicmp(data, "seal", 4) == 0) { | 1285 | } else if (strnicmp(data, "seal", 4) == 0) { |
@@ -1845,6 +1857,16 @@ convert_delimiter(char *path, char delim) | |||
1845 | } | 1857 | } |
1846 | } | 1858 | } |
1847 | 1859 | ||
1860 | static void | ||
1861 | kill_cifsd(struct TCP_Server_Info *server) | ||
1862 | { | ||
1863 | struct task_struct *task; | ||
1864 | |||
1865 | task = xchg(&server->tsk, NULL); | ||
1866 | if (task) | ||
1867 | force_sig(SIGKILL, task); | ||
1868 | } | ||
1869 | |||
1848 | int | 1870 | int |
1849 | cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | 1871 | cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, |
1850 | char *mount_data, const char *devname) | 1872 | char *mount_data, const char *devname) |
@@ -2166,6 +2188,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2166 | for the retry flag is used */ | 2188 | for the retry flag is used */ |
2167 | tcon->retry = volume_info.retry; | 2189 | tcon->retry = volume_info.retry; |
2168 | tcon->nocase = volume_info.nocase; | 2190 | tcon->nocase = volume_info.nocase; |
2191 | tcon->local_lease = volume_info.local_lease; | ||
2169 | if (tcon->seal != volume_info.seal) | 2192 | if (tcon->seal != volume_info.seal) |
2170 | cERROR(1, ("transport encryption setting " | 2193 | cERROR(1, ("transport encryption setting " |
2171 | "conflicts with existing tid")); | 2194 | "conflicts with existing tid")); |
@@ -2197,6 +2220,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2197 | volume_info.UNC, | 2220 | volume_info.UNC, |
2198 | tcon, cifs_sb->local_nls); | 2221 | tcon, cifs_sb->local_nls); |
2199 | cFYI(1, ("CIFS Tcon rc = %d", rc)); | 2222 | cFYI(1, ("CIFS Tcon rc = %d", rc)); |
2223 | if (volume_info.nodfs) { | ||
2224 | tcon->Flags &= | ||
2225 | ~SMB_SHARE_IS_IN_DFS; | ||
2226 | cFYI(1, ("DFS disabled (%d)", | ||
2227 | tcon->Flags)); | ||
2228 | } | ||
2200 | } | 2229 | } |
2201 | if (!rc) { | 2230 | if (!rc) { |
2202 | atomic_inc(&pSesInfo->inUse); | 2231 | atomic_inc(&pSesInfo->inUse); |
@@ -2225,14 +2254,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2225 | spin_lock(&GlobalMid_Lock); | 2254 | spin_lock(&GlobalMid_Lock); |
2226 | srvTcp->tcpStatus = CifsExiting; | 2255 | srvTcp->tcpStatus = CifsExiting; |
2227 | spin_unlock(&GlobalMid_Lock); | 2256 | spin_unlock(&GlobalMid_Lock); |
2228 | if (srvTcp->tsk) { | 2257 | kill_cifsd(srvTcp); |
2229 | /* If we could verify that kthread_stop would | ||
2230 | always wake up processes blocked in | ||
2231 | tcp in recv_mesg then we could remove the | ||
2232 | send_sig call */ | ||
2233 | force_sig(SIGKILL, srvTcp->tsk); | ||
2234 | kthread_stop(srvTcp->tsk); | ||
2235 | } | ||
2236 | } | 2258 | } |
2237 | /* If find_unc succeeded then rc == 0 so we can not end */ | 2259 | /* If find_unc succeeded then rc == 0 so we can not end */ |
2238 | if (tcon) /* up accidently freeing someone elses tcon struct */ | 2260 | if (tcon) /* up accidently freeing someone elses tcon struct */ |
@@ -2245,19 +2267,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2245 | temp_rc = CIFSSMBLogoff(xid, pSesInfo); | 2267 | temp_rc = CIFSSMBLogoff(xid, pSesInfo); |
2246 | /* if the socketUseCount is now zero */ | 2268 | /* if the socketUseCount is now zero */ |
2247 | if ((temp_rc == -ESHUTDOWN) && | 2269 | if ((temp_rc == -ESHUTDOWN) && |
2248 | (pSesInfo->server) && | 2270 | (pSesInfo->server)) |
2249 | (pSesInfo->server->tsk)) { | 2271 | kill_cifsd(pSesInfo->server); |
2250 | force_sig(SIGKILL, | ||
2251 | pSesInfo->server->tsk); | ||
2252 | kthread_stop(pSesInfo->server->tsk); | ||
2253 | } | ||
2254 | } else { | 2272 | } else { |
2255 | cFYI(1, ("No session or bad tcon")); | 2273 | cFYI(1, ("No session or bad tcon")); |
2256 | if ((pSesInfo->server) && | 2274 | if (pSesInfo->server) { |
2257 | (pSesInfo->server->tsk)) { | 2275 | spin_lock(&GlobalMid_Lock); |
2258 | force_sig(SIGKILL, | 2276 | srvTcp->tcpStatus = CifsExiting; |
2259 | pSesInfo->server->tsk); | 2277 | spin_unlock(&GlobalMid_Lock); |
2260 | kthread_stop(pSesInfo->server->tsk); | 2278 | kill_cifsd(pSesInfo->server); |
2261 | } | 2279 | } |
2262 | } | 2280 | } |
2263 | sesInfoFree(pSesInfo); | 2281 | sesInfoFree(pSesInfo); |
@@ -3544,7 +3562,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3544 | int rc = 0; | 3562 | int rc = 0; |
3545 | int xid; | 3563 | int xid; |
3546 | struct cifsSesInfo *ses = NULL; | 3564 | struct cifsSesInfo *ses = NULL; |
3547 | struct task_struct *cifsd_task; | ||
3548 | char *tmp; | 3565 | char *tmp; |
3549 | 3566 | ||
3550 | xid = GetXid(); | 3567 | xid = GetXid(); |
@@ -3560,7 +3577,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3560 | tconInfoFree(cifs_sb->tcon); | 3577 | tconInfoFree(cifs_sb->tcon); |
3561 | if ((ses) && (ses->server)) { | 3578 | if ((ses) && (ses->server)) { |
3562 | /* save off task so we do not refer to ses later */ | 3579 | /* save off task so we do not refer to ses later */ |
3563 | cifsd_task = ses->server->tsk; | ||
3564 | cFYI(1, ("About to do SMBLogoff ")); | 3580 | cFYI(1, ("About to do SMBLogoff ")); |
3565 | rc = CIFSSMBLogoff(xid, ses); | 3581 | rc = CIFSSMBLogoff(xid, ses); |
3566 | if (rc == -EBUSY) { | 3582 | if (rc == -EBUSY) { |
@@ -3568,10 +3584,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3568 | return 0; | 3584 | return 0; |
3569 | } else if (rc == -ESHUTDOWN) { | 3585 | } else if (rc == -ESHUTDOWN) { |
3570 | cFYI(1, ("Waking up socket by sending signal")); | 3586 | cFYI(1, ("Waking up socket by sending signal")); |
3571 | if (cifsd_task) { | 3587 | if (ses->server) |
3572 | force_sig(SIGKILL, cifsd_task); | 3588 | kill_cifsd(ses->server); |
3573 | kthread_stop(cifsd_task); | ||
3574 | } | ||
3575 | rc = 0; | 3589 | rc = 0; |
3576 | } /* else - we have an smb session | 3590 | } /* else - we have an smb session |
3577 | left on this socket do not kill cifsd */ | 3591 | left on this socket do not kill cifsd */ |
@@ -3701,7 +3715,9 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3701 | cERROR(1, ("Send error in SessSetup = %d", rc)); | 3715 | cERROR(1, ("Send error in SessSetup = %d", rc)); |
3702 | } else { | 3716 | } else { |
3703 | cFYI(1, ("CIFS Session Established successfully")); | 3717 | cFYI(1, ("CIFS Session Established successfully")); |
3718 | spin_lock(&GlobalMid_Lock); | ||
3704 | pSesInfo->status = CifsGood; | 3719 | pSesInfo->status = CifsGood; |
3720 | spin_unlock(&GlobalMid_Lock); | ||
3705 | } | 3721 | } |
3706 | 3722 | ||
3707 | ss_err_exit: | 3723 | ss_err_exit: |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c4a8a0605125..62d8bd8f14c0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping, | |||
1791 | SetPageUptodate(page); | 1791 | SetPageUptodate(page); |
1792 | unlock_page(page); | 1792 | unlock_page(page); |
1793 | if (!pagevec_add(plru_pvec, page)) | 1793 | if (!pagevec_add(plru_pvec, page)) |
1794 | __pagevec_lru_add(plru_pvec); | 1794 | __pagevec_lru_add_file(plru_pvec); |
1795 | data += PAGE_CACHE_SIZE; | 1795 | data += PAGE_CACHE_SIZE; |
1796 | } | 1796 | } |
1797 | return; | 1797 | return; |
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
1925 | bytes_read = 0; | 1925 | bytes_read = 0; |
1926 | } | 1926 | } |
1927 | 1927 | ||
1928 | pagevec_lru_add(&lru_pvec); | 1928 | pagevec_lru_add_file(&lru_pvec); |
1929 | 1929 | ||
1930 | /* need to free smb_read_data buf before exit */ | 1930 | /* need to free smb_read_data buf before exit */ |
1931 | if (smb_read_data) { | 1931 | if (smb_read_data) { |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8c833345fc9..d54fa8aeaea9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -506,6 +506,7 @@ int cifs_get_inode_info(struct inode **pinode, | |||
506 | inode = *pinode; | 506 | inode = *pinode; |
507 | cifsInfo = CIFS_I(inode); | 507 | cifsInfo = CIFS_I(inode); |
508 | cifsInfo->cifsAttrs = attr; | 508 | cifsInfo->cifsAttrs = attr; |
509 | cifsInfo->delete_pending = pfindData->DeletePending ? true : false; | ||
509 | cFYI(1, ("Old time %ld", cifsInfo->time)); | 510 | cFYI(1, ("Old time %ld", cifsInfo->time)); |
510 | cifsInfo->time = jiffies; | 511 | cifsInfo->time = jiffies; |
511 | cFYI(1, ("New time %ld", cifsInfo->time)); | 512 | cFYI(1, ("New time %ld", cifsInfo->time)); |
@@ -772,63 +773,106 @@ out: | |||
772 | * anything else. | 773 | * anything else. |
773 | */ | 774 | */ |
774 | static int | 775 | static int |
775 | cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) | 776 | cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid) |
776 | { | 777 | { |
777 | int oplock = 0; | 778 | int oplock = 0; |
778 | int rc; | 779 | int rc; |
779 | __u16 netfid; | 780 | __u16 netfid; |
781 | struct inode *inode = dentry->d_inode; | ||
780 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 782 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
781 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 783 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
782 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 784 | struct cifsTconInfo *tcon = cifs_sb->tcon; |
783 | __u32 dosattr; | 785 | __u32 dosattr, origattr; |
784 | FILE_BASIC_INFO *info_buf; | 786 | FILE_BASIC_INFO *info_buf = NULL; |
785 | 787 | ||
786 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | 788 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, |
787 | DELETE|FILE_WRITE_ATTRIBUTES, | 789 | DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, |
788 | CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, | ||
789 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 790 | &netfid, &oplock, NULL, cifs_sb->local_nls, |
790 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 791 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
791 | if (rc != 0) | 792 | if (rc != 0) |
792 | goto out; | 793 | goto out; |
793 | 794 | ||
794 | /* set ATTR_HIDDEN and clear ATTR_READONLY */ | 795 | origattr = cifsInode->cifsAttrs; |
795 | cifsInode = CIFS_I(inode); | 796 | if (origattr == 0) |
796 | dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; | 797 | origattr |= ATTR_NORMAL; |
798 | |||
799 | dosattr = origattr & ~ATTR_READONLY; | ||
797 | if (dosattr == 0) | 800 | if (dosattr == 0) |
798 | dosattr |= ATTR_NORMAL; | 801 | dosattr |= ATTR_NORMAL; |
799 | dosattr |= ATTR_HIDDEN; | 802 | dosattr |= ATTR_HIDDEN; |
800 | 803 | ||
801 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); | 804 | /* set ATTR_HIDDEN and clear ATTR_READONLY, but only if needed */ |
802 | if (info_buf == NULL) { | 805 | if (dosattr != origattr) { |
803 | rc = -ENOMEM; | 806 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); |
804 | goto out_close; | 807 | if (info_buf == NULL) { |
808 | rc = -ENOMEM; | ||
809 | goto out_close; | ||
810 | } | ||
811 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
812 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, | ||
813 | current->tgid); | ||
814 | /* although we would like to mark the file hidden | ||
815 | if that fails we will still try to rename it */ | ||
816 | if (rc != 0) | ||
817 | cifsInode->cifsAttrs = dosattr; | ||
818 | else | ||
819 | dosattr = origattr; /* since not able to change them */ | ||
805 | } | 820 | } |
806 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
807 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); | ||
808 | kfree(info_buf); | ||
809 | if (rc != 0) | ||
810 | goto out_close; | ||
811 | cifsInode->cifsAttrs = dosattr; | ||
812 | 821 | ||
813 | /* silly-rename the file */ | 822 | /* rename the file */ |
814 | CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, | 823 | rc = CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, |
815 | cifs_sb->mnt_cifs_flags & | 824 | cifs_sb->mnt_cifs_flags & |
816 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 825 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
826 | if (rc != 0) { | ||
827 | rc = -ETXTBSY; | ||
828 | goto undo_setattr; | ||
829 | } | ||
817 | 830 | ||
818 | /* set DELETE_ON_CLOSE */ | 831 | /* try to set DELETE_ON_CLOSE */ |
819 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); | 832 | if (!cifsInode->delete_pending) { |
820 | 833 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, | |
821 | /* | 834 | current->tgid); |
822 | * some samba versions return -ENOENT when we try to set the file | 835 | /* |
823 | * disposition here. Likely a samba bug, but work around it for now | 836 | * some samba versions return -ENOENT when we try to set the |
824 | */ | 837 | * file disposition here. Likely a samba bug, but work around |
825 | if (rc == -ENOENT) | 838 | * it for now. This means that some cifsXXX files may hang |
826 | rc = 0; | 839 | * around after they shouldn't. |
840 | * | ||
841 | * BB: remove this hack after more servers have the fix | ||
842 | */ | ||
843 | if (rc == -ENOENT) | ||
844 | rc = 0; | ||
845 | else if (rc != 0) { | ||
846 | rc = -ETXTBSY; | ||
847 | goto undo_rename; | ||
848 | } | ||
849 | cifsInode->delete_pending = true; | ||
850 | } | ||
827 | 851 | ||
828 | out_close: | 852 | out_close: |
829 | CIFSSMBClose(xid, tcon, netfid); | 853 | CIFSSMBClose(xid, tcon, netfid); |
830 | out: | 854 | out: |
855 | kfree(info_buf); | ||
831 | return rc; | 856 | return rc; |
857 | |||
858 | /* | ||
859 | * reset everything back to the original state. Don't bother | ||
860 | * dealing with errors here since we can't do anything about | ||
861 | * them anyway. | ||
862 | */ | ||
863 | undo_rename: | ||
864 | CIFSSMBRenameOpenFile(xid, tcon, netfid, dentry->d_name.name, | ||
865 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
866 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
867 | undo_setattr: | ||
868 | if (dosattr != origattr) { | ||
869 | info_buf->Attributes = cpu_to_le32(origattr); | ||
870 | if (!CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, | ||
871 | current->tgid)) | ||
872 | cifsInode->cifsAttrs = origattr; | ||
873 | } | ||
874 | |||
875 | goto out_close; | ||
832 | } | 876 | } |
833 | 877 | ||
834 | int cifs_unlink(struct inode *dir, struct dentry *dentry) | 878 | int cifs_unlink(struct inode *dir, struct dentry *dentry) |
@@ -878,7 +922,7 @@ psx_del_no_retry: | |||
878 | } else if (rc == -ENOENT) { | 922 | } else if (rc == -ENOENT) { |
879 | d_drop(dentry); | 923 | d_drop(dentry); |
880 | } else if (rc == -ETXTBSY) { | 924 | } else if (rc == -ETXTBSY) { |
881 | rc = cifs_rename_pending_delete(full_path, inode, xid); | 925 | rc = cifs_rename_pending_delete(full_path, dentry, xid); |
882 | if (rc == 0) | 926 | if (rc == 0) |
883 | drop_nlink(inode); | 927 | drop_nlink(inode); |
884 | } else if (rc == -EACCES && dosattr == 0) { | 928 | } else if (rc == -EACCES && dosattr == 0) { |
@@ -1241,22 +1285,21 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | |||
1241 | return rc; | 1285 | return rc; |
1242 | } | 1286 | } |
1243 | 1287 | ||
1244 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | 1288 | int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, |
1245 | struct inode *target_inode, struct dentry *target_direntry) | 1289 | struct inode *target_dir, struct dentry *target_dentry) |
1246 | { | 1290 | { |
1247 | char *fromName = NULL; | 1291 | char *fromName = NULL; |
1248 | char *toName = NULL; | 1292 | char *toName = NULL; |
1249 | struct cifs_sb_info *cifs_sb_source; | 1293 | struct cifs_sb_info *cifs_sb_source; |
1250 | struct cifs_sb_info *cifs_sb_target; | 1294 | struct cifs_sb_info *cifs_sb_target; |
1251 | struct cifsTconInfo *pTcon; | 1295 | struct cifsTconInfo *tcon; |
1252 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | 1296 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; |
1253 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1297 | FILE_UNIX_BASIC_INFO *info_buf_target; |
1254 | int xid; | 1298 | int xid, rc, tmprc; |
1255 | int rc; | ||
1256 | 1299 | ||
1257 | cifs_sb_target = CIFS_SB(target_inode->i_sb); | 1300 | cifs_sb_target = CIFS_SB(target_dir->i_sb); |
1258 | cifs_sb_source = CIFS_SB(source_inode->i_sb); | 1301 | cifs_sb_source = CIFS_SB(source_dir->i_sb); |
1259 | pTcon = cifs_sb_source->tcon; | 1302 | tcon = cifs_sb_source->tcon; |
1260 | 1303 | ||
1261 | xid = GetXid(); | 1304 | xid = GetXid(); |
1262 | 1305 | ||
@@ -1264,7 +1307,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | |||
1264 | * BB: this might be allowed if same server, but different share. | 1307 | * BB: this might be allowed if same server, but different share. |
1265 | * Consider adding support for this | 1308 | * Consider adding support for this |
1266 | */ | 1309 | */ |
1267 | if (pTcon != cifs_sb_target->tcon) { | 1310 | if (tcon != cifs_sb_target->tcon) { |
1268 | rc = -EXDEV; | 1311 | rc = -EXDEV; |
1269 | goto cifs_rename_exit; | 1312 | goto cifs_rename_exit; |
1270 | } | 1313 | } |
@@ -1273,65 +1316,65 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | |||
1273 | * we already have the rename sem so we do not need to | 1316 | * we already have the rename sem so we do not need to |
1274 | * grab it again here to protect the path integrity | 1317 | * grab it again here to protect the path integrity |
1275 | */ | 1318 | */ |
1276 | fromName = build_path_from_dentry(source_direntry); | 1319 | fromName = build_path_from_dentry(source_dentry); |
1277 | if (fromName == NULL) { | 1320 | if (fromName == NULL) { |
1278 | rc = -ENOMEM; | 1321 | rc = -ENOMEM; |
1279 | goto cifs_rename_exit; | 1322 | goto cifs_rename_exit; |
1280 | } | 1323 | } |
1281 | 1324 | ||
1282 | toName = build_path_from_dentry(target_direntry); | 1325 | toName = build_path_from_dentry(target_dentry); |
1283 | if (toName == NULL) { | 1326 | if (toName == NULL) { |
1284 | rc = -ENOMEM; | 1327 | rc = -ENOMEM; |
1285 | goto cifs_rename_exit; | 1328 | goto cifs_rename_exit; |
1286 | } | 1329 | } |
1287 | 1330 | ||
1288 | rc = cifs_do_rename(xid, source_direntry, fromName, | 1331 | rc = cifs_do_rename(xid, source_dentry, fromName, |
1289 | target_direntry, toName); | 1332 | target_dentry, toName); |
1290 | 1333 | ||
1291 | if (rc == -EEXIST) { | 1334 | if (rc == -EEXIST && tcon->unix_ext) { |
1292 | if (pTcon->unix_ext) { | 1335 | /* |
1293 | /* | 1336 | * Are src and dst hardlinks of same inode? We can |
1294 | * Are src and dst hardlinks of same inode? We can | 1337 | * only tell with unix extensions enabled |
1295 | * only tell with unix extensions enabled | 1338 | */ |
1296 | */ | 1339 | info_buf_source = |
1297 | info_buf_source = | 1340 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
1298 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), | 1341 | GFP_KERNEL); |
1299 | GFP_KERNEL); | 1342 | if (info_buf_source == NULL) { |
1300 | if (info_buf_source == NULL) | 1343 | rc = -ENOMEM; |
1301 | goto unlink_target; | 1344 | goto cifs_rename_exit; |
1302 | 1345 | } | |
1303 | info_buf_target = info_buf_source + 1; | ||
1304 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, | ||
1305 | info_buf_source, | ||
1306 | cifs_sb_source->local_nls, | ||
1307 | cifs_sb_source->mnt_cifs_flags & | ||
1308 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1309 | if (rc != 0) | ||
1310 | goto unlink_target; | ||
1311 | |||
1312 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, | ||
1313 | toName, info_buf_target, | ||
1314 | cifs_sb_target->local_nls, | ||
1315 | /* remap based on source sb */ | ||
1316 | cifs_sb_source->mnt_cifs_flags & | ||
1317 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1318 | 1346 | ||
1319 | if (rc == 0 && (info_buf_source->UniqueId == | 1347 | info_buf_target = info_buf_source + 1; |
1320 | info_buf_target->UniqueId)) | 1348 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, |
1321 | /* same file, POSIX says that this is a noop */ | 1349 | info_buf_source, |
1322 | goto cifs_rename_exit; | 1350 | cifs_sb_source->local_nls, |
1323 | } /* else ... BB we could add the same check for Windows by | 1351 | cifs_sb_source->mnt_cifs_flags & |
1352 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1353 | if (tmprc != 0) | ||
1354 | goto unlink_target; | ||
1355 | |||
1356 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, | ||
1357 | toName, info_buf_target, | ||
1358 | cifs_sb_target->local_nls, | ||
1359 | /* remap based on source sb */ | ||
1360 | cifs_sb_source->mnt_cifs_flags & | ||
1361 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1362 | |||
1363 | if (tmprc == 0 && (info_buf_source->UniqueId == | ||
1364 | info_buf_target->UniqueId)) | ||
1365 | /* same file, POSIX says that this is a noop */ | ||
1366 | goto cifs_rename_exit; | ||
1367 | } /* else ... BB we could add the same check for Windows by | ||
1324 | checking the UniqueId via FILE_INTERNAL_INFO */ | 1368 | checking the UniqueId via FILE_INTERNAL_INFO */ |
1369 | |||
1325 | unlink_target: | 1370 | unlink_target: |
1326 | /* | 1371 | if ((rc == -EACCES) || (rc == -EEXIST)) { |
1327 | * we either can not tell the files are hardlinked (as with | 1372 | tmprc = cifs_unlink(target_dir, target_dentry); |
1328 | * Windows servers) or files are not hardlinked. Delete the | 1373 | if (tmprc) |
1329 | * target manually before renaming to follow POSIX rather than | 1374 | goto cifs_rename_exit; |
1330 | * Windows semantics | 1375 | |
1331 | */ | 1376 | rc = cifs_do_rename(xid, source_dentry, fromName, |
1332 | cifs_unlink(target_inode, target_direntry); | 1377 | target_dentry, toName); |
1333 | rc = cifs_do_rename(xid, source_direntry, fromName, | ||
1334 | target_direntry, toName); | ||
1335 | } | 1378 | } |
1336 | 1379 | ||
1337 | cifs_rename_exit: | 1380 | cifs_rename_exit: |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 765adf12d54f..58d57299f2a0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -762,14 +762,15 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
762 | rc)); | 762 | rc)); |
763 | return rc; | 763 | return rc; |
764 | } | 764 | } |
765 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
765 | } | 766 | } |
766 | 767 | ||
767 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 768 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && |
768 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 769 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { |
769 | cFYI(1, ("calling findnext2")); | 770 | cFYI(1, ("calling findnext2")); |
770 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, | 771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, |
772 | &cifsFile->srch_inf); | 772 | &cifsFile->srch_inf); |
773 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
773 | if (rc) | 774 | if (rc) |
774 | return -ENOENT; | 775 | return -ENOENT; |
775 | } | 776 | } |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index c5916228243c..75b1fa90b2cb 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask) | |||
146 | if (!mask) | 146 | if (!mask) |
147 | return 0; | 147 | return 0; |
148 | 148 | ||
149 | if ((mask & MAY_EXEC) && !execute_ok(inode)) | ||
150 | return -EACCES; | ||
151 | |||
149 | lock_kernel(); | 152 | lock_kernel(); |
150 | 153 | ||
151 | if (coda_cache_check(inode, mask)) | 154 | if (coda_cache_check(inode, mask)) |
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c51365422aa8..773f2ce9aa06 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c | |||
@@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = { | |||
43 | /* the coda pioctl inode ops */ | 43 | /* the coda pioctl inode ops */ |
44 | static int coda_ioctl_permission(struct inode *inode, int mask) | 44 | static int coda_ioctl_permission(struct inode *inode, int mask) |
45 | { | 45 | { |
46 | return 0; | 46 | return (mask & MAY_EXEC) ? -EACCES : 0; |
47 | } | 47 | } |
48 | 48 | ||
49 | static int coda_pioctl(struct inode * inode, struct file * filp, | 49 | static int coda_pioctl(struct inode * inode, struct file * filp, |
diff --git a/fs/compat.c b/fs/compat.c index 5f9ec449c799..fe3c9bf87608 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -869,7 +869,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
869 | buf.dirent = dirent; | 869 | buf.dirent = dirent; |
870 | 870 | ||
871 | error = vfs_readdir(file, compat_fillonedir, &buf); | 871 | error = vfs_readdir(file, compat_fillonedir, &buf); |
872 | if (error >= 0) | 872 | if (buf.result) |
873 | error = buf.result; | 873 | error = buf.result; |
874 | 874 | ||
875 | fput(file); | 875 | fput(file); |
@@ -956,9 +956,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
956 | buf.error = 0; | 956 | buf.error = 0; |
957 | 957 | ||
958 | error = vfs_readdir(file, compat_filldir, &buf); | 958 | error = vfs_readdir(file, compat_filldir, &buf); |
959 | if (error < 0) | 959 | if (error >= 0) |
960 | goto out_putf; | 960 | error = buf.error; |
961 | error = buf.error; | ||
962 | lastdirent = buf.previous; | 961 | lastdirent = buf.previous; |
963 | if (lastdirent) { | 962 | if (lastdirent) { |
964 | if (put_user(file->f_pos, &lastdirent->d_off)) | 963 | if (put_user(file->f_pos, &lastdirent->d_off)) |
@@ -966,8 +965,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
966 | else | 965 | else |
967 | error = count - buf.count; | 966 | error = count - buf.count; |
968 | } | 967 | } |
969 | |||
970 | out_putf: | ||
971 | fput(file); | 968 | fput(file); |
972 | out: | 969 | out: |
973 | return error; | 970 | return error; |
@@ -1047,19 +1044,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1047 | buf.error = 0; | 1044 | buf.error = 0; |
1048 | 1045 | ||
1049 | error = vfs_readdir(file, compat_filldir64, &buf); | 1046 | error = vfs_readdir(file, compat_filldir64, &buf); |
1050 | if (error < 0) | 1047 | if (error >= 0) |
1051 | goto out_putf; | 1048 | error = buf.error; |
1052 | error = buf.error; | ||
1053 | lastdirent = buf.previous; | 1049 | lastdirent = buf.previous; |
1054 | if (lastdirent) { | 1050 | if (lastdirent) { |
1055 | typeof(lastdirent->d_off) d_off = file->f_pos; | 1051 | typeof(lastdirent->d_off) d_off = file->f_pos; |
1056 | error = -EFAULT; | ||
1057 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1052 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
1058 | goto out_putf; | 1053 | error = -EFAULT; |
1059 | error = count - buf.count; | 1054 | else |
1055 | error = count - buf.count; | ||
1060 | } | 1056 | } |
1061 | |||
1062 | out_putf: | ||
1063 | fput(file); | 1057 | fput(file); |
1064 | out: | 1058 | out: |
1065 | return error; | 1059 | return error; |
@@ -1475,6 +1469,57 @@ out_ret: | |||
1475 | 1469 | ||
1476 | #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) | 1470 | #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) |
1477 | 1471 | ||
1472 | static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, | ||
1473 | int timeval, int ret) | ||
1474 | { | ||
1475 | struct timespec ts; | ||
1476 | |||
1477 | if (!p) | ||
1478 | return ret; | ||
1479 | |||
1480 | if (current->personality & STICKY_TIMEOUTS) | ||
1481 | goto sticky; | ||
1482 | |||
1483 | /* No update for zero timeout */ | ||
1484 | if (!end_time->tv_sec && !end_time->tv_nsec) | ||
1485 | return ret; | ||
1486 | |||
1487 | ktime_get_ts(&ts); | ||
1488 | ts = timespec_sub(*end_time, ts); | ||
1489 | if (ts.tv_sec < 0) | ||
1490 | ts.tv_sec = ts.tv_nsec = 0; | ||
1491 | |||
1492 | if (timeval) { | ||
1493 | struct compat_timeval rtv; | ||
1494 | |||
1495 | rtv.tv_sec = ts.tv_sec; | ||
1496 | rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; | ||
1497 | |||
1498 | if (!copy_to_user(p, &rtv, sizeof(rtv))) | ||
1499 | return ret; | ||
1500 | } else { | ||
1501 | struct compat_timespec rts; | ||
1502 | |||
1503 | rts.tv_sec = ts.tv_sec; | ||
1504 | rts.tv_nsec = ts.tv_nsec; | ||
1505 | |||
1506 | if (!copy_to_user(p, &rts, sizeof(rts))) | ||
1507 | return ret; | ||
1508 | } | ||
1509 | /* | ||
1510 | * If an application puts its timeval in read-only memory, we | ||
1511 | * don't want the Linux-specific update to the timeval to | ||
1512 | * cause a fault after the select has completed | ||
1513 | * successfully. However, because we're not updating the | ||
1514 | * timeval, we can't restart the system call. | ||
1515 | */ | ||
1516 | |||
1517 | sticky: | ||
1518 | if (ret == -ERESTARTNOHAND) | ||
1519 | ret = -EINTR; | ||
1520 | return ret; | ||
1521 | } | ||
1522 | |||
1478 | /* | 1523 | /* |
1479 | * Ooo, nasty. We need here to frob 32-bit unsigned longs to | 1524 | * Ooo, nasty. We need here to frob 32-bit unsigned longs to |
1480 | * 64-bit unsigned longs. | 1525 | * 64-bit unsigned longs. |
@@ -1556,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, | |||
1556 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) | 1601 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) |
1557 | 1602 | ||
1558 | int compat_core_sys_select(int n, compat_ulong_t __user *inp, | 1603 | int compat_core_sys_select(int n, compat_ulong_t __user *inp, |
1559 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) | 1604 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
1605 | struct timespec *end_time) | ||
1560 | { | 1606 | { |
1561 | fd_set_bits fds; | 1607 | fd_set_bits fds; |
1562 | void *bits; | 1608 | void *bits; |
@@ -1603,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, | |||
1603 | zero_fd_set(n, fds.res_out); | 1649 | zero_fd_set(n, fds.res_out); |
1604 | zero_fd_set(n, fds.res_ex); | 1650 | zero_fd_set(n, fds.res_ex); |
1605 | 1651 | ||
1606 | ret = do_select(n, &fds, timeout); | 1652 | ret = do_select(n, &fds, end_time); |
1607 | 1653 | ||
1608 | if (ret < 0) | 1654 | if (ret < 0) |
1609 | goto out; | 1655 | goto out; |
@@ -1629,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, | |||
1629 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1675 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
1630 | struct compat_timeval __user *tvp) | 1676 | struct compat_timeval __user *tvp) |
1631 | { | 1677 | { |
1632 | s64 timeout = -1; | 1678 | struct timespec end_time, *to = NULL; |
1633 | struct compat_timeval tv; | 1679 | struct compat_timeval tv; |
1634 | int ret; | 1680 | int ret; |
1635 | 1681 | ||
@@ -1637,43 +1683,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, | |||
1637 | if (copy_from_user(&tv, tvp, sizeof(tv))) | 1683 | if (copy_from_user(&tv, tvp, sizeof(tv))) |
1638 | return -EFAULT; | 1684 | return -EFAULT; |
1639 | 1685 | ||
1640 | if (tv.tv_sec < 0 || tv.tv_usec < 0) | 1686 | to = &end_time; |
1687 | if (poll_select_set_timeout(to, tv.tv_sec, | ||
1688 | tv.tv_usec * NSEC_PER_USEC)) | ||
1641 | return -EINVAL; | 1689 | return -EINVAL; |
1642 | |||
1643 | /* Cast to u64 to make GCC stop complaining */ | ||
1644 | if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
1645 | timeout = -1; /* infinite */ | ||
1646 | else { | ||
1647 | timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ); | ||
1648 | timeout += tv.tv_sec * HZ; | ||
1649 | } | ||
1650 | } | 1690 | } |
1651 | 1691 | ||
1652 | ret = compat_core_sys_select(n, inp, outp, exp, &timeout); | 1692 | ret = compat_core_sys_select(n, inp, outp, exp, to); |
1653 | 1693 | ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); | |
1654 | if (tvp) { | ||
1655 | struct compat_timeval rtv; | ||
1656 | |||
1657 | if (current->personality & STICKY_TIMEOUTS) | ||
1658 | goto sticky; | ||
1659 | rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); | ||
1660 | rtv.tv_sec = timeout; | ||
1661 | if (compat_timeval_compare(&rtv, &tv) >= 0) | ||
1662 | rtv = tv; | ||
1663 | if (copy_to_user(tvp, &rtv, sizeof(rtv))) { | ||
1664 | sticky: | ||
1665 | /* | ||
1666 | * If an application puts its timeval in read-only | ||
1667 | * memory, we don't want the Linux-specific update to | ||
1668 | * the timeval to cause a fault after the select has | ||
1669 | * completed successfully. However, because we're not | ||
1670 | * updating the timeval, we can't restart the system | ||
1671 | * call. | ||
1672 | */ | ||
1673 | if (ret == -ERESTARTNOHAND) | ||
1674 | ret = -EINTR; | ||
1675 | } | ||
1676 | } | ||
1677 | 1694 | ||
1678 | return ret; | 1695 | return ret; |
1679 | } | 1696 | } |
@@ -1686,15 +1703,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, | |||
1686 | { | 1703 | { |
1687 | compat_sigset_t ss32; | 1704 | compat_sigset_t ss32; |
1688 | sigset_t ksigmask, sigsaved; | 1705 | sigset_t ksigmask, sigsaved; |
1689 | s64 timeout = MAX_SCHEDULE_TIMEOUT; | ||
1690 | struct compat_timespec ts; | 1706 | struct compat_timespec ts; |
1707 | struct timespec end_time, *to = NULL; | ||
1691 | int ret; | 1708 | int ret; |
1692 | 1709 | ||
1693 | if (tsp) { | 1710 | if (tsp) { |
1694 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 1711 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
1695 | return -EFAULT; | 1712 | return -EFAULT; |
1696 | 1713 | ||
1697 | if (ts.tv_sec < 0 || ts.tv_nsec < 0) | 1714 | to = &end_time; |
1715 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | ||
1698 | return -EINVAL; | 1716 | return -EINVAL; |
1699 | } | 1717 | } |
1700 | 1718 | ||
@@ -1709,51 +1727,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, | |||
1709 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1727 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
1710 | } | 1728 | } |
1711 | 1729 | ||
1712 | do { | 1730 | ret = compat_core_sys_select(n, inp, outp, exp, to); |
1713 | if (tsp) { | 1731 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); |
1714 | if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { | ||
1715 | timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); | ||
1716 | timeout += ts.tv_sec * (unsigned long)HZ; | ||
1717 | ts.tv_sec = 0; | ||
1718 | ts.tv_nsec = 0; | ||
1719 | } else { | ||
1720 | ts.tv_sec -= MAX_SELECT_SECONDS; | ||
1721 | timeout = MAX_SELECT_SECONDS * HZ; | ||
1722 | } | ||
1723 | } | ||
1724 | |||
1725 | ret = compat_core_sys_select(n, inp, outp, exp, &timeout); | ||
1726 | |||
1727 | } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); | ||
1728 | |||
1729 | if (tsp) { | ||
1730 | struct compat_timespec rts; | ||
1731 | |||
1732 | if (current->personality & STICKY_TIMEOUTS) | ||
1733 | goto sticky; | ||
1734 | |||
1735 | rts.tv_sec = timeout / HZ; | ||
1736 | rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); | ||
1737 | if (rts.tv_nsec >= NSEC_PER_SEC) { | ||
1738 | rts.tv_sec++; | ||
1739 | rts.tv_nsec -= NSEC_PER_SEC; | ||
1740 | } | ||
1741 | if (compat_timespec_compare(&rts, &ts) >= 0) | ||
1742 | rts = ts; | ||
1743 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
1744 | sticky: | ||
1745 | /* | ||
1746 | * If an application puts its timeval in read-only | ||
1747 | * memory, we don't want the Linux-specific update to | ||
1748 | * the timeval to cause a fault after the select has | ||
1749 | * completed successfully. However, because we're not | ||
1750 | * updating the timeval, we can't restart the system | ||
1751 | * call. | ||
1752 | */ | ||
1753 | if (ret == -ERESTARTNOHAND) | ||
1754 | ret = -EINTR; | ||
1755 | } | ||
1756 | } | ||
1757 | 1732 | ||
1758 | if (ret == -ERESTARTNOHAND) { | 1733 | if (ret == -ERESTARTNOHAND) { |
1759 | /* | 1734 | /* |
@@ -1798,18 +1773,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1798 | compat_sigset_t ss32; | 1773 | compat_sigset_t ss32; |
1799 | sigset_t ksigmask, sigsaved; | 1774 | sigset_t ksigmask, sigsaved; |
1800 | struct compat_timespec ts; | 1775 | struct compat_timespec ts; |
1801 | s64 timeout = -1; | 1776 | struct timespec end_time, *to = NULL; |
1802 | int ret; | 1777 | int ret; |
1803 | 1778 | ||
1804 | if (tsp) { | 1779 | if (tsp) { |
1805 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 1780 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
1806 | return -EFAULT; | 1781 | return -EFAULT; |
1807 | 1782 | ||
1808 | /* We assume that ts.tv_sec is always lower than | 1783 | to = &end_time; |
1809 | the number of seconds that can be expressed in | 1784 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) |
1810 | an s64. Otherwise the compiler bitches at us */ | 1785 | return -EINVAL; |
1811 | timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); | ||
1812 | timeout += ts.tv_sec * HZ; | ||
1813 | } | 1786 | } |
1814 | 1787 | ||
1815 | if (sigmask) { | 1788 | if (sigmask) { |
@@ -1823,7 +1796,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1823 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1796 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
1824 | } | 1797 | } |
1825 | 1798 | ||
1826 | ret = do_sys_poll(ufds, nfds, &timeout); | 1799 | ret = do_sys_poll(ufds, nfds, to); |
1827 | 1800 | ||
1828 | /* We can restart this syscall, usually */ | 1801 | /* We can restart this syscall, usually */ |
1829 | if (ret == -EINTR) { | 1802 | if (ret == -EINTR) { |
@@ -1841,31 +1814,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1841 | } else if (sigmask) | 1814 | } else if (sigmask) |
1842 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1815 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
1843 | 1816 | ||
1844 | if (tsp && timeout >= 0) { | 1817 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); |
1845 | struct compat_timespec rts; | ||
1846 | |||
1847 | if (current->personality & STICKY_TIMEOUTS) | ||
1848 | goto sticky; | ||
1849 | /* Yes, we know it's actually an s64, but it's also positive. */ | ||
1850 | rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * | ||
1851 | 1000; | ||
1852 | rts.tv_sec = timeout; | ||
1853 | if (compat_timespec_compare(&rts, &ts) >= 0) | ||
1854 | rts = ts; | ||
1855 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
1856 | sticky: | ||
1857 | /* | ||
1858 | * If an application puts its timeval in read-only | ||
1859 | * memory, we don't want the Linux-specific update to | ||
1860 | * the timeval to cause a fault after the select has | ||
1861 | * completed successfully. However, because we're not | ||
1862 | * updating the timeval, we can't restart the system | ||
1863 | * call. | ||
1864 | */ | ||
1865 | if (ret == -ERESTARTNOHAND && timeout >= 0) | ||
1866 | ret = -EINTR; | ||
1867 | } | ||
1868 | } | ||
1869 | 1818 | ||
1870 | return ret; | 1819 | return ret; |
1871 | } | 1820 | } |
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index bf74973b0492..932a92b31483 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c | |||
@@ -108,18 +108,18 @@ out: | |||
108 | } | 108 | } |
109 | 109 | ||
110 | 110 | ||
111 | static int get_target(const char *symname, struct nameidata *nd, | 111 | static int get_target(const char *symname, struct path *path, |
112 | struct config_item **target) | 112 | struct config_item **target) |
113 | { | 113 | { |
114 | int ret; | 114 | int ret; |
115 | 115 | ||
116 | ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); | 116 | ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); |
117 | if (!ret) { | 117 | if (!ret) { |
118 | if (nd->path.dentry->d_sb == configfs_sb) { | 118 | if (path->dentry->d_sb == configfs_sb) { |
119 | *target = configfs_get_config_item(nd->path.dentry); | 119 | *target = configfs_get_config_item(path->dentry); |
120 | if (!*target) { | 120 | if (!*target) { |
121 | ret = -ENOENT; | 121 | ret = -ENOENT; |
122 | path_put(&nd->path); | 122 | path_put(path); |
123 | } | 123 | } |
124 | } else | 124 | } else |
125 | ret = -EPERM; | 125 | ret = -EPERM; |
@@ -132,7 +132,7 @@ static int get_target(const char *symname, struct nameidata *nd, | |||
132 | int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | 132 | int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) |
133 | { | 133 | { |
134 | int ret; | 134 | int ret; |
135 | struct nameidata nd; | 135 | struct path path; |
136 | struct configfs_dirent *sd; | 136 | struct configfs_dirent *sd; |
137 | struct config_item *parent_item; | 137 | struct config_item *parent_item; |
138 | struct config_item *target_item; | 138 | struct config_item *target_item; |
@@ -159,7 +159,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
159 | !type->ct_item_ops->allow_link) | 159 | !type->ct_item_ops->allow_link) |
160 | goto out_put; | 160 | goto out_put; |
161 | 161 | ||
162 | ret = get_target(symname, &nd, &target_item); | 162 | ret = get_target(symname, &path, &target_item); |
163 | if (ret) | 163 | if (ret) |
164 | goto out_put; | 164 | goto out_put; |
165 | 165 | ||
@@ -174,7 +174,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
174 | } | 174 | } |
175 | 175 | ||
176 | config_item_put(target_item); | 176 | config_item_put(target_item); |
177 | path_put(&nd.path); | 177 | path_put(&path); |
178 | 178 | ||
179 | out_put: | 179 | out_put: |
180 | config_item_put(parent_item); | 180 | config_item_put(parent_item); |
diff --git a/fs/dcache.c b/fs/dcache.c index e7a1a99b7464..a1d86c7f3e66 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -69,6 +69,7 @@ struct dentry_stat_t dentry_stat = { | |||
69 | 69 | ||
70 | static void __d_free(struct dentry *dentry) | 70 | static void __d_free(struct dentry *dentry) |
71 | { | 71 | { |
72 | WARN_ON(!list_empty(&dentry->d_alias)); | ||
72 | if (dname_external(dentry)) | 73 | if (dname_external(dentry)) |
73 | kfree(dentry->d_name.name); | 74 | kfree(dentry->d_name.name); |
74 | kmem_cache_free(dentry_cache, dentry); | 75 | kmem_cache_free(dentry_cache, dentry); |
@@ -174,9 +175,12 @@ static struct dentry *d_kill(struct dentry *dentry) | |||
174 | dentry_stat.nr_dentry--; /* For d_free, below */ | 175 | dentry_stat.nr_dentry--; /* For d_free, below */ |
175 | /*drops the locks, at that point nobody can reach this dentry */ | 176 | /*drops the locks, at that point nobody can reach this dentry */ |
176 | dentry_iput(dentry); | 177 | dentry_iput(dentry); |
177 | parent = dentry->d_parent; | 178 | if (IS_ROOT(dentry)) |
179 | parent = NULL; | ||
180 | else | ||
181 | parent = dentry->d_parent; | ||
178 | d_free(dentry); | 182 | d_free(dentry); |
179 | return dentry == parent ? NULL : parent; | 183 | return parent; |
180 | } | 184 | } |
181 | 185 | ||
182 | /* | 186 | /* |
@@ -666,11 +670,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
666 | BUG(); | 670 | BUG(); |
667 | } | 671 | } |
668 | 672 | ||
669 | parent = dentry->d_parent; | 673 | if (IS_ROOT(dentry)) |
670 | if (parent == dentry) | ||
671 | parent = NULL; | 674 | parent = NULL; |
672 | else | 675 | else { |
676 | parent = dentry->d_parent; | ||
673 | atomic_dec(&parent->d_count); | 677 | atomic_dec(&parent->d_count); |
678 | } | ||
674 | 679 | ||
675 | list_del(&dentry->d_u.d_child); | 680 | list_del(&dentry->d_u.d_child); |
676 | detached++; | 681 | detached++; |
@@ -977,6 +982,15 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) | |||
977 | return d_alloc(parent, &q); | 982 | return d_alloc(parent, &q); |
978 | } | 983 | } |
979 | 984 | ||
985 | /* the caller must hold dcache_lock */ | ||
986 | static void __d_instantiate(struct dentry *dentry, struct inode *inode) | ||
987 | { | ||
988 | if (inode) | ||
989 | list_add(&dentry->d_alias, &inode->i_dentry); | ||
990 | dentry->d_inode = inode; | ||
991 | fsnotify_d_instantiate(dentry, inode); | ||
992 | } | ||
993 | |||
980 | /** | 994 | /** |
981 | * d_instantiate - fill in inode information for a dentry | 995 | * d_instantiate - fill in inode information for a dentry |
982 | * @entry: dentry to complete | 996 | * @entry: dentry to complete |
@@ -996,10 +1010,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode) | |||
996 | { | 1010 | { |
997 | BUG_ON(!list_empty(&entry->d_alias)); | 1011 | BUG_ON(!list_empty(&entry->d_alias)); |
998 | spin_lock(&dcache_lock); | 1012 | spin_lock(&dcache_lock); |
999 | if (inode) | 1013 | __d_instantiate(entry, inode); |
1000 | list_add(&entry->d_alias, &inode->i_dentry); | ||
1001 | entry->d_inode = inode; | ||
1002 | fsnotify_d_instantiate(entry, inode); | ||
1003 | spin_unlock(&dcache_lock); | 1014 | spin_unlock(&dcache_lock); |
1004 | security_d_instantiate(entry, inode); | 1015 | security_d_instantiate(entry, inode); |
1005 | } | 1016 | } |
@@ -1029,7 +1040,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, | |||
1029 | unsigned int hash = entry->d_name.hash; | 1040 | unsigned int hash = entry->d_name.hash; |
1030 | 1041 | ||
1031 | if (!inode) { | 1042 | if (!inode) { |
1032 | entry->d_inode = NULL; | 1043 | __d_instantiate(entry, NULL); |
1033 | return NULL; | 1044 | return NULL; |
1034 | } | 1045 | } |
1035 | 1046 | ||
@@ -1048,9 +1059,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, | |||
1048 | return alias; | 1059 | return alias; |
1049 | } | 1060 | } |
1050 | 1061 | ||
1051 | list_add(&entry->d_alias, &inode->i_dentry); | 1062 | __d_instantiate(entry, inode); |
1052 | entry->d_inode = inode; | ||
1053 | fsnotify_d_instantiate(entry, inode); | ||
1054 | return NULL; | 1063 | return NULL; |
1055 | } | 1064 | } |
1056 | 1065 | ||
@@ -1111,69 +1120,71 @@ static inline struct hlist_head *d_hash(struct dentry *parent, | |||
1111 | } | 1120 | } |
1112 | 1121 | ||
1113 | /** | 1122 | /** |
1114 | * d_alloc_anon - allocate an anonymous dentry | 1123 | * d_obtain_alias - find or allocate a dentry for a given inode |
1115 | * @inode: inode to allocate the dentry for | 1124 | * @inode: inode to allocate the dentry for |
1116 | * | 1125 | * |
1117 | * This is similar to d_alloc_root. It is used by filesystems when | 1126 | * Obtain a dentry for an inode resulting from NFS filehandle conversion or |
1118 | * creating a dentry for a given inode, often in the process of | 1127 | * similar open by handle operations. The returned dentry may be anonymous, |
1119 | * mapping a filehandle to a dentry. The returned dentry may be | 1128 | * or may have a full name (if the inode was already in the cache). |
1120 | * anonymous, or may have a full name (if the inode was already | ||
1121 | * in the cache). The file system may need to make further | ||
1122 | * efforts to connect this dentry into the dcache properly. | ||
1123 | * | 1129 | * |
1124 | * When called on a directory inode, we must ensure that | 1130 | * When called on a directory inode, we must ensure that the inode only ever |
1125 | * the inode only ever has one dentry. If a dentry is | 1131 | * has one dentry. If a dentry is found, that is returned instead of |
1126 | * found, that is returned instead of allocating a new one. | 1132 | * allocating a new one. |
1127 | * | 1133 | * |
1128 | * On successful return, the reference to the inode has been transferred | 1134 | * On successful return, the reference to the inode has been transferred |
1129 | * to the dentry. If %NULL is returned (indicating kmalloc failure), | 1135 | * to the dentry. In case of an error the reference on the inode is released. |
1130 | * the reference on the inode has not been released. | 1136 | * To make it easier to use in export operations a %NULL or IS_ERR inode may |
1137 | * be passed in and will be the error will be propagate to the return value, | ||
1138 | * with a %NULL @inode replaced by ERR_PTR(-ESTALE). | ||
1131 | */ | 1139 | */ |
1132 | 1140 | struct dentry *d_obtain_alias(struct inode *inode) | |
1133 | struct dentry * d_alloc_anon(struct inode *inode) | ||
1134 | { | 1141 | { |
1135 | static const struct qstr anonstring = { .name = "" }; | 1142 | static const struct qstr anonstring = { .name = "" }; |
1136 | struct dentry *tmp; | 1143 | struct dentry *tmp; |
1137 | struct dentry *res; | 1144 | struct dentry *res; |
1138 | 1145 | ||
1139 | if ((res = d_find_alias(inode))) { | 1146 | if (!inode) |
1140 | iput(inode); | 1147 | return ERR_PTR(-ESTALE); |
1141 | return res; | 1148 | if (IS_ERR(inode)) |
1142 | } | 1149 | return ERR_CAST(inode); |
1143 | 1150 | ||
1144 | tmp = d_alloc(NULL, &anonstring); | 1151 | res = d_find_alias(inode); |
1145 | if (!tmp) | 1152 | if (res) |
1146 | return NULL; | 1153 | goto out_iput; |
1147 | 1154 | ||
1155 | tmp = d_alloc(NULL, &anonstring); | ||
1156 | if (!tmp) { | ||
1157 | res = ERR_PTR(-ENOMEM); | ||
1158 | goto out_iput; | ||
1159 | } | ||
1148 | tmp->d_parent = tmp; /* make sure dput doesn't croak */ | 1160 | tmp->d_parent = tmp; /* make sure dput doesn't croak */ |
1149 | 1161 | ||
1150 | spin_lock(&dcache_lock); | 1162 | spin_lock(&dcache_lock); |
1151 | res = __d_find_alias(inode, 0); | 1163 | res = __d_find_alias(inode, 0); |
1152 | if (!res) { | 1164 | if (res) { |
1153 | /* attach a disconnected dentry */ | 1165 | spin_unlock(&dcache_lock); |
1154 | res = tmp; | 1166 | dput(tmp); |
1155 | tmp = NULL; | 1167 | goto out_iput; |
1156 | spin_lock(&res->d_lock); | ||
1157 | res->d_sb = inode->i_sb; | ||
1158 | res->d_parent = res; | ||
1159 | res->d_inode = inode; | ||
1160 | res->d_flags |= DCACHE_DISCONNECTED; | ||
1161 | res->d_flags &= ~DCACHE_UNHASHED; | ||
1162 | list_add(&res->d_alias, &inode->i_dentry); | ||
1163 | hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); | ||
1164 | spin_unlock(&res->d_lock); | ||
1165 | |||
1166 | inode = NULL; /* don't drop reference */ | ||
1167 | } | 1168 | } |
1169 | |||
1170 | /* attach a disconnected dentry */ | ||
1171 | spin_lock(&tmp->d_lock); | ||
1172 | tmp->d_sb = inode->i_sb; | ||
1173 | tmp->d_inode = inode; | ||
1174 | tmp->d_flags |= DCACHE_DISCONNECTED; | ||
1175 | tmp->d_flags &= ~DCACHE_UNHASHED; | ||
1176 | list_add(&tmp->d_alias, &inode->i_dentry); | ||
1177 | hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); | ||
1178 | spin_unlock(&tmp->d_lock); | ||
1179 | |||
1168 | spin_unlock(&dcache_lock); | 1180 | spin_unlock(&dcache_lock); |
1181 | return tmp; | ||
1169 | 1182 | ||
1170 | if (inode) | 1183 | out_iput: |
1171 | iput(inode); | 1184 | iput(inode); |
1172 | if (tmp) | ||
1173 | dput(tmp); | ||
1174 | return res; | 1185 | return res; |
1175 | } | 1186 | } |
1176 | 1187 | EXPORT_SYMBOL_GPL(d_obtain_alias); | |
1177 | 1188 | ||
1178 | /** | 1189 | /** |
1179 | * d_splice_alias - splice a disconnected dentry into the tree if one exists | 1190 | * d_splice_alias - splice a disconnected dentry into the tree if one exists |
@@ -1200,17 +1211,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1200 | new = __d_find_alias(inode, 1); | 1211 | new = __d_find_alias(inode, 1); |
1201 | if (new) { | 1212 | if (new) { |
1202 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); | 1213 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); |
1203 | fsnotify_d_instantiate(new, inode); | ||
1204 | spin_unlock(&dcache_lock); | 1214 | spin_unlock(&dcache_lock); |
1205 | security_d_instantiate(new, inode); | 1215 | security_d_instantiate(new, inode); |
1206 | d_rehash(dentry); | 1216 | d_rehash(dentry); |
1207 | d_move(new, dentry); | 1217 | d_move(new, dentry); |
1208 | iput(inode); | 1218 | iput(inode); |
1209 | } else { | 1219 | } else { |
1210 | /* d_instantiate takes dcache_lock, so we do it by hand */ | 1220 | /* already taking dcache_lock, so d_add() by hand */ |
1211 | list_add(&dentry->d_alias, &inode->i_dentry); | 1221 | __d_instantiate(dentry, inode); |
1212 | dentry->d_inode = inode; | ||
1213 | fsnotify_d_instantiate(dentry, inode); | ||
1214 | spin_unlock(&dcache_lock); | 1222 | spin_unlock(&dcache_lock); |
1215 | security_d_instantiate(dentry, inode); | 1223 | security_d_instantiate(dentry, inode); |
1216 | d_rehash(dentry); | 1224 | d_rehash(dentry); |
@@ -1293,8 +1301,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, | |||
1293 | * d_instantiate() by hand because it takes dcache_lock which | 1301 | * d_instantiate() by hand because it takes dcache_lock which |
1294 | * we already hold. | 1302 | * we already hold. |
1295 | */ | 1303 | */ |
1296 | list_add(&found->d_alias, &inode->i_dentry); | 1304 | __d_instantiate(found, inode); |
1297 | found->d_inode = inode; | ||
1298 | spin_unlock(&dcache_lock); | 1305 | spin_unlock(&dcache_lock); |
1299 | security_d_instantiate(found, inode); | 1306 | security_d_instantiate(found, inode); |
1300 | return found; | 1307 | return found; |
@@ -1456,8 +1463,6 @@ out: | |||
1456 | * d_validate - verify dentry provided from insecure source | 1463 | * d_validate - verify dentry provided from insecure source |
1457 | * @dentry: The dentry alleged to be valid child of @dparent | 1464 | * @dentry: The dentry alleged to be valid child of @dparent |
1458 | * @dparent: The parent dentry (known to be valid) | 1465 | * @dparent: The parent dentry (known to be valid) |
1459 | * @hash: Hash of the dentry | ||
1460 | * @len: Length of the name | ||
1461 | * | 1466 | * |
1462 | * An insecure source has sent us a dentry, here we verify it and dget() it. | 1467 | * An insecure source has sent us a dentry, here we verify it and dget() it. |
1463 | * This is used by ncpfs in its readdir implementation. | 1468 | * This is used by ncpfs in its readdir implementation. |
@@ -1714,18 +1719,23 @@ void d_move(struct dentry * dentry, struct dentry * target) | |||
1714 | spin_unlock(&dcache_lock); | 1719 | spin_unlock(&dcache_lock); |
1715 | } | 1720 | } |
1716 | 1721 | ||
1717 | /* | 1722 | /** |
1718 | * Helper that returns 1 if p1 is a parent of p2, else 0 | 1723 | * d_ancestor - search for an ancestor |
1724 | * @p1: ancestor dentry | ||
1725 | * @p2: child dentry | ||
1726 | * | ||
1727 | * Returns the ancestor dentry of p2 which is a child of p1, if p1 is | ||
1728 | * an ancestor of p2, else NULL. | ||
1719 | */ | 1729 | */ |
1720 | static int d_isparent(struct dentry *p1, struct dentry *p2) | 1730 | struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) |
1721 | { | 1731 | { |
1722 | struct dentry *p; | 1732 | struct dentry *p; |
1723 | 1733 | ||
1724 | for (p = p2; p->d_parent != p; p = p->d_parent) { | 1734 | for (p = p2; !IS_ROOT(p); p = p->d_parent) { |
1725 | if (p->d_parent == p1) | 1735 | if (p->d_parent == p1) |
1726 | return 1; | 1736 | return p; |
1727 | } | 1737 | } |
1728 | return 0; | 1738 | return NULL; |
1729 | } | 1739 | } |
1730 | 1740 | ||
1731 | /* | 1741 | /* |
@@ -1749,7 +1759,7 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) | |||
1749 | 1759 | ||
1750 | /* Check for loops */ | 1760 | /* Check for loops */ |
1751 | ret = ERR_PTR(-ELOOP); | 1761 | ret = ERR_PTR(-ELOOP); |
1752 | if (d_isparent(alias, dentry)) | 1762 | if (d_ancestor(alias, dentry)) |
1753 | goto out_err; | 1763 | goto out_err; |
1754 | 1764 | ||
1755 | /* See lock_rename() */ | 1765 | /* See lock_rename() */ |
@@ -1822,7 +1832,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
1822 | 1832 | ||
1823 | if (!inode) { | 1833 | if (!inode) { |
1824 | actual = dentry; | 1834 | actual = dentry; |
1825 | dentry->d_inode = NULL; | 1835 | __d_instantiate(dentry, NULL); |
1826 | goto found_lock; | 1836 | goto found_lock; |
1827 | } | 1837 | } |
1828 | 1838 | ||
@@ -2149,32 +2159,27 @@ out: | |||
2149 | * Caller must ensure that "new_dentry" is pinned before calling is_subdir() | 2159 | * Caller must ensure that "new_dentry" is pinned before calling is_subdir() |
2150 | */ | 2160 | */ |
2151 | 2161 | ||
2152 | int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) | 2162 | int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) |
2153 | { | 2163 | { |
2154 | int result; | 2164 | int result; |
2155 | struct dentry * saved = new_dentry; | ||
2156 | unsigned long seq; | 2165 | unsigned long seq; |
2157 | 2166 | ||
2158 | /* need rcu_readlock to protect against the d_parent trashing due to | 2167 | /* FIXME: This is old behavior, needed? Please check callers. */ |
2159 | * d_move | 2168 | if (new_dentry == old_dentry) |
2169 | return 1; | ||
2170 | |||
2171 | /* | ||
2172 | * Need rcu_readlock to protect against the d_parent trashing | ||
2173 | * due to d_move | ||
2160 | */ | 2174 | */ |
2161 | rcu_read_lock(); | 2175 | rcu_read_lock(); |
2162 | do { | 2176 | do { |
2163 | /* for restarting inner loop in case of seq retry */ | 2177 | /* for restarting inner loop in case of seq retry */ |
2164 | new_dentry = saved; | ||
2165 | result = 0; | ||
2166 | seq = read_seqbegin(&rename_lock); | 2178 | seq = read_seqbegin(&rename_lock); |
2167 | for (;;) { | 2179 | if (d_ancestor(old_dentry, new_dentry)) |
2168 | if (new_dentry != old_dentry) { | ||
2169 | struct dentry * parent = new_dentry->d_parent; | ||
2170 | if (parent == new_dentry) | ||
2171 | break; | ||
2172 | new_dentry = parent; | ||
2173 | continue; | ||
2174 | } | ||
2175 | result = 1; | 2180 | result = 1; |
2176 | break; | 2181 | else |
2177 | } | 2182 | result = 0; |
2178 | } while (read_seqretry(&rename_lock, seq)); | 2183 | } while (read_seqretry(&rename_lock, seq)); |
2179 | rcu_read_unlock(); | 2184 | rcu_read_unlock(); |
2180 | 2185 | ||
@@ -2344,7 +2349,6 @@ void __init vfs_caches_init(unsigned long mempages) | |||
2344 | } | 2349 | } |
2345 | 2350 | ||
2346 | EXPORT_SYMBOL(d_alloc); | 2351 | EXPORT_SYMBOL(d_alloc); |
2347 | EXPORT_SYMBOL(d_alloc_anon); | ||
2348 | EXPORT_SYMBOL(d_alloc_root); | 2352 | EXPORT_SYMBOL(d_alloc_root); |
2349 | EXPORT_SYMBOL(d_delete); | 2353 | EXPORT_SYMBOL(d_delete); |
2350 | EXPORT_SYMBOL(d_find_alias); | 2354 | EXPORT_SYMBOL(d_find_alias); |
diff --git a/fs/dquot.c b/fs/dquot.c index da30a27f2242..5e95261005b2 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -1805,19 +1805,19 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id, | |||
1805 | } | 1805 | } |
1806 | 1806 | ||
1807 | /* Actual function called from quotactl() */ | 1807 | /* Actual function called from quotactl() */ |
1808 | int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, | 1808 | int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name, |
1809 | int remount) | 1809 | int remount) |
1810 | { | 1810 | { |
1811 | struct nameidata nd; | 1811 | struct path path; |
1812 | int error; | 1812 | int error; |
1813 | 1813 | ||
1814 | if (remount) | 1814 | if (remount) |
1815 | return vfs_quota_on_remount(sb, type); | 1815 | return vfs_quota_on_remount(sb, type); |
1816 | 1816 | ||
1817 | error = path_lookup(path, LOOKUP_FOLLOW, &nd); | 1817 | error = kern_path(name, LOOKUP_FOLLOW, &path); |
1818 | if (!error) { | 1818 | if (!error) { |
1819 | error = vfs_quota_on_path(sb, type, format_id, &nd.path); | 1819 | error = vfs_quota_on_path(sb, type, format_id, &path); |
1820 | path_put(&nd.path); | 1820 | path_put(&path); |
1821 | } | 1821 | } |
1822 | return error; | 1822 | return error; |
1823 | } | 1823 | } |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 046e027a4cb1..64d2ba980df4 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -471,31 +471,26 @@ out: | |||
471 | */ | 471 | */ |
472 | static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) | 472 | static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) |
473 | { | 473 | { |
474 | struct path path; | ||
474 | int rc; | 475 | int rc; |
475 | struct nameidata nd; | ||
476 | struct dentry *lower_root; | ||
477 | struct vfsmount *lower_mnt; | ||
478 | 476 | ||
479 | memset(&nd, 0, sizeof(struct nameidata)); | 477 | rc = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); |
480 | rc = path_lookup(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); | ||
481 | if (rc) { | 478 | if (rc) { |
482 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); | 479 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); |
483 | goto out; | 480 | goto out; |
484 | } | 481 | } |
485 | lower_root = nd.path.dentry; | 482 | ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); |
486 | lower_mnt = nd.path.mnt; | 483 | sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
487 | ecryptfs_set_superblock_lower(sb, lower_root->d_sb); | 484 | sb->s_blocksize = path.dentry->d_sb->s_blocksize; |
488 | sb->s_maxbytes = lower_root->d_sb->s_maxbytes; | 485 | ecryptfs_set_dentry_lower(sb->s_root, path.dentry); |
489 | sb->s_blocksize = lower_root->d_sb->s_blocksize; | 486 | ecryptfs_set_dentry_lower_mnt(sb->s_root, path.mnt); |
490 | ecryptfs_set_dentry_lower(sb->s_root, lower_root); | 487 | rc = ecryptfs_interpose(path.dentry, sb->s_root, sb, 0); |
491 | ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); | ||
492 | rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0); | ||
493 | if (rc) | 488 | if (rc) |
494 | goto out_free; | 489 | goto out_free; |
495 | rc = 0; | 490 | rc = 0; |
496 | goto out; | 491 | goto out; |
497 | out_free: | 492 | out_free: |
498 | path_put(&nd.path); | 493 | path_put(&path); |
499 | out: | 494 | out: |
500 | return rc; | 495 | return rc; |
501 | } | 496 | } |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 291abb11e20e..c3fb5f9c4a44 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -112,35 +112,14 @@ struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
112 | 112 | ||
113 | struct dentry *efs_get_parent(struct dentry *child) | 113 | struct dentry *efs_get_parent(struct dentry *child) |
114 | { | 114 | { |
115 | struct dentry *parent; | 115 | struct dentry *parent = ERR_PTR(-ENOENT); |
116 | struct inode *inode; | ||
117 | efs_ino_t ino; | 116 | efs_ino_t ino; |
118 | long error; | ||
119 | 117 | ||
120 | lock_kernel(); | 118 | lock_kernel(); |
121 | |||
122 | error = -ENOENT; | ||
123 | ino = efs_find_entry(child->d_inode, "..", 2); | 119 | ino = efs_find_entry(child->d_inode, "..", 2); |
124 | if (!ino) | 120 | if (ino) |
125 | goto fail; | 121 | parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); |
126 | |||
127 | inode = efs_iget(child->d_inode->i_sb, ino); | ||
128 | if (IS_ERR(inode)) { | ||
129 | error = PTR_ERR(inode); | ||
130 | goto fail; | ||
131 | } | ||
132 | |||
133 | error = -ENOMEM; | ||
134 | parent = d_alloc_anon(inode); | ||
135 | if (!parent) | ||
136 | goto fail_iput; | ||
137 | |||
138 | unlock_kernel(); | 122 | unlock_kernel(); |
139 | return parent; | ||
140 | 123 | ||
141 | fail_iput: | 124 | return parent; |
142 | iput(inode); | ||
143 | fail: | ||
144 | unlock_kernel(); | ||
145 | return ERR_PTR(error); | ||
146 | } | 125 | } |
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt); | |||
1386 | * name into corename, which must have space for at least | 1386 | * name into corename, which must have space for at least |
1387 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | 1387 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. |
1388 | */ | 1388 | */ |
1389 | static int format_corename(char *corename, int nr_threads, long signr) | 1389 | static int format_corename(char *corename, long signr) |
1390 | { | 1390 | { |
1391 | const char *pat_ptr = core_pattern; | 1391 | const char *pat_ptr = core_pattern; |
1392 | int ispipe = (*pat_ptr == '|'); | 1392 | int ispipe = (*pat_ptr == '|'); |
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr) | |||
1493 | * If core_pattern does not include a %p (as is the default) | 1493 | * If core_pattern does not include a %p (as is the default) |
1494 | * and core_uses_pid is set, then .%pid will be appended to | 1494 | * and core_uses_pid is set, then .%pid will be appended to |
1495 | * the filename. Do not do this for piped commands. */ | 1495 | * the filename. Do not do this for piped commands. */ |
1496 | if (!ispipe && !pid_in_pattern | 1496 | if (!ispipe && !pid_in_pattern && core_uses_pid) { |
1497 | && (core_uses_pid || nr_threads)) { | ||
1498 | rc = snprintf(out_ptr, out_end - out_ptr, | 1497 | rc = snprintf(out_ptr, out_end - out_ptr, |
1499 | ".%d", task_tgid_vnr(current)); | 1498 | ".%d", task_tgid_vnr(current)); |
1500 | if (rc > out_end - out_ptr) | 1499 | if (rc > out_end - out_ptr) |
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) | |||
1757 | * uses lock_kernel() | 1756 | * uses lock_kernel() |
1758 | */ | 1757 | */ |
1759 | lock_kernel(); | 1758 | lock_kernel(); |
1760 | ispipe = format_corename(corename, retval, signr); | 1759 | ispipe = format_corename(corename, signr); |
1761 | unlock_kernel(); | 1760 | unlock_kernel(); |
1762 | /* | 1761 | /* |
1763 | * Don't bother to check the RLIMIT_CORE value if core_pattern points | 1762 | * Don't bother to check the RLIMIT_CORE value if core_pattern points |
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index cc91227d3bb8..80246bad1b7f 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -94,9 +94,8 @@ find_disconnected_root(struct dentry *dentry) | |||
94 | * It may already be, as the flag isn't always updated when connection happens. | 94 | * It may already be, as the flag isn't always updated when connection happens. |
95 | */ | 95 | */ |
96 | static int | 96 | static int |
97 | reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) | 97 | reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) |
98 | { | 98 | { |
99 | char nbuf[NAME_MAX+1]; | ||
100 | int noprogress = 0; | 99 | int noprogress = 0; |
101 | int err = -ESTALE; | 100 | int err = -ESTALE; |
102 | 101 | ||
@@ -281,13 +280,14 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, | |||
281 | int old_seq = buffer.sequence; | 280 | int old_seq = buffer.sequence; |
282 | 281 | ||
283 | error = vfs_readdir(file, filldir_one, &buffer); | 282 | error = vfs_readdir(file, filldir_one, &buffer); |
283 | if (buffer.found) { | ||
284 | error = 0; | ||
285 | break; | ||
286 | } | ||
284 | 287 | ||
285 | if (error < 0) | 288 | if (error < 0) |
286 | break; | 289 | break; |
287 | 290 | ||
288 | error = 0; | ||
289 | if (buffer.found) | ||
290 | break; | ||
291 | error = -ENOENT; | 291 | error = -ENOENT; |
292 | if (old_seq == buffer.sequence) | 292 | if (old_seq == buffer.sequence) |
293 | break; | 293 | break; |
@@ -360,14 +360,13 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
360 | { | 360 | { |
361 | const struct export_operations *nop = mnt->mnt_sb->s_export_op; | 361 | const struct export_operations *nop = mnt->mnt_sb->s_export_op; |
362 | struct dentry *result, *alias; | 362 | struct dentry *result, *alias; |
363 | char nbuf[NAME_MAX+1]; | ||
363 | int err; | 364 | int err; |
364 | 365 | ||
365 | /* | 366 | /* |
366 | * Try to get any dentry for the given file handle from the filesystem. | 367 | * Try to get any dentry for the given file handle from the filesystem. |
367 | */ | 368 | */ |
368 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); | 369 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); |
369 | if (!result) | ||
370 | result = ERR_PTR(-ESTALE); | ||
371 | if (IS_ERR(result)) | 370 | if (IS_ERR(result)) |
372 | return result; | 371 | return result; |
373 | 372 | ||
@@ -381,7 +380,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
381 | * filesystem root. | 380 | * filesystem root. |
382 | */ | 381 | */ |
383 | if (result->d_flags & DCACHE_DISCONNECTED) { | 382 | if (result->d_flags & DCACHE_DISCONNECTED) { |
384 | err = reconnect_path(mnt, result); | 383 | err = reconnect_path(mnt, result, nbuf); |
385 | if (err) | 384 | if (err) |
386 | goto err_result; | 385 | goto err_result; |
387 | } | 386 | } |
@@ -397,7 +396,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
397 | * It's not a directory. Life is a little more complicated. | 396 | * It's not a directory. Life is a little more complicated. |
398 | */ | 397 | */ |
399 | struct dentry *target_dir, *nresult; | 398 | struct dentry *target_dir, *nresult; |
400 | char nbuf[NAME_MAX+1]; | ||
401 | 399 | ||
402 | /* | 400 | /* |
403 | * See if either the dentry we just got from the filesystem | 401 | * See if either the dentry we just got from the filesystem |
@@ -422,8 +420,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
422 | 420 | ||
423 | target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, | 421 | target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, |
424 | fh_len, fileid_type); | 422 | fh_len, fileid_type); |
425 | if (!target_dir) | ||
426 | goto err_result; | ||
427 | err = PTR_ERR(target_dir); | 423 | err = PTR_ERR(target_dir); |
428 | if (IS_ERR(target_dir)) | 424 | if (IS_ERR(target_dir)) |
429 | goto err_result; | 425 | goto err_result; |
@@ -433,7 +429,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
433 | * connected to the filesystem root. The VFS really doesn't | 429 | * connected to the filesystem root. The VFS really doesn't |
434 | * like disconnected directories.. | 430 | * like disconnected directories.. |
435 | */ | 431 | */ |
436 | err = reconnect_path(mnt, target_dir); | 432 | err = reconnect_path(mnt, target_dir, nbuf); |
437 | if (err) { | 433 | if (err) { |
438 | dput(target_dir); | 434 | dput(target_dir); |
439 | goto err_result; | 435 | goto err_result; |
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig new file mode 100644 index 000000000000..14a6780fd034 --- /dev/null +++ b/fs/ext2/Kconfig | |||
@@ -0,0 +1,55 @@ | |||
1 | config EXT2_FS | ||
2 | tristate "Second extended fs support" | ||
3 | help | ||
4 | Ext2 is a standard Linux file system for hard disks. | ||
5 | |||
6 | To compile this file system support as a module, choose M here: the | ||
7 | module will be called ext2. | ||
8 | |||
9 | If unsure, say Y. | ||
10 | |||
11 | config EXT2_FS_XATTR | ||
12 | bool "Ext2 extended attributes" | ||
13 | depends on EXT2_FS | ||
14 | help | ||
15 | Extended attributes are name:value pairs associated with inodes by | ||
16 | the kernel or by users (see the attr(5) manual page, or visit | ||
17 | <http://acl.bestbits.at/> for details). | ||
18 | |||
19 | If unsure, say N. | ||
20 | |||
21 | config EXT2_FS_POSIX_ACL | ||
22 | bool "Ext2 POSIX Access Control Lists" | ||
23 | depends on EXT2_FS_XATTR | ||
24 | select FS_POSIX_ACL | ||
25 | help | ||
26 | Posix Access Control Lists (ACLs) support permissions for users and | ||
27 | groups beyond the owner/group/world scheme. | ||
28 | |||
29 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
30 | Linux website <http://acl.bestbits.at/>. | ||
31 | |||
32 | If you don't know what Access Control Lists are, say N | ||
33 | |||
34 | config EXT2_FS_SECURITY | ||
35 | bool "Ext2 Security Labels" | ||
36 | depends on EXT2_FS_XATTR | ||
37 | help | ||
38 | Security labels support alternative access control models | ||
39 | implemented by security modules like SELinux. This option | ||
40 | enables an extended attribute handler for file security | ||
41 | labels in the ext2 filesystem. | ||
42 | |||
43 | If you are not using a security module that requires using | ||
44 | extended attributes for file security labels, say N. | ||
45 | |||
46 | config EXT2_FS_XIP | ||
47 | bool "Ext2 execute in place support" | ||
48 | depends on EXT2_FS && MMU | ||
49 | help | ||
50 | Execute in place can be used on memory-backed block devices. If you | ||
51 | enable this option, you can select to mount block devices which are | ||
52 | capable of this feature without using the page cache. | ||
53 | |||
54 | If you do not use a block device that is capable of using this, | ||
55 | or if unsure, say N. | ||
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 11a49ce84392..9a0fc400f91c 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -354,11 +354,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
354 | * (as a parameter - res_dir). Page is returned mapped and unlocked. | 354 | * (as a parameter - res_dir). Page is returned mapped and unlocked. |
355 | * Entry is guaranteed to be valid. | 355 | * Entry is guaranteed to be valid. |
356 | */ | 356 | */ |
357 | struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | 357 | struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, |
358 | struct dentry *dentry, struct page ** res_page) | 358 | struct qstr *child, struct page ** res_page) |
359 | { | 359 | { |
360 | const char *name = dentry->d_name.name; | 360 | const char *name = child->name; |
361 | int namelen = dentry->d_name.len; | 361 | int namelen = child->len; |
362 | unsigned reclen = EXT2_DIR_REC_LEN(namelen); | 362 | unsigned reclen = EXT2_DIR_REC_LEN(namelen); |
363 | unsigned long start, n; | 363 | unsigned long start, n; |
364 | unsigned long npages = dir_pages(dir); | 364 | unsigned long npages = dir_pages(dir); |
@@ -431,13 +431,13 @@ struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) | |||
431 | return de; | 431 | return de; |
432 | } | 432 | } |
433 | 433 | ||
434 | ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) | 434 | ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) |
435 | { | 435 | { |
436 | ino_t res = 0; | 436 | ino_t res = 0; |
437 | struct ext2_dir_entry_2 * de; | 437 | struct ext2_dir_entry_2 *de; |
438 | struct page *page; | 438 | struct page *page; |
439 | 439 | ||
440 | de = ext2_find_entry (dir, dentry, &page); | 440 | de = ext2_find_entry (dir, child, &page); |
441 | if (de) { | 441 | if (de) { |
442 | res = le32_to_cpu(de->inode); | 442 | res = le32_to_cpu(de->inode); |
443 | ext2_put_page(page); | 443 | ext2_put_page(page); |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index bae998c1e44e..3203042b36ef 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -105,9 +105,9 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind | |||
105 | 105 | ||
106 | /* dir.c */ | 106 | /* dir.c */ |
107 | extern int ext2_add_link (struct dentry *, struct inode *); | 107 | extern int ext2_add_link (struct dentry *, struct inode *); |
108 | extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); | 108 | extern ino_t ext2_inode_by_name(struct inode *, struct qstr *); |
109 | extern int ext2_make_empty(struct inode *, struct inode *); | 109 | extern int ext2_make_empty(struct inode *, struct inode *); |
110 | extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); | 110 | extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **); |
111 | extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); | 111 | extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); |
112 | extern int ext2_empty_dir (struct inode *); | 112 | extern int ext2_empty_dir (struct inode *); |
113 | extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); | 113 | extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 80c97fd8c571..2a747252ec12 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -60,7 +60,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str | |||
60 | if (dentry->d_name.len > EXT2_NAME_LEN) | 60 | if (dentry->d_name.len > EXT2_NAME_LEN) |
61 | return ERR_PTR(-ENAMETOOLONG); | 61 | return ERR_PTR(-ENAMETOOLONG); |
62 | 62 | ||
63 | ino = ext2_inode_by_name(dir, dentry); | 63 | ino = ext2_inode_by_name(dir, &dentry->d_name); |
64 | inode = NULL; | 64 | inode = NULL; |
65 | if (ino) { | 65 | if (ino) { |
66 | inode = ext2_iget(dir->i_sb, ino); | 66 | inode = ext2_iget(dir->i_sb, ino); |
@@ -72,27 +72,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str | |||
72 | 72 | ||
73 | struct dentry *ext2_get_parent(struct dentry *child) | 73 | struct dentry *ext2_get_parent(struct dentry *child) |
74 | { | 74 | { |
75 | unsigned long ino; | 75 | struct qstr dotdot = {.name = "..", .len = 2}; |
76 | struct dentry *parent; | 76 | unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); |
77 | struct inode *inode; | ||
78 | struct dentry dotdot; | ||
79 | |||
80 | dotdot.d_name.name = ".."; | ||
81 | dotdot.d_name.len = 2; | ||
82 | |||
83 | ino = ext2_inode_by_name(child->d_inode, &dotdot); | ||
84 | if (!ino) | 77 | if (!ino) |
85 | return ERR_PTR(-ENOENT); | 78 | return ERR_PTR(-ENOENT); |
86 | inode = ext2_iget(child->d_inode->i_sb, ino); | 79 | return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino)); |
87 | |||
88 | if (IS_ERR(inode)) | ||
89 | return ERR_CAST(inode); | ||
90 | parent = d_alloc_anon(inode); | ||
91 | if (!parent) { | ||
92 | iput(inode); | ||
93 | parent = ERR_PTR(-ENOMEM); | ||
94 | } | ||
95 | return parent; | ||
96 | } | 80 | } |
97 | 81 | ||
98 | /* | 82 | /* |
@@ -257,7 +241,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) | |||
257 | struct page * page; | 241 | struct page * page; |
258 | int err = -ENOENT; | 242 | int err = -ENOENT; |
259 | 243 | ||
260 | de = ext2_find_entry (dir, dentry, &page); | 244 | de = ext2_find_entry (dir, &dentry->d_name, &page); |
261 | if (!de) | 245 | if (!de) |
262 | goto out; | 246 | goto out; |
263 | 247 | ||
@@ -299,7 +283,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, | |||
299 | struct ext2_dir_entry_2 * old_de; | 283 | struct ext2_dir_entry_2 * old_de; |
300 | int err = -ENOENT; | 284 | int err = -ENOENT; |
301 | 285 | ||
302 | old_de = ext2_find_entry (old_dir, old_dentry, &old_page); | 286 | old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); |
303 | if (!old_de) | 287 | if (!old_de) |
304 | goto out; | 288 | goto out; |
305 | 289 | ||
@@ -319,7 +303,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, | |||
319 | goto out_dir; | 303 | goto out_dir; |
320 | 304 | ||
321 | err = -ENOENT; | 305 | err = -ENOENT; |
322 | new_de = ext2_find_entry (new_dir, new_dentry, &new_page); | 306 | new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); |
323 | if (!new_de) | 307 | if (!new_de) |
324 | goto out_dir; | 308 | goto out_dir; |
325 | inode_inc_link_count(old_inode); | 309 | inode_inc_link_count(old_inode); |
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 4fb94c20041b..b72b85884223 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/buffer_head.h> | 11 | #include <linux/buffer_head.h> |
12 | #include <linux/ext2_fs_sb.h> | 12 | #include <linux/ext2_fs_sb.h> |
13 | #include <linux/ext2_fs.h> | 13 | #include <linux/ext2_fs.h> |
14 | #include <linux/blkdev.h> | ||
14 | #include "ext2.h" | 15 | #include "ext2.h" |
15 | #include "xip.h" | 16 | #include "xip.h" |
16 | 17 | ||
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig new file mode 100644 index 000000000000..8e0cfe44b0fc --- /dev/null +++ b/fs/ext3/Kconfig | |||
@@ -0,0 +1,67 @@ | |||
1 | config EXT3_FS | ||
2 | tristate "Ext3 journalling file system support" | ||
3 | select JBD | ||
4 | help | ||
5 | This is the journalling version of the Second extended file system | ||
6 | (often called ext3), the de facto standard Linux file system | ||
7 | (method to organize files on a storage device) for hard disks. | ||
8 | |||
9 | The journalling code included in this driver means you do not have | ||
10 | to run e2fsck (file system checker) on your file systems after a | ||
11 | crash. The journal keeps track of any changes that were being made | ||
12 | at the time the system crashed, and can ensure that your file system | ||
13 | is consistent without the need for a lengthy check. | ||
14 | |||
15 | Other than adding the journal to the file system, the on-disk format | ||
16 | of ext3 is identical to ext2. It is possible to freely switch | ||
17 | between using the ext3 driver and the ext2 driver, as long as the | ||
18 | file system has been cleanly unmounted, or e2fsck is run on the file | ||
19 | system. | ||
20 | |||
21 | To add a journal on an existing ext2 file system or change the | ||
22 | behavior of ext3 file systems, you can use the tune2fs utility ("man | ||
23 | tune2fs"). To modify attributes of files and directories on ext3 | ||
24 | file systems, use chattr ("man chattr"). You need to be using | ||
25 | e2fsprogs version 1.20 or later in order to create ext3 journals | ||
26 | (available at <http://sourceforge.net/projects/e2fsprogs/>). | ||
27 | |||
28 | To compile this file system support as a module, choose M here: the | ||
29 | module will be called ext3. | ||
30 | |||
31 | config EXT3_FS_XATTR | ||
32 | bool "Ext3 extended attributes" | ||
33 | depends on EXT3_FS | ||
34 | default y | ||
35 | help | ||
36 | Extended attributes are name:value pairs associated with inodes by | ||
37 | the kernel or by users (see the attr(5) manual page, or visit | ||
38 | <http://acl.bestbits.at/> for details). | ||
39 | |||
40 | If unsure, say N. | ||
41 | |||
42 | You need this for POSIX ACL support on ext3. | ||
43 | |||
44 | config EXT3_FS_POSIX_ACL | ||
45 | bool "Ext3 POSIX Access Control Lists" | ||
46 | depends on EXT3_FS_XATTR | ||
47 | select FS_POSIX_ACL | ||
48 | help | ||
49 | Posix Access Control Lists (ACLs) support permissions for users and | ||
50 | groups beyond the owner/group/world scheme. | ||
51 | |||
52 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
53 | Linux website <http://acl.bestbits.at/>. | ||
54 | |||
55 | If you don't know what Access Control Lists are, say N | ||
56 | |||
57 | config EXT3_FS_SECURITY | ||
58 | bool "Ext3 Security Labels" | ||
59 | depends on EXT3_FS_XATTR | ||
60 | help | ||
61 | Security labels support alternative access control models | ||
62 | implemented by security modules like SELinux. This option | ||
63 | enables an extended attribute handler for file security | ||
64 | labels in the ext3 filesystem. | ||
65 | |||
66 | If you are not using a security module that requires using | ||
67 | extended attributes for file security labels, say N. | ||
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 92fd0338a6eb..f5b57a2ca35a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -1547,6 +1547,7 @@ retry_alloc: | |||
1547 | * turn off reservation for this allocation | 1547 | * turn off reservation for this allocation |
1548 | */ | 1548 | */ |
1549 | if (my_rsv && (free_blocks < windowsz) | 1549 | if (my_rsv && (free_blocks < windowsz) |
1550 | && (free_blocks > 0) | ||
1550 | && (rsv_is_empty(&my_rsv->rsv_window))) | 1551 | && (rsv_is_empty(&my_rsv->rsv_window))) |
1551 | my_rsv = NULL; | 1552 | my_rsv = NULL; |
1552 | 1553 | ||
@@ -1585,7 +1586,7 @@ retry_alloc: | |||
1585 | * free blocks is less than half of the reservation | 1586 | * free blocks is less than half of the reservation |
1586 | * window size. | 1587 | * window size. |
1587 | */ | 1588 | */ |
1588 | if (free_blocks <= (windowsz/2)) | 1589 | if (my_rsv && (free_blocks <= (windowsz/2))) |
1589 | continue; | 1590 | continue; |
1590 | 1591 | ||
1591 | brelse(bitmap_bh); | 1592 | brelse(bitmap_bh); |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 2eea96ec78ed..4c82531ea0a8 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp, | |||
102 | int err; | 102 | int err; |
103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
104 | int ret = 0; | 104 | int ret = 0; |
105 | int dir_has_error = 0; | ||
105 | 106 | ||
106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
107 | 108 | ||
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp, | |||
148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
149 | */ | 150 | */ |
150 | if (!bh) { | 151 | if (!bh) { |
151 | ext3_error (sb, "ext3_readdir", | 152 | if (!dir_has_error) { |
152 | "directory #%lu contains a hole at offset %lu", | 153 | ext3_error(sb, __func__, "directory #%lu " |
153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %lld", |
155 | inode->i_ino, filp->f_pos); | ||
156 | dir_has_error = 1; | ||
157 | } | ||
154 | /* corrupt size? Maybe no more blocks to read */ | 158 | /* corrupt size? Maybe no more blocks to read */ |
155 | if (filp->f_pos > inode->i_blocks << 9) | 159 | if (filp->f_pos > inode->i_blocks << 9) |
156 | break; | 160 | break; |
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
410 | get_dtype(sb, fname->file_type)); | 414 | get_dtype(sb, fname->file_type)); |
411 | if (error) { | 415 | if (error) { |
412 | filp->f_pos = curr_pos; | 416 | filp->f_pos = curr_pos; |
413 | info->extra_fname = fname->next; | 417 | info->extra_fname = fname; |
414 | return error; | 418 | return error; |
415 | } | 419 | } |
416 | fname = fname->next; | 420 | fname = fname->next; |
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp, | |||
449 | * If there are any leftover names on the hash collision | 453 | * If there are any leftover names on the hash collision |
450 | * chain, return them first. | 454 | * chain, return them first. |
451 | */ | 455 | */ |
452 | if (info->extra_fname && | 456 | if (info->extra_fname) { |
453 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 457 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
454 | goto finished; | 458 | goto finished; |
455 | 459 | ||
456 | if (!info->curr_node) | 460 | info->extra_fname = NULL; |
461 | info->curr_node = rb_next(info->curr_node); | ||
462 | if (!info->curr_node) { | ||
463 | if (info->next_hash == ~0) { | ||
464 | filp->f_pos = EXT3_HTREE_EOF; | ||
465 | goto finished; | ||
466 | } | ||
467 | info->curr_hash = info->next_hash; | ||
468 | info->curr_minor_hash = 0; | ||
469 | } | ||
470 | } else if (!info->curr_node) | ||
457 | info->curr_node = rb_first(&info->root); | 471 | info->curr_node = rb_first(&info->root); |
458 | 472 | ||
459 | while (1) { | 473 | while (1) { |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ebfec4d0148e..f8424ad89971 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1186,6 +1186,13 @@ write_begin_failed: | |||
1186 | ext3_journal_stop(handle); | 1186 | ext3_journal_stop(handle); |
1187 | unlock_page(page); | 1187 | unlock_page(page); |
1188 | page_cache_release(page); | 1188 | page_cache_release(page); |
1189 | /* | ||
1190 | * block_write_begin may have instantiated a few blocks | ||
1191 | * outside i_size. Trim these off again. Don't need | ||
1192 | * i_size_read because we hold i_mutex. | ||
1193 | */ | ||
1194 | if (pos + len > inode->i_size) | ||
1195 | vmtruncate(inode, inode->i_size); | ||
1189 | } | 1196 | } |
1190 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1197 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1191 | goto retry; | 1198 | goto retry; |
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 0d0c70151642..b7394d05ee8e 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -239,7 +239,7 @@ setrsvsz_out: | |||
239 | case EXT3_IOC_GROUP_EXTEND: { | 239 | case EXT3_IOC_GROUP_EXTEND: { |
240 | ext3_fsblk_t n_blocks_count; | 240 | ext3_fsblk_t n_blocks_count; |
241 | struct super_block *sb = inode->i_sb; | 241 | struct super_block *sb = inode->i_sb; |
242 | int err; | 242 | int err, err2; |
243 | 243 | ||
244 | if (!capable(CAP_SYS_RESOURCE)) | 244 | if (!capable(CAP_SYS_RESOURCE)) |
245 | return -EPERM; | 245 | return -EPERM; |
@@ -254,8 +254,10 @@ setrsvsz_out: | |||
254 | } | 254 | } |
255 | err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); | 255 | err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); |
256 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 256 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
257 | journal_flush(EXT3_SB(sb)->s_journal); | 257 | err2 = journal_flush(EXT3_SB(sb)->s_journal); |
258 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 258 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
259 | if (err == 0) | ||
260 | err = err2; | ||
259 | group_extend_out: | 261 | group_extend_out: |
260 | mnt_drop_write(filp->f_path.mnt); | 262 | mnt_drop_write(filp->f_path.mnt); |
261 | return err; | 263 | return err; |
@@ -263,7 +265,7 @@ group_extend_out: | |||
263 | case EXT3_IOC_GROUP_ADD: { | 265 | case EXT3_IOC_GROUP_ADD: { |
264 | struct ext3_new_group_data input; | 266 | struct ext3_new_group_data input; |
265 | struct super_block *sb = inode->i_sb; | 267 | struct super_block *sb = inode->i_sb; |
266 | int err; | 268 | int err, err2; |
267 | 269 | ||
268 | if (!capable(CAP_SYS_RESOURCE)) | 270 | if (!capable(CAP_SYS_RESOURCE)) |
269 | return -EPERM; | 271 | return -EPERM; |
@@ -280,8 +282,10 @@ group_extend_out: | |||
280 | 282 | ||
281 | err = ext3_group_add(sb, &input); | 283 | err = ext3_group_add(sb, &input); |
282 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 284 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
283 | journal_flush(EXT3_SB(sb)->s_journal); | 285 | err2 = journal_flush(EXT3_SB(sb)->s_journal); |
284 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 286 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
287 | if (err == 0) | ||
288 | err = err2; | ||
285 | group_add_out: | 289 | group_add_out: |
286 | mnt_drop_write(filp->f_path.mnt); | 290 | mnt_drop_write(filp->f_path.mnt); |
287 | return err; | 291 | return err; |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index de13e919cd81..3e5edc92aa0b 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -159,7 +159,7 @@ static void dx_set_count (struct dx_entry *entries, unsigned value); | |||
159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); |
160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); |
161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit (struct inode *dir); |
162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(struct qstr *entry, |
163 | struct inode *dir, | 163 | struct inode *dir, |
164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
@@ -176,8 +176,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, | |||
176 | struct dx_frame *frame, | 176 | struct dx_frame *frame, |
177 | struct dx_frame *frames, | 177 | struct dx_frame *frames, |
178 | __u32 *start_hash); | 178 | __u32 *start_hash); |
179 | static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, | 179 | static struct buffer_head * ext3_dx_find_entry(struct inode *dir, |
180 | struct ext3_dir_entry_2 **res_dir, int *err); | 180 | struct qstr *entry, struct ext3_dir_entry_2 **res_dir, |
181 | int *err); | ||
181 | static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, | 182 | static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, |
182 | struct inode *inode); | 183 | struct inode *inode); |
183 | 184 | ||
@@ -342,7 +343,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
342 | * back to userspace. | 343 | * back to userspace. |
343 | */ | 344 | */ |
344 | static struct dx_frame * | 345 | static struct dx_frame * |
345 | dx_probe(struct dentry *dentry, struct inode *dir, | 346 | dx_probe(struct qstr *entry, struct inode *dir, |
346 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 347 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
347 | { | 348 | { |
348 | unsigned count, indirect; | 349 | unsigned count, indirect; |
@@ -353,8 +354,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
353 | u32 hash; | 354 | u32 hash; |
354 | 355 | ||
355 | frame->bh = NULL; | 356 | frame->bh = NULL; |
356 | if (dentry) | ||
357 | dir = dentry->d_parent->d_inode; | ||
358 | if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) | 357 | if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) |
359 | goto fail; | 358 | goto fail; |
360 | root = (struct dx_root *) bh->b_data; | 359 | root = (struct dx_root *) bh->b_data; |
@@ -370,8 +369,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
370 | } | 369 | } |
371 | hinfo->hash_version = root->info.hash_version; | 370 | hinfo->hash_version = root->info.hash_version; |
372 | hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; | 371 | hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; |
373 | if (dentry) | 372 | if (entry) |
374 | ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 373 | ext3fs_dirhash(entry->name, entry->len, hinfo); |
375 | hash = hinfo->hash; | 374 | hash = hinfo->hash; |
376 | 375 | ||
377 | if (root->info.unused_flags & 1) { | 376 | if (root->info.unused_flags & 1) { |
@@ -803,15 +802,15 @@ static inline int ext3_match (int len, const char * const name, | |||
803 | */ | 802 | */ |
804 | static inline int search_dirblock(struct buffer_head * bh, | 803 | static inline int search_dirblock(struct buffer_head * bh, |
805 | struct inode *dir, | 804 | struct inode *dir, |
806 | struct dentry *dentry, | 805 | struct qstr *child, |
807 | unsigned long offset, | 806 | unsigned long offset, |
808 | struct ext3_dir_entry_2 ** res_dir) | 807 | struct ext3_dir_entry_2 ** res_dir) |
809 | { | 808 | { |
810 | struct ext3_dir_entry_2 * de; | 809 | struct ext3_dir_entry_2 * de; |
811 | char * dlimit; | 810 | char * dlimit; |
812 | int de_len; | 811 | int de_len; |
813 | const char *name = dentry->d_name.name; | 812 | const char *name = child->name; |
814 | int namelen = dentry->d_name.len; | 813 | int namelen = child->len; |
815 | 814 | ||
816 | de = (struct ext3_dir_entry_2 *) bh->b_data; | 815 | de = (struct ext3_dir_entry_2 *) bh->b_data; |
817 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 816 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
@@ -850,8 +849,9 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
850 | * The returned buffer_head has ->b_count elevated. The caller is expected | 849 | * The returned buffer_head has ->b_count elevated. The caller is expected |
851 | * to brelse() it when appropriate. | 850 | * to brelse() it when appropriate. |
852 | */ | 851 | */ |
853 | static struct buffer_head * ext3_find_entry (struct dentry *dentry, | 852 | static struct buffer_head *ext3_find_entry(struct inode *dir, |
854 | struct ext3_dir_entry_2 ** res_dir) | 853 | struct qstr *entry, |
854 | struct ext3_dir_entry_2 **res_dir) | ||
855 | { | 855 | { |
856 | struct super_block * sb; | 856 | struct super_block * sb; |
857 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 857 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; |
@@ -863,16 +863,15 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry, | |||
863 | buffer */ | 863 | buffer */ |
864 | int num = 0; | 864 | int num = 0; |
865 | int nblocks, i, err; | 865 | int nblocks, i, err; |
866 | struct inode *dir = dentry->d_parent->d_inode; | ||
867 | int namelen; | 866 | int namelen; |
868 | 867 | ||
869 | *res_dir = NULL; | 868 | *res_dir = NULL; |
870 | sb = dir->i_sb; | 869 | sb = dir->i_sb; |
871 | namelen = dentry->d_name.len; | 870 | namelen = entry->len; |
872 | if (namelen > EXT3_NAME_LEN) | 871 | if (namelen > EXT3_NAME_LEN) |
873 | return NULL; | 872 | return NULL; |
874 | if (is_dx(dir)) { | 873 | if (is_dx(dir)) { |
875 | bh = ext3_dx_find_entry(dentry, res_dir, &err); | 874 | bh = ext3_dx_find_entry(dir, entry, res_dir, &err); |
876 | /* | 875 | /* |
877 | * On success, or if the error was file not found, | 876 | * On success, or if the error was file not found, |
878 | * return. Otherwise, fall back to doing a search the | 877 | * return. Otherwise, fall back to doing a search the |
@@ -923,7 +922,7 @@ restart: | |||
923 | brelse(bh); | 922 | brelse(bh); |
924 | goto next; | 923 | goto next; |
925 | } | 924 | } |
926 | i = search_dirblock(bh, dir, dentry, | 925 | i = search_dirblock(bh, dir, entry, |
927 | block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); | 926 | block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); |
928 | if (i == 1) { | 927 | if (i == 1) { |
929 | EXT3_I(dir)->i_dir_start_lookup = block; | 928 | EXT3_I(dir)->i_dir_start_lookup = block; |
@@ -957,8 +956,9 @@ cleanup_and_exit: | |||
957 | return ret; | 956 | return ret; |
958 | } | 957 | } |
959 | 958 | ||
960 | static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, | 959 | static struct buffer_head * ext3_dx_find_entry(struct inode *dir, |
961 | struct ext3_dir_entry_2 **res_dir, int *err) | 960 | struct qstr *entry, struct ext3_dir_entry_2 **res_dir, |
961 | int *err) | ||
962 | { | 962 | { |
963 | struct super_block * sb; | 963 | struct super_block * sb; |
964 | struct dx_hash_info hinfo; | 964 | struct dx_hash_info hinfo; |
@@ -968,14 +968,13 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, | |||
968 | struct buffer_head *bh; | 968 | struct buffer_head *bh; |
969 | unsigned long block; | 969 | unsigned long block; |
970 | int retval; | 970 | int retval; |
971 | int namelen = dentry->d_name.len; | 971 | int namelen = entry->len; |
972 | const u8 *name = dentry->d_name.name; | 972 | const u8 *name = entry->name; |
973 | struct inode *dir = dentry->d_parent->d_inode; | ||
974 | 973 | ||
975 | sb = dir->i_sb; | 974 | sb = dir->i_sb; |
976 | /* NFS may look up ".." - look at dx_root directory block */ | 975 | /* NFS may look up ".." - look at dx_root directory block */ |
977 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 976 | if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) { |
978 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 977 | if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) |
979 | return NULL; | 978 | return NULL; |
980 | } else { | 979 | } else { |
981 | frame = frames; | 980 | frame = frames; |
@@ -1036,7 +1035,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
1036 | if (dentry->d_name.len > EXT3_NAME_LEN) | 1035 | if (dentry->d_name.len > EXT3_NAME_LEN) |
1037 | return ERR_PTR(-ENAMETOOLONG); | 1036 | return ERR_PTR(-ENAMETOOLONG); |
1038 | 1037 | ||
1039 | bh = ext3_find_entry(dentry, &de); | 1038 | bh = ext3_find_entry(dir, &dentry->d_name, &de); |
1040 | inode = NULL; | 1039 | inode = NULL; |
1041 | if (bh) { | 1040 | if (bh) { |
1042 | unsigned long ino = le32_to_cpu(de->inode); | 1041 | unsigned long ino = le32_to_cpu(de->inode); |
@@ -1057,18 +1056,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
1057 | struct dentry *ext3_get_parent(struct dentry *child) | 1056 | struct dentry *ext3_get_parent(struct dentry *child) |
1058 | { | 1057 | { |
1059 | unsigned long ino; | 1058 | unsigned long ino; |
1060 | struct dentry *parent; | 1059 | struct qstr dotdot = {.name = "..", .len = 2}; |
1061 | struct inode *inode; | ||
1062 | struct dentry dotdot; | ||
1063 | struct ext3_dir_entry_2 * de; | 1060 | struct ext3_dir_entry_2 * de; |
1064 | struct buffer_head *bh; | 1061 | struct buffer_head *bh; |
1065 | 1062 | ||
1066 | dotdot.d_name.name = ".."; | 1063 | bh = ext3_find_entry(child->d_inode, &dotdot, &de); |
1067 | dotdot.d_name.len = 2; | ||
1068 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
1069 | |||
1070 | bh = ext3_find_entry(&dotdot, &de); | ||
1071 | inode = NULL; | ||
1072 | if (!bh) | 1064 | if (!bh) |
1073 | return ERR_PTR(-ENOENT); | 1065 | return ERR_PTR(-ENOENT); |
1074 | ino = le32_to_cpu(de->inode); | 1066 | ino = le32_to_cpu(de->inode); |
@@ -1080,16 +1072,7 @@ struct dentry *ext3_get_parent(struct dentry *child) | |||
1080 | return ERR_PTR(-EIO); | 1072 | return ERR_PTR(-EIO); |
1081 | } | 1073 | } |
1082 | 1074 | ||
1083 | inode = ext3_iget(child->d_inode->i_sb, ino); | 1075 | return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino)); |
1084 | if (IS_ERR(inode)) | ||
1085 | return ERR_CAST(inode); | ||
1086 | |||
1087 | parent = d_alloc_anon(inode); | ||
1088 | if (!parent) { | ||
1089 | iput(inode); | ||
1090 | parent = ERR_PTR(-ENOMEM); | ||
1091 | } | ||
1092 | return parent; | ||
1093 | } | 1076 | } |
1094 | 1077 | ||
1095 | #define S_SHIFT 12 | 1078 | #define S_SHIFT 12 |
@@ -1503,7 +1486,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1503 | struct ext3_dir_entry_2 *de; | 1486 | struct ext3_dir_entry_2 *de; |
1504 | int err; | 1487 | int err; |
1505 | 1488 | ||
1506 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1489 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
1507 | if (!frame) | 1490 | if (!frame) |
1508 | return err; | 1491 | return err; |
1509 | entries = frame->entries; | 1492 | entries = frame->entries; |
@@ -2056,7 +2039,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) | |||
2056 | return PTR_ERR(handle); | 2039 | return PTR_ERR(handle); |
2057 | 2040 | ||
2058 | retval = -ENOENT; | 2041 | retval = -ENOENT; |
2059 | bh = ext3_find_entry (dentry, &de); | 2042 | bh = ext3_find_entry(dir, &dentry->d_name, &de); |
2060 | if (!bh) | 2043 | if (!bh) |
2061 | goto end_rmdir; | 2044 | goto end_rmdir; |
2062 | 2045 | ||
@@ -2118,7 +2101,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) | |||
2118 | handle->h_sync = 1; | 2101 | handle->h_sync = 1; |
2119 | 2102 | ||
2120 | retval = -ENOENT; | 2103 | retval = -ENOENT; |
2121 | bh = ext3_find_entry (dentry, &de); | 2104 | bh = ext3_find_entry(dir, &dentry->d_name, &de); |
2122 | if (!bh) | 2105 | if (!bh) |
2123 | goto end_unlink; | 2106 | goto end_unlink; |
2124 | 2107 | ||
@@ -2276,7 +2259,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2276 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2259 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
2277 | handle->h_sync = 1; | 2260 | handle->h_sync = 1; |
2278 | 2261 | ||
2279 | old_bh = ext3_find_entry (old_dentry, &old_de); | 2262 | old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de); |
2280 | /* | 2263 | /* |
2281 | * Check for inode number is _not_ due to possible IO errors. | 2264 | * Check for inode number is _not_ due to possible IO errors. |
2282 | * We might rmdir the source, keep it as pwd of some process | 2265 | * We might rmdir the source, keep it as pwd of some process |
@@ -2289,7 +2272,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2289 | goto end_rename; | 2272 | goto end_rename; |
2290 | 2273 | ||
2291 | new_inode = new_dentry->d_inode; | 2274 | new_inode = new_dentry->d_inode; |
2292 | new_bh = ext3_find_entry (new_dentry, &new_de); | 2275 | new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de); |
2293 | if (new_bh) { | 2276 | if (new_bh) { |
2294 | if (!new_inode) { | 2277 | if (!new_inode) { |
2295 | brelse (new_bh); | 2278 | brelse (new_bh); |
@@ -2355,7 +2338,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2355 | struct buffer_head *old_bh2; | 2338 | struct buffer_head *old_bh2; |
2356 | struct ext3_dir_entry_2 *old_de2; | 2339 | struct ext3_dir_entry_2 *old_de2; |
2357 | 2340 | ||
2358 | old_bh2 = ext3_find_entry(old_dentry, &old_de2); | 2341 | old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name, |
2342 | &old_de2); | ||
2359 | if (old_bh2) { | 2343 | if (old_bh2) { |
2360 | retval = ext3_delete_entry(handle, old_dir, | 2344 | retval = ext3_delete_entry(handle, old_dir, |
2361 | old_de2, old_bh2); | 2345 | old_de2, old_bh2); |
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 77278e947e94..78fdf3836370 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c | |||
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) | |||
790 | 790 | ||
791 | if (reserved_gdb || gdb_off == 0) { | 791 | if (reserved_gdb || gdb_off == 0) { |
792 | if (!EXT3_HAS_COMPAT_FEATURE(sb, | 792 | if (!EXT3_HAS_COMPAT_FEATURE(sb, |
793 | EXT3_FEATURE_COMPAT_RESIZE_INODE)){ | 793 | EXT3_FEATURE_COMPAT_RESIZE_INODE) |
794 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
794 | ext3_warning(sb, __func__, | 795 | ext3_warning(sb, __func__, |
795 | "No reserved GDT blocks, can't resize"); | 796 | "No reserved GDT blocks, can't resize"); |
796 | return -EPERM; | 797 | return -EPERM; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 399a96a6c556..18eaa78ecb4e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -347,7 +347,7 @@ fail: | |||
347 | static int ext3_blkdev_put(struct block_device *bdev) | 347 | static int ext3_blkdev_put(struct block_device *bdev) |
348 | { | 348 | { |
349 | bd_release(bdev); | 349 | bd_release(bdev); |
350 | return blkdev_put(bdev); | 350 | return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
351 | } | 351 | } |
352 | 352 | ||
353 | static int ext3_blkdev_remove(struct ext3_sb_info *sbi) | 353 | static int ext3_blkdev_remove(struct ext3_sb_info *sbi) |
@@ -393,7 +393,8 @@ static void ext3_put_super (struct super_block * sb) | |||
393 | int i; | 393 | int i; |
394 | 394 | ||
395 | ext3_xattr_put_super(sb); | 395 | ext3_xattr_put_super(sb); |
396 | journal_destroy(sbi->s_journal); | 396 | if (journal_destroy(sbi->s_journal) < 0) |
397 | ext3_abort(sb, __func__, "Couldn't clean up the journal"); | ||
397 | if (!(sb->s_flags & MS_RDONLY)) { | 398 | if (!(sb->s_flags & MS_RDONLY)) { |
398 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 399 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
399 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 400 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
@@ -625,6 +626,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
625 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) | 626 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) |
626 | seq_puts(seq, ",data=writeback"); | 627 | seq_puts(seq, ",data=writeback"); |
627 | 628 | ||
629 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
630 | seq_puts(seq, ",data_err=abort"); | ||
631 | |||
628 | ext3_show_quota_options(seq, sb); | 632 | ext3_show_quota_options(seq, sb); |
629 | 633 | ||
630 | return 0; | 634 | return 0; |
@@ -754,6 +758,7 @@ enum { | |||
754 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 758 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
755 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 759 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
756 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 760 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
761 | Opt_data_err_abort, Opt_data_err_ignore, | ||
757 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 762 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
758 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 763 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
759 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 764 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
@@ -796,6 +801,8 @@ static const match_table_t tokens = { | |||
796 | {Opt_data_journal, "data=journal"}, | 801 | {Opt_data_journal, "data=journal"}, |
797 | {Opt_data_ordered, "data=ordered"}, | 802 | {Opt_data_ordered, "data=ordered"}, |
798 | {Opt_data_writeback, "data=writeback"}, | 803 | {Opt_data_writeback, "data=writeback"}, |
804 | {Opt_data_err_abort, "data_err=abort"}, | ||
805 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
799 | {Opt_offusrjquota, "usrjquota="}, | 806 | {Opt_offusrjquota, "usrjquota="}, |
800 | {Opt_usrjquota, "usrjquota=%s"}, | 807 | {Opt_usrjquota, "usrjquota=%s"}, |
801 | {Opt_offgrpjquota, "grpjquota="}, | 808 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -1011,6 +1018,12 @@ static int parse_options (char *options, struct super_block *sb, | |||
1011 | sbi->s_mount_opt |= data_opt; | 1018 | sbi->s_mount_opt |= data_opt; |
1012 | } | 1019 | } |
1013 | break; | 1020 | break; |
1021 | case Opt_data_err_abort: | ||
1022 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1023 | break; | ||
1024 | case Opt_data_err_ignore: | ||
1025 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1026 | break; | ||
1014 | #ifdef CONFIG_QUOTA | 1027 | #ifdef CONFIG_QUOTA |
1015 | case Opt_usrjquota: | 1028 | case Opt_usrjquota: |
1016 | qtype = USRQUOTA; | 1029 | qtype = USRQUOTA; |
@@ -1986,6 +1999,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) | |||
1986 | journal->j_flags |= JFS_BARRIER; | 1999 | journal->j_flags |= JFS_BARRIER; |
1987 | else | 2000 | else |
1988 | journal->j_flags &= ~JFS_BARRIER; | 2001 | journal->j_flags &= ~JFS_BARRIER; |
2002 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
2003 | journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; | ||
2004 | else | ||
2005 | journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; | ||
1989 | spin_unlock(&journal->j_state_lock); | 2006 | spin_unlock(&journal->j_state_lock); |
1990 | } | 2007 | } |
1991 | 2008 | ||
@@ -2050,7 +2067,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, | |||
2050 | if (bd_claim(bdev, sb)) { | 2067 | if (bd_claim(bdev, sb)) { |
2051 | printk(KERN_ERR | 2068 | printk(KERN_ERR |
2052 | "EXT3: failed to claim external journal device.\n"); | 2069 | "EXT3: failed to claim external journal device.\n"); |
2053 | blkdev_put(bdev); | 2070 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
2054 | return NULL; | 2071 | return NULL; |
2055 | } | 2072 | } |
2056 | 2073 | ||
@@ -2280,7 +2297,9 @@ static void ext3_mark_recovery_complete(struct super_block * sb, | |||
2280 | journal_t *journal = EXT3_SB(sb)->s_journal; | 2297 | journal_t *journal = EXT3_SB(sb)->s_journal; |
2281 | 2298 | ||
2282 | journal_lock_updates(journal); | 2299 | journal_lock_updates(journal); |
2283 | journal_flush(journal); | 2300 | if (journal_flush(journal) < 0) |
2301 | goto out; | ||
2302 | |||
2284 | lock_super(sb); | 2303 | lock_super(sb); |
2285 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && | 2304 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && |
2286 | sb->s_flags & MS_RDONLY) { | 2305 | sb->s_flags & MS_RDONLY) { |
@@ -2289,6 +2308,8 @@ static void ext3_mark_recovery_complete(struct super_block * sb, | |||
2289 | ext3_commit_super(sb, es, 1); | 2308 | ext3_commit_super(sb, es, 1); |
2290 | } | 2309 | } |
2291 | unlock_super(sb); | 2310 | unlock_super(sb); |
2311 | |||
2312 | out: | ||
2292 | journal_unlock_updates(journal); | 2313 | journal_unlock_updates(journal); |
2293 | } | 2314 | } |
2294 | 2315 | ||
@@ -2388,7 +2409,13 @@ static void ext3_write_super_lockfs(struct super_block *sb) | |||
2388 | 2409 | ||
2389 | /* Now we set up the journal barrier. */ | 2410 | /* Now we set up the journal barrier. */ |
2390 | journal_lock_updates(journal); | 2411 | journal_lock_updates(journal); |
2391 | journal_flush(journal); | 2412 | |
2413 | /* | ||
2414 | * We don't want to clear needs_recovery flag when we failed | ||
2415 | * to flush the journal. | ||
2416 | */ | ||
2417 | if (journal_flush(journal) < 0) | ||
2418 | return; | ||
2392 | 2419 | ||
2393 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 2420 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
2394 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 2421 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
@@ -2767,30 +2794,30 @@ static int ext3_quota_on_mount(struct super_block *sb, int type) | |||
2767 | * Standard function to be called on quota_on | 2794 | * Standard function to be called on quota_on |
2768 | */ | 2795 | */ |
2769 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, | 2796 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, |
2770 | char *path, int remount) | 2797 | char *name, int remount) |
2771 | { | 2798 | { |
2772 | int err; | 2799 | int err; |
2773 | struct nameidata nd; | 2800 | struct path path; |
2774 | 2801 | ||
2775 | if (!test_opt(sb, QUOTA)) | 2802 | if (!test_opt(sb, QUOTA)) |
2776 | return -EINVAL; | 2803 | return -EINVAL; |
2777 | /* When remounting, no checks are needed and in fact, path is NULL */ | 2804 | /* When remounting, no checks are needed and in fact, name is NULL */ |
2778 | if (remount) | 2805 | if (remount) |
2779 | return vfs_quota_on(sb, type, format_id, path, remount); | 2806 | return vfs_quota_on(sb, type, format_id, name, remount); |
2780 | 2807 | ||
2781 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | 2808 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
2782 | if (err) | 2809 | if (err) |
2783 | return err; | 2810 | return err; |
2784 | 2811 | ||
2785 | /* Quotafile not on the same filesystem? */ | 2812 | /* Quotafile not on the same filesystem? */ |
2786 | if (nd.path.mnt->mnt_sb != sb) { | 2813 | if (path.mnt->mnt_sb != sb) { |
2787 | path_put(&nd.path); | 2814 | path_put(&path); |
2788 | return -EXDEV; | 2815 | return -EXDEV; |
2789 | } | 2816 | } |
2790 | /* Journaling quota? */ | 2817 | /* Journaling quota? */ |
2791 | if (EXT3_SB(sb)->s_qf_names[type]) { | 2818 | if (EXT3_SB(sb)->s_qf_names[type]) { |
2792 | /* Quotafile not of fs root? */ | 2819 | /* Quotafile not of fs root? */ |
2793 | if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) | 2820 | if (path.dentry->d_parent != sb->s_root) |
2794 | printk(KERN_WARNING | 2821 | printk(KERN_WARNING |
2795 | "EXT3-fs: Quota file not on filesystem root. " | 2822 | "EXT3-fs: Quota file not on filesystem root. " |
2796 | "Journaled quota will not work.\n"); | 2823 | "Journaled quota will not work.\n"); |
@@ -2800,18 +2827,22 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, | |||
2800 | * When we journal data on quota file, we have to flush journal to see | 2827 | * When we journal data on quota file, we have to flush journal to see |
2801 | * all updates to the file when we bypass pagecache... | 2828 | * all updates to the file when we bypass pagecache... |
2802 | */ | 2829 | */ |
2803 | if (ext3_should_journal_data(nd.path.dentry->d_inode)) { | 2830 | if (ext3_should_journal_data(path.dentry->d_inode)) { |
2804 | /* | 2831 | /* |
2805 | * We don't need to lock updates but journal_flush() could | 2832 | * We don't need to lock updates but journal_flush() could |
2806 | * otherwise be livelocked... | 2833 | * otherwise be livelocked... |
2807 | */ | 2834 | */ |
2808 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 2835 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
2809 | journal_flush(EXT3_SB(sb)->s_journal); | 2836 | err = journal_flush(EXT3_SB(sb)->s_journal); |
2810 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 2837 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
2838 | if (err) { | ||
2839 | path_put(&path); | ||
2840 | return err; | ||
2841 | } | ||
2811 | } | 2842 | } |
2812 | 2843 | ||
2813 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 2844 | err = vfs_quota_on_path(sb, type, format_id, &path); |
2814 | path_put(&nd.path); | 2845 | path_put(&path); |
2815 | return err; | 2846 | return err; |
2816 | } | 2847 | } |
2817 | 2848 | ||
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig new file mode 100644 index 000000000000..7505482a08fa --- /dev/null +++ b/fs/ext4/Kconfig | |||
@@ -0,0 +1,79 @@ | |||
1 | config EXT4_FS | ||
2 | tristate "The Extended 4 (ext4) filesystem" | ||
3 | select JBD2 | ||
4 | select CRC16 | ||
5 | help | ||
6 | This is the next generation of the ext3 filesystem. | ||
7 | |||
8 | Unlike the change from ext2 filesystem to ext3 filesystem, | ||
9 | the on-disk format of ext4 is not forwards compatible with | ||
10 | ext3; it is based on extent maps and it supports 48-bit | ||
11 | physical block numbers. The ext4 filesystem also supports delayed | ||
12 | allocation, persistent preallocation, high resolution time stamps, | ||
13 | and a number of other features to improve performance and speed | ||
14 | up fsck time. For more information, please see the web pages at | ||
15 | http://ext4.wiki.kernel.org. | ||
16 | |||
17 | The ext4 filesystem will support mounting an ext3 | ||
18 | filesystem; while there will be some performance gains from | ||
19 | the delayed allocation and inode table readahead, the best | ||
20 | performance gains will require enabling ext4 features in the | ||
21 | filesystem, or formating a new filesystem as an ext4 | ||
22 | filesystem initially. | ||
23 | |||
24 | To compile this file system support as a module, choose M here. The | ||
25 | module will be called ext4. | ||
26 | |||
27 | If unsure, say N. | ||
28 | |||
29 | config EXT4DEV_COMPAT | ||
30 | bool "Enable ext4dev compatibility" | ||
31 | depends on EXT4_FS | ||
32 | help | ||
33 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
34 | renamed from ext4dev to ext4. Unfortunately there are some | ||
35 | legacy userspace programs (such as klibc's fstype) that have | ||
36 | "ext4dev" hardcoded. | ||
37 | |||
38 | To enable backwards compatibility for systems that are | ||
39 | still expecting to mount ext4 filesystems using ext4dev, | ||
40 | choose Y here. This feature will go away by 2.6.31, so | ||
41 | please arrange to get your userspace programs fixed! | ||
42 | |||
43 | config EXT4_FS_XATTR | ||
44 | bool "Ext4 extended attributes" | ||
45 | depends on EXT4_FS | ||
46 | default y | ||
47 | help | ||
48 | Extended attributes are name:value pairs associated with inodes by | ||
49 | the kernel or by users (see the attr(5) manual page, or visit | ||
50 | <http://acl.bestbits.at/> for details). | ||
51 | |||
52 | If unsure, say N. | ||
53 | |||
54 | You need this for POSIX ACL support on ext4. | ||
55 | |||
56 | config EXT4_FS_POSIX_ACL | ||
57 | bool "Ext4 POSIX Access Control Lists" | ||
58 | depends on EXT4_FS_XATTR | ||
59 | select FS_POSIX_ACL | ||
60 | help | ||
61 | POSIX Access Control Lists (ACLs) support permissions for users and | ||
62 | groups beyond the owner/group/world scheme. | ||
63 | |||
64 | To learn more about Access Control Lists, visit the POSIX ACLs for | ||
65 | Linux website <http://acl.bestbits.at/>. | ||
66 | |||
67 | If you don't know what Access Control Lists are, say N | ||
68 | |||
69 | config EXT4_FS_SECURITY | ||
70 | bool "Ext4 Security Labels" | ||
71 | depends on EXT4_FS_XATTR | ||
72 | help | ||
73 | Security labels support alternative access control models | ||
74 | implemented by security modules like SELinux. This option | ||
75 | enables an extended attribute handler for file security | ||
76 | labels in the ext4 filesystem. | ||
77 | |||
78 | If you are not using a security module that requires using | ||
79 | extended attributes for file security labels, say N. | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index bd2ece228827..b9821be709bd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
568 | 568 | ||
569 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
570 | * inode.c/extents.c knows better, but for safety ... */ | 570 | * inode.c/extents.c knows better, but for safety ... */ |
571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | 571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
572 | ext4_should_journal_data(inode)) | 572 | metadata = 1; |
573 | |||
574 | /* We need to make sure we don't reuse | ||
575 | * a released block until the transaction commits. | ||
576 | * writeback mode has weak data consistency, so | ||
577 | * don't force data as metadata when freeing block | ||
578 | * for writeback mode. | ||
579 | */ | ||
580 | if (metadata == 0 && !ext4_should_writeback_data(inode)) | ||
573 | metadata = 1; | 581 | metadata = 1; |
574 | 582 | ||
575 | sb = inode->i_sb; | 583 | sb = inode->i_sb; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6690a41cdd9f..4880cc3e6727 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -511,7 +511,6 @@ do { \ | |||
511 | /* | 511 | /* |
512 | * Mount flags | 512 | * Mount flags |
513 | */ | 513 | */ |
514 | #define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ | ||
515 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ | 514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ |
516 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ | 515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ |
517 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ | 516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6a0b40d43264..445fde603df8 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -99,9 +99,6 @@ struct ext4_sb_info { | |||
99 | struct inode *s_buddy_cache; | 99 | struct inode *s_buddy_cache; |
100 | long s_blocks_reserved; | 100 | long s_blocks_reserved; |
101 | spinlock_t s_reserve_lock; | 101 | spinlock_t s_reserve_lock; |
102 | struct list_head s_active_transaction; | ||
103 | struct list_head s_closed_transaction; | ||
104 | struct list_head s_committed_transaction; | ||
105 | spinlock_t s_md_lock; | 102 | spinlock_t s_md_lock; |
106 | tid_t s_last_transaction; | 103 | tid_t s_last_transaction; |
107 | unsigned short *s_mb_offsets, *s_mb_maxs; | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9b4ec9decfd1..8dbf6953845b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1648 | int ret = 0, err, nr_pages, i; | 1648 | int ret = 0, err, nr_pages, i; |
1649 | unsigned long index, end; | 1649 | unsigned long index, end; |
1650 | struct pagevec pvec; | 1650 | struct pagevec pvec; |
1651 | long pages_skipped; | ||
1651 | 1652 | ||
1652 | BUG_ON(mpd->next_page <= mpd->first_page); | 1653 | BUG_ON(mpd->next_page <= mpd->first_page); |
1653 | pagevec_init(&pvec, 0); | 1654 | pagevec_init(&pvec, 0); |
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1655 | end = mpd->next_page - 1; | 1656 | end = mpd->next_page - 1; |
1656 | 1657 | ||
1657 | while (index <= end) { | 1658 | while (index <= end) { |
1658 | /* XXX: optimize tail */ | 1659 | /* |
1659 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1660 | * We can use PAGECACHE_TAG_DIRTY lookup here because |
1661 | * even though we have cleared the dirty flag on the page | ||
1662 | * we still keep the page in the radix tree with tag | ||
1663 | * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io. | ||
1664 | * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback | ||
1665 | * which is called via the below writepage callback. | ||
1666 | */ | ||
1667 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
1668 | PAGECACHE_TAG_DIRTY, | ||
1669 | min(end - index, | ||
1670 | (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1660 | if (nr_pages == 0) | 1671 | if (nr_pages == 0) |
1661 | break; | 1672 | break; |
1662 | for (i = 0; i < nr_pages; i++) { | 1673 | for (i = 0; i < nr_pages; i++) { |
1663 | struct page *page = pvec.pages[i]; | 1674 | struct page *page = pvec.pages[i]; |
1664 | 1675 | ||
1665 | index = page->index; | 1676 | pages_skipped = mpd->wbc->pages_skipped; |
1666 | if (index > end) | ||
1667 | break; | ||
1668 | index++; | ||
1669 | |||
1670 | err = mapping->a_ops->writepage(page, mpd->wbc); | 1677 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1671 | if (!err) | 1678 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) |
1679 | /* | ||
1680 | * have successfully written the page | ||
1681 | * without skipping the same | ||
1682 | */ | ||
1672 | mpd->pages_written++; | 1683 | mpd->pages_written++; |
1673 | /* | 1684 | /* |
1674 | * In error case, we have to continue because | 1685 | * In error case, we have to continue because |
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
2104 | struct writeback_control *wbc, | 2115 | struct writeback_control *wbc, |
2105 | struct mpage_da_data *mpd) | 2116 | struct mpage_da_data *mpd) |
2106 | { | 2117 | { |
2107 | long to_write; | ||
2108 | int ret; | 2118 | int ret; |
2109 | 2119 | ||
2110 | if (!mpd->get_block) | 2120 | if (!mpd->get_block) |
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
2119 | mpd->pages_written = 0; | 2129 | mpd->pages_written = 0; |
2120 | mpd->retval = 0; | 2130 | mpd->retval = 0; |
2121 | 2131 | ||
2122 | to_write = wbc->nr_to_write; | ||
2123 | |||
2124 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); | 2132 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); |
2125 | |||
2126 | /* | 2133 | /* |
2127 | * Handle last extent of pages | 2134 | * Handle last extent of pages |
2128 | */ | 2135 | */ |
2129 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { | 2136 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
2130 | if (mpage_da_map_blocks(mpd) == 0) | 2137 | if (mpage_da_map_blocks(mpd) == 0) |
2131 | mpage_da_submit_io(mpd); | 2138 | mpage_da_submit_io(mpd); |
2132 | } | ||
2133 | 2139 | ||
2134 | wbc->nr_to_write = to_write - mpd->pages_written; | 2140 | mpd->io_done = 1; |
2141 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2142 | } | ||
2143 | wbc->nr_to_write -= mpd->pages_written; | ||
2135 | return ret; | 2144 | return ret; |
2136 | } | 2145 | } |
2137 | 2146 | ||
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2360 | static int ext4_da_writepages(struct address_space *mapping, | 2369 | static int ext4_da_writepages(struct address_space *mapping, |
2361 | struct writeback_control *wbc) | 2370 | struct writeback_control *wbc) |
2362 | { | 2371 | { |
2372 | pgoff_t index; | ||
2373 | int range_whole = 0; | ||
2363 | handle_t *handle = NULL; | 2374 | handle_t *handle = NULL; |
2364 | loff_t range_start = 0; | ||
2365 | struct mpage_da_data mpd; | 2375 | struct mpage_da_data mpd; |
2366 | struct inode *inode = mapping->host; | 2376 | struct inode *inode = mapping->host; |
2377 | int no_nrwrite_index_update; | ||
2378 | long pages_written = 0, pages_skipped; | ||
2367 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2379 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2368 | long to_write, pages_skipped = 0; | ||
2369 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2380 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2370 | 2381 | ||
2371 | /* | 2382 | /* |
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2385 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | 2396 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; |
2386 | wbc->nr_to_write = sbi->s_mb_stream_request; | 2397 | wbc->nr_to_write = sbi->s_mb_stream_request; |
2387 | } | 2398 | } |
2399 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
2400 | range_whole = 1; | ||
2388 | 2401 | ||
2389 | if (!wbc->range_cyclic) | 2402 | if (wbc->range_cyclic) |
2390 | /* | 2403 | index = mapping->writeback_index; |
2391 | * If range_cyclic is not set force range_cont | 2404 | else |
2392 | * and save the old writeback_index | 2405 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2393 | */ | ||
2394 | wbc->range_cont = 1; | ||
2395 | |||
2396 | range_start = wbc->range_start; | ||
2397 | pages_skipped = wbc->pages_skipped; | ||
2398 | 2406 | ||
2399 | mpd.wbc = wbc; | 2407 | mpd.wbc = wbc; |
2400 | mpd.inode = mapping->host; | 2408 | mpd.inode = mapping->host; |
2401 | 2409 | ||
2402 | restart_loop: | 2410 | /* |
2403 | to_write = wbc->nr_to_write; | 2411 | * we don't want write_cache_pages to update |
2404 | while (!ret && to_write > 0) { | 2412 | * nr_to_write and writeback_index |
2413 | */ | ||
2414 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2415 | wbc->no_nrwrite_index_update = 1; | ||
2416 | pages_skipped = wbc->pages_skipped; | ||
2417 | |||
2418 | while (!ret && wbc->nr_to_write > 0) { | ||
2405 | 2419 | ||
2406 | /* | 2420 | /* |
2407 | * we insert one extent at a time. So we need | 2421 | * we insert one extent at a time. So we need |
@@ -2422,48 +2436,53 @@ restart_loop: | |||
2422 | dump_stack(); | 2436 | dump_stack(); |
2423 | goto out_writepages; | 2437 | goto out_writepages; |
2424 | } | 2438 | } |
2425 | to_write -= wbc->nr_to_write; | ||
2426 | |||
2427 | mpd.get_block = ext4_da_get_block_write; | 2439 | mpd.get_block = ext4_da_get_block_write; |
2428 | ret = mpage_da_writepages(mapping, wbc, &mpd); | 2440 | ret = mpage_da_writepages(mapping, wbc, &mpd); |
2429 | 2441 | ||
2430 | ext4_journal_stop(handle); | 2442 | ext4_journal_stop(handle); |
2431 | 2443 | ||
2432 | if (mpd.retval == -ENOSPC) | 2444 | if (mpd.retval == -ENOSPC) { |
2445 | /* commit the transaction which would | ||
2446 | * free blocks released in the transaction | ||
2447 | * and try again | ||
2448 | */ | ||
2433 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2449 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2434 | 2450 | wbc->pages_skipped = pages_skipped; | |
2435 | /* reset the retry count */ | 2451 | ret = 0; |
2436 | if (ret == MPAGE_DA_EXTENT_TAIL) { | 2452 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
2437 | /* | 2453 | /* |
2438 | * got one extent now try with | 2454 | * got one extent now try with |
2439 | * rest of the pages | 2455 | * rest of the pages |
2440 | */ | 2456 | */ |
2441 | to_write += wbc->nr_to_write; | 2457 | pages_written += mpd.pages_written; |
2458 | wbc->pages_skipped = pages_skipped; | ||
2442 | ret = 0; | 2459 | ret = 0; |
2443 | } else if (wbc->nr_to_write) { | 2460 | } else if (wbc->nr_to_write) |
2444 | /* | 2461 | /* |
2445 | * There is no more writeout needed | 2462 | * There is no more writeout needed |
2446 | * or we requested for a noblocking writeout | 2463 | * or we requested for a noblocking writeout |
2447 | * and we found the device congested | 2464 | * and we found the device congested |
2448 | */ | 2465 | */ |
2449 | to_write += wbc->nr_to_write; | ||
2450 | break; | 2466 | break; |
2451 | } | ||
2452 | wbc->nr_to_write = to_write; | ||
2453 | } | ||
2454 | |||
2455 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { | ||
2456 | /* We skipped pages in this loop */ | ||
2457 | wbc->range_start = range_start; | ||
2458 | wbc->nr_to_write = to_write + | ||
2459 | wbc->pages_skipped - pages_skipped; | ||
2460 | wbc->pages_skipped = pages_skipped; | ||
2461 | goto restart_loop; | ||
2462 | } | 2467 | } |
2468 | if (pages_skipped != wbc->pages_skipped) | ||
2469 | printk(KERN_EMERG "This should not happen leaving %s " | ||
2470 | "with nr_to_write = %ld ret = %d\n", | ||
2471 | __func__, wbc->nr_to_write, ret); | ||
2472 | |||
2473 | /* Update index */ | ||
2474 | index += pages_written; | ||
2475 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
2476 | /* | ||
2477 | * set the writeback_index so that range_cyclic | ||
2478 | * mode will write it back later | ||
2479 | */ | ||
2480 | mapping->writeback_index = index; | ||
2463 | 2481 | ||
2464 | out_writepages: | 2482 | out_writepages: |
2465 | wbc->nr_to_write = to_write - nr_to_writebump; | 2483 | if (!no_nrwrite_index_update) |
2466 | wbc->range_start = range_start; | 2484 | wbc->no_nrwrite_index_update = 0; |
2485 | wbc->nr_to_write -= nr_to_writebump; | ||
2467 | return ret; | 2486 | return ret; |
2468 | } | 2487 | } |
2469 | 2488 | ||
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4175 | struct inode *inode = &(ei->vfs_inode); | 4194 | struct inode *inode = &(ei->vfs_inode); |
4176 | u64 i_blocks = inode->i_blocks; | 4195 | u64 i_blocks = inode->i_blocks; |
4177 | struct super_block *sb = inode->i_sb; | 4196 | struct super_block *sb = inode->i_sb; |
4178 | int err = 0; | ||
4179 | 4197 | ||
4180 | if (i_blocks <= ~0U) { | 4198 | if (i_blocks <= ~0U) { |
4181 | /* | 4199 | /* |
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4185 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4203 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4186 | raw_inode->i_blocks_high = 0; | 4204 | raw_inode->i_blocks_high = 0; |
4187 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4205 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
4188 | } else if (i_blocks <= 0xffffffffffffULL) { | 4206 | return 0; |
4207 | } | ||
4208 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) | ||
4209 | return -EFBIG; | ||
4210 | |||
4211 | if (i_blocks <= 0xffffffffffffULL) { | ||
4189 | /* | 4212 | /* |
4190 | * i_blocks can be represented in a 48 bit variable | 4213 | * i_blocks can be represented in a 48 bit variable |
4191 | * as multiple of 512 bytes | 4214 | * as multiple of 512 bytes |
4192 | */ | 4215 | */ |
4193 | err = ext4_update_rocompat_feature(handle, sb, | ||
4194 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
4195 | if (err) | ||
4196 | goto err_out; | ||
4197 | /* i_block is stored in the split 48 bit fields */ | ||
4198 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4216 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4199 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4217 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4200 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4218 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
4201 | } else { | 4219 | } else { |
4202 | /* | ||
4203 | * i_blocks should be represented in a 48 bit variable | ||
4204 | * as multiple of file system block size | ||
4205 | */ | ||
4206 | err = ext4_update_rocompat_feature(handle, sb, | ||
4207 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
4208 | if (err) | ||
4209 | goto err_out; | ||
4210 | ei->i_flags |= EXT4_HUGE_FILE_FL; | 4220 | ei->i_flags |= EXT4_HUGE_FILE_FL; |
4211 | /* i_block is stored in file system block size */ | 4221 | /* i_block is stored in file system block size */ |
4212 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | 4222 | i_blocks = i_blocks >> (inode->i_blkbits - 9); |
4213 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4223 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4214 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4224 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4215 | } | 4225 | } |
4216 | err_out: | 4226 | return 0; |
4217 | return err; | ||
4218 | } | 4227 | } |
4219 | 4228 | ||
4220 | /* | 4229 | /* |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b580714f0d85..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2300 | } | 2300 | } |
2301 | 2301 | ||
2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | ||
2303 | 2304 | ||
2304 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
2305 | { | 2306 | { |
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2522 | } | 2523 | } |
2523 | 2524 | ||
2524 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
2525 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2526 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2527 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2528 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2529 | 2527 | ||
2530 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2553 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
2554 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
2555 | 2553 | ||
2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2555 | |||
2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2557 | return 0; | 2557 | return 0; |
2558 | } | 2558 | } |
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | 2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); |
2569 | list_del(&pa->pa_group_list); | 2569 | list_del(&pa->pa_group_list); |
2570 | count++; | 2570 | count++; |
2571 | kfree(pa); | 2571 | kmem_cache_free(ext4_pspace_cachep, pa); |
2572 | } | 2572 | } |
2573 | if (count) | 2573 | if (count) |
2574 | mb_debug("mballoc: %u PAs left\n", count); | 2574 | mb_debug("mballoc: %u PAs left\n", count); |
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2582 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2584 | 2584 | ||
2585 | /* release freed, non-committed blocks */ | ||
2586 | spin_lock(&sbi->s_md_lock); | ||
2587 | list_splice_init(&sbi->s_closed_transaction, | ||
2588 | &sbi->s_committed_transaction); | ||
2589 | list_splice_init(&sbi->s_active_transaction, | ||
2590 | &sbi->s_committed_transaction); | ||
2591 | spin_unlock(&sbi->s_md_lock); | ||
2592 | ext4_mb_free_committed_blocks(sb); | ||
2593 | |||
2594 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
2595 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2596 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb) | |||
2644 | return 0; | 2635 | return 0; |
2645 | } | 2636 | } |
2646 | 2637 | ||
2647 | static noinline_for_stack void | 2638 | /* |
2648 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
2640 | * so we know we can free the blocks that were released with that commit. | ||
2641 | */ | ||
2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
2649 | { | 2643 | { |
2650 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2644 | struct super_block *sb = journal->j_private; |
2651 | int err; | ||
2652 | int i; | ||
2653 | int count = 0; | ||
2654 | int count2 = 0; | ||
2655 | struct ext4_free_metadata *md; | ||
2656 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
2646 | struct ext4_group_info *db; | ||
2647 | int err, count = 0, count2 = 0; | ||
2648 | struct ext4_free_data *entry; | ||
2649 | ext4_fsblk_t discard_block; | ||
2650 | struct list_head *l, *ltmp; | ||
2657 | 2651 | ||
2658 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2659 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
2660 | |||
2661 | /* there is committed blocks to be freed yet */ | ||
2662 | do { | ||
2663 | /* get next array of blocks */ | ||
2664 | md = NULL; | ||
2665 | spin_lock(&sbi->s_md_lock); | ||
2666 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2667 | md = list_entry(sbi->s_committed_transaction.next, | ||
2668 | struct ext4_free_metadata, list); | ||
2669 | list_del(&md->list); | ||
2670 | } | ||
2671 | spin_unlock(&sbi->s_md_lock); | ||
2672 | |||
2673 | if (md == NULL) | ||
2674 | break; | ||
2675 | 2654 | ||
2676 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2677 | md->num, md->group, md); | 2656 | entry->count, entry->group, entry); |
2678 | 2657 | ||
2679 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2680 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
2681 | BUG_ON(err != 0); | 2660 | BUG_ON(err != 0); |
2682 | 2661 | ||
2662 | db = e4b.bd_info; | ||
2683 | /* there are blocks to put in buddy to make them really free */ | 2663 | /* there are blocks to put in buddy to make them really free */ |
2684 | count += md->num; | 2664 | count += entry->count; |
2685 | count2++; | 2665 | count2++; |
2686 | ext4_lock_group(sb, md->group); | 2666 | ext4_lock_group(sb, entry->group); |
2687 | for (i = 0; i < md->num; i++) { | 2667 | /* Take it out of per group rb tree */ |
2688 | mb_debug(" %u", md->blocks[i]); | 2668 | rb_erase(&entry->node, &(db->bb_free_root)); |
2689 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2669 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2670 | |||
2671 | if (!db->bb_free_root.rb_node) { | ||
2672 | /* No more items in the per group rb tree | ||
2673 | * balance refcounts from ext4_mb_free_metadata() | ||
2674 | */ | ||
2675 | page_cache_release(e4b.bd_buddy_page); | ||
2676 | page_cache_release(e4b.bd_bitmap_page); | ||
2690 | } | 2677 | } |
2691 | mb_debug("\n"); | 2678 | ext4_unlock_group(sb, entry->group); |
2692 | ext4_unlock_group(sb, md->group); | 2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2693 | 2680 | + entry->start_blk | |
2694 | /* balance refcounts from ext4_mb_free_metadata() */ | 2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2695 | page_cache_release(e4b.bd_buddy_page); | 2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, |
2696 | page_cache_release(e4b.bd_bitmap_page); | 2683 | (unsigned long long) discard_block, entry->count); |
2697 | 2684 | sb_issue_discard(sb, discard_block, entry->count); | |
2698 | kfree(md); | 2685 | |
2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2699 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
2700 | 2688 | } | |
2701 | } while (md); | ||
2702 | 2689 | ||
2703 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2704 | } | 2691 | } |
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2712 | 2699 | ||
2713 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2700 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2714 | { | 2701 | { |
2702 | #ifdef CONFIG_PROC_FS | ||
2715 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2703 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2716 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2717 | struct proc_dir_entry *proc; | 2705 | struct proc_dir_entry *proc; |
@@ -2735,10 +2723,14 @@ err_out: | |||
2735 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | 2723 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2736 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | 2724 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2737 | return -ENOMEM; | 2725 | return -ENOMEM; |
2726 | #else | ||
2727 | return 0; | ||
2728 | #endif | ||
2738 | } | 2729 | } |
2739 | 2730 | ||
2740 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2731 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2741 | { | 2732 | { |
2733 | #ifdef CONFIG_PROC_FS | ||
2742 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2734 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2743 | 2735 | ||
2744 | if (sbi->s_proc == NULL) | 2736 | if (sbi->s_proc == NULL) |
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | |||
2750 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); | 2742 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2751 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | 2743 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2752 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | 2744 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2753 | 2745 | #endif | |
2754 | return 0; | 2746 | return 0; |
2755 | } | 2747 | } |
2756 | 2748 | ||
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void) | |||
2771 | kmem_cache_destroy(ext4_pspace_cachep); | 2763 | kmem_cache_destroy(ext4_pspace_cachep); |
2772 | return -ENOMEM; | 2764 | return -ENOMEM; |
2773 | } | 2765 | } |
2766 | |||
2767 | ext4_free_ext_cachep = | ||
2768 | kmem_cache_create("ext4_free_block_extents", | ||
2769 | sizeof(struct ext4_free_data), | ||
2770 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2771 | if (ext4_free_ext_cachep == NULL) { | ||
2772 | kmem_cache_destroy(ext4_pspace_cachep); | ||
2773 | kmem_cache_destroy(ext4_ac_cachep); | ||
2774 | return -ENOMEM; | ||
2775 | } | ||
2774 | return 0; | 2776 | return 0; |
2775 | } | 2777 | } |
2776 | 2778 | ||
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void) | |||
2779 | /* XXX: synchronize_rcu(); */ | 2781 | /* XXX: synchronize_rcu(); */ |
2780 | kmem_cache_destroy(ext4_pspace_cachep); | 2782 | kmem_cache_destroy(ext4_pspace_cachep); |
2781 | kmem_cache_destroy(ext4_ac_cachep); | 2783 | kmem_cache_destroy(ext4_ac_cachep); |
2784 | kmem_cache_destroy(ext4_free_ext_cachep); | ||
2782 | } | 2785 | } |
2783 | 2786 | ||
2784 | 2787 | ||
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4324 | goto out1; | 4327 | goto out1; |
4325 | } | 4328 | } |
4326 | 4329 | ||
4327 | ext4_mb_poll_new_transaction(sb, handle); | ||
4328 | |||
4329 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4330 | if (*errp) { | 4331 | if (*errp) { |
4331 | ar->len = 0; | 4332 | ar->len = 0; |
@@ -4384,35 +4385,20 @@ out1: | |||
4384 | 4385 | ||
4385 | return block; | 4386 | return block; |
4386 | } | 4387 | } |
4387 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4388 | handle_t *handle) | ||
4389 | { | ||
4390 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4391 | |||
4392 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | ||
4393 | return; | ||
4394 | |||
4395 | /* new transaction! time to close last one and free blocks for | ||
4396 | * committed transaction. we know that only transaction can be | ||
4397 | * active, so previos transaction can be being logged and we | ||
4398 | * know that transaction before previous is known to be already | ||
4399 | * logged. this means that now we may free blocks freed in all | ||
4400 | * transactions before previous one. hope I'm clear enough ... */ | ||
4401 | 4388 | ||
4402 | spin_lock(&sbi->s_md_lock); | 4389 | /* |
4403 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | 4390 | * We can merge two free data extents only if the physical blocks |
4404 | mb_debug("new transaction %lu, old %lu\n", | 4391 | * are contiguous, AND the extents were freed by the same transaction, |
4405 | (unsigned long) handle->h_transaction->t_tid, | 4392 | * AND the blocks are associated with the same group. |
4406 | (unsigned long) sbi->s_last_transaction); | 4393 | */ |
4407 | list_splice_init(&sbi->s_closed_transaction, | 4394 | static int can_merge(struct ext4_free_data *entry1, |
4408 | &sbi->s_committed_transaction); | 4395 | struct ext4_free_data *entry2) |
4409 | list_splice_init(&sbi->s_active_transaction, | 4396 | { |
4410 | &sbi->s_closed_transaction); | 4397 | if ((entry1->t_tid == entry2->t_tid) && |
4411 | sbi->s_last_transaction = handle->h_transaction->t_tid; | 4398 | (entry1->group == entry2->group) && |
4412 | } | 4399 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
4413 | spin_unlock(&sbi->s_md_lock); | 4400 | return 1; |
4414 | 4401 | return 0; | |
4415 | ext4_mb_free_committed_blocks(sb); | ||
4416 | } | 4402 | } |
4417 | 4403 | ||
4418 | static noinline_for_stack int | 4404 | static noinline_for_stack int |
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4422 | struct ext4_group_info *db = e4b->bd_info; | 4408 | struct ext4_group_info *db = e4b->bd_info; |
4423 | struct super_block *sb = e4b->bd_sb; | 4409 | struct super_block *sb = e4b->bd_sb; |
4424 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4425 | struct ext4_free_metadata *md; | 4411 | struct ext4_free_data *entry, *new_entry; |
4426 | int i; | 4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
4413 | struct rb_node *parent = NULL, *new_node; | ||
4414 | |||
4427 | 4415 | ||
4428 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4416 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4429 | BUG_ON(e4b->bd_buddy_page == NULL); | 4417 | BUG_ON(e4b->bd_buddy_page == NULL); |
4430 | 4418 | ||
4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4420 | new_entry->start_blk = block; | ||
4421 | new_entry->group = group; | ||
4422 | new_entry->count = count; | ||
4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4424 | new_node = &new_entry->node; | ||
4425 | |||
4431 | ext4_lock_group(sb, group); | 4426 | ext4_lock_group(sb, group); |
4432 | for (i = 0; i < count; i++) { | 4427 | if (!*n) { |
4433 | md = db->bb_md_cur; | 4428 | /* first free block extent. We need to | ||
4434 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4429 | protect buddy cache from being freed, |
4435 | db->bb_md_cur = NULL; | 4430 | * otherwise we'll refresh it from |
4436 | md = NULL; | 4431 | * on-disk bitmap and lose not-yet-available |
4432 | * blocks */ | ||
4433 | page_cache_get(e4b->bd_buddy_page); | ||
4434 | page_cache_get(e4b->bd_bitmap_page); | ||
4435 | } | ||
4436 | while (*n) { | ||
4437 | parent = *n; | ||
4438 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
4439 | if (block < entry->start_blk) | ||
4440 | n = &(*n)->rb_left; | ||
4441 | else if (block >= (entry->start_blk + entry->count)) | ||
4442 | n = &(*n)->rb_right; | ||
4443 | else { | ||
4444 | ext4_error(sb, __func__, | ||
4445 | "Double free of blocks %d (%d %d)\n", | ||
4446 | block, entry->start_blk, entry->count); | ||
4447 | return 0; | ||
4437 | } | 4448 | } |
4449 | } | ||
4438 | 4450 | ||
4439 | if (md == NULL) { | 4451 | rb_link_node(new_node, parent, n); |
4440 | ext4_unlock_group(sb, group); | 4452 | rb_insert_color(new_node, &db->bb_free_root); |
4441 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4453 | |
4442 | if (md == NULL) | 4454 | /* Now try to see if the extent can be merged to left and right */ | ||
4443 | return -ENOMEM; | 4455 | node = rb_prev(new_node); |
4444 | md->num = 0; | 4456 | if (node) { |
4445 | md->group = group; | 4457 | entry = rb_entry(node, struct ext4_free_data, node); |
4446 | 4458 | if (can_merge(entry, new_entry)) { | |
4447 | ext4_lock_group(sb, group); | 4459 | new_entry->start_blk = entry->start_blk; |
4448 | if (db->bb_md_cur == NULL) { | 4460 | new_entry->count += entry->count; |
4449 | spin_lock(&sbi->s_md_lock); | 4461 | rb_erase(node, &(db->bb_free_root)); |
4450 | list_add(&md->list, &sbi->s_active_transaction); | 4462 | spin_lock(&sbi->s_md_lock); |
4451 | spin_unlock(&sbi->s_md_lock); | 4463 | list_del(&entry->list); |
4452 | /* protect buddy cache from being freed, | 4464 | spin_unlock(&sbi->s_md_lock); |
4453 | * otherwise we'll refresh it from | 4465 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4454 | * on-disk bitmap and lose not-yet-available | ||
4455 | * blocks */ | ||
4456 | page_cache_get(e4b->bd_buddy_page); | ||
4457 | page_cache_get(e4b->bd_bitmap_page); | ||
4458 | db->bb_md_cur = md; | ||
4459 | db->bb_tid = handle->h_transaction->t_tid; | ||
4460 | mb_debug("new md 0x%p for group %lu\n", | ||
4461 | md, md->group); | ||
4462 | } else { | ||
4463 | kfree(md); | ||
4464 | md = db->bb_md_cur; | ||
4465 | } | ||
4466 | } | 4466 | } |
4467 | } | ||
4467 | 4468 | ||
4468 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4469 | node = rb_next(new_node); |
4469 | md->blocks[md->num] = block + i; | 4470 | if (node) { |
4470 | md->num++; | 4471 | entry = rb_entry(node, struct ext4_free_data, node); |
4471 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4472 | if (can_merge(new_entry, entry)) { |
4472 | /* no more space, put full container on a sb's list */ | 4473 | new_entry->count += entry->count; |
4473 | db->bb_md_cur = NULL; | 4474 | rb_erase(node, &(db->bb_free_root)); |
4475 | spin_lock(&sbi->s_md_lock); | ||
4476 | list_del(&entry->list); | ||
4477 | spin_unlock(&sbi->s_md_lock); | ||
4478 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4474 | } | 4479 | } |
4475 | } | 4480 | } |
4481 | /* Add the extent to transaction's private list */ | ||
4482 | spin_lock(&sbi->s_md_lock); | ||
4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | ||
4484 | spin_unlock(&sbi->s_md_lock); | ||
4476 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
4477 | return 0; | 4486 | return 0; |
4478 | } | 4487 | } |
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4500 | 4509 | ||
4501 | *freed = 0; | 4510 | *freed = 0; |
4502 | 4511 | ||
4503 | ext4_mb_poll_new_transaction(sb, handle); | ||
4504 | |||
4505 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
4506 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
4507 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b3b4828f8b89..b5dff1fff1e5 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/version.h> | 20 | #include <linux/version.h> |
21 | #include <linux/blkdev.h> | ||
22 | #include <linux/marker.h> | ||
21 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
22 | #include "ext4.h" | 24 | #include "ext4.h" |
23 | #include "group.h" | 25 | #include "group.h" |
@@ -98,23 +100,29 @@ | |||
98 | 100 | ||
99 | static struct kmem_cache *ext4_pspace_cachep; | 101 | static struct kmem_cache *ext4_pspace_cachep; |
100 | static struct kmem_cache *ext4_ac_cachep; | 102 | static struct kmem_cache *ext4_ac_cachep; |
103 | static struct kmem_cache *ext4_free_ext_cachep; | ||
101 | 104 | ||
102 | #ifdef EXT4_BB_MAX_BLOCKS | 105 | struct ext4_free_data { |
103 | #undef EXT4_BB_MAX_BLOCKS | 106 | /* this links the free block information from group_info */ |
104 | #endif | 107 | struct rb_node node; |
105 | #define EXT4_BB_MAX_BLOCKS 30 | ||
106 | 108 | ||
107 | struct ext4_free_metadata { | 109 | /* this links the free block information from ext4_sb_info */ |
108 | ext4_group_t group; | ||
109 | unsigned short num; | ||
110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
111 | struct list_head list; | 110 | struct list_head list; |
111 | |||
112 | /* group which free block extent belongs */ | ||
113 | ext4_group_t group; | ||
114 | |||
115 | /* free block extent */ | ||
116 | ext4_grpblk_t start_blk; | ||
117 | ext4_grpblk_t count; | ||
118 | |||
119 | /* transaction which freed this extent */ | ||
120 | tid_t t_tid; | ||
112 | }; | 121 | }; |
113 | 122 | ||
114 | struct ext4_group_info { | 123 | struct ext4_group_info { |
115 | unsigned long bb_state; | 124 | unsigned long bb_state; |
116 | unsigned long bb_tid; | 125 | struct rb_root bb_free_root; |
117 | struct ext4_free_metadata *bb_md_cur; | ||
118 | unsigned short bb_first_free; | 126 | unsigned short bb_first_free; |
119 | unsigned short bb_free; | 127 | unsigned short bb_free; |
120 | unsigned short bb_fragments; | 128 | unsigned short bb_fragments; |
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | |||
261 | 269 | ||
262 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
263 | ext4_group_t group); | 271 | ext4_group_t group); |
264 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
265 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
266 | static void ext4_mb_return_to_preallocation(struct inode *inode, | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
267 | struct ext4_buddy *e4b, sector_t block, | 273 | struct ext4_buddy *e4b, sector_t block, |
268 | int count); | 274 | int count); |
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, | |||
270 | struct super_block *, struct ext4_prealloc_space *pa); | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
271 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
272 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
279 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
273 | 280 | ||
274 | 281 | ||
275 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 92db9e945147..63adcb792988 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1061,7 +1061,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1061 | struct dentry *ext4_get_parent(struct dentry *child) | 1061 | struct dentry *ext4_get_parent(struct dentry *child) |
1062 | { | 1062 | { |
1063 | unsigned long ino; | 1063 | unsigned long ino; |
1064 | struct dentry *parent; | ||
1065 | struct inode *inode; | 1064 | struct inode *inode; |
1066 | static const struct qstr dotdot = { | 1065 | static const struct qstr dotdot = { |
1067 | .name = "..", | 1066 | .name = "..", |
@@ -1083,16 +1082,7 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1083 | return ERR_PTR(-EIO); | 1082 | return ERR_PTR(-EIO); |
1084 | } | 1083 | } |
1085 | 1084 | ||
1086 | inode = ext4_iget(child->d_inode->i_sb, ino); | 1085 | return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); |
1087 | if (IS_ERR(inode)) | ||
1088 | return ERR_CAST(inode); | ||
1089 | |||
1090 | parent = d_alloc_anon(inode); | ||
1091 | if (!parent) { | ||
1092 | iput(inode); | ||
1093 | parent = ERR_PTR(-ENOMEM); | ||
1094 | } | ||
1095 | return parent; | ||
1096 | } | 1086 | } |
1097 | 1087 | ||
1098 | #define S_SHIFT 12 | 1088 | #define S_SHIFT 12 |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dea8f13c2fd9..bdddea14e782 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
374 | */ | 374 | */ |
375 | } | 375 | } |
376 | 376 | ||
377 | int ext4_update_compat_feature(handle_t *handle, | ||
378 | struct super_block *sb, __u32 compat) | ||
379 | { | ||
380 | int err = 0; | ||
381 | if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { | ||
382 | err = ext4_journal_get_write_access(handle, | ||
383 | EXT4_SB(sb)->s_sbh); | ||
384 | if (err) | ||
385 | return err; | ||
386 | EXT4_SET_COMPAT_FEATURE(sb, compat); | ||
387 | sb->s_dirt = 1; | ||
388 | handle->h_sync = 1; | ||
389 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
390 | "call ext4_journal_dirty_met adata"); | ||
391 | err = ext4_journal_dirty_metadata(handle, | ||
392 | EXT4_SB(sb)->s_sbh); | ||
393 | } | ||
394 | return err; | ||
395 | } | ||
396 | |||
397 | int ext4_update_rocompat_feature(handle_t *handle, | ||
398 | struct super_block *sb, __u32 rocompat) | ||
399 | { | ||
400 | int err = 0; | ||
401 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { | ||
402 | err = ext4_journal_get_write_access(handle, | ||
403 | EXT4_SB(sb)->s_sbh); | ||
404 | if (err) | ||
405 | return err; | ||
406 | EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); | ||
407 | sb->s_dirt = 1; | ||
408 | handle->h_sync = 1; | ||
409 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
410 | "call ext4_journal_dirty_met adata"); | ||
411 | err = ext4_journal_dirty_metadata(handle, | ||
412 | EXT4_SB(sb)->s_sbh); | ||
413 | } | ||
414 | return err; | ||
415 | } | ||
416 | |||
417 | int ext4_update_incompat_feature(handle_t *handle, | ||
418 | struct super_block *sb, __u32 incompat) | ||
419 | { | ||
420 | int err = 0; | ||
421 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { | ||
422 | err = ext4_journal_get_write_access(handle, | ||
423 | EXT4_SB(sb)->s_sbh); | ||
424 | if (err) | ||
425 | return err; | ||
426 | EXT4_SET_INCOMPAT_FEATURE(sb, incompat); | ||
427 | sb->s_dirt = 1; | ||
428 | handle->h_sync = 1; | ||
429 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
430 | "call ext4_journal_dirty_met adata"); | ||
431 | err = ext4_journal_dirty_metadata(handle, | ||
432 | EXT4_SB(sb)->s_sbh); | ||
433 | } | ||
434 | return err; | ||
435 | } | ||
436 | |||
437 | /* | 377 | /* |
438 | * Open the external journal device | 378 | * Open the external journal device |
439 | */ | 379 | */ |
@@ -459,7 +399,7 @@ fail: | |||
459 | static int ext4_blkdev_put(struct block_device *bdev) | 399 | static int ext4_blkdev_put(struct block_device *bdev) |
460 | { | 400 | { |
461 | bd_release(bdev); | 401 | bd_release(bdev); |
462 | return blkdev_put(bdev); | 402 | return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
463 | } | 403 | } |
464 | 404 | ||
465 | static int ext4_blkdev_remove(struct ext4_sb_info *sbi) | 405 | static int ext4_blkdev_remove(struct ext4_sb_info *sbi) |
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = { | |||
904 | enum { | 844 | enum { |
905 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 845 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
906 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 846 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
907 | Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, | 847 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, |
908 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 848 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
909 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 849 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
910 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 850 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
@@ -915,7 +855,7 @@ enum { | |||
915 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 855 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
916 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 856 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
917 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 857 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
918 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 858 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
919 | Opt_inode_readahead_blks | 859 | Opt_inode_readahead_blks |
920 | }; | 860 | }; |
921 | 861 | ||
@@ -933,8 +873,6 @@ static const match_table_t tokens = { | |||
933 | {Opt_err_panic, "errors=panic"}, | 873 | {Opt_err_panic, "errors=panic"}, |
934 | {Opt_err_ro, "errors=remount-ro"}, | 874 | {Opt_err_ro, "errors=remount-ro"}, |
935 | {Opt_nouid32, "nouid32"}, | 875 | {Opt_nouid32, "nouid32"}, |
936 | {Opt_nocheck, "nocheck"}, | ||
937 | {Opt_nocheck, "check=none"}, | ||
938 | {Opt_debug, "debug"}, | 876 | {Opt_debug, "debug"}, |
939 | {Opt_oldalloc, "oldalloc"}, | 877 | {Opt_oldalloc, "oldalloc"}, |
940 | {Opt_orlov, "orlov"}, | 878 | {Opt_orlov, "orlov"}, |
@@ -973,8 +911,6 @@ static const match_table_t tokens = { | |||
973 | {Opt_extents, "extents"}, | 911 | {Opt_extents, "extents"}, |
974 | {Opt_noextents, "noextents"}, | 912 | {Opt_noextents, "noextents"}, |
975 | {Opt_i_version, "i_version"}, | 913 | {Opt_i_version, "i_version"}, |
976 | {Opt_mballoc, "mballoc"}, | ||
977 | {Opt_nomballoc, "nomballoc"}, | ||
978 | {Opt_stripe, "stripe=%u"}, | 914 | {Opt_stripe, "stripe=%u"}, |
979 | {Opt_resize, "resize"}, | 915 | {Opt_resize, "resize"}, |
980 | {Opt_delalloc, "delalloc"}, | 916 | {Opt_delalloc, "delalloc"}, |
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
1073 | case Opt_nouid32: | 1009 | case Opt_nouid32: |
1074 | set_opt(sbi->s_mount_opt, NO_UID32); | 1010 | set_opt(sbi->s_mount_opt, NO_UID32); |
1075 | break; | 1011 | break; |
1076 | case Opt_nocheck: | ||
1077 | clear_opt(sbi->s_mount_opt, CHECK); | ||
1078 | break; | ||
1079 | case Opt_debug: | 1012 | case Opt_debug: |
1080 | set_opt(sbi->s_mount_opt, DEBUG); | 1013 | set_opt(sbi->s_mount_opt, DEBUG); |
1081 | break; | 1014 | break; |
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1618 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1551 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1619 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1552 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1620 | "Block bitmap for group %lu not in group " | 1553 | "Block bitmap for group %lu not in group " |
1621 | "(block %llu)!", i, block_bitmap); | 1554 | "(block %llu)!\n", i, block_bitmap); |
1622 | return 0; | 1555 | return 0; |
1623 | } | 1556 | } |
1624 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1557 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
1625 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1558 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
1626 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1559 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1627 | "Inode bitmap for group %lu not in group " | 1560 | "Inode bitmap for group %lu not in group " |
1628 | "(block %llu)!", i, inode_bitmap); | 1561 | "(block %llu)!\n", i, inode_bitmap); |
1629 | return 0; | 1562 | return 0; |
1630 | } | 1563 | } |
1631 | inode_table = ext4_inode_table(sb, gdp); | 1564 | inode_table = ext4_inode_table(sb, gdp); |
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1633 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1566 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
1634 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1567 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1635 | "Inode table for group %lu not in group " | 1568 | "Inode table for group %lu not in group " |
1636 | "(block %llu)!", i, inode_table); | 1569 | "(block %llu)!\n", i, inode_table); |
1637 | return 0; | 1570 | return 0; |
1638 | } | 1571 | } |
1639 | spin_lock(sb_bgl_lock(sbi, i)); | 1572 | spin_lock(sb_bgl_lock(sbi, i)); |
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1778 | * | 1711 | * |
1779 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 1712 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
1780 | */ | 1713 | */ |
1781 | static loff_t ext4_max_size(int blkbits) | 1714 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
1782 | { | 1715 | { |
1783 | loff_t res; | 1716 | loff_t res; |
1784 | loff_t upper_limit = MAX_LFS_FILESIZE; | 1717 | loff_t upper_limit = MAX_LFS_FILESIZE; |
1785 | 1718 | ||
1786 | /* small i_blocks in vfs inode? */ | 1719 | /* small i_blocks in vfs inode? */ |
1787 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1720 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1788 | /* | 1721 | /* |
1789 | * CONFIG_LSF is not enabled implies the inode | 1722 | * CONFIG_LSF is not enabled implies the inode |
1790 | * i_block represent total blocks in 512 bytes | 1723 | * i_block represent total blocks in 512 bytes |
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits) | |||
1814 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. | 1747 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
1815 | * We need to be 1 filesystem block less than the 2^48 sector limit. | 1748 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
1816 | */ | 1749 | */ |
1817 | static loff_t ext4_max_bitmap_size(int bits) | 1750 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) |
1818 | { | 1751 | { |
1819 | loff_t res = EXT4_NDIR_BLOCKS; | 1752 | loff_t res = EXT4_NDIR_BLOCKS; |
1820 | int meta_blocks; | 1753 | int meta_blocks; |
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits) | |||
1827 | * total number of 512 bytes blocks of the file | 1760 | * total number of 512 bytes blocks of the file |
1828 | */ | 1761 | */ |
1829 | 1762 | ||
1830 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1763 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1831 | /* | 1764 | /* |
1832 | * CONFIG_LSF is not enabled implies the inode | 1765 | * !has_huge_files or CONFIG_LSF is not enabled |
1833 | * i_block represent total blocks in 512 bytes | 1766 | * implies the inode i_block represent total blocks in |
1834 | * 32 == size of vfs inode i_blocks * 8 | 1767 | * 512 bytes 32 == size of vfs inode i_blocks * 8 |
1835 | */ | 1768 | */ |
1836 | upper_limit = (1LL << 32) - 1; | 1769 | upper_limit = (1LL << 32) - 1; |
1837 | 1770 | ||
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1940 | int blocksize; | 1873 | int blocksize; |
1941 | int db_count; | 1874 | int db_count; |
1942 | int i; | 1875 | int i; |
1943 | int needs_recovery; | 1876 | int needs_recovery, has_huge_files; |
1944 | __le32 features; | 1877 | __le32 features; |
1945 | __u64 blocks_count; | 1878 | __u64 blocks_count; |
1946 | int err; | 1879 | int err; |
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2081 | sb->s_id, le32_to_cpu(features)); | 2014 | sb->s_id, le32_to_cpu(features)); |
2082 | goto failed_mount; | 2015 | goto failed_mount; |
2083 | } | 2016 | } |
2084 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 2017 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2018 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2019 | if (has_huge_files) { | ||
2085 | /* | 2020 | /* |
2086 | * Large file size enabled file system can only be | 2021 | * Large file size enabled file system can only be |
2087 | * mount if kernel is build with CONFIG_LSF | 2022 | * mount if kernel is build with CONFIG_LSF |
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2131 | } | 2066 | } |
2132 | } | 2067 | } |
2133 | 2068 | ||
2134 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); | 2069 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2135 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); | 2070 | has_huge_files); |
2071 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | ||
2136 | 2072 | ||
2137 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 2073 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
2138 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; | 2074 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; |
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2456 | "available.\n"); | 2392 | "available.\n"); |
2457 | } | 2393 | } |
2458 | 2394 | ||
2395 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2396 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2397 | "requested data journaling mode\n"); | ||
2398 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2399 | } else if (test_opt(sb, DELALLOC)) | ||
2400 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2401 | |||
2402 | ext4_ext_init(sb); | ||
2403 | err = ext4_mb_init(sb, needs_recovery); | ||
2404 | if (err) { | ||
2405 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2406 | err); | ||
2407 | goto failed_mount4; | ||
2408 | } | ||
2409 | |||
2459 | /* | 2410 | /* |
2460 | * akpm: core read_super() calls in here with the superblock locked. | 2411 | * akpm: core read_super() calls in here with the superblock locked. |
2461 | * That deadlocks, because orphan cleanup needs to lock the superblock | 2412 | * That deadlocks, because orphan cleanup needs to lock the superblock |
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2475 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2426 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
2476 | "writeback"); | 2427 | "writeback"); |
2477 | 2428 | ||
2478 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2479 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2480 | "requested data journaling mode\n"); | ||
2481 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2482 | } else if (test_opt(sb, DELALLOC)) | ||
2483 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2484 | |||
2485 | ext4_ext_init(sb); | ||
2486 | err = ext4_mb_init(sb, needs_recovery); | ||
2487 | if (err) { | ||
2488 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2489 | err); | ||
2490 | goto failed_mount4; | ||
2491 | } | ||
2492 | |||
2493 | lock_kernel(); | 2429 | lock_kernel(); |
2494 | return 0; | 2430 | return 0; |
2495 | 2431 | ||
@@ -2617,7 +2553,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2617 | if (bd_claim(bdev, sb)) { | 2553 | if (bd_claim(bdev, sb)) { |
2618 | printk(KERN_ERR | 2554 | printk(KERN_ERR |
2619 | "EXT4: failed to claim external journal device.\n"); | 2555 | "EXT4: failed to claim external journal device.\n"); |
2620 | blkdev_put(bdev); | 2556 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
2621 | return NULL; | 2557 | return NULL; |
2622 | } | 2558 | } |
2623 | 2559 | ||
@@ -3392,30 +3328,30 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) | |||
3392 | * Standard function to be called on quota_on | 3328 | * Standard function to be called on quota_on |
3393 | */ | 3329 | */ |
3394 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 3330 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
3395 | char *path, int remount) | 3331 | char *name, int remount) |
3396 | { | 3332 | { |
3397 | int err; | 3333 | int err; |
3398 | struct nameidata nd; | 3334 | struct path path; |
3399 | 3335 | ||
3400 | if (!test_opt(sb, QUOTA)) | 3336 | if (!test_opt(sb, QUOTA)) |
3401 | return -EINVAL; | 3337 | return -EINVAL; |
3402 | /* When remounting, no checks are needed and in fact, path is NULL */ | 3338 | /* When remounting, no checks are needed and in fact, name is NULL */ |
3403 | if (remount) | 3339 | if (remount) |
3404 | return vfs_quota_on(sb, type, format_id, path, remount); | 3340 | return vfs_quota_on(sb, type, format_id, name, remount); |
3405 | 3341 | ||
3406 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | 3342 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
3407 | if (err) | 3343 | if (err) |
3408 | return err; | 3344 | return err; |
3409 | 3345 | ||
3410 | /* Quotafile not on the same filesystem? */ | 3346 | /* Quotafile not on the same filesystem? */ |
3411 | if (nd.path.mnt->mnt_sb != sb) { | 3347 | if (path.mnt->mnt_sb != sb) { |
3412 | path_put(&nd.path); | 3348 | path_put(&path); |
3413 | return -EXDEV; | 3349 | return -EXDEV; |
3414 | } | 3350 | } |
3415 | /* Journaling quota? */ | 3351 | /* Journaling quota? */ |
3416 | if (EXT4_SB(sb)->s_qf_names[type]) { | 3352 | if (EXT4_SB(sb)->s_qf_names[type]) { |
3417 | /* Quotafile not in fs root? */ | 3353 | /* Quotafile not in fs root? */ |
3418 | if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) | 3354 | if (path.dentry->d_parent != sb->s_root) |
3419 | printk(KERN_WARNING | 3355 | printk(KERN_WARNING |
3420 | "EXT4-fs: Quota file not on filesystem root. " | 3356 | "EXT4-fs: Quota file not on filesystem root. " |
3421 | "Journaled quota will not work.\n"); | 3357 | "Journaled quota will not work.\n"); |
@@ -3425,7 +3361,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3425 | * When we journal data on quota file, we have to flush journal to see | 3361 | * When we journal data on quota file, we have to flush journal to see |
3426 | * all updates to the file when we bypass pagecache... | 3362 | * all updates to the file when we bypass pagecache... |
3427 | */ | 3363 | */ |
3428 | if (ext4_should_journal_data(nd.path.dentry->d_inode)) { | 3364 | if (ext4_should_journal_data(path.dentry->d_inode)) { |
3429 | /* | 3365 | /* |
3430 | * We don't need to lock updates but journal_flush() could | 3366 | * We don't need to lock updates but journal_flush() could |
3431 | * otherwise be livelocked... | 3367 | * otherwise be livelocked... |
@@ -3434,13 +3370,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3434 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 3370 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
3435 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3371 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3436 | if (err) { | 3372 | if (err) { |
3437 | path_put(&nd.path); | 3373 | path_put(&path); |
3438 | return err; | 3374 | return err; |
3439 | } | 3375 | } |
3440 | } | 3376 | } |
3441 | 3377 | ||
3442 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 3378 | err = vfs_quota_on_path(sb, type, format_id, &path); |
3443 | path_put(&nd.path); | 3379 | path_put(&path); |
3444 | return err; | 3380 | return err; |
3445 | } | 3381 | } |
3446 | 3382 | ||
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index cd4a0162e10d..bae1c3292522 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -839,6 +839,7 @@ const struct file_operations fat_dir_operations = { | |||
839 | .compat_ioctl = fat_compat_dir_ioctl, | 839 | .compat_ioctl = fat_compat_dir_ioctl, |
840 | #endif | 840 | #endif |
841 | .fsync = file_fsync, | 841 | .fsync = file_fsync, |
842 | .llseek = generic_file_llseek, | ||
842 | }; | 843 | }; |
843 | 844 | ||
844 | static int fat_get_short_entry(struct inode *dir, loff_t *pos, | 845 | static int fat_get_short_entry(struct inode *dir, loff_t *pos, |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d12cdf2a0406..19eafbe3c379 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -681,33 +681,24 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, | |||
681 | inode = NULL; | 681 | inode = NULL; |
682 | } | 682 | } |
683 | } | 683 | } |
684 | if (!inode) { | ||
685 | /* For now, do nothing | ||
686 | * What we could do is: | ||
687 | * follow the file starting at fh[4], and record | ||
688 | * the ".." entry, and the name of the fh[2] entry. | ||
689 | * The follow the ".." file finding the next step up. | ||
690 | * This way we build a path to the root of | ||
691 | * the tree. If this works, we lookup the path and so | ||
692 | * get this inode into the cache. | ||
693 | * Finally try the fat_iget lookup again | ||
694 | * If that fails, then weare totally out of luck | ||
695 | * But all that is for another day | ||
696 | */ | ||
697 | } | ||
698 | if (!inode) | ||
699 | return ERR_PTR(-ESTALE); | ||
700 | |||
701 | 684 | ||
702 | /* now to find a dentry. | 685 | /* |
703 | * If possible, get a well-connected one | 686 | * For now, do nothing if the inode is not found. |
687 | * | ||
688 | * What we could do is: | ||
689 | * | ||
690 | * - follow the file starting at fh[4], and record the ".." entry, | ||
691 | * and the name of the fh[2] entry. | ||
692 | * - then follow the ".." file finding the next step up. | ||
693 | * | ||
694 | * This way we build a path to the root of the tree. If this works, we | ||
695 | * lookup the path and so get this inode into the cache. Finally try | ||
696 | * the fat_iget lookup again. If that fails, then we are totally out | ||
697 | * of luck. But all that is for another day | ||
704 | */ | 698 | */ |
705 | result = d_alloc_anon(inode); | 699 | result = d_obtain_alias(inode); |
706 | if (result == NULL) { | 700 | if (!IS_ERR(result)) |
707 | iput(inode); | 701 | result->d_op = sb->s_root->d_op; |
708 | return ERR_PTR(-ENOMEM); | ||
709 | } | ||
710 | result->d_op = sb->s_root->d_op; | ||
711 | return result; | 702 | return result; |
712 | } | 703 | } |
713 | 704 | ||
@@ -754,15 +745,8 @@ static struct dentry *fat_get_parent(struct dentry *child) | |||
754 | } | 745 | } |
755 | inode = fat_build_inode(sb, de, i_pos); | 746 | inode = fat_build_inode(sb, de, i_pos); |
756 | brelse(bh); | 747 | brelse(bh); |
757 | if (IS_ERR(inode)) { | 748 | |
758 | parent = ERR_CAST(inode); | 749 | parent = d_obtain_alias(inode); |
759 | goto out; | ||
760 | } | ||
761 | parent = d_alloc_anon(inode); | ||
762 | if (!parent) { | ||
763 | iput(inode); | ||
764 | parent = ERR_PTR(-ENOMEM); | ||
765 | } | ||
766 | out: | 750 | out: |
767 | unlock_super(sb); | 751 | unlock_super(sb); |
768 | 752 | ||
@@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp) | |||
51 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); | 51 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); |
52 | 52 | ||
53 | switch (filp->f_mode) { | 53 | switch (filp->f_mode) { |
54 | case 1: | 54 | case FMODE_READ: |
55 | /* | 55 | /* |
56 | * O_RDONLY | 56 | * O_RDONLY |
57 | * POSIX.1 says that O_NONBLOCK means return with the FIFO | 57 | * POSIX.1 says that O_NONBLOCK means return with the FIFO |
@@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp) | |||
76 | } | 76 | } |
77 | break; | 77 | break; |
78 | 78 | ||
79 | case 2: | 79 | case FMODE_WRITE: |
80 | /* | 80 | /* |
81 | * O_WRONLY | 81 | * O_WRONLY |
82 | * POSIX.1 says that O_NONBLOCK means return -1 with | 82 | * POSIX.1 says that O_NONBLOCK means return -1 with |
@@ -98,7 +98,7 @@ static int fifo_open(struct inode *inode, struct file *filp) | |||
98 | } | 98 | } |
99 | break; | 99 | break; |
100 | 100 | ||
101 | case 3: | 101 | case FMODE_READ | FMODE_WRITE: |
102 | /* | 102 | /* |
103 | * O_RDWR | 103 | * O_RDWR |
104 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. | 104 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. |
diff --git a/fs/file_table.c b/fs/file_table.c index f45a4493f9e7..efc06faede6c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp); | |||
161 | * code should be moved into this function. | 161 | * code should be moved into this function. |
162 | */ | 162 | */ |
163 | struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, | 163 | struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, |
164 | mode_t mode, const struct file_operations *fop) | 164 | fmode_t mode, const struct file_operations *fop) |
165 | { | 165 | { |
166 | struct file *file; | 166 | struct file *file; |
167 | struct path; | 167 | struct path; |
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file); | |||
193 | * of this should be moving to alloc_file(). | 193 | * of this should be moving to alloc_file(). |
194 | */ | 194 | */ |
195 | int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, | 195 | int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, |
196 | mode_t mode, const struct file_operations *fop) | 196 | fmode_t mode, const struct file_operations *fop) |
197 | { | 197 | { |
198 | int error = 0; | 198 | int error = 0; |
199 | file->f_path.dentry = dentry; | 199 | file->f_path.dentry = dentry; |
diff --git a/fs/filesystems.c b/fs/filesystems.c index f37f87262837..d0e20ced62dd 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -8,6 +8,8 @@ | |||
8 | 8 | ||
9 | #include <linux/syscalls.h> | 9 | #include <linux/syscalls.h> |
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/proc_fs.h> | ||
12 | #include <linux/seq_file.h> | ||
11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
12 | #include <linux/kmod.h> | 14 | #include <linux/kmod.h> |
13 | #include <linux/init.h> | 15 | #include <linux/init.h> |
@@ -214,6 +216,43 @@ int get_filesystem_list(char * buf) | |||
214 | return len; | 216 | return len; |
215 | } | 217 | } |
216 | 218 | ||
219 | #ifdef CONFIG_PROC_FS | ||
220 | static int filesystems_proc_show(struct seq_file *m, void *v) | ||
221 | { | ||
222 | struct file_system_type * tmp; | ||
223 | |||
224 | read_lock(&file_systems_lock); | ||
225 | tmp = file_systems; | ||
226 | while (tmp) { | ||
227 | seq_printf(m, "%s\t%s\n", | ||
228 | (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", | ||
229 | tmp->name); | ||
230 | tmp = tmp->next; | ||
231 | } | ||
232 | read_unlock(&file_systems_lock); | ||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | static int filesystems_proc_open(struct inode *inode, struct file *file) | ||
237 | { | ||
238 | return single_open(file, filesystems_proc_show, NULL); | ||
239 | } | ||
240 | |||
241 | static const struct file_operations filesystems_proc_fops = { | ||
242 | .open = filesystems_proc_open, | ||
243 | .read = seq_read, | ||
244 | .llseek = seq_lseek, | ||
245 | .release = single_release, | ||
246 | }; | ||
247 | |||
248 | static int __init proc_filesystems_init(void) | ||
249 | { | ||
250 | proc_create("filesystems", 0, NULL, &filesystems_proc_fops); | ||
251 | return 0; | ||
252 | } | ||
253 | module_init(proc_filesystems_init); | ||
254 | #endif | ||
255 | |||
217 | struct file_system_type *get_fs_type(const char *name) | 256 | struct file_system_type *get_fs_type(const char *name) |
218 | { | 257 | { |
219 | struct file_system_type *fs; | 258 | struct file_system_type *fs; |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2bada6bbc317..34930a964b82 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file, | |||
101 | file->f_op = &fuse_direct_io_file_operations; | 101 | file->f_op = &fuse_direct_io_file_operations; |
102 | if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) | 102 | if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) |
103 | invalidate_inode_pages2(inode->i_mapping); | 103 | invalidate_inode_pages2(inode->i_mapping); |
104 | if (outarg->open_flags & FOPEN_NONSEEKABLE) | ||
105 | nonseekable_open(inode, file); | ||
104 | ff->fh = outarg->fh; | 106 | ff->fh = outarg->fh; |
105 | file->private_data = fuse_file_get(ff); | 107 | file->private_data = fuse_file_get(ff); |
106 | } | 108 | } |
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) | |||
1448 | mutex_lock(&inode->i_mutex); | 1450 | mutex_lock(&inode->i_mutex); |
1449 | switch (origin) { | 1451 | switch (origin) { |
1450 | case SEEK_END: | 1452 | case SEEK_END: |
1453 | retval = fuse_update_attributes(inode, NULL, file, NULL); | ||
1454 | if (retval) | ||
1455 | return retval; | ||
1451 | offset += i_size_read(inode); | 1456 | offset += i_size_read(inode); |
1452 | break; | 1457 | break; |
1453 | case SEEK_CUR: | 1458 | case SEEK_CUR: |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3a876076bdd1..35accfdd747f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -6,6 +6,9 @@ | |||
6 | See the file COPYING. | 6 | See the file COPYING. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifndef _FS_FUSE_I_H | ||
10 | #define _FS_FUSE_I_H | ||
11 | |||
9 | #include <linux/fuse.h> | 12 | #include <linux/fuse.h> |
10 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
11 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode); | |||
655 | void fuse_release_nowrite(struct inode *inode); | 658 | void fuse_release_nowrite(struct inode *inode); |
656 | 659 | ||
657 | u64 fuse_get_attr_version(struct fuse_conn *fc); | 660 | u64 fuse_get_attr_version(struct fuse_conn *fc); |
661 | |||
662 | #endif /* _FS_FUSE_I_H */ | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6a84388cacff..2e99f34b4435 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -596,12 +596,8 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, | |||
596 | if (inode->i_generation != handle->generation) | 596 | if (inode->i_generation != handle->generation) |
597 | goto out_iput; | 597 | goto out_iput; |
598 | 598 | ||
599 | entry = d_alloc_anon(inode); | 599 | entry = d_obtain_alias(inode); |
600 | err = -ENOMEM; | 600 | if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { |
601 | if (!entry) | ||
602 | goto out_iput; | ||
603 | |||
604 | if (get_node_id(inode) != FUSE_ROOT_ID) { | ||
605 | entry->d_op = &fuse_dentry_operations; | 601 | entry->d_op = &fuse_dentry_operations; |
606 | fuse_invalidate_entry_cache(entry); | 602 | fuse_invalidate_entry_cache(entry); |
607 | } | 603 | } |
@@ -696,17 +692,14 @@ static struct dentry *fuse_get_parent(struct dentry *child) | |||
696 | name.name = ".."; | 692 | name.name = ".."; |
697 | err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), | 693 | err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), |
698 | &name, &outarg, &inode); | 694 | &name, &outarg, &inode); |
699 | if (err && err != -ENOENT) | 695 | if (err) { |
696 | if (err == -ENOENT) | ||
697 | return ERR_PTR(-ESTALE); | ||
700 | return ERR_PTR(err); | 698 | return ERR_PTR(err); |
701 | if (err || !inode) | ||
702 | return ERR_PTR(-ESTALE); | ||
703 | |||
704 | parent = d_alloc_anon(inode); | ||
705 | if (!parent) { | ||
706 | iput(inode); | ||
707 | return ERR_PTR(-ENOMEM); | ||
708 | } | 699 | } |
709 | if (get_node_id(inode) != FUSE_ROOT_ID) { | 700 | |
701 | parent = d_obtain_alias(inode); | ||
702 | if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { | ||
710 | parent->d_op = &fuse_dentry_operations; | 703 | parent->d_op = &fuse_dentry_operations; |
711 | fuse_invalidate_entry_cache(parent); | 704 | fuse_invalidate_entry_cache(parent); |
712 | } | 705 | } |
@@ -865,7 +858,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
865 | if (is_bdev) { | 858 | if (is_bdev) { |
866 | fc->destroy_req = fuse_request_alloc(); | 859 | fc->destroy_req = fuse_request_alloc(); |
867 | if (!fc->destroy_req) | 860 | if (!fc->destroy_req) |
868 | goto err_put_root; | 861 | goto err_free_init_req; |
869 | } | 862 | } |
870 | 863 | ||
871 | mutex_lock(&fuse_mutex); | 864 | mutex_lock(&fuse_mutex); |
@@ -895,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
895 | 888 | ||
896 | err_unlock: | 889 | err_unlock: |
897 | mutex_unlock(&fuse_mutex); | 890 | mutex_unlock(&fuse_mutex); |
891 | err_free_init_req: | ||
898 | fuse_request_free(init_req); | 892 | fuse_request_free(init_req); |
899 | err_put_root: | 893 | err_put_root: |
900 | dput(root_dentry); | 894 | dput(root_dentry); |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 9cda8536530c..bbb8c36403a9 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -130,28 +130,17 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
130 | static struct dentry *gfs2_get_parent(struct dentry *child) | 130 | static struct dentry *gfs2_get_parent(struct dentry *child) |
131 | { | 131 | { |
132 | struct qstr dotdot; | 132 | struct qstr dotdot; |
133 | struct inode *inode; | ||
134 | struct dentry *dentry; | 133 | struct dentry *dentry; |
135 | 134 | ||
136 | gfs2_str2qstr(&dotdot, ".."); | ||
137 | inode = gfs2_lookupi(child->d_inode, &dotdot, 1); | ||
138 | |||
139 | if (!inode) | ||
140 | return ERR_PTR(-ENOENT); | ||
141 | /* | 135 | /* |
142 | * In case of an error, @inode carries the error value, and we | 136 | * XXX(hch): it would be a good idea to keep this around as a |
143 | * have to return that as a(n invalid) pointer to dentry. | 137 | * static variable. |
144 | */ | 138 | */ |
145 | if (IS_ERR(inode)) | 139 | gfs2_str2qstr(&dotdot, ".."); |
146 | return ERR_CAST(inode); | ||
147 | |||
148 | dentry = d_alloc_anon(inode); | ||
149 | if (!dentry) { | ||
150 | iput(inode); | ||
151 | return ERR_PTR(-ENOMEM); | ||
152 | } | ||
153 | 140 | ||
154 | dentry->d_op = &gfs2_dops; | 141 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); |
142 | if (!IS_ERR(dentry)) | ||
143 | dentry->d_op = &gfs2_dops; | ||
155 | return dentry; | 144 | return dentry; |
156 | } | 145 | } |
157 | 146 | ||
@@ -233,13 +222,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
233 | gfs2_glock_dq_uninit(&i_gh); | 222 | gfs2_glock_dq_uninit(&i_gh); |
234 | 223 | ||
235 | out_inode: | 224 | out_inode: |
236 | dentry = d_alloc_anon(inode); | 225 | dentry = d_obtain_alias(inode); |
237 | if (!dentry) { | 226 | if (!IS_ERR(dentry)) |
238 | iput(inode); | 227 | dentry->d_op = &gfs2_dops; |
239 | return ERR_PTR(-ENOMEM); | ||
240 | } | ||
241 | |||
242 | dentry->d_op = &gfs2_dops; | ||
243 | return dentry; | 228 | return dentry; |
244 | 229 | ||
245 | fail_rgd: | 230 | fail_rgd: |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 534e1e2c65ca..d232991b9046 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, | |||
69 | mark_inode_dirty(inode); | 69 | mark_inode_dirty(inode); |
70 | break; | 70 | break; |
71 | } else if (PTR_ERR(inode) != -EEXIST || | 71 | } else if (PTR_ERR(inode) != -EEXIST || |
72 | (nd && (nd->intent.open.flags & O_EXCL))) { | 72 | (nd && nd->flags & LOOKUP_EXCL)) { |
73 | gfs2_holder_uninit(ghs); | 73 | gfs2_holder_uninit(ghs); |
74 | return PTR_ERR(inode); | 74 | return PTR_ERR(inode); |
75 | } | 75 | } |
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 7e19835efa2e..c69b7ac75bf7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode) | |||
511 | } | 511 | } |
512 | } | 512 | } |
513 | 513 | ||
514 | static int hfs_permission(struct inode *inode, int mask) | ||
515 | { | ||
516 | if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) | ||
517 | return 0; | ||
518 | return generic_permission(inode, mask, NULL); | ||
519 | } | ||
520 | |||
521 | static int hfs_file_open(struct inode *inode, struct file *file) | 514 | static int hfs_file_open(struct inode *inode, struct file *file) |
522 | { | 515 | { |
523 | if (HFS_IS_RSRC(inode)) | 516 | if (HFS_IS_RSRC(inode)) |
@@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = { | |||
616 | .lookup = hfs_file_lookup, | 609 | .lookup = hfs_file_lookup, |
617 | .truncate = hfs_file_truncate, | 610 | .truncate = hfs_file_truncate, |
618 | .setattr = hfs_inode_setattr, | 611 | .setattr = hfs_inode_setattr, |
619 | .permission = hfs_permission, | ||
620 | .setxattr = hfs_setxattr, | 612 | .setxattr = hfs_setxattr, |
621 | .getxattr = hfs_getxattr, | 613 | .getxattr = hfs_getxattr, |
622 | .listxattr = hfs_listxattr, | 614 | .listxattr = hfs_listxattr, |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fec8f61227ff..0022eec63cda 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
199 | goto done; | 199 | goto done; |
200 | } | 200 | } |
201 | 201 | ||
202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | ||
203 | return -EIO; | ||
204 | |||
202 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); |
203 | res = hfsplus_ext_read_extent(inode, ablock); | 206 | res = hfsplus_ext_read_extent(inode, ablock); |
204 | if (!res) { | 207 | if (!res) { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index b085d64a2b67..b207f0e6fc22 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -238,22 +238,12 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
238 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); | 238 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); |
239 | } | 239 | } |
240 | 240 | ||
241 | static int hfsplus_permission(struct inode *inode, int mask) | ||
242 | { | ||
243 | /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, | ||
244 | * open_exec has the same test, so it's still not executable, if a x bit | ||
245 | * is set fall back to standard permission check. | ||
246 | */ | ||
247 | if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) | ||
248 | return 0; | ||
249 | return generic_permission(inode, mask, NULL); | ||
250 | } | ||
251 | |||
252 | |||
253 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 241 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
254 | { | 242 | { |
255 | if (HFSPLUS_IS_RSRC(inode)) | 243 | if (HFSPLUS_IS_RSRC(inode)) |
256 | inode = HFSPLUS_I(inode).rsrc_inode; | 244 | inode = HFSPLUS_I(inode).rsrc_inode; |
245 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | ||
246 | return -EOVERFLOW; | ||
257 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 247 | atomic_inc(&HFSPLUS_I(inode).opencnt); |
258 | return 0; | 248 | return 0; |
259 | } | 249 | } |
@@ -279,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
279 | static const struct inode_operations hfsplus_file_inode_operations = { | 269 | static const struct inode_operations hfsplus_file_inode_operations = { |
280 | .lookup = hfsplus_file_lookup, | 270 | .lookup = hfsplus_file_lookup, |
281 | .truncate = hfsplus_file_truncate, | 271 | .truncate = hfsplus_file_truncate, |
282 | .permission = hfsplus_permission, | ||
283 | .setxattr = hfsplus_setxattr, | 272 | .setxattr = hfsplus_setxattr, |
284 | .getxattr = hfsplus_getxattr, | 273 | .getxattr = hfsplus_getxattr, |
285 | .listxattr = hfsplus_listxattr, | 274 | .listxattr = hfsplus_listxattr, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d6ecabf4d231..7f34f4385de0 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -20,7 +20,7 @@ | |||
20 | struct hostfs_inode_info { | 20 | struct hostfs_inode_info { |
21 | char *host_filename; | 21 | char *host_filename; |
22 | int fd; | 22 | int fd; |
23 | int mode; | 23 | fmode_t mode; |
24 | struct inode vfs_inode; | 24 | struct inode vfs_inode; |
25 | }; | 25 | }; |
26 | 26 | ||
@@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
373 | int hostfs_file_open(struct inode *ino, struct file *file) | 373 | int hostfs_file_open(struct inode *ino, struct file *file) |
374 | { | 374 | { |
375 | char *name; | 375 | char *name; |
376 | int mode = 0, r = 0, w = 0, fd; | 376 | fmode_t mode = 0; |
377 | int r = 0, w = 0, fd; | ||
377 | 378 | ||
378 | mode = file->f_mode & (FMODE_READ | FMODE_WRITE); | 379 | mode = file->f_mode & (FMODE_READ | FMODE_WRITE); |
379 | if ((mode & HOSTFS_I(ino)->mode) == mode) | 380 | if ((mode & HOSTFS_I(ino)->mode) == mode) |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index be8be5040e07..64ab52259204 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -143,5 +143,5 @@ const struct file_operations hpfs_file_ops = | |||
143 | const struct inode_operations hpfs_file_iops = | 143 | const struct inode_operations hpfs_file_iops = |
144 | { | 144 | { |
145 | .truncate = hpfs_truncate, | 145 | .truncate = hpfs_truncate, |
146 | .setattr = hpfs_notify_change, | 146 | .setattr = hpfs_setattr, |
147 | }; | 147 | }; |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 42ff60ccf2a9..c2ea31bae313 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -275,7 +275,7 @@ void hpfs_init_inode(struct inode *); | |||
275 | void hpfs_read_inode(struct inode *); | 275 | void hpfs_read_inode(struct inode *); |
276 | void hpfs_write_inode(struct inode *); | 276 | void hpfs_write_inode(struct inode *); |
277 | void hpfs_write_inode_nolock(struct inode *); | 277 | void hpfs_write_inode_nolock(struct inode *); |
278 | int hpfs_notify_change(struct dentry *, struct iattr *); | 278 | int hpfs_setattr(struct dentry *, struct iattr *); |
279 | void hpfs_write_if_changed(struct inode *); | 279 | void hpfs_write_if_changed(struct inode *); |
280 | void hpfs_delete_inode(struct inode *); | 280 | void hpfs_delete_inode(struct inode *); |
281 | 281 | ||
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 85d3e1d9ac00..39a1bfbea312 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -260,19 +260,28 @@ void hpfs_write_inode_nolock(struct inode *i) | |||
260 | brelse(bh); | 260 | brelse(bh); |
261 | } | 261 | } |
262 | 262 | ||
263 | int hpfs_notify_change(struct dentry *dentry, struct iattr *attr) | 263 | int hpfs_setattr(struct dentry *dentry, struct iattr *attr) |
264 | { | 264 | { |
265 | struct inode *inode = dentry->d_inode; | 265 | struct inode *inode = dentry->d_inode; |
266 | int error=0; | 266 | int error = -EINVAL; |
267 | |||
267 | lock_kernel(); | 268 | lock_kernel(); |
268 | if ( ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) || | 269 | if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) |
269 | (hpfs_sb(inode->i_sb)->sb_root == inode->i_ino) ) { | 270 | goto out_unlock; |
270 | error = -EINVAL; | 271 | if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) |
271 | } else if ((error = inode_change_ok(inode, attr))) { | 272 | goto out_unlock; |
272 | } else if ((error = inode_setattr(inode, attr))) { | 273 | |
273 | } else { | 274 | error = inode_change_ok(inode, attr); |
274 | hpfs_write_inode(inode); | 275 | if (error) |
275 | } | 276 | goto out_unlock; |
277 | |||
278 | error = inode_setattr(inode, attr); | ||
279 | if (error) | ||
280 | goto out_unlock; | ||
281 | |||
282 | hpfs_write_inode(inode); | ||
283 | |||
284 | out_unlock: | ||
276 | unlock_kernel(); | 285 | unlock_kernel(); |
277 | return error; | 286 | return error; |
278 | } | 287 | } |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index d9c59a775449..10783f3d265a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -669,5 +669,5 @@ const struct inode_operations hpfs_dir_iops = | |||
669 | .rmdir = hpfs_rmdir, | 669 | .rmdir = hpfs_rmdir, |
670 | .mknod = hpfs_mknod, | 670 | .mknod = hpfs_mknod, |
671 | .rename = hpfs_rename, | 671 | .rename = hpfs_rename, |
672 | .setattr = hpfs_notify_change, | 672 | .setattr = hpfs_setattr, |
673 | }; | 673 | }; |
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index bb219138331a..e81a30593ba9 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -22,7 +22,7 @@ isofs_export_iget(struct super_block *sb, | |||
22 | __u32 generation) | 22 | __u32 generation) |
23 | { | 23 | { |
24 | struct inode *inode; | 24 | struct inode *inode; |
25 | struct dentry *result; | 25 | |
26 | if (block == 0) | 26 | if (block == 0) |
27 | return ERR_PTR(-ESTALE); | 27 | return ERR_PTR(-ESTALE); |
28 | inode = isofs_iget(sb, block, offset); | 28 | inode = isofs_iget(sb, block, offset); |
@@ -32,12 +32,7 @@ isofs_export_iget(struct super_block *sb, | |||
32 | iput(inode); | 32 | iput(inode); |
33 | return ERR_PTR(-ESTALE); | 33 | return ERR_PTR(-ESTALE); |
34 | } | 34 | } |
35 | result = d_alloc_anon(inode); | 35 | return d_obtain_alias(inode); |
36 | if (!result) { | ||
37 | iput(inode); | ||
38 | return ERR_PTR(-ENOMEM); | ||
39 | } | ||
40 | return result; | ||
41 | } | 36 | } |
42 | 37 | ||
43 | /* This function is surprisingly simple. The trick is understanding | 38 | /* This function is surprisingly simple. The trick is understanding |
@@ -51,7 +46,6 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) | |||
51 | unsigned long parent_offset = 0; | 46 | unsigned long parent_offset = 0; |
52 | struct inode *child_inode = child->d_inode; | 47 | struct inode *child_inode = child->d_inode; |
53 | struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); | 48 | struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); |
54 | struct inode *parent_inode = NULL; | ||
55 | struct iso_directory_record *de = NULL; | 49 | struct iso_directory_record *de = NULL; |
56 | struct buffer_head * bh = NULL; | 50 | struct buffer_head * bh = NULL; |
57 | struct dentry *rv = NULL; | 51 | struct dentry *rv = NULL; |
@@ -104,28 +98,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) | |||
104 | /* Normalize */ | 98 | /* Normalize */ |
105 | isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); | 99 | isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); |
106 | 100 | ||
107 | /* Get the inode. */ | 101 | rv = d_obtain_alias(isofs_iget(child_inode->i_sb, parent_block, |
108 | parent_inode = isofs_iget(child_inode->i_sb, | 102 | parent_offset)); |
109 | parent_block, | ||
110 | parent_offset); | ||
111 | if (IS_ERR(parent_inode)) { | ||
112 | rv = ERR_CAST(parent_inode); | ||
113 | if (rv != ERR_PTR(-ENOMEM)) | ||
114 | rv = ERR_PTR(-EACCES); | ||
115 | goto out; | ||
116 | } | ||
117 | |||
118 | /* Allocate the dentry. */ | ||
119 | rv = d_alloc_anon(parent_inode); | ||
120 | if (rv == NULL) { | ||
121 | rv = ERR_PTR(-ENOMEM); | ||
122 | goto out; | ||
123 | } | ||
124 | |||
125 | out: | 103 | out: |
126 | if (bh) { | 104 | if (bh) |
127 | brelse(bh); | 105 | brelse(bh); |
128 | } | ||
129 | return rv; | 106 | return rv; |
130 | } | 107 | } |
131 | 108 | ||
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig new file mode 100644 index 000000000000..4e28beeed157 --- /dev/null +++ b/fs/jbd/Kconfig | |||
@@ -0,0 +1,30 @@ | |||
1 | config JBD | ||
2 | tristate | ||
3 | help | ||
4 | This is a generic journalling layer for block devices. It is | ||
5 | currently used by the ext3 file system, but it could also be | ||
6 | used to add journal support to other file systems or block | ||
7 | devices such as RAID or LVM. | ||
8 | |||
9 | If you are using the ext3 file system, you need to say Y here. | ||
10 | If you are not using ext3 then you will probably want to say N. | ||
11 | |||
12 | To compile this device as a module, choose M here: the module will be | ||
13 | called jbd. If you are compiling ext3 into the kernel, you | ||
14 | cannot compile this code as a module. | ||
15 | |||
16 | config JBD_DEBUG | ||
17 | bool "JBD (ext3) debugging support" | ||
18 | depends on JBD && DEBUG_FS | ||
19 | help | ||
20 | If you are using the ext3 journaled file system (or potentially any | ||
21 | other file system/device using JBD), this option allows you to | ||
22 | enable debugging output while the system is running, in order to | ||
23 | help track down any problems you are having. By default the | ||
24 | debugging output will be turned off. | ||
25 | |||
26 | If you select Y here, then you will be able to turn on debugging | ||
27 | with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a | ||
28 | number between 1 and 5, the higher the number, the more debugging | ||
29 | output is generated. To turn debugging off again, do | ||
30 | "echo 0 > /sys/kernel/debug/jbd/jbd-debug". | ||
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index a5432bbbfb88..1bd8d4acc6f2 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
93 | int ret = 0; | 93 | int ret = 0; |
94 | struct buffer_head *bh = jh2bh(jh); | 94 | struct buffer_head *bh = jh2bh(jh); |
95 | 95 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
97 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 98 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
98 | ret = __journal_remove_checkpoint(jh) + 1; | 99 | ret = __journal_remove_checkpoint(jh) + 1; |
99 | jbd_unlock_bh_state(bh); | 100 | jbd_unlock_bh_state(bh); |
@@ -126,14 +127,29 @@ void __log_wait_for_space(journal_t *journal) | |||
126 | 127 | ||
127 | /* | 128 | /* |
128 | * Test again, another process may have checkpointed while we | 129 | * Test again, another process may have checkpointed while we |
129 | * were waiting for the checkpoint lock | 130 | * were waiting for the checkpoint lock. If there are no |
131 | * outstanding transactions there is nothing to checkpoint and | ||
132 | * we can't make progress. Abort the journal in this case. | ||
130 | */ | 133 | */ |
131 | spin_lock(&journal->j_state_lock); | 134 | spin_lock(&journal->j_state_lock); |
135 | spin_lock(&journal->j_list_lock); | ||
132 | nblocks = jbd_space_needed(journal); | 136 | nblocks = jbd_space_needed(journal); |
133 | if (__log_space_left(journal) < nblocks) { | 137 | if (__log_space_left(journal) < nblocks) { |
138 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
139 | |||
140 | spin_unlock(&journal->j_list_lock); | ||
134 | spin_unlock(&journal->j_state_lock); | 141 | spin_unlock(&journal->j_state_lock); |
135 | log_do_checkpoint(journal); | 142 | if (chkpt) { |
143 | log_do_checkpoint(journal); | ||
144 | } else { | ||
145 | printk(KERN_ERR "%s: no transactions\n", | ||
146 | __func__); | ||
147 | journal_abort(journal, 0); | ||
148 | } | ||
149 | |||
136 | spin_lock(&journal->j_state_lock); | 150 | spin_lock(&journal->j_state_lock); |
151 | } else { | ||
152 | spin_unlock(&journal->j_list_lock); | ||
137 | } | 153 | } |
138 | mutex_unlock(&journal->j_checkpoint_mutex); | 154 | mutex_unlock(&journal->j_checkpoint_mutex); |
139 | } | 155 | } |
@@ -160,21 +176,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
160 | * buffers. Note that we take the buffers in the opposite ordering | 176 | * buffers. Note that we take the buffers in the opposite ordering |
161 | * from the one in which they were submitted for IO. | 177 | * from the one in which they were submitted for IO. |
162 | * | 178 | * |
179 | * Return 0 on success, and return <0 if some buffers have failed | ||
180 | * to be written out. | ||
181 | * | ||
163 | * Called with j_list_lock held. | 182 | * Called with j_list_lock held. |
164 | */ | 183 | */ |
165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 184 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
166 | { | 185 | { |
167 | struct journal_head *jh; | 186 | struct journal_head *jh; |
168 | struct buffer_head *bh; | 187 | struct buffer_head *bh; |
169 | tid_t this_tid; | 188 | tid_t this_tid; |
170 | int released = 0; | 189 | int released = 0; |
190 | int ret = 0; | ||
171 | 191 | ||
172 | this_tid = transaction->t_tid; | 192 | this_tid = transaction->t_tid; |
173 | restart: | 193 | restart: |
174 | /* Did somebody clean up the transaction in the meanwhile? */ | 194 | /* Did somebody clean up the transaction in the meanwhile? */ |
175 | if (journal->j_checkpoint_transactions != transaction || | 195 | if (journal->j_checkpoint_transactions != transaction || |
176 | transaction->t_tid != this_tid) | 196 | transaction->t_tid != this_tid) |
177 | return; | 197 | return ret; |
178 | while (!released && transaction->t_checkpoint_io_list) { | 198 | while (!released && transaction->t_checkpoint_io_list) { |
179 | jh = transaction->t_checkpoint_io_list; | 199 | jh = transaction->t_checkpoint_io_list; |
180 | bh = jh2bh(jh); | 200 | bh = jh2bh(jh); |
@@ -194,6 +214,9 @@ restart: | |||
194 | spin_lock(&journal->j_list_lock); | 214 | spin_lock(&journal->j_list_lock); |
195 | goto restart; | 215 | goto restart; |
196 | } | 216 | } |
217 | if (unlikely(buffer_write_io_error(bh))) | ||
218 | ret = -EIO; | ||
219 | |||
197 | /* | 220 | /* |
198 | * Now in whatever state the buffer currently is, we know that | 221 | * Now in whatever state the buffer currently is, we know that |
199 | * it has been written out and so we can drop it from the list | 222 | * it has been written out and so we can drop it from the list |
@@ -203,6 +226,8 @@ restart: | |||
203 | journal_remove_journal_head(bh); | 226 | journal_remove_journal_head(bh); |
204 | __brelse(bh); | 227 | __brelse(bh); |
205 | } | 228 | } |
229 | |||
230 | return ret; | ||
206 | } | 231 | } |
207 | 232 | ||
208 | #define NR_BATCH 64 | 233 | #define NR_BATCH 64 |
@@ -226,7 +251,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
226 | * Try to flush one buffer from the checkpoint list to disk. | 251 | * Try to flush one buffer from the checkpoint list to disk. |
227 | * | 252 | * |
228 | * Return 1 if something happened which requires us to abort the current | 253 | * Return 1 if something happened which requires us to abort the current |
229 | * scan of the checkpoint list. | 254 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
255 | * be written out. | ||
230 | * | 256 | * |
231 | * Called with j_list_lock held and drops it if 1 is returned | 257 | * Called with j_list_lock held and drops it if 1 is returned |
232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 258 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
@@ -256,6 +282,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
256 | log_wait_commit(journal, tid); | 282 | log_wait_commit(journal, tid); |
257 | ret = 1; | 283 | ret = 1; |
258 | } else if (!buffer_dirty(bh)) { | 284 | } else if (!buffer_dirty(bh)) { |
285 | ret = 1; | ||
286 | if (unlikely(buffer_write_io_error(bh))) | ||
287 | ret = -EIO; | ||
259 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 288 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
260 | BUFFER_TRACE(bh, "remove from checkpoint"); | 289 | BUFFER_TRACE(bh, "remove from checkpoint"); |
261 | __journal_remove_checkpoint(jh); | 290 | __journal_remove_checkpoint(jh); |
@@ -263,7 +292,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
263 | jbd_unlock_bh_state(bh); | 292 | jbd_unlock_bh_state(bh); |
264 | journal_remove_journal_head(bh); | 293 | journal_remove_journal_head(bh); |
265 | __brelse(bh); | 294 | __brelse(bh); |
266 | ret = 1; | ||
267 | } else { | 295 | } else { |
268 | /* | 296 | /* |
269 | * Important: we are about to write the buffer, and | 297 | * Important: we are about to write the buffer, and |
@@ -295,6 +323,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
295 | * to disk. We submit larger chunks of data at once. | 323 | * to disk. We submit larger chunks of data at once. |
296 | * | 324 | * |
297 | * The journal should be locked before calling this function. | 325 | * The journal should be locked before calling this function. |
326 | * Called with j_checkpoint_mutex held. | ||
298 | */ | 327 | */ |
299 | int log_do_checkpoint(journal_t *journal) | 328 | int log_do_checkpoint(journal_t *journal) |
300 | { | 329 | { |
@@ -318,6 +347,7 @@ int log_do_checkpoint(journal_t *journal) | |||
318 | * OK, we need to start writing disk blocks. Take one transaction | 347 | * OK, we need to start writing disk blocks. Take one transaction |
319 | * and write it. | 348 | * and write it. |
320 | */ | 349 | */ |
350 | result = 0; | ||
321 | spin_lock(&journal->j_list_lock); | 351 | spin_lock(&journal->j_list_lock); |
322 | if (!journal->j_checkpoint_transactions) | 352 | if (!journal->j_checkpoint_transactions) |
323 | goto out; | 353 | goto out; |
@@ -334,7 +364,7 @@ restart: | |||
334 | int batch_count = 0; | 364 | int batch_count = 0; |
335 | struct buffer_head *bhs[NR_BATCH]; | 365 | struct buffer_head *bhs[NR_BATCH]; |
336 | struct journal_head *jh; | 366 | struct journal_head *jh; |
337 | int retry = 0; | 367 | int retry = 0, err; |
338 | 368 | ||
339 | while (!retry && transaction->t_checkpoint_list) { | 369 | while (!retry && transaction->t_checkpoint_list) { |
340 | struct buffer_head *bh; | 370 | struct buffer_head *bh; |
@@ -347,6 +377,8 @@ restart: | |||
347 | break; | 377 | break; |
348 | } | 378 | } |
349 | retry = __process_buffer(journal, jh, bhs,&batch_count); | 379 | retry = __process_buffer(journal, jh, bhs,&batch_count); |
380 | if (retry < 0 && !result) | ||
381 | result = retry; | ||
350 | if (!retry && (need_resched() || | 382 | if (!retry && (need_resched() || |
351 | spin_needbreak(&journal->j_list_lock))) { | 383 | spin_needbreak(&journal->j_list_lock))) { |
352 | spin_unlock(&journal->j_list_lock); | 384 | spin_unlock(&journal->j_list_lock); |
@@ -371,14 +403,18 @@ restart: | |||
371 | * Now we have cleaned up the first transaction's checkpoint | 403 | * Now we have cleaned up the first transaction's checkpoint |
372 | * list. Let's clean up the second one | 404 | * list. Let's clean up the second one |
373 | */ | 405 | */ |
374 | __wait_cp_io(journal, transaction); | 406 | err = __wait_cp_io(journal, transaction); |
407 | if (!result) | ||
408 | result = err; | ||
375 | } | 409 | } |
376 | out: | 410 | out: |
377 | spin_unlock(&journal->j_list_lock); | 411 | spin_unlock(&journal->j_list_lock); |
378 | result = cleanup_journal_tail(journal); | ||
379 | if (result < 0) | 412 | if (result < 0) |
380 | return result; | 413 | journal_abort(journal, result); |
381 | return 0; | 414 | else |
415 | result = cleanup_journal_tail(journal); | ||
416 | |||
417 | return (result < 0) ? result : 0; | ||
382 | } | 418 | } |
383 | 419 | ||
384 | /* | 420 | /* |
@@ -394,8 +430,9 @@ out: | |||
394 | * This is the only part of the journaling code which really needs to be | 430 | * This is the only part of the journaling code which really needs to be |
395 | * aware of transaction aborts. Checkpointing involves writing to the | 431 | * aware of transaction aborts. Checkpointing involves writing to the |
396 | * main filesystem area rather than to the journal, so it can proceed | 432 | * main filesystem area rather than to the journal, so it can proceed |
397 | * even in abort state, but we must not update the journal superblock if | 433 | * even in abort state, but we must not update the super block if |
398 | * we have an abort error outstanding. | 434 | * checkpointing may have failed. Otherwise, we would lose some metadata |
435 | * buffers which should be written-back to the filesystem. | ||
399 | */ | 436 | */ |
400 | 437 | ||
401 | int cleanup_journal_tail(journal_t *journal) | 438 | int cleanup_journal_tail(journal_t *journal) |
@@ -404,6 +441,9 @@ int cleanup_journal_tail(journal_t *journal) | |||
404 | tid_t first_tid; | 441 | tid_t first_tid; |
405 | unsigned long blocknr, freed; | 442 | unsigned long blocknr, freed; |
406 | 443 | ||
444 | if (is_journal_aborted(journal)) | ||
445 | return 1; | ||
446 | |||
407 | /* OK, work out the oldest transaction remaining in the log, and | 447 | /* OK, work out the oldest transaction remaining in the log, and |
408 | * the log block it starts at. | 448 | * the log block it starts at. |
409 | * | 449 | * |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index ae08c057e751..25719d902c51 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal) | |||
482 | printk(KERN_WARNING | 482 | printk(KERN_WARNING |
483 | "JBD: Detected IO errors while flushing file data " | 483 | "JBD: Detected IO errors while flushing file data " |
484 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 484 | "on %s\n", bdevname(journal->j_fs_dev, b)); |
485 | if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR) | ||
486 | journal_abort(journal, err); | ||
485 | err = 0; | 487 | err = 0; |
486 | } | 488 | } |
487 | 489 | ||
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal) | |||
518 | jh = commit_transaction->t_buffers; | 520 | jh = commit_transaction->t_buffers; |
519 | 521 | ||
520 | /* If we're in abort mode, we just un-journal the buffer and | 522 | /* If we're in abort mode, we just un-journal the buffer and |
521 | release it for background writing. */ | 523 | release it. */ |
522 | 524 | ||
523 | if (is_journal_aborted(journal)) { | 525 | if (is_journal_aborted(journal)) { |
526 | clear_buffer_jbddirty(jh2bh(jh)); | ||
524 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 527 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
525 | journal_refile_buffer(journal, jh); | 528 | journal_refile_buffer(journal, jh); |
526 | /* If that was the last one, we need to clean up | 529 | /* If that was the last one, we need to clean up |
@@ -762,6 +765,9 @@ wait_for_iobuf: | |||
762 | /* AKPM: bforget here */ | 765 | /* AKPM: bforget here */ |
763 | } | 766 | } |
764 | 767 | ||
768 | if (err) | ||
769 | journal_abort(journal, err); | ||
770 | |||
765 | jbd_debug(3, "JBD: commit phase 6\n"); | 771 | jbd_debug(3, "JBD: commit phase 6\n"); |
766 | 772 | ||
767 | if (journal_write_commit_record(journal, commit_transaction)) | 773 | if (journal_write_commit_record(journal, commit_transaction)) |
@@ -852,6 +858,8 @@ restart_loop: | |||
852 | if (buffer_jbddirty(bh)) { | 858 | if (buffer_jbddirty(bh)) { |
853 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 859 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
854 | __journal_insert_checkpoint(jh, commit_transaction); | 860 | __journal_insert_checkpoint(jh, commit_transaction); |
861 | if (is_journal_aborted(journal)) | ||
862 | clear_buffer_jbddirty(bh); | ||
855 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 863 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
856 | __journal_refile_buffer(jh); | 864 | __journal_refile_buffer(jh); |
857 | jbd_unlock_bh_state(bh); | 865 | jbd_unlock_bh_state(bh); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index aa7143a8349b..9e4fa52d7dc8 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1121,9 +1121,12 @@ recovery_error: | |||
1121 | * | 1121 | * |
1122 | * Release a journal_t structure once it is no longer in use by the | 1122 | * Release a journal_t structure once it is no longer in use by the |
1123 | * journaled object. | 1123 | * journaled object. |
1124 | * Return <0 if we couldn't clean up the journal. | ||
1124 | */ | 1125 | */ |
1125 | void journal_destroy(journal_t *journal) | 1126 | int journal_destroy(journal_t *journal) |
1126 | { | 1127 | { |
1128 | int err = 0; | ||
1129 | |||
1127 | /* Wait for the commit thread to wake up and die. */ | 1130 | /* Wait for the commit thread to wake up and die. */ |
1128 | journal_kill_thread(journal); | 1131 | journal_kill_thread(journal); |
1129 | 1132 | ||
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal) | |||
1146 | J_ASSERT(journal->j_checkpoint_transactions == NULL); | 1149 | J_ASSERT(journal->j_checkpoint_transactions == NULL); |
1147 | spin_unlock(&journal->j_list_lock); | 1150 | spin_unlock(&journal->j_list_lock); |
1148 | 1151 | ||
1149 | /* We can now mark the journal as empty. */ | ||
1150 | journal->j_tail = 0; | ||
1151 | journal->j_tail_sequence = ++journal->j_transaction_sequence; | ||
1152 | if (journal->j_sb_buffer) { | 1152 | if (journal->j_sb_buffer) { |
1153 | journal_update_superblock(journal, 1); | 1153 | if (!is_journal_aborted(journal)) { |
1154 | /* We can now mark the journal as empty. */ | ||
1155 | journal->j_tail = 0; | ||
1156 | journal->j_tail_sequence = | ||
1157 | ++journal->j_transaction_sequence; | ||
1158 | journal_update_superblock(journal, 1); | ||
1159 | } else { | ||
1160 | err = -EIO; | ||
1161 | } | ||
1154 | brelse(journal->j_sb_buffer); | 1162 | brelse(journal->j_sb_buffer); |
1155 | } | 1163 | } |
1156 | 1164 | ||
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal) | |||
1160 | journal_destroy_revoke(journal); | 1168 | journal_destroy_revoke(journal); |
1161 | kfree(journal->j_wbuf); | 1169 | kfree(journal->j_wbuf); |
1162 | kfree(journal); | 1170 | kfree(journal); |
1171 | |||
1172 | return err; | ||
1163 | } | 1173 | } |
1164 | 1174 | ||
1165 | 1175 | ||
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal) | |||
1359 | spin_lock(&journal->j_list_lock); | 1369 | spin_lock(&journal->j_list_lock); |
1360 | while (!err && journal->j_checkpoint_transactions != NULL) { | 1370 | while (!err && journal->j_checkpoint_transactions != NULL) { |
1361 | spin_unlock(&journal->j_list_lock); | 1371 | spin_unlock(&journal->j_list_lock); |
1372 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1362 | err = log_do_checkpoint(journal); | 1373 | err = log_do_checkpoint(journal); |
1374 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1363 | spin_lock(&journal->j_list_lock); | 1375 | spin_lock(&journal->j_list_lock); |
1364 | } | 1376 | } |
1365 | spin_unlock(&journal->j_list_lock); | 1377 | spin_unlock(&journal->j_list_lock); |
1378 | |||
1379 | if (is_journal_aborted(journal)) | ||
1380 | return -EIO; | ||
1381 | |||
1366 | cleanup_journal_tail(journal); | 1382 | cleanup_journal_tail(journal); |
1367 | 1383 | ||
1368 | /* Finally, mark the journal as really needing no recovery. | 1384 | /* Finally, mark the journal as really needing no recovery. |
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal) | |||
1384 | J_ASSERT(journal->j_head == journal->j_tail); | 1400 | J_ASSERT(journal->j_head == journal->j_tail); |
1385 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1401 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
1386 | spin_unlock(&journal->j_state_lock); | 1402 | spin_unlock(&journal->j_state_lock); |
1387 | return err; | 1403 | return 0; |
1388 | } | 1404 | } |
1389 | 1405 | ||
1390 | /** | 1406 | /** |
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 43bc5e5ed064..db5e982c5ddf 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c | |||
@@ -223,7 +223,7 @@ do { \ | |||
223 | */ | 223 | */ |
224 | int journal_recover(journal_t *journal) | 224 | int journal_recover(journal_t *journal) |
225 | { | 225 | { |
226 | int err; | 226 | int err, err2; |
227 | journal_superblock_t * sb; | 227 | journal_superblock_t * sb; |
228 | 228 | ||
229 | struct recovery_info info; | 229 | struct recovery_info info; |
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal) | |||
261 | journal->j_transaction_sequence = ++info.end_transaction; | 261 | journal->j_transaction_sequence = ++info.end_transaction; |
262 | 262 | ||
263 | journal_clear_revoke(journal); | 263 | journal_clear_revoke(journal); |
264 | sync_blockdev(journal->j_fs_dev); | 264 | err2 = sync_blockdev(journal->j_fs_dev); |
265 | if (!err) | ||
266 | err = err2; | ||
267 | |||
265 | return err; | 268 | return err; |
266 | } | 269 | } |
267 | 270 | ||
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 0540ca27a446..d15cd6e7251e 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
954 | journal_t *journal = handle->h_transaction->t_journal; | 954 | journal_t *journal = handle->h_transaction->t_journal; |
955 | int need_brelse = 0; | 955 | int need_brelse = 0; |
956 | struct journal_head *jh; | 956 | struct journal_head *jh; |
957 | int ret = 0; | ||
957 | 958 | ||
958 | if (is_handle_aborted(handle)) | 959 | if (is_handle_aborted(handle)) |
959 | return 0; | 960 | return ret; |
960 | 961 | ||
961 | jh = journal_add_journal_head(bh); | 962 | jh = journal_add_journal_head(bh); |
962 | JBUFFER_TRACE(jh, "entry"); | 963 | JBUFFER_TRACE(jh, "entry"); |
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1067 | time if it is redirtied */ | 1068 | time if it is redirtied */ |
1068 | } | 1069 | } |
1069 | 1070 | ||
1070 | /* journal_clean_data_list() may have got there first */ | 1071 | /* |
1072 | * We cannot remove the buffer with io error from the | ||
1073 | * committing transaction, because otherwise it would | ||
1074 | * miss the error and the commit would not abort. | ||
1075 | */ | ||
1076 | if (unlikely(!buffer_uptodate(bh))) { | ||
1077 | ret = -EIO; | ||
1078 | goto no_journal; | ||
1079 | } | ||
1080 | |||
1071 | if (jh->b_transaction != NULL) { | 1081 | if (jh->b_transaction != NULL) { |
1072 | JBUFFER_TRACE(jh, "unfile from commit"); | 1082 | JBUFFER_TRACE(jh, "unfile from commit"); |
1073 | __journal_temp_unlink_buffer(jh); | 1083 | __journal_temp_unlink_buffer(jh); |
@@ -1108,7 +1118,7 @@ no_journal: | |||
1108 | } | 1118 | } |
1109 | JBUFFER_TRACE(jh, "exit"); | 1119 | JBUFFER_TRACE(jh, "exit"); |
1110 | journal_put_journal_head(jh); | 1120 | journal_put_journal_head(jh); |
1111 | return 0; | 1121 | return ret; |
1112 | } | 1122 | } |
1113 | 1123 | ||
1114 | /** | 1124 | /** |
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig new file mode 100644 index 000000000000..f32f346f4b0a --- /dev/null +++ b/fs/jbd2/Kconfig | |||
@@ -0,0 +1,33 @@ | |||
1 | config JBD2 | ||
2 | tristate | ||
3 | select CRC32 | ||
4 | help | ||
5 | This is a generic journaling layer for block devices that support | ||
6 | both 32-bit and 64-bit block numbers. It is currently used by | ||
7 | the ext4 and OCFS2 filesystems, but it could also be used to add | ||
8 | journal support to other file systems or block devices such | ||
9 | as RAID or LVM. | ||
10 | |||
11 | If you are using ext4 or OCFS2, you need to say Y here. | ||
12 | If you are not using ext4 or OCFS2 then you will | ||
13 | probably want to say N. | ||
14 | |||
15 | To compile this code as a module, choose M here. The module will be | ||
16 | called jbd2. If you are compiling ext4 or OCFS2 into the kernel, | ||
17 | you cannot compile this code as a module. | ||
18 | |||
19 | config JBD2_DEBUG | ||
20 | bool "JBD2 (ext4) debugging support" | ||
21 | depends on JBD2 && DEBUG_FS | ||
22 | help | ||
23 | If you are using the ext4 journaled file system (or | ||
24 | potentially any other filesystem/device using JBD2), this option | ||
25 | allows you to enable debugging output while the system is running, | ||
26 | in order to help track down any problems you are having. | ||
27 | By default, the debugging output will be turned off. | ||
28 | |||
29 | If you select Y here, then you will be able to turn on debugging | ||
30 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | ||
31 | number between 1 and 5. The higher the number, the more debugging | ||
32 | output is generated. To turn debugging off again, do | ||
33 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0abe02c4242a..8b119e16aa36 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -995,6 +995,9 @@ restart_loop: | |||
995 | } | 995 | } |
996 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
997 | 997 | ||
998 | if (journal->j_commit_callback) | ||
999 | journal->j_commit_callback(journal, commit_transaction); | ||
1000 | |||
998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | 1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", |
999 | journal->j_devname, commit_transaction->t_tid, | 1002 | journal->j_devname, commit_transaction->t_tid, |
1000 | journal->j_tail_sequence); | 1003 | journal->j_tail_sequence); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa9..39b7805a599a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | INIT_LIST_HEAD(&transaction->t_inode_list); | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
55 | INIT_LIST_HEAD(&transaction->t_private_list); | ||
55 | 56 | ||
56 | /* Set up the commit timer for the new transaction. */ | 57 | /* Set up the commit timer for the new transaction. */ |
57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig new file mode 100644 index 000000000000..6ae169cd8faa --- /dev/null +++ b/fs/jffs2/Kconfig | |||
@@ -0,0 +1,188 @@ | |||
1 | config JFFS2_FS | ||
2 | tristate "Journalling Flash File System v2 (JFFS2) support" | ||
3 | select CRC32 | ||
4 | depends on MTD | ||
5 | help | ||
6 | JFFS2 is the second generation of the Journalling Flash File System | ||
7 | for use on diskless embedded devices. It provides improved wear | ||
8 | levelling, compression and support for hard links. You cannot use | ||
9 | this on normal block devices, only on 'MTD' devices. | ||
10 | |||
11 | Further information on the design and implementation of JFFS2 is | ||
12 | available at <http://sources.redhat.com/jffs2/>. | ||
13 | |||
14 | config JFFS2_FS_DEBUG | ||
15 | int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)" | ||
16 | depends on JFFS2_FS | ||
17 | default "0" | ||
18 | help | ||
19 | This controls the amount of debugging messages produced by the JFFS2 | ||
20 | code. Set it to zero for use in production systems. For evaluation, | ||
21 | testing and debugging, it's advisable to set it to one. This will | ||
22 | enable a few assertions and will print debugging messages at the | ||
23 | KERN_DEBUG loglevel, where they won't normally be visible. Level 2 | ||
24 | is unlikely to be useful - it enables extra debugging in certain | ||
25 | areas which at one point needed debugging, but when the bugs were | ||
26 | located and fixed, the detailed messages were relegated to level 2. | ||
27 | |||
28 | If reporting bugs, please try to have available a full dump of the | ||
29 | messages at debug level 1 while the misbehaviour was occurring. | ||
30 | |||
31 | config JFFS2_FS_WRITEBUFFER | ||
32 | bool "JFFS2 write-buffering support" | ||
33 | depends on JFFS2_FS | ||
34 | default y | ||
35 | help | ||
36 | This enables the write-buffering support in JFFS2. | ||
37 | |||
38 | This functionality is required to support JFFS2 on the following | ||
39 | types of flash devices: | ||
40 | - NAND flash | ||
41 | - NOR flash with transparent ECC | ||
42 | - DataFlash | ||
43 | |||
44 | config JFFS2_FS_WBUF_VERIFY | ||
45 | bool "Verify JFFS2 write-buffer reads" | ||
46 | depends on JFFS2_FS_WRITEBUFFER | ||
47 | default n | ||
48 | help | ||
49 | This causes JFFS2 to read back every page written through the | ||
50 | write-buffer, and check for errors. | ||
51 | |||
52 | config JFFS2_SUMMARY | ||
53 | bool "JFFS2 summary support (EXPERIMENTAL)" | ||
54 | depends on JFFS2_FS && EXPERIMENTAL | ||
55 | default n | ||
56 | help | ||
57 | This feature makes it possible to use summary information | ||
58 | for faster filesystem mount. | ||
59 | |||
60 | The summary information can be inserted into a filesystem image | ||
61 | by the utility 'sumtool'. | ||
62 | |||
63 | If unsure, say 'N'. | ||
64 | |||
65 | config JFFS2_FS_XATTR | ||
66 | bool "JFFS2 XATTR support (EXPERIMENTAL)" | ||
67 | depends on JFFS2_FS && EXPERIMENTAL | ||
68 | default n | ||
69 | help | ||
70 | Extended attributes are name:value pairs associated with inodes by | ||
71 | the kernel or by users (see the attr(5) manual page, or visit | ||
72 | <http://acl.bestbits.at/> for details). | ||
73 | |||
74 | If unsure, say N. | ||
75 | |||
76 | config JFFS2_FS_POSIX_ACL | ||
77 | bool "JFFS2 POSIX Access Control Lists" | ||
78 | depends on JFFS2_FS_XATTR | ||
79 | default y | ||
80 | select FS_POSIX_ACL | ||
81 | help | ||
82 | POSIX Access Control Lists (ACLs) support permissions for users and | ||
83 | groups beyond the owner/group/world scheme. | ||
84 | |||
85 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
86 | Linux website <http://acl.bestbits.at/>. | ||
87 | |||
88 | If you don't know what Access Control Lists are, say N. | ||
89 | |||
90 | config JFFS2_FS_SECURITY | ||
91 | bool "JFFS2 Security Labels" | ||
92 | depends on JFFS2_FS_XATTR | ||
93 | default y | ||
94 | help | ||
95 | Security labels support alternative access control models | ||
96 | implemented by security modules like SELinux. This option | ||
97 | enables an extended attribute handler for file security | ||
98 | labels in the jffs2 filesystem. | ||
99 | |||
100 | If you are not using a security module that requires using | ||
101 | extended attributes for file security labels, say N. | ||
102 | |||
103 | config JFFS2_COMPRESSION_OPTIONS | ||
104 | bool "Advanced compression options for JFFS2" | ||
105 | depends on JFFS2_FS | ||
106 | default n | ||
107 | help | ||
108 | Enabling this option allows you to explicitly choose which | ||
109 | compression modules, if any, are enabled in JFFS2. Removing | ||
110 | compressors can mean you cannot read existing file systems, | ||
111 | and enabling experimental compressors can mean that you | ||
112 | write a file system which cannot be read by a standard kernel. | ||
113 | |||
114 | If unsure, you should _definitely_ say 'N'. | ||
115 | |||
116 | config JFFS2_ZLIB | ||
117 | bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS | ||
118 | select ZLIB_INFLATE | ||
119 | select ZLIB_DEFLATE | ||
120 | depends on JFFS2_FS | ||
121 | default y | ||
122 | help | ||
123 | Zlib is designed to be a free, general-purpose, legally unencumbered, | ||
124 | lossless data-compression library for use on virtually any computer | ||
125 | hardware and operating system. See <http://www.gzip.org/zlib/> for | ||
126 | further information. | ||
127 | |||
128 | Say 'Y' if unsure. | ||
129 | |||
130 | config JFFS2_LZO | ||
131 | bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS | ||
132 | select LZO_COMPRESS | ||
133 | select LZO_DECOMPRESS | ||
134 | depends on JFFS2_FS | ||
135 | default n | ||
136 | help | ||
137 | minilzo-based compression. Generally works better than Zlib. | ||
138 | |||
139 | This feature was added in July, 2007. Say 'N' if you need | ||
140 | compatibility with older bootloaders or kernels. | ||
141 | |||
142 | config JFFS2_RTIME | ||
143 | bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS | ||
144 | depends on JFFS2_FS | ||
145 | default y | ||
146 | help | ||
147 | Rtime does manage to recompress already-compressed data. Say 'Y' if unsure. | ||
148 | |||
149 | config JFFS2_RUBIN | ||
150 | bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS | ||
151 | depends on JFFS2_FS | ||
152 | default n | ||
153 | help | ||
154 | RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. | ||
155 | |||
156 | choice | ||
157 | prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS | ||
158 | default JFFS2_CMODE_PRIORITY | ||
159 | depends on JFFS2_FS | ||
160 | help | ||
161 | You can set the default JFFS2 compression mode here, choosing from | ||
162 | the available compression modes. Don't touch if unsure. | ||
163 | |||
164 | config JFFS2_CMODE_NONE | ||
165 | bool "no compression" | ||
166 | help | ||
167 | Uses no compression. | ||
168 | |||
169 | config JFFS2_CMODE_PRIORITY | ||
170 | bool "priority" | ||
171 | help | ||
172 | Tries the compressors in a predefined order and chooses the first | ||
173 | successful one. | ||
174 | |||
175 | config JFFS2_CMODE_SIZE | ||
176 | bool "size (EXPERIMENTAL)" | ||
177 | help | ||
178 | Tries all compressors and chooses the one which has the smallest | ||
179 | result. | ||
180 | |||
181 | config JFFS2_CMODE_FAVOURLZO | ||
182 | bool "Favour LZO" | ||
183 | help | ||
184 | Tries all compressors and chooses the one which has the smallest | ||
185 | result but gives some preference to LZO (which has faster | ||
186 | decompression) at the expense of size. | ||
187 | |||
188 | endchoice | ||
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index 86739ee53b37..f25e70c1b51c 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c | |||
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this, | |||
53 | } | 53 | } |
54 | 54 | ||
55 | /* jffs2_compress: | 55 | /* jffs2_compress: |
56 | * @data: Pointer to uncompressed data | 56 | * @data_in: Pointer to uncompressed data |
57 | * @cdata: Pointer to returned pointer to buffer for compressed data | 57 | * @cpage_out: Pointer to returned pointer to buffer for compressed data |
58 | * @datalen: On entry, holds the amount of data available for compression. | 58 | * @datalen: On entry, holds the amount of data available for compression. |
59 | * On exit, expected to hold the amount of data actually compressed. | 59 | * On exit, expected to hold the amount of data actually compressed. |
60 | * @cdatalen: On entry, holds the amount of space available for compressed | 60 | * @cdatalen: On entry, holds the amount of space available for compressed |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index cd219ef55254..6f60cc910f4c 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -39,7 +39,8 @@ const struct file_operations jffs2_dir_operations = | |||
39 | .read = generic_read_dir, | 39 | .read = generic_read_dir, |
40 | .readdir = jffs2_readdir, | 40 | .readdir = jffs2_readdir, |
41 | .unlocked_ioctl=jffs2_ioctl, | 41 | .unlocked_ioctl=jffs2_ioctl, |
42 | .fsync = jffs2_fsync | 42 | .fsync = jffs2_fsync, |
43 | .llseek = generic_file_llseek, | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | 46 | ||
@@ -108,9 +109,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, | |||
108 | } | 109 | } |
109 | } | 110 | } |
110 | 111 | ||
111 | d_add(target, inode); | 112 | return d_splice_alias(inode, target); |
112 | |||
113 | return NULL; | ||
114 | } | 113 | } |
115 | 114 | ||
116 | /***********************************************************************/ | 115 | /***********************************************************************/ |
@@ -311,7 +310,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char | |||
311 | /* FIXME: If you care. We'd need to use frags for the target | 310 | /* FIXME: If you care. We'd need to use frags for the target |
312 | if it grows much more than this */ | 311 | if it grows much more than this */ |
313 | if (targetlen > 254) | 312 | if (targetlen > 254) |
314 | return -EINVAL; | 313 | return -ENAMETOOLONG; |
315 | 314 | ||
316 | ri = jffs2_alloc_raw_inode(); | 315 | ri = jffs2_alloc_raw_inode(); |
317 | 316 | ||
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index dddb2a6c9e2c..259461b910af 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c | |||
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, | |||
68 | instr->len = c->sector_size; | 68 | instr->len = c->sector_size; |
69 | instr->callback = jffs2_erase_callback; | 69 | instr->callback = jffs2_erase_callback; |
70 | instr->priv = (unsigned long)(&instr[1]); | 70 | instr->priv = (unsigned long)(&instr[1]); |
71 | instr->fail_addr = 0xffffffff; | 71 | instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; |
72 | 72 | ||
73 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; | 73 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; |
74 | ((struct erase_priv_struct *)instr->priv)->c = c; | 74 | ((struct erase_priv_struct *)instr->priv)->c = c; |
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock | |||
175 | { | 175 | { |
176 | /* For NAND, if the failure did not occur at the device level for a | 176 | /* For NAND, if the failure did not occur at the device level for a |
177 | specific physical page, don't bother updating the bad block table. */ | 177 | specific physical page, don't bother updating the bad block table. */ |
178 | if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { | 178 | if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) { |
179 | /* We had a device-level failure to erase. Let's see if we've | 179 | /* We had a device-level failure to erase. Let's see if we've |
180 | failed too many times. */ | 180 | failed too many times. */ |
181 | if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { | 181 | if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 086c43830221..249305d65d5b 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
207 | buf->f_files = 0; | 207 | buf->f_files = 0; |
208 | buf->f_ffree = 0; | 208 | buf->f_ffree = 0; |
209 | buf->f_namelen = JFFS2_MAX_NAME_LEN; | 209 | buf->f_namelen = JFFS2_MAX_NAME_LEN; |
210 | buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC; | ||
211 | buf->f_fsid.val[1] = c->mtd->index; | ||
210 | 212 | ||
211 | spin_lock(&c->erase_completion_lock); | 213 | spin_lock(&c->erase_completion_lock); |
212 | avail = c->dirty_size + c->free_size; | 214 | avail = c->dirty_size + c->free_size; |
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i | |||
440 | 442 | ||
441 | memset(ri, 0, sizeof(*ri)); | 443 | memset(ri, 0, sizeof(*ri)); |
442 | /* Set OS-specific defaults for new inodes */ | 444 | /* Set OS-specific defaults for new inodes */ |
443 | ri->uid = cpu_to_je16(current->fsuid); | 445 | ri->uid = cpu_to_je16(current_fsuid()); |
444 | 446 | ||
445 | if (dir_i->i_mode & S_ISGID) { | 447 | if (dir_i->i_mode & S_ISGID) { |
446 | ri->gid = cpu_to_je16(dir_i->i_gid); | 448 | ri->gid = cpu_to_je16(dir_i->i_gid); |
447 | if (S_ISDIR(mode)) | 449 | if (S_ISDIR(mode)) |
448 | mode |= S_ISGID; | 450 | mode |= S_ISGID; |
449 | } else { | 451 | } else { |
450 | ri->gid = cpu_to_je16(current->fsgid); | 452 | ri->gid = cpu_to_je16(current_fsgid()); |
451 | } | 453 | } |
452 | 454 | ||
453 | /* POSIX ACLs have to be processed now, at least partly. | 455 | /* POSIX ACLs have to be processed now, at least partly. |
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index a9bf9603c1ba..0875b60b4bf7 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c | |||
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) | |||
261 | 261 | ||
262 | jffs2_sum_reset_collected(c->summary); /* reset collected summary */ | 262 | jffs2_sum_reset_collected(c->summary); /* reset collected summary */ |
263 | 263 | ||
264 | /* adjust write buffer offset, else we get a non contiguous write bug */ | ||
265 | if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) | ||
266 | c->wbuf_ofs = 0xffffffff; | ||
267 | |||
264 | D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); | 268 | D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); |
265 | 269 | ||
266 | return 0; | 270 | return 0; |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index efd401257ed9..4c4e18c54a51 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/mtd/super.h> | 22 | #include <linux/mtd/super.h> |
23 | #include <linux/ctype.h> | 23 | #include <linux/ctype.h> |
24 | #include <linux/namei.h> | 24 | #include <linux/namei.h> |
25 | #include <linux/exportfs.h> | ||
25 | #include "compr.h" | 26 | #include "compr.h" |
26 | #include "nodelist.h" | 27 | #include "nodelist.h" |
27 | 28 | ||
@@ -62,6 +63,52 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) | |||
62 | return 0; | 63 | return 0; |
63 | } | 64 | } |
64 | 65 | ||
66 | static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino, | ||
67 | uint32_t generation) | ||
68 | { | ||
69 | /* We don't care about i_generation. We'll destroy the flash | ||
70 | before we start re-using inode numbers anyway. And even | ||
71 | if that wasn't true, we'd have other problems...*/ | ||
72 | return jffs2_iget(sb, ino); | ||
73 | } | ||
74 | |||
75 | static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
76 | int fh_len, int fh_type) | ||
77 | { | ||
78 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
79 | jffs2_nfs_get_inode); | ||
80 | } | ||
81 | |||
82 | static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
83 | int fh_len, int fh_type) | ||
84 | { | ||
85 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
86 | jffs2_nfs_get_inode); | ||
87 | } | ||
88 | |||
89 | static struct dentry *jffs2_get_parent(struct dentry *child) | ||
90 | { | ||
91 | struct jffs2_inode_info *f; | ||
92 | uint32_t pino; | ||
93 | |||
94 | BUG_ON(!S_ISDIR(child->d_inode->i_mode)); | ||
95 | |||
96 | f = JFFS2_INODE_INFO(child->d_inode); | ||
97 | |||
98 | pino = f->inocache->pino_nlink; | ||
99 | |||
100 | JFFS2_DEBUG("Parent of directory ino #%u is #%u\n", | ||
101 | f->inocache->ino, pino); | ||
102 | |||
103 | return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); | ||
104 | } | ||
105 | |||
106 | static struct export_operations jffs2_export_ops = { | ||
107 | .get_parent = jffs2_get_parent, | ||
108 | .fh_to_dentry = jffs2_fh_to_dentry, | ||
109 | .fh_to_parent = jffs2_fh_to_parent, | ||
110 | }; | ||
111 | |||
65 | static const struct super_operations jffs2_super_operations = | 112 | static const struct super_operations jffs2_super_operations = |
66 | { | 113 | { |
67 | .alloc_inode = jffs2_alloc_inode, | 114 | .alloc_inode = jffs2_alloc_inode, |
@@ -104,6 +151,7 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) | |||
104 | spin_lock_init(&c->inocache_lock); | 151 | spin_lock_init(&c->inocache_lock); |
105 | 152 | ||
106 | sb->s_op = &jffs2_super_operations; | 153 | sb->s_op = &jffs2_super_operations; |
154 | sb->s_export_op = &jffs2_export_ops; | ||
107 | sb->s_flags = sb->s_flags | MS_NOATIME; | 155 | sb->s_flags = sb->s_flags | MS_NOATIME; |
108 | sb->s_xattr = jffs2_xattr_handlers; | 156 | sb->s_xattr = jffs2_xattr_handlers; |
109 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL | 157 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 0e78b00035e4..d9a721e6db70 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) | |||
679 | 679 | ||
680 | memset(c->wbuf,0xff,c->wbuf_pagesize); | 680 | memset(c->wbuf,0xff,c->wbuf_pagesize); |
681 | /* adjust write buffer offset, else we get a non contiguous write bug */ | 681 | /* adjust write buffer offset, else we get a non contiguous write bug */ |
682 | if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) | 682 | c->wbuf_ofs += c->wbuf_pagesize; |
683 | c->wbuf_ofs += c->wbuf_pagesize; | ||
684 | else | ||
685 | c->wbuf_ofs = 0xffffffff; | ||
686 | c->wbuf_len = 0; | 683 | c->wbuf_len = 0; |
687 | return 0; | 684 | return 0; |
688 | } | 685 | } |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cd2ec2988b59..335c4de6552d 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -1168,7 +1168,7 @@ journal_found: | |||
1168 | bd_release(bdev); | 1168 | bd_release(bdev); |
1169 | 1169 | ||
1170 | close: /* close external log device */ | 1170 | close: /* close external log device */ |
1171 | blkdev_put(bdev); | 1171 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
1172 | 1172 | ||
1173 | free: /* free log descriptor */ | 1173 | free: /* free log descriptor */ |
1174 | mutex_unlock(&jfs_log_mutex); | 1174 | mutex_unlock(&jfs_log_mutex); |
@@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb) | |||
1514 | rc = lmLogShutdown(log); | 1514 | rc = lmLogShutdown(log); |
1515 | 1515 | ||
1516 | bd_release(bdev); | 1516 | bd_release(bdev); |
1517 | blkdev_put(bdev); | 1517 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
1518 | 1518 | ||
1519 | kfree(log); | 1519 | kfree(log); |
1520 | 1520 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 2aba82386810..cc3cedffbfa1 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1511,25 +1511,12 @@ struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
1511 | 1511 | ||
1512 | struct dentry *jfs_get_parent(struct dentry *dentry) | 1512 | struct dentry *jfs_get_parent(struct dentry *dentry) |
1513 | { | 1513 | { |
1514 | struct super_block *sb = dentry->d_inode->i_sb; | ||
1515 | struct dentry *parent = ERR_PTR(-ENOENT); | ||
1516 | struct inode *inode; | ||
1517 | unsigned long parent_ino; | 1514 | unsigned long parent_ino; |
1518 | 1515 | ||
1519 | parent_ino = | 1516 | parent_ino = |
1520 | le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); | 1517 | le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); |
1521 | inode = jfs_iget(sb, parent_ino); | ||
1522 | if (IS_ERR(inode)) { | ||
1523 | parent = ERR_CAST(inode); | ||
1524 | } else { | ||
1525 | parent = d_alloc_anon(inode); | ||
1526 | if (!parent) { | ||
1527 | parent = ERR_PTR(-ENOMEM); | ||
1528 | iput(inode); | ||
1529 | } | ||
1530 | } | ||
1531 | 1518 | ||
1532 | return parent; | 1519 | return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino)); |
1533 | } | 1520 | } |
1534 | 1521 | ||
1535 | const struct inode_operations jfs_dir_inode_operations = { | 1522 | const struct inode_operations jfs_dir_inode_operations = { |
@@ -1560,6 +1547,7 @@ const struct file_operations jfs_dir_operations = { | |||
1560 | #ifdef CONFIG_COMPAT | 1547 | #ifdef CONFIG_COMPAT |
1561 | .compat_ioctl = jfs_compat_ioctl, | 1548 | .compat_ioctl = jfs_compat_ioctl, |
1562 | #endif | 1549 | #endif |
1550 | .llseek = generic_file_llseek, | ||
1563 | }; | 1551 | }; |
1564 | 1552 | ||
1565 | static int jfs_ci_hash(struct dentry *dir, struct qstr *this) | 1553 | static int jfs_ci_hash(struct dentry *dir, struct qstr *this) |
diff --git a/fs/libfs.c b/fs/libfs.c index 1add676a19df..74688598bcf7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -732,28 +732,6 @@ out: | |||
732 | return ret; | 732 | return ret; |
733 | } | 733 | } |
734 | 734 | ||
735 | /* | ||
736 | * This is what d_alloc_anon should have been. Once the exportfs | ||
737 | * argument transition has been finished I will update d_alloc_anon | ||
738 | * to this prototype and this wrapper will go away. --hch | ||
739 | */ | ||
740 | static struct dentry *exportfs_d_alloc(struct inode *inode) | ||
741 | { | ||
742 | struct dentry *dentry; | ||
743 | |||
744 | if (!inode) | ||
745 | return NULL; | ||
746 | if (IS_ERR(inode)) | ||
747 | return ERR_PTR(PTR_ERR(inode)); | ||
748 | |||
749 | dentry = d_alloc_anon(inode); | ||
750 | if (!dentry) { | ||
751 | iput(inode); | ||
752 | dentry = ERR_PTR(-ENOMEM); | ||
753 | } | ||
754 | return dentry; | ||
755 | } | ||
756 | |||
757 | /** | 735 | /** |
758 | * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation | 736 | * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation |
759 | * @sb: filesystem to do the file handle conversion on | 737 | * @sb: filesystem to do the file handle conversion on |
@@ -782,7 +760,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
782 | break; | 760 | break; |
783 | } | 761 | } |
784 | 762 | ||
785 | return exportfs_d_alloc(inode); | 763 | return d_obtain_alias(inode); |
786 | } | 764 | } |
787 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); | 765 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); |
788 | 766 | ||
@@ -815,7 +793,7 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
815 | break; | 793 | break; |
816 | } | 794 | } |
817 | 795 | ||
818 | return exportfs_d_alloc(inode); | 796 | return d_obtain_alias(inode); |
819 | } | 797 | } |
820 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); | 798 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); |
821 | 799 | ||
diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd38..09062e3ff104 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) | |||
1580 | cmd &= ~LOCK_NB; | 1580 | cmd &= ~LOCK_NB; |
1581 | unlock = (cmd == LOCK_UN); | 1581 | unlock = (cmd == LOCK_UN); |
1582 | 1582 | ||
1583 | if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) | 1583 | if (!unlock && !(cmd & LOCK_MAND) && |
1584 | !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) | ||
1584 | goto out_putf; | 1585 | goto out_putf; |
1585 | 1586 | ||
1586 | error = flock_make_lock(filp, &lock, cmd); | 1587 | error = flock_make_lock(filp, &lock, cmd); |
@@ -2078,6 +2079,7 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | |||
2078 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); | 2079 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); |
2079 | 2080 | ||
2080 | #ifdef CONFIG_PROC_FS | 2081 | #ifdef CONFIG_PROC_FS |
2082 | #include <linux/proc_fs.h> | ||
2081 | #include <linux/seq_file.h> | 2083 | #include <linux/seq_file.h> |
2082 | 2084 | ||
2083 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2085 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
@@ -2183,12 +2185,31 @@ static void locks_stop(struct seq_file *f, void *v) | |||
2183 | unlock_kernel(); | 2185 | unlock_kernel(); |
2184 | } | 2186 | } |
2185 | 2187 | ||
2186 | struct seq_operations locks_seq_operations = { | 2188 | static const struct seq_operations locks_seq_operations = { |
2187 | .start = locks_start, | 2189 | .start = locks_start, |
2188 | .next = locks_next, | 2190 | .next = locks_next, |
2189 | .stop = locks_stop, | 2191 | .stop = locks_stop, |
2190 | .show = locks_show, | 2192 | .show = locks_show, |
2191 | }; | 2193 | }; |
2194 | |||
2195 | static int locks_open(struct inode *inode, struct file *filp) | ||
2196 | { | ||
2197 | return seq_open(filp, &locks_seq_operations); | ||
2198 | } | ||
2199 | |||
2200 | static const struct file_operations proc_locks_operations = { | ||
2201 | .open = locks_open, | ||
2202 | .read = seq_read, | ||
2203 | .llseek = seq_lseek, | ||
2204 | .release = seq_release, | ||
2205 | }; | ||
2206 | |||
2207 | static int __init proc_locks_init(void) | ||
2208 | { | ||
2209 | proc_create("locks", 0, NULL, &proc_locks_operations); | ||
2210 | return 0; | ||
2211 | } | ||
2212 | module_init(proc_locks_init); | ||
2192 | #endif | 2213 | #endif |
2193 | 2214 | ||
2194 | /** | 2215 | /** |
diff --git a/fs/namei.c b/fs/namei.c index 4ea63ed5e791..09ce58e49e72 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask, | |||
212 | * Read/write DACs are always overridable. | 212 | * Read/write DACs are always overridable. |
213 | * Executable DACs are overridable if at least one exec bit is set. | 213 | * Executable DACs are overridable if at least one exec bit is set. |
214 | */ | 214 | */ |
215 | if (!(mask & MAY_EXEC) || | 215 | if (!(mask & MAY_EXEC) || execute_ok(inode)) |
216 | (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) | ||
217 | if (capable(CAP_DAC_OVERRIDE)) | 216 | if (capable(CAP_DAC_OVERRIDE)) |
218 | return 0; | 217 | return 0; |
219 | 218 | ||
@@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask) | |||
249 | } | 248 | } |
250 | 249 | ||
251 | /* Ordinary permission routines do not understand MAY_APPEND. */ | 250 | /* Ordinary permission routines do not understand MAY_APPEND. */ |
252 | if (inode->i_op && inode->i_op->permission) { | 251 | if (inode->i_op && inode->i_op->permission) |
253 | retval = inode->i_op->permission(inode, mask); | 252 | retval = inode->i_op->permission(inode, mask); |
254 | if (!retval) { | 253 | else |
255 | /* | ||
256 | * Exec permission on a regular file is denied if none | ||
257 | * of the execute bits are set. | ||
258 | * | ||
259 | * This check should be done by the ->permission() | ||
260 | * method. | ||
261 | */ | ||
262 | if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && | ||
263 | !(inode->i_mode & S_IXUGO)) | ||
264 | return -EACCES; | ||
265 | } | ||
266 | } else { | ||
267 | retval = generic_permission(inode, mask, NULL); | 254 | retval = generic_permission(inode, mask, NULL); |
268 | } | 255 | |
269 | if (retval) | 256 | if (retval) |
270 | return retval; | 257 | return retval; |
271 | 258 | ||
@@ -1106,6 +1093,15 @@ int path_lookup(const char *name, unsigned int flags, | |||
1106 | return do_path_lookup(AT_FDCWD, name, flags, nd); | 1093 | return do_path_lookup(AT_FDCWD, name, flags, nd); |
1107 | } | 1094 | } |
1108 | 1095 | ||
1096 | int kern_path(const char *name, unsigned int flags, struct path *path) | ||
1097 | { | ||
1098 | struct nameidata nd; | ||
1099 | int res = do_path_lookup(AT_FDCWD, name, flags, &nd); | ||
1100 | if (!res) | ||
1101 | *path = nd.path; | ||
1102 | return res; | ||
1103 | } | ||
1104 | |||
1109 | /** | 1105 | /** |
1110 | * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair | 1106 | * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair |
1111 | * @dentry: pointer to dentry of the base directory | 1107 | * @dentry: pointer to dentry of the base directory |
@@ -1138,9 +1134,16 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1138 | 1134 | ||
1139 | } | 1135 | } |
1140 | 1136 | ||
1141 | static int __path_lookup_intent_open(int dfd, const char *name, | 1137 | /** |
1142 | unsigned int lookup_flags, struct nameidata *nd, | 1138 | * path_lookup_open - lookup a file path with open intent |
1143 | int open_flags, int create_mode) | 1139 | * @dfd: the directory to use as base, or AT_FDCWD |
1140 | * @name: pointer to file name | ||
1141 | * @lookup_flags: lookup intent flags | ||
1142 | * @nd: pointer to nameidata | ||
1143 | * @open_flags: open intent flags | ||
1144 | */ | ||
1145 | int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, | ||
1146 | struct nameidata *nd, int open_flags) | ||
1144 | { | 1147 | { |
1145 | struct file *filp = get_empty_filp(); | 1148 | struct file *filp = get_empty_filp(); |
1146 | int err; | 1149 | int err; |
@@ -1149,7 +1152,7 @@ static int __path_lookup_intent_open(int dfd, const char *name, | |||
1149 | return -ENFILE; | 1152 | return -ENFILE; |
1150 | nd->intent.open.file = filp; | 1153 | nd->intent.open.file = filp; |
1151 | nd->intent.open.flags = open_flags; | 1154 | nd->intent.open.flags = open_flags; |
1152 | nd->intent.open.create_mode = create_mode; | 1155 | nd->intent.open.create_mode = 0; |
1153 | err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); | 1156 | err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); |
1154 | if (IS_ERR(nd->intent.open.file)) { | 1157 | if (IS_ERR(nd->intent.open.file)) { |
1155 | if (err == 0) { | 1158 | if (err == 0) { |
@@ -1161,38 +1164,6 @@ static int __path_lookup_intent_open(int dfd, const char *name, | |||
1161 | return err; | 1164 | return err; |
1162 | } | 1165 | } |
1163 | 1166 | ||
1164 | /** | ||
1165 | * path_lookup_open - lookup a file path with open intent | ||
1166 | * @dfd: the directory to use as base, or AT_FDCWD | ||
1167 | * @name: pointer to file name | ||
1168 | * @lookup_flags: lookup intent flags | ||
1169 | * @nd: pointer to nameidata | ||
1170 | * @open_flags: open intent flags | ||
1171 | */ | ||
1172 | int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, | ||
1173 | struct nameidata *nd, int open_flags) | ||
1174 | { | ||
1175 | return __path_lookup_intent_open(dfd, name, lookup_flags, nd, | ||
1176 | open_flags, 0); | ||
1177 | } | ||
1178 | |||
1179 | /** | ||
1180 | * path_lookup_create - lookup a file path with open + create intent | ||
1181 | * @dfd: the directory to use as base, or AT_FDCWD | ||
1182 | * @name: pointer to file name | ||
1183 | * @lookup_flags: lookup intent flags | ||
1184 | * @nd: pointer to nameidata | ||
1185 | * @open_flags: open intent flags | ||
1186 | * @create_mode: create intent flags | ||
1187 | */ | ||
1188 | static int path_lookup_create(int dfd, const char *name, | ||
1189 | unsigned int lookup_flags, struct nameidata *nd, | ||
1190 | int open_flags, int create_mode) | ||
1191 | { | ||
1192 | return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, | ||
1193 | nd, open_flags, create_mode); | ||
1194 | } | ||
1195 | |||
1196 | static struct dentry *__lookup_hash(struct qstr *name, | 1167 | static struct dentry *__lookup_hash(struct qstr *name, |
1197 | struct dentry *base, struct nameidata *nd) | 1168 | struct dentry *base, struct nameidata *nd) |
1198 | { | 1169 | { |
@@ -1470,20 +1441,18 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | |||
1470 | 1441 | ||
1471 | mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); | 1442 | mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); |
1472 | 1443 | ||
1473 | for (p = p1; p->d_parent != p; p = p->d_parent) { | 1444 | p = d_ancestor(p2, p1); |
1474 | if (p->d_parent == p2) { | 1445 | if (p) { |
1475 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); | 1446 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); |
1476 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); | 1447 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); |
1477 | return p; | 1448 | return p; |
1478 | } | ||
1479 | } | 1449 | } |
1480 | 1450 | ||
1481 | for (p = p2; p->d_parent != p; p = p->d_parent) { | 1451 | p = d_ancestor(p1, p2); |
1482 | if (p->d_parent == p1) { | 1452 | if (p) { |
1483 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); | 1453 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
1484 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); | 1454 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); |
1485 | return p; | 1455 | return p; |
1486 | } | ||
1487 | } | 1456 | } |
1488 | 1457 | ||
1489 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); | 1458 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
@@ -1702,8 +1671,7 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1702 | /* | 1671 | /* |
1703 | * Create - we need to know the parent. | 1672 | * Create - we need to know the parent. |
1704 | */ | 1673 | */ |
1705 | error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, | 1674 | error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); |
1706 | &nd, flag, mode); | ||
1707 | if (error) | 1675 | if (error) |
1708 | return ERR_PTR(error); | 1676 | return ERR_PTR(error); |
1709 | 1677 | ||
@@ -1714,10 +1682,20 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1714 | */ | 1682 | */ |
1715 | error = -EISDIR; | 1683 | error = -EISDIR; |
1716 | if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) | 1684 | if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) |
1717 | goto exit; | 1685 | goto exit_parent; |
1718 | 1686 | ||
1687 | error = -ENFILE; | ||
1688 | filp = get_empty_filp(); | ||
1689 | if (filp == NULL) | ||
1690 | goto exit_parent; | ||
1691 | nd.intent.open.file = filp; | ||
1692 | nd.intent.open.flags = flag; | ||
1693 | nd.intent.open.create_mode = mode; | ||
1719 | dir = nd.path.dentry; | 1694 | dir = nd.path.dentry; |
1720 | nd.flags &= ~LOOKUP_PARENT; | 1695 | nd.flags &= ~LOOKUP_PARENT; |
1696 | nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN; | ||
1697 | if (flag & O_EXCL) | ||
1698 | nd.flags |= LOOKUP_EXCL; | ||
1721 | mutex_lock(&dir->d_inode->i_mutex); | 1699 | mutex_lock(&dir->d_inode->i_mutex); |
1722 | path.dentry = lookup_hash(&nd); | 1700 | path.dentry = lookup_hash(&nd); |
1723 | path.mnt = nd.path.mnt; | 1701 | path.mnt = nd.path.mnt; |
@@ -1822,6 +1800,7 @@ exit_dput: | |||
1822 | exit: | 1800 | exit: |
1823 | if (!IS_ERR(nd.intent.open.file)) | 1801 | if (!IS_ERR(nd.intent.open.file)) |
1824 | release_open_intent(&nd); | 1802 | release_open_intent(&nd); |
1803 | exit_parent: | ||
1825 | path_put(&nd.path); | 1804 | path_put(&nd.path); |
1826 | return ERR_PTR(error); | 1805 | return ERR_PTR(error); |
1827 | 1806 | ||
@@ -1914,7 +1893,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) | |||
1914 | if (nd->last_type != LAST_NORM) | 1893 | if (nd->last_type != LAST_NORM) |
1915 | goto fail; | 1894 | goto fail; |
1916 | nd->flags &= ~LOOKUP_PARENT; | 1895 | nd->flags &= ~LOOKUP_PARENT; |
1917 | nd->flags |= LOOKUP_CREATE; | 1896 | nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; |
1918 | nd->intent.open.flags = O_EXCL; | 1897 | nd->intent.open.flags = O_EXCL; |
1919 | 1898 | ||
1920 | /* | 1899 | /* |
@@ -2178,16 +2157,19 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
2178 | return error; | 2157 | return error; |
2179 | 2158 | ||
2180 | switch(nd.last_type) { | 2159 | switch(nd.last_type) { |
2181 | case LAST_DOTDOT: | 2160 | case LAST_DOTDOT: |
2182 | error = -ENOTEMPTY; | 2161 | error = -ENOTEMPTY; |
2183 | goto exit1; | 2162 | goto exit1; |
2184 | case LAST_DOT: | 2163 | case LAST_DOT: |
2185 | error = -EINVAL; | 2164 | error = -EINVAL; |
2186 | goto exit1; | 2165 | goto exit1; |
2187 | case LAST_ROOT: | 2166 | case LAST_ROOT: |
2188 | error = -EBUSY; | 2167 | error = -EBUSY; |
2189 | goto exit1; | 2168 | goto exit1; |
2190 | } | 2169 | } |
2170 | |||
2171 | nd.flags &= ~LOOKUP_PARENT; | ||
2172 | |||
2191 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 2173 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2192 | dentry = lookup_hash(&nd); | 2174 | dentry = lookup_hash(&nd); |
2193 | error = PTR_ERR(dentry); | 2175 | error = PTR_ERR(dentry); |
@@ -2265,6 +2247,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
2265 | error = -EISDIR; | 2247 | error = -EISDIR; |
2266 | if (nd.last_type != LAST_NORM) | 2248 | if (nd.last_type != LAST_NORM) |
2267 | goto exit1; | 2249 | goto exit1; |
2250 | |||
2251 | nd.flags &= ~LOOKUP_PARENT; | ||
2252 | |||
2268 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 2253 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2269 | dentry = lookup_hash(&nd); | 2254 | dentry = lookup_hash(&nd); |
2270 | error = PTR_ERR(dentry); | 2255 | error = PTR_ERR(dentry); |
@@ -2654,6 +2639,10 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, | |||
2654 | if (newnd.last_type != LAST_NORM) | 2639 | if (newnd.last_type != LAST_NORM) |
2655 | goto exit2; | 2640 | goto exit2; |
2656 | 2641 | ||
2642 | oldnd.flags &= ~LOOKUP_PARENT; | ||
2643 | newnd.flags &= ~LOOKUP_PARENT; | ||
2644 | newnd.flags |= LOOKUP_RENAME_TARGET; | ||
2645 | |||
2657 | trap = lock_rename(new_dir, old_dir); | 2646 | trap = lock_rename(new_dir, old_dir); |
2658 | 2647 | ||
2659 | old_dentry = lookup_hash(&oldnd); | 2648 | old_dentry = lookup_hash(&oldnd); |
@@ -2855,6 +2844,7 @@ EXPORT_SYMBOL(__page_symlink); | |||
2855 | EXPORT_SYMBOL(page_symlink); | 2844 | EXPORT_SYMBOL(page_symlink); |
2856 | EXPORT_SYMBOL(page_symlink_inode_operations); | 2845 | EXPORT_SYMBOL(page_symlink_inode_operations); |
2857 | EXPORT_SYMBOL(path_lookup); | 2846 | EXPORT_SYMBOL(path_lookup); |
2847 | EXPORT_SYMBOL(kern_path); | ||
2858 | EXPORT_SYMBOL(vfs_path_lookup); | 2848 | EXPORT_SYMBOL(vfs_path_lookup); |
2859 | EXPORT_SYMBOL(inode_permission); | 2849 | EXPORT_SYMBOL(inode_permission); |
2860 | EXPORT_SYMBOL(vfs_permission); | 2850 | EXPORT_SYMBOL(vfs_permission); |
diff --git a/fs/namespace.c b/fs/namespace.c index 6e283c93b50d..cce46702d33c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1167,19 +1167,19 @@ asmlinkage long sys_oldumount(char __user * name) | |||
1167 | 1167 | ||
1168 | #endif | 1168 | #endif |
1169 | 1169 | ||
1170 | static int mount_is_safe(struct nameidata *nd) | 1170 | static int mount_is_safe(struct path *path) |
1171 | { | 1171 | { |
1172 | if (capable(CAP_SYS_ADMIN)) | 1172 | if (capable(CAP_SYS_ADMIN)) |
1173 | return 0; | 1173 | return 0; |
1174 | return -EPERM; | 1174 | return -EPERM; |
1175 | #ifdef notyet | 1175 | #ifdef notyet |
1176 | if (S_ISLNK(nd->path.dentry->d_inode->i_mode)) | 1176 | if (S_ISLNK(path->dentry->d_inode->i_mode)) |
1177 | return -EPERM; | 1177 | return -EPERM; |
1178 | if (nd->path.dentry->d_inode->i_mode & S_ISVTX) { | 1178 | if (path->dentry->d_inode->i_mode & S_ISVTX) { |
1179 | if (current->uid != nd->path.dentry->d_inode->i_uid) | 1179 | if (current->uid != path->dentry->d_inode->i_uid) |
1180 | return -EPERM; | 1180 | return -EPERM; |
1181 | } | 1181 | } |
1182 | if (vfs_permission(nd, MAY_WRITE)) | 1182 | if (inode_permission(path->dentry->d_inode, MAY_WRITE)) |
1183 | return -EPERM; | 1183 | return -EPERM; |
1184 | return 0; | 1184 | return 0; |
1185 | #endif | 1185 | #endif |
@@ -1425,11 +1425,10 @@ out_unlock: | |||
1425 | 1425 | ||
1426 | /* | 1426 | /* |
1427 | * recursively change the type of the mountpoint. | 1427 | * recursively change the type of the mountpoint. |
1428 | * noinline this do_mount helper to save do_mount stack space. | ||
1429 | */ | 1428 | */ |
1430 | static noinline int do_change_type(struct nameidata *nd, int flag) | 1429 | static int do_change_type(struct path *path, int flag) |
1431 | { | 1430 | { |
1432 | struct vfsmount *m, *mnt = nd->path.mnt; | 1431 | struct vfsmount *m, *mnt = path->mnt; |
1433 | int recurse = flag & MS_REC; | 1432 | int recurse = flag & MS_REC; |
1434 | int type = flag & ~MS_REC; | 1433 | int type = flag & ~MS_REC; |
1435 | int err = 0; | 1434 | int err = 0; |
@@ -1437,7 +1436,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag) | |||
1437 | if (!capable(CAP_SYS_ADMIN)) | 1436 | if (!capable(CAP_SYS_ADMIN)) |
1438 | return -EPERM; | 1437 | return -EPERM; |
1439 | 1438 | ||
1440 | if (nd->path.dentry != nd->path.mnt->mnt_root) | 1439 | if (path->dentry != path->mnt->mnt_root) |
1441 | return -EINVAL; | 1440 | return -EINVAL; |
1442 | 1441 | ||
1443 | down_write(&namespace_sem); | 1442 | down_write(&namespace_sem); |
@@ -1459,40 +1458,39 @@ static noinline int do_change_type(struct nameidata *nd, int flag) | |||
1459 | 1458 | ||
1460 | /* | 1459 | /* |
1461 | * do loopback mount. | 1460 | * do loopback mount. |
1462 | * noinline this do_mount helper to save do_mount stack space. | ||
1463 | */ | 1461 | */ |
1464 | static noinline int do_loopback(struct nameidata *nd, char *old_name, | 1462 | static int do_loopback(struct path *path, char *old_name, |
1465 | int recurse) | 1463 | int recurse) |
1466 | { | 1464 | { |
1467 | struct nameidata old_nd; | 1465 | struct path old_path; |
1468 | struct vfsmount *mnt = NULL; | 1466 | struct vfsmount *mnt = NULL; |
1469 | int err = mount_is_safe(nd); | 1467 | int err = mount_is_safe(path); |
1470 | if (err) | 1468 | if (err) |
1471 | return err; | 1469 | return err; |
1472 | if (!old_name || !*old_name) | 1470 | if (!old_name || !*old_name) |
1473 | return -EINVAL; | 1471 | return -EINVAL; |
1474 | err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); | 1472 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); |
1475 | if (err) | 1473 | if (err) |
1476 | return err; | 1474 | return err; |
1477 | 1475 | ||
1478 | down_write(&namespace_sem); | 1476 | down_write(&namespace_sem); |
1479 | err = -EINVAL; | 1477 | err = -EINVAL; |
1480 | if (IS_MNT_UNBINDABLE(old_nd.path.mnt)) | 1478 | if (IS_MNT_UNBINDABLE(old_path.mnt)) |
1481 | goto out; | 1479 | goto out; |
1482 | 1480 | ||
1483 | if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) | 1481 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
1484 | goto out; | 1482 | goto out; |
1485 | 1483 | ||
1486 | err = -ENOMEM; | 1484 | err = -ENOMEM; |
1487 | if (recurse) | 1485 | if (recurse) |
1488 | mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0); | 1486 | mnt = copy_tree(old_path.mnt, old_path.dentry, 0); |
1489 | else | 1487 | else |
1490 | mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0); | 1488 | mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); |
1491 | 1489 | ||
1492 | if (!mnt) | 1490 | if (!mnt) |
1493 | goto out; | 1491 | goto out; |
1494 | 1492 | ||
1495 | err = graft_tree(mnt, &nd->path); | 1493 | err = graft_tree(mnt, path); |
1496 | if (err) { | 1494 | if (err) { |
1497 | LIST_HEAD(umount_list); | 1495 | LIST_HEAD(umount_list); |
1498 | spin_lock(&vfsmount_lock); | 1496 | spin_lock(&vfsmount_lock); |
@@ -1503,7 +1501,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name, | |||
1503 | 1501 | ||
1504 | out: | 1502 | out: |
1505 | up_write(&namespace_sem); | 1503 | up_write(&namespace_sem); |
1506 | path_put(&old_nd.path); | 1504 | path_put(&old_path); |
1507 | return err; | 1505 | return err; |
1508 | } | 1506 | } |
1509 | 1507 | ||
@@ -1528,33 +1526,37 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) | |||
1528 | * change filesystem flags. dir should be a physical root of filesystem. | 1526 | * change filesystem flags. dir should be a physical root of filesystem. |
1529 | * If you've mounted a non-root directory somewhere and want to do remount | 1527 | * If you've mounted a non-root directory somewhere and want to do remount |
1530 | * on it - tough luck. | 1528 | * on it - tough luck. |
1531 | * noinline this do_mount helper to save do_mount stack space. | ||
1532 | */ | 1529 | */ |
1533 | static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, | 1530 | static int do_remount(struct path *path, int flags, int mnt_flags, |
1534 | void *data) | 1531 | void *data) |
1535 | { | 1532 | { |
1536 | int err; | 1533 | int err; |
1537 | struct super_block *sb = nd->path.mnt->mnt_sb; | 1534 | struct super_block *sb = path->mnt->mnt_sb; |
1538 | 1535 | ||
1539 | if (!capable(CAP_SYS_ADMIN)) | 1536 | if (!capable(CAP_SYS_ADMIN)) |
1540 | return -EPERM; | 1537 | return -EPERM; |
1541 | 1538 | ||
1542 | if (!check_mnt(nd->path.mnt)) | 1539 | if (!check_mnt(path->mnt)) |
1543 | return -EINVAL; | 1540 | return -EINVAL; |
1544 | 1541 | ||
1545 | if (nd->path.dentry != nd->path.mnt->mnt_root) | 1542 | if (path->dentry != path->mnt->mnt_root) |
1546 | return -EINVAL; | 1543 | return -EINVAL; |
1547 | 1544 | ||
1548 | down_write(&sb->s_umount); | 1545 | down_write(&sb->s_umount); |
1549 | if (flags & MS_BIND) | 1546 | if (flags & MS_BIND) |
1550 | err = change_mount_flags(nd->path.mnt, flags); | 1547 | err = change_mount_flags(path->mnt, flags); |
1551 | else | 1548 | else |
1552 | err = do_remount_sb(sb, flags, data, 0); | 1549 | err = do_remount_sb(sb, flags, data, 0); |
1553 | if (!err) | 1550 | if (!err) |
1554 | nd->path.mnt->mnt_flags = mnt_flags; | 1551 | path->mnt->mnt_flags = mnt_flags; |
1555 | up_write(&sb->s_umount); | 1552 | up_write(&sb->s_umount); |
1556 | if (!err) | 1553 | if (!err) { |
1557 | security_sb_post_remount(nd->path.mnt, flags, data); | 1554 | security_sb_post_remount(path->mnt, flags, data); |
1555 | |||
1556 | spin_lock(&vfsmount_lock); | ||
1557 | touch_mnt_namespace(path->mnt->mnt_ns); | ||
1558 | spin_unlock(&vfsmount_lock); | ||
1559 | } | ||
1558 | return err; | 1560 | return err; |
1559 | } | 1561 | } |
1560 | 1562 | ||
@@ -1568,90 +1570,85 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt) | |||
1568 | return 0; | 1570 | return 0; |
1569 | } | 1571 | } |
1570 | 1572 | ||
1571 | /* | 1573 | static int do_move_mount(struct path *path, char *old_name) |
1572 | * noinline this do_mount helper to save do_mount stack space. | ||
1573 | */ | ||
1574 | static noinline int do_move_mount(struct nameidata *nd, char *old_name) | ||
1575 | { | 1574 | { |
1576 | struct nameidata old_nd; | 1575 | struct path old_path, parent_path; |
1577 | struct path parent_path; | ||
1578 | struct vfsmount *p; | 1576 | struct vfsmount *p; |
1579 | int err = 0; | 1577 | int err = 0; |
1580 | if (!capable(CAP_SYS_ADMIN)) | 1578 | if (!capable(CAP_SYS_ADMIN)) |
1581 | return -EPERM; | 1579 | return -EPERM; |
1582 | if (!old_name || !*old_name) | 1580 | if (!old_name || !*old_name) |
1583 | return -EINVAL; | 1581 | return -EINVAL; |
1584 | err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); | 1582 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); |
1585 | if (err) | 1583 | if (err) |
1586 | return err; | 1584 | return err; |
1587 | 1585 | ||
1588 | down_write(&namespace_sem); | 1586 | down_write(&namespace_sem); |
1589 | while (d_mountpoint(nd->path.dentry) && | 1587 | while (d_mountpoint(path->dentry) && |
1590 | follow_down(&nd->path.mnt, &nd->path.dentry)) | 1588 | follow_down(&path->mnt, &path->dentry)) |
1591 | ; | 1589 | ; |
1592 | err = -EINVAL; | 1590 | err = -EINVAL; |
1593 | if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) | 1591 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
1594 | goto out; | 1592 | goto out; |
1595 | 1593 | ||
1596 | err = -ENOENT; | 1594 | err = -ENOENT; |
1597 | mutex_lock(&nd->path.dentry->d_inode->i_mutex); | 1595 | mutex_lock(&path->dentry->d_inode->i_mutex); |
1598 | if (IS_DEADDIR(nd->path.dentry->d_inode)) | 1596 | if (IS_DEADDIR(path->dentry->d_inode)) |
1599 | goto out1; | 1597 | goto out1; |
1600 | 1598 | ||
1601 | if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry)) | 1599 | if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) |
1602 | goto out1; | 1600 | goto out1; |
1603 | 1601 | ||
1604 | err = -EINVAL; | 1602 | err = -EINVAL; |
1605 | if (old_nd.path.dentry != old_nd.path.mnt->mnt_root) | 1603 | if (old_path.dentry != old_path.mnt->mnt_root) |
1606 | goto out1; | 1604 | goto out1; |
1607 | 1605 | ||
1608 | if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent) | 1606 | if (old_path.mnt == old_path.mnt->mnt_parent) |
1609 | goto out1; | 1607 | goto out1; |
1610 | 1608 | ||
1611 | if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != | 1609 | if (S_ISDIR(path->dentry->d_inode->i_mode) != |
1612 | S_ISDIR(old_nd.path.dentry->d_inode->i_mode)) | 1610 | S_ISDIR(old_path.dentry->d_inode->i_mode)) |
1613 | goto out1; | 1611 | goto out1; |
1614 | /* | 1612 | /* |
1615 | * Don't move a mount residing in a shared parent. | 1613 | * Don't move a mount residing in a shared parent. |
1616 | */ | 1614 | */ |
1617 | if (old_nd.path.mnt->mnt_parent && | 1615 | if (old_path.mnt->mnt_parent && |
1618 | IS_MNT_SHARED(old_nd.path.mnt->mnt_parent)) | 1616 | IS_MNT_SHARED(old_path.mnt->mnt_parent)) |
1619 | goto out1; | 1617 | goto out1; |
1620 | /* | 1618 | /* |
1621 | * Don't move a mount tree containing unbindable mounts to a destination | 1619 | * Don't move a mount tree containing unbindable mounts to a destination |
1622 | * mount which is shared. | 1620 | * mount which is shared. |
1623 | */ | 1621 | */ |
1624 | if (IS_MNT_SHARED(nd->path.mnt) && | 1622 | if (IS_MNT_SHARED(path->mnt) && |
1625 | tree_contains_unbindable(old_nd.path.mnt)) | 1623 | tree_contains_unbindable(old_path.mnt)) |
1626 | goto out1; | 1624 | goto out1; |
1627 | err = -ELOOP; | 1625 | err = -ELOOP; |
1628 | for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent) | 1626 | for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent) |
1629 | if (p == old_nd.path.mnt) | 1627 | if (p == old_path.mnt) |
1630 | goto out1; | 1628 | goto out1; |
1631 | 1629 | ||
1632 | err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); | 1630 | err = attach_recursive_mnt(old_path.mnt, path, &parent_path); |
1633 | if (err) | 1631 | if (err) |
1634 | goto out1; | 1632 | goto out1; |
1635 | 1633 | ||
1636 | /* if the mount is moved, it should no longer be expire | 1634 | /* if the mount is moved, it should no longer be expire |
1637 | * automatically */ | 1635 | * automatically */ |
1638 | list_del_init(&old_nd.path.mnt->mnt_expire); | 1636 | list_del_init(&old_path.mnt->mnt_expire); |
1639 | out1: | 1637 | out1: |
1640 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); | 1638 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
1641 | out: | 1639 | out: |
1642 | up_write(&namespace_sem); | 1640 | up_write(&namespace_sem); |
1643 | if (!err) | 1641 | if (!err) |
1644 | path_put(&parent_path); | 1642 | path_put(&parent_path); |
1645 | path_put(&old_nd.path); | 1643 | path_put(&old_path); |
1646 | return err; | 1644 | return err; |
1647 | } | 1645 | } |
1648 | 1646 | ||
1649 | /* | 1647 | /* |
1650 | * create a new mount for userspace and request it to be added into the | 1648 | * create a new mount for userspace and request it to be added into the |
1651 | * namespace's tree | 1649 | * namespace's tree |
1652 | * noinline this do_mount helper to save do_mount stack space. | ||
1653 | */ | 1650 | */ |
1654 | static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, | 1651 | static int do_new_mount(struct path *path, char *type, int flags, |
1655 | int mnt_flags, char *name, void *data) | 1652 | int mnt_flags, char *name, void *data) |
1656 | { | 1653 | { |
1657 | struct vfsmount *mnt; | 1654 | struct vfsmount *mnt; |
@@ -1667,7 +1664,7 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, | |||
1667 | if (IS_ERR(mnt)) | 1664 | if (IS_ERR(mnt)) |
1668 | return PTR_ERR(mnt); | 1665 | return PTR_ERR(mnt); |
1669 | 1666 | ||
1670 | return do_add_mount(mnt, &nd->path, mnt_flags, NULL); | 1667 | return do_add_mount(mnt, path, mnt_flags, NULL); |
1671 | } | 1668 | } |
1672 | 1669 | ||
1673 | /* | 1670 | /* |
@@ -1902,7 +1899,7 @@ int copy_mount_options(const void __user * data, unsigned long *where) | |||
1902 | long do_mount(char *dev_name, char *dir_name, char *type_page, | 1899 | long do_mount(char *dev_name, char *dir_name, char *type_page, |
1903 | unsigned long flags, void *data_page) | 1900 | unsigned long flags, void *data_page) |
1904 | { | 1901 | { |
1905 | struct nameidata nd; | 1902 | struct path path; |
1906 | int retval = 0; | 1903 | int retval = 0; |
1907 | int mnt_flags = 0; | 1904 | int mnt_flags = 0; |
1908 | 1905 | ||
@@ -1940,29 +1937,29 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1940 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); | 1937 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); |
1941 | 1938 | ||
1942 | /* ... and get the mountpoint */ | 1939 | /* ... and get the mountpoint */ |
1943 | retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); | 1940 | retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); |
1944 | if (retval) | 1941 | if (retval) |
1945 | return retval; | 1942 | return retval; |
1946 | 1943 | ||
1947 | retval = security_sb_mount(dev_name, &nd.path, | 1944 | retval = security_sb_mount(dev_name, &path, |
1948 | type_page, flags, data_page); | 1945 | type_page, flags, data_page); |
1949 | if (retval) | 1946 | if (retval) |
1950 | goto dput_out; | 1947 | goto dput_out; |
1951 | 1948 | ||
1952 | if (flags & MS_REMOUNT) | 1949 | if (flags & MS_REMOUNT) |
1953 | retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, | 1950 | retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, |
1954 | data_page); | 1951 | data_page); |
1955 | else if (flags & MS_BIND) | 1952 | else if (flags & MS_BIND) |
1956 | retval = do_loopback(&nd, dev_name, flags & MS_REC); | 1953 | retval = do_loopback(&path, dev_name, flags & MS_REC); |
1957 | else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) | 1954 | else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
1958 | retval = do_change_type(&nd, flags); | 1955 | retval = do_change_type(&path, flags); |
1959 | else if (flags & MS_MOVE) | 1956 | else if (flags & MS_MOVE) |
1960 | retval = do_move_mount(&nd, dev_name); | 1957 | retval = do_move_mount(&path, dev_name); |
1961 | else | 1958 | else |
1962 | retval = do_new_mount(&nd, type_page, flags, mnt_flags, | 1959 | retval = do_new_mount(&path, type_page, flags, mnt_flags, |
1963 | dev_name, data_page); | 1960 | dev_name, data_page); |
1964 | dput_out: | 1961 | dput_out: |
1965 | path_put(&nd.path); | 1962 | path_put(&path); |
1966 | return retval; | 1963 | return retval; |
1967 | } | 1964 | } |
1968 | 1965 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 6a09760c5960..c2e9cfd9e5a4 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport; | |||
40 | static const int nfs_set_port_min = 0; | 40 | static const int nfs_set_port_min = 0; |
41 | static const int nfs_set_port_max = 65535; | 41 | static const int nfs_set_port_max = 65535; |
42 | 42 | ||
43 | /* | ||
44 | * If the kernel has IPv6 support available, always listen for | ||
45 | * both AF_INET and AF_INET6 requests. | ||
46 | */ | ||
47 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
48 | static const sa_family_t nfs_callback_family = AF_INET6; | ||
49 | #else | ||
50 | static const sa_family_t nfs_callback_family = AF_INET; | ||
51 | #endif | ||
52 | |||
43 | static int param_set_port(const char *val, struct kernel_param *kp) | 53 | static int param_set_port(const char *val, struct kernel_param *kp) |
44 | { | 54 | { |
45 | char *endp; | 55 | char *endp; |
@@ -106,7 +116,7 @@ int nfs_callback_up(void) | |||
106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) | 116 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) |
107 | goto out; | 117 | goto out; |
108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, | 118 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, |
109 | AF_INET, NULL); | 119 | nfs_callback_family, NULL); |
110 | ret = -ENOMEM; | 120 | ret = -ENOMEM; |
111 | if (!serv) | 121 | if (!serv) |
112 | goto out_err; | 122 | goto out_err; |
@@ -116,7 +126,8 @@ int nfs_callback_up(void) | |||
116 | if (ret <= 0) | 126 | if (ret <= 0) |
117 | goto out_err; | 127 | goto out_err; |
118 | nfs_callback_tcpport = ret; | 128 | nfs_callback_tcpport = ret; |
119 | dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); | 129 | dprintk("NFS: Callback listener port = %u (af %u)\n", |
130 | nfs_callback_tcpport, nfs_callback_family); | ||
120 | 131 | ||
121 | nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); | 132 | nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); |
122 | if (IS_ERR(nfs_callback_info.rqst)) { | 133 | if (IS_ERR(nfs_callback_info.rqst)) { |
@@ -149,8 +160,8 @@ out: | |||
149 | mutex_unlock(&nfs_callback_mutex); | 160 | mutex_unlock(&nfs_callback_mutex); |
150 | return ret; | 161 | return ret; |
151 | out_err: | 162 | out_err: |
152 | dprintk("Couldn't create callback socket or server thread; err = %d\n", | 163 | dprintk("NFS: Couldn't create callback socket or server thread; " |
153 | ret); | 164 | "err = %d\n", ret); |
154 | nfs_callback_info.users--; | 165 | nfs_callback_info.users--; |
155 | goto out; | 166 | goto out; |
156 | } | 167 | } |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2ab70d46ecbc..3e64b98f3a93 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -707,9 +707,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) | |||
707 | { | 707 | { |
708 | if (NFS_PROTO(dir)->version == 2) | 708 | if (NFS_PROTO(dir)->version == 2) |
709 | return 0; | 709 | return 0; |
710 | if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) | 710 | return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); |
711 | return 0; | ||
712 | return (nd->intent.open.flags & O_EXCL) != 0; | ||
713 | } | 711 | } |
714 | 712 | ||
715 | /* | 713 | /* |
@@ -1009,7 +1007,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1009 | 1007 | ||
1010 | /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash | 1008 | /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash |
1011 | * the dentry. */ | 1009 | * the dentry. */ |
1012 | if (nd->intent.open.flags & O_EXCL) { | 1010 | if (nd->flags & LOOKUP_EXCL) { |
1013 | d_instantiate(dentry, NULL); | 1011 | d_instantiate(dentry, NULL); |
1014 | goto out; | 1012 | goto out; |
1015 | } | 1013 | } |
@@ -1517,7 +1515,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
1517 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, | 1515 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, |
1518 | GFP_KERNEL)) { | 1516 | GFP_KERNEL)) { |
1519 | pagevec_add(&lru_pvec, page); | 1517 | pagevec_add(&lru_pvec, page); |
1520 | pagevec_lru_add(&lru_pvec); | 1518 | pagevec_lru_add_file(&lru_pvec); |
1521 | SetPageUptodate(page); | 1519 | SetPageUptodate(page); |
1522 | unlock_page(page); | 1520 | unlock_page(page); |
1523 | } else | 1521 | } else |
@@ -1959,6 +1957,9 @@ force_lookup: | |||
1959 | } else | 1957 | } else |
1960 | res = PTR_ERR(cred); | 1958 | res = PTR_ERR(cred); |
1961 | out: | 1959 | out: |
1960 | if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) | ||
1961 | res = -EACCES; | ||
1962 | |||
1962 | dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", | 1963 | dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", |
1963 | inode->i_sb->s_id, inode->i_ino, mask, res); | 1964 | inode->i_sb->s_id, inode->i_ino, mask, res); |
1964 | return res; | 1965 | return res; |
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index fae97196daad..b7c9b2df1f29 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -107,11 +107,10 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) | |||
107 | * if the dentry tree reaches them; however if the dentry already | 107 | * if the dentry tree reaches them; however if the dentry already |
108 | * exists, we'll pick it up at this point and use it as the root | 108 | * exists, we'll pick it up at this point and use it as the root |
109 | */ | 109 | */ |
110 | mntroot = d_alloc_anon(inode); | 110 | mntroot = d_obtain_alias(inode); |
111 | if (!mntroot) { | 111 | if (IS_ERR(mntroot)) { |
112 | iput(inode); | ||
113 | dprintk("nfs_get_root: get root dentry failed\n"); | 112 | dprintk("nfs_get_root: get root dentry failed\n"); |
114 | return ERR_PTR(-ENOMEM); | 113 | return mntroot; |
115 | } | 114 | } |
116 | 115 | ||
117 | security_d_instantiate(mntroot, inode); | 116 | security_d_instantiate(mntroot, inode); |
@@ -277,11 +276,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) | |||
277 | * if the dentry tree reaches them; however if the dentry already | 276 | * if the dentry tree reaches them; however if the dentry already |
278 | * exists, we'll pick it up at this point and use it as the root | 277 | * exists, we'll pick it up at this point and use it as the root |
279 | */ | 278 | */ |
280 | mntroot = d_alloc_anon(inode); | 279 | mntroot = d_obtain_alias(inode); |
281 | if (!mntroot) { | 280 | if (IS_ERR(mntroot)) { |
282 | iput(inode); | ||
283 | dprintk("nfs_get_root: get root dentry failed\n"); | 281 | dprintk("nfs_get_root: get root dentry failed\n"); |
284 | return ERR_PTR(-ENOMEM); | 282 | return mntroot; |
285 | } | 283 | } |
286 | 284 | ||
287 | security_d_instantiate(mntroot, inode); | 285 | security_d_instantiate(mntroot, inode); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c910413eaeca..83e700a2b0c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
1659 | struct nfs_open_context *ctx; | 1659 | struct nfs_open_context *ctx; |
1660 | 1660 | ||
1661 | ctx = nfs_file_open_context(sattr->ia_file); | 1661 | ctx = nfs_file_open_context(sattr->ia_file); |
1662 | cred = ctx->cred; | 1662 | if (ctx) { |
1663 | state = ctx->state; | 1663 | cred = ctx->cred; |
1664 | state = ctx->state; | ||
1665 | } | ||
1664 | } | 1666 | } |
1665 | 1667 | ||
1666 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); | 1668 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8b28b95c9e44..a3b0061dfd45 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2459,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
2459 | compare_super = NULL; | 2459 | compare_super = NULL; |
2460 | 2460 | ||
2461 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2461 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2462 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2462 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2463 | if (IS_ERR(s)) { | 2463 | if (IS_ERR(s)) { |
2464 | error = PTR_ERR(s); | 2464 | error = PTR_ERR(s); |
2465 | goto out_err_nosb; | 2465 | goto out_err_nosb; |
@@ -2544,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, | |||
2544 | compare_super = NULL; | 2544 | compare_super = NULL; |
2545 | 2545 | ||
2546 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2546 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2547 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2547 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2548 | if (IS_ERR(s)) { | 2548 | if (IS_ERR(s)) { |
2549 | error = PTR_ERR(s); | 2549 | error = PTR_ERR(s); |
2550 | goto out_err_nosb; | 2550 | goto out_err_nosb; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9dc036f18356..5839b229cd0e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -99,7 +99,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
99 | int fsidtype; | 99 | int fsidtype; |
100 | char *ep; | 100 | char *ep; |
101 | struct svc_expkey key; | 101 | struct svc_expkey key; |
102 | struct svc_expkey *ek; | 102 | struct svc_expkey *ek = NULL; |
103 | 103 | ||
104 | if (mesg[mlen-1] != '\n') | 104 | if (mesg[mlen-1] != '\n') |
105 | return -EINVAL; | 105 | return -EINVAL; |
@@ -107,7 +107,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
107 | 107 | ||
108 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 108 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
109 | err = -ENOMEM; | 109 | err = -ENOMEM; |
110 | if (!buf) goto out; | 110 | if (!buf) |
111 | goto out; | ||
111 | 112 | ||
112 | err = -EINVAL; | 113 | err = -EINVAL; |
113 | if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) | 114 | if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) |
@@ -151,34 +152,32 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
151 | 152 | ||
152 | /* now we want a pathname, or empty meaning NEGATIVE */ | 153 | /* now we want a pathname, or empty meaning NEGATIVE */ |
153 | err = -EINVAL; | 154 | err = -EINVAL; |
154 | if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) | 155 | len = qword_get(&mesg, buf, PAGE_SIZE); |
156 | if (len < 0) | ||
155 | goto out; | 157 | goto out; |
156 | dprintk("Path seems to be <%s>\n", buf); | 158 | dprintk("Path seems to be <%s>\n", buf); |
157 | err = 0; | 159 | err = 0; |
158 | if (len == 0) { | 160 | if (len == 0) { |
159 | set_bit(CACHE_NEGATIVE, &key.h.flags); | 161 | set_bit(CACHE_NEGATIVE, &key.h.flags); |
160 | ek = svc_expkey_update(&key, ek); | 162 | ek = svc_expkey_update(&key, ek); |
161 | if (ek) | 163 | if (!ek) |
162 | cache_put(&ek->h, &svc_expkey_cache); | 164 | err = -ENOMEM; |
163 | else err = -ENOMEM; | ||
164 | } else { | 165 | } else { |
165 | struct nameidata nd; | 166 | err = kern_path(buf, 0, &key.ek_path); |
166 | err = path_lookup(buf, 0, &nd); | ||
167 | if (err) | 167 | if (err) |
168 | goto out; | 168 | goto out; |
169 | 169 | ||
170 | dprintk("Found the path %s\n", buf); | 170 | dprintk("Found the path %s\n", buf); |
171 | key.ek_path = nd.path; | ||
172 | 171 | ||
173 | ek = svc_expkey_update(&key, ek); | 172 | ek = svc_expkey_update(&key, ek); |
174 | if (ek) | 173 | if (!ek) |
175 | cache_put(&ek->h, &svc_expkey_cache); | ||
176 | else | ||
177 | err = -ENOMEM; | 174 | err = -ENOMEM; |
178 | path_put(&nd.path); | 175 | path_put(&key.ek_path); |
179 | } | 176 | } |
180 | cache_flush(); | 177 | cache_flush(); |
181 | out: | 178 | out: |
179 | if (ek) | ||
180 | cache_put(&ek->h, &svc_expkey_cache); | ||
182 | if (dom) | 181 | if (dom) |
183 | auth_domain_put(dom); | 182 | auth_domain_put(dom); |
184 | kfree(buf); | 183 | kfree(buf); |
@@ -500,35 +499,22 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
500 | int len; | 499 | int len; |
501 | int err; | 500 | int err; |
502 | struct auth_domain *dom = NULL; | 501 | struct auth_domain *dom = NULL; |
503 | struct nameidata nd; | 502 | struct svc_export exp = {}, *expp; |
504 | struct svc_export exp, *expp; | ||
505 | int an_int; | 503 | int an_int; |
506 | 504 | ||
507 | nd.path.dentry = NULL; | ||
508 | exp.ex_pathname = NULL; | ||
509 | |||
510 | /* fs locations */ | ||
511 | exp.ex_fslocs.locations = NULL; | ||
512 | exp.ex_fslocs.locations_count = 0; | ||
513 | exp.ex_fslocs.migrated = 0; | ||
514 | |||
515 | exp.ex_uuid = NULL; | ||
516 | |||
517 | /* secinfo */ | ||
518 | exp.ex_nflavors = 0; | ||
519 | |||
520 | if (mesg[mlen-1] != '\n') | 505 | if (mesg[mlen-1] != '\n') |
521 | return -EINVAL; | 506 | return -EINVAL; |
522 | mesg[mlen-1] = 0; | 507 | mesg[mlen-1] = 0; |
523 | 508 | ||
524 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 509 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
525 | err = -ENOMEM; | 510 | if (!buf) |
526 | if (!buf) goto out; | 511 | return -ENOMEM; |
527 | 512 | ||
528 | /* client */ | 513 | /* client */ |
529 | len = qword_get(&mesg, buf, PAGE_SIZE); | ||
530 | err = -EINVAL; | 514 | err = -EINVAL; |
531 | if (len <= 0) goto out; | 515 | len = qword_get(&mesg, buf, PAGE_SIZE); |
516 | if (len <= 0) | ||
517 | goto out; | ||
532 | 518 | ||
533 | err = -ENOENT; | 519 | err = -ENOENT; |
534 | dom = auth_domain_find(buf); | 520 | dom = auth_domain_find(buf); |
@@ -537,25 +523,25 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
537 | 523 | ||
538 | /* path */ | 524 | /* path */ |
539 | err = -EINVAL; | 525 | err = -EINVAL; |
540 | if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) | 526 | if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) |
541 | goto out; | 527 | goto out1; |
542 | err = path_lookup(buf, 0, &nd); | 528 | |
543 | if (err) goto out_no_path; | 529 | err = kern_path(buf, 0, &exp.ex_path); |
530 | if (err) | ||
531 | goto out1; | ||
544 | 532 | ||
545 | exp.h.flags = 0; | ||
546 | exp.ex_client = dom; | 533 | exp.ex_client = dom; |
547 | exp.ex_path.mnt = nd.path.mnt; | 534 | |
548 | exp.ex_path.dentry = nd.path.dentry; | ||
549 | exp.ex_pathname = kstrdup(buf, GFP_KERNEL); | ||
550 | err = -ENOMEM; | 535 | err = -ENOMEM; |
536 | exp.ex_pathname = kstrdup(buf, GFP_KERNEL); | ||
551 | if (!exp.ex_pathname) | 537 | if (!exp.ex_pathname) |
552 | goto out; | 538 | goto out2; |
553 | 539 | ||
554 | /* expiry */ | 540 | /* expiry */ |
555 | err = -EINVAL; | 541 | err = -EINVAL; |
556 | exp.h.expiry_time = get_expiry(&mesg); | 542 | exp.h.expiry_time = get_expiry(&mesg); |
557 | if (exp.h.expiry_time == 0) | 543 | if (exp.h.expiry_time == 0) |
558 | goto out; | 544 | goto out3; |
559 | 545 | ||
560 | /* flags */ | 546 | /* flags */ |
561 | err = get_int(&mesg, &an_int); | 547 | err = get_int(&mesg, &an_int); |
@@ -563,22 +549,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
563 | err = 0; | 549 | err = 0; |
564 | set_bit(CACHE_NEGATIVE, &exp.h.flags); | 550 | set_bit(CACHE_NEGATIVE, &exp.h.flags); |
565 | } else { | 551 | } else { |
566 | if (err || an_int < 0) goto out; | 552 | if (err || an_int < 0) |
553 | goto out3; | ||
567 | exp.ex_flags= an_int; | 554 | exp.ex_flags= an_int; |
568 | 555 | ||
569 | /* anon uid */ | 556 | /* anon uid */ |
570 | err = get_int(&mesg, &an_int); | 557 | err = get_int(&mesg, &an_int); |
571 | if (err) goto out; | 558 | if (err) |
559 | goto out3; | ||
572 | exp.ex_anon_uid= an_int; | 560 | exp.ex_anon_uid= an_int; |
573 | 561 | ||
574 | /* anon gid */ | 562 | /* anon gid */ |
575 | err = get_int(&mesg, &an_int); | 563 | err = get_int(&mesg, &an_int); |
576 | if (err) goto out; | 564 | if (err) |
565 | goto out3; | ||
577 | exp.ex_anon_gid= an_int; | 566 | exp.ex_anon_gid= an_int; |
578 | 567 | ||
579 | /* fsid */ | 568 | /* fsid */ |
580 | err = get_int(&mesg, &an_int); | 569 | err = get_int(&mesg, &an_int); |
581 | if (err) goto out; | 570 | if (err) |
571 | goto out3; | ||
582 | exp.ex_fsid = an_int; | 572 | exp.ex_fsid = an_int; |
583 | 573 | ||
584 | while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { | 574 | while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { |
@@ -604,12 +594,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
604 | */ | 594 | */ |
605 | break; | 595 | break; |
606 | if (err) | 596 | if (err) |
607 | goto out; | 597 | goto out4; |
608 | } | 598 | } |
609 | 599 | ||
610 | err = check_export(nd.path.dentry->d_inode, exp.ex_flags, | 600 | err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, |
611 | exp.ex_uuid); | 601 | exp.ex_uuid); |
612 | if (err) goto out; | 602 | if (err) |
603 | goto out4; | ||
613 | } | 604 | } |
614 | 605 | ||
615 | expp = svc_export_lookup(&exp); | 606 | expp = svc_export_lookup(&exp); |
@@ -622,15 +613,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
622 | err = -ENOMEM; | 613 | err = -ENOMEM; |
623 | else | 614 | else |
624 | exp_put(expp); | 615 | exp_put(expp); |
625 | out: | 616 | out4: |
626 | nfsd4_fslocs_free(&exp.ex_fslocs); | 617 | nfsd4_fslocs_free(&exp.ex_fslocs); |
627 | kfree(exp.ex_uuid); | 618 | kfree(exp.ex_uuid); |
619 | out3: | ||
628 | kfree(exp.ex_pathname); | 620 | kfree(exp.ex_pathname); |
629 | if (nd.path.dentry) | 621 | out2: |
630 | path_put(&nd.path); | 622 | path_put(&exp.ex_path); |
631 | out_no_path: | 623 | out1: |
632 | if (dom) | 624 | auth_domain_put(dom); |
633 | auth_domain_put(dom); | 625 | out: |
634 | kfree(buf); | 626 | kfree(buf); |
635 | return err; | 627 | return err; |
636 | } | 628 | } |
@@ -998,7 +990,7 @@ exp_export(struct nfsctl_export *nxp) | |||
998 | struct svc_export *exp = NULL; | 990 | struct svc_export *exp = NULL; |
999 | struct svc_export new; | 991 | struct svc_export new; |
1000 | struct svc_expkey *fsid_key = NULL; | 992 | struct svc_expkey *fsid_key = NULL; |
1001 | struct nameidata nd; | 993 | struct path path; |
1002 | int err; | 994 | int err; |
1003 | 995 | ||
1004 | /* Consistency check */ | 996 | /* Consistency check */ |
@@ -1021,12 +1013,12 @@ exp_export(struct nfsctl_export *nxp) | |||
1021 | 1013 | ||
1022 | 1014 | ||
1023 | /* Look up the dentry */ | 1015 | /* Look up the dentry */ |
1024 | err = path_lookup(nxp->ex_path, 0, &nd); | 1016 | err = kern_path(nxp->ex_path, 0, &path); |
1025 | if (err) | 1017 | if (err) |
1026 | goto out_put_clp; | 1018 | goto out_put_clp; |
1027 | err = -EINVAL; | 1019 | err = -EINVAL; |
1028 | 1020 | ||
1029 | exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); | 1021 | exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); |
1030 | 1022 | ||
1031 | memset(&new, 0, sizeof(new)); | 1023 | memset(&new, 0, sizeof(new)); |
1032 | 1024 | ||
@@ -1034,8 +1026,8 @@ exp_export(struct nfsctl_export *nxp) | |||
1034 | if ((nxp->ex_flags & NFSEXP_FSID) && | 1026 | if ((nxp->ex_flags & NFSEXP_FSID) && |
1035 | (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && | 1027 | (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && |
1036 | fsid_key->ek_path.mnt && | 1028 | fsid_key->ek_path.mnt && |
1037 | (fsid_key->ek_path.mnt != nd.path.mnt || | 1029 | (fsid_key->ek_path.mnt != path.mnt || |
1038 | fsid_key->ek_path.dentry != nd.path.dentry)) | 1030 | fsid_key->ek_path.dentry != path.dentry)) |
1039 | goto finish; | 1031 | goto finish; |
1040 | 1032 | ||
1041 | if (!IS_ERR(exp)) { | 1033 | if (!IS_ERR(exp)) { |
@@ -1051,7 +1043,7 @@ exp_export(struct nfsctl_export *nxp) | |||
1051 | goto finish; | 1043 | goto finish; |
1052 | } | 1044 | } |
1053 | 1045 | ||
1054 | err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); | 1046 | err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); |
1055 | if (err) goto finish; | 1047 | if (err) goto finish; |
1056 | 1048 | ||
1057 | err = -ENOMEM; | 1049 | err = -ENOMEM; |
@@ -1064,7 +1056,7 @@ exp_export(struct nfsctl_export *nxp) | |||
1064 | if (!new.ex_pathname) | 1056 | if (!new.ex_pathname) |
1065 | goto finish; | 1057 | goto finish; |
1066 | new.ex_client = clp; | 1058 | new.ex_client = clp; |
1067 | new.ex_path = nd.path; | 1059 | new.ex_path = path; |
1068 | new.ex_flags = nxp->ex_flags; | 1060 | new.ex_flags = nxp->ex_flags; |
1069 | new.ex_anon_uid = nxp->ex_anon_uid; | 1061 | new.ex_anon_uid = nxp->ex_anon_uid; |
1070 | new.ex_anon_gid = nxp->ex_anon_gid; | 1062 | new.ex_anon_gid = nxp->ex_anon_gid; |
@@ -1090,7 +1082,7 @@ finish: | |||
1090 | exp_put(exp); | 1082 | exp_put(exp); |
1091 | if (fsid_key && !IS_ERR(fsid_key)) | 1083 | if (fsid_key && !IS_ERR(fsid_key)) |
1092 | cache_put(&fsid_key->h, &svc_expkey_cache); | 1084 | cache_put(&fsid_key->h, &svc_expkey_cache); |
1093 | path_put(&nd.path); | 1085 | path_put(&path); |
1094 | out_put_clp: | 1086 | out_put_clp: |
1095 | auth_domain_put(clp); | 1087 | auth_domain_put(clp); |
1096 | out_unlock: | 1088 | out_unlock: |
@@ -1121,7 +1113,7 @@ exp_unexport(struct nfsctl_export *nxp) | |||
1121 | { | 1113 | { |
1122 | struct auth_domain *dom; | 1114 | struct auth_domain *dom; |
1123 | svc_export *exp; | 1115 | svc_export *exp; |
1124 | struct nameidata nd; | 1116 | struct path path; |
1125 | int err; | 1117 | int err; |
1126 | 1118 | ||
1127 | /* Consistency check */ | 1119 | /* Consistency check */ |
@@ -1138,13 +1130,13 @@ exp_unexport(struct nfsctl_export *nxp) | |||
1138 | goto out_unlock; | 1130 | goto out_unlock; |
1139 | } | 1131 | } |
1140 | 1132 | ||
1141 | err = path_lookup(nxp->ex_path, 0, &nd); | 1133 | err = kern_path(nxp->ex_path, 0, &path); |
1142 | if (err) | 1134 | if (err) |
1143 | goto out_domain; | 1135 | goto out_domain; |
1144 | 1136 | ||
1145 | err = -EINVAL; | 1137 | err = -EINVAL; |
1146 | exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); | 1138 | exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); |
1147 | path_put(&nd.path); | 1139 | path_put(&path); |
1148 | if (IS_ERR(exp)) | 1140 | if (IS_ERR(exp)) |
1149 | goto out_domain; | 1141 | goto out_domain; |
1150 | 1142 | ||
@@ -1166,26 +1158,26 @@ out_unlock: | |||
1166 | * since its harder to fool a kernel module than a user space program. | 1158 | * since its harder to fool a kernel module than a user space program. |
1167 | */ | 1159 | */ |
1168 | int | 1160 | int |
1169 | exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) | 1161 | exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) |
1170 | { | 1162 | { |
1171 | struct svc_export *exp; | 1163 | struct svc_export *exp; |
1172 | struct nameidata nd; | 1164 | struct path path; |
1173 | struct inode *inode; | 1165 | struct inode *inode; |
1174 | struct svc_fh fh; | 1166 | struct svc_fh fh; |
1175 | int err; | 1167 | int err; |
1176 | 1168 | ||
1177 | err = -EPERM; | 1169 | err = -EPERM; |
1178 | /* NB: we probably ought to check that it's NUL-terminated */ | 1170 | /* NB: we probably ought to check that it's NUL-terminated */ |
1179 | if (path_lookup(path, 0, &nd)) { | 1171 | if (kern_path(name, 0, &path)) { |
1180 | printk("nfsd: exp_rootfh path not found %s", path); | 1172 | printk("nfsd: exp_rootfh path not found %s", name); |
1181 | return err; | 1173 | return err; |
1182 | } | 1174 | } |
1183 | inode = nd.path.dentry->d_inode; | 1175 | inode = path.dentry->d_inode; |
1184 | 1176 | ||
1185 | dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", | 1177 | dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", |
1186 | path, nd.path.dentry, clp->name, | 1178 | name, path.dentry, clp->name, |
1187 | inode->i_sb->s_id, inode->i_ino); | 1179 | inode->i_sb->s_id, inode->i_ino); |
1188 | exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); | 1180 | exp = exp_parent(clp, path.mnt, path.dentry, NULL); |
1189 | if (IS_ERR(exp)) { | 1181 | if (IS_ERR(exp)) { |
1190 | err = PTR_ERR(exp); | 1182 | err = PTR_ERR(exp); |
1191 | goto out; | 1183 | goto out; |
@@ -1195,7 +1187,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) | |||
1195 | * fh must be initialized before calling fh_compose | 1187 | * fh must be initialized before calling fh_compose |
1196 | */ | 1188 | */ |
1197 | fh_init(&fh, maxsize); | 1189 | fh_init(&fh, maxsize); |
1198 | if (fh_compose(&fh, exp, nd.path.dentry, NULL)) | 1190 | if (fh_compose(&fh, exp, path.dentry, NULL)) |
1199 | err = -EINVAL; | 1191 | err = -EINVAL; |
1200 | else | 1192 | else |
1201 | err = 0; | 1193 | err = 0; |
@@ -1203,7 +1195,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) | |||
1203 | fh_put(&fh); | 1195 | fh_put(&fh); |
1204 | exp_put(exp); | 1196 | exp_put(exp); |
1205 | out: | 1197 | out: |
1206 | path_put(&nd.path); | 1198 | path_put(&path); |
1207 | return err; | 1199 | return err; |
1208 | } | 1200 | } |
1209 | 1201 | ||
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 145b3c877a27..bb93946ace22 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 51 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
52 | 52 | ||
53 | /* Globals */ | 53 | /* Globals */ |
54 | static struct nameidata rec_dir; | 54 | static struct path rec_dir; |
55 | static int rec_dir_init = 0; | 55 | static int rec_dir_init = 0; |
56 | 56 | ||
57 | static void | 57 | static void |
@@ -121,9 +121,9 @@ out_no_tfm: | |||
121 | static void | 121 | static void |
122 | nfsd4_sync_rec_dir(void) | 122 | nfsd4_sync_rec_dir(void) |
123 | { | 123 | { |
124 | mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); | 124 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
125 | nfsd_sync_dir(rec_dir.path.dentry); | 125 | nfsd_sync_dir(rec_dir.dentry); |
126 | mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); | 126 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); |
127 | } | 127 | } |
128 | 128 | ||
129 | int | 129 | int |
@@ -143,9 +143,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
143 | nfs4_save_user(&uid, &gid); | 143 | nfs4_save_user(&uid, &gid); |
144 | 144 | ||
145 | /* lock the parent */ | 145 | /* lock the parent */ |
146 | mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); | 146 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
147 | 147 | ||
148 | dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); | 148 | dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); |
149 | if (IS_ERR(dentry)) { | 149 | if (IS_ERR(dentry)) { |
150 | status = PTR_ERR(dentry); | 150 | status = PTR_ERR(dentry); |
151 | goto out_unlock; | 151 | goto out_unlock; |
@@ -155,15 +155,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
155 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); | 155 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); |
156 | goto out_put; | 156 | goto out_put; |
157 | } | 157 | } |
158 | status = mnt_want_write(rec_dir.path.mnt); | 158 | status = mnt_want_write(rec_dir.mnt); |
159 | if (status) | 159 | if (status) |
160 | goto out_put; | 160 | goto out_put; |
161 | status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); | 161 | status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); |
162 | mnt_drop_write(rec_dir.path.mnt); | 162 | mnt_drop_write(rec_dir.mnt); |
163 | out_put: | 163 | out_put: |
164 | dput(dentry); | 164 | dput(dentry); |
165 | out_unlock: | 165 | out_unlock: |
166 | mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); | 166 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); |
167 | if (status == 0) { | 167 | if (status == 0) { |
168 | clp->cl_firststate = 1; | 168 | clp->cl_firststate = 1; |
169 | nfsd4_sync_rec_dir(); | 169 | nfsd4_sync_rec_dir(); |
@@ -226,7 +226,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
226 | 226 | ||
227 | nfs4_save_user(&uid, &gid); | 227 | nfs4_save_user(&uid, &gid); |
228 | 228 | ||
229 | filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); | 229 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); |
230 | status = PTR_ERR(filp); | 230 | status = PTR_ERR(filp); |
231 | if (IS_ERR(filp)) | 231 | if (IS_ERR(filp)) |
232 | goto out; | 232 | goto out; |
@@ -291,9 +291,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
291 | 291 | ||
292 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); | 292 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); |
293 | 293 | ||
294 | mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); | 294 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
295 | dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); | 295 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); |
296 | mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); | 296 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); |
297 | if (IS_ERR(dentry)) { | 297 | if (IS_ERR(dentry)) { |
298 | status = PTR_ERR(dentry); | 298 | status = PTR_ERR(dentry); |
299 | return status; | 299 | return status; |
@@ -302,7 +302,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
302 | if (!dentry->d_inode) | 302 | if (!dentry->d_inode) |
303 | goto out; | 303 | goto out; |
304 | 304 | ||
305 | status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); | 305 | status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); |
306 | out: | 306 | out: |
307 | dput(dentry); | 307 | dput(dentry); |
308 | return status; | 308 | return status; |
@@ -318,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
318 | if (!rec_dir_init || !clp->cl_firststate) | 318 | if (!rec_dir_init || !clp->cl_firststate) |
319 | return; | 319 | return; |
320 | 320 | ||
321 | status = mnt_want_write(rec_dir.path.mnt); | 321 | status = mnt_want_write(rec_dir.mnt); |
322 | if (status) | 322 | if (status) |
323 | goto out; | 323 | goto out; |
324 | clp->cl_firststate = 0; | 324 | clp->cl_firststate = 0; |
@@ -327,7 +327,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
327 | nfs4_reset_user(uid, gid); | 327 | nfs4_reset_user(uid, gid); |
328 | if (status == 0) | 328 | if (status == 0) |
329 | nfsd4_sync_rec_dir(); | 329 | nfsd4_sync_rec_dir(); |
330 | mnt_drop_write(rec_dir.path.mnt); | 330 | mnt_drop_write(rec_dir.mnt); |
331 | out: | 331 | out: |
332 | if (status) | 332 | if (status) |
333 | printk("NFSD: Failed to remove expired client state directory" | 333 | printk("NFSD: Failed to remove expired client state directory" |
@@ -357,17 +357,17 @@ nfsd4_recdir_purge_old(void) { | |||
357 | 357 | ||
358 | if (!rec_dir_init) | 358 | if (!rec_dir_init) |
359 | return; | 359 | return; |
360 | status = mnt_want_write(rec_dir.path.mnt); | 360 | status = mnt_want_write(rec_dir.mnt); |
361 | if (status) | 361 | if (status) |
362 | goto out; | 362 | goto out; |
363 | status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); | 363 | status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); |
364 | if (status == 0) | 364 | if (status == 0) |
365 | nfsd4_sync_rec_dir(); | 365 | nfsd4_sync_rec_dir(); |
366 | mnt_drop_write(rec_dir.path.mnt); | 366 | mnt_drop_write(rec_dir.mnt); |
367 | out: | 367 | out: |
368 | if (status) | 368 | if (status) |
369 | printk("nfsd4: failed to purge old clients from recovery" | 369 | printk("nfsd4: failed to purge old clients from recovery" |
370 | " directory %s\n", rec_dir.path.dentry->d_name.name); | 370 | " directory %s\n", rec_dir.dentry->d_name.name); |
371 | } | 371 | } |
372 | 372 | ||
373 | static int | 373 | static int |
@@ -387,10 +387,10 @@ int | |||
387 | nfsd4_recdir_load(void) { | 387 | nfsd4_recdir_load(void) { |
388 | int status; | 388 | int status; |
389 | 389 | ||
390 | status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); | 390 | status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); |
391 | if (status) | 391 | if (status) |
392 | printk("nfsd4: failed loading clients from recovery" | 392 | printk("nfsd4: failed loading clients from recovery" |
393 | " directory %s\n", rec_dir.path.dentry->d_name.name); | 393 | " directory %s\n", rec_dir.dentry->d_name.name); |
394 | return status; | 394 | return status; |
395 | } | 395 | } |
396 | 396 | ||
@@ -412,7 +412,7 @@ nfsd4_init_recdir(char *rec_dirname) | |||
412 | 412 | ||
413 | nfs4_save_user(&uid, &gid); | 413 | nfs4_save_user(&uid, &gid); |
414 | 414 | ||
415 | status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, | 415 | status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, |
416 | &rec_dir); | 416 | &rec_dir); |
417 | if (status) | 417 | if (status) |
418 | printk("NFSD: unable to find recovery directory %s\n", | 418 | printk("NFSD: unable to find recovery directory %s\n", |
@@ -429,5 +429,5 @@ nfsd4_shutdown_recdir(void) | |||
429 | if (!rec_dir_init) | 429 | if (!rec_dir_init) |
430 | return; | 430 | return; |
431 | rec_dir_init = 0; | 431 | rec_dir_init = 0; |
432 | path_put(&rec_dir.path); | 432 | path_put(&rec_dir); |
433 | } | 433 | } |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0cc7ff5d5ab5..b0bebc552a11 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -3284,17 +3284,17 @@ int | |||
3284 | nfs4_reset_recoverydir(char *recdir) | 3284 | nfs4_reset_recoverydir(char *recdir) |
3285 | { | 3285 | { |
3286 | int status; | 3286 | int status; |
3287 | struct nameidata nd; | 3287 | struct path path; |
3288 | 3288 | ||
3289 | status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); | 3289 | status = kern_path(recdir, LOOKUP_FOLLOW, &path); |
3290 | if (status) | 3290 | if (status) |
3291 | return status; | 3291 | return status; |
3292 | status = -ENOTDIR; | 3292 | status = -ENOTDIR; |
3293 | if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { | 3293 | if (S_ISDIR(path.dentry->d_inode->i_mode)) { |
3294 | nfs4_set_recdir(recdir); | 3294 | nfs4_set_recdir(recdir); |
3295 | status = 0; | 3295 | status = 0; |
3296 | } | 3296 | } |
3297 | path_put(&nd.path); | 3297 | path_put(&path); |
3298 | return status; | 3298 | return status; |
3299 | } | 3299 | } |
3300 | 3300 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 97543df58242..e3f9783fdcf7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -341,7 +341,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) | |||
341 | 341 | ||
342 | static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) | 342 | static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) |
343 | { | 343 | { |
344 | struct nameidata nd; | 344 | struct path path; |
345 | char *fo_path; | 345 | char *fo_path; |
346 | int error; | 346 | int error; |
347 | 347 | ||
@@ -356,13 +356,13 @@ static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) | |||
356 | if (qword_get(&buf, fo_path, size) < 0) | 356 | if (qword_get(&buf, fo_path, size) < 0) |
357 | return -EINVAL; | 357 | return -EINVAL; |
358 | 358 | ||
359 | error = path_lookup(fo_path, 0, &nd); | 359 | error = kern_path(fo_path, 0, &path); |
360 | if (error) | 360 | if (error) |
361 | return error; | 361 | return error; |
362 | 362 | ||
363 | error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb); | 363 | error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb); |
364 | 364 | ||
365 | path_put(&nd.path); | 365 | path_put(&path); |
366 | return error; | 366 | return error; |
367 | } | 367 | } |
368 | 368 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 59eeb46f82c5..07e4f5d7baa8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -249,6 +249,10 @@ static int nfsd_init_socks(int port) | |||
249 | if (error < 0) | 249 | if (error < 0) |
250 | return error; | 250 | return error; |
251 | 251 | ||
252 | error = lockd_up(); | ||
253 | if (error < 0) | ||
254 | return error; | ||
255 | |||
252 | error = svc_create_xprt(nfsd_serv, "tcp", port, | 256 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
253 | SVC_SOCK_DEFAULTS); | 257 | SVC_SOCK_DEFAULTS); |
254 | if (error < 0) | 258 | if (error < 0) |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index aa1d0d6489a1..0bc56f6d9276 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -410,6 +410,7 @@ out_nfserr: | |||
410 | static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) | 410 | static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) |
411 | { | 411 | { |
412 | ssize_t buflen; | 412 | ssize_t buflen; |
413 | ssize_t ret; | ||
413 | 414 | ||
414 | buflen = vfs_getxattr(dentry, key, NULL, 0); | 415 | buflen = vfs_getxattr(dentry, key, NULL, 0); |
415 | if (buflen <= 0) | 416 | if (buflen <= 0) |
@@ -419,7 +420,10 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) | |||
419 | if (!*buf) | 420 | if (!*buf) |
420 | return -ENOMEM; | 421 | return -ENOMEM; |
421 | 422 | ||
422 | return vfs_getxattr(dentry, key, *buf, buflen); | 423 | ret = vfs_getxattr(dentry, key, *buf, buflen); |
424 | if (ret < 0) | ||
425 | kfree(*buf); | ||
426 | return ret; | ||
423 | } | 427 | } |
424 | #endif | 428 | #endif |
425 | 429 | ||
@@ -1814,6 +1818,115 @@ out: | |||
1814 | } | 1818 | } |
1815 | 1819 | ||
1816 | /* | 1820 | /* |
1821 | * We do this buffering because we must not call back into the file | ||
1822 | * system's ->lookup() method from the filldir callback. That may well | ||
1823 | * deadlock a number of file systems. | ||
1824 | * | ||
1825 | * This is based heavily on the implementation of same in XFS. | ||
1826 | */ | ||
1827 | struct buffered_dirent { | ||
1828 | u64 ino; | ||
1829 | loff_t offset; | ||
1830 | int namlen; | ||
1831 | unsigned int d_type; | ||
1832 | char name[]; | ||
1833 | }; | ||
1834 | |||
1835 | struct readdir_data { | ||
1836 | char *dirent; | ||
1837 | size_t used; | ||
1838 | int full; | ||
1839 | }; | ||
1840 | |||
1841 | static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | ||
1842 | loff_t offset, u64 ino, unsigned int d_type) | ||
1843 | { | ||
1844 | struct readdir_data *buf = __buf; | ||
1845 | struct buffered_dirent *de = (void *)(buf->dirent + buf->used); | ||
1846 | unsigned int reclen; | ||
1847 | |||
1848 | reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); | ||
1849 | if (buf->used + reclen > PAGE_SIZE) { | ||
1850 | buf->full = 1; | ||
1851 | return -EINVAL; | ||
1852 | } | ||
1853 | |||
1854 | de->namlen = namlen; | ||
1855 | de->offset = offset; | ||
1856 | de->ino = ino; | ||
1857 | de->d_type = d_type; | ||
1858 | memcpy(de->name, name, namlen); | ||
1859 | buf->used += reclen; | ||
1860 | |||
1861 | return 0; | ||
1862 | } | ||
1863 | |||
1864 | static int nfsd_buffered_readdir(struct file *file, filldir_t func, | ||
1865 | struct readdir_cd *cdp, loff_t *offsetp) | ||
1866 | { | ||
1867 | struct readdir_data buf; | ||
1868 | struct buffered_dirent *de; | ||
1869 | int host_err; | ||
1870 | int size; | ||
1871 | loff_t offset; | ||
1872 | |||
1873 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | ||
1874 | if (!buf.dirent) | ||
1875 | return -ENOMEM; | ||
1876 | |||
1877 | offset = *offsetp; | ||
1878 | cdp->err = nfserr_eof; /* will be cleared on successful read */ | ||
1879 | |||
1880 | while (1) { | ||
1881 | unsigned int reclen; | ||
1882 | |||
1883 | buf.used = 0; | ||
1884 | buf.full = 0; | ||
1885 | |||
1886 | host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); | ||
1887 | if (buf.full) | ||
1888 | host_err = 0; | ||
1889 | |||
1890 | if (host_err < 0) | ||
1891 | break; | ||
1892 | |||
1893 | size = buf.used; | ||
1894 | |||
1895 | if (!size) | ||
1896 | break; | ||
1897 | |||
1898 | de = (struct buffered_dirent *)buf.dirent; | ||
1899 | while (size > 0) { | ||
1900 | offset = de->offset; | ||
1901 | |||
1902 | if (func(cdp, de->name, de->namlen, de->offset, | ||
1903 | de->ino, de->d_type)) | ||
1904 | goto done; | ||
1905 | |||
1906 | if (cdp->err != nfs_ok) | ||
1907 | goto done; | ||
1908 | |||
1909 | reclen = ALIGN(sizeof(*de) + de->namlen, | ||
1910 | sizeof(u64)); | ||
1911 | size -= reclen; | ||
1912 | de = (struct buffered_dirent *)((char *)de + reclen); | ||
1913 | } | ||
1914 | offset = vfs_llseek(file, 0, SEEK_CUR); | ||
1915 | if (!buf.full) | ||
1916 | break; | ||
1917 | } | ||
1918 | |||
1919 | done: | ||
1920 | free_page((unsigned long)(buf.dirent)); | ||
1921 | |||
1922 | if (host_err) | ||
1923 | return nfserrno(host_err); | ||
1924 | |||
1925 | *offsetp = offset; | ||
1926 | return cdp->err; | ||
1927 | } | ||
1928 | |||
1929 | /* | ||
1817 | * Read entries from a directory. | 1930 | * Read entries from a directory. |
1818 | * The NFSv3/4 verifier we ignore for now. | 1931 | * The NFSv3/4 verifier we ignore for now. |
1819 | */ | 1932 | */ |
@@ -1822,7 +1935,6 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, | |||
1822 | struct readdir_cd *cdp, filldir_t func) | 1935 | struct readdir_cd *cdp, filldir_t func) |
1823 | { | 1936 | { |
1824 | __be32 err; | 1937 | __be32 err; |
1825 | int host_err; | ||
1826 | struct file *file; | 1938 | struct file *file; |
1827 | loff_t offset = *offsetp; | 1939 | loff_t offset = *offsetp; |
1828 | 1940 | ||
@@ -1836,21 +1948,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, | |||
1836 | goto out_close; | 1948 | goto out_close; |
1837 | } | 1949 | } |
1838 | 1950 | ||
1839 | /* | 1951 | err = nfsd_buffered_readdir(file, func, cdp, offsetp); |
1840 | * Read the directory entries. This silly loop is necessary because | ||
1841 | * readdir() is not guaranteed to fill up the entire buffer, but | ||
1842 | * may choose to do less. | ||
1843 | */ | ||
1844 | |||
1845 | do { | ||
1846 | cdp->err = nfserr_eof; /* will be cleared on successful read */ | ||
1847 | host_err = vfs_readdir(file, func, cdp); | ||
1848 | } while (host_err >=0 && cdp->err == nfs_ok); | ||
1849 | if (host_err) | ||
1850 | err = nfserrno(host_err); | ||
1851 | else | ||
1852 | err = cdp->err; | ||
1853 | *offsetp = vfs_llseek(file, 0, 1); | ||
1854 | 1952 | ||
1855 | if (err == nfserr_eof || err == nfserr_toosmall) | 1953 | if (err == nfserr_eof || err == nfserr_toosmall) |
1856 | err = nfs_ok; /* can still be found in ->err */ | 1954 | err = nfs_ok; /* can still be found in ->err */ |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index d020866d4232..3140a4429af1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
439 | pages[nr] = *cached_page; | 439 | pages[nr] = *cached_page; |
440 | page_cache_get(*cached_page); | 440 | page_cache_get(*cached_page); |
441 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) | 441 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) |
442 | __pagevec_lru_add(lru_pvec); | 442 | __pagevec_lru_add_file(lru_pvec); |
443 | *cached_page = NULL; | 443 | *cached_page = NULL; |
444 | } | 444 | } |
445 | index++; | 445 | index++; |
@@ -2084,7 +2084,7 @@ err_out: | |||
2084 | OSYNC_METADATA|OSYNC_DATA); | 2084 | OSYNC_METADATA|OSYNC_DATA); |
2085 | } | 2085 | } |
2086 | } | 2086 | } |
2087 | pagevec_lru_add(&lru_pvec); | 2087 | pagevec_lru_add_file(&lru_pvec); |
2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
2089 | written ? "written" : "status", (unsigned long)written, | 2089 | written ? "written" : "status", (unsigned long)written, |
2090 | (long)status); | 2090 | (long)status); |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 9e8a95be7a1e..2ca00153b6ec 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -304,8 +304,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent) | |||
304 | ntfs_attr_search_ctx *ctx; | 304 | ntfs_attr_search_ctx *ctx; |
305 | ATTR_RECORD *attr; | 305 | ATTR_RECORD *attr; |
306 | FILE_NAME_ATTR *fn; | 306 | FILE_NAME_ATTR *fn; |
307 | struct inode *parent_vi; | ||
308 | struct dentry *parent_dent; | ||
309 | unsigned long parent_ino; | 307 | unsigned long parent_ino; |
310 | int err; | 308 | int err; |
311 | 309 | ||
@@ -345,24 +343,8 @@ try_next: | |||
345 | /* Release the search context and the mft record of the child. */ | 343 | /* Release the search context and the mft record of the child. */ |
346 | ntfs_attr_put_search_ctx(ctx); | 344 | ntfs_attr_put_search_ctx(ctx); |
347 | unmap_mft_record(ni); | 345 | unmap_mft_record(ni); |
348 | /* Get the inode of the parent directory. */ | 346 | |
349 | parent_vi = ntfs_iget(vi->i_sb, parent_ino); | 347 | return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino)); |
350 | if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) { | ||
351 | if (!IS_ERR(parent_vi)) | ||
352 | iput(parent_vi); | ||
353 | ntfs_error(vi->i_sb, "Failed to get parent directory inode " | ||
354 | "0x%lx of child inode 0x%lx.", parent_ino, | ||
355 | vi->i_ino); | ||
356 | return ERR_PTR(-EACCES); | ||
357 | } | ||
358 | /* Finally get a dentry for the parent directory and return it. */ | ||
359 | parent_dent = d_alloc_anon(parent_vi); | ||
360 | if (unlikely(!parent_dent)) { | ||
361 | iput(parent_vi); | ||
362 | return ERR_PTR(-ENOMEM); | ||
363 | } | ||
364 | ntfs_debug("Done for inode 0x%lx.", vi->i_ino); | ||
365 | return parent_dent; | ||
366 | } | 348 | } |
367 | 349 | ||
368 | static struct inode *ntfs_nfs_get_inode(struct super_block *sb, | 350 | static struct inode *ntfs_nfs_get_inode(struct super_block *sb, |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 7dce1612553e..6ebaa58e2c03 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item) | |||
976 | } | 976 | } |
977 | 977 | ||
978 | if (reg->hr_bdev) | 978 | if (reg->hr_bdev) |
979 | blkdev_put(reg->hr_bdev); | 979 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); |
980 | 980 | ||
981 | if (reg->hr_slots) | 981 | if (reg->hr_slots) |
982 | kfree(reg->hr_slots); | 982 | kfree(reg->hr_slots); |
@@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1268 | goto out; | 1268 | goto out; |
1269 | 1269 | ||
1270 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); | 1270 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); |
1271 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); | 1271 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); |
1272 | if (ret) { | 1272 | if (ret) { |
1273 | reg->hr_bdev = NULL; | 1273 | reg->hr_bdev = NULL; |
1274 | goto out; | 1274 | goto out; |
@@ -1358,7 +1358,7 @@ out: | |||
1358 | iput(inode); | 1358 | iput(inode); |
1359 | if (ret < 0) { | 1359 | if (ret < 0) { |
1360 | if (reg->hr_bdev) { | 1360 | if (reg->hr_bdev) { |
1361 | blkdev_put(reg->hr_bdev); | 1361 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); |
1362 | reg->hr_bdev = NULL; | 1362 | reg->hr_bdev = NULL; |
1363 | } | 1363 | } |
1364 | } | 1364 | } |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 67527cebf214..2f27b332d8b3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -68,14 +68,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
68 | return ERR_PTR(-ESTALE); | 68 | return ERR_PTR(-ESTALE); |
69 | } | 69 | } |
70 | 70 | ||
71 | result = d_alloc_anon(inode); | 71 | result = d_obtain_alias(inode); |
72 | 72 | if (!IS_ERR(result)) | |
73 | if (!result) { | 73 | result->d_op = &ocfs2_dentry_ops; |
74 | iput(inode); | ||
75 | mlog_errno(-ENOMEM); | ||
76 | return ERR_PTR(-ENOMEM); | ||
77 | } | ||
78 | result->d_op = &ocfs2_dentry_ops; | ||
79 | 74 | ||
80 | mlog_exit_ptr(result); | 75 | mlog_exit_ptr(result); |
81 | return result; | 76 | return result; |
@@ -86,7 +81,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
86 | int status; | 81 | int status; |
87 | u64 blkno; | 82 | u64 blkno; |
88 | struct dentry *parent; | 83 | struct dentry *parent; |
89 | struct inode *inode; | ||
90 | struct inode *dir = child->d_inode; | 84 | struct inode *dir = child->d_inode; |
91 | 85 | ||
92 | mlog_entry("(0x%p, '%.*s')\n", child, | 86 | mlog_entry("(0x%p, '%.*s')\n", child, |
@@ -109,21 +103,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
109 | goto bail_unlock; | 103 | goto bail_unlock; |
110 | } | 104 | } |
111 | 105 | ||
112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); | 106 | parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); |
113 | if (IS_ERR(inode)) { | 107 | if (!IS_ERR(parent)) |
114 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 108 | parent->d_op = &ocfs2_dentry_ops; |
115 | (unsigned long long)blkno); | ||
116 | parent = ERR_PTR(-EACCES); | ||
117 | goto bail_unlock; | ||
118 | } | ||
119 | |||
120 | parent = d_alloc_anon(inode); | ||
121 | if (!parent) { | ||
122 | iput(inode); | ||
123 | parent = ERR_PTR(-ENOMEM); | ||
124 | } | ||
125 | |||
126 | parent->d_op = &ocfs2_dentry_ops; | ||
127 | 109 | ||
128 | bail_unlock: | 110 | bail_unlock: |
129 | ocfs2_inode_unlock(dir, 0); | 111 | ocfs2_inode_unlock(dir, 0); |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c0757e998876..c7275cfbdcfb 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -501,4 +501,5 @@ struct inode_operations omfs_dir_inops = { | |||
501 | struct file_operations omfs_dir_operations = { | 501 | struct file_operations omfs_dir_operations = { |
502 | .read = generic_read_dir, | 502 | .read = generic_read_dir, |
503 | .readdir = omfs_readdir, | 503 | .readdir = omfs_readdir, |
504 | .llseek = generic_file_llseek, | ||
504 | }; | 505 | }; |
@@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
798 | int error; | 798 | int error; |
799 | 799 | ||
800 | f->f_flags = flags; | 800 | f->f_flags = flags; |
801 | f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | | 801 | f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | |
802 | FMODE_PREAD | FMODE_PWRITE; | 802 | FMODE_PREAD | FMODE_PWRITE; |
803 | inode = dentry->d_inode; | 803 | inode = dentry->d_inode; |
804 | if (f->f_mode & FMODE_WRITE) { | 804 | if (f->f_mode & FMODE_WRITE) { |
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 9f5b054f06b9..d41bdc784de4 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -167,6 +167,7 @@ static int openpromfs_readdir(struct file *, void *, filldir_t); | |||
167 | static const struct file_operations openprom_operations = { | 167 | static const struct file_operations openprom_operations = { |
168 | .read = generic_read_dir, | 168 | .read = generic_read_dir, |
169 | .readdir = openpromfs_readdir, | 169 | .readdir = openpromfs_readdir, |
170 | .llseek = generic_file_llseek, | ||
170 | }; | 171 | }; |
171 | 172 | ||
172 | static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); | 173 | static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index fbeb2f372a93..633f7a0ebb2c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -195,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
195 | return ERR_PTR(res); | 195 | return ERR_PTR(res); |
196 | } | 196 | } |
197 | 197 | ||
198 | static ssize_t part_partition_show(struct device *dev, | ||
199 | struct device_attribute *attr, char *buf) | ||
200 | { | ||
201 | struct hd_struct *p = dev_to_part(dev); | ||
202 | |||
203 | return sprintf(buf, "%d\n", p->partno); | ||
204 | } | ||
205 | |||
198 | static ssize_t part_start_show(struct device *dev, | 206 | static ssize_t part_start_show(struct device *dev, |
199 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
200 | { | 208 | { |
@@ -260,6 +268,7 @@ ssize_t part_fail_store(struct device *dev, | |||
260 | } | 268 | } |
261 | #endif | 269 | #endif |
262 | 270 | ||
271 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); | ||
263 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | 272 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); |
264 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 273 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
265 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 274 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail = | |||
269 | #endif | 278 | #endif |
270 | 279 | ||
271 | static struct attribute *part_attrs[] = { | 280 | static struct attribute *part_attrs[] = { |
281 | &dev_attr_partition.attr, | ||
272 | &dev_attr_start.attr, | 282 | &dev_attr_start.attr, |
273 | &dev_attr_size.attr, | 283 | &dev_attr_size.attr, |
274 | &dev_attr_stat.attr, | 284 | &dev_attr_stat.attr, |
@@ -475,10 +485,10 @@ void register_disk(struct gendisk *disk) | |||
475 | goto exit; | 485 | goto exit; |
476 | 486 | ||
477 | bdev->bd_invalidated = 1; | 487 | bdev->bd_invalidated = 1; |
478 | err = blkdev_get(bdev, FMODE_READ, 0); | 488 | err = blkdev_get(bdev, FMODE_READ); |
479 | if (err < 0) | 489 | if (err < 0) |
480 | goto exit; | 490 | goto exit; |
481 | blkdev_put(bdev); | 491 | blkdev_put(bdev, FMODE_READ); |
482 | 492 | ||
483 | exit: | 493 | exit: |
484 | /* announce disk after possible partitions are created */ | 494 | /* announce disk after possible partitions are created */ |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ebaba0213546..63d965193b22 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -8,11 +8,20 @@ proc-y := nommu.o task_nommu.o | |||
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
9 | 9 | ||
10 | proc-y += inode.o root.o base.o generic.o array.o \ | 10 | proc-y += inode.o root.o base.o generic.o array.o \ |
11 | proc_tty.o proc_misc.o | 11 | proc_tty.o |
12 | 12 | proc-y += cmdline.o | |
13 | proc-y += cpuinfo.o | ||
14 | proc-y += devices.o | ||
15 | proc-y += interrupts.o | ||
16 | proc-y += loadavg.o | ||
17 | proc-y += meminfo.o | ||
18 | proc-y += stat.o | ||
19 | proc-y += uptime.o | ||
20 | proc-y += version.o | ||
13 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | 21 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o |
14 | proc-$(CONFIG_NET) += proc_net.o | 22 | proc-$(CONFIG_NET) += proc_net.o |
15 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 23 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
16 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o | 24 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o |
17 | proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o | 25 | proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o |
18 | proc-$(CONFIG_PRINTK) += kmsg.o | 26 | proc-$(CONFIG_PRINTK) += kmsg.o |
27 | proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index f4bc0e789539..bb9f4b05703d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
388 | 388 | ||
389 | /* add up live thread stats at the group level */ | 389 | /* add up live thread stats at the group level */ |
390 | if (whole) { | 390 | if (whole) { |
391 | struct task_cputime cputime; | ||
391 | struct task_struct *t = task; | 392 | struct task_struct *t = task; |
392 | do { | 393 | do { |
393 | min_flt += t->min_flt; | 394 | min_flt += t->min_flt; |
394 | maj_flt += t->maj_flt; | 395 | maj_flt += t->maj_flt; |
395 | utime = cputime_add(utime, task_utime(t)); | ||
396 | stime = cputime_add(stime, task_stime(t)); | ||
397 | gtime = cputime_add(gtime, task_gtime(t)); | 396 | gtime = cputime_add(gtime, task_gtime(t)); |
398 | t = next_thread(t); | 397 | t = next_thread(t); |
399 | } while (t != task); | 398 | } while (t != task); |
400 | 399 | ||
401 | min_flt += sig->min_flt; | 400 | min_flt += sig->min_flt; |
402 | maj_flt += sig->maj_flt; | 401 | maj_flt += sig->maj_flt; |
403 | utime = cputime_add(utime, sig->utime); | 402 | thread_group_cputime(task, &cputime); |
404 | stime = cputime_add(stime, sig->stime); | 403 | utime = cputime.utime; |
404 | stime = cputime.stime; | ||
405 | gtime = cputime_add(gtime, sig->gtime); | 405 | gtime = cputime_add(gtime, sig->gtime); |
406 | } | 406 | } |
407 | 407 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index b5918ae8ca79..486cf3fe7139 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, | |||
1712 | file = fcheck_files(files, fd); | 1712 | file = fcheck_files(files, fd); |
1713 | if (!file) | 1713 | if (!file) |
1714 | goto out_unlock; | 1714 | goto out_unlock; |
1715 | if (file->f_mode & 1) | 1715 | if (file->f_mode & FMODE_READ) |
1716 | inode->i_mode |= S_IRUSR | S_IXUSR; | 1716 | inode->i_mode |= S_IRUSR | S_IXUSR; |
1717 | if (file->f_mode & 2) | 1717 | if (file->f_mode & FMODE_WRITE) |
1718 | inode->i_mode |= S_IWUSR | S_IXUSR; | 1718 | inode->i_mode |= S_IWUSR | S_IXUSR; |
1719 | spin_unlock(&files->file_lock); | 1719 | spin_unlock(&files->file_lock); |
1720 | put_files_struct(files); | 1720 | put_files_struct(files); |
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c new file mode 100644 index 000000000000..82676e3fcd1d --- /dev/null +++ b/fs/proc/cmdline.c | |||
@@ -0,0 +1,29 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | |||
6 | static int cmdline_proc_show(struct seq_file *m, void *v) | ||
7 | { | ||
8 | seq_printf(m, "%s\n", saved_command_line); | ||
9 | return 0; | ||
10 | } | ||
11 | |||
12 | static int cmdline_proc_open(struct inode *inode, struct file *file) | ||
13 | { | ||
14 | return single_open(file, cmdline_proc_show, NULL); | ||
15 | } | ||
16 | |||
17 | static const struct file_operations cmdline_proc_fops = { | ||
18 | .open = cmdline_proc_open, | ||
19 | .read = seq_read, | ||
20 | .llseek = seq_lseek, | ||
21 | .release = single_release, | ||
22 | }; | ||
23 | |||
24 | static int __init proc_cmdline_init(void) | ||
25 | { | ||
26 | proc_create("cmdline", 0, NULL, &cmdline_proc_fops); | ||
27 | return 0; | ||
28 | } | ||
29 | module_init(proc_cmdline_init); | ||
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c new file mode 100644 index 000000000000..5a1e539a234b --- /dev/null +++ b/fs/proc/cpuinfo.c | |||
@@ -0,0 +1,24 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | |||
6 | extern const struct seq_operations cpuinfo_op; | ||
7 | static int cpuinfo_open(struct inode *inode, struct file *file) | ||
8 | { | ||
9 | return seq_open(file, &cpuinfo_op); | ||
10 | } | ||
11 | |||
12 | static const struct file_operations proc_cpuinfo_operations = { | ||
13 | .open = cpuinfo_open, | ||
14 | .read = seq_read, | ||
15 | .llseek = seq_lseek, | ||
16 | .release = seq_release, | ||
17 | }; | ||
18 | |||
19 | static int __init proc_cpuinfo_init(void) | ||
20 | { | ||
21 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); | ||
22 | return 0; | ||
23 | } | ||
24 | module_init(proc_cpuinfo_init); | ||
diff --git a/fs/proc/devices.c b/fs/proc/devices.c new file mode 100644 index 000000000000..59ee7da959c9 --- /dev/null +++ b/fs/proc/devices.c | |||
@@ -0,0 +1,70 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | |||
6 | static int devinfo_show(struct seq_file *f, void *v) | ||
7 | { | ||
8 | int i = *(loff_t *) v; | ||
9 | |||
10 | if (i < CHRDEV_MAJOR_HASH_SIZE) { | ||
11 | if (i == 0) | ||
12 | seq_printf(f, "Character devices:\n"); | ||
13 | chrdev_show(f, i); | ||
14 | } | ||
15 | #ifdef CONFIG_BLOCK | ||
16 | else { | ||
17 | i -= CHRDEV_MAJOR_HASH_SIZE; | ||
18 | if (i == 0) | ||
19 | seq_printf(f, "\nBlock devices:\n"); | ||
20 | blkdev_show(f, i); | ||
21 | } | ||
22 | #endif | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static void *devinfo_start(struct seq_file *f, loff_t *pos) | ||
27 | { | ||
28 | if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) | ||
29 | return pos; | ||
30 | return NULL; | ||
31 | } | ||
32 | |||
33 | static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) | ||
34 | { | ||
35 | (*pos)++; | ||
36 | if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) | ||
37 | return NULL; | ||
38 | return pos; | ||
39 | } | ||
40 | |||
41 | static void devinfo_stop(struct seq_file *f, void *v) | ||
42 | { | ||
43 | /* Nothing to do */ | ||
44 | } | ||
45 | |||
46 | static const struct seq_operations devinfo_ops = { | ||
47 | .start = devinfo_start, | ||
48 | .next = devinfo_next, | ||
49 | .stop = devinfo_stop, | ||
50 | .show = devinfo_show | ||
51 | }; | ||
52 | |||
53 | static int devinfo_open(struct inode *inode, struct file *filp) | ||
54 | { | ||
55 | return seq_open(filp, &devinfo_ops); | ||
56 | } | ||
57 | |||
58 | static const struct file_operations proc_devinfo_operations = { | ||
59 | .open = devinfo_open, | ||
60 | .read = seq_read, | ||
61 | .llseek = seq_lseek, | ||
62 | .release = seq_release, | ||
63 | }; | ||
64 | |||
65 | static int __init proc_devices_init(void) | ||
66 | { | ||
67 | proc_create("devices", 0, NULL, &proc_devinfo_operations); | ||
68 | return 0; | ||
69 | } | ||
70 | module_init(proc_devices_init); | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7821589a17d5..60a359b35582 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -547,9 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
547 | 547 | ||
548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
549 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", | 550 | WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
551 | dir->name, dp->name); | 551 | dir->name, dp->name); |
552 | dump_stack(); | ||
553 | break; | 552 | break; |
554 | } | 553 | } |
555 | 554 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c6b4fa7e3b49..2543fd00c658 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -106,14 +106,13 @@ static void init_once(void *foo) | |||
106 | inode_init_once(&ei->vfs_inode); | 106 | inode_init_once(&ei->vfs_inode); |
107 | } | 107 | } |
108 | 108 | ||
109 | int __init proc_init_inodecache(void) | 109 | void __init proc_init_inodecache(void) |
110 | { | 110 | { |
111 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", | 111 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", |
112 | sizeof(struct proc_inode), | 112 | sizeof(struct proc_inode), |
113 | 0, (SLAB_RECLAIM_ACCOUNT| | 113 | 0, (SLAB_RECLAIM_ACCOUNT| |
114 | SLAB_MEM_SPREAD|SLAB_PANIC), | 114 | SLAB_MEM_SPREAD|SLAB_PANIC), |
115 | init_once); | 115 | init_once); |
116 | return 0; | ||
117 | } | 116 | } |
118 | 117 | ||
119 | static const struct super_operations proc_sops = { | 118 | static const struct super_operations proc_sops = { |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3bfb7b8747b3..3e8aeb8b61ce 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -61,12 +61,11 @@ extern const struct file_operations proc_smaps_operations; | |||
61 | extern const struct file_operations proc_clear_refs_operations; | 61 | extern const struct file_operations proc_clear_refs_operations; |
62 | extern const struct file_operations proc_pagemap_operations; | 62 | extern const struct file_operations proc_pagemap_operations; |
63 | extern const struct file_operations proc_net_operations; | 63 | extern const struct file_operations proc_net_operations; |
64 | extern const struct file_operations proc_kmsg_operations; | ||
65 | extern const struct inode_operations proc_net_inode_operations; | 64 | extern const struct inode_operations proc_net_inode_operations; |
66 | 65 | ||
67 | void free_proc_entry(struct proc_dir_entry *de); | 66 | void free_proc_entry(struct proc_dir_entry *de); |
68 | 67 | ||
69 | int proc_init_inodecache(void); | 68 | void proc_init_inodecache(void); |
70 | 69 | ||
71 | static inline struct pid *proc_pid(struct inode *inode) | 70 | static inline struct pid *proc_pid(struct inode *inode) |
72 | { | 71 | { |
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c new file mode 100644 index 000000000000..05029c0e2f24 --- /dev/null +++ b/fs/proc/interrupts.c | |||
@@ -0,0 +1,53 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/interrupt.h> | ||
4 | #include <linux/irqnr.h> | ||
5 | #include <linux/proc_fs.h> | ||
6 | #include <linux/seq_file.h> | ||
7 | |||
8 | /* | ||
9 | * /proc/interrupts | ||
10 | */ | ||
11 | static void *int_seq_start(struct seq_file *f, loff_t *pos) | ||
12 | { | ||
13 | return (*pos <= nr_irqs) ? pos : NULL; | ||
14 | } | ||
15 | |||
16 | static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) | ||
17 | { | ||
18 | (*pos)++; | ||
19 | if (*pos > nr_irqs) | ||
20 | return NULL; | ||
21 | return pos; | ||
22 | } | ||
23 | |||
24 | static void int_seq_stop(struct seq_file *f, void *v) | ||
25 | { | ||
26 | /* Nothing to do */ | ||
27 | } | ||
28 | |||
29 | static const struct seq_operations int_seq_ops = { | ||
30 | .start = int_seq_start, | ||
31 | .next = int_seq_next, | ||
32 | .stop = int_seq_stop, | ||
33 | .show = show_interrupts | ||
34 | }; | ||
35 | |||
36 | static int interrupts_open(struct inode *inode, struct file *filp) | ||
37 | { | ||
38 | return seq_open(filp, &int_seq_ops); | ||
39 | } | ||
40 | |||
41 | static const struct file_operations proc_interrupts_operations = { | ||
42 | .open = interrupts_open, | ||
43 | .read = seq_read, | ||
44 | .llseek = seq_lseek, | ||
45 | .release = seq_release, | ||
46 | }; | ||
47 | |||
48 | static int __init proc_interrupts_init(void) | ||
49 | { | ||
50 | proc_create("interrupts", 0, NULL, &proc_interrupts_operations); | ||
51 | return 0; | ||
52 | } | ||
53 | module_init(proc_interrupts_init); | ||
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index c2370c76fb71..59b43a068872 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #define ELF_CORE_EFLAGS 0 | 27 | #define ELF_CORE_EFLAGS 0 |
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | static struct proc_dir_entry *proc_root_kcore; | ||
31 | |||
30 | static int open_kcore(struct inode * inode, struct file * filp) | 32 | static int open_kcore(struct inode * inode, struct file * filp) |
31 | { | 33 | { |
32 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 34 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
@@ -34,7 +36,7 @@ static int open_kcore(struct inode * inode, struct file * filp) | |||
34 | 36 | ||
35 | static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); | 37 | static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); |
36 | 38 | ||
37 | const struct file_operations proc_kcore_operations = { | 39 | static const struct file_operations proc_kcore_operations = { |
38 | .read = read_kcore, | 40 | .read = read_kcore, |
39 | .open = open_kcore, | 41 | .open = open_kcore, |
40 | }; | 42 | }; |
@@ -399,3 +401,13 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
399 | 401 | ||
400 | return acc; | 402 | return acc; |
401 | } | 403 | } |
404 | |||
405 | static int __init proc_kcore_init(void) | ||
406 | { | ||
407 | proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); | ||
408 | if (proc_root_kcore) | ||
409 | proc_root_kcore->size = | ||
410 | (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; | ||
411 | return 0; | ||
412 | } | ||
413 | module_init(proc_kcore_init); | ||
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 9fd5df3f40ce..7ca78346d3f0 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c | |||
@@ -10,13 +10,12 @@ | |||
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/poll.h> | 12 | #include <linux/poll.h> |
13 | #include <linux/proc_fs.h> | ||
13 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
14 | 15 | ||
15 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
16 | #include <asm/io.h> | 17 | #include <asm/io.h> |
17 | 18 | ||
18 | #include "internal.h" | ||
19 | |||
20 | extern wait_queue_head_t log_wait; | 19 | extern wait_queue_head_t log_wait; |
21 | 20 | ||
22 | extern int do_syslog(int type, char __user *bug, int count); | 21 | extern int do_syslog(int type, char __user *bug, int count); |
@@ -49,9 +48,16 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait) | |||
49 | } | 48 | } |
50 | 49 | ||
51 | 50 | ||
52 | const struct file_operations proc_kmsg_operations = { | 51 | static const struct file_operations proc_kmsg_operations = { |
53 | .read = kmsg_read, | 52 | .read = kmsg_read, |
54 | .poll = kmsg_poll, | 53 | .poll = kmsg_poll, |
55 | .open = kmsg_open, | 54 | .open = kmsg_open, |
56 | .release = kmsg_release, | 55 | .release = kmsg_release, |
57 | }; | 56 | }; |
57 | |||
58 | static int __init proc_kmsg_init(void) | ||
59 | { | ||
60 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); | ||
61 | return 0; | ||
62 | } | ||
63 | module_init(proc_kmsg_init); | ||
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c new file mode 100644 index 000000000000..9bca39cf99ee --- /dev/null +++ b/fs/proc/loadavg.c | |||
@@ -0,0 +1,51 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/pid_namespace.h> | ||
4 | #include <linux/proc_fs.h> | ||
5 | #include <linux/sched.h> | ||
6 | #include <linux/seq_file.h> | ||
7 | #include <linux/seqlock.h> | ||
8 | #include <linux/time.h> | ||
9 | |||
10 | #define LOAD_INT(x) ((x) >> FSHIFT) | ||
11 | #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) | ||
12 | |||
13 | static int loadavg_proc_show(struct seq_file *m, void *v) | ||
14 | { | ||
15 | int a, b, c; | ||
16 | unsigned long seq; | ||
17 | |||
18 | do { | ||
19 | seq = read_seqbegin(&xtime_lock); | ||
20 | a = avenrun[0] + (FIXED_1/200); | ||
21 | b = avenrun[1] + (FIXED_1/200); | ||
22 | c = avenrun[2] + (FIXED_1/200); | ||
23 | } while (read_seqretry(&xtime_lock, seq)); | ||
24 | |||
25 | seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", | ||
26 | LOAD_INT(a), LOAD_FRAC(a), | ||
27 | LOAD_INT(b), LOAD_FRAC(b), | ||
28 | LOAD_INT(c), LOAD_FRAC(c), | ||
29 | nr_running(), nr_threads, | ||
30 | task_active_pid_ns(current)->last_pid); | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | static int loadavg_proc_open(struct inode *inode, struct file *file) | ||
35 | { | ||
36 | return single_open(file, loadavg_proc_show, NULL); | ||
37 | } | ||
38 | |||
39 | static const struct file_operations loadavg_proc_fops = { | ||
40 | .open = loadavg_proc_open, | ||
41 | .read = seq_read, | ||
42 | .llseek = seq_lseek, | ||
43 | .release = single_release, | ||
44 | }; | ||
45 | |||
46 | static int __init proc_loadavg_init(void) | ||
47 | { | ||
48 | proc_create("loadavg", 0, NULL, &loadavg_proc_fops); | ||
49 | return 0; | ||
50 | } | ||
51 | module_init(proc_loadavg_init); | ||
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c new file mode 100644 index 000000000000..b1675c4e66da --- /dev/null +++ b/fs/proc/meminfo.c | |||
@@ -0,0 +1,168 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/hugetlb.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/mman.h> | ||
7 | #include <linux/mmzone.h> | ||
8 | #include <linux/proc_fs.h> | ||
9 | #include <linux/quicklist.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | #include <linux/swap.h> | ||
12 | #include <linux/vmstat.h> | ||
13 | #include <asm/atomic.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/pgtable.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) | ||
19 | { | ||
20 | } | ||
21 | |||
22 | static int meminfo_proc_show(struct seq_file *m, void *v) | ||
23 | { | ||
24 | struct sysinfo i; | ||
25 | unsigned long committed; | ||
26 | unsigned long allowed; | ||
27 | struct vmalloc_info vmi; | ||
28 | long cached; | ||
29 | unsigned long pages[NR_LRU_LISTS]; | ||
30 | int lru; | ||
31 | |||
32 | /* | ||
33 | * display in kilobytes. | ||
34 | */ | ||
35 | #define K(x) ((x) << (PAGE_SHIFT - 10)) | ||
36 | si_meminfo(&i); | ||
37 | si_swapinfo(&i); | ||
38 | committed = atomic_long_read(&vm_committed_space); | ||
39 | allowed = ((totalram_pages - hugetlb_total_pages()) | ||
40 | * sysctl_overcommit_ratio / 100) + total_swap_pages; | ||
41 | |||
42 | cached = global_page_state(NR_FILE_PAGES) - | ||
43 | total_swapcache_pages - i.bufferram; | ||
44 | if (cached < 0) | ||
45 | cached = 0; | ||
46 | |||
47 | get_vmalloc_info(&vmi); | ||
48 | |||
49 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) | ||
50 | pages[lru] = global_page_state(NR_LRU_BASE + lru); | ||
51 | |||
52 | /* | ||
53 | * Tagged format, for easy grepping and expansion. | ||
54 | */ | ||
55 | seq_printf(m, | ||
56 | "MemTotal: %8lu kB\n" | ||
57 | "MemFree: %8lu kB\n" | ||
58 | "Buffers: %8lu kB\n" | ||
59 | "Cached: %8lu kB\n" | ||
60 | "SwapCached: %8lu kB\n" | ||
61 | "Active: %8lu kB\n" | ||
62 | "Inactive: %8lu kB\n" | ||
63 | "Active(anon): %8lu kB\n" | ||
64 | "Inactive(anon): %8lu kB\n" | ||
65 | "Active(file): %8lu kB\n" | ||
66 | "Inactive(file): %8lu kB\n" | ||
67 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
68 | "Unevictable: %8lu kB\n" | ||
69 | "Mlocked: %8lu kB\n" | ||
70 | #endif | ||
71 | #ifdef CONFIG_HIGHMEM | ||
72 | "HighTotal: %8lu kB\n" | ||
73 | "HighFree: %8lu kB\n" | ||
74 | "LowTotal: %8lu kB\n" | ||
75 | "LowFree: %8lu kB\n" | ||
76 | #endif | ||
77 | "SwapTotal: %8lu kB\n" | ||
78 | "SwapFree: %8lu kB\n" | ||
79 | "Dirty: %8lu kB\n" | ||
80 | "Writeback: %8lu kB\n" | ||
81 | "AnonPages: %8lu kB\n" | ||
82 | "Mapped: %8lu kB\n" | ||
83 | "Slab: %8lu kB\n" | ||
84 | "SReclaimable: %8lu kB\n" | ||
85 | "SUnreclaim: %8lu kB\n" | ||
86 | "PageTables: %8lu kB\n" | ||
87 | #ifdef CONFIG_QUICKLIST | ||
88 | "Quicklists: %8lu kB\n" | ||
89 | #endif | ||
90 | "NFS_Unstable: %8lu kB\n" | ||
91 | "Bounce: %8lu kB\n" | ||
92 | "WritebackTmp: %8lu kB\n" | ||
93 | "CommitLimit: %8lu kB\n" | ||
94 | "Committed_AS: %8lu kB\n" | ||
95 | "VmallocTotal: %8lu kB\n" | ||
96 | "VmallocUsed: %8lu kB\n" | ||
97 | "VmallocChunk: %8lu kB\n", | ||
98 | K(i.totalram), | ||
99 | K(i.freeram), | ||
100 | K(i.bufferram), | ||
101 | K(cached), | ||
102 | K(total_swapcache_pages), | ||
103 | K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), | ||
104 | K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), | ||
105 | K(pages[LRU_ACTIVE_ANON]), | ||
106 | K(pages[LRU_INACTIVE_ANON]), | ||
107 | K(pages[LRU_ACTIVE_FILE]), | ||
108 | K(pages[LRU_INACTIVE_FILE]), | ||
109 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
110 | K(pages[LRU_UNEVICTABLE]), | ||
111 | K(global_page_state(NR_MLOCK)), | ||
112 | #endif | ||
113 | #ifdef CONFIG_HIGHMEM | ||
114 | K(i.totalhigh), | ||
115 | K(i.freehigh), | ||
116 | K(i.totalram-i.totalhigh), | ||
117 | K(i.freeram-i.freehigh), | ||
118 | #endif | ||
119 | K(i.totalswap), | ||
120 | K(i.freeswap), | ||
121 | K(global_page_state(NR_FILE_DIRTY)), | ||
122 | K(global_page_state(NR_WRITEBACK)), | ||
123 | K(global_page_state(NR_ANON_PAGES)), | ||
124 | K(global_page_state(NR_FILE_MAPPED)), | ||
125 | K(global_page_state(NR_SLAB_RECLAIMABLE) + | ||
126 | global_page_state(NR_SLAB_UNRECLAIMABLE)), | ||
127 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | ||
128 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | ||
129 | K(global_page_state(NR_PAGETABLE)), | ||
130 | #ifdef CONFIG_QUICKLIST | ||
131 | K(quicklist_total_size()), | ||
132 | #endif | ||
133 | K(global_page_state(NR_UNSTABLE_NFS)), | ||
134 | K(global_page_state(NR_BOUNCE)), | ||
135 | K(global_page_state(NR_WRITEBACK_TEMP)), | ||
136 | K(allowed), | ||
137 | K(committed), | ||
138 | (unsigned long)VMALLOC_TOTAL >> 10, | ||
139 | vmi.used >> 10, | ||
140 | vmi.largest_chunk >> 10 | ||
141 | ); | ||
142 | |||
143 | hugetlb_report_meminfo(m); | ||
144 | |||
145 | arch_report_meminfo(m); | ||
146 | |||
147 | return 0; | ||
148 | #undef K | ||
149 | } | ||
150 | |||
151 | static int meminfo_proc_open(struct inode *inode, struct file *file) | ||
152 | { | ||
153 | return single_open(file, meminfo_proc_show, NULL); | ||
154 | } | ||
155 | |||
156 | static const struct file_operations meminfo_proc_fops = { | ||
157 | .open = meminfo_proc_open, | ||
158 | .read = seq_read, | ||
159 | .llseek = seq_lseek, | ||
160 | .release = single_release, | ||
161 | }; | ||
162 | |||
163 | static int __init proc_meminfo_init(void) | ||
164 | { | ||
165 | proc_create("meminfo", 0, NULL, &meminfo_proc_fops); | ||
166 | return 0; | ||
167 | } | ||
168 | module_init(proc_meminfo_init); | ||
diff --git a/fs/proc/page.c b/fs/proc/page.c new file mode 100644 index 000000000000..767d95a6d1b1 --- /dev/null +++ b/fs/proc/page.c | |||
@@ -0,0 +1,147 @@ | |||
1 | #include <linux/bootmem.h> | ||
2 | #include <linux/compiler.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/mmzone.h> | ||
7 | #include <linux/proc_fs.h> | ||
8 | #include <linux/seq_file.h> | ||
9 | #include <asm/uaccess.h> | ||
10 | #include "internal.h" | ||
11 | |||
12 | #define KPMSIZE sizeof(u64) | ||
13 | #define KPMMASK (KPMSIZE - 1) | ||
14 | /* /proc/kpagecount - an array exposing page counts | ||
15 | * | ||
16 | * Each entry is a u64 representing the corresponding | ||
17 | * physical page count. | ||
18 | */ | ||
19 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | ||
20 | size_t count, loff_t *ppos) | ||
21 | { | ||
22 | u64 __user *out = (u64 __user *)buf; | ||
23 | struct page *ppage; | ||
24 | unsigned long src = *ppos; | ||
25 | unsigned long pfn; | ||
26 | ssize_t ret = 0; | ||
27 | u64 pcount; | ||
28 | |||
29 | pfn = src / KPMSIZE; | ||
30 | count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); | ||
31 | if (src & KPMMASK || count & KPMMASK) | ||
32 | return -EINVAL; | ||
33 | |||
34 | while (count > 0) { | ||
35 | ppage = NULL; | ||
36 | if (pfn_valid(pfn)) | ||
37 | ppage = pfn_to_page(pfn); | ||
38 | pfn++; | ||
39 | if (!ppage) | ||
40 | pcount = 0; | ||
41 | else | ||
42 | pcount = page_mapcount(ppage); | ||
43 | |||
44 | if (put_user(pcount, out++)) { | ||
45 | ret = -EFAULT; | ||
46 | break; | ||
47 | } | ||
48 | |||
49 | count -= KPMSIZE; | ||
50 | } | ||
51 | |||
52 | *ppos += (char __user *)out - buf; | ||
53 | if (!ret) | ||
54 | ret = (char __user *)out - buf; | ||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | static const struct file_operations proc_kpagecount_operations = { | ||
59 | .llseek = mem_lseek, | ||
60 | .read = kpagecount_read, | ||
61 | }; | ||
62 | |||
63 | /* /proc/kpageflags - an array exposing page flags | ||
64 | * | ||
65 | * Each entry is a u64 representing the corresponding | ||
66 | * physical page flags. | ||
67 | */ | ||
68 | |||
69 | /* These macros are used to decouple internal flags from exported ones */ | ||
70 | |||
71 | #define KPF_LOCKED 0 | ||
72 | #define KPF_ERROR 1 | ||
73 | #define KPF_REFERENCED 2 | ||
74 | #define KPF_UPTODATE 3 | ||
75 | #define KPF_DIRTY 4 | ||
76 | #define KPF_LRU 5 | ||
77 | #define KPF_ACTIVE 6 | ||
78 | #define KPF_SLAB 7 | ||
79 | #define KPF_WRITEBACK 8 | ||
80 | #define KPF_RECLAIM 9 | ||
81 | #define KPF_BUDDY 10 | ||
82 | |||
83 | #define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) | ||
84 | |||
85 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | ||
86 | size_t count, loff_t *ppos) | ||
87 | { | ||
88 | u64 __user *out = (u64 __user *)buf; | ||
89 | struct page *ppage; | ||
90 | unsigned long src = *ppos; | ||
91 | unsigned long pfn; | ||
92 | ssize_t ret = 0; | ||
93 | u64 kflags, uflags; | ||
94 | |||
95 | pfn = src / KPMSIZE; | ||
96 | count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); | ||
97 | if (src & KPMMASK || count & KPMMASK) | ||
98 | return -EINVAL; | ||
99 | |||
100 | while (count > 0) { | ||
101 | ppage = NULL; | ||
102 | if (pfn_valid(pfn)) | ||
103 | ppage = pfn_to_page(pfn); | ||
104 | pfn++; | ||
105 | if (!ppage) | ||
106 | kflags = 0; | ||
107 | else | ||
108 | kflags = ppage->flags; | ||
109 | |||
110 | uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | | ||
111 | kpf_copy_bit(kflags, KPF_ERROR, PG_error) | | ||
112 | kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | | ||
113 | kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | | ||
114 | kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | | ||
115 | kpf_copy_bit(kflags, KPF_LRU, PG_lru) | | ||
116 | kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | | ||
117 | kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | | ||
118 | kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | | ||
119 | kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | | ||
120 | kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); | ||
121 | |||
122 | if (put_user(uflags, out++)) { | ||
123 | ret = -EFAULT; | ||
124 | break; | ||
125 | } | ||
126 | |||
127 | count -= KPMSIZE; | ||
128 | } | ||
129 | |||
130 | *ppos += (char __user *)out - buf; | ||
131 | if (!ret) | ||
132 | ret = (char __user *)out - buf; | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | static const struct file_operations proc_kpageflags_operations = { | ||
137 | .llseek = mem_lseek, | ||
138 | .read = kpageflags_read, | ||
139 | }; | ||
140 | |||
141 | static int __init proc_page_init(void) | ||
142 | { | ||
143 | proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); | ||
144 | proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); | ||
145 | return 0; | ||
146 | } | ||
147 | module_init(proc_page_init); | ||
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index eca471bc8512..d777789b7a89 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Copyright 1997 Paul Mackerras | 4 | * Copyright 1997 Paul Mackerras |
5 | */ | 5 | */ |
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/init.h> | ||
7 | #include <linux/time.h> | 8 | #include <linux/time.h> |
8 | #include <linux/proc_fs.h> | 9 | #include <linux/proc_fs.h> |
9 | #include <linux/stat.h> | 10 | #include <linux/stat.h> |
@@ -214,7 +215,7 @@ void proc_device_tree_add_node(struct device_node *np, | |||
214 | /* | 215 | /* |
215 | * Called on initialization to set up the /proc/device-tree subtree | 216 | * Called on initialization to set up the /proc/device-tree subtree |
216 | */ | 217 | */ |
217 | void proc_device_tree_init(void) | 218 | void __init proc_device_tree_init(void) |
218 | { | 219 | { |
219 | struct device_node *root; | 220 | struct device_node *root; |
220 | if ( !have_of ) | 221 | if ( !have_of ) |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c deleted file mode 100644 index 59ea42e1ef03..000000000000 --- a/fs/proc/proc_misc.c +++ /dev/null | |||
@@ -1,914 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/proc/proc_misc.c | ||
3 | * | ||
4 | * linux/fs/proc/array.c | ||
5 | * Copyright (C) 1992 by Linus Torvalds | ||
6 | * based on ideas by Darren Senn | ||
7 | * | ||
8 | * This used to be the part of array.c. See the rest of history and credits | ||
9 | * there. I took this into a separate file and switched the thing to generic | ||
10 | * proc_file_inode_operations, leaving in array.c only per-process stuff. | ||
11 | * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999. | ||
12 | * | ||
13 | * Changes: | ||
14 | * Fulton Green : Encapsulated position metric calculations. | ||
15 | * <kernel@FultonGreen.com> | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/time.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/kernel_stat.h> | ||
23 | #include <linux/fs.h> | ||
24 | #include <linux/tty.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/mman.h> | ||
27 | #include <linux/quicklist.h> | ||
28 | #include <linux/proc_fs.h> | ||
29 | #include <linux/ioport.h> | ||
30 | #include <linux/mm.h> | ||
31 | #include <linux/mmzone.h> | ||
32 | #include <linux/pagemap.h> | ||
33 | #include <linux/interrupt.h> | ||
34 | #include <linux/swap.h> | ||
35 | #include <linux/slab.h> | ||
36 | #include <linux/genhd.h> | ||
37 | #include <linux/smp.h> | ||
38 | #include <linux/signal.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/seq_file.h> | ||
42 | #include <linux/times.h> | ||
43 | #include <linux/profile.h> | ||
44 | #include <linux/utsname.h> | ||
45 | #include <linux/blkdev.h> | ||
46 | #include <linux/hugetlb.h> | ||
47 | #include <linux/jiffies.h> | ||
48 | #include <linux/vmalloc.h> | ||
49 | #include <linux/crash_dump.h> | ||
50 | #include <linux/pid_namespace.h> | ||
51 | #include <linux/bootmem.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/pgtable.h> | ||
54 | #include <asm/io.h> | ||
55 | #include <asm/tlb.h> | ||
56 | #include <asm/div64.h> | ||
57 | #include "internal.h" | ||
58 | |||
59 | #define LOAD_INT(x) ((x) >> FSHIFT) | ||
60 | #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) | ||
61 | /* | ||
62 | * Warning: stuff below (imported functions) assumes that its output will fit | ||
63 | * into one page. For some of those functions it may be wrong. Moreover, we | ||
64 | * have a way to deal with that gracefully. Right now I used straightforward | ||
65 | * wrappers, but this needs further analysis wrt potential overflows. | ||
66 | */ | ||
67 | extern int get_hardware_list(char *); | ||
68 | extern int get_stram_list(char *); | ||
69 | extern int get_exec_domain_list(char *); | ||
70 | |||
71 | static int proc_calc_metrics(char *page, char **start, off_t off, | ||
72 | int count, int *eof, int len) | ||
73 | { | ||
74 | if (len <= off+count) *eof = 1; | ||
75 | *start = page + off; | ||
76 | len -= off; | ||
77 | if (len>count) len = count; | ||
78 | if (len<0) len = 0; | ||
79 | return len; | ||
80 | } | ||
81 | |||
82 | static int loadavg_read_proc(char *page, char **start, off_t off, | ||
83 | int count, int *eof, void *data) | ||
84 | { | ||
85 | int a, b, c; | ||
86 | int len; | ||
87 | unsigned long seq; | ||
88 | |||
89 | do { | ||
90 | seq = read_seqbegin(&xtime_lock); | ||
91 | a = avenrun[0] + (FIXED_1/200); | ||
92 | b = avenrun[1] + (FIXED_1/200); | ||
93 | c = avenrun[2] + (FIXED_1/200); | ||
94 | } while (read_seqretry(&xtime_lock, seq)); | ||
95 | |||
96 | len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", | ||
97 | LOAD_INT(a), LOAD_FRAC(a), | ||
98 | LOAD_INT(b), LOAD_FRAC(b), | ||
99 | LOAD_INT(c), LOAD_FRAC(c), | ||
100 | nr_running(), nr_threads, | ||
101 | task_active_pid_ns(current)->last_pid); | ||
102 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
103 | } | ||
104 | |||
105 | static int uptime_read_proc(char *page, char **start, off_t off, | ||
106 | int count, int *eof, void *data) | ||
107 | { | ||
108 | struct timespec uptime; | ||
109 | struct timespec idle; | ||
110 | int len; | ||
111 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); | ||
112 | |||
113 | do_posix_clock_monotonic_gettime(&uptime); | ||
114 | monotonic_to_bootbased(&uptime); | ||
115 | cputime_to_timespec(idletime, &idle); | ||
116 | len = sprintf(page,"%lu.%02lu %lu.%02lu\n", | ||
117 | (unsigned long) uptime.tv_sec, | ||
118 | (uptime.tv_nsec / (NSEC_PER_SEC / 100)), | ||
119 | (unsigned long) idle.tv_sec, | ||
120 | (idle.tv_nsec / (NSEC_PER_SEC / 100))); | ||
121 | |||
122 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
123 | } | ||
124 | |||
125 | int __attribute__((weak)) arch_report_meminfo(char *page) | ||
126 | { | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | static int meminfo_read_proc(char *page, char **start, off_t off, | ||
131 | int count, int *eof, void *data) | ||
132 | { | ||
133 | struct sysinfo i; | ||
134 | int len; | ||
135 | unsigned long committed; | ||
136 | unsigned long allowed; | ||
137 | struct vmalloc_info vmi; | ||
138 | long cached; | ||
139 | |||
140 | /* | ||
141 | * display in kilobytes. | ||
142 | */ | ||
143 | #define K(x) ((x) << (PAGE_SHIFT - 10)) | ||
144 | si_meminfo(&i); | ||
145 | si_swapinfo(&i); | ||
146 | committed = atomic_long_read(&vm_committed_space); | ||
147 | allowed = ((totalram_pages - hugetlb_total_pages()) | ||
148 | * sysctl_overcommit_ratio / 100) + total_swap_pages; | ||
149 | |||
150 | cached = global_page_state(NR_FILE_PAGES) - | ||
151 | total_swapcache_pages - i.bufferram; | ||
152 | if (cached < 0) | ||
153 | cached = 0; | ||
154 | |||
155 | get_vmalloc_info(&vmi); | ||
156 | |||
157 | /* | ||
158 | * Tagged format, for easy grepping and expansion. | ||
159 | */ | ||
160 | len = sprintf(page, | ||
161 | "MemTotal: %8lu kB\n" | ||
162 | "MemFree: %8lu kB\n" | ||
163 | "Buffers: %8lu kB\n" | ||
164 | "Cached: %8lu kB\n" | ||
165 | "SwapCached: %8lu kB\n" | ||
166 | "Active: %8lu kB\n" | ||
167 | "Inactive: %8lu kB\n" | ||
168 | #ifdef CONFIG_HIGHMEM | ||
169 | "HighTotal: %8lu kB\n" | ||
170 | "HighFree: %8lu kB\n" | ||
171 | "LowTotal: %8lu kB\n" | ||
172 | "LowFree: %8lu kB\n" | ||
173 | #endif | ||
174 | "SwapTotal: %8lu kB\n" | ||
175 | "SwapFree: %8lu kB\n" | ||
176 | "Dirty: %8lu kB\n" | ||
177 | "Writeback: %8lu kB\n" | ||
178 | "AnonPages: %8lu kB\n" | ||
179 | "Mapped: %8lu kB\n" | ||
180 | "Slab: %8lu kB\n" | ||
181 | "SReclaimable: %8lu kB\n" | ||
182 | "SUnreclaim: %8lu kB\n" | ||
183 | "PageTables: %8lu kB\n" | ||
184 | #ifdef CONFIG_QUICKLIST | ||
185 | "Quicklists: %8lu kB\n" | ||
186 | #endif | ||
187 | "NFS_Unstable: %8lu kB\n" | ||
188 | "Bounce: %8lu kB\n" | ||
189 | "WritebackTmp: %8lu kB\n" | ||
190 | "CommitLimit: %8lu kB\n" | ||
191 | "Committed_AS: %8lu kB\n" | ||
192 | "VmallocTotal: %8lu kB\n" | ||
193 | "VmallocUsed: %8lu kB\n" | ||
194 | "VmallocChunk: %8lu kB\n", | ||
195 | K(i.totalram), | ||
196 | K(i.freeram), | ||
197 | K(i.bufferram), | ||
198 | K(cached), | ||
199 | K(total_swapcache_pages), | ||
200 | K(global_page_state(NR_ACTIVE)), | ||
201 | K(global_page_state(NR_INACTIVE)), | ||
202 | #ifdef CONFIG_HIGHMEM | ||
203 | K(i.totalhigh), | ||
204 | K(i.freehigh), | ||
205 | K(i.totalram-i.totalhigh), | ||
206 | K(i.freeram-i.freehigh), | ||
207 | #endif | ||
208 | K(i.totalswap), | ||
209 | K(i.freeswap), | ||
210 | K(global_page_state(NR_FILE_DIRTY)), | ||
211 | K(global_page_state(NR_WRITEBACK)), | ||
212 | K(global_page_state(NR_ANON_PAGES)), | ||
213 | K(global_page_state(NR_FILE_MAPPED)), | ||
214 | K(global_page_state(NR_SLAB_RECLAIMABLE) + | ||
215 | global_page_state(NR_SLAB_UNRECLAIMABLE)), | ||
216 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | ||
217 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | ||
218 | K(global_page_state(NR_PAGETABLE)), | ||
219 | #ifdef CONFIG_QUICKLIST | ||
220 | K(quicklist_total_size()), | ||
221 | #endif | ||
222 | K(global_page_state(NR_UNSTABLE_NFS)), | ||
223 | K(global_page_state(NR_BOUNCE)), | ||
224 | K(global_page_state(NR_WRITEBACK_TEMP)), | ||
225 | K(allowed), | ||
226 | K(committed), | ||
227 | (unsigned long)VMALLOC_TOTAL >> 10, | ||
228 | vmi.used >> 10, | ||
229 | vmi.largest_chunk >> 10 | ||
230 | ); | ||
231 | |||
232 | len += hugetlb_report_meminfo(page + len); | ||
233 | |||
234 | len += arch_report_meminfo(page + len); | ||
235 | |||
236 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
237 | #undef K | ||
238 | } | ||
239 | |||
240 | static int fragmentation_open(struct inode *inode, struct file *file) | ||
241 | { | ||
242 | (void)inode; | ||
243 | return seq_open(file, &fragmentation_op); | ||
244 | } | ||
245 | |||
246 | static const struct file_operations fragmentation_file_operations = { | ||
247 | .open = fragmentation_open, | ||
248 | .read = seq_read, | ||
249 | .llseek = seq_lseek, | ||
250 | .release = seq_release, | ||
251 | }; | ||
252 | |||
253 | static int pagetypeinfo_open(struct inode *inode, struct file *file) | ||
254 | { | ||
255 | return seq_open(file, &pagetypeinfo_op); | ||
256 | } | ||
257 | |||
258 | static const struct file_operations pagetypeinfo_file_ops = { | ||
259 | .open = pagetypeinfo_open, | ||
260 | .read = seq_read, | ||
261 | .llseek = seq_lseek, | ||
262 | .release = seq_release, | ||
263 | }; | ||
264 | |||
265 | static int zoneinfo_open(struct inode *inode, struct file *file) | ||
266 | { | ||
267 | return seq_open(file, &zoneinfo_op); | ||
268 | } | ||
269 | |||
270 | static const struct file_operations proc_zoneinfo_file_operations = { | ||
271 | .open = zoneinfo_open, | ||
272 | .read = seq_read, | ||
273 | .llseek = seq_lseek, | ||
274 | .release = seq_release, | ||
275 | }; | ||
276 | |||
277 | static int version_read_proc(char *page, char **start, off_t off, | ||
278 | int count, int *eof, void *data) | ||
279 | { | ||
280 | int len; | ||
281 | |||
282 | len = snprintf(page, PAGE_SIZE, linux_proc_banner, | ||
283 | utsname()->sysname, | ||
284 | utsname()->release, | ||
285 | utsname()->version); | ||
286 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
287 | } | ||
288 | |||
289 | extern const struct seq_operations cpuinfo_op; | ||
290 | static int cpuinfo_open(struct inode *inode, struct file *file) | ||
291 | { | ||
292 | return seq_open(file, &cpuinfo_op); | ||
293 | } | ||
294 | |||
295 | static const struct file_operations proc_cpuinfo_operations = { | ||
296 | .open = cpuinfo_open, | ||
297 | .read = seq_read, | ||
298 | .llseek = seq_lseek, | ||
299 | .release = seq_release, | ||
300 | }; | ||
301 | |||
302 | static int devinfo_show(struct seq_file *f, void *v) | ||
303 | { | ||
304 | int i = *(loff_t *) v; | ||
305 | |||
306 | if (i < CHRDEV_MAJOR_HASH_SIZE) { | ||
307 | if (i == 0) | ||
308 | seq_printf(f, "Character devices:\n"); | ||
309 | chrdev_show(f, i); | ||
310 | } | ||
311 | #ifdef CONFIG_BLOCK | ||
312 | else { | ||
313 | i -= CHRDEV_MAJOR_HASH_SIZE; | ||
314 | if (i == 0) | ||
315 | seq_printf(f, "\nBlock devices:\n"); | ||
316 | blkdev_show(f, i); | ||
317 | } | ||
318 | #endif | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static void *devinfo_start(struct seq_file *f, loff_t *pos) | ||
323 | { | ||
324 | if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) | ||
325 | return pos; | ||
326 | return NULL; | ||
327 | } | ||
328 | |||
329 | static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) | ||
330 | { | ||
331 | (*pos)++; | ||
332 | if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) | ||
333 | return NULL; | ||
334 | return pos; | ||
335 | } | ||
336 | |||
/* seq_file ->stop for the devices listing: deliberately a no-op. */
static void devinfo_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
341 | |||
342 | static const struct seq_operations devinfo_ops = { | ||
343 | .start = devinfo_start, | ||
344 | .next = devinfo_next, | ||
345 | .stop = devinfo_stop, | ||
346 | .show = devinfo_show | ||
347 | }; | ||
348 | |||
349 | static int devinfo_open(struct inode *inode, struct file *filp) | ||
350 | { | ||
351 | return seq_open(filp, &devinfo_ops); | ||
352 | } | ||
353 | |||
354 | static const struct file_operations proc_devinfo_operations = { | ||
355 | .open = devinfo_open, | ||
356 | .read = seq_read, | ||
357 | .llseek = seq_lseek, | ||
358 | .release = seq_release, | ||
359 | }; | ||
360 | |||
361 | static int vmstat_open(struct inode *inode, struct file *file) | ||
362 | { | ||
363 | return seq_open(file, &vmstat_op); | ||
364 | } | ||
365 | static const struct file_operations proc_vmstat_file_operations = { | ||
366 | .open = vmstat_open, | ||
367 | .read = seq_read, | ||
368 | .llseek = seq_lseek, | ||
369 | .release = seq_release, | ||
370 | }; | ||
371 | |||
372 | #ifdef CONFIG_PROC_HARDWARE | ||
373 | static int hardware_read_proc(char *page, char **start, off_t off, | ||
374 | int count, int *eof, void *data) | ||
375 | { | ||
376 | int len = get_hardware_list(page); | ||
377 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
378 | } | ||
379 | #endif | ||
380 | |||
381 | #ifdef CONFIG_STRAM_PROC | ||
382 | static int stram_read_proc(char *page, char **start, off_t off, | ||
383 | int count, int *eof, void *data) | ||
384 | { | ||
385 | int len = get_stram_list(page); | ||
386 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
387 | } | ||
388 | #endif | ||
389 | |||
390 | #ifdef CONFIG_BLOCK | ||
391 | static int partitions_open(struct inode *inode, struct file *file) | ||
392 | { | ||
393 | return seq_open(file, &partitions_op); | ||
394 | } | ||
395 | static const struct file_operations proc_partitions_operations = { | ||
396 | .open = partitions_open, | ||
397 | .read = seq_read, | ||
398 | .llseek = seq_lseek, | ||
399 | .release = seq_release, | ||
400 | }; | ||
401 | |||
402 | static int diskstats_open(struct inode *inode, struct file *file) | ||
403 | { | ||
404 | return seq_open(file, &diskstats_op); | ||
405 | } | ||
406 | static const struct file_operations proc_diskstats_operations = { | ||
407 | .open = diskstats_open, | ||
408 | .read = seq_read, | ||
409 | .llseek = seq_lseek, | ||
410 | .release = seq_release, | ||
411 | }; | ||
412 | #endif | ||
413 | |||
414 | #ifdef CONFIG_MODULES | ||
415 | extern const struct seq_operations modules_op; | ||
416 | static int modules_open(struct inode *inode, struct file *file) | ||
417 | { | ||
418 | return seq_open(file, &modules_op); | ||
419 | } | ||
420 | static const struct file_operations proc_modules_operations = { | ||
421 | .open = modules_open, | ||
422 | .read = seq_read, | ||
423 | .llseek = seq_lseek, | ||
424 | .release = seq_release, | ||
425 | }; | ||
426 | #endif | ||
427 | |||
428 | #ifdef CONFIG_SLABINFO | ||
429 | static int slabinfo_open(struct inode *inode, struct file *file) | ||
430 | { | ||
431 | return seq_open(file, &slabinfo_op); | ||
432 | } | ||
433 | static const struct file_operations proc_slabinfo_operations = { | ||
434 | .open = slabinfo_open, | ||
435 | .read = seq_read, | ||
436 | .write = slabinfo_write, | ||
437 | .llseek = seq_lseek, | ||
438 | .release = seq_release, | ||
439 | }; | ||
440 | |||
441 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
442 | extern const struct seq_operations slabstats_op; | ||
443 | static int slabstats_open(struct inode *inode, struct file *file) | ||
444 | { | ||
445 | unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
446 | int ret = -ENOMEM; | ||
447 | if (n) { | ||
448 | ret = seq_open(file, &slabstats_op); | ||
449 | if (!ret) { | ||
450 | struct seq_file *m = file->private_data; | ||
451 | *n = PAGE_SIZE / (2 * sizeof(unsigned long)); | ||
452 | m->private = n; | ||
453 | n = NULL; | ||
454 | } | ||
455 | kfree(n); | ||
456 | } | ||
457 | return ret; | ||
458 | } | ||
459 | |||
460 | static const struct file_operations proc_slabstats_operations = { | ||
461 | .open = slabstats_open, | ||
462 | .read = seq_read, | ||
463 | .llseek = seq_lseek, | ||
464 | .release = seq_release_private, | ||
465 | }; | ||
466 | #endif | ||
467 | #endif | ||
468 | |||
469 | #ifdef CONFIG_MMU | ||
470 | static int vmalloc_open(struct inode *inode, struct file *file) | ||
471 | { | ||
472 | unsigned int *ptr = NULL; | ||
473 | int ret; | ||
474 | |||
475 | if (NUMA_BUILD) | ||
476 | ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); | ||
477 | ret = seq_open(file, &vmalloc_op); | ||
478 | if (!ret) { | ||
479 | struct seq_file *m = file->private_data; | ||
480 | m->private = ptr; | ||
481 | } else | ||
482 | kfree(ptr); | ||
483 | return ret; | ||
484 | } | ||
485 | |||
486 | static const struct file_operations proc_vmalloc_operations = { | ||
487 | .open = vmalloc_open, | ||
488 | .read = seq_read, | ||
489 | .llseek = seq_lseek, | ||
490 | .release = seq_release_private, | ||
491 | }; | ||
492 | #endif | ||
493 | |||
494 | #ifndef arch_irq_stat_cpu | ||
495 | #define arch_irq_stat_cpu(cpu) 0 | ||
496 | #endif | ||
497 | #ifndef arch_irq_stat | ||
498 | #define arch_irq_stat() 0 | ||
499 | #endif | ||
500 | |||
501 | static int show_stat(struct seq_file *p, void *v) | ||
502 | { | ||
503 | int i; | ||
504 | unsigned long jif; | ||
505 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | ||
506 | cputime64_t guest; | ||
507 | u64 sum = 0; | ||
508 | struct timespec boottime; | ||
509 | unsigned int *per_irq_sum; | ||
510 | |||
511 | per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL); | ||
512 | if (!per_irq_sum) | ||
513 | return -ENOMEM; | ||
514 | |||
515 | user = nice = system = idle = iowait = | ||
516 | irq = softirq = steal = cputime64_zero; | ||
517 | guest = cputime64_zero; | ||
518 | getboottime(&boottime); | ||
519 | jif = boottime.tv_sec; | ||
520 | |||
521 | for_each_possible_cpu(i) { | ||
522 | int j; | ||
523 | |||
524 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); | ||
525 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | ||
526 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | ||
527 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); | ||
528 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); | ||
529 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); | ||
530 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | ||
531 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); | ||
532 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); | ||
533 | for (j = 0; j < NR_IRQS; j++) { | ||
534 | unsigned int temp = kstat_cpu(i).irqs[j]; | ||
535 | sum += temp; | ||
536 | per_irq_sum[j] += temp; | ||
537 | } | ||
538 | sum += arch_irq_stat_cpu(i); | ||
539 | } | ||
540 | sum += arch_irq_stat(); | ||
541 | |||
542 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | ||
543 | (unsigned long long)cputime64_to_clock_t(user), | ||
544 | (unsigned long long)cputime64_to_clock_t(nice), | ||
545 | (unsigned long long)cputime64_to_clock_t(system), | ||
546 | (unsigned long long)cputime64_to_clock_t(idle), | ||
547 | (unsigned long long)cputime64_to_clock_t(iowait), | ||
548 | (unsigned long long)cputime64_to_clock_t(irq), | ||
549 | (unsigned long long)cputime64_to_clock_t(softirq), | ||
550 | (unsigned long long)cputime64_to_clock_t(steal), | ||
551 | (unsigned long long)cputime64_to_clock_t(guest)); | ||
552 | for_each_online_cpu(i) { | ||
553 | |||
554 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | ||
555 | user = kstat_cpu(i).cpustat.user; | ||
556 | nice = kstat_cpu(i).cpustat.nice; | ||
557 | system = kstat_cpu(i).cpustat.system; | ||
558 | idle = kstat_cpu(i).cpustat.idle; | ||
559 | iowait = kstat_cpu(i).cpustat.iowait; | ||
560 | irq = kstat_cpu(i).cpustat.irq; | ||
561 | softirq = kstat_cpu(i).cpustat.softirq; | ||
562 | steal = kstat_cpu(i).cpustat.steal; | ||
563 | guest = kstat_cpu(i).cpustat.guest; | ||
564 | seq_printf(p, | ||
565 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | ||
566 | i, | ||
567 | (unsigned long long)cputime64_to_clock_t(user), | ||
568 | (unsigned long long)cputime64_to_clock_t(nice), | ||
569 | (unsigned long long)cputime64_to_clock_t(system), | ||
570 | (unsigned long long)cputime64_to_clock_t(idle), | ||
571 | (unsigned long long)cputime64_to_clock_t(iowait), | ||
572 | (unsigned long long)cputime64_to_clock_t(irq), | ||
573 | (unsigned long long)cputime64_to_clock_t(softirq), | ||
574 | (unsigned long long)cputime64_to_clock_t(steal), | ||
575 | (unsigned long long)cputime64_to_clock_t(guest)); | ||
576 | } | ||
577 | seq_printf(p, "intr %llu", (unsigned long long)sum); | ||
578 | |||
579 | for (i = 0; i < NR_IRQS; i++) | ||
580 | seq_printf(p, " %u", per_irq_sum[i]); | ||
581 | |||
582 | seq_printf(p, | ||
583 | "\nctxt %llu\n" | ||
584 | "btime %lu\n" | ||
585 | "processes %lu\n" | ||
586 | "procs_running %lu\n" | ||
587 | "procs_blocked %lu\n", | ||
588 | nr_context_switches(), | ||
589 | (unsigned long)jif, | ||
590 | total_forks, | ||
591 | nr_running(), | ||
592 | nr_iowait()); | ||
593 | |||
594 | kfree(per_irq_sum); | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | static int stat_open(struct inode *inode, struct file *file) | ||
599 | { | ||
600 | unsigned size = 4096 * (1 + num_possible_cpus() / 32); | ||
601 | char *buf; | ||
602 | struct seq_file *m; | ||
603 | int res; | ||
604 | |||
605 | /* don't ask for more than the kmalloc() max size, currently 128 KB */ | ||
606 | if (size > 128 * 1024) | ||
607 | size = 128 * 1024; | ||
608 | buf = kmalloc(size, GFP_KERNEL); | ||
609 | if (!buf) | ||
610 | return -ENOMEM; | ||
611 | |||
612 | res = single_open(file, show_stat, NULL); | ||
613 | if (!res) { | ||
614 | m = file->private_data; | ||
615 | m->buf = buf; | ||
616 | m->size = size; | ||
617 | } else | ||
618 | kfree(buf); | ||
619 | return res; | ||
620 | } | ||
621 | static const struct file_operations proc_stat_operations = { | ||
622 | .open = stat_open, | ||
623 | .read = seq_read, | ||
624 | .llseek = seq_lseek, | ||
625 | .release = single_release, | ||
626 | }; | ||
627 | |||
628 | /* | ||
629 | * /proc/interrupts | ||
630 | */ | ||
631 | static void *int_seq_start(struct seq_file *f, loff_t *pos) | ||
632 | { | ||
633 | return (*pos <= NR_IRQS) ? pos : NULL; | ||
634 | } | ||
635 | |||
636 | static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) | ||
637 | { | ||
638 | (*pos)++; | ||
639 | if (*pos > NR_IRQS) | ||
640 | return NULL; | ||
641 | return pos; | ||
642 | } | ||
643 | |||
/* seq_file ->stop for the interrupts listing: deliberately a no-op. */
static void int_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
648 | |||
649 | |||
650 | static const struct seq_operations int_seq_ops = { | ||
651 | .start = int_seq_start, | ||
652 | .next = int_seq_next, | ||
653 | .stop = int_seq_stop, | ||
654 | .show = show_interrupts | ||
655 | }; | ||
656 | |||
657 | static int interrupts_open(struct inode *inode, struct file *filp) | ||
658 | { | ||
659 | return seq_open(filp, &int_seq_ops); | ||
660 | } | ||
661 | |||
662 | static const struct file_operations proc_interrupts_operations = { | ||
663 | .open = interrupts_open, | ||
664 | .read = seq_read, | ||
665 | .llseek = seq_lseek, | ||
666 | .release = seq_release, | ||
667 | }; | ||
668 | |||
669 | static int filesystems_read_proc(char *page, char **start, off_t off, | ||
670 | int count, int *eof, void *data) | ||
671 | { | ||
672 | int len = get_filesystem_list(page); | ||
673 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
674 | } | ||
675 | |||
676 | static int cmdline_read_proc(char *page, char **start, off_t off, | ||
677 | int count, int *eof, void *data) | ||
678 | { | ||
679 | int len; | ||
680 | |||
681 | len = sprintf(page, "%s\n", saved_command_line); | ||
682 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
683 | } | ||
684 | |||
685 | #ifdef CONFIG_FILE_LOCKING | ||
686 | static int locks_open(struct inode *inode, struct file *filp) | ||
687 | { | ||
688 | return seq_open(filp, &locks_seq_operations); | ||
689 | } | ||
690 | |||
691 | static const struct file_operations proc_locks_operations = { | ||
692 | .open = locks_open, | ||
693 | .read = seq_read, | ||
694 | .llseek = seq_lseek, | ||
695 | .release = seq_release, | ||
696 | }; | ||
697 | #endif /* CONFIG_FILE_LOCKING */ | ||
698 | |||
699 | static int execdomains_read_proc(char *page, char **start, off_t off, | ||
700 | int count, int *eof, void *data) | ||
701 | { | ||
702 | int len = get_exec_domain_list(page); | ||
703 | return proc_calc_metrics(page, start, off, count, eof, len); | ||
704 | } | ||
705 | |||
706 | #ifdef CONFIG_PROC_PAGE_MONITOR | ||
707 | #define KPMSIZE sizeof(u64) | ||
708 | #define KPMMASK (KPMSIZE - 1) | ||
709 | /* /proc/kpagecount - an array exposing page counts | ||
710 | * | ||
711 | * Each entry is a u64 representing the corresponding | ||
712 | * physical page count. | ||
713 | */ | ||
714 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | ||
715 | size_t count, loff_t *ppos) | ||
716 | { | ||
717 | u64 __user *out = (u64 __user *)buf; | ||
718 | struct page *ppage; | ||
719 | unsigned long src = *ppos; | ||
720 | unsigned long pfn; | ||
721 | ssize_t ret = 0; | ||
722 | u64 pcount; | ||
723 | |||
724 | pfn = src / KPMSIZE; | ||
725 | count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); | ||
726 | if (src & KPMMASK || count & KPMMASK) | ||
727 | return -EINVAL; | ||
728 | |||
729 | while (count > 0) { | ||
730 | ppage = NULL; | ||
731 | if (pfn_valid(pfn)) | ||
732 | ppage = pfn_to_page(pfn); | ||
733 | pfn++; | ||
734 | if (!ppage) | ||
735 | pcount = 0; | ||
736 | else | ||
737 | pcount = page_mapcount(ppage); | ||
738 | |||
739 | if (put_user(pcount, out++)) { | ||
740 | ret = -EFAULT; | ||
741 | break; | ||
742 | } | ||
743 | |||
744 | count -= KPMSIZE; | ||
745 | } | ||
746 | |||
747 | *ppos += (char __user *)out - buf; | ||
748 | if (!ret) | ||
749 | ret = (char __user *)out - buf; | ||
750 | return ret; | ||
751 | } | ||
752 | |||
753 | static struct file_operations proc_kpagecount_operations = { | ||
754 | .llseek = mem_lseek, | ||
755 | .read = kpagecount_read, | ||
756 | }; | ||
757 | |||
758 | /* /proc/kpageflags - an array exposing page flags | ||
759 | * | ||
760 | * Each entry is a u64 representing the corresponding | ||
761 | * physical page flags. | ||
762 | */ | ||
763 | |||
764 | /* These macros are used to decouple internal flags from exported ones */ | ||
765 | |||
766 | #define KPF_LOCKED 0 | ||
767 | #define KPF_ERROR 1 | ||
768 | #define KPF_REFERENCED 2 | ||
769 | #define KPF_UPTODATE 3 | ||
770 | #define KPF_DIRTY 4 | ||
771 | #define KPF_LRU 5 | ||
772 | #define KPF_ACTIVE 6 | ||
773 | #define KPF_SLAB 7 | ||
774 | #define KPF_WRITEBACK 8 | ||
775 | #define KPF_RECLAIM 9 | ||
776 | #define KPF_BUDDY 10 | ||
777 | |||
778 | #define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) | ||
779 | |||
780 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | ||
781 | size_t count, loff_t *ppos) | ||
782 | { | ||
783 | u64 __user *out = (u64 __user *)buf; | ||
784 | struct page *ppage; | ||
785 | unsigned long src = *ppos; | ||
786 | unsigned long pfn; | ||
787 | ssize_t ret = 0; | ||
788 | u64 kflags, uflags; | ||
789 | |||
790 | pfn = src / KPMSIZE; | ||
791 | count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); | ||
792 | if (src & KPMMASK || count & KPMMASK) | ||
793 | return -EINVAL; | ||
794 | |||
795 | while (count > 0) { | ||
796 | ppage = NULL; | ||
797 | if (pfn_valid(pfn)) | ||
798 | ppage = pfn_to_page(pfn); | ||
799 | pfn++; | ||
800 | if (!ppage) | ||
801 | kflags = 0; | ||
802 | else | ||
803 | kflags = ppage->flags; | ||
804 | |||
805 | uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | | ||
806 | kpf_copy_bit(kflags, KPF_ERROR, PG_error) | | ||
807 | kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | | ||
808 | kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | | ||
809 | kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | | ||
810 | kpf_copy_bit(kflags, KPF_LRU, PG_lru) | | ||
811 | kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | | ||
812 | kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | | ||
813 | kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | | ||
814 | kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | | ||
815 | kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); | ||
816 | |||
817 | if (put_user(uflags, out++)) { | ||
818 | ret = -EFAULT; | ||
819 | break; | ||
820 | } | ||
821 | |||
822 | count -= KPMSIZE; | ||
823 | } | ||
824 | |||
825 | *ppos += (char __user *)out - buf; | ||
826 | if (!ret) | ||
827 | ret = (char __user *)out - buf; | ||
828 | return ret; | ||
829 | } | ||
830 | |||
831 | static struct file_operations proc_kpageflags_operations = { | ||
832 | .llseek = mem_lseek, | ||
833 | .read = kpageflags_read, | ||
834 | }; | ||
835 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | ||
836 | |||
837 | struct proc_dir_entry *proc_root_kcore; | ||
838 | |||
839 | void __init proc_misc_init(void) | ||
840 | { | ||
841 | static struct { | ||
842 | char *name; | ||
843 | int (*read_proc)(char*,char**,off_t,int,int*,void*); | ||
844 | } *p, simple_ones[] = { | ||
845 | {"loadavg", loadavg_read_proc}, | ||
846 | {"uptime", uptime_read_proc}, | ||
847 | {"meminfo", meminfo_read_proc}, | ||
848 | {"version", version_read_proc}, | ||
849 | #ifdef CONFIG_PROC_HARDWARE | ||
850 | {"hardware", hardware_read_proc}, | ||
851 | #endif | ||
852 | #ifdef CONFIG_STRAM_PROC | ||
853 | {"stram", stram_read_proc}, | ||
854 | #endif | ||
855 | {"filesystems", filesystems_read_proc}, | ||
856 | {"cmdline", cmdline_read_proc}, | ||
857 | {"execdomains", execdomains_read_proc}, | ||
858 | {NULL,} | ||
859 | }; | ||
860 | for (p = simple_ones; p->name; p++) | ||
861 | create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); | ||
862 | |||
863 | proc_symlink("mounts", NULL, "self/mounts"); | ||
864 | |||
865 | /* And now for trickier ones */ | ||
866 | #ifdef CONFIG_PRINTK | ||
867 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); | ||
868 | #endif | ||
869 | #ifdef CONFIG_FILE_LOCKING | ||
870 | proc_create("locks", 0, NULL, &proc_locks_operations); | ||
871 | #endif | ||
872 | proc_create("devices", 0, NULL, &proc_devinfo_operations); | ||
873 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); | ||
874 | #ifdef CONFIG_BLOCK | ||
875 | proc_create("partitions", 0, NULL, &proc_partitions_operations); | ||
876 | #endif | ||
877 | proc_create("stat", 0, NULL, &proc_stat_operations); | ||
878 | proc_create("interrupts", 0, NULL, &proc_interrupts_operations); | ||
879 | #ifdef CONFIG_SLABINFO | ||
880 | proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); | ||
881 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
882 | proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); | ||
883 | #endif | ||
884 | #endif | ||
885 | #ifdef CONFIG_MMU | ||
886 | proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); | ||
887 | #endif | ||
888 | proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); | ||
889 | proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); | ||
890 | proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); | ||
891 | proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); | ||
892 | #ifdef CONFIG_BLOCK | ||
893 | proc_create("diskstats", 0, NULL, &proc_diskstats_operations); | ||
894 | #endif | ||
895 | #ifdef CONFIG_MODULES | ||
896 | proc_create("modules", 0, NULL, &proc_modules_operations); | ||
897 | #endif | ||
898 | #ifdef CONFIG_SCHEDSTATS | ||
899 | proc_create("schedstat", 0, NULL, &proc_schedstat_operations); | ||
900 | #endif | ||
901 | #ifdef CONFIG_PROC_KCORE | ||
902 | proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); | ||
903 | if (proc_root_kcore) | ||
904 | proc_root_kcore->size = | ||
905 | (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; | ||
906 | #endif | ||
907 | #ifdef CONFIG_PROC_PAGE_MONITOR | ||
908 | proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); | ||
909 | proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); | ||
910 | #endif | ||
911 | #ifdef CONFIG_PROC_VMCORE | ||
912 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); | ||
913 | #endif | ||
914 | } | ||
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 945a81043ba2..94fcfff6863a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * /proc/sys support | 2 | * /proc/sys support |
3 | */ | 3 | */ |
4 | 4 | #include <linux/init.h> | |
5 | #include <linux/sysctl.h> | 5 | #include <linux/sysctl.h> |
6 | #include <linux/proc_fs.h> | 6 | #include <linux/proc_fs.h> |
7 | #include <linux/security.h> | 7 | #include <linux/security.h> |
@@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask) | |||
298 | * sysctl entries that are not writeable, | 298 | * sysctl entries that are not writeable, |
299 | * are _NOT_ writeable, capabilities or not. | 299 | * are _NOT_ writeable, capabilities or not. |
300 | */ | 300 | */ |
301 | struct ctl_table_header *head = grab_header(inode); | 301 | struct ctl_table_header *head; |
302 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 302 | struct ctl_table *table; |
303 | int error; | 303 | int error; |
304 | 304 | ||
305 | /* Executable files are not allowed under /proc/sys/ */ | ||
306 | if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) | ||
307 | return -EACCES; | ||
308 | |||
309 | head = grab_header(inode); | ||
305 | if (IS_ERR(head)) | 310 | if (IS_ERR(head)) |
306 | return PTR_ERR(head); | 311 | return PTR_ERR(head); |
307 | 312 | ||
313 | table = PROC_I(inode)->sysctl_entry; | ||
308 | if (!table) /* global root - r-xr-xr-x */ | 314 | if (!table) /* global root - r-xr-xr-x */ |
309 | error = mask & MAY_WRITE ? -EACCES : 0; | 315 | error = mask & MAY_WRITE ? -EACCES : 0; |
310 | else /* Use the permissions on the sysctl table entry */ | 316 | else /* Use the permissions on the sysctl table entry */ |
@@ -353,6 +359,7 @@ static const struct file_operations proc_sys_file_operations = { | |||
353 | 359 | ||
354 | static const struct file_operations proc_sys_dir_file_operations = { | 360 | static const struct file_operations proc_sys_dir_file_operations = { |
355 | .readdir = proc_sys_readdir, | 361 | .readdir = proc_sys_readdir, |
362 | .llseek = generic_file_llseek, | ||
356 | }; | 363 | }; |
357 | 364 | ||
358 | static const struct inode_operations proc_sys_inode_operations = { | 365 | static const struct inode_operations proc_sys_inode_operations = { |
@@ -395,7 +402,7 @@ static struct dentry_operations proc_sys_dentry_operations = { | |||
395 | .d_compare = proc_sys_compare, | 402 | .d_compare = proc_sys_compare, |
396 | }; | 403 | }; |
397 | 404 | ||
398 | int proc_sys_init(void) | 405 | int __init proc_sys_init(void) |
399 | { | 406 | { |
400 | struct proc_dir_entry *proc_sys_root; | 407 | struct proc_dir_entry *proc_sys_root; |
401 | 408 | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 95117538a4f6..7761602af9de 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -104,9 +104,9 @@ static struct file_system_type proc_fs_type = { | |||
104 | 104 | ||
105 | void __init proc_root_init(void) | 105 | void __init proc_root_init(void) |
106 | { | 106 | { |
107 | int err = proc_init_inodecache(); | 107 | int err; |
108 | if (err) | 108 | |
109 | return; | 109 | proc_init_inodecache(); |
110 | err = register_filesystem(&proc_fs_type); | 110 | err = register_filesystem(&proc_fs_type); |
111 | if (err) | 111 | if (err) |
112 | return; | 112 | return; |
@@ -117,7 +117,7 @@ void __init proc_root_init(void) | |||
117 | return; | 117 | return; |
118 | } | 118 | } |
119 | 119 | ||
120 | proc_misc_init(); | 120 | proc_symlink("mounts", NULL, "self/mounts"); |
121 | 121 | ||
122 | proc_net_init(); | 122 | proc_net_init(); |
123 | 123 | ||
diff --git a/fs/proc/stat.c b/fs/proc/stat.c new file mode 100644 index 000000000000..81904f07679d --- /dev/null +++ b/fs/proc/stat.c | |||
@@ -0,0 +1,153 @@ | |||
1 | #include <linux/cpumask.h> | ||
2 | #include <linux/fs.h> | ||
3 | #include <linux/gfp.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel_stat.h> | ||
7 | #include <linux/proc_fs.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/seq_file.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <asm/cputime.h> | ||
13 | |||
14 | #ifndef arch_irq_stat_cpu | ||
15 | #define arch_irq_stat_cpu(cpu) 0 | ||
16 | #endif | ||
17 | #ifndef arch_irq_stat | ||
18 | #define arch_irq_stat() 0 | ||
19 | #endif | ||
20 | |||
21 | static int show_stat(struct seq_file *p, void *v) | ||
22 | { | ||
23 | int i, j; | ||
24 | unsigned long jif; | ||
25 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | ||
26 | cputime64_t guest; | ||
27 | u64 sum = 0; | ||
28 | struct timespec boottime; | ||
29 | unsigned int per_irq_sum; | ||
30 | |||
31 | user = nice = system = idle = iowait = | ||
32 | irq = softirq = steal = cputime64_zero; | ||
33 | guest = cputime64_zero; | ||
34 | getboottime(&boottime); | ||
35 | jif = boottime.tv_sec; | ||
36 | |||
37 | for_each_possible_cpu(i) { | ||
38 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); | ||
39 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | ||
40 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | ||
41 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); | ||
42 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); | ||
43 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); | ||
44 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | ||
45 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); | ||
46 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); | ||
47 | |||
48 | for_each_irq_nr(j) | ||
49 | sum += kstat_irqs_cpu(j, i); | ||
50 | |||
51 | sum += arch_irq_stat_cpu(i); | ||
52 | } | ||
53 | sum += arch_irq_stat(); | ||
54 | |||
55 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | ||
56 | (unsigned long long)cputime64_to_clock_t(user), | ||
57 | (unsigned long long)cputime64_to_clock_t(nice), | ||
58 | (unsigned long long)cputime64_to_clock_t(system), | ||
59 | (unsigned long long)cputime64_to_clock_t(idle), | ||
60 | (unsigned long long)cputime64_to_clock_t(iowait), | ||
61 | (unsigned long long)cputime64_to_clock_t(irq), | ||
62 | (unsigned long long)cputime64_to_clock_t(softirq), | ||
63 | (unsigned long long)cputime64_to_clock_t(steal), | ||
64 | (unsigned long long)cputime64_to_clock_t(guest)); | ||
65 | for_each_online_cpu(i) { | ||
66 | |||
67 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | ||
68 | user = kstat_cpu(i).cpustat.user; | ||
69 | nice = kstat_cpu(i).cpustat.nice; | ||
70 | system = kstat_cpu(i).cpustat.system; | ||
71 | idle = kstat_cpu(i).cpustat.idle; | ||
72 | iowait = kstat_cpu(i).cpustat.iowait; | ||
73 | irq = kstat_cpu(i).cpustat.irq; | ||
74 | softirq = kstat_cpu(i).cpustat.softirq; | ||
75 | steal = kstat_cpu(i).cpustat.steal; | ||
76 | guest = kstat_cpu(i).cpustat.guest; | ||
77 | seq_printf(p, | ||
78 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | ||
79 | i, | ||
80 | (unsigned long long)cputime64_to_clock_t(user), | ||
81 | (unsigned long long)cputime64_to_clock_t(nice), | ||
82 | (unsigned long long)cputime64_to_clock_t(system), | ||
83 | (unsigned long long)cputime64_to_clock_t(idle), | ||
84 | (unsigned long long)cputime64_to_clock_t(iowait), | ||
85 | (unsigned long long)cputime64_to_clock_t(irq), | ||
86 | (unsigned long long)cputime64_to_clock_t(softirq), | ||
87 | (unsigned long long)cputime64_to_clock_t(steal), | ||
88 | (unsigned long long)cputime64_to_clock_t(guest)); | ||
89 | } | ||
90 | seq_printf(p, "intr %llu", (unsigned long long)sum); | ||
91 | |||
92 | /* sum again ? it could be updated? */ | ||
93 | for_each_irq_nr(j) { | ||
94 | per_irq_sum = 0; | ||
95 | |||
96 | for_each_possible_cpu(i) | ||
97 | per_irq_sum += kstat_irqs_cpu(j, i); | ||
98 | |||
99 | seq_printf(p, " %u", per_irq_sum); | ||
100 | } | ||
101 | |||
102 | seq_printf(p, | ||
103 | "\nctxt %llu\n" | ||
104 | "btime %lu\n" | ||
105 | "processes %lu\n" | ||
106 | "procs_running %lu\n" | ||
107 | "procs_blocked %lu\n", | ||
108 | nr_context_switches(), | ||
109 | (unsigned long)jif, | ||
110 | total_forks, | ||
111 | nr_running(), | ||
112 | nr_iowait()); | ||
113 | |||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | static int stat_open(struct inode *inode, struct file *file) | ||
118 | { | ||
119 | unsigned size = 4096 * (1 + num_possible_cpus() / 32); | ||
120 | char *buf; | ||
121 | struct seq_file *m; | ||
122 | int res; | ||
123 | |||
124 | /* don't ask for more than the kmalloc() max size, currently 128 KB */ | ||
125 | if (size > 128 * 1024) | ||
126 | size = 128 * 1024; | ||
127 | buf = kmalloc(size, GFP_KERNEL); | ||
128 | if (!buf) | ||
129 | return -ENOMEM; | ||
130 | |||
131 | res = single_open(file, show_stat, NULL); | ||
132 | if (!res) { | ||
133 | m = file->private_data; | ||
134 | m->buf = buf; | ||
135 | m->size = size; | ||
136 | } else | ||
137 | kfree(buf); | ||
138 | return res; | ||
139 | } | ||
140 | |||
141 | static const struct file_operations proc_stat_operations = { | ||
142 | .open = stat_open, | ||
143 | .read = seq_read, | ||
144 | .llseek = seq_lseek, | ||
145 | .release = single_release, | ||
146 | }; | ||
147 | |||
148 | static int __init proc_stat_init(void) | ||
149 | { | ||
150 | proc_create("stat", 0, NULL, &proc_stat_operations); | ||
151 | return 0; | ||
152 | } | ||
153 | module_init(proc_stat_init); | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4806830ea2a1..b770c095e45c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -198,11 +198,8 @@ static int do_maps_open(struct inode *inode, struct file *file, | |||
198 | return ret; | 198 | return ret; |
199 | } | 199 | } |
200 | 200 | ||
201 | static int show_map(struct seq_file *m, void *v) | 201 | static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) |
202 | { | 202 | { |
203 | struct proc_maps_private *priv = m->private; | ||
204 | struct task_struct *task = priv->task; | ||
205 | struct vm_area_struct *vma = v; | ||
206 | struct mm_struct *mm = vma->vm_mm; | 203 | struct mm_struct *mm = vma->vm_mm; |
207 | struct file *file = vma->vm_file; | 204 | struct file *file = vma->vm_file; |
208 | int flags = vma->vm_flags; | 205 | int flags = vma->vm_flags; |
@@ -254,6 +251,15 @@ static int show_map(struct seq_file *m, void *v) | |||
254 | } | 251 | } |
255 | } | 252 | } |
256 | seq_putc(m, '\n'); | 253 | seq_putc(m, '\n'); |
254 | } | ||
255 | |||
256 | static int show_map(struct seq_file *m, void *v) | ||
257 | { | ||
258 | struct vm_area_struct *vma = v; | ||
259 | struct proc_maps_private *priv = m->private; | ||
260 | struct task_struct *task = priv->task; | ||
261 | |||
262 | show_map_vma(m, vma); | ||
257 | 263 | ||
258 | if (m->count < m->size) /* vma is copied successfully */ | 264 | if (m->count < m->size) /* vma is copied successfully */ |
259 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; | 265 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; |
@@ -364,9 +370,10 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
364 | 370 | ||
365 | static int show_smap(struct seq_file *m, void *v) | 371 | static int show_smap(struct seq_file *m, void *v) |
366 | { | 372 | { |
373 | struct proc_maps_private *priv = m->private; | ||
374 | struct task_struct *task = priv->task; | ||
367 | struct vm_area_struct *vma = v; | 375 | struct vm_area_struct *vma = v; |
368 | struct mem_size_stats mss; | 376 | struct mem_size_stats mss; |
369 | int ret; | ||
370 | struct mm_walk smaps_walk = { | 377 | struct mm_walk smaps_walk = { |
371 | .pmd_entry = smaps_pte_range, | 378 | .pmd_entry = smaps_pte_range, |
372 | .mm = vma->vm_mm, | 379 | .mm = vma->vm_mm, |
@@ -378,9 +385,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
378 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 385 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
379 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 386 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
380 | 387 | ||
381 | ret = show_map(m, v); | 388 | show_map_vma(m, vma); |
382 | if (ret) | ||
383 | return ret; | ||
384 | 389 | ||
385 | seq_printf(m, | 390 | seq_printf(m, |
386 | "Size: %8lu kB\n" | 391 | "Size: %8lu kB\n" |
@@ -402,7 +407,9 @@ static int show_smap(struct seq_file *m, void *v) | |||
402 | mss.referenced >> 10, | 407 | mss.referenced >> 10, |
403 | mss.swap >> 10); | 408 | mss.swap >> 10); |
404 | 409 | ||
405 | return ret; | 410 | if (m->count < m->size) /* vma is copied successfully */ |
411 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | ||
412 | return 0; | ||
406 | } | 413 | } |
407 | 414 | ||
408 | static const struct seq_operations proc_pid_smaps_op = { | 415 | static const struct seq_operations proc_pid_smaps_op = { |
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c new file mode 100644 index 000000000000..0c10a0b3f146 --- /dev/null +++ b/fs/proc/uptime.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/seq_file.h> | ||
6 | #include <linux/time.h> | ||
7 | #include <asm/cputime.h> | ||
8 | |||
9 | static int uptime_proc_show(struct seq_file *m, void *v) | ||
10 | { | ||
11 | struct timespec uptime; | ||
12 | struct timespec idle; | ||
13 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); | ||
14 | |||
15 | do_posix_clock_monotonic_gettime(&uptime); | ||
16 | monotonic_to_bootbased(&uptime); | ||
17 | cputime_to_timespec(idletime, &idle); | ||
18 | seq_printf(m, "%lu.%02lu %lu.%02lu\n", | ||
19 | (unsigned long) uptime.tv_sec, | ||
20 | (uptime.tv_nsec / (NSEC_PER_SEC / 100)), | ||
21 | (unsigned long) idle.tv_sec, | ||
22 | (idle.tv_nsec / (NSEC_PER_SEC / 100))); | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static int uptime_proc_open(struct inode *inode, struct file *file) | ||
27 | { | ||
28 | return single_open(file, uptime_proc_show, NULL); | ||
29 | } | ||
30 | |||
31 | static const struct file_operations uptime_proc_fops = { | ||
32 | .open = uptime_proc_open, | ||
33 | .read = seq_read, | ||
34 | .llseek = seq_lseek, | ||
35 | .release = single_release, | ||
36 | }; | ||
37 | |||
38 | static int __init proc_uptime_init(void) | ||
39 | { | ||
40 | proc_create("uptime", 0, NULL, &uptime_proc_fops); | ||
41 | return 0; | ||
42 | } | ||
43 | module_init(proc_uptime_init); | ||
diff --git a/fs/proc/version.c b/fs/proc/version.c new file mode 100644 index 000000000000..76817a60678c --- /dev/null +++ b/fs/proc/version.c | |||
@@ -0,0 +1,34 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/kernel.h> | ||
4 | #include <linux/proc_fs.h> | ||
5 | #include <linux/seq_file.h> | ||
6 | #include <linux/utsname.h> | ||
7 | |||
8 | static int version_proc_show(struct seq_file *m, void *v) | ||
9 | { | ||
10 | seq_printf(m, linux_proc_banner, | ||
11 | utsname()->sysname, | ||
12 | utsname()->release, | ||
13 | utsname()->version); | ||
14 | return 0; | ||
15 | } | ||
16 | |||
17 | static int version_proc_open(struct inode *inode, struct file *file) | ||
18 | { | ||
19 | return single_open(file, version_proc_show, NULL); | ||
20 | } | ||
21 | |||
22 | static const struct file_operations version_proc_fops = { | ||
23 | .open = version_proc_open, | ||
24 | .read = seq_read, | ||
25 | .llseek = seq_lseek, | ||
26 | .release = single_release, | ||
27 | }; | ||
28 | |||
29 | static int __init proc_version_init(void) | ||
30 | { | ||
31 | proc_create("version", 0, NULL, &version_proc_fops); | ||
32 | return 0; | ||
33 | } | ||
34 | module_init(proc_version_init); | ||
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 841368b87a29..03ec59504906 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -32,10 +32,7 @@ static size_t elfcorebuf_sz; | |||
32 | /* Total size of vmcore file. */ | 32 | /* Total size of vmcore file. */ |
33 | static u64 vmcore_size; | 33 | static u64 vmcore_size; |
34 | 34 | ||
35 | /* Stores the physical address of elf header of crash image. */ | 35 | static struct proc_dir_entry *proc_vmcore = NULL; |
36 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
37 | |||
38 | struct proc_dir_entry *proc_vmcore = NULL; | ||
39 | 36 | ||
40 | /* Reads a page from the oldmem device from given offset. */ | 37 | /* Reads a page from the oldmem device from given offset. */ |
41 | static ssize_t read_from_oldmem(char *buf, size_t count, | 38 | static ssize_t read_from_oldmem(char *buf, size_t count, |
@@ -165,7 +162,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
165 | return acc; | 162 | return acc; |
166 | } | 163 | } |
167 | 164 | ||
168 | const struct file_operations proc_vmcore_operations = { | 165 | static const struct file_operations proc_vmcore_operations = { |
169 | .read = read_vmcore, | 166 | .read = read_vmcore, |
170 | }; | 167 | }; |
171 | 168 | ||
@@ -647,7 +644,7 @@ static int __init vmcore_init(void) | |||
647 | int rc = 0; | 644 | int rc = 0; |
648 | 645 | ||
649 | /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ | 646 | /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ |
650 | if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX)) | 647 | if (!(is_vmcore_usable())) |
651 | return rc; | 648 | return rc; |
652 | rc = parse_crash_elf_headers(); | 649 | rc = parse_crash_elf_headers(); |
653 | if (rc) { | 650 | if (rc) { |
@@ -655,7 +652,7 @@ static int __init vmcore_init(void) | |||
655 | return rc; | 652 | return rc; |
656 | } | 653 | } |
657 | 654 | ||
658 | /* Initialize /proc/vmcore size if proc is already up. */ | 655 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); |
659 | if (proc_vmcore) | 656 | if (proc_vmcore) |
660 | proc_vmcore->size = vmcore_size; | 657 | proc_vmcore->size = vmcore_size; |
661 | return 0; | 658 | return 0; |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 5145cb9125af..76acdbc34611 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
112 | goto add_error; | 112 | goto add_error; |
113 | 113 | ||
114 | if (!pagevec_add(&lru_pvec, page)) | 114 | if (!pagevec_add(&lru_pvec, page)) |
115 | __pagevec_lru_add(&lru_pvec); | 115 | __pagevec_lru_add_file(&lru_pvec); |
116 | 116 | ||
117 | unlock_page(page); | 117 | unlock_page(page); |
118 | } | 118 | } |
119 | 119 | ||
120 | pagevec_lru_add(&lru_pvec); | 120 | pagevec_lru_add_file(&lru_pvec); |
121 | return 0; | 121 | return 0; |
122 | 122 | ||
123 | fsize_exceeded: | 123 | fsize_exceeded: |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b13123424e49..f031d1c925f0 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
61 | inode->i_mapping->a_ops = &ramfs_aops; | 61 | inode->i_mapping->a_ops = &ramfs_aops; |
62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | 63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
64 | mapping_set_unevictable(inode->i_mapping); | ||
64 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 65 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
65 | switch (mode & S_IFMT) { | 66 | switch (mode & S_IFMT) { |
66 | default: | 67 | default: |
diff --git a/fs/read_write.c b/fs/read_write.c index 9ba495d5a29b..969a6d9c020b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -31,39 +31,61 @@ const struct file_operations generic_ro_fops = { | |||
31 | 31 | ||
32 | EXPORT_SYMBOL(generic_ro_fops); | 32 | EXPORT_SYMBOL(generic_ro_fops); |
33 | 33 | ||
34 | /** | ||
35 | * generic_file_llseek_unlocked - lockless generic llseek implementation | ||
36 | * @file: file structure to seek on | ||
37 | * @offset: file offset to seek to | ||
38 | * @origin: type of seek | ||
39 | * | ||
40 | * Updates the file offset to the value specified by @offset and @origin. | ||
41 | * Locking must be provided by the caller. | ||
42 | */ | ||
34 | loff_t | 43 | loff_t |
35 | generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | 44 | generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) |
36 | { | 45 | { |
37 | loff_t retval; | ||
38 | struct inode *inode = file->f_mapping->host; | 46 | struct inode *inode = file->f_mapping->host; |
39 | 47 | ||
40 | switch (origin) { | 48 | switch (origin) { |
41 | case SEEK_END: | 49 | case SEEK_END: |
42 | offset += inode->i_size; | 50 | offset += inode->i_size; |
43 | break; | 51 | break; |
44 | case SEEK_CUR: | 52 | case SEEK_CUR: |
45 | offset += file->f_pos; | 53 | offset += file->f_pos; |
54 | break; | ||
46 | } | 55 | } |
47 | retval = -EINVAL; | 56 | |
48 | if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { | 57 | if (offset < 0 || offset > inode->i_sb->s_maxbytes) |
49 | /* Special lock needed here? */ | 58 | return -EINVAL; |
50 | if (offset != file->f_pos) { | 59 | |
51 | file->f_pos = offset; | 60 | /* Special lock needed here? */ |
52 | file->f_version = 0; | 61 | if (offset != file->f_pos) { |
53 | } | 62 | file->f_pos = offset; |
54 | retval = offset; | 63 | file->f_version = 0; |
55 | } | 64 | } |
56 | return retval; | 65 | |
66 | return offset; | ||
57 | } | 67 | } |
58 | EXPORT_SYMBOL(generic_file_llseek_unlocked); | 68 | EXPORT_SYMBOL(generic_file_llseek_unlocked); |
59 | 69 | ||
70 | /** | ||
71 | * generic_file_llseek - generic llseek implementation for regular files | ||
72 | * @file: file structure to seek on | ||
73 | * @offset: file offset to seek to | ||
74 | * @origin: type of seek | ||
75 | * | ||
76 | * This is a generic implementation of ->llseek usable for all normal local | ||
77 | * filesystems. It just updates the file offset to the value specified by | ||
78 | * @offset and @origin under i_mutex. | ||
79 | */ | ||
60 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | 80 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) |
61 | { | 81 | { |
62 | loff_t n; | 82 | loff_t rval; |
83 | |||
63 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | 84 | mutex_lock(&file->f_dentry->d_inode->i_mutex); |
64 | n = generic_file_llseek_unlocked(file, offset, origin); | 85 | rval = generic_file_llseek_unlocked(file, offset, origin); |
65 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); | 86 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); |
66 | return n; | 87 | |
88 | return rval; | ||
67 | } | 89 | } |
68 | EXPORT_SYMBOL(generic_file_llseek); | 90 | EXPORT_SYMBOL(generic_file_llseek); |
69 | 91 | ||
diff --git a/fs/readdir.c b/fs/readdir.c index 93a7559bbfd8..b318d9b5af2e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -117,7 +117,7 @@ asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * di | |||
117 | buf.dirent = dirent; | 117 | buf.dirent = dirent; |
118 | 118 | ||
119 | error = vfs_readdir(file, fillonedir, &buf); | 119 | error = vfs_readdir(file, fillonedir, &buf); |
120 | if (error >= 0) | 120 | if (buf.result) |
121 | error = buf.result; | 121 | error = buf.result; |
122 | 122 | ||
123 | fput(file); | 123 | fput(file); |
@@ -209,9 +209,8 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren | |||
209 | buf.error = 0; | 209 | buf.error = 0; |
210 | 210 | ||
211 | error = vfs_readdir(file, filldir, &buf); | 211 | error = vfs_readdir(file, filldir, &buf); |
212 | if (error < 0) | 212 | if (error >= 0) |
213 | goto out_putf; | 213 | error = buf.error; |
214 | error = buf.error; | ||
215 | lastdirent = buf.previous; | 214 | lastdirent = buf.previous; |
216 | if (lastdirent) { | 215 | if (lastdirent) { |
217 | if (put_user(file->f_pos, &lastdirent->d_off)) | 216 | if (put_user(file->f_pos, &lastdirent->d_off)) |
@@ -219,8 +218,6 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren | |||
219 | else | 218 | else |
220 | error = count - buf.count; | 219 | error = count - buf.count; |
221 | } | 220 | } |
222 | |||
223 | out_putf: | ||
224 | fput(file); | 221 | fput(file); |
225 | out: | 222 | out: |
226 | return error; | 223 | return error; |
@@ -293,19 +290,16 @@ asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * d | |||
293 | buf.error = 0; | 290 | buf.error = 0; |
294 | 291 | ||
295 | error = vfs_readdir(file, filldir64, &buf); | 292 | error = vfs_readdir(file, filldir64, &buf); |
296 | if (error < 0) | 293 | if (error >= 0) |
297 | goto out_putf; | 294 | error = buf.error; |
298 | error = buf.error; | ||
299 | lastdirent = buf.previous; | 295 | lastdirent = buf.previous; |
300 | if (lastdirent) { | 296 | if (lastdirent) { |
301 | typeof(lastdirent->d_off) d_off = file->f_pos; | 297 | typeof(lastdirent->d_off) d_off = file->f_pos; |
302 | error = -EFAULT; | ||
303 | if (__put_user(d_off, &lastdirent->d_off)) | 298 | if (__put_user(d_off, &lastdirent->d_off)) |
304 | goto out_putf; | 299 | error = -EFAULT; |
305 | error = count - buf.count; | 300 | else |
301 | error = count - buf.count; | ||
306 | } | 302 | } |
307 | |||
308 | out_putf: | ||
309 | fput(file); | 303 | fput(file); |
310 | out: | 304 | out: |
311 | return error; | 305 | return error; |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index a804903d31d1..33408417038c 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -296,6 +296,7 @@ const struct file_operations reiserfs_file_operations = { | |||
296 | .aio_write = generic_file_aio_write, | 296 | .aio_write = generic_file_aio_write, |
297 | .splice_read = generic_file_splice_read, | 297 | .splice_read = generic_file_splice_read, |
298 | .splice_write = generic_file_splice_write, | 298 | .splice_write = generic_file_splice_write, |
299 | .llseek = generic_file_llseek, | ||
299 | }; | 300 | }; |
300 | 301 | ||
301 | const struct inode_operations reiserfs_file_inode_operations = { | 302 | const struct inode_operations reiserfs_file_inode_operations = { |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 5699171212ae..6c4c2c69449f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1522,7 +1522,6 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, | |||
1522 | 1522 | ||
1523 | { | 1523 | { |
1524 | struct cpu_key key; | 1524 | struct cpu_key key; |
1525 | struct dentry *result; | ||
1526 | struct inode *inode; | 1525 | struct inode *inode; |
1527 | 1526 | ||
1528 | key.on_disk_key.k_objectid = objectid; | 1527 | key.on_disk_key.k_objectid = objectid; |
@@ -1535,16 +1534,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, | |||
1535 | inode = NULL; | 1534 | inode = NULL; |
1536 | } | 1535 | } |
1537 | reiserfs_write_unlock(sb); | 1536 | reiserfs_write_unlock(sb); |
1538 | if (!inode) | 1537 | |
1539 | inode = ERR_PTR(-ESTALE); | 1538 | return d_obtain_alias(inode); |
1540 | if (IS_ERR(inode)) | ||
1541 | return ERR_CAST(inode); | ||
1542 | result = d_alloc_anon(inode); | ||
1543 | if (!result) { | ||
1544 | iput(inode); | ||
1545 | return ERR_PTR(-ENOMEM); | ||
1546 | } | ||
1547 | return result; | ||
1548 | } | 1539 | } |
1549 | 1540 | ||
1550 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | 1541 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c21df71943a6..9643c3bbeb3b 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super, | |||
2575 | if (journal->j_dev_bd != NULL) { | 2575 | if (journal->j_dev_bd != NULL) { |
2576 | if (journal->j_dev_bd->bd_dev != super->s_dev) | 2576 | if (journal->j_dev_bd->bd_dev != super->s_dev) |
2577 | bd_release(journal->j_dev_bd); | 2577 | bd_release(journal->j_dev_bd); |
2578 | result = blkdev_put(journal->j_dev_bd); | 2578 | result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); |
2579 | journal->j_dev_bd = NULL; | 2579 | journal->j_dev_bd = NULL; |
2580 | } | 2580 | } |
2581 | 2581 | ||
@@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super, | |||
2593 | { | 2593 | { |
2594 | int result; | 2594 | int result; |
2595 | dev_t jdev; | 2595 | dev_t jdev; |
2596 | int blkdev_mode = FMODE_READ | FMODE_WRITE; | 2596 | fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; |
2597 | char b[BDEVNAME_SIZE]; | 2597 | char b[BDEVNAME_SIZE]; |
2598 | 2598 | ||
2599 | result = 0; | 2599 | result = 0; |
@@ -2608,6 +2608,7 @@ static int journal_init_dev(struct super_block *super, | |||
2608 | /* there is no "jdev" option and journal is on separate device */ | 2608 | /* there is no "jdev" option and journal is on separate device */ |
2609 | if ((!jdev_name || !jdev_name[0])) { | 2609 | if ((!jdev_name || !jdev_name[0])) { |
2610 | journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); | 2610 | journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); |
2611 | journal->j_dev_mode = blkdev_mode; | ||
2611 | if (IS_ERR(journal->j_dev_bd)) { | 2612 | if (IS_ERR(journal->j_dev_bd)) { |
2612 | result = PTR_ERR(journal->j_dev_bd); | 2613 | result = PTR_ERR(journal->j_dev_bd); |
2613 | journal->j_dev_bd = NULL; | 2614 | journal->j_dev_bd = NULL; |
@@ -2618,7 +2619,7 @@ static int journal_init_dev(struct super_block *super, | |||
2618 | } else if (jdev != super->s_dev) { | 2619 | } else if (jdev != super->s_dev) { |
2619 | result = bd_claim(journal->j_dev_bd, journal); | 2620 | result = bd_claim(journal->j_dev_bd, journal); |
2620 | if (result) { | 2621 | if (result) { |
2621 | blkdev_put(journal->j_dev_bd); | 2622 | blkdev_put(journal->j_dev_bd, blkdev_mode); |
2622 | return result; | 2623 | return result; |
2623 | } | 2624 | } |
2624 | 2625 | ||
@@ -2628,7 +2629,9 @@ static int journal_init_dev(struct super_block *super, | |||
2628 | return 0; | 2629 | return 0; |
2629 | } | 2630 | } |
2630 | 2631 | ||
2631 | journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); | 2632 | journal->j_dev_mode = blkdev_mode; |
2633 | journal->j_dev_bd = open_bdev_exclusive(jdev_name, | ||
2634 | blkdev_mode, journal); | ||
2632 | if (IS_ERR(journal->j_dev_bd)) { | 2635 | if (IS_ERR(journal->j_dev_bd)) { |
2633 | result = PTR_ERR(journal->j_dev_bd); | 2636 | result = PTR_ERR(journal->j_dev_bd); |
2634 | journal->j_dev_bd = NULL; | 2637 | journal->j_dev_bd = NULL; |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c1add28dd45e..f89ebb943f3f 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -383,7 +383,6 @@ struct dentry *reiserfs_get_parent(struct dentry *child) | |||
383 | struct inode *inode = NULL; | 383 | struct inode *inode = NULL; |
384 | struct reiserfs_dir_entry de; | 384 | struct reiserfs_dir_entry de; |
385 | INITIALIZE_PATH(path_to_entry); | 385 | INITIALIZE_PATH(path_to_entry); |
386 | struct dentry *parent; | ||
387 | struct inode *dir = child->d_inode; | 386 | struct inode *dir = child->d_inode; |
388 | 387 | ||
389 | if (dir->i_nlink == 0) { | 388 | if (dir->i_nlink == 0) { |
@@ -401,15 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child) | |||
401 | inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); | 400 | inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); |
402 | reiserfs_write_unlock(dir->i_sb); | 401 | reiserfs_write_unlock(dir->i_sb); |
403 | 402 | ||
404 | if (!inode || IS_ERR(inode)) { | 403 | return d_obtain_alias(inode); |
405 | return ERR_PTR(-EACCES); | ||
406 | } | ||
407 | parent = d_alloc_anon(inode); | ||
408 | if (!parent) { | ||
409 | iput(inode); | ||
410 | parent = ERR_PTR(-ENOMEM); | ||
411 | } | ||
412 | return parent; | ||
413 | } | 404 | } |
414 | 405 | ||
415 | /* add entry to the directory (entry can be hidden). | 406 | /* add entry to the directory (entry can be hidden). |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d318c7e663fa..663a91f5dce8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -2058,10 +2058,10 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type) | |||
2058 | * Standard function to be called on quota_on | 2058 | * Standard function to be called on quota_on |
2059 | */ | 2059 | */ |
2060 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | 2060 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, |
2061 | char *path, int remount) | 2061 | char *name, int remount) |
2062 | { | 2062 | { |
2063 | int err; | 2063 | int err; |
2064 | struct nameidata nd; | 2064 | struct path path; |
2065 | struct inode *inode; | 2065 | struct inode *inode; |
2066 | struct reiserfs_transaction_handle th; | 2066 | struct reiserfs_transaction_handle th; |
2067 | 2067 | ||
@@ -2069,16 +2069,16 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2069 | return -EINVAL; | 2069 | return -EINVAL; |
2070 | /* No more checks needed? Path and format_id are bogus anyway... */ | 2070 | /* No more checks needed? Path and format_id are bogus anyway... */ |
2071 | if (remount) | 2071 | if (remount) |
2072 | return vfs_quota_on(sb, type, format_id, path, 1); | 2072 | return vfs_quota_on(sb, type, format_id, name, 1); |
2073 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | 2073 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
2074 | if (err) | 2074 | if (err) |
2075 | return err; | 2075 | return err; |
2076 | /* Quotafile not on the same filesystem? */ | 2076 | /* Quotafile not on the same filesystem? */ |
2077 | if (nd.path.mnt->mnt_sb != sb) { | 2077 | if (path.mnt->mnt_sb != sb) { |
2078 | err = -EXDEV; | 2078 | err = -EXDEV; |
2079 | goto out; | 2079 | goto out; |
2080 | } | 2080 | } |
2081 | inode = nd.path.dentry->d_inode; | 2081 | inode = path.dentry->d_inode; |
2082 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ | 2082 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ |
2083 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { | 2083 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { |
2084 | err = reiserfs_unpack(inode, NULL); | 2084 | err = reiserfs_unpack(inode, NULL); |
@@ -2094,7 +2094,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2094 | /* Journaling quota? */ | 2094 | /* Journaling quota? */ |
2095 | if (REISERFS_SB(sb)->s_qf_names[type]) { | 2095 | if (REISERFS_SB(sb)->s_qf_names[type]) { |
2096 | /* Quotafile not of fs root? */ | 2096 | /* Quotafile not of fs root? */ |
2097 | if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) | 2097 | if (path.dentry->d_parent != sb->s_root) |
2098 | reiserfs_warning(sb, | 2098 | reiserfs_warning(sb, |
2099 | "reiserfs: Quota file not on filesystem root. " | 2099 | "reiserfs: Quota file not on filesystem root. " |
2100 | "Journalled quota will not work."); | 2100 | "Journalled quota will not work."); |
@@ -2113,9 +2113,9 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2113 | if (err) | 2113 | if (err) |
2114 | goto out; | 2114 | goto out; |
2115 | } | 2115 | } |
2116 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 2116 | err = vfs_quota_on_path(sb, type, format_id, &path); |
2117 | out: | 2117 | out: |
2118 | path_put(&nd.path); | 2118 | path_put(&path); |
2119 | return err; | 2119 | return err; |
2120 | } | 2120 | } |
2121 | 2121 | ||
diff --git a/fs/select.c b/fs/select.c index da0e88201c3a..448e44001286 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -24,9 +24,64 @@ | |||
24 | #include <linux/fdtable.h> | 24 | #include <linux/fdtable.h> |
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/rcupdate.h> | 26 | #include <linux/rcupdate.h> |
27 | #include <linux/hrtimer.h> | ||
27 | 28 | ||
28 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
29 | 30 | ||
31 | |||
32 | /* | ||
33 | * Estimate expected accuracy in ns from a timeval. | ||
34 | * | ||
35 | * After quite a bit of churning around, we've settled on | ||
36 | * a simple thing of taking 0.1% of the timeout as the | ||
37 | * slack, with a cap of 100 msec. | ||
38 | * "nice" tasks get a 0.5% slack instead. | ||
39 | * | ||
40 | * Consider this comment an open invitation to come up with even | ||
41 | * better solutions.. | ||
42 | */ | ||
43 | |||
44 | static long __estimate_accuracy(struct timespec *tv) | ||
45 | { | ||
46 | long slack; | ||
47 | int divfactor = 1000; | ||
48 | |||
49 | if (task_nice(current) > 0) | ||
50 | divfactor = divfactor / 5; | ||
51 | |||
52 | slack = tv->tv_nsec / divfactor; | ||
53 | slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); | ||
54 | |||
55 | if (slack > 100 * NSEC_PER_MSEC) | ||
56 | slack = 100 * NSEC_PER_MSEC; | ||
57 | |||
58 | if (slack < 0) | ||
59 | slack = 0; | ||
60 | return slack; | ||
61 | } | ||
62 | |||
63 | static long estimate_accuracy(struct timespec *tv) | ||
64 | { | ||
65 | unsigned long ret; | ||
66 | struct timespec now; | ||
67 | |||
68 | /* | ||
69 | * Realtime tasks get a slack of 0 for obvious reasons. | ||
70 | */ | ||
71 | |||
72 | if (rt_task(current)) | ||
73 | return 0; | ||
74 | |||
75 | ktime_get_ts(&now); | ||
76 | now = timespec_sub(*tv, now); | ||
77 | ret = __estimate_accuracy(&now); | ||
78 | if (ret < current->timer_slack_ns) | ||
79 | return current->timer_slack_ns; | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | |||
84 | |||
30 | struct poll_table_page { | 85 | struct poll_table_page { |
31 | struct poll_table_page * next; | 86 | struct poll_table_page * next; |
32 | struct poll_table_entry * entry; | 87 | struct poll_table_entry * entry; |
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
130 | add_wait_queue(wait_address, &entry->wait); | 185 | add_wait_queue(wait_address, &entry->wait); |
131 | } | 186 | } |
132 | 187 | ||
188 | /** | ||
189 | * poll_select_set_timeout - helper function to setup the timeout value | ||
190 | * @to: pointer to timespec variable for the final timeout | ||
191 | * @sec: seconds (from user space) | ||
192 | * @nsec: nanoseconds (from user space) | ||
193 | * | ||
194 | * Note, we do not use a timespec for the user space value here, That | ||
195 | * way we can use the function for timeval and compat interfaces as well. | ||
196 | * | ||
197 | * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. | ||
198 | */ | ||
199 | int poll_select_set_timeout(struct timespec *to, long sec, long nsec) | ||
200 | { | ||
201 | struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; | ||
202 | |||
203 | if (!timespec_valid(&ts)) | ||
204 | return -EINVAL; | ||
205 | |||
206 | /* Optimize for the zero timeout value here */ | ||
207 | if (!sec && !nsec) { | ||
208 | to->tv_sec = to->tv_nsec = 0; | ||
209 | } else { | ||
210 | ktime_get_ts(to); | ||
211 | *to = timespec_add_safe(*to, ts); | ||
212 | } | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, | ||
217 | int timeval, int ret) | ||
218 | { | ||
219 | struct timespec rts; | ||
220 | struct timeval rtv; | ||
221 | |||
222 | if (!p) | ||
223 | return ret; | ||
224 | |||
225 | if (current->personality & STICKY_TIMEOUTS) | ||
226 | goto sticky; | ||
227 | |||
228 | /* No update for zero timeout */ | ||
229 | if (!end_time->tv_sec && !end_time->tv_nsec) | ||
230 | return ret; | ||
231 | |||
232 | ktime_get_ts(&rts); | ||
233 | rts = timespec_sub(*end_time, rts); | ||
234 | if (rts.tv_sec < 0) | ||
235 | rts.tv_sec = rts.tv_nsec = 0; | ||
236 | |||
237 | if (timeval) { | ||
238 | rtv.tv_sec = rts.tv_sec; | ||
239 | rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; | ||
240 | |||
241 | if (!copy_to_user(p, &rtv, sizeof(rtv))) | ||
242 | return ret; | ||
243 | |||
244 | } else if (!copy_to_user(p, &rts, sizeof(rts))) | ||
245 | return ret; | ||
246 | |||
247 | /* | ||
248 | * If an application puts its timeval in read-only memory, we | ||
249 | * don't want the Linux-specific update to the timeval to | ||
250 | * cause a fault after the select has completed | ||
251 | * successfully. However, because we're not updating the | ||
252 | * timeval, we can't restart the system call. | ||
253 | */ | ||
254 | |||
255 | sticky: | ||
256 | if (ret == -ERESTARTNOHAND) | ||
257 | ret = -EINTR; | ||
258 | return ret; | ||
259 | } | ||
260 | |||
133 | #define FDS_IN(fds, n) (fds->in + n) | 261 | #define FDS_IN(fds, n) (fds->in + n) |
134 | #define FDS_OUT(fds, n) (fds->out + n) | 262 | #define FDS_OUT(fds, n) (fds->out + n) |
135 | #define FDS_EX(fds, n) (fds->ex + n) | 263 | #define FDS_EX(fds, n) (fds->ex + n) |
@@ -182,11 +310,13 @@ get_max: | |||
182 | #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) | 310 | #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) |
183 | #define POLLEX_SET (POLLPRI) | 311 | #define POLLEX_SET (POLLPRI) |
184 | 312 | ||
185 | int do_select(int n, fd_set_bits *fds, s64 *timeout) | 313 | int do_select(int n, fd_set_bits *fds, struct timespec *end_time) |
186 | { | 314 | { |
315 | ktime_t expire, *to = NULL; | ||
187 | struct poll_wqueues table; | 316 | struct poll_wqueues table; |
188 | poll_table *wait; | 317 | poll_table *wait; |
189 | int retval, i; | 318 | int retval, i, timed_out = 0; |
319 | unsigned long slack = 0; | ||
190 | 320 | ||
191 | rcu_read_lock(); | 321 | rcu_read_lock(); |
192 | retval = max_select_fd(n, fds); | 322 | retval = max_select_fd(n, fds); |
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
198 | 328 | ||
199 | poll_initwait(&table); | 329 | poll_initwait(&table); |
200 | wait = &table.pt; | 330 | wait = &table.pt; |
201 | if (!*timeout) | 331 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
202 | wait = NULL; | 332 | wait = NULL; |
333 | timed_out = 1; | ||
334 | } | ||
335 | |||
336 | if (end_time && !timed_out) | ||
337 | slack = estimate_accuracy(end_time); | ||
338 | |||
203 | retval = 0; | 339 | retval = 0; |
204 | for (;;) { | 340 | for (;;) { |
205 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; | 341 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; |
206 | long __timeout; | ||
207 | 342 | ||
208 | set_current_state(TASK_INTERRUPTIBLE); | 343 | set_current_state(TASK_INTERRUPTIBLE); |
209 | 344 | ||
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
259 | cond_resched(); | 394 | cond_resched(); |
260 | } | 395 | } |
261 | wait = NULL; | 396 | wait = NULL; |
262 | if (retval || !*timeout || signal_pending(current)) | 397 | if (retval || timed_out || signal_pending(current)) |
263 | break; | 398 | break; |
264 | if (table.error) { | 399 | if (table.error) { |
265 | retval = table.error; | 400 | retval = table.error; |
266 | break; | 401 | break; |
267 | } | 402 | } |
268 | 403 | ||
269 | if (*timeout < 0) { | 404 | /* |
270 | /* Wait indefinitely */ | 405 | * If this is the first loop and we have a timeout |
271 | __timeout = MAX_SCHEDULE_TIMEOUT; | 406 | * given, then we convert to ktime_t and set the to |
272 | } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { | 407 | * pointer to the expiry value. |
273 | /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ | 408 | */ |
274 | __timeout = MAX_SCHEDULE_TIMEOUT - 1; | 409 | if (end_time && !to) { |
275 | *timeout -= __timeout; | 410 | expire = timespec_to_ktime(*end_time); |
276 | } else { | 411 | to = &expire; |
277 | __timeout = *timeout; | ||
278 | *timeout = 0; | ||
279 | } | 412 | } |
280 | __timeout = schedule_timeout(__timeout); | 413 | |
281 | if (*timeout >= 0) | 414 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) |
282 | *timeout += __timeout; | 415 | timed_out = 1; |
283 | } | 416 | } |
284 | __set_current_state(TASK_RUNNING); | 417 | __set_current_state(TASK_RUNNING); |
285 | 418 | ||
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
300 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) | 433 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) |
301 | 434 | ||
302 | int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, | 435 | int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, |
303 | fd_set __user *exp, s64 *timeout) | 436 | fd_set __user *exp, struct timespec *end_time) |
304 | { | 437 | { |
305 | fd_set_bits fds; | 438 | fd_set_bits fds; |
306 | void *bits; | 439 | void *bits; |
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
351 | zero_fd_set(n, fds.res_out); | 484 | zero_fd_set(n, fds.res_out); |
352 | zero_fd_set(n, fds.res_ex); | 485 | zero_fd_set(n, fds.res_ex); |
353 | 486 | ||
354 | ret = do_select(n, &fds, timeout); | 487 | ret = do_select(n, &fds, end_time); |
355 | 488 | ||
356 | if (ret < 0) | 489 | if (ret < 0) |
357 | goto out; | 490 | goto out; |
@@ -377,7 +510,7 @@ out_nofds: | |||
377 | asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, | 510 | asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, |
378 | fd_set __user *exp, struct timeval __user *tvp) | 511 | fd_set __user *exp, struct timeval __user *tvp) |
379 | { | 512 | { |
380 | s64 timeout = -1; | 513 | struct timespec end_time, *to = NULL; |
381 | struct timeval tv; | 514 | struct timeval tv; |
382 | int ret; | 515 | int ret; |
383 | 516 | ||
@@ -385,43 +518,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
385 | if (copy_from_user(&tv, tvp, sizeof(tv))) | 518 | if (copy_from_user(&tv, tvp, sizeof(tv))) |
386 | return -EFAULT; | 519 | return -EFAULT; |
387 | 520 | ||
388 | if (tv.tv_sec < 0 || tv.tv_usec < 0) | 521 | to = &end_time; |
522 | if (poll_select_set_timeout(to, tv.tv_sec, | ||
523 | tv.tv_usec * NSEC_PER_USEC)) | ||
389 | return -EINVAL; | 524 | return -EINVAL; |
390 | |||
391 | /* Cast to u64 to make GCC stop complaining */ | ||
392 | if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
393 | timeout = -1; /* infinite */ | ||
394 | else { | ||
395 | timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); | ||
396 | timeout += tv.tv_sec * HZ; | ||
397 | } | ||
398 | } | 525 | } |
399 | 526 | ||
400 | ret = core_sys_select(n, inp, outp, exp, &timeout); | 527 | ret = core_sys_select(n, inp, outp, exp, to); |
401 | 528 | ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); | |
402 | if (tvp) { | ||
403 | struct timeval rtv; | ||
404 | |||
405 | if (current->personality & STICKY_TIMEOUTS) | ||
406 | goto sticky; | ||
407 | rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); | ||
408 | rtv.tv_sec = timeout; | ||
409 | if (timeval_compare(&rtv, &tv) >= 0) | ||
410 | rtv = tv; | ||
411 | if (copy_to_user(tvp, &rtv, sizeof(rtv))) { | ||
412 | sticky: | ||
413 | /* | ||
414 | * If an application puts its timeval in read-only | ||
415 | * memory, we don't want the Linux-specific update to | ||
416 | * the timeval to cause a fault after the select has | ||
417 | * completed successfully. However, because we're not | ||
418 | * updating the timeval, we can't restart the system | ||
419 | * call. | ||
420 | */ | ||
421 | if (ret == -ERESTARTNOHAND) | ||
422 | ret = -EINTR; | ||
423 | } | ||
424 | } | ||
425 | 529 | ||
426 | return ret; | 530 | return ret; |
427 | } | 531 | } |
@@ -431,25 +535,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, | |||
431 | fd_set __user *exp, struct timespec __user *tsp, | 535 | fd_set __user *exp, struct timespec __user *tsp, |
432 | const sigset_t __user *sigmask, size_t sigsetsize) | 536 | const sigset_t __user *sigmask, size_t sigsetsize) |
433 | { | 537 | { |
434 | s64 timeout = MAX_SCHEDULE_TIMEOUT; | ||
435 | sigset_t ksigmask, sigsaved; | 538 | sigset_t ksigmask, sigsaved; |
436 | struct timespec ts; | 539 | struct timespec ts, end_time, *to = NULL; |
437 | int ret; | 540 | int ret; |
438 | 541 | ||
439 | if (tsp) { | 542 | if (tsp) { |
440 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 543 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
441 | return -EFAULT; | 544 | return -EFAULT; |
442 | 545 | ||
443 | if (ts.tv_sec < 0 || ts.tv_nsec < 0) | 546 | to = &end_time; |
547 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | ||
444 | return -EINVAL; | 548 | return -EINVAL; |
445 | |||
446 | /* Cast to u64 to make GCC stop complaining */ | ||
447 | if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
448 | timeout = -1; /* infinite */ | ||
449 | else { | ||
450 | timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); | ||
451 | timeout += ts.tv_sec * HZ; | ||
452 | } | ||
453 | } | 549 | } |
454 | 550 | ||
455 | if (sigmask) { | 551 | if (sigmask) { |
@@ -463,32 +559,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, | |||
463 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 559 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
464 | } | 560 | } |
465 | 561 | ||
466 | ret = core_sys_select(n, inp, outp, exp, &timeout); | 562 | ret = core_sys_select(n, inp, outp, exp, &end_time); |
467 | 563 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); | |
468 | if (tsp) { | ||
469 | struct timespec rts; | ||
470 | |||
471 | if (current->personality & STICKY_TIMEOUTS) | ||
472 | goto sticky; | ||
473 | rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * | ||
474 | 1000; | ||
475 | rts.tv_sec = timeout; | ||
476 | if (timespec_compare(&rts, &ts) >= 0) | ||
477 | rts = ts; | ||
478 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
479 | sticky: | ||
480 | /* | ||
481 | * If an application puts its timeval in read-only | ||
482 | * memory, we don't want the Linux-specific update to | ||
483 | * the timeval to cause a fault after the select has | ||
484 | * completed successfully. However, because we're not | ||
485 | * updating the timeval, we can't restart the system | ||
486 | * call. | ||
487 | */ | ||
488 | if (ret == -ERESTARTNOHAND) | ||
489 | ret = -EINTR; | ||
490 | } | ||
491 | } | ||
492 | 564 | ||
493 | if (ret == -ERESTARTNOHAND) { | 565 | if (ret == -ERESTARTNOHAND) { |
494 | /* | 566 | /* |
@@ -574,18 +646,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
574 | } | 646 | } |
575 | 647 | ||
576 | static int do_poll(unsigned int nfds, struct poll_list *list, | 648 | static int do_poll(unsigned int nfds, struct poll_list *list, |
577 | struct poll_wqueues *wait, s64 *timeout) | 649 | struct poll_wqueues *wait, struct timespec *end_time) |
578 | { | 650 | { |
579 | int count = 0; | ||
580 | poll_table* pt = &wait->pt; | 651 | poll_table* pt = &wait->pt; |
652 | ktime_t expire, *to = NULL; | ||
653 | int timed_out = 0, count = 0; | ||
654 | unsigned long slack = 0; | ||
581 | 655 | ||
582 | /* Optimise the no-wait case */ | 656 | /* Optimise the no-wait case */ |
583 | if (!(*timeout)) | 657 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
584 | pt = NULL; | 658 | pt = NULL; |
659 | timed_out = 1; | ||
660 | } | ||
661 | |||
662 | if (end_time && !timed_out) | ||
663 | slack = estimate_accuracy(end_time); | ||
585 | 664 | ||
586 | for (;;) { | 665 | for (;;) { |
587 | struct poll_list *walk; | 666 | struct poll_list *walk; |
588 | long __timeout; | ||
589 | 667 | ||
590 | set_current_state(TASK_INTERRUPTIBLE); | 668 | set_current_state(TASK_INTERRUPTIBLE); |
591 | for (walk = list; walk != NULL; walk = walk->next) { | 669 | for (walk = list; walk != NULL; walk = walk->next) { |
@@ -617,27 +695,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
617 | if (signal_pending(current)) | 695 | if (signal_pending(current)) |
618 | count = -EINTR; | 696 | count = -EINTR; |
619 | } | 697 | } |
620 | if (count || !*timeout) | 698 | if (count || timed_out) |
621 | break; | 699 | break; |
622 | 700 | ||
623 | if (*timeout < 0) { | 701 | /* |
624 | /* Wait indefinitely */ | 702 | * If this is the first loop and we have a timeout |
625 | __timeout = MAX_SCHEDULE_TIMEOUT; | 703 | * given, then we convert to ktime_t and set the to |
626 | } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { | 704 | * pointer to the expiry value. |
627 | /* | 705 | */ |
628 | * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in | 706 | if (end_time && !to) { |
629 | * a loop | 707 | expire = timespec_to_ktime(*end_time); |
630 | */ | 708 | to = &expire; |
631 | __timeout = MAX_SCHEDULE_TIMEOUT - 1; | ||
632 | *timeout -= __timeout; | ||
633 | } else { | ||
634 | __timeout = *timeout; | ||
635 | *timeout = 0; | ||
636 | } | 709 | } |
637 | 710 | ||
638 | __timeout = schedule_timeout(__timeout); | 711 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) |
639 | if (*timeout >= 0) | 712 | timed_out = 1; |
640 | *timeout += __timeout; | ||
641 | } | 713 | } |
642 | __set_current_state(TASK_RUNNING); | 714 | __set_current_state(TASK_RUNNING); |
643 | return count; | 715 | return count; |
@@ -646,7 +718,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
646 | #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ | 718 | #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ |
647 | sizeof(struct pollfd)) | 719 | sizeof(struct pollfd)) |
648 | 720 | ||
649 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | 721 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, |
722 | struct timespec *end_time) | ||
650 | { | 723 | { |
651 | struct poll_wqueues table; | 724 | struct poll_wqueues table; |
652 | int err = -EFAULT, fdcount, len, size; | 725 | int err = -EFAULT, fdcount, len, size; |
@@ -686,7 +759,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | |||
686 | } | 759 | } |
687 | 760 | ||
688 | poll_initwait(&table); | 761 | poll_initwait(&table); |
689 | fdcount = do_poll(nfds, head, &table, timeout); | 762 | fdcount = do_poll(nfds, head, &table, end_time); |
690 | poll_freewait(&table); | 763 | poll_freewait(&table); |
691 | 764 | ||
692 | for (walk = head; walk; walk = walk->next) { | 765 | for (walk = head; walk; walk = walk->next) { |
@@ -712,16 +785,21 @@ out_fds: | |||
712 | 785 | ||
713 | static long do_restart_poll(struct restart_block *restart_block) | 786 | static long do_restart_poll(struct restart_block *restart_block) |
714 | { | 787 | { |
715 | struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; | 788 | struct pollfd __user *ufds = restart_block->poll.ufds; |
716 | int nfds = restart_block->arg1; | 789 | int nfds = restart_block->poll.nfds; |
717 | s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; | 790 | struct timespec *to = NULL, end_time; |
718 | int ret; | 791 | int ret; |
719 | 792 | ||
720 | ret = do_sys_poll(ufds, nfds, &timeout); | 793 | if (restart_block->poll.has_timeout) { |
794 | end_time.tv_sec = restart_block->poll.tv_sec; | ||
795 | end_time.tv_nsec = restart_block->poll.tv_nsec; | ||
796 | to = &end_time; | ||
797 | } | ||
798 | |||
799 | ret = do_sys_poll(ufds, nfds, to); | ||
800 | |||
721 | if (ret == -EINTR) { | 801 | if (ret == -EINTR) { |
722 | restart_block->fn = do_restart_poll; | 802 | restart_block->fn = do_restart_poll; |
723 | restart_block->arg2 = timeout & 0xFFFFFFFF; | ||
724 | restart_block->arg3 = (u64)timeout >> 32; | ||
725 | ret = -ERESTART_RESTARTBLOCK; | 803 | ret = -ERESTART_RESTARTBLOCK; |
726 | } | 804 | } |
727 | return ret; | 805 | return ret; |
@@ -730,31 +808,32 @@ static long do_restart_poll(struct restart_block *restart_block) | |||
730 | asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, | 808 | asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, |
731 | long timeout_msecs) | 809 | long timeout_msecs) |
732 | { | 810 | { |
733 | s64 timeout_jiffies; | 811 | struct timespec end_time, *to = NULL; |
734 | int ret; | 812 | int ret; |
735 | 813 | ||
736 | if (timeout_msecs > 0) { | 814 | if (timeout_msecs >= 0) { |
737 | #if HZ > 1000 | 815 | to = &end_time; |
738 | /* We can only overflow if HZ > 1000 */ | 816 | poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, |
739 | if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) | 817 | NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); |
740 | timeout_jiffies = -1; | ||
741 | else | ||
742 | #endif | ||
743 | timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; | ||
744 | } else { | ||
745 | /* Infinite (< 0) or no (0) timeout */ | ||
746 | timeout_jiffies = timeout_msecs; | ||
747 | } | 818 | } |
748 | 819 | ||
749 | ret = do_sys_poll(ufds, nfds, &timeout_jiffies); | 820 | ret = do_sys_poll(ufds, nfds, to); |
821 | |||
750 | if (ret == -EINTR) { | 822 | if (ret == -EINTR) { |
751 | struct restart_block *restart_block; | 823 | struct restart_block *restart_block; |
824 | |||
752 | restart_block = &current_thread_info()->restart_block; | 825 | restart_block = &current_thread_info()->restart_block; |
753 | restart_block->fn = do_restart_poll; | 826 | restart_block->fn = do_restart_poll; |
754 | restart_block->arg0 = (unsigned long)ufds; | 827 | restart_block->poll.ufds = ufds; |
755 | restart_block->arg1 = nfds; | 828 | restart_block->poll.nfds = nfds; |
756 | restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; | 829 | |
757 | restart_block->arg3 = (u64)timeout_jiffies >> 32; | 830 | if (timeout_msecs >= 0) { |
831 | restart_block->poll.tv_sec = end_time.tv_sec; | ||
832 | restart_block->poll.tv_nsec = end_time.tv_nsec; | ||
833 | restart_block->poll.has_timeout = 1; | ||
834 | } else | ||
835 | restart_block->poll.has_timeout = 0; | ||
836 | |||
758 | ret = -ERESTART_RESTARTBLOCK; | 837 | ret = -ERESTART_RESTARTBLOCK; |
759 | } | 838 | } |
760 | return ret; | 839 | return ret; |
@@ -766,21 +845,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, | |||
766 | size_t sigsetsize) | 845 | size_t sigsetsize) |
767 | { | 846 | { |
768 | sigset_t ksigmask, sigsaved; | 847 | sigset_t ksigmask, sigsaved; |
769 | struct timespec ts; | 848 | struct timespec ts, end_time, *to = NULL; |
770 | s64 timeout = -1; | ||
771 | int ret; | 849 | int ret; |
772 | 850 | ||
773 | if (tsp) { | 851 | if (tsp) { |
774 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 852 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
775 | return -EFAULT; | 853 | return -EFAULT; |
776 | 854 | ||
777 | /* Cast to u64 to make GCC stop complaining */ | 855 | to = &end_time; |
778 | if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) | 856 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) |
779 | timeout = -1; /* infinite */ | 857 | return -EINVAL; |
780 | else { | ||
781 | timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); | ||
782 | timeout += ts.tv_sec * HZ; | ||
783 | } | ||
784 | } | 858 | } |
785 | 859 | ||
786 | if (sigmask) { | 860 | if (sigmask) { |
@@ -794,7 +868,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, | |||
794 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 868 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
795 | } | 869 | } |
796 | 870 | ||
797 | ret = do_sys_poll(ufds, nfds, &timeout); | 871 | ret = do_sys_poll(ufds, nfds, to); |
798 | 872 | ||
799 | /* We can restart this syscall, usually */ | 873 | /* We can restart this syscall, usually */ |
800 | if (ret == -EINTR) { | 874 | if (ret == -EINTR) { |
@@ -812,31 +886,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, | |||
812 | } else if (sigmask) | 886 | } else if (sigmask) |
813 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 887 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
814 | 888 | ||
815 | if (tsp && timeout >= 0) { | 889 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); |
816 | struct timespec rts; | ||
817 | |||
818 | if (current->personality & STICKY_TIMEOUTS) | ||
819 | goto sticky; | ||
820 | /* Yes, we know it's actually an s64, but it's also positive. */ | ||
821 | rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * | ||
822 | 1000; | ||
823 | rts.tv_sec = timeout; | ||
824 | if (timespec_compare(&rts, &ts) >= 0) | ||
825 | rts = ts; | ||
826 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
827 | sticky: | ||
828 | /* | ||
829 | * If an application puts its timeval in read-only | ||
830 | * memory, we don't want the Linux-specific update to | ||
831 | * the timeval to cause a fault after the select has | ||
832 | * completed successfully. However, because we're not | ||
833 | * updating the timeval, we can't restart the system | ||
834 | * call. | ||
835 | */ | ||
836 | if (ret == -ERESTARTNOHAND && timeout >= 0) | ||
837 | ret = -EINTR; | ||
838 | } | ||
839 | } | ||
840 | 890 | ||
841 | return ret; | 891 | return ret; |
842 | } | 892 | } |
diff --git a/fs/seq_file.c b/fs/seq_file.c index bd20f7f5a933..eba2eabcd2b8 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) | |||
452 | 452 | ||
453 | int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) | 453 | int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) |
454 | { | 454 | { |
455 | size_t len = bitmap_scnprintf_len(nr_bits); | 455 | if (m->count < m->size) { |
456 | int len = bitmap_scnprintf(m->buf + m->count, | ||
457 | m->size - m->count, bits, nr_bits); | ||
458 | if (m->count + len < m->size) { | ||
459 | m->count += len; | ||
460 | return 0; | ||
461 | } | ||
462 | } | ||
463 | m->count = m->size; | ||
464 | return -1; | ||
465 | } | ||
466 | EXPORT_SYMBOL(seq_bitmap); | ||
456 | 467 | ||
457 | if (m->count + len < m->size) { | 468 | int seq_bitmap_list(struct seq_file *m, unsigned long *bits, |
458 | bitmap_scnprintf(m->buf + m->count, m->size - m->count, | 469 | unsigned int nr_bits) |
459 | bits, nr_bits); | 470 | { |
460 | m->count += len; | 471 | if (m->count < m->size) { |
461 | return 0; | 472 | int len = bitmap_scnlistprintf(m->buf + m->count, |
473 | m->size - m->count, bits, nr_bits); | ||
474 | if (m->count + len < m->size) { | ||
475 | m->count += len; | ||
476 | return 0; | ||
477 | } | ||
462 | } | 478 | } |
463 | m->count = m->size; | 479 | m->count = m->size; |
464 | return -1; | 480 | return -1; |
465 | } | 481 | } |
482 | EXPORT_SYMBOL(seq_bitmap_list); | ||
466 | 483 | ||
467 | static void *single_start(struct seq_file *p, loff_t *pos) | 484 | static void *single_start(struct seq_file *p, loff_t *pos) |
468 | { | 485 | { |
diff --git a/fs/super.c b/fs/super.c index e931ae9511fe..400a7608f15e 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -682,7 +682,7 @@ void emergency_remount(void) | |||
682 | * filesystems which don't use real block-devices. -- jrs | 682 | * filesystems which don't use real block-devices. -- jrs |
683 | */ | 683 | */ |
684 | 684 | ||
685 | static struct idr unnamed_dev_idr; | 685 | static DEFINE_IDA(unnamed_dev_ida); |
686 | static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ | 686 | static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ |
687 | 687 | ||
688 | int set_anon_super(struct super_block *s, void *data) | 688 | int set_anon_super(struct super_block *s, void *data) |
@@ -691,10 +691,10 @@ int set_anon_super(struct super_block *s, void *data) | |||
691 | int error; | 691 | int error; |
692 | 692 | ||
693 | retry: | 693 | retry: |
694 | if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) | 694 | if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) |
695 | return -ENOMEM; | 695 | return -ENOMEM; |
696 | spin_lock(&unnamed_dev_lock); | 696 | spin_lock(&unnamed_dev_lock); |
697 | error = idr_get_new(&unnamed_dev_idr, NULL, &dev); | 697 | error = ida_get_new(&unnamed_dev_ida, &dev); |
698 | spin_unlock(&unnamed_dev_lock); | 698 | spin_unlock(&unnamed_dev_lock); |
699 | if (error == -EAGAIN) | 699 | if (error == -EAGAIN) |
700 | /* We raced and lost with another CPU. */ | 700 | /* We raced and lost with another CPU. */ |
@@ -704,7 +704,7 @@ int set_anon_super(struct super_block *s, void *data) | |||
704 | 704 | ||
705 | if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { | 705 | if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { |
706 | spin_lock(&unnamed_dev_lock); | 706 | spin_lock(&unnamed_dev_lock); |
707 | idr_remove(&unnamed_dev_idr, dev); | 707 | ida_remove(&unnamed_dev_ida, dev); |
708 | spin_unlock(&unnamed_dev_lock); | 708 | spin_unlock(&unnamed_dev_lock); |
709 | return -EMFILE; | 709 | return -EMFILE; |
710 | } | 710 | } |
@@ -720,17 +720,12 @@ void kill_anon_super(struct super_block *sb) | |||
720 | 720 | ||
721 | generic_shutdown_super(sb); | 721 | generic_shutdown_super(sb); |
722 | spin_lock(&unnamed_dev_lock); | 722 | spin_lock(&unnamed_dev_lock); |
723 | idr_remove(&unnamed_dev_idr, slot); | 723 | ida_remove(&unnamed_dev_ida, slot); |
724 | spin_unlock(&unnamed_dev_lock); | 724 | spin_unlock(&unnamed_dev_lock); |
725 | } | 725 | } |
726 | 726 | ||
727 | EXPORT_SYMBOL(kill_anon_super); | 727 | EXPORT_SYMBOL(kill_anon_super); |
728 | 728 | ||
729 | void __init unnamed_dev_init(void) | ||
730 | { | ||
731 | idr_init(&unnamed_dev_idr); | ||
732 | } | ||
733 | |||
734 | void kill_litter_super(struct super_block *sb) | 729 | void kill_litter_super(struct super_block *sb) |
735 | { | 730 | { |
736 | if (sb->s_root) | 731 | if (sb->s_root) |
@@ -760,9 +755,13 @@ int get_sb_bdev(struct file_system_type *fs_type, | |||
760 | { | 755 | { |
761 | struct block_device *bdev; | 756 | struct block_device *bdev; |
762 | struct super_block *s; | 757 | struct super_block *s; |
758 | fmode_t mode = FMODE_READ; | ||
763 | int error = 0; | 759 | int error = 0; |
764 | 760 | ||
765 | bdev = open_bdev_excl(dev_name, flags, fs_type); | 761 | if (!(flags & MS_RDONLY)) |
762 | mode |= FMODE_WRITE; | ||
763 | |||
764 | bdev = open_bdev_exclusive(dev_name, mode, fs_type); | ||
766 | if (IS_ERR(bdev)) | 765 | if (IS_ERR(bdev)) |
767 | return PTR_ERR(bdev); | 766 | return PTR_ERR(bdev); |
768 | 767 | ||
@@ -785,11 +784,12 @@ int get_sb_bdev(struct file_system_type *fs_type, | |||
785 | goto error_bdev; | 784 | goto error_bdev; |
786 | } | 785 | } |
787 | 786 | ||
788 | close_bdev_excl(bdev); | 787 | close_bdev_exclusive(bdev, mode); |
789 | } else { | 788 | } else { |
790 | char b[BDEVNAME_SIZE]; | 789 | char b[BDEVNAME_SIZE]; |
791 | 790 | ||
792 | s->s_flags = flags; | 791 | s->s_flags = flags; |
792 | s->s_mode = mode; | ||
793 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 793 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
794 | sb_set_blocksize(s, block_size(bdev)); | 794 | sb_set_blocksize(s, block_size(bdev)); |
795 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); | 795 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); |
@@ -807,7 +807,7 @@ int get_sb_bdev(struct file_system_type *fs_type, | |||
807 | error_s: | 807 | error_s: |
808 | error = PTR_ERR(s); | 808 | error = PTR_ERR(s); |
809 | error_bdev: | 809 | error_bdev: |
810 | close_bdev_excl(bdev); | 810 | close_bdev_exclusive(bdev, mode); |
811 | error: | 811 | error: |
812 | return error; | 812 | return error; |
813 | } | 813 | } |
@@ -817,10 +817,11 @@ EXPORT_SYMBOL(get_sb_bdev); | |||
817 | void kill_block_super(struct super_block *sb) | 817 | void kill_block_super(struct super_block *sb) |
818 | { | 818 | { |
819 | struct block_device *bdev = sb->s_bdev; | 819 | struct block_device *bdev = sb->s_bdev; |
820 | fmode_t mode = sb->s_mode; | ||
820 | 821 | ||
821 | generic_shutdown_super(sb); | 822 | generic_shutdown_super(sb); |
822 | sync_blockdev(bdev); | 823 | sync_blockdev(bdev); |
823 | close_bdev_excl(bdev); | 824 | close_bdev_exclusive(bdev, mode); |
824 | } | 825 | } |
825 | 826 | ||
826 | EXPORT_SYMBOL(kill_block_super); | 827 | EXPORT_SYMBOL(kill_block_super); |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3a05a596e3b4..82d3b79d0e08 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -983,4 +983,5 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
983 | const struct file_operations sysfs_dir_operations = { | 983 | const struct file_operations sysfs_dir_operations = { |
984 | .read = generic_read_dir, | 984 | .read = generic_read_dir, |
985 | .readdir = sysfs_readdir, | 985 | .readdir = sysfs_readdir, |
986 | .llseek = generic_file_llseek, | ||
986 | }; | 987 | }; |
diff --git a/fs/timerfd.c b/fs/timerfd.c index c502c60e4f54..0862f0e49d0c 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
52 | 52 | ||
53 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | 53 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
54 | { | 54 | { |
55 | ktime_t now, remaining; | 55 | ktime_t remaining; |
56 | |||
57 | now = ctx->tmr.base->get_time(); | ||
58 | remaining = ktime_sub(ctx->tmr.expires, now); | ||
59 | 56 | ||
57 | remaining = hrtimer_expires_remaining(&ctx->tmr); | ||
60 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 58 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
61 | } | 59 | } |
62 | 60 | ||
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags, | |||
74 | ctx->ticks = 0; | 72 | ctx->ticks = 0; |
75 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | 73 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
76 | hrtimer_init(&ctx->tmr, ctx->clockid, htmode); | 74 | hrtimer_init(&ctx->tmr, ctx->clockid, htmode); |
77 | ctx->tmr.expires = texp; | 75 | hrtimer_set_expires(&ctx->tmr, texp); |
78 | ctx->tmr.function = timerfd_tmrproc; | 76 | ctx->tmr.function = timerfd_tmrproc; |
79 | if (texp.tv64 != 0) | 77 | if (texp.tv64 != 0) |
80 | hrtimer_start(&ctx->tmr, texp, htmode); | 78 | hrtimer_start(&ctx->tmr, texp, htmode); |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 73db464cd08b..1a4973e10664 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c) | |||
414 | * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - | 414 | * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - |
415 | * @c->lst.taken_empty_lebs | 415 | * @c->lst.taken_empty_lebs |
416 | * | 416 | * |
417 | * @empty_lebs are available because they are empty. @freeable_cnt are | 417 | * @c->lst.empty_lebs are available because they are empty. |
418 | * available because they contain only free and dirty space and the | 418 | * @c->freeable_cnt are available because they contain only free and |
419 | * index allocation always occurs after wbufs are synch'ed. | 419 | * dirty space, @c->idx_gc_cnt are available because they are index |
420 | * @idx_gc_cnt are available because they are index LEBs that have been | 420 | * LEBs that have been garbage collected and are awaiting the commit |
421 | * garbage collected (including trivial GC) and are awaiting the commit | 421 | * before they can be used. And the in-the-gaps method will grab these |
422 | * before they can be unmapped - note that the in-the-gaps method will | 422 | * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have |
423 | * grab these if it needs them. @taken_empty_lebs are empty_lebs that | 423 | * already been allocated for some purpose. |
424 | * have already been allocated for some purpose (also includes those | ||
425 | * LEBs on the @idx_gc list). | ||
426 | * | 424 | * |
427 | * Note, @taken_empty_lebs may temporarily be higher by one because of | 425 | * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because |
428 | * the way we serialize LEB allocations and budgeting. See a comment in | 426 | * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they |
429 | * 'ubifs_find_free_space()'. | 427 | * are taken until after the commit). |
428 | * | ||
429 | * Note, @c->lst.taken_empty_lebs may temporarily be higher by one | ||
430 | * because of the way we serialize LEB allocations and budgeting. See a | ||
431 | * comment in 'ubifs_find_free_space()'. | ||
430 | */ | 432 | */ |
431 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 433 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
432 | c->lst.taken_empty_lebs; | 434 | c->lst.taken_empty_lebs; |
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 5bb51dac3c16..a0ada596b17c 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c | |||
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; | |||
91 | * | 91 | * |
92 | * Note, if the input buffer was not compressed, it is copied to the output | 92 | * Note, if the input buffer was not compressed, it is copied to the output |
93 | * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. | 93 | * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. |
94 | * | ||
95 | * This functions returns %0 on success or a negative error code on failure. | ||
96 | */ | 94 | */ |
97 | void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, | 95 | void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, |
98 | int *compr_type) | 96 | int *compr_type) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index d7f7645779f2..7186400750e7 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) | |||
222 | { | 222 | { |
223 | const struct ubifs_inode *ui = ubifs_inode(inode); | 223 | const struct ubifs_inode *ui = ubifs_inode(inode); |
224 | 224 | ||
225 | printk(KERN_DEBUG "inode %lu\n", inode->i_ino); | 225 | printk(KERN_DEBUG "Dump in-memory inode:"); |
226 | printk(KERN_DEBUG "size %llu\n", | 226 | printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); |
227 | printk(KERN_DEBUG "\tsize %llu\n", | ||
227 | (unsigned long long)i_size_read(inode)); | 228 | (unsigned long long)i_size_read(inode)); |
228 | printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); | 229 | printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); |
229 | printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); | 230 | printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); |
230 | printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); | 231 | printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); |
231 | printk(KERN_DEBUG "atime %u.%u\n", | 232 | printk(KERN_DEBUG "\tatime %u.%u\n", |
232 | (unsigned int)inode->i_atime.tv_sec, | 233 | (unsigned int)inode->i_atime.tv_sec, |
233 | (unsigned int)inode->i_atime.tv_nsec); | 234 | (unsigned int)inode->i_atime.tv_nsec); |
234 | printk(KERN_DEBUG "mtime %u.%u\n", | 235 | printk(KERN_DEBUG "\tmtime %u.%u\n", |
235 | (unsigned int)inode->i_mtime.tv_sec, | 236 | (unsigned int)inode->i_mtime.tv_sec, |
236 | (unsigned int)inode->i_mtime.tv_nsec); | 237 | (unsigned int)inode->i_mtime.tv_nsec); |
237 | printk(KERN_DEBUG "ctime %u.%u\n", | 238 | printk(KERN_DEBUG "\tctime %u.%u\n", |
238 | (unsigned int)inode->i_ctime.tv_sec, | 239 | (unsigned int)inode->i_ctime.tv_sec, |
239 | (unsigned int)inode->i_ctime.tv_nsec); | 240 | (unsigned int)inode->i_ctime.tv_nsec); |
240 | printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); | 241 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); |
241 | printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); | 242 | printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); |
242 | printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); | 243 | printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); |
243 | printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); | 244 | printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); |
244 | printk(KERN_DEBUG "dirty %u\n", ui->dirty); | 245 | printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); |
245 | printk(KERN_DEBUG "xattr %u\n", ui->xattr); | 246 | printk(KERN_DEBUG "\txattr %u\n", ui->xattr); |
246 | printk(KERN_DEBUG "flags %d\n", ui->flags); | 247 | printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); |
247 | printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); | 248 | printk(KERN_DEBUG "\tsynced_i_size %llu\n", |
248 | printk(KERN_DEBUG "data_len %d\n", ui->data_len); | 249 | (unsigned long long)ui->synced_i_size); |
250 | printk(KERN_DEBUG "\tui_size %llu\n", | ||
251 | (unsigned long long)ui->ui_size); | ||
252 | printk(KERN_DEBUG "\tflags %d\n", ui->flags); | ||
253 | printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); | ||
254 | printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); | ||
255 | printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); | ||
256 | printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); | ||
249 | } | 257 | } |
250 | 258 | ||
251 | void dbg_dump_node(const struct ubifs_info *c, const void *node) | 259 | void dbg_dump_node(const struct ubifs_info *c, const void *node) |
@@ -647,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c) | |||
647 | } | 655 | } |
648 | } | 656 | } |
649 | 657 | ||
658 | void dbg_dump_lpt_info(struct ubifs_info *c) | ||
659 | { | ||
660 | int i; | ||
661 | |||
662 | spin_lock(&dbg_lock); | ||
663 | printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); | ||
664 | printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); | ||
665 | printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); | ||
666 | printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); | ||
667 | printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); | ||
668 | printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); | ||
669 | printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); | ||
670 | printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); | ||
671 | printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); | ||
672 | printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); | ||
673 | printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); | ||
674 | printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); | ||
675 | printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); | ||
676 | printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); | ||
677 | printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); | ||
678 | printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); | ||
679 | printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); | ||
680 | printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); | ||
681 | printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); | ||
682 | printk(KERN_DEBUG "\tLPT head is at %d:%d\n", | ||
683 | c->nhead_lnum, c->nhead_offs); | ||
684 | printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); | ||
685 | if (c->big_lpt) | ||
686 | printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", | ||
687 | c->lsave_lnum, c->lsave_offs); | ||
688 | for (i = 0; i < c->lpt_lebs; i++) | ||
689 | printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " | ||
690 | "cmt %d\n", i + c->lpt_first, c->ltab[i].free, | ||
691 | c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); | ||
692 | spin_unlock(&dbg_lock); | ||
693 | } | ||
694 | |||
650 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) | 695 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) |
651 | { | 696 | { |
652 | struct ubifs_scan_leb *sleb; | 697 | struct ubifs_scan_leb *sleb; |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 50315fc57185..33d6b95071e4 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst); | |||
224 | void dbg_dump_budg(struct ubifs_info *c); | 224 | void dbg_dump_budg(struct ubifs_info *c); |
225 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | 225 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); |
226 | void dbg_dump_lprops(struct ubifs_info *c); | 226 | void dbg_dump_lprops(struct ubifs_info *c); |
227 | void dbg_dump_lpt_info(struct ubifs_info *c); | ||
227 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); | 228 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); |
228 | void dbg_dump_znode(const struct ubifs_info *c, | 229 | void dbg_dump_znode(const struct ubifs_info *c, |
229 | const struct ubifs_znode *znode); | 230 | const struct ubifs_znode *znode); |
@@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
249 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); | 250 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); |
250 | int dbg_check_cats(struct ubifs_info *c); | 251 | int dbg_check_cats(struct ubifs_info *c); |
251 | int dbg_check_ltab(struct ubifs_info *c); | 252 | int dbg_check_ltab(struct ubifs_info *c); |
253 | int dbg_chk_lpt_free_spc(struct ubifs_info *c); | ||
254 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); | ||
252 | int dbg_check_synced_i_size(struct inode *inode); | 255 | int dbg_check_synced_i_size(struct inode *inode); |
253 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); | 256 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); |
254 | int dbg_check_tnc(struct ubifs_info *c, int extra); | 257 | int dbg_check_tnc(struct ubifs_info *c, int extra); |
@@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | |||
367 | #define dbg_dump_budg(c) ({}) | 370 | #define dbg_dump_budg(c) ({}) |
368 | #define dbg_dump_lprop(c, lp) ({}) | 371 | #define dbg_dump_lprop(c, lp) ({}) |
369 | #define dbg_dump_lprops(c) ({}) | 372 | #define dbg_dump_lprops(c) ({}) |
373 | #define dbg_dump_lpt_info(c) ({}) | ||
370 | #define dbg_dump_leb(c, lnum) ({}) | 374 | #define dbg_dump_leb(c, lnum) ({}) |
371 | #define dbg_dump_znode(c, znode) ({}) | 375 | #define dbg_dump_znode(c, znode) ({}) |
372 | #define dbg_dump_heap(c, heap, cat) ({}) | 376 | #define dbg_dump_heap(c, heap, cat) ({}) |
@@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | |||
379 | #define dbg_check_old_index(c, zroot) 0 | 383 | #define dbg_check_old_index(c, zroot) 0 |
380 | #define dbg_check_cats(c) 0 | 384 | #define dbg_check_cats(c) 0 |
381 | #define dbg_check_ltab(c) 0 | 385 | #define dbg_check_ltab(c) 0 |
386 | #define dbg_chk_lpt_free_spc(c) 0 | ||
387 | #define dbg_chk_lpt_sz(c, action, len) 0 | ||
382 | #define dbg_check_synced_i_size(inode) 0 | 388 | #define dbg_check_synced_i_size(inode) 0 |
383 | #define dbg_check_dir_size(c, dir) 0 | 389 | #define dbg_check_dir_size(c, dir) 0 |
384 | #define dbg_check_tnc(c, x) 0 | 390 | #define dbg_check_tnc(c, x) 0 |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3d698e2022b1..51cf511d44d9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page) | |||
147 | err = ret; | 147 | err = ret; |
148 | if (err != -ENOENT) | 148 | if (err != -ENOENT) |
149 | break; | 149 | break; |
150 | } else if (block + 1 == beyond) { | ||
151 | int dlen = le32_to_cpu(dn->size); | ||
152 | int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); | ||
153 | |||
154 | if (ilen && ilen < dlen) | ||
155 | memset(addr + ilen, 0, dlen - ilen); | ||
150 | } | 156 | } |
151 | } | 157 | } |
152 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | 158 | if (++i >= UBIFS_BLOCKS_PER_PAGE) |
@@ -577,8 +583,262 @@ out: | |||
577 | return copied; | 583 | return copied; |
578 | } | 584 | } |
579 | 585 | ||
586 | /** | ||
587 | * populate_page - copy data nodes into a page for bulk-read. | ||
588 | * @c: UBIFS file-system description object | ||
589 | * @page: page | ||
590 | * @bu: bulk-read information | ||
591 | * @n: next zbranch slot | ||
592 | * | ||
593 | * This function returns %0 on success and a negative error code on failure. | ||
594 | */ | ||
595 | static int populate_page(struct ubifs_info *c, struct page *page, | ||
596 | struct bu_info *bu, int *n) | ||
597 | { | ||
598 | int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; | ||
599 | struct inode *inode = page->mapping->host; | ||
600 | loff_t i_size = i_size_read(inode); | ||
601 | unsigned int page_block; | ||
602 | void *addr, *zaddr; | ||
603 | pgoff_t end_index; | ||
604 | |||
605 | dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", | ||
606 | inode->i_ino, page->index, i_size, page->flags); | ||
607 | |||
608 | addr = zaddr = kmap(page); | ||
609 | |||
610 | end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | ||
611 | if (!i_size || page->index > end_index) { | ||
612 | hole = 1; | ||
613 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
614 | goto out_hole; | ||
615 | } | ||
616 | |||
617 | page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
618 | while (1) { | ||
619 | int err, len, out_len, dlen; | ||
620 | |||
621 | if (nn >= bu->cnt) { | ||
622 | hole = 1; | ||
623 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
624 | } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { | ||
625 | struct ubifs_data_node *dn; | ||
626 | |||
627 | dn = bu->buf + (bu->zbranch[nn].offs - offs); | ||
628 | |||
629 | ubifs_assert(dn->ch.sqnum > | ||
630 | ubifs_inode(inode)->creat_sqnum); | ||
631 | |||
632 | len = le32_to_cpu(dn->size); | ||
633 | if (len <= 0 || len > UBIFS_BLOCK_SIZE) | ||
634 | goto out_err; | ||
635 | |||
636 | dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; | ||
637 | out_len = UBIFS_BLOCK_SIZE; | ||
638 | err = ubifs_decompress(&dn->data, dlen, addr, &out_len, | ||
639 | le16_to_cpu(dn->compr_type)); | ||
640 | if (err || len != out_len) | ||
641 | goto out_err; | ||
642 | |||
643 | if (len < UBIFS_BLOCK_SIZE) | ||
644 | memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); | ||
645 | |||
646 | nn += 1; | ||
647 | read = (i << UBIFS_BLOCK_SHIFT) + len; | ||
648 | } else if (key_block(c, &bu->zbranch[nn].key) < page_block) { | ||
649 | nn += 1; | ||
650 | continue; | ||
651 | } else { | ||
652 | hole = 1; | ||
653 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
654 | } | ||
655 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | ||
656 | break; | ||
657 | addr += UBIFS_BLOCK_SIZE; | ||
658 | page_block += 1; | ||
659 | } | ||
660 | |||
661 | if (end_index == page->index) { | ||
662 | int len = i_size & (PAGE_CACHE_SIZE - 1); | ||
663 | |||
664 | if (len && len < read) | ||
665 | memset(zaddr + len, 0, read - len); | ||
666 | } | ||
667 | |||
668 | out_hole: | ||
669 | if (hole) { | ||
670 | SetPageChecked(page); | ||
671 | dbg_gen("hole"); | ||
672 | } | ||
673 | |||
674 | SetPageUptodate(page); | ||
675 | ClearPageError(page); | ||
676 | flush_dcache_page(page); | ||
677 | kunmap(page); | ||
678 | *n = nn; | ||
679 | return 0; | ||
680 | |||
681 | out_err: | ||
682 | ClearPageUptodate(page); | ||
683 | SetPageError(page); | ||
684 | flush_dcache_page(page); | ||
685 | kunmap(page); | ||
686 | ubifs_err("bad data node (block %u, inode %lu)", | ||
687 | page_block, inode->i_ino); | ||
688 | return -EINVAL; | ||
689 | } | ||
690 | |||
691 | /** | ||
692 | * ubifs_do_bulk_read - do bulk-read. | ||
693 | * @c: UBIFS file-system description object | ||
694 | * @page1: first page | ||
695 | * | ||
696 | * This function returns %1 if the bulk-read is done, otherwise %0 is returned. | ||
697 | */ | ||
698 | static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) | ||
699 | { | ||
700 | pgoff_t offset = page1->index, end_index; | ||
701 | struct address_space *mapping = page1->mapping; | ||
702 | struct inode *inode = mapping->host; | ||
703 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
704 | struct bu_info *bu; | ||
705 | int err, page_idx, page_cnt, ret = 0, n = 0; | ||
706 | loff_t isize; | ||
707 | |||
708 | bu = kmalloc(sizeof(struct bu_info), GFP_NOFS); | ||
709 | if (!bu) | ||
710 | return 0; | ||
711 | |||
712 | bu->buf_len = c->bulk_read_buf_size; | ||
713 | bu->buf = kmalloc(bu->buf_len, GFP_NOFS); | ||
714 | if (!bu->buf) | ||
715 | goto out_free; | ||
716 | |||
717 | data_key_init(c, &bu->key, inode->i_ino, | ||
718 | offset << UBIFS_BLOCKS_PER_PAGE_SHIFT); | ||
719 | |||
720 | err = ubifs_tnc_get_bu_keys(c, bu); | ||
721 | if (err) | ||
722 | goto out_warn; | ||
723 | |||
724 | if (bu->eof) { | ||
725 | /* Turn off bulk-read at the end of the file */ | ||
726 | ui->read_in_a_row = 1; | ||
727 | ui->bulk_read = 0; | ||
728 | } | ||
729 | |||
730 | page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
731 | if (!page_cnt) { | ||
732 | /* | ||
733 | * This happens when there are multiple blocks per page and the | ||
734 | * blocks for the first page we are looking for, are not | ||
735 | * together. If all the pages were like this, bulk-read would | ||
736 | * reduce performance, so we turn it off for a while. | ||
737 | */ | ||
738 | ui->read_in_a_row = 0; | ||
739 | ui->bulk_read = 0; | ||
740 | goto out_free; | ||
741 | } | ||
742 | |||
743 | if (bu->cnt) { | ||
744 | err = ubifs_tnc_bulk_read(c, bu); | ||
745 | if (err) | ||
746 | goto out_warn; | ||
747 | } | ||
748 | |||
749 | err = populate_page(c, page1, bu, &n); | ||
750 | if (err) | ||
751 | goto out_warn; | ||
752 | |||
753 | unlock_page(page1); | ||
754 | ret = 1; | ||
755 | |||
756 | isize = i_size_read(inode); | ||
757 | if (isize == 0) | ||
758 | goto out_free; | ||
759 | end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); | ||
760 | |||
761 | for (page_idx = 1; page_idx < page_cnt; page_idx++) { | ||
762 | pgoff_t page_offset = offset + page_idx; | ||
763 | struct page *page; | ||
764 | |||
765 | if (page_offset > end_index) | ||
766 | break; | ||
767 | page = find_or_create_page(mapping, page_offset, | ||
768 | GFP_NOFS | __GFP_COLD); | ||
769 | if (!page) | ||
770 | break; | ||
771 | if (!PageUptodate(page)) | ||
772 | err = populate_page(c, page, bu, &n); | ||
773 | unlock_page(page); | ||
774 | page_cache_release(page); | ||
775 | if (err) | ||
776 | break; | ||
777 | } | ||
778 | |||
779 | ui->last_page_read = offset + page_idx - 1; | ||
780 | |||
781 | out_free: | ||
782 | kfree(bu->buf); | ||
783 | kfree(bu); | ||
784 | return ret; | ||
785 | |||
786 | out_warn: | ||
787 | ubifs_warn("ignoring error %d and skipping bulk-read", err); | ||
788 | goto out_free; | ||
789 | } | ||
790 | |||
791 | /** | ||
792 | * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. | ||
793 | * @page: page from which to start bulk-read. | ||
794 | * | ||
795 | * Some flash media are capable of reading sequentially at faster rates. UBIFS | ||
796 | * bulk-read facility is designed to take advantage of that, by reading in one | ||
797 | * go consecutive data nodes that are also located consecutively in the same | ||
798 | * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. | ||
799 | */ | ||
800 | static int ubifs_bulk_read(struct page *page) | ||
801 | { | ||
802 | struct inode *inode = page->mapping->host; | ||
803 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
804 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
805 | pgoff_t index = page->index, last_page_read = ui->last_page_read; | ||
806 | int ret = 0; | ||
807 | |||
808 | ui->last_page_read = index; | ||
809 | |||
810 | if (!c->bulk_read) | ||
811 | return 0; | ||
812 | /* | ||
813 | * Bulk-read is protected by ui_mutex, but it is an optimization, so | ||
814 | * don't bother if we cannot lock the mutex. | ||
815 | */ | ||
816 | if (!mutex_trylock(&ui->ui_mutex)) | ||
817 | return 0; | ||
818 | if (index != last_page_read + 1) { | ||
819 | /* Turn off bulk-read if we stop reading sequentially */ | ||
820 | ui->read_in_a_row = 1; | ||
821 | if (ui->bulk_read) | ||
822 | ui->bulk_read = 0; | ||
823 | goto out_unlock; | ||
824 | } | ||
825 | if (!ui->bulk_read) { | ||
826 | ui->read_in_a_row += 1; | ||
827 | if (ui->read_in_a_row < 3) | ||
828 | goto out_unlock; | ||
829 | /* Three reads in a row, so switch on bulk-read */ | ||
830 | ui->bulk_read = 1; | ||
831 | } | ||
832 | ret = ubifs_do_bulk_read(c, page); | ||
833 | out_unlock: | ||
834 | mutex_unlock(&ui->ui_mutex); | ||
835 | return ret; | ||
836 | } | ||
837 | |||
580 | static int ubifs_readpage(struct file *file, struct page *page) | 838 | static int ubifs_readpage(struct file *file, struct page *page) |
581 | { | 839 | { |
840 | if (ubifs_bulk_read(page)) | ||
841 | return 0; | ||
582 | do_readpage(page); | 842 | do_readpage(page); |
583 | unlock_page(page); | 843 | unlock_page(page); |
584 | return 0; | 844 | return 0; |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 47814cde2407..717d79c97c5e 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c) | |||
901 | * it is needed now for this commit. | 901 | * it is needed now for this commit. |
902 | */ | 902 | */ |
903 | lp = ubifs_lpt_lookup_dirty(c, lnum); | 903 | lp = ubifs_lpt_lookup_dirty(c, lnum); |
904 | if (unlikely(IS_ERR(lp))) | 904 | if (IS_ERR(lp)) |
905 | return PTR_ERR(lp); | 905 | return PTR_ERR(lp); |
906 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | 906 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, |
907 | lp->flags | LPROPS_INDEX, -1); | 907 | lp->flags | LPROPS_INDEX, -1); |
908 | if (unlikely(IS_ERR(lp))) | 908 | if (IS_ERR(lp)) |
909 | return PTR_ERR(lp); | 909 | return PTR_ERR(lp); |
910 | dbg_find("LEB %d, dirty %d and free %d flags %#x", | 910 | dbg_find("LEB %d, dirty %d and free %d flags %#x", |
911 | lp->lnum, lp->dirty, lp->free, lp->flags); | 911 | lp->lnum, lp->dirty, lp->free, lp->flags); |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 02aba36fe3d4..0bef6501d58a 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -96,6 +96,48 @@ static int switch_gc_head(struct ubifs_info *c) | |||
96 | } | 96 | } |
97 | 97 | ||
98 | /** | 98 | /** |
99 | * joinup - bring data nodes for an inode together. | ||
100 | * @c: UBIFS file-system description object | ||
101 | * @sleb: describes scanned LEB | ||
102 | * @inum: inode number | ||
103 | * @blk: block number | ||
104 | * @data: list to which to add data nodes | ||
105 | * | ||
106 | * This function looks at the first few nodes in the scanned LEB @sleb and adds | ||
107 | * them to @data if they are data nodes from @inum and have a larger block | ||
108 | * number than @blk. This function returns %0 on success and a negative error | ||
109 | * code on failure. | ||
110 | */ | ||
111 | static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, | ||
112 | unsigned int blk, struct list_head *data) | ||
113 | { | ||
114 | int err, cnt = 6, lnum = sleb->lnum, offs; | ||
115 | struct ubifs_scan_node *snod, *tmp; | ||
116 | union ubifs_key *key; | ||
117 | |||
118 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | ||
119 | key = &snod->key; | ||
120 | if (key_inum(c, key) == inum && | ||
121 | key_type(c, key) == UBIFS_DATA_KEY && | ||
122 | key_block(c, key) > blk) { | ||
123 | offs = snod->offs; | ||
124 | err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); | ||
125 | if (err < 0) | ||
126 | return err; | ||
127 | list_del(&snod->list); | ||
128 | if (err) { | ||
129 | list_add_tail(&snod->list, data); | ||
130 | blk = key_block(c, key); | ||
131 | } else | ||
132 | kfree(snod); | ||
133 | cnt = 6; | ||
134 | } else if (--cnt == 0) | ||
135 | break; | ||
136 | } | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | /** | ||
99 | * move_nodes - move nodes. | 141 | * move_nodes - move nodes. |
100 | * @c: UBIFS file-system description object | 142 | * @c: UBIFS file-system description object |
101 | * @sleb: describes nodes to move | 143 | * @sleb: describes nodes to move |
@@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c) | |||
116 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | 158 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) |
117 | { | 159 | { |
118 | struct ubifs_scan_node *snod, *tmp; | 160 | struct ubifs_scan_node *snod, *tmp; |
119 | struct list_head large, medium, small; | 161 | struct list_head data, large, medium, small; |
120 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | 162 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; |
121 | int avail, err, min = INT_MAX; | 163 | int avail, err, min = INT_MAX; |
164 | unsigned int blk = 0; | ||
165 | ino_t inum = 0; | ||
122 | 166 | ||
167 | INIT_LIST_HEAD(&data); | ||
123 | INIT_LIST_HEAD(&large); | 168 | INIT_LIST_HEAD(&large); |
124 | INIT_LIST_HEAD(&medium); | 169 | INIT_LIST_HEAD(&medium); |
125 | INIT_LIST_HEAD(&small); | 170 | INIT_LIST_HEAD(&small); |
126 | 171 | ||
127 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | 172 | while (!list_empty(&sleb->nodes)) { |
128 | struct list_head *lst; | 173 | struct list_head *lst = sleb->nodes.next; |
174 | |||
175 | snod = list_entry(lst, struct ubifs_scan_node, list); | ||
129 | 176 | ||
130 | ubifs_assert(snod->type != UBIFS_IDX_NODE); | 177 | ubifs_assert(snod->type != UBIFS_IDX_NODE); |
131 | ubifs_assert(snod->type != UBIFS_REF_NODE); | 178 | ubifs_assert(snod->type != UBIFS_REF_NODE); |
@@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
136 | if (err < 0) | 183 | if (err < 0) |
137 | goto out; | 184 | goto out; |
138 | 185 | ||
139 | lst = &snod->list; | ||
140 | list_del(lst); | 186 | list_del(lst); |
141 | if (!err) { | 187 | if (!err) { |
142 | /* The node is obsolete, remove it from the list */ | 188 | /* The node is obsolete, remove it from the list */ |
@@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
145 | } | 191 | } |
146 | 192 | ||
147 | /* | 193 | /* |
148 | * Sort the list of nodes so that large nodes go first, and | 194 | * Sort the list of nodes so that data nodes go first, large |
149 | * small nodes go last. | 195 | * nodes go second, and small nodes go last. |
150 | */ | 196 | */ |
151 | if (snod->len > MEDIUM_NODE_WM) | 197 | if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { |
152 | list_add(lst, &large); | 198 | if (inum != key_inum(c, &snod->key)) { |
199 | if (inum) { | ||
200 | /* | ||
201 | * Try to move data nodes from the same | ||
202 | * inode together. | ||
203 | */ | ||
204 | err = joinup(c, sleb, inum, blk, &data); | ||
205 | if (err) | ||
206 | goto out; | ||
207 | } | ||
208 | inum = key_inum(c, &snod->key); | ||
209 | blk = key_block(c, &snod->key); | ||
210 | } | ||
211 | list_add_tail(lst, &data); | ||
212 | } else if (snod->len > MEDIUM_NODE_WM) | ||
213 | list_add_tail(lst, &large); | ||
153 | else if (snod->len > SMALL_NODE_WM) | 214 | else if (snod->len > SMALL_NODE_WM) |
154 | list_add(lst, &medium); | 215 | list_add_tail(lst, &medium); |
155 | else | 216 | else |
156 | list_add(lst, &small); | 217 | list_add_tail(lst, &small); |
157 | 218 | ||
158 | /* And find the smallest node */ | 219 | /* And find the smallest node */ |
159 | if (snod->len < min) | 220 | if (snod->len < min) |
@@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
164 | * Join the tree lists so that we'd have one roughly sorted list | 225 | * Join the tree lists so that we'd have one roughly sorted list |
165 | * ('large' will be the head of the joined list). | 226 | * ('large' will be the head of the joined list). |
166 | */ | 227 | */ |
228 | list_splice(&data, &large); | ||
167 | list_splice(&medium, large.prev); | 229 | list_splice(&medium, large.prev); |
168 | list_splice(&small, large.prev); | 230 | list_splice(&small, large.prev); |
169 | 231 | ||
@@ -653,7 +715,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
653 | */ | 715 | */ |
654 | while (1) { | 716 | while (1) { |
655 | lp = ubifs_fast_find_freeable(c); | 717 | lp = ubifs_fast_find_freeable(c); |
656 | if (unlikely(IS_ERR(lp))) { | 718 | if (IS_ERR(lp)) { |
657 | err = PTR_ERR(lp); | 719 | err = PTR_ERR(lp); |
658 | goto out; | 720 | goto out; |
659 | } | 721 | } |
@@ -665,7 +727,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
665 | if (err) | 727 | if (err) |
666 | goto out; | 728 | goto out; |
667 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); | 729 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); |
668 | if (unlikely(IS_ERR(lp))) { | 730 | if (IS_ERR(lp)) { |
669 | err = PTR_ERR(lp); | 731 | err = PTR_ERR(lp); |
670 | goto out; | 732 | goto out; |
671 | } | 733 | } |
@@ -680,7 +742,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
680 | /* Record index freeable LEBs for unmapping after commit */ | 742 | /* Record index freeable LEBs for unmapping after commit */ |
681 | while (1) { | 743 | while (1) { |
682 | lp = ubifs_fast_find_frdi_idx(c); | 744 | lp = ubifs_fast_find_frdi_idx(c); |
683 | if (unlikely(IS_ERR(lp))) { | 745 | if (IS_ERR(lp)) { |
684 | err = PTR_ERR(lp); | 746 | err = PTR_ERR(lp); |
685 | goto out; | 747 | goto out; |
686 | } | 748 | } |
@@ -696,7 +758,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
696 | /* Don't release the LEB until after the next commit */ | 758 | /* Don't release the LEB until after the next commit */ |
697 | flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; | 759 | flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; |
698 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); | 760 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); |
699 | if (unlikely(IS_ERR(lp))) { | 761 | if (IS_ERR(lp)) { |
700 | err = PTR_ERR(lp); | 762 | err = PTR_ERR(lp); |
701 | kfree(idx_gc); | 763 | kfree(idx_gc); |
702 | goto out; | 764 | goto out; |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 054363f2b207..01682713af69 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
62 | { | 62 | { |
63 | if (!c->ro_media) { | 63 | if (!c->ro_media) { |
64 | c->ro_media = 1; | 64 | c->ro_media = 1; |
65 | c->no_chk_data_crc = 0; | ||
65 | ubifs_warn("switched to read-only mode, error %d", err); | 66 | ubifs_warn("switched to read-only mode, error %d", err); |
66 | dbg_dump_stack(); | 67 | dbg_dump_stack(); |
67 | } | 68 | } |
@@ -74,6 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
74 | * @lnum: logical eraseblock number | 75 | * @lnum: logical eraseblock number |
75 | * @offs: offset within the logical eraseblock | 76 | * @offs: offset within the logical eraseblock |
76 | * @quiet: print no messages | 77 | * @quiet: print no messages |
78 | * @chk_crc: indicates whether to always check the CRC | ||
77 | * | 79 | * |
78 | * This function checks node magic number and CRC checksum. This function also | 80 | * This function checks node magic number and CRC checksum. This function also |
79 | * validates node length to prevent UBIFS from becoming crazy when an attacker | 81 | * validates node length to prevent UBIFS from becoming crazy when an attacker |
@@ -85,7 +87,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
85 | * or magic. | 87 | * or magic. |
86 | */ | 88 | */ |
87 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | 89 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, |
88 | int offs, int quiet) | 90 | int offs, int quiet, int chk_crc) |
89 | { | 91 | { |
90 | int err = -EINVAL, type, node_len; | 92 | int err = -EINVAL, type, node_len; |
91 | uint32_t crc, node_crc, magic; | 93 | uint32_t crc, node_crc, magic; |
@@ -121,6 +123,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | |||
121 | node_len > c->ranges[type].max_len) | 123 | node_len > c->ranges[type].max_len) |
122 | goto out_len; | 124 | goto out_len; |
123 | 125 | ||
126 | if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) | ||
127 | if (c->no_chk_data_crc) | ||
128 | return 0; | ||
129 | |||
124 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); | 130 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); |
125 | node_crc = le32_to_cpu(ch->crc); | 131 | node_crc = le32_to_cpu(ch->crc); |
126 | if (crc != node_crc) { | 132 | if (crc != node_crc) { |
@@ -722,7 +728,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | |||
722 | goto out; | 728 | goto out; |
723 | } | 729 | } |
724 | 730 | ||
725 | err = ubifs_check_node(c, buf, lnum, offs, 0); | 731 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
726 | if (err) { | 732 | if (err) { |
727 | ubifs_err("expected node type %d", type); | 733 | ubifs_err("expected node type %d", type); |
728 | return err; | 734 | return err; |
@@ -781,7 +787,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, | |||
781 | goto out; | 787 | goto out; |
782 | } | 788 | } |
783 | 789 | ||
784 | err = ubifs_check_node(c, buf, lnum, offs, 0); | 790 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
785 | if (err) { | 791 | if (err) { |
786 | ubifs_err("expected node type %d", type); | 792 | ubifs_err("expected node type %d", type); |
787 | return err; | 793 | return err; |
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 8f7476007549..9ee65086f627 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
@@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c, | |||
484 | * @key2: the second key to compare | 484 | * @key2: the second key to compare |
485 | * | 485 | * |
486 | * This function compares 2 keys and returns %-1 if @key1 is less than | 486 | * This function compares 2 keys and returns %-1 if @key1 is less than |
487 | * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. | 487 | * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. |
488 | */ | 488 | */ |
489 | static inline int keys_cmp(const struct ubifs_info *c, | 489 | static inline int keys_cmp(const struct ubifs_info *c, |
490 | const union ubifs_key *key1, | 490 | const union ubifs_key *key1, |
@@ -503,6 +503,26 @@ static inline int keys_cmp(const struct ubifs_info *c, | |||
503 | } | 503 | } |
504 | 504 | ||
505 | /** | 505 | /** |
506 | * keys_eq - determine if keys are equivalent. | ||
507 | * @c: UBIFS file-system description object | ||
508 | * @key1: the first key to compare | ||
509 | * @key2: the second key to compare | ||
510 | * | ||
511 | * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and | ||
512 | * %0 if not. | ||
513 | */ | ||
514 | static inline int keys_eq(const struct ubifs_info *c, | ||
515 | const union ubifs_key *key1, | ||
516 | const union ubifs_key *key2) | ||
517 | { | ||
518 | if (key1->u32[0] != key2->u32[0]) | ||
519 | return 0; | ||
520 | if (key1->u32[1] != key2->u32[1]) | ||
521 | return 0; | ||
522 | return 1; | ||
523 | } | ||
524 | |||
525 | /** | ||
506 | * is_hash_key - is a key vulnerable to hash collisions. | 526 | * is_hash_key - is a key vulnerable to hash collisions. |
507 | * @c: UBIFS file-system description object | 527 | * @c: UBIFS file-system description object |
508 | * @key: key | 528 | * @key: key |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 2ba93da71b65..f27176e9b70d 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, | |||
125 | } | 125 | } |
126 | } | 126 | } |
127 | } | 127 | } |
128 | |||
128 | /* Not greater than parent, so compare to children */ | 129 | /* Not greater than parent, so compare to children */ |
129 | while (1) { | 130 | while (1) { |
130 | /* Compare to left child */ | 131 | /* Compare to left child */ |
@@ -460,18 +461,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) | |||
460 | } | 461 | } |
461 | 462 | ||
462 | /** | 463 | /** |
463 | * ubifs_get_lprops - get reference to LEB properties. | ||
464 | * @c: the UBIFS file-system description object | ||
465 | * | ||
466 | * This function locks lprops. Lprops have to be unlocked by | ||
467 | * 'ubifs_release_lprops()'. | ||
468 | */ | ||
469 | void ubifs_get_lprops(struct ubifs_info *c) | ||
470 | { | ||
471 | mutex_lock(&c->lp_mutex); | ||
472 | } | ||
473 | |||
474 | /** | ||
475 | * calc_dark - calculate LEB dark space size. | 464 | * calc_dark - calculate LEB dark space size. |
476 | * @c: the UBIFS file-system description object | 465 | * @c: the UBIFS file-system description object |
477 | * @spc: amount of free and dirty space in the LEB | 466 | * @spc: amount of free and dirty space in the LEB |
@@ -576,7 +565,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
576 | ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); | 565 | ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); |
577 | 566 | ||
578 | spin_lock(&c->space_lock); | 567 | spin_lock(&c->space_lock); |
579 | |||
580 | if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) | 568 | if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) |
581 | c->lst.taken_empty_lebs -= 1; | 569 | c->lst.taken_empty_lebs -= 1; |
582 | 570 | ||
@@ -637,31 +625,12 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
637 | c->lst.taken_empty_lebs += 1; | 625 | c->lst.taken_empty_lebs += 1; |
638 | 626 | ||
639 | change_category(c, lprops); | 627 | change_category(c, lprops); |
640 | |||
641 | c->idx_gc_cnt += idx_gc_cnt; | 628 | c->idx_gc_cnt += idx_gc_cnt; |
642 | |||
643 | spin_unlock(&c->space_lock); | 629 | spin_unlock(&c->space_lock); |
644 | |||
645 | return lprops; | 630 | return lprops; |
646 | } | 631 | } |
647 | 632 | ||
648 | /** | 633 | /** |
649 | * ubifs_release_lprops - release lprops lock. | ||
650 | * @c: the UBIFS file-system description object | ||
651 | * | ||
652 | * This function has to be called after each 'ubifs_get_lprops()' call to | ||
653 | * unlock lprops. | ||
654 | */ | ||
655 | void ubifs_release_lprops(struct ubifs_info *c) | ||
656 | { | ||
657 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
658 | ubifs_assert(c->lst.empty_lebs >= 0 && | ||
659 | c->lst.empty_lebs <= c->main_lebs); | ||
660 | |||
661 | mutex_unlock(&c->lp_mutex); | ||
662 | } | ||
663 | |||
664 | /** | ||
665 | * ubifs_get_lp_stats - get lprops statistics. | 634 | * ubifs_get_lp_stats - get lprops statistics. |
666 | * @c: UBIFS file-system description object | 635 | * @c: UBIFS file-system description object |
667 | * @st: return statistics | 636 | * @st: return statistics |
@@ -1262,7 +1231,6 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1262 | } | 1231 | } |
1263 | 1232 | ||
1264 | ubifs_scan_destroy(sleb); | 1233 | ubifs_scan_destroy(sleb); |
1265 | |||
1266 | return LPT_SCAN_CONTINUE; | 1234 | return LPT_SCAN_CONTINUE; |
1267 | 1235 | ||
1268 | out_print: | 1236 | out_print: |
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 9ff2463177e5..db8bd0e518b2 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c | |||
@@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c) | |||
109 | c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; | 109 | c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; |
110 | c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; | 110 | c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; |
111 | c->lpt_sz += c->ltab_sz; | 111 | c->lpt_sz += c->ltab_sz; |
112 | c->lpt_sz += c->lsave_sz; | 112 | if (c->big_lpt) |
113 | c->lpt_sz += c->lsave_sz; | ||
113 | 114 | ||
114 | /* Add wastage */ | 115 | /* Add wastage */ |
115 | sz = c->lpt_sz; | 116 | sz = c->lpt_sz; |
@@ -287,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) | |||
287 | const int k = 32 - nrbits; | 288 | const int k = 32 - nrbits; |
288 | uint8_t *p = *addr; | 289 | uint8_t *p = *addr; |
289 | int b = *pos; | 290 | int b = *pos; |
290 | uint32_t val; | 291 | uint32_t uninitialized_var(val); |
292 | const int bytes = (nrbits + b + 7) >> 3; | ||
291 | 293 | ||
292 | ubifs_assert(nrbits > 0); | 294 | ubifs_assert(nrbits > 0); |
293 | ubifs_assert(nrbits <= 32); | 295 | ubifs_assert(nrbits <= 32); |
294 | ubifs_assert(*pos >= 0); | 296 | ubifs_assert(*pos >= 0); |
295 | ubifs_assert(*pos < 8); | 297 | ubifs_assert(*pos < 8); |
296 | if (b) { | 298 | if (b) { |
297 | val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | | 299 | switch (bytes) { |
298 | ((uint32_t)p[4] << 24); | 300 | case 2: |
301 | val = p[1]; | ||
302 | break; | ||
303 | case 3: | ||
304 | val = p[1] | ((uint32_t)p[2] << 8); | ||
305 | break; | ||
306 | case 4: | ||
307 | val = p[1] | ((uint32_t)p[2] << 8) | | ||
308 | ((uint32_t)p[3] << 16); | ||
309 | break; | ||
310 | case 5: | ||
311 | val = p[1] | ((uint32_t)p[2] << 8) | | ||
312 | ((uint32_t)p[3] << 16) | | ||
313 | ((uint32_t)p[4] << 24); | ||
314 | } | ||
299 | val <<= (8 - b); | 315 | val <<= (8 - b); |
300 | val |= *p >> b; | 316 | val |= *p >> b; |
301 | nrbits += b; | 317 | nrbits += b; |
302 | } else | 318 | } else { |
303 | val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | | 319 | switch (bytes) { |
304 | ((uint32_t)p[3] << 24); | 320 | case 1: |
321 | val = p[0]; | ||
322 | break; | ||
323 | case 2: | ||
324 | val = p[0] | ((uint32_t)p[1] << 8); | ||
325 | break; | ||
326 | case 3: | ||
327 | val = p[0] | ((uint32_t)p[1] << 8) | | ||
328 | ((uint32_t)p[2] << 16); | ||
329 | break; | ||
330 | case 4: | ||
331 | val = p[0] | ((uint32_t)p[1] << 8) | | ||
332 | ((uint32_t)p[2] << 16) | | ||
333 | ((uint32_t)p[3] << 24); | ||
334 | break; | ||
335 | } | ||
336 | } | ||
305 | val <<= k; | 337 | val <<= k; |
306 | val >>= k; | 338 | val >>= k; |
307 | b = nrbits & 7; | 339 | b = nrbits & 7; |
308 | p += nrbits / 8; | 340 | p += nrbits >> 3; |
309 | *addr = p; | 341 | *addr = p; |
310 | *pos = b; | 342 | *pos = b; |
311 | ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); | 343 | ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 5f0b83e20af6..eed5a0025d63 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) | |||
177 | return 0; | 177 | return 0; |
178 | } | 178 | } |
179 | } | 179 | } |
180 | dbg_err("last LEB %d", *lnum); | ||
181 | dump_stack(); | ||
182 | return -ENOSPC; | 180 | return -ENOSPC; |
183 | } | 181 | } |
184 | 182 | ||
@@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c) | |||
193 | int lnum, offs, len, alen, done_lsave, done_ltab, err; | 191 | int lnum, offs, len, alen, done_lsave, done_ltab, err; |
194 | struct ubifs_cnode *cnode; | 192 | struct ubifs_cnode *cnode; |
195 | 193 | ||
194 | err = dbg_chk_lpt_sz(c, 0, 0); | ||
195 | if (err) | ||
196 | return err; | ||
196 | cnode = c->lpt_cnext; | 197 | cnode = c->lpt_cnext; |
197 | if (!cnode) | 198 | if (!cnode) |
198 | return 0; | 199 | return 0; |
@@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
206 | c->lsave_lnum = lnum; | 207 | c->lsave_lnum = lnum; |
207 | c->lsave_offs = offs; | 208 | c->lsave_offs = offs; |
208 | offs += c->lsave_sz; | 209 | offs += c->lsave_sz; |
210 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
209 | } | 211 | } |
210 | 212 | ||
211 | if (offs + c->ltab_sz <= c->leb_size) { | 213 | if (offs + c->ltab_sz <= c->leb_size) { |
@@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
213 | c->ltab_lnum = lnum; | 215 | c->ltab_lnum = lnum; |
214 | c->ltab_offs = offs; | 216 | c->ltab_offs = offs; |
215 | offs += c->ltab_sz; | 217 | offs += c->ltab_sz; |
218 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
216 | } | 219 | } |
217 | 220 | ||
218 | do { | 221 | do { |
@@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
226 | while (offs + len > c->leb_size) { | 229 | while (offs + len > c->leb_size) { |
227 | alen = ALIGN(offs, c->min_io_size); | 230 | alen = ALIGN(offs, c->min_io_size); |
228 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
232 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
229 | err = alloc_lpt_leb(c, &lnum); | 233 | err = alloc_lpt_leb(c, &lnum); |
230 | if (err) | 234 | if (err) |
231 | return err; | 235 | goto no_space; |
232 | offs = 0; | 236 | offs = 0; |
233 | ubifs_assert(lnum >= c->lpt_first && | 237 | ubifs_assert(lnum >= c->lpt_first && |
234 | lnum <= c->lpt_last); | 238 | lnum <= c->lpt_last); |
@@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
238 | c->lsave_lnum = lnum; | 242 | c->lsave_lnum = lnum; |
239 | c->lsave_offs = offs; | 243 | c->lsave_offs = offs; |
240 | offs += c->lsave_sz; | 244 | offs += c->lsave_sz; |
245 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
241 | continue; | 246 | continue; |
242 | } | 247 | } |
243 | if (!done_ltab) { | 248 | if (!done_ltab) { |
@@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
245 | c->ltab_lnum = lnum; | 250 | c->ltab_lnum = lnum; |
246 | c->ltab_offs = offs; | 251 | c->ltab_offs = offs; |
247 | offs += c->ltab_sz; | 252 | offs += c->ltab_sz; |
253 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
248 | continue; | 254 | continue; |
249 | } | 255 | } |
250 | break; | 256 | break; |
@@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
257 | c->lpt_offs = offs; | 263 | c->lpt_offs = offs; |
258 | } | 264 | } |
259 | offs += len; | 265 | offs += len; |
266 | dbg_chk_lpt_sz(c, 1, len); | ||
260 | cnode = cnode->cnext; | 267 | cnode = cnode->cnext; |
261 | } while (cnode && cnode != c->lpt_cnext); | 268 | } while (cnode && cnode != c->lpt_cnext); |
262 | 269 | ||
@@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
265 | if (offs + c->lsave_sz > c->leb_size) { | 272 | if (offs + c->lsave_sz > c->leb_size) { |
266 | alen = ALIGN(offs, c->min_io_size); | 273 | alen = ALIGN(offs, c->min_io_size); |
267 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
275 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
268 | err = alloc_lpt_leb(c, &lnum); | 276 | err = alloc_lpt_leb(c, &lnum); |
269 | if (err) | 277 | if (err) |
270 | return err; | 278 | goto no_space; |
271 | offs = 0; | 279 | offs = 0; |
272 | ubifs_assert(lnum >= c->lpt_first && | 280 | ubifs_assert(lnum >= c->lpt_first && |
273 | lnum <= c->lpt_last); | 281 | lnum <= c->lpt_last); |
@@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
276 | c->lsave_lnum = lnum; | 284 | c->lsave_lnum = lnum; |
277 | c->lsave_offs = offs; | 285 | c->lsave_offs = offs; |
278 | offs += c->lsave_sz; | 286 | offs += c->lsave_sz; |
287 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
279 | } | 288 | } |
280 | 289 | ||
281 | /* Make sure to place LPT's own lprops table */ | 290 | /* Make sure to place LPT's own lprops table */ |
@@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
283 | if (offs + c->ltab_sz > c->leb_size) { | 292 | if (offs + c->ltab_sz > c->leb_size) { |
284 | alen = ALIGN(offs, c->min_io_size); | 293 | alen = ALIGN(offs, c->min_io_size); |
285 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
295 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
286 | err = alloc_lpt_leb(c, &lnum); | 296 | err = alloc_lpt_leb(c, &lnum); |
287 | if (err) | 297 | if (err) |
288 | return err; | 298 | goto no_space; |
289 | offs = 0; | 299 | offs = 0; |
290 | ubifs_assert(lnum >= c->lpt_first && | 300 | ubifs_assert(lnum >= c->lpt_first && |
291 | lnum <= c->lpt_last); | 301 | lnum <= c->lpt_last); |
@@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c) | |||
294 | c->ltab_lnum = lnum; | 304 | c->ltab_lnum = lnum; |
295 | c->ltab_offs = offs; | 305 | c->ltab_offs = offs; |
296 | offs += c->ltab_sz; | 306 | offs += c->ltab_sz; |
307 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
297 | } | 308 | } |
298 | 309 | ||
299 | alen = ALIGN(offs, c->min_io_size); | 310 | alen = ALIGN(offs, c->min_io_size); |
300 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 311 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
312 | dbg_chk_lpt_sz(c, 4, alen - offs); | ||
313 | err = dbg_chk_lpt_sz(c, 3, alen); | ||
314 | if (err) | ||
315 | return err; | ||
301 | return 0; | 316 | return 0; |
317 | |||
318 | no_space: | ||
319 | ubifs_err("LPT out of space"); | ||
320 | dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " | ||
321 | "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | ||
322 | dbg_dump_lpt_info(c); | ||
323 | return err; | ||
302 | } | 324 | } |
303 | 325 | ||
304 | /** | 326 | /** |
@@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) | |||
333 | *lnum = i + c->lpt_first; | 355 | *lnum = i + c->lpt_first; |
334 | return 0; | 356 | return 0; |
335 | } | 357 | } |
336 | dbg_err("last LEB %d", *lnum); | ||
337 | dump_stack(); | ||
338 | return -ENOSPC; | 358 | return -ENOSPC; |
339 | } | 359 | } |
340 | 360 | ||
@@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c) | |||
369 | done_lsave = 1; | 389 | done_lsave = 1; |
370 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 390 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
371 | offs += c->lsave_sz; | 391 | offs += c->lsave_sz; |
392 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
372 | } | 393 | } |
373 | 394 | ||
374 | if (offs + c->ltab_sz <= c->leb_size) { | 395 | if (offs + c->ltab_sz <= c->leb_size) { |
375 | done_ltab = 1; | 396 | done_ltab = 1; |
376 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 397 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
377 | offs += c->ltab_sz; | 398 | offs += c->ltab_sz; |
399 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
378 | } | 400 | } |
379 | 401 | ||
380 | /* Loop for each cnode */ | 402 | /* Loop for each cnode */ |
@@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
392 | alen, UBI_SHORTTERM); | 414 | alen, UBI_SHORTTERM); |
393 | if (err) | 415 | if (err) |
394 | return err; | 416 | return err; |
417 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
395 | } | 418 | } |
419 | dbg_chk_lpt_sz(c, 2, 0); | ||
396 | err = realloc_lpt_leb(c, &lnum); | 420 | err = realloc_lpt_leb(c, &lnum); |
397 | if (err) | 421 | if (err) |
398 | return err; | 422 | goto no_space; |
399 | offs = 0; | 423 | offs = 0; |
400 | from = 0; | 424 | from = 0; |
401 | ubifs_assert(lnum >= c->lpt_first && | 425 | ubifs_assert(lnum >= c->lpt_first && |
@@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c) | |||
408 | done_lsave = 1; | 432 | done_lsave = 1; |
409 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 433 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
410 | offs += c->lsave_sz; | 434 | offs += c->lsave_sz; |
435 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
411 | continue; | 436 | continue; |
412 | } | 437 | } |
413 | if (!done_ltab) { | 438 | if (!done_ltab) { |
414 | done_ltab = 1; | 439 | done_ltab = 1; |
415 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 440 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
416 | offs += c->ltab_sz; | 441 | offs += c->ltab_sz; |
442 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
417 | continue; | 443 | continue; |
418 | } | 444 | } |
419 | break; | 445 | break; |
@@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
435 | clear_bit(COW_ZNODE, &cnode->flags); | 461 | clear_bit(COW_ZNODE, &cnode->flags); |
436 | smp_mb__after_clear_bit(); | 462 | smp_mb__after_clear_bit(); |
437 | offs += len; | 463 | offs += len; |
464 | dbg_chk_lpt_sz(c, 1, len); | ||
438 | cnode = cnode->cnext; | 465 | cnode = cnode->cnext; |
439 | } while (cnode && cnode != c->lpt_cnext); | 466 | } while (cnode && cnode != c->lpt_cnext); |
440 | 467 | ||
@@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c) | |||
448 | UBI_SHORTTERM); | 475 | UBI_SHORTTERM); |
449 | if (err) | 476 | if (err) |
450 | return err; | 477 | return err; |
478 | dbg_chk_lpt_sz(c, 2, alen - wlen); | ||
451 | err = realloc_lpt_leb(c, &lnum); | 479 | err = realloc_lpt_leb(c, &lnum); |
452 | if (err) | 480 | if (err) |
453 | return err; | 481 | goto no_space; |
454 | offs = 0; | 482 | offs = 0; |
455 | ubifs_assert(lnum >= c->lpt_first && | 483 | ubifs_assert(lnum >= c->lpt_first && |
456 | lnum <= c->lpt_last); | 484 | lnum <= c->lpt_last); |
@@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
461 | done_lsave = 1; | 489 | done_lsave = 1; |
462 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 490 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
463 | offs += c->lsave_sz; | 491 | offs += c->lsave_sz; |
492 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
464 | } | 493 | } |
465 | 494 | ||
466 | /* Make sure to place LPT's own lprops table */ | 495 | /* Make sure to place LPT's own lprops table */ |
@@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c) | |||
473 | UBI_SHORTTERM); | 502 | UBI_SHORTTERM); |
474 | if (err) | 503 | if (err) |
475 | return err; | 504 | return err; |
505 | dbg_chk_lpt_sz(c, 2, alen - wlen); | ||
476 | err = realloc_lpt_leb(c, &lnum); | 506 | err = realloc_lpt_leb(c, &lnum); |
477 | if (err) | 507 | if (err) |
478 | return err; | 508 | goto no_space; |
479 | offs = 0; | 509 | offs = 0; |
480 | ubifs_assert(lnum >= c->lpt_first && | 510 | ubifs_assert(lnum >= c->lpt_first && |
481 | lnum <= c->lpt_last); | 511 | lnum <= c->lpt_last); |
@@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
486 | done_ltab = 1; | 516 | done_ltab = 1; |
487 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 517 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
488 | offs += c->ltab_sz; | 518 | offs += c->ltab_sz; |
519 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
489 | } | 520 | } |
490 | 521 | ||
491 | /* Write remaining data in buffer */ | 522 | /* Write remaining data in buffer */ |
@@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
495 | err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); | 526 | err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); |
496 | if (err) | 527 | if (err) |
497 | return err; | 528 | return err; |
529 | |||
530 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
531 | err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size)); | ||
532 | if (err) | ||
533 | return err; | ||
534 | |||
498 | c->nhead_lnum = lnum; | 535 | c->nhead_lnum = lnum; |
499 | c->nhead_offs = ALIGN(offs, c->min_io_size); | 536 | c->nhead_offs = ALIGN(offs, c->min_io_size); |
500 | 537 | ||
@@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c) | |||
503 | dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); | 540 | dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); |
504 | if (c->big_lpt) | 541 | if (c->big_lpt) |
505 | dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); | 542 | dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); |
543 | |||
506 | return 0; | 544 | return 0; |
545 | |||
546 | no_space: | ||
547 | ubifs_err("LPT out of space mismatch"); | ||
548 | dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " | ||
549 | "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | ||
550 | dbg_dump_lpt_info(c); | ||
551 | return err; | ||
507 | } | 552 | } |
508 | 553 | ||
509 | /** | 554 | /** |
@@ -1044,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) | |||
1044 | int pos = 0, node_type, node_len; | 1089 | int pos = 0, node_type, node_len; |
1045 | uint16_t crc, calc_crc; | 1090 | uint16_t crc, calc_crc; |
1046 | 1091 | ||
1092 | if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8) | ||
1093 | return 0; | ||
1047 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); | 1094 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); |
1048 | if (node_type == UBIFS_LPT_NOT_A_NODE) | 1095 | if (node_type == UBIFS_LPT_NOT_A_NODE) |
1049 | return 0; | 1096 | return 0; |
@@ -1156,6 +1203,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c) | |||
1156 | dbg_lp(""); | 1203 | dbg_lp(""); |
1157 | 1204 | ||
1158 | mutex_lock(&c->lp_mutex); | 1205 | mutex_lock(&c->lp_mutex); |
1206 | err = dbg_chk_lpt_free_spc(c); | ||
1207 | if (err) | ||
1208 | goto out; | ||
1159 | err = dbg_check_ltab(c); | 1209 | err = dbg_check_ltab(c); |
1160 | if (err) | 1210 | if (err) |
1161 | goto out; | 1211 | goto out; |
@@ -1645,4 +1695,121 @@ int dbg_check_ltab(struct ubifs_info *c) | |||
1645 | return 0; | 1695 | return 0; |
1646 | } | 1696 | } |
1647 | 1697 | ||
1698 | /** | ||
1699 | * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT. | ||
1700 | * @c: the UBIFS file-system description object | ||
1701 | * | ||
1702 | * This function returns %0 on success and a negative error code on failure. | ||
1703 | */ | ||
1704 | int dbg_chk_lpt_free_spc(struct ubifs_info *c) | ||
1705 | { | ||
1706 | long long free = 0; | ||
1707 | int i; | ||
1708 | |||
1709 | for (i = 0; i < c->lpt_lebs; i++) { | ||
1710 | if (c->ltab[i].tgc || c->ltab[i].cmt) | ||
1711 | continue; | ||
1712 | if (i + c->lpt_first == c->nhead_lnum) | ||
1713 | free += c->leb_size - c->nhead_offs; | ||
1714 | else if (c->ltab[i].free == c->leb_size) | ||
1715 | free += c->leb_size; | ||
1716 | } | ||
1717 | if (free < c->lpt_sz) { | ||
1718 | dbg_err("LPT space error: free %lld lpt_sz %lld", | ||
1719 | free, c->lpt_sz); | ||
1720 | dbg_dump_lpt_info(c); | ||
1721 | return -EINVAL; | ||
1722 | } | ||
1723 | return 0; | ||
1724 | } | ||
1725 | |||
1726 | /** | ||
1727 | * dbg_chk_lpt_sz - check LPT does not write more than LPT size. | ||
1728 | * @c: the UBIFS file-system description object | ||
1729 | * @action: action | ||
1730 | * @len: length written | ||
1731 | * | ||
1732 | * This function returns %0 on success and a negative error code on failure. | ||
1733 | */ | ||
1734 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) | ||
1735 | { | ||
1736 | long long chk_lpt_sz, lpt_sz; | ||
1737 | int err = 0; | ||
1738 | |||
1739 | switch (action) { | ||
1740 | case 0: | ||
1741 | c->chk_lpt_sz = 0; | ||
1742 | c->chk_lpt_sz2 = 0; | ||
1743 | c->chk_lpt_lebs = 0; | ||
1744 | c->chk_lpt_wastage = 0; | ||
1745 | if (c->dirty_pn_cnt > c->pnode_cnt) { | ||
1746 | dbg_err("dirty pnodes %d exceed max %d", | ||
1747 | c->dirty_pn_cnt, c->pnode_cnt); | ||
1748 | err = -EINVAL; | ||
1749 | } | ||
1750 | if (c->dirty_nn_cnt > c->nnode_cnt) { | ||
1751 | dbg_err("dirty nnodes %d exceed max %d", | ||
1752 | c->dirty_nn_cnt, c->nnode_cnt); | ||
1753 | err = -EINVAL; | ||
1754 | } | ||
1755 | return err; | ||
1756 | case 1: | ||
1757 | c->chk_lpt_sz += len; | ||
1758 | return 0; | ||
1759 | case 2: | ||
1760 | c->chk_lpt_sz += len; | ||
1761 | c->chk_lpt_wastage += len; | ||
1762 | c->chk_lpt_lebs += 1; | ||
1763 | return 0; | ||
1764 | case 3: | ||
1765 | chk_lpt_sz = c->leb_size; | ||
1766 | chk_lpt_sz *= c->chk_lpt_lebs; | ||
1767 | chk_lpt_sz += len - c->nhead_offs; | ||
1768 | if (c->chk_lpt_sz != chk_lpt_sz) { | ||
1769 | dbg_err("LPT wrote %lld but space used was %lld", | ||
1770 | c->chk_lpt_sz, chk_lpt_sz); | ||
1771 | err = -EINVAL; | ||
1772 | } | ||
1773 | if (c->chk_lpt_sz > c->lpt_sz) { | ||
1774 | dbg_err("LPT wrote %lld but lpt_sz is %lld", | ||
1775 | c->chk_lpt_sz, c->lpt_sz); | ||
1776 | err = -EINVAL; | ||
1777 | } | ||
1778 | if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { | ||
1779 | dbg_err("LPT layout size %lld but wrote %lld", | ||
1780 | c->chk_lpt_sz, c->chk_lpt_sz2); | ||
1781 | err = -EINVAL; | ||
1782 | } | ||
1783 | if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { | ||
1784 | dbg_err("LPT new nhead offs: expected %d was %d", | ||
1785 | c->new_nhead_offs, len); | ||
1786 | err = -EINVAL; | ||
1787 | } | ||
1788 | lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; | ||
1789 | lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; | ||
1790 | lpt_sz += c->ltab_sz; | ||
1791 | if (c->big_lpt) | ||
1792 | lpt_sz += c->lsave_sz; | ||
1793 | if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { | ||
1794 | dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", | ||
1795 | c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); | ||
1796 | err = -EINVAL; | ||
1797 | } | ||
1798 | if (err) | ||
1799 | dbg_dump_lpt_info(c); | ||
1800 | c->chk_lpt_sz2 = c->chk_lpt_sz; | ||
1801 | c->chk_lpt_sz = 0; | ||
1802 | c->chk_lpt_wastage = 0; | ||
1803 | c->chk_lpt_lebs = 0; | ||
1804 | c->new_nhead_offs = len; | ||
1805 | return err; | ||
1806 | case 4: | ||
1807 | c->chk_lpt_sz += len; | ||
1808 | c->chk_lpt_wastage += len; | ||
1809 | return 0; | ||
1810 | default: | ||
1811 | return -EINVAL; | ||
1812 | } | ||
1813 | } | ||
1814 | |||
1648 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | 1815 | #endif /* CONFIG_UBIFS_FS_DEBUG */ |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4c12a9215d7f..4fa81d867e41 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c, | |||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | 310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); |
311 | } | 311 | } |
312 | 312 | ||
313 | /** | ||
314 | * ubifs_get_lprops - get reference to LEB properties. | ||
315 | * @c: the UBIFS file-system description object | ||
316 | * | ||
317 | * This function locks lprops. Lprops have to be unlocked by | ||
318 | * 'ubifs_release_lprops()'. | ||
319 | */ | ||
320 | static inline void ubifs_get_lprops(struct ubifs_info *c) | ||
321 | { | ||
322 | mutex_lock(&c->lp_mutex); | ||
323 | } | ||
324 | |||
325 | /** | ||
326 | * ubifs_release_lprops - release lprops lock. | ||
327 | * @c: the UBIFS file-system description object | ||
328 | * | ||
329 | * This function has to be called after each 'ubifs_get_lprops()' call to | ||
330 | * unlock lprops. | ||
331 | */ | ||
332 | static inline void ubifs_release_lprops(struct ubifs_info *c) | ||
333 | { | ||
334 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
335 | ubifs_assert(c->lst.empty_lebs >= 0 && | ||
336 | c->lst.empty_lebs <= c->main_lebs); | ||
337 | mutex_unlock(&c->lp_mutex); | ||
338 | } | ||
339 | |||
313 | #endif /* __UBIFS_MISC_H__ */ | 340 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index acf5c5fffc60..0ed82479b44b 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c | |||
@@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
87 | 87 | ||
88 | dbg_scan("scanning %s", dbg_ntype(ch->node_type)); | 88 | dbg_scan("scanning %s", dbg_ntype(ch->node_type)); |
89 | 89 | ||
90 | if (ubifs_check_node(c, buf, lnum, offs, quiet)) | 90 | if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) |
91 | return SCANNED_A_CORRUPT_NODE; | 91 | return SCANNED_A_CORRUPT_NODE; |
92 | 92 | ||
93 | if (ch->node_type == UBIFS_PAD_NODE) { | 93 | if (ch->node_type == UBIFS_PAD_NODE) { |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 9a9220333b3b..8780efbf40ac 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -401,6 +401,16 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
401 | else if (c->mount_opts.unmount_mode == 1) | 401 | else if (c->mount_opts.unmount_mode == 1) |
402 | seq_printf(s, ",norm_unmount"); | 402 | seq_printf(s, ",norm_unmount"); |
403 | 403 | ||
404 | if (c->mount_opts.bulk_read == 2) | ||
405 | seq_printf(s, ",bulk_read"); | ||
406 | else if (c->mount_opts.bulk_read == 1) | ||
407 | seq_printf(s, ",no_bulk_read"); | ||
408 | |||
409 | if (c->mount_opts.chk_data_crc == 2) | ||
410 | seq_printf(s, ",chk_data_crc"); | ||
411 | else if (c->mount_opts.chk_data_crc == 1) | ||
412 | seq_printf(s, ",no_chk_data_crc"); | ||
413 | |||
404 | return 0; | 414 | return 0; |
405 | } | 415 | } |
406 | 416 | ||
@@ -408,13 +418,26 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
408 | { | 418 | { |
409 | struct ubifs_info *c = sb->s_fs_info; | 419 | struct ubifs_info *c = sb->s_fs_info; |
410 | int i, ret = 0, err; | 420 | int i, ret = 0, err; |
421 | long long bud_bytes; | ||
411 | 422 | ||
412 | if (c->jheads) | 423 | if (c->jheads) { |
413 | for (i = 0; i < c->jhead_cnt; i++) { | 424 | for (i = 0; i < c->jhead_cnt; i++) { |
414 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | 425 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); |
415 | if (err && !ret) | 426 | if (err && !ret) |
416 | ret = err; | 427 | ret = err; |
417 | } | 428 | } |
429 | |||
430 | /* Commit the journal unless it has too little data */ | ||
431 | spin_lock(&c->buds_lock); | ||
432 | bud_bytes = c->bud_bytes; | ||
433 | spin_unlock(&c->buds_lock); | ||
434 | if (bud_bytes > c->leb_size) { | ||
435 | err = ubifs_run_commit(c); | ||
436 | if (err) | ||
437 | return err; | ||
438 | } | ||
439 | } | ||
440 | |||
418 | /* | 441 | /* |
419 | * We ought to call sync for c->ubi but it does not have one. If it had | 442 | * We ought to call sync for c->ubi but it does not have one. If it had |
420 | * it would in turn call mtd->sync, however mtd operations are | 443 | * it would in turn call mtd->sync, however mtd operations are |
@@ -538,6 +561,18 @@ static int init_constants_early(struct ubifs_info *c) | |||
538 | * calculations when reporting free space. | 561 | * calculations when reporting free space. |
539 | */ | 562 | */ |
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | 563 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; |
564 | /* Buffer size for bulk-reads */ | ||
565 | c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; | ||
566 | if (c->bulk_read_buf_size > c->leb_size) | ||
567 | c->bulk_read_buf_size = c->leb_size; | ||
568 | if (c->bulk_read_buf_size > 128 * 1024) { | ||
569 | /* Check if we can kmalloc more than 128KiB */ | ||
570 | void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL); | ||
571 | |||
572 | kfree(try); | ||
573 | if (!try) | ||
574 | c->bulk_read_buf_size = 128 * 1024; | ||
575 | } | ||
541 | return 0; | 576 | return 0; |
542 | } | 577 | } |
543 | 578 | ||
@@ -840,17 +875,29 @@ static int check_volume_empty(struct ubifs_info *c) | |||
840 | * | 875 | * |
841 | * Opt_fast_unmount: do not run a journal commit before un-mounting | 876 | * Opt_fast_unmount: do not run a journal commit before un-mounting |
842 | * Opt_norm_unmount: run a journal commit before un-mounting | 877 | * Opt_norm_unmount: run a journal commit before un-mounting |
878 | * Opt_bulk_read: enable bulk-reads | ||
879 | * Opt_no_bulk_read: disable bulk-reads | ||
880 | * Opt_chk_data_crc: check CRCs when reading data nodes | ||
881 | * Opt_no_chk_data_crc: do not check CRCs when reading data nodes | ||
843 | * Opt_err: just end of array marker | 882 | * Opt_err: just end of array marker |
844 | */ | 883 | */ |
845 | enum { | 884 | enum { |
846 | Opt_fast_unmount, | 885 | Opt_fast_unmount, |
847 | Opt_norm_unmount, | 886 | Opt_norm_unmount, |
887 | Opt_bulk_read, | ||
888 | Opt_no_bulk_read, | ||
889 | Opt_chk_data_crc, | ||
890 | Opt_no_chk_data_crc, | ||
848 | Opt_err, | 891 | Opt_err, |
849 | }; | 892 | }; |
850 | 893 | ||
851 | static const match_table_t tokens = { | 894 | static const match_table_t tokens = { |
852 | {Opt_fast_unmount, "fast_unmount"}, | 895 | {Opt_fast_unmount, "fast_unmount"}, |
853 | {Opt_norm_unmount, "norm_unmount"}, | 896 | {Opt_norm_unmount, "norm_unmount"}, |
897 | {Opt_bulk_read, "bulk_read"}, | ||
898 | {Opt_no_bulk_read, "no_bulk_read"}, | ||
899 | {Opt_chk_data_crc, "chk_data_crc"}, | ||
900 | {Opt_no_chk_data_crc, "no_chk_data_crc"}, | ||
854 | {Opt_err, NULL}, | 901 | {Opt_err, NULL}, |
855 | }; | 902 | }; |
856 | 903 | ||
@@ -888,6 +935,22 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, | |||
888 | c->mount_opts.unmount_mode = 1; | 935 | c->mount_opts.unmount_mode = 1; |
889 | c->fast_unmount = 0; | 936 | c->fast_unmount = 0; |
890 | break; | 937 | break; |
938 | case Opt_bulk_read: | ||
939 | c->mount_opts.bulk_read = 2; | ||
940 | c->bulk_read = 1; | ||
941 | break; | ||
942 | case Opt_no_bulk_read: | ||
943 | c->mount_opts.bulk_read = 1; | ||
944 | c->bulk_read = 0; | ||
945 | break; | ||
946 | case Opt_chk_data_crc: | ||
947 | c->mount_opts.chk_data_crc = 2; | ||
948 | c->no_chk_data_crc = 0; | ||
949 | break; | ||
950 | case Opt_no_chk_data_crc: | ||
951 | c->mount_opts.chk_data_crc = 1; | ||
952 | c->no_chk_data_crc = 1; | ||
953 | break; | ||
891 | default: | 954 | default: |
892 | ubifs_err("unrecognized mount option \"%s\" " | 955 | ubifs_err("unrecognized mount option \"%s\" " |
893 | "or missing value", p); | 956 | "or missing value", p); |
@@ -996,6 +1059,8 @@ static int mount_ubifs(struct ubifs_info *c) | |||
996 | goto out_free; | 1059 | goto out_free; |
997 | } | 1060 | } |
998 | 1061 | ||
1062 | c->always_chk_crc = 1; | ||
1063 | |||
999 | err = ubifs_read_superblock(c); | 1064 | err = ubifs_read_superblock(c); |
1000 | if (err) | 1065 | if (err) |
1001 | goto out_free; | 1066 | goto out_free; |
@@ -1032,8 +1097,6 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1032 | 1097 | ||
1033 | /* Create background thread */ | 1098 | /* Create background thread */ |
1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1099 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1035 | if (!c->bgt) | ||
1036 | c->bgt = ERR_PTR(-EINVAL); | ||
1037 | if (IS_ERR(c->bgt)) { | 1100 | if (IS_ERR(c->bgt)) { |
1038 | err = PTR_ERR(c->bgt); | 1101 | err = PTR_ERR(c->bgt); |
1039 | c->bgt = NULL; | 1102 | c->bgt = NULL; |
@@ -1139,24 +1202,28 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1139 | if (err) | 1202 | if (err) |
1140 | goto out_infos; | 1203 | goto out_infos; |
1141 | 1204 | ||
1205 | c->always_chk_crc = 0; | ||
1206 | |||
1142 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", | 1207 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", |
1143 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); | 1208 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); |
1144 | if (mounted_read_only) | 1209 | if (mounted_read_only) |
1145 | ubifs_msg("mounted read-only"); | 1210 | ubifs_msg("mounted read-only"); |
1146 | x = (long long)c->main_lebs * c->leb_size; | 1211 | x = (long long)c->main_lebs * c->leb_size; |
1147 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | 1212 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " |
1148 | x, x >> 10, x >> 20, c->main_lebs); | 1213 | "LEBs)", x, x >> 10, x >> 20, c->main_lebs); |
1149 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1214 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
1150 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | 1215 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " |
1151 | x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | 1216 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); |
1152 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | 1217 | ubifs_msg("media format: %d (latest is %d)", |
1153 | ubifs_msg("media format %d, latest format %d", | ||
1154 | c->fmt_version, UBIFS_FORMAT_VERSION); | 1218 | c->fmt_version, UBIFS_FORMAT_VERSION); |
1219 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | ||
1220 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | ||
1221 | c->report_rp_size, c->report_rp_size >> 10); | ||
1155 | 1222 | ||
1156 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); | 1223 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); |
1157 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); | 1224 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); |
1158 | dbg_msg("LEB size: %d bytes (%d KiB)", | 1225 | dbg_msg("LEB size: %d bytes (%d KiB)", |
1159 | c->leb_size, c->leb_size / 1024); | 1226 | c->leb_size, c->leb_size >> 10); |
1160 | dbg_msg("data journal heads: %d", | 1227 | dbg_msg("data journal heads: %d", |
1161 | c->jhead_cnt - NONDATA_JHEADS_CNT); | 1228 | c->jhead_cnt - NONDATA_JHEADS_CNT); |
1162 | dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" | 1229 | dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" |
@@ -1282,6 +1349,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1282 | 1349 | ||
1283 | mutex_lock(&c->umount_mutex); | 1350 | mutex_lock(&c->umount_mutex); |
1284 | c->remounting_rw = 1; | 1351 | c->remounting_rw = 1; |
1352 | c->always_chk_crc = 1; | ||
1285 | 1353 | ||
1286 | /* Check for enough free space */ | 1354 | /* Check for enough free space */ |
1287 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { | 1355 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { |
@@ -1345,20 +1413,20 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1345 | 1413 | ||
1346 | /* Create background thread */ | 1414 | /* Create background thread */ |
1347 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1415 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1348 | if (!c->bgt) | ||
1349 | c->bgt = ERR_PTR(-EINVAL); | ||
1350 | if (IS_ERR(c->bgt)) { | 1416 | if (IS_ERR(c->bgt)) { |
1351 | err = PTR_ERR(c->bgt); | 1417 | err = PTR_ERR(c->bgt); |
1352 | c->bgt = NULL; | 1418 | c->bgt = NULL; |
1353 | ubifs_err("cannot spawn \"%s\", error %d", | 1419 | ubifs_err("cannot spawn \"%s\", error %d", |
1354 | c->bgt_name, err); | 1420 | c->bgt_name, err); |
1355 | return err; | 1421 | goto out; |
1356 | } | 1422 | } |
1357 | wake_up_process(c->bgt); | 1423 | wake_up_process(c->bgt); |
1358 | 1424 | ||
1359 | c->orph_buf = vmalloc(c->leb_size); | 1425 | c->orph_buf = vmalloc(c->leb_size); |
1360 | if (!c->orph_buf) | 1426 | if (!c->orph_buf) { |
1361 | return -ENOMEM; | 1427 | err = -ENOMEM; |
1428 | goto out; | ||
1429 | } | ||
1362 | 1430 | ||
1363 | /* Check for enough log space */ | 1431 | /* Check for enough log space */ |
1364 | lnum = c->lhead_lnum + 1; | 1432 | lnum = c->lhead_lnum + 1; |
@@ -1385,6 +1453,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1385 | dbg_gen("re-mounted read-write"); | 1453 | dbg_gen("re-mounted read-write"); |
1386 | c->vfs_sb->s_flags &= ~MS_RDONLY; | 1454 | c->vfs_sb->s_flags &= ~MS_RDONLY; |
1387 | c->remounting_rw = 0; | 1455 | c->remounting_rw = 0; |
1456 | c->always_chk_crc = 0; | ||
1388 | mutex_unlock(&c->umount_mutex); | 1457 | mutex_unlock(&c->umount_mutex); |
1389 | return 0; | 1458 | return 0; |
1390 | 1459 | ||
@@ -1400,6 +1469,7 @@ out: | |||
1400 | c->ileb_buf = NULL; | 1469 | c->ileb_buf = NULL; |
1401 | ubifs_lpt_free(c, 1); | 1470 | ubifs_lpt_free(c, 1); |
1402 | c->remounting_rw = 0; | 1471 | c->remounting_rw = 0; |
1472 | c->always_chk_crc = 0; | ||
1403 | mutex_unlock(&c->umount_mutex); | 1473 | mutex_unlock(&c->umount_mutex); |
1404 | return err; | 1474 | return err; |
1405 | } | 1475 | } |
@@ -1408,12 +1478,9 @@ out: | |||
1408 | * commit_on_unmount - commit the journal when un-mounting. | 1478 | * commit_on_unmount - commit the journal when un-mounting. |
1409 | * @c: UBIFS file-system description object | 1479 | * @c: UBIFS file-system description object |
1410 | * | 1480 | * |
1411 | * This function is called during un-mounting and it commits the journal unless | 1481 | * This function is called during un-mounting and re-mounting, and it commits |
1412 | * the "fast unmount" mode is enabled. It also avoids committing the journal if | 1482 | * the journal unless the "fast unmount" mode is enabled. It also avoids |
1413 | * it contains too few data. | 1483 | * committing the journal if it contains too few data. |
1414 | * | ||
1415 | * Sometimes recovery requires the journal to be committed at least once, and | ||
1416 | * this function takes care about this. | ||
1417 | */ | 1484 | */ |
1418 | static void commit_on_unmount(struct ubifs_info *c) | 1485 | static void commit_on_unmount(struct ubifs_info *c) |
1419 | { | 1486 | { |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 7634c5970887..d27fd918b9c9 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | zn = copy_znode(c, znode); | 286 | zn = copy_znode(c, znode); |
287 | if (unlikely(IS_ERR(zn))) | 287 | if (IS_ERR(zn)) |
288 | return zn; | 288 | return zn; |
289 | 289 | ||
290 | if (zbr->len) { | 290 | if (zbr->len) { |
@@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, | |||
470 | if (node_len != len) | 470 | if (node_len != len) |
471 | return 0; | 471 | return 0; |
472 | 472 | ||
473 | if (type == UBIFS_DATA_NODE && !c->always_chk_crc) | ||
474 | if (c->no_chk_data_crc) | ||
475 | return 0; | ||
476 | |||
473 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); | 477 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); |
474 | node_crc = le32_to_cpu(ch->crc); | 478 | node_crc = le32_to_cpu(ch->crc); |
475 | if (crc != node_crc) | 479 | if (crc != node_crc) |
@@ -1128,7 +1132,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, | |||
1128 | ubifs_assert(znode == c->zroot.znode); | 1132 | ubifs_assert(znode == c->zroot.znode); |
1129 | znode = dirty_cow_znode(c, &c->zroot); | 1133 | znode = dirty_cow_znode(c, &c->zroot); |
1130 | } | 1134 | } |
1131 | if (unlikely(IS_ERR(znode)) || !p) | 1135 | if (IS_ERR(znode) || !p) |
1132 | break; | 1136 | break; |
1133 | ubifs_assert(path[p - 1] >= 0); | 1137 | ubifs_assert(path[p - 1] >= 0); |
1134 | ubifs_assert(path[p - 1] < znode->child_cnt); | 1138 | ubifs_assert(path[p - 1] < znode->child_cnt); |
@@ -1492,6 +1496,289 @@ out: | |||
1492 | } | 1496 | } |
1493 | 1497 | ||
1494 | /** | 1498 | /** |
1499 | * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. | ||
1500 | * @c: UBIFS file-system description object | ||
1501 | * @bu: bulk-read parameters and results | ||
1502 | * | ||
1503 | * Lookup consecutive data node keys for the same inode that reside | ||
1504 | * consecutively in the same LEB. | ||
1505 | */ | ||
1506 | int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) | ||
1507 | { | ||
1508 | int n, err = 0, lnum = -1, uninitialized_var(offs); | ||
1509 | int uninitialized_var(len); | ||
1510 | unsigned int block = key_block(c, &bu->key); | ||
1511 | struct ubifs_znode *znode; | ||
1512 | |||
1513 | bu->cnt = 0; | ||
1514 | bu->blk_cnt = 0; | ||
1515 | bu->eof = 0; | ||
1516 | |||
1517 | mutex_lock(&c->tnc_mutex); | ||
1518 | /* Find first key */ | ||
1519 | err = ubifs_lookup_level0(c, &bu->key, &znode, &n); | ||
1520 | if (err < 0) | ||
1521 | goto out; | ||
1522 | if (err) { | ||
1523 | /* Key found */ | ||
1524 | len = znode->zbranch[n].len; | ||
1525 | /* The buffer must be big enough for at least 1 node */ | ||
1526 | if (len > bu->buf_len) { | ||
1527 | err = -EINVAL; | ||
1528 | goto out; | ||
1529 | } | ||
1530 | /* Add this key */ | ||
1531 | bu->zbranch[bu->cnt++] = znode->zbranch[n]; | ||
1532 | bu->blk_cnt += 1; | ||
1533 | lnum = znode->zbranch[n].lnum; | ||
1534 | offs = ALIGN(znode->zbranch[n].offs + len, 8); | ||
1535 | } | ||
1536 | while (1) { | ||
1537 | struct ubifs_zbranch *zbr; | ||
1538 | union ubifs_key *key; | ||
1539 | unsigned int next_block; | ||
1540 | |||
1541 | /* Find next key */ | ||
1542 | err = tnc_next(c, &znode, &n); | ||
1543 | if (err) | ||
1544 | goto out; | ||
1545 | zbr = &znode->zbranch[n]; | ||
1546 | key = &zbr->key; | ||
1547 | /* See if there is another data key for this file */ | ||
1548 | if (key_inum(c, key) != key_inum(c, &bu->key) || | ||
1549 | key_type(c, key) != UBIFS_DATA_KEY) { | ||
1550 | err = -ENOENT; | ||
1551 | goto out; | ||
1552 | } | ||
1553 | if (lnum < 0) { | ||
1554 | /* First key found */ | ||
1555 | lnum = zbr->lnum; | ||
1556 | offs = ALIGN(zbr->offs + zbr->len, 8); | ||
1557 | len = zbr->len; | ||
1558 | if (len > bu->buf_len) { | ||
1559 | err = -EINVAL; | ||
1560 | goto out; | ||
1561 | } | ||
1562 | } else { | ||
1563 | /* | ||
1564 | * The data nodes must be in consecutive positions in | ||
1565 | * the same LEB. | ||
1566 | */ | ||
1567 | if (zbr->lnum != lnum || zbr->offs != offs) | ||
1568 | goto out; | ||
1569 | offs += ALIGN(zbr->len, 8); | ||
1570 | len = ALIGN(len, 8) + zbr->len; | ||
1571 | /* Must not exceed buffer length */ | ||
1572 | if (len > bu->buf_len) | ||
1573 | goto out; | ||
1574 | } | ||
1575 | /* Allow for holes */ | ||
1576 | next_block = key_block(c, key); | ||
1577 | bu->blk_cnt += (next_block - block - 1); | ||
1578 | if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) | ||
1579 | goto out; | ||
1580 | block = next_block; | ||
1581 | /* Add this key */ | ||
1582 | bu->zbranch[bu->cnt++] = *zbr; | ||
1583 | bu->blk_cnt += 1; | ||
1584 | /* See if we have room for more */ | ||
1585 | if (bu->cnt >= UBIFS_MAX_BULK_READ) | ||
1586 | goto out; | ||
1587 | if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) | ||
1588 | goto out; | ||
1589 | } | ||
1590 | out: | ||
1591 | if (err == -ENOENT) { | ||
1592 | bu->eof = 1; | ||
1593 | err = 0; | ||
1594 | } | ||
1595 | bu->gc_seq = c->gc_seq; | ||
1596 | mutex_unlock(&c->tnc_mutex); | ||
1597 | if (err) | ||
1598 | return err; | ||
1599 | /* | ||
1600 | * An enormous hole could cause bulk-read to encompass too many | ||
1601 | * page cache pages, so limit the number here. | ||
1602 | */ | ||
1603 | if (bu->blk_cnt > UBIFS_MAX_BULK_READ) | ||
1604 | bu->blk_cnt = UBIFS_MAX_BULK_READ; | ||
1605 | /* | ||
1606 | * Ensure that bulk-read covers a whole number of page cache | ||
1607 | * pages. | ||
1608 | */ | ||
1609 | if (UBIFS_BLOCKS_PER_PAGE == 1 || | ||
1610 | !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) | ||
1611 | return 0; | ||
1612 | if (bu->eof) { | ||
1613 | /* At the end of file we can round up */ | ||
1614 | bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; | ||
1615 | return 0; | ||
1616 | } | ||
1617 | /* Exclude data nodes that do not make up a whole page cache page */ | ||
1618 | block = key_block(c, &bu->key) + bu->blk_cnt; | ||
1619 | block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); | ||
1620 | while (bu->cnt) { | ||
1621 | if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) | ||
1622 | break; | ||
1623 | bu->cnt -= 1; | ||
1624 | } | ||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1628 | /** | ||
1629 | * read_wbuf - bulk-read from a LEB with a wbuf. | ||
1630 | * @wbuf: wbuf that may overlap the read | ||
1631 | * @buf: buffer into which to read | ||
1632 | * @len: read length | ||
1633 | * @lnum: LEB number from which to read | ||
1634 | * @offs: offset from which to read | ||
1635 | * | ||
1636 | * This functions returns %0 on success or a negative error code on failure. | ||
1637 | */ | ||
1638 | static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, | ||
1639 | int offs) | ||
1640 | { | ||
1641 | const struct ubifs_info *c = wbuf->c; | ||
1642 | int rlen, overlap; | ||
1643 | |||
1644 | dbg_io("LEB %d:%d, length %d", lnum, offs, len); | ||
1645 | ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | ||
1646 | ubifs_assert(!(offs & 7) && offs < c->leb_size); | ||
1647 | ubifs_assert(offs + len <= c->leb_size); | ||
1648 | |||
1649 | spin_lock(&wbuf->lock); | ||
1650 | overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); | ||
1651 | if (!overlap) { | ||
1652 | /* We may safely unlock the write-buffer and read the data */ | ||
1653 | spin_unlock(&wbuf->lock); | ||
1654 | return ubi_read(c->ubi, lnum, buf, offs, len); | ||
1655 | } | ||
1656 | |||
1657 | /* Don't read under wbuf */ | ||
1658 | rlen = wbuf->offs - offs; | ||
1659 | if (rlen < 0) | ||
1660 | rlen = 0; | ||
1661 | |||
1662 | /* Copy the rest from the write-buffer */ | ||
1663 | memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); | ||
1664 | spin_unlock(&wbuf->lock); | ||
1665 | |||
1666 | if (rlen > 0) | ||
1667 | /* Read everything that goes before write-buffer */ | ||
1668 | return ubi_read(c->ubi, lnum, buf, offs, rlen); | ||
1669 | |||
1670 | return 0; | ||
1671 | } | ||
1672 | |||
1673 | /** | ||
1674 | * validate_data_node - validate data nodes for bulk-read. | ||
1675 | * @c: UBIFS file-system description object | ||
1676 | * @buf: buffer containing data node to validate | ||
1677 | * @zbr: zbranch of data node to validate | ||
1678 | * | ||
1679 | * This functions returns %0 on success or a negative error code on failure. | ||
1680 | */ | ||
1681 | static int validate_data_node(struct ubifs_info *c, void *buf, | ||
1682 | struct ubifs_zbranch *zbr) | ||
1683 | { | ||
1684 | union ubifs_key key1; | ||
1685 | struct ubifs_ch *ch = buf; | ||
1686 | int err, len; | ||
1687 | |||
1688 | if (ch->node_type != UBIFS_DATA_NODE) { | ||
1689 | ubifs_err("bad node type (%d but expected %d)", | ||
1690 | ch->node_type, UBIFS_DATA_NODE); | ||
1691 | goto out_err; | ||
1692 | } | ||
1693 | |||
1694 | err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); | ||
1695 | if (err) { | ||
1696 | ubifs_err("expected node type %d", UBIFS_DATA_NODE); | ||
1697 | goto out; | ||
1698 | } | ||
1699 | |||
1700 | len = le32_to_cpu(ch->len); | ||
1701 | if (len != zbr->len) { | ||
1702 | ubifs_err("bad node length %d, expected %d", len, zbr->len); | ||
1703 | goto out_err; | ||
1704 | } | ||
1705 | |||
1706 | /* Make sure the key of the read node is correct */ | ||
1707 | key_read(c, buf + UBIFS_KEY_OFFSET, &key1); | ||
1708 | if (!keys_eq(c, &zbr->key, &key1)) { | ||
1709 | ubifs_err("bad key in node at LEB %d:%d", | ||
1710 | zbr->lnum, zbr->offs); | ||
1711 | dbg_tnc("looked for key %s found node's key %s", | ||
1712 | DBGKEY(&zbr->key), DBGKEY1(&key1)); | ||
1713 | goto out_err; | ||
1714 | } | ||
1715 | |||
1716 | return 0; | ||
1717 | |||
1718 | out_err: | ||
1719 | err = -EINVAL; | ||
1720 | out: | ||
1721 | ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); | ||
1722 | dbg_dump_node(c, buf); | ||
1723 | dbg_dump_stack(); | ||
1724 | return err; | ||
1725 | } | ||
1726 | |||
1727 | /** | ||
1728 | * ubifs_tnc_bulk_read - read a number of data nodes in one go. | ||
1729 | * @c: UBIFS file-system description object | ||
1730 | * @bu: bulk-read parameters and results | ||
1731 | * | ||
1732 | * This functions reads and validates the data nodes that were identified by the | ||
1733 | * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success, | ||
1734 | * -EAGAIN to indicate a race with GC, or another negative error code on | ||
1735 | * failure. | ||
1736 | */ | ||
1737 | int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) | ||
1738 | { | ||
1739 | int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; | ||
1740 | struct ubifs_wbuf *wbuf; | ||
1741 | void *buf; | ||
1742 | |||
1743 | len = bu->zbranch[bu->cnt - 1].offs; | ||
1744 | len += bu->zbranch[bu->cnt - 1].len - offs; | ||
1745 | if (len > bu->buf_len) { | ||
1746 | ubifs_err("buffer too small %d vs %d", bu->buf_len, len); | ||
1747 | return -EINVAL; | ||
1748 | } | ||
1749 | |||
1750 | /* Do the read */ | ||
1751 | wbuf = ubifs_get_wbuf(c, lnum); | ||
1752 | if (wbuf) | ||
1753 | err = read_wbuf(wbuf, bu->buf, len, lnum, offs); | ||
1754 | else | ||
1755 | err = ubi_read(c->ubi, lnum, bu->buf, offs, len); | ||
1756 | |||
1757 | /* Check for a race with GC */ | ||
1758 | if (maybe_leb_gced(c, lnum, bu->gc_seq)) | ||
1759 | return -EAGAIN; | ||
1760 | |||
1761 | if (err && err != -EBADMSG) { | ||
1762 | ubifs_err("failed to read from LEB %d:%d, error %d", | ||
1763 | lnum, offs, err); | ||
1764 | dbg_dump_stack(); | ||
1765 | dbg_tnc("key %s", DBGKEY(&bu->key)); | ||
1766 | return err; | ||
1767 | } | ||
1768 | |||
1769 | /* Validate the nodes read */ | ||
1770 | buf = bu->buf; | ||
1771 | for (i = 0; i < bu->cnt; i++) { | ||
1772 | err = validate_data_node(c, buf, &bu->zbranch[i]); | ||
1773 | if (err) | ||
1774 | return err; | ||
1775 | buf = buf + ALIGN(bu->zbranch[i].len, 8); | ||
1776 | } | ||
1777 | |||
1778 | return 0; | ||
1779 | } | ||
1780 | |||
1781 | /** | ||
1495 | * do_lookup_nm- look up a "hashed" node. | 1782 | * do_lookup_nm- look up a "hashed" node. |
1496 | * @c: UBIFS file-system description object | 1783 | * @c: UBIFS file-system description object |
1497 | * @key: node key to lookup | 1784 | * @key: node key to lookup |
@@ -1675,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, | |||
1675 | { | 1962 | { |
1676 | struct ubifs_znode *zn, *zi, *zp; | 1963 | struct ubifs_znode *zn, *zi, *zp; |
1677 | int i, keep, move, appending = 0; | 1964 | int i, keep, move, appending = 0; |
1678 | union ubifs_key *key = &zbr->key; | 1965 | union ubifs_key *key = &zbr->key, *key1; |
1679 | 1966 | ||
1680 | ubifs_assert(n >= 0 && n <= c->fanout); | 1967 | ubifs_assert(n >= 0 && n <= c->fanout); |
1681 | 1968 | ||
@@ -1716,20 +2003,33 @@ again: | |||
1716 | zn->level = znode->level; | 2003 | zn->level = znode->level; |
1717 | 2004 | ||
1718 | /* Decide where to split */ | 2005 | /* Decide where to split */ |
1719 | if (znode->level == 0 && n == c->fanout && | 2006 | if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { |
1720 | key_type(c, key) == UBIFS_DATA_KEY) { | 2007 | /* Try not to split consecutive data keys */ |
1721 | union ubifs_key *key1; | 2008 | if (n == c->fanout) { |
1722 | 2009 | key1 = &znode->zbranch[n - 1].key; | |
1723 | /* | 2010 | if (key_inum(c, key1) == key_inum(c, key) && |
1724 | * If this is an inode which is being appended - do not split | 2011 | key_type(c, key1) == UBIFS_DATA_KEY) |
1725 | * it because no other zbranches can be inserted between | 2012 | appending = 1; |
1726 | * zbranches of consecutive data nodes anyway. | 2013 | } else |
1727 | */ | 2014 | goto check_split; |
1728 | key1 = &znode->zbranch[n - 1].key; | 2015 | } else if (appending && n != c->fanout) { |
1729 | if (key_inum(c, key1) == key_inum(c, key) && | 2016 | /* Try not to split consecutive data keys */ |
1730 | key_type(c, key1) == UBIFS_DATA_KEY && | 2017 | appending = 0; |
1731 | key_block(c, key1) == key_block(c, key) - 1) | 2018 | check_split: |
1732 | appending = 1; | 2019 | if (n >= (c->fanout + 1) / 2) { |
2020 | key1 = &znode->zbranch[0].key; | ||
2021 | if (key_inum(c, key1) == key_inum(c, key) && | ||
2022 | key_type(c, key1) == UBIFS_DATA_KEY) { | ||
2023 | key1 = &znode->zbranch[n].key; | ||
2024 | if (key_inum(c, key1) != key_inum(c, key) || | ||
2025 | key_type(c, key1) != UBIFS_DATA_KEY) { | ||
2026 | keep = n; | ||
2027 | move = c->fanout - keep; | ||
2028 | zi = znode; | ||
2029 | goto do_split; | ||
2030 | } | ||
2031 | } | ||
2032 | } | ||
1733 | } | 2033 | } |
1734 | 2034 | ||
1735 | if (appending) { | 2035 | if (appending) { |
@@ -1759,6 +2059,8 @@ again: | |||
1759 | zbr->znode->parent = zn; | 2059 | zbr->znode->parent = zn; |
1760 | } | 2060 | } |
1761 | 2061 | ||
2062 | do_split: | ||
2063 | |||
1762 | __set_bit(DIRTY_ZNODE, &zn->flags); | 2064 | __set_bit(DIRTY_ZNODE, &zn->flags); |
1763 | atomic_long_inc(&c->dirty_zn_cnt); | 2065 | atomic_long_inc(&c->dirty_zn_cnt); |
1764 | 2066 | ||
@@ -1785,14 +2087,11 @@ again: | |||
1785 | 2087 | ||
1786 | /* Insert new znode (produced by spitting) into the parent */ | 2088 | /* Insert new znode (produced by spitting) into the parent */ |
1787 | if (zp) { | 2089 | if (zp) { |
1788 | i = n; | 2090 | if (n == 0 && zi == znode && znode->iip == 0) |
2091 | correct_parent_keys(c, znode); | ||
2092 | |||
1789 | /* Locate insertion point */ | 2093 | /* Locate insertion point */ |
1790 | n = znode->iip + 1; | 2094 | n = znode->iip + 1; |
1791 | if (appending && n != c->fanout) | ||
1792 | appending = 0; | ||
1793 | |||
1794 | if (i == 0 && zi == znode && znode->iip == 0) | ||
1795 | correct_parent_keys(c, znode); | ||
1796 | 2095 | ||
1797 | /* Tail recursion */ | 2096 | /* Tail recursion */ |
1798 | zbr->key = zn->zbranch[0].key; | 2097 | zbr->key = zn->zbranch[0].key; |
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index a25c1cc1f8d9..b48db999903e 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c | |||
@@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
480 | } | 480 | } |
481 | 481 | ||
482 | /* Make sure the key of the read node is correct */ | 482 | /* Make sure the key of the read node is correct */ |
483 | key_read(c, key, &key1); | 483 | key_read(c, node + UBIFS_KEY_OFFSET, &key1); |
484 | if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { | 484 | if (!keys_eq(c, key, &key1)) { |
485 | ubifs_err("bad key in node at LEB %d:%d", | 485 | ubifs_err("bad key in node at LEB %d:%d", |
486 | zbr->lnum, zbr->offs); | 486 | zbr->lnum, zbr->offs); |
487 | dbg_tnc("looked for key %s found node's key %s", | 487 | dbg_tnc("looked for key %s found node's key %s", |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index a9ecbd9af20d..0b378042a3a2 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -75,7 +75,6 @@ | |||
75 | */ | 75 | */ |
76 | #define UBIFS_BLOCK_SIZE 4096 | 76 | #define UBIFS_BLOCK_SIZE 4096 |
77 | #define UBIFS_BLOCK_SHIFT 12 | 77 | #define UBIFS_BLOCK_SHIFT 12 |
78 | #define UBIFS_BLOCK_MASK 0x00000FFF | ||
79 | 78 | ||
80 | /* UBIFS padding byte pattern (must not be first or last byte of node magic) */ | 79 | /* UBIFS padding byte pattern (must not be first or last byte of node magic) */ |
81 | #define UBIFS_PADDING_BYTE 0xCE | 80 | #define UBIFS_PADDING_BYTE 0xCE |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 17c620b93eec..a7bd32fa15b9 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -142,6 +142,9 @@ | |||
142 | /* Maximum expected tree height for use by bottom_up_buf */ | 142 | /* Maximum expected tree height for use by bottom_up_buf */ |
143 | #define BOTTOM_UP_HEIGHT 64 | 143 | #define BOTTOM_UP_HEIGHT 64 |
144 | 144 | ||
145 | /* Maximum number of data nodes to bulk-read */ | ||
146 | #define UBIFS_MAX_BULK_READ 32 | ||
147 | |||
145 | /* | 148 | /* |
146 | * Lockdep classes for UBIFS inode @ui_mutex. | 149 | * Lockdep classes for UBIFS inode @ui_mutex. |
147 | */ | 150 | */ |
@@ -328,9 +331,10 @@ struct ubifs_gced_idx_leb { | |||
328 | * this inode | 331 | * this inode |
329 | * @dirty: non-zero if the inode is dirty | 332 | * @dirty: non-zero if the inode is dirty |
330 | * @xattr: non-zero if this is an extended attribute inode | 333 | * @xattr: non-zero if this is an extended attribute inode |
334 | * @bulk_read: non-zero if bulk-read should be used | ||
331 | * @ui_mutex: serializes inode write-back with the rest of VFS operations, | 335 | * @ui_mutex: serializes inode write-back with the rest of VFS operations, |
332 | * serializes "clean <-> dirty" state changes, protects @dirty, | 336 | * serializes "clean <-> dirty" state changes, serializes bulk-read, |
333 | * @ui_size, and @xattr_size | 337 | * protects @dirty, @bulk_read, @ui_size, and @xattr_size |
334 | * @ui_lock: protects @synced_i_size | 338 | * @ui_lock: protects @synced_i_size |
335 | * @synced_i_size: synchronized size of inode, i.e. the value of inode size | 339 | * @synced_i_size: synchronized size of inode, i.e. the value of inode size |
336 | * currently stored on the flash; used only for regular file | 340 | * currently stored on the flash; used only for regular file |
@@ -338,6 +342,8 @@ struct ubifs_gced_idx_leb { | |||
338 | * @ui_size: inode size used by UBIFS when writing to flash | 342 | * @ui_size: inode size used by UBIFS when writing to flash |
339 | * @flags: inode flags (@UBIFS_COMPR_FL, etc) | 343 | * @flags: inode flags (@UBIFS_COMPR_FL, etc) |
340 | * @compr_type: default compression type used for this inode | 344 | * @compr_type: default compression type used for this inode |
345 | * @last_page_read: page number of last page read (for bulk read) | ||
346 | * @read_in_a_row: number of consecutive pages read in a row (for bulk read) | ||
341 | * @data_len: length of the data attached to the inode | 347 | * @data_len: length of the data attached to the inode |
342 | * @data: inode's data | 348 | * @data: inode's data |
343 | * | 349 | * |
@@ -379,12 +385,15 @@ struct ubifs_inode { | |||
379 | unsigned int xattr_names; | 385 | unsigned int xattr_names; |
380 | unsigned int dirty:1; | 386 | unsigned int dirty:1; |
381 | unsigned int xattr:1; | 387 | unsigned int xattr:1; |
388 | unsigned int bulk_read:1; | ||
382 | struct mutex ui_mutex; | 389 | struct mutex ui_mutex; |
383 | spinlock_t ui_lock; | 390 | spinlock_t ui_lock; |
384 | loff_t synced_i_size; | 391 | loff_t synced_i_size; |
385 | loff_t ui_size; | 392 | loff_t ui_size; |
386 | int flags; | 393 | int flags; |
387 | int compr_type; | 394 | int compr_type; |
395 | pgoff_t last_page_read; | ||
396 | pgoff_t read_in_a_row; | ||
388 | int data_len; | 397 | int data_len; |
389 | void *data; | 398 | void *data; |
390 | }; | 399 | }; |
@@ -698,8 +707,8 @@ struct ubifs_jhead { | |||
698 | * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. | 707 | * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. |
699 | * @key: key | 708 | * @key: key |
700 | * @znode: znode address in memory | 709 | * @znode: znode address in memory |
701 | * @lnum: LEB number of the indexing node | 710 | * @lnum: LEB number of the target node (indexing node or data node) |
702 | * @offs: offset of the indexing node within @lnum | 711 | * @offs: target node offset within @lnum |
703 | * @len: target node length | 712 | * @len: target node length |
704 | */ | 713 | */ |
705 | struct ubifs_zbranch { | 714 | struct ubifs_zbranch { |
@@ -744,6 +753,28 @@ struct ubifs_znode { | |||
744 | }; | 753 | }; |
745 | 754 | ||
746 | /** | 755 | /** |
756 | * struct bu_info - bulk-read information | ||
757 | * @key: first data node key | ||
758 | * @zbranch: zbranches of data nodes to bulk read | ||
759 | * @buf: buffer to read into | ||
760 | * @buf_len: buffer length | ||
761 | * @gc_seq: GC sequence number to detect races with GC | ||
762 | * @cnt: number of data nodes for bulk read | ||
763 | * @blk_cnt: number of data blocks including holes | ||
765 | * @eof: end of file reached | ||
765 | */ | ||
766 | struct bu_info { | ||
767 | union ubifs_key key; | ||
768 | struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; | ||
769 | void *buf; | ||
770 | int buf_len; | ||
771 | int gc_seq; | ||
772 | int cnt; | ||
773 | int blk_cnt; | ||
774 | int eof; | ||
775 | }; | ||
776 | |||
777 | /** | ||
747 | * struct ubifs_node_range - node length range description data structure. | 778 | * struct ubifs_node_range - node length range description data structure. |
748 | * @len: fixed node length | 779 | * @len: fixed node length |
749 | * @min_len: minimum possible node length | 780 | * @min_len: minimum possible node length |
@@ -862,9 +893,13 @@ struct ubifs_orphan { | |||
862 | /** | 893 | /** |
863 | * struct ubifs_mount_opts - UBIFS-specific mount options information. | 894 | * struct ubifs_mount_opts - UBIFS-specific mount options information. |
864 | * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) | 895 | * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) |
896 | * @bulk_read: enable bulk-reads | ||
897 | * @chk_data_crc: check CRCs when reading data nodes | ||
865 | */ | 898 | */ |
866 | struct ubifs_mount_opts { | 899 | struct ubifs_mount_opts { |
867 | unsigned int unmount_mode:2; | 900 | unsigned int unmount_mode:2; |
901 | unsigned int bulk_read:2; | ||
902 | unsigned int chk_data_crc:2; | ||
868 | }; | 903 | }; |
869 | 904 | ||
870 | /** | 905 | /** |
@@ -905,13 +940,12 @@ struct ubifs_mount_opts { | |||
905 | * @cmt_state: commit state | 940 | * @cmt_state: commit state |
906 | * @cs_lock: commit state lock | 941 | * @cs_lock: commit state lock |
907 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running | 942 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running |
943 | * | ||
908 | * @fast_unmount: do not run journal commit before un-mounting | 944 | * @fast_unmount: do not run journal commit before un-mounting |
909 | * @big_lpt: flag that LPT is too big to write whole during commit | 945 | * @big_lpt: flag that LPT is too big to write whole during commit |
910 | * @check_lpt_free: flag that indicates LPT GC may be needed | 946 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during |
911 | * @nospace: non-zero if the file-system does not have flash space (used as | 947 | * recovery) |
912 | * optimization) | 948 | * @bulk_read: enable bulk-reads |
913 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
914 | * pool is full | ||
915 | * | 949 | * |
916 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and | 950 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and |
917 | * @calc_idx_sz | 951 | * @calc_idx_sz |
@@ -935,6 +969,7 @@ struct ubifs_mount_opts { | |||
935 | * @mst_node: master node | 969 | * @mst_node: master node |
936 | * @mst_offs: offset of valid master node | 970 | * @mst_offs: offset of valid master node |
937 | * @mst_mutex: protects the master node area, @mst_node, and @mst_offs | 971 | * @mst_mutex: protects the master node area, @mst_node, and @mst_offs |
972 | * @bulk_read_buf_size: buffer size for bulk-reads | ||
938 | * | 973 | * |
939 | * @log_lebs: number of logical eraseblocks in the log | 974 | * @log_lebs: number of logical eraseblocks in the log |
940 | * @log_bytes: log size in bytes | 975 | * @log_bytes: log size in bytes |
@@ -977,12 +1012,17 @@ struct ubifs_mount_opts { | |||
977 | * but which still have to be taken into account because | 1012 | * but which still have to be taken into account because |
978 | * the index has not been committed so far | 1013 | * the index has not been committed so far |
979 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, | 1014 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, |
980 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; | 1015 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, |
1016 | * @nospace, and @nospace_rp; | ||
981 | * @min_idx_lebs: minimum number of LEBs required for the index | 1017 | * @min_idx_lebs: minimum number of LEBs required for the index |
982 | * @old_idx_sz: size of index on flash | 1018 | * @old_idx_sz: size of index on flash |
983 | * @calc_idx_sz: temporary variable which is used to calculate new index size | 1019 | * @calc_idx_sz: temporary variable which is used to calculate new index size |
984 | * (contains accurate new index size at end of TNC commit start) | 1020 | * (contains accurate new index size at end of TNC commit start) |
985 | * @lst: lprops statistics | 1021 | * @lst: lprops statistics |
1022 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
1023 | * optimization) | ||
1024 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
1025 | * pool is full | ||
986 | * | 1026 | * |
987 | * @page_budget: budget for a page | 1027 | * @page_budget: budget for a page |
988 | * @inode_budget: budget for an inode | 1028 | * @inode_budget: budget for an inode |
@@ -1061,6 +1101,7 @@ struct ubifs_mount_opts { | |||
1061 | * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab | 1101 | * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab |
1062 | * @dirty_nn_cnt: number of dirty nnodes | 1102 | * @dirty_nn_cnt: number of dirty nnodes |
1063 | * @dirty_pn_cnt: number of dirty pnodes | 1103 | * @dirty_pn_cnt: number of dirty pnodes |
1104 | * @check_lpt_free: flag that indicates LPT GC may be needed | ||
1064 | * @lpt_sz: LPT size | 1105 | * @lpt_sz: LPT size |
1065 | * @lpt_nod_buf: buffer for an on-flash nnode or pnode | 1106 | * @lpt_nod_buf: buffer for an on-flash nnode or pnode |
1066 | * @lpt_buf: buffer of LEB size used by LPT | 1107 | * @lpt_buf: buffer of LEB size used by LPT |
@@ -1102,6 +1143,7 @@ struct ubifs_mount_opts { | |||
1102 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw | 1143 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw |
1103 | * @size_tree: inode size information for recovery | 1144 | * @size_tree: inode size information for recovery |
1104 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) | 1145 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) |
1146 | * @always_chk_crc: always check CRCs (while mounting and remounting rw) | ||
1105 | * @mount_opts: UBIFS-specific mount options | 1147 | * @mount_opts: UBIFS-specific mount options |
1106 | * | 1148 | * |
1107 | * @dbg_buf: a buffer of LEB size used for debugging purposes | 1149 | * @dbg_buf: a buffer of LEB size used for debugging purposes |
@@ -1146,11 +1188,11 @@ struct ubifs_info { | |||
1146 | int cmt_state; | 1188 | int cmt_state; |
1147 | spinlock_t cs_lock; | 1189 | spinlock_t cs_lock; |
1148 | wait_queue_head_t cmt_wq; | 1190 | wait_queue_head_t cmt_wq; |
1191 | |||
1149 | unsigned int fast_unmount:1; | 1192 | unsigned int fast_unmount:1; |
1150 | unsigned int big_lpt:1; | 1193 | unsigned int big_lpt:1; |
1151 | unsigned int check_lpt_free:1; | 1194 | unsigned int no_chk_data_crc:1; |
1152 | unsigned int nospace:1; | 1195 | unsigned int bulk_read:1; |
1153 | unsigned int nospace_rp:1; | ||
1154 | 1196 | ||
1155 | struct mutex tnc_mutex; | 1197 | struct mutex tnc_mutex; |
1156 | struct ubifs_zbranch zroot; | 1198 | struct ubifs_zbranch zroot; |
@@ -1175,6 +1217,7 @@ struct ubifs_info { | |||
1175 | struct ubifs_mst_node *mst_node; | 1217 | struct ubifs_mst_node *mst_node; |
1176 | int mst_offs; | 1218 | int mst_offs; |
1177 | struct mutex mst_mutex; | 1219 | struct mutex mst_mutex; |
1220 | int bulk_read_buf_size; | ||
1178 | 1221 | ||
1179 | int log_lebs; | 1222 | int log_lebs; |
1180 | long long log_bytes; | 1223 | long long log_bytes; |
@@ -1218,6 +1261,8 @@ struct ubifs_info { | |||
1218 | unsigned long long old_idx_sz; | 1261 | unsigned long long old_idx_sz; |
1219 | unsigned long long calc_idx_sz; | 1262 | unsigned long long calc_idx_sz; |
1220 | struct ubifs_lp_stats lst; | 1263 | struct ubifs_lp_stats lst; |
1264 | unsigned int nospace:1; | ||
1265 | unsigned int nospace_rp:1; | ||
1221 | 1266 | ||
1222 | int page_budget; | 1267 | int page_budget; |
1223 | int inode_budget; | 1268 | int inode_budget; |
@@ -1294,6 +1339,7 @@ struct ubifs_info { | |||
1294 | int lpt_drty_flgs; | 1339 | int lpt_drty_flgs; |
1295 | int dirty_nn_cnt; | 1340 | int dirty_nn_cnt; |
1296 | int dirty_pn_cnt; | 1341 | int dirty_pn_cnt; |
1342 | int check_lpt_free; | ||
1297 | long long lpt_sz; | 1343 | long long lpt_sz; |
1298 | void *lpt_nod_buf; | 1344 | void *lpt_nod_buf; |
1299 | void *lpt_buf; | 1345 | void *lpt_buf; |
@@ -1335,6 +1381,7 @@ struct ubifs_info { | |||
1335 | struct ubifs_mst_node *rcvrd_mst_node; | 1381 | struct ubifs_mst_node *rcvrd_mst_node; |
1336 | struct rb_root size_tree; | 1382 | struct rb_root size_tree; |
1337 | int remounting_rw; | 1383 | int remounting_rw; |
1384 | int always_chk_crc; | ||
1338 | struct ubifs_mount_opts mount_opts; | 1385 | struct ubifs_mount_opts mount_opts; |
1339 | 1386 | ||
1340 | #ifdef CONFIG_UBIFS_FS_DEBUG | 1387 | #ifdef CONFIG_UBIFS_FS_DEBUG |
@@ -1347,6 +1394,12 @@ struct ubifs_info { | |||
1347 | unsigned long fail_timeout; | 1394 | unsigned long fail_timeout; |
1348 | unsigned int fail_cnt; | 1395 | unsigned int fail_cnt; |
1349 | unsigned int fail_cnt_max; | 1396 | unsigned int fail_cnt_max; |
1397 | long long chk_lpt_sz; | ||
1398 | long long chk_lpt_sz2; | ||
1399 | long long chk_lpt_wastage; | ||
1400 | int chk_lpt_lebs; | ||
1401 | int new_nhead_lnum; | ||
1402 | int new_nhead_offs; | ||
1350 | #endif | 1403 | #endif |
1351 | }; | 1404 | }; |
1352 | 1405 | ||
@@ -1377,7 +1430,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | |||
1377 | int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, | 1430 | int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, |
1378 | int offs, int dtype); | 1431 | int offs, int dtype); |
1379 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | 1432 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, |
1380 | int offs, int quiet); | 1433 | int offs, int quiet, int chk_crc); |
1381 | void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); | 1434 | void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); |
1382 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); | 1435 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); |
1383 | int ubifs_io_init(struct ubifs_info *c); | 1436 | int ubifs_io_init(struct ubifs_info *c); |
@@ -1490,6 +1543,8 @@ void destroy_old_idx(struct ubifs_info *c); | |||
1490 | int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, | 1543 | int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, |
1491 | int lnum, int offs); | 1544 | int lnum, int offs); |
1492 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); | 1545 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); |
1546 | int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); | ||
1547 | int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); | ||
1493 | 1548 | ||
1494 | /* tnc_misc.c */ | 1549 | /* tnc_misc.c */ |
1495 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, | 1550 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, |
@@ -1586,12 +1641,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c); | |||
1586 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only); | 1641 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only); |
1587 | 1642 | ||
1588 | /* lprops.c */ | 1643 | /* lprops.c */ |
1589 | void ubifs_get_lprops(struct ubifs_info *c); | ||
1590 | const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | 1644 | const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, |
1591 | const struct ubifs_lprops *lp, | 1645 | const struct ubifs_lprops *lp, |
1592 | int free, int dirty, int flags, | 1646 | int free, int dirty, int flags, |
1593 | int idx_gc_cnt); | 1647 | int idx_gc_cnt); |
1594 | void ubifs_release_lprops(struct ubifs_info *c); | ||
1595 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); | 1648 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); |
1596 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | 1649 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, |
1597 | int cat); | 1650 | int cat); |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 649bec78b645..cfd31e229c89 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
446 | int type; | 446 | int type; |
447 | 447 | ||
448 | xent = ubifs_tnc_next_ent(c, &key, &nm); | 448 | xent = ubifs_tnc_next_ent(c, &key, &nm); |
449 | if (unlikely(IS_ERR(xent))) { | 449 | if (IS_ERR(xent)) { |
450 | err = PTR_ERR(xent); | 450 | err = PTR_ERR(xent); |
451 | break; | 451 | break; |
452 | } | 452 | } |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d3231947db19..082409cd4b8a 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -142,7 +142,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, | |||
142 | } | 142 | } |
143 | 143 | ||
144 | static struct fileIdentDesc *udf_find_entry(struct inode *dir, | 144 | static struct fileIdentDesc *udf_find_entry(struct inode *dir, |
145 | struct dentry *dentry, | 145 | struct qstr *child, |
146 | struct udf_fileident_bh *fibh, | 146 | struct udf_fileident_bh *fibh, |
147 | struct fileIdentDesc *cfi) | 147 | struct fileIdentDesc *cfi) |
148 | { | 148 | { |
@@ -159,8 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
159 | sector_t offset; | 159 | sector_t offset; |
160 | struct extent_position epos = {}; | 160 | struct extent_position epos = {}; |
161 | struct udf_inode_info *dinfo = UDF_I(dir); | 161 | struct udf_inode_info *dinfo = UDF_I(dir); |
162 | int isdotdot = dentry->d_name.len == 2 && | 162 | int isdotdot = child->len == 2 && |
163 | dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; | 163 | child->name[0] == '.' && child->name[1] == '.'; |
164 | 164 | ||
165 | size = udf_ext0_offset(dir) + dir->i_size; | 165 | size = udf_ext0_offset(dir) + dir->i_size; |
166 | f_pos = udf_ext0_offset(dir); | 166 | f_pos = udf_ext0_offset(dir); |
@@ -238,8 +238,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
238 | continue; | 238 | continue; |
239 | 239 | ||
240 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | 240 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); |
241 | if (flen && udf_match(flen, fname, dentry->d_name.len, | 241 | if (flen && udf_match(flen, fname, child->len, child->name)) |
242 | dentry->d_name.name)) | ||
243 | goto out_ok; | 242 | goto out_ok; |
244 | } | 243 | } |
245 | 244 | ||
@@ -283,7 +282,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | |||
283 | } else | 282 | } else |
284 | #endif /* UDF_RECOVERY */ | 283 | #endif /* UDF_RECOVERY */ |
285 | 284 | ||
286 | if (udf_find_entry(dir, dentry, &fibh, &cfi)) { | 285 | if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) { |
287 | if (fibh.sbh != fibh.ebh) | 286 | if (fibh.sbh != fibh.ebh) |
288 | brelse(fibh.ebh); | 287 | brelse(fibh.ebh); |
289 | brelse(fibh.sbh); | 288 | brelse(fibh.sbh); |
@@ -783,7 +782,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
783 | 782 | ||
784 | retval = -ENOENT; | 783 | retval = -ENOENT; |
785 | lock_kernel(); | 784 | lock_kernel(); |
786 | fi = udf_find_entry(dir, dentry, &fibh, &cfi); | 785 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
787 | if (!fi) | 786 | if (!fi) |
788 | goto out; | 787 | goto out; |
789 | 788 | ||
@@ -829,7 +828,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) | |||
829 | 828 | ||
830 | retval = -ENOENT; | 829 | retval = -ENOENT; |
831 | lock_kernel(); | 830 | lock_kernel(); |
832 | fi = udf_find_entry(dir, dentry, &fibh, &cfi); | 831 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
833 | if (!fi) | 832 | if (!fi) |
834 | goto out; | 833 | goto out; |
835 | 834 | ||
@@ -1113,7 +1112,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1113 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); | 1112 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); |
1114 | 1113 | ||
1115 | lock_kernel(); | 1114 | lock_kernel(); |
1116 | ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); | 1115 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); |
1117 | if (ofi) { | 1116 | if (ofi) { |
1118 | if (ofibh.sbh != ofibh.ebh) | 1117 | if (ofibh.sbh != ofibh.ebh) |
1119 | brelse(ofibh.ebh); | 1118 | brelse(ofibh.ebh); |
@@ -1124,7 +1123,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1124 | != old_inode->i_ino) | 1123 | != old_inode->i_ino) |
1125 | goto end_rename; | 1124 | goto end_rename; |
1126 | 1125 | ||
1127 | nfi = udf_find_entry(new_dir, new_dentry, &nfibh, &ncfi); | 1126 | nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi); |
1128 | if (nfi) { | 1127 | if (nfi) { |
1129 | if (!new_inode) { | 1128 | if (!new_inode) { |
1130 | if (nfibh.sbh != nfibh.ebh) | 1129 | if (nfibh.sbh != nfibh.ebh) |
@@ -1192,7 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1192 | udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); | 1191 | udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); |
1193 | 1192 | ||
1194 | /* The old fid may have moved - find it again */ | 1193 | /* The old fid may have moved - find it again */ |
1195 | ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); | 1194 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); |
1196 | udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); | 1195 | udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); |
1197 | 1196 | ||
1198 | if (new_inode) { | 1197 | if (new_inode) { |
@@ -1243,15 +1242,11 @@ end_rename: | |||
1243 | 1242 | ||
1244 | static struct dentry *udf_get_parent(struct dentry *child) | 1243 | static struct dentry *udf_get_parent(struct dentry *child) |
1245 | { | 1244 | { |
1246 | struct dentry *parent; | ||
1247 | struct inode *inode = NULL; | 1245 | struct inode *inode = NULL; |
1248 | struct dentry dotdot; | 1246 | struct qstr dotdot = {.name = "..", .len = 2}; |
1249 | struct fileIdentDesc cfi; | 1247 | struct fileIdentDesc cfi; |
1250 | struct udf_fileident_bh fibh; | 1248 | struct udf_fileident_bh fibh; |
1251 | 1249 | ||
1252 | dotdot.d_name.name = ".."; | ||
1253 | dotdot.d_name.len = 2; | ||
1254 | |||
1255 | lock_kernel(); | 1250 | lock_kernel(); |
1256 | if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) | 1251 | if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) |
1257 | goto out_unlock; | 1252 | goto out_unlock; |
@@ -1266,13 +1261,7 @@ static struct dentry *udf_get_parent(struct dentry *child) | |||
1266 | goto out_unlock; | 1261 | goto out_unlock; |
1267 | unlock_kernel(); | 1262 | unlock_kernel(); |
1268 | 1263 | ||
1269 | parent = d_alloc_anon(inode); | 1264 | return d_obtain_alias(inode); |
1270 | if (!parent) { | ||
1271 | iput(inode); | ||
1272 | parent = ERR_PTR(-ENOMEM); | ||
1273 | } | ||
1274 | |||
1275 | return parent; | ||
1276 | out_unlock: | 1265 | out_unlock: |
1277 | unlock_kernel(); | 1266 | unlock_kernel(); |
1278 | return ERR_PTR(-EACCES); | 1267 | return ERR_PTR(-EACCES); |
@@ -1283,7 +1272,6 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, | |||
1283 | u16 partref, __u32 generation) | 1272 | u16 partref, __u32 generation) |
1284 | { | 1273 | { |
1285 | struct inode *inode; | 1274 | struct inode *inode; |
1286 | struct dentry *result; | ||
1287 | kernel_lb_addr loc; | 1275 | kernel_lb_addr loc; |
1288 | 1276 | ||
1289 | if (block == 0) | 1277 | if (block == 0) |
@@ -1300,12 +1288,7 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, | |||
1300 | iput(inode); | 1288 | iput(inode); |
1301 | return ERR_PTR(-ESTALE); | 1289 | return ERR_PTR(-ESTALE); |
1302 | } | 1290 | } |
1303 | result = d_alloc_anon(inode); | 1291 | return d_obtain_alias(inode); |
1304 | if (!result) { | ||
1305 | iput(inode); | ||
1306 | return ERR_PTR(-ENOMEM); | ||
1307 | } | ||
1308 | return result; | ||
1309 | } | 1292 | } |
1310 | 1293 | ||
1311 | static struct dentry *udf_fh_to_dentry(struct super_block *sb, | 1294 | static struct dentry *udf_fh_to_dentry(struct super_block *sb, |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index df0bef18742d..dbbbc4668769 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
@@ -667,4 +667,5 @@ const struct file_operations ufs_dir_operations = { | |||
667 | .read = generic_read_dir, | 667 | .read = generic_read_dir, |
668 | .readdir = ufs_readdir, | 668 | .readdir = ufs_readdir, |
669 | .fsync = file_fsync, | 669 | .fsync = file_fsync, |
670 | .llseek = generic_file_llseek, | ||
670 | }; | 671 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 24fd598af846..7f7abec25e14 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -148,7 +148,6 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
148 | { | 148 | { |
149 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | 149 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; |
150 | struct inode *inode = NULL; | 150 | struct inode *inode = NULL; |
151 | struct dentry *result; | ||
152 | 151 | ||
153 | if (fh_len < xfs_fileid_length(fileid_type)) | 152 | if (fh_len < xfs_fileid_length(fileid_type)) |
154 | return NULL; | 153 | return NULL; |
@@ -164,16 +163,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
164 | break; | 163 | break; |
165 | } | 164 | } |
166 | 165 | ||
167 | if (!inode) | 166 | return d_obtain_alias(inode); |
168 | return NULL; | ||
169 | if (IS_ERR(inode)) | ||
170 | return ERR_CAST(inode); | ||
171 | result = d_alloc_anon(inode); | ||
172 | if (!result) { | ||
173 | iput(inode); | ||
174 | return ERR_PTR(-ENOMEM); | ||
175 | } | ||
176 | return result; | ||
177 | } | 167 | } |
178 | 168 | ||
179 | STATIC struct dentry * | 169 | STATIC struct dentry * |
@@ -182,7 +172,6 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
182 | { | 172 | { |
183 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | 173 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; |
184 | struct inode *inode = NULL; | 174 | struct inode *inode = NULL; |
185 | struct dentry *result; | ||
186 | 175 | ||
187 | switch (fileid_type) { | 176 | switch (fileid_type) { |
188 | case FILEID_INO32_GEN_PARENT: | 177 | case FILEID_INO32_GEN_PARENT: |
@@ -195,16 +184,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
195 | break; | 184 | break; |
196 | } | 185 | } |
197 | 186 | ||
198 | if (!inode) | 187 | return d_obtain_alias(inode); |
199 | return NULL; | ||
200 | if (IS_ERR(inode)) | ||
201 | return ERR_CAST(inode); | ||
202 | result = d_alloc_anon(inode); | ||
203 | if (!result) { | ||
204 | iput(inode); | ||
205 | return ERR_PTR(-ENOMEM); | ||
206 | } | ||
207 | return result; | ||
208 | } | 188 | } |
209 | 189 | ||
210 | STATIC struct dentry * | 190 | STATIC struct dentry * |
@@ -213,18 +193,12 @@ xfs_fs_get_parent( | |||
213 | { | 193 | { |
214 | int error; | 194 | int error; |
215 | struct xfs_inode *cip; | 195 | struct xfs_inode *cip; |
216 | struct dentry *parent; | ||
217 | 196 | ||
218 | error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); | 197 | error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); |
219 | if (unlikely(error)) | 198 | if (unlikely(error)) |
220 | return ERR_PTR(-error); | 199 | return ERR_PTR(-error); |
221 | 200 | ||
222 | parent = d_alloc_anon(VFS_I(cip)); | 201 | return d_obtain_alias(VFS_I(cip)); |
223 | if (unlikely(!parent)) { | ||
224 | iput(VFS_I(cip)); | ||
225 | return ERR_PTR(-ENOMEM); | ||
226 | } | ||
227 | return parent; | ||
228 | } | 202 | } |
229 | 203 | ||
230 | const struct export_operations xfs_export_operations = { | 204 | const struct export_operations xfs_export_operations = { |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5311c1acdd40..3fee790f138b 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -204,15 +204,6 @@ xfs_file_fsync( | |||
204 | return -xfs_fsync(XFS_I(dentry->d_inode)); | 204 | return -xfs_fsync(XFS_I(dentry->d_inode)); |
205 | } | 205 | } |
206 | 206 | ||
207 | /* | ||
208 | * Unfortunately we can't just use the clean and simple readdir implementation | ||
209 | * below, because nfs might call back into ->lookup from the filldir callback | ||
210 | * and that will deadlock the low-level btree code. | ||
211 | * | ||
212 | * Hopefully we'll find a better workaround that allows to use the optimal | ||
213 | * version at least for local readdirs for 2.6.25. | ||
214 | */ | ||
215 | #if 0 | ||
216 | STATIC int | 207 | STATIC int |
217 | xfs_file_readdir( | 208 | xfs_file_readdir( |
218 | struct file *filp, | 209 | struct file *filp, |
@@ -244,125 +235,6 @@ xfs_file_readdir( | |||
244 | return -error; | 235 | return -error; |
245 | return 0; | 236 | return 0; |
246 | } | 237 | } |
247 | #else | ||
248 | |||
249 | struct hack_dirent { | ||
250 | u64 ino; | ||
251 | loff_t offset; | ||
252 | int namlen; | ||
253 | unsigned int d_type; | ||
254 | char name[]; | ||
255 | }; | ||
256 | |||
257 | struct hack_callback { | ||
258 | char *dirent; | ||
259 | size_t len; | ||
260 | size_t used; | ||
261 | }; | ||
262 | |||
263 | STATIC int | ||
264 | xfs_hack_filldir( | ||
265 | void *__buf, | ||
266 | const char *name, | ||
267 | int namlen, | ||
268 | loff_t offset, | ||
269 | u64 ino, | ||
270 | unsigned int d_type) | ||
271 | { | ||
272 | struct hack_callback *buf = __buf; | ||
273 | struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used); | ||
274 | unsigned int reclen; | ||
275 | |||
276 | reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64)); | ||
277 | if (buf->used + reclen > buf->len) | ||
278 | return -EINVAL; | ||
279 | |||
280 | de->namlen = namlen; | ||
281 | de->offset = offset; | ||
282 | de->ino = ino; | ||
283 | de->d_type = d_type; | ||
284 | memcpy(de->name, name, namlen); | ||
285 | buf->used += reclen; | ||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | STATIC int | ||
290 | xfs_file_readdir( | ||
291 | struct file *filp, | ||
292 | void *dirent, | ||
293 | filldir_t filldir) | ||
294 | { | ||
295 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
296 | xfs_inode_t *ip = XFS_I(inode); | ||
297 | struct hack_callback buf; | ||
298 | struct hack_dirent *de; | ||
299 | int error; | ||
300 | loff_t size; | ||
301 | int eof = 0; | ||
302 | xfs_off_t start_offset, curr_offset, offset; | ||
303 | |||
304 | /* | ||
305 | * Try fairly hard to get memory | ||
306 | */ | ||
307 | buf.len = PAGE_CACHE_SIZE; | ||
308 | do { | ||
309 | buf.dirent = kmalloc(buf.len, GFP_KERNEL); | ||
310 | if (buf.dirent) | ||
311 | break; | ||
312 | buf.len >>= 1; | ||
313 | } while (buf.len >= 1024); | ||
314 | |||
315 | if (!buf.dirent) | ||
316 | return -ENOMEM; | ||
317 | |||
318 | curr_offset = filp->f_pos; | ||
319 | if (curr_offset == 0x7fffffff) | ||
320 | offset = 0xffffffff; | ||
321 | else | ||
322 | offset = filp->f_pos; | ||
323 | |||
324 | while (!eof) { | ||
325 | unsigned int reclen; | ||
326 | |||
327 | start_offset = offset; | ||
328 | |||
329 | buf.used = 0; | ||
330 | error = -xfs_readdir(ip, &buf, buf.len, &offset, | ||
331 | xfs_hack_filldir); | ||
332 | if (error || offset == start_offset) { | ||
333 | size = 0; | ||
334 | break; | ||
335 | } | ||
336 | |||
337 | size = buf.used; | ||
338 | de = (struct hack_dirent *)buf.dirent; | ||
339 | while (size > 0) { | ||
340 | curr_offset = de->offset /* & 0x7fffffff */; | ||
341 | if (filldir(dirent, de->name, de->namlen, | ||
342 | curr_offset & 0x7fffffff, | ||
343 | de->ino, de->d_type)) { | ||
344 | goto done; | ||
345 | } | ||
346 | |||
347 | reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen, | ||
348 | sizeof(u64)); | ||
349 | size -= reclen; | ||
350 | de = (struct hack_dirent *)((char *)de + reclen); | ||
351 | } | ||
352 | } | ||
353 | |||
354 | done: | ||
355 | if (!error) { | ||
356 | if (size == 0) | ||
357 | filp->f_pos = offset & 0x7fffffff; | ||
358 | else if (de) | ||
359 | filp->f_pos = curr_offset; | ||
360 | } | ||
361 | |||
362 | kfree(buf.dirent); | ||
363 | return error; | ||
364 | } | ||
365 | #endif | ||
366 | 238 | ||
367 | STATIC int | 239 | STATIC int |
368 | xfs_file_mmap( | 240 | xfs_file_mmap( |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 48799ba7e3e6..d3438c72dcaf 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -311,11 +311,10 @@ xfs_open_by_handle( | |||
311 | return new_fd; | 311 | return new_fd; |
312 | } | 312 | } |
313 | 313 | ||
314 | dentry = d_alloc_anon(inode); | 314 | dentry = d_obtain_alias(inode); |
315 | if (dentry == NULL) { | 315 | if (IS_ERR(dentry)) { |
316 | iput(inode); | ||
317 | put_unused_fd(new_fd); | 316 | put_unused_fd(new_fd); |
318 | return -XFS_ERROR(ENOMEM); | 317 | return PTR_ERR(dentry); |
319 | } | 318 | } |
320 | 319 | ||
321 | /* Ensure umount returns EBUSY on umounts while this file is open. */ | 320 | /* Ensure umount returns EBUSY on umounts while this file is open. */ |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e39013619b26..37ebe36056eb 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -589,7 +589,7 @@ xfs_blkdev_get( | |||
589 | { | 589 | { |
590 | int error = 0; | 590 | int error = 0; |
591 | 591 | ||
592 | *bdevp = open_bdev_excl(name, 0, mp); | 592 | *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); |
593 | if (IS_ERR(*bdevp)) { | 593 | if (IS_ERR(*bdevp)) { |
594 | error = PTR_ERR(*bdevp); | 594 | error = PTR_ERR(*bdevp); |
595 | printk("XFS: Invalid device [%s], error=%d\n", name, error); | 595 | printk("XFS: Invalid device [%s], error=%d\n", name, error); |
@@ -603,7 +603,7 @@ xfs_blkdev_put( | |||
603 | struct block_device *bdev) | 603 | struct block_device *bdev) |
604 | { | 604 | { |
605 | if (bdev) | 605 | if (bdev) |
606 | close_bdev_excl(bdev); | 606 | close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); |
607 | } | 607 | } |
608 | 608 | ||
609 | /* | 609 | /* |